src/i965_render.c
/*
 * Copyright © 2006 Intel Corporation
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice (including the next
 * paragraph) shall be included in all copies or substantial portions of the
 * Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
 * DEALINGS IN THE SOFTWARE.
 *
 * Authors:
 *    Eric Anholt <eric@anholt.net>
 *    Keith Packard <keithp@keithp.com>
 *    Xiang Haihao <haihao.xiang@intel.com>
 *
 */

/*
 * Most of the rendering code is ported from xf86-video-intel/src/i965_video.c
 */

#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <assert.h>

#include <va/va_dricommon.h>

#include "intel_batchbuffer.h"
#include "intel_driver.h"
#include "i965_defines.h"
#include "i965_drv_video.h"
#include "i965_structs.h"

#include "i965_render.h"

#define SF_KERNEL_NUM_GRF       16
#define SF_MAX_THREADS          1

static const uint32_t sf_kernel_static[][4] =
{
#include "shaders/render/exa_sf.g4b"
};

#define PS_KERNEL_NUM_GRF       32
#define PS_MAX_THREADS          32

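/*
 * GRF registers are handed to threads in blocks of 16; the hardware
 * field stores the number of blocks minus one.
 */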
#define I965_GRF_BLOCKS(nreg)   (((nreg) + 15) / 16 - 1)

static const uint32_t ps_kernel_static[][4] =
{
#include "shaders/render/exa_wm_xy.g4b"
#include "shaders/render/exa_wm_src_affine.g4b"
#include "shaders/render/exa_wm_src_sample_planar.g4b"
#include "shaders/render/exa_wm_yuv_rgb.g4b"
#include "shaders/render/exa_wm_write.g4b"
};

static const uint32_t ps_subpic_kernel_static[][4] =
{
#include "shaders/render/exa_wm_xy.g4b"
#include "shaders/render/exa_wm_src_affine.g4b"
#include "shaders/render/exa_wm_src_sample_argb.g4b"
#include "shaders/render/exa_wm_write.g4b"
};

/* programs for Ironlake */
static const uint32_t sf_kernel_static_gen5[][4] =
{
#include "shaders/render/exa_sf.g4b.gen5"
};

static const uint32_t ps_kernel_static_gen5[][4] =
{
#include "shaders/render/exa_wm_xy.g4b.gen5"
#include "shaders/render/exa_wm_src_affine.g4b.gen5"
#include "shaders/render/exa_wm_src_sample_planar.g4b.gen5"
#include "shaders/render/exa_wm_yuv_rgb.g4b.gen5"
#include "shaders/render/exa_wm_write.g4b.gen5"
};

static const uint32_t ps_subpic_kernel_static_gen5[][4] =
{
#include "shaders/render/exa_wm_xy.g4b.gen5"
#include "shaders/render/exa_wm_src_affine.g4b.gen5"
#include "shaders/render/exa_wm_src_sample_argb.g4b.gen5"
#include "shaders/render/exa_wm_write.g4b.gen5"
};

/* programs for Sandybridge */
static const uint32_t sf_kernel_static_gen6[][4] =
{
};
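
/*
 * Note: the SF kernel is left empty on Gen6 and later; the SF stage is
 * programmed without a kernel there, and the empty array simply keeps
 * the render_kernels tables uniform across generations.
 */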

static const uint32_t ps_kernel_static_gen6[][4] = {
#include "shaders/render/exa_wm_src_affine.g6b"
#include "shaders/render/exa_wm_src_sample_planar.g6b"
#include "shaders/render/exa_wm_yuv_rgb.g6b"
#include "shaders/render/exa_wm_write.g6b"
};

static const uint32_t ps_subpic_kernel_static_gen6[][4] = {
#include "shaders/render/exa_wm_src_affine.g6b"
#include "shaders/render/exa_wm_src_sample_argb.g6b"
#include "shaders/render/exa_wm_write.g6b"
};

/* programs for Ivybridge */
static const uint32_t sf_kernel_static_gen7[][4] =
{
};

static const uint32_t ps_kernel_static_gen7[][4] = {
#include "shaders/render/exa_wm_src_affine.g7b"
#include "shaders/render/exa_wm_src_sample_planar.g7b"
#include "shaders/render/exa_wm_yuv_rgb.g7b"
#include "shaders/render/exa_wm_write.g7b"
};

static const uint32_t ps_subpic_kernel_static_gen7[][4] = {
#include "shaders/render/exa_wm_src_affine.g7b"
#include "shaders/render/exa_wm_src_sample_argb.g7b"
#include "shaders/render/exa_wm_write.g7b"
};

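/*
 * Surface states and the binding table share a single buffer object:
 * surface state entries are laid out at a fixed stride large enough for
 * either the Gen4-6 or the Gen7 layout, and the binding table starts
 * right after the last surface state slot.
 */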
#define SURFACE_STATE_PADDED_SIZE_I965  ALIGN(sizeof(struct i965_surface_state), 32)
#define SURFACE_STATE_PADDED_SIZE_GEN7  ALIGN(sizeof(struct gen7_surface_state), 32)
#define SURFACE_STATE_PADDED_SIZE       MAX(SURFACE_STATE_PADDED_SIZE_I965, SURFACE_STATE_PADDED_SIZE_GEN7)
#define SURFACE_STATE_OFFSET(index)     (SURFACE_STATE_PADDED_SIZE * (index))
#define BINDING_TABLE_OFFSET            SURFACE_STATE_OFFSET(MAX_RENDER_SURFACES)

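/* Bit-cast a float so it can be emitted into the batch as a dword. */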
static uint32_t float_to_uint(float f)
{
    union {
        uint32_t i;
        float f;
    } x;

    x.f = f;
    return x.i;
}

enum
{
    SF_KERNEL = 0,
    PS_KERNEL,
    PS_SUBPIC_KERNEL
};

static struct i965_kernel render_kernels_gen4[] = {
    {
        "SF",
        SF_KERNEL,
        sf_kernel_static,
        sizeof(sf_kernel_static),
        NULL
    },
    {
        "PS",
        PS_KERNEL,
        ps_kernel_static,
        sizeof(ps_kernel_static),
        NULL
    },
    {
        "PS_SUBPIC",
        PS_SUBPIC_KERNEL,
        ps_subpic_kernel_static,
        sizeof(ps_subpic_kernel_static),
        NULL
    }
};

static struct i965_kernel render_kernels_gen5[] = {
    {
        "SF",
        SF_KERNEL,
        sf_kernel_static_gen5,
        sizeof(sf_kernel_static_gen5),
        NULL
    },
    {
        "PS",
        PS_KERNEL,
        ps_kernel_static_gen5,
        sizeof(ps_kernel_static_gen5),
        NULL
    },
    {
        "PS_SUBPIC",
        PS_SUBPIC_KERNEL,
        ps_subpic_kernel_static_gen5,
        sizeof(ps_subpic_kernel_static_gen5),
        NULL
    }
};

static struct i965_kernel render_kernels_gen6[] = {
    {
        "SF",
        SF_KERNEL,
        sf_kernel_static_gen6,
        sizeof(sf_kernel_static_gen6),
        NULL
    },
    {
        "PS",
        PS_KERNEL,
        ps_kernel_static_gen6,
        sizeof(ps_kernel_static_gen6),
        NULL
    },
    {
        "PS_SUBPIC",
        PS_SUBPIC_KERNEL,
        ps_subpic_kernel_static_gen6,
        sizeof(ps_subpic_kernel_static_gen6),
        NULL
    }
};

static struct i965_kernel render_kernels_gen7[] = {
    {
        "SF",
        SF_KERNEL,
        sf_kernel_static_gen7,
        sizeof(sf_kernel_static_gen7),
        NULL
    },
    {
        "PS",
        PS_KERNEL,
        ps_kernel_static_gen7,
        sizeof(ps_kernel_static_gen7),
        NULL
    },
    {
        "PS_SUBPIC",
        PS_SUBPIC_KERNEL,
        ps_subpic_kernel_static_gen7,
        sizeof(ps_subpic_kernel_static_gen7),
        NULL
    }
};

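/*
 * Static URB partitioning: only the VS, SF and CS (constant) stages get
 * URB entries; GS and CLIP are disabled.  Sizes are in URB rows.
 */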
#define URB_VS_ENTRIES        8
#define URB_VS_ENTRY_SIZE     1

#define URB_GS_ENTRIES        0
#define URB_GS_ENTRY_SIZE     0

#define URB_CLIP_ENTRIES      0
#define URB_CLIP_ENTRY_SIZE   0

#define URB_SF_ENTRIES        1
#define URB_SF_ENTRY_SIZE     2

#define URB_CS_ENTRIES        1
#define URB_CS_ENTRY_SIZE     1

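/*
 * The VS is disabled (pass-through): the vertex data already holds final
 * screen coordinates, so only the URB bookkeeping needs to be valid.
 */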
static void
i965_render_vs_unit(VADriverContextP ctx)
{
    struct i965_driver_data *i965 = i965_driver_data(ctx);
    struct i965_render_state *render_state = &i965->render_state;
    struct i965_vs_unit_state *vs_state;

    dri_bo_map(render_state->vs.state, 1);
    assert(render_state->vs.state->virtual);
    vs_state = render_state->vs.state->virtual;
    memset(vs_state, 0, sizeof(*vs_state));

    if (IS_IRONLAKE(i965->intel.device_id))
        vs_state->thread4.nr_urb_entries = URB_VS_ENTRIES >> 2;
    else
        vs_state->thread4.nr_urb_entries = URB_VS_ENTRIES;

    vs_state->thread4.urb_entry_allocation_size = URB_VS_ENTRY_SIZE - 1;
    vs_state->vs6.vs_enable = 0;
    vs_state->vs6.vert_cache_disable = 1;

    dri_bo_unmap(render_state->vs.state);
}

static void
i965_render_sf_unit(VADriverContextP ctx)
{
    struct i965_driver_data *i965 = i965_driver_data(ctx);
    struct i965_render_state *render_state = &i965->render_state;
    struct i965_sf_unit_state *sf_state;

    dri_bo_map(render_state->sf.state, 1);
    assert(render_state->sf.state->virtual);
    sf_state = render_state->sf.state->virtual;
    memset(sf_state, 0, sizeof(*sf_state));

    sf_state->thread0.grf_reg_count = I965_GRF_BLOCKS(SF_KERNEL_NUM_GRF);
    sf_state->thread0.kernel_start_pointer = render_state->render_kernels[SF_KERNEL].bo->offset >> 6;

    sf_state->sf1.single_program_flow = 1; /* XXX */
    sf_state->sf1.binding_table_entry_count = 0;
    sf_state->sf1.thread_priority = 0;
    sf_state->sf1.floating_point_mode = 0; /* Mesa does this */
    sf_state->sf1.illegal_op_exception_enable = 1;
    sf_state->sf1.mask_stack_exception_enable = 1;
    sf_state->sf1.sw_exception_enable = 1;

    /* scratch space is not used in our kernel */
    sf_state->thread2.per_thread_scratch_space = 0;
    sf_state->thread2.scratch_space_base_pointer = 0;

    sf_state->thread3.const_urb_entry_read_length = 0; /* no const URBs */
    sf_state->thread3.const_urb_entry_read_offset = 0; /* no const URBs */
    sf_state->thread3.urb_entry_read_length = 1; /* 1 URB per vertex */
    sf_state->thread3.urb_entry_read_offset = 0;
    sf_state->thread3.dispatch_grf_start_reg = 3;

    sf_state->thread4.max_threads = SF_MAX_THREADS - 1;
    sf_state->thread4.urb_entry_allocation_size = URB_SF_ENTRY_SIZE - 1;
    sf_state->thread4.nr_urb_entries = URB_SF_ENTRIES;
    sf_state->thread4.stats_enable = 1;

    sf_state->sf5.viewport_transform = 0; /* skip viewport */

    sf_state->sf6.cull_mode = I965_CULLMODE_NONE;
    sf_state->sf6.scissor = 0;

    sf_state->sf7.trifan_pv = 2;

    sf_state->sf6.dest_org_vbias = 0x8;
    sf_state->sf6.dest_org_hbias = 0x8;

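    /*
     * The reloc delta (grf_reg_count << 1) re-adds the low bits of
     * thread0 when the kernel start pointer is patched in, so the GRF
     * count survives the relocation.
     */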
    dri_bo_emit_reloc(render_state->sf.state,
                      I915_GEM_DOMAIN_INSTRUCTION, 0,
                      sf_state->thread0.grf_reg_count << 1,
                      offsetof(struct i965_sf_unit_state, thread0),
                      render_state->render_kernels[SF_KERNEL].bo);

    dri_bo_unmap(render_state->sf.state);
}

static void
i965_render_sampler(VADriverContextP ctx)
{
    struct i965_driver_data *i965 = i965_driver_data(ctx);
    struct i965_render_state *render_state = &i965->render_state;
    struct i965_sampler_state *sampler_state;
    int i;

    assert(render_state->wm.sampler_count > 0);
    assert(render_state->wm.sampler_count <= MAX_SAMPLERS);

    dri_bo_map(render_state->wm.sampler, 1);
    assert(render_state->wm.sampler->virtual);
    sampler_state = render_state->wm.sampler->virtual;

    /* every sampler: bilinear filtering, coordinates clamped at the edges */
    for (i = 0; i < render_state->wm.sampler_count; i++) {
        memset(sampler_state, 0, sizeof(*sampler_state));
        sampler_state->ss0.min_filter = I965_MAPFILTER_LINEAR;
        sampler_state->ss0.mag_filter = I965_MAPFILTER_LINEAR;
        sampler_state->ss1.r_wrap_mode = I965_TEXCOORDMODE_CLAMP;
        sampler_state->ss1.s_wrap_mode = I965_TEXCOORDMODE_CLAMP;
        sampler_state->ss1.t_wrap_mode = I965_TEXCOORDMODE_CLAMP;
        sampler_state++;
    }

    dri_bo_unmap(render_state->wm.sampler);
}

static void
i965_subpic_render_wm_unit(VADriverContextP ctx)
{
    struct i965_driver_data *i965 = i965_driver_data(ctx);
    struct i965_render_state *render_state = &i965->render_state;
    struct i965_wm_unit_state *wm_state;

    assert(render_state->wm.sampler);

    dri_bo_map(render_state->wm.state, 1);
    assert(render_state->wm.state->virtual);
    wm_state = render_state->wm.state->virtual;
    memset(wm_state, 0, sizeof(*wm_state));

    wm_state->thread0.grf_reg_count = I965_GRF_BLOCKS(PS_KERNEL_NUM_GRF);
    wm_state->thread0.kernel_start_pointer = render_state->render_kernels[PS_SUBPIC_KERNEL].bo->offset >> 6;

    wm_state->thread1.single_program_flow = 1; /* XXX */

    if (IS_IRONLAKE(i965->intel.device_id))
        wm_state->thread1.binding_table_entry_count = 0; /* hardware requirement */
    else
        wm_state->thread1.binding_table_entry_count = 7;

    wm_state->thread2.scratch_space_base_pointer = 0;
    wm_state->thread2.per_thread_scratch_space = 0; /* encoding: 0 means 1024 bytes, but scratch is unused */

    wm_state->thread3.dispatch_grf_start_reg = 3; /* XXX */
    wm_state->thread3.const_urb_entry_read_length = 0;
    wm_state->thread3.const_urb_entry_read_offset = 0;
    wm_state->thread3.urb_entry_read_length = 1; /* XXX */
    wm_state->thread3.urb_entry_read_offset = 0; /* XXX */

    wm_state->wm4.stats_enable = 0;
    wm_state->wm4.sampler_state_pointer = render_state->wm.sampler->offset >> 5;

    if (IS_IRONLAKE(i965->intel.device_id)) {
        wm_state->wm4.sampler_count = 0;        /* hardware requirement */
        wm_state->wm5.max_threads = 12 * 6 - 1;
    } else {
        wm_state->wm4.sampler_count = (render_state->wm.sampler_count + 3) / 4;
        wm_state->wm5.max_threads = 10 * 5 - 1;
    }

    wm_state->wm5.thread_dispatch_enable = 1;
    wm_state->wm5.enable_16_pix = 1;
    wm_state->wm5.enable_8_pix = 0;
    wm_state->wm5.early_depth_test = 1;

    dri_bo_emit_reloc(render_state->wm.state,
                      I915_GEM_DOMAIN_INSTRUCTION, 0,
                      wm_state->thread0.grf_reg_count << 1,
                      offsetof(struct i965_wm_unit_state, thread0),
                      render_state->render_kernels[PS_SUBPIC_KERNEL].bo);

    dri_bo_emit_reloc(render_state->wm.state,
                      I915_GEM_DOMAIN_INSTRUCTION, 0,
                      wm_state->wm4.sampler_count << 2,
                      offsetof(struct i965_wm_unit_state, wm4),
                      render_state->wm.sampler);

    dri_bo_unmap(render_state->wm.state);
}

static void
i965_render_wm_unit(VADriverContextP ctx)
{
    struct i965_driver_data *i965 = i965_driver_data(ctx);
    struct i965_render_state *render_state = &i965->render_state;
    struct i965_wm_unit_state *wm_state;

    assert(render_state->wm.sampler);

    dri_bo_map(render_state->wm.state, 1);
    assert(render_state->wm.state->virtual);
    wm_state = render_state->wm.state->virtual;
    memset(wm_state, 0, sizeof(*wm_state));

    wm_state->thread0.grf_reg_count = I965_GRF_BLOCKS(PS_KERNEL_NUM_GRF);
    wm_state->thread0.kernel_start_pointer = render_state->render_kernels[PS_KERNEL].bo->offset >> 6;

    wm_state->thread1.single_program_flow = 1; /* XXX */

    if (IS_IRONLAKE(i965->intel.device_id))
        wm_state->thread1.binding_table_entry_count = 0;        /* hardware requirement */
    else
        wm_state->thread1.binding_table_entry_count = 7;

    wm_state->thread2.scratch_space_base_pointer = 0;
    wm_state->thread2.per_thread_scratch_space = 0; /* encoding: 0 means 1024 bytes, but scratch is unused */

    wm_state->thread3.dispatch_grf_start_reg = 2; /* XXX */
    wm_state->thread3.const_urb_entry_read_length = 1;
    wm_state->thread3.const_urb_entry_read_offset = 0;
    wm_state->thread3.urb_entry_read_length = 1; /* XXX */
    wm_state->thread3.urb_entry_read_offset = 0; /* XXX */

    wm_state->wm4.stats_enable = 0;
    wm_state->wm4.sampler_state_pointer = render_state->wm.sampler->offset >> 5;

    if (IS_IRONLAKE(i965->intel.device_id)) {
        wm_state->wm4.sampler_count = 0;        /* hardware requirement */
        wm_state->wm5.max_threads = 12 * 6 - 1;
    } else {
        wm_state->wm4.sampler_count = (render_state->wm.sampler_count + 3) / 4;
        wm_state->wm5.max_threads = 10 * 5 - 1;
    }

    wm_state->wm5.thread_dispatch_enable = 1;
    wm_state->wm5.enable_16_pix = 1;
    wm_state->wm5.enable_8_pix = 0;
    wm_state->wm5.early_depth_test = 1;

    dri_bo_emit_reloc(render_state->wm.state,
                      I915_GEM_DOMAIN_INSTRUCTION, 0,
                      wm_state->thread0.grf_reg_count << 1,
                      offsetof(struct i965_wm_unit_state, thread0),
                      render_state->render_kernels[PS_KERNEL].bo);

    dri_bo_emit_reloc(render_state->wm.state,
                      I915_GEM_DOMAIN_INSTRUCTION, 0,
                      wm_state->wm4.sampler_count << 2,
                      offsetof(struct i965_wm_unit_state, wm4),
                      render_state->wm.sampler);

    dri_bo_unmap(render_state->wm.state);
}

static void
i965_render_cc_viewport(VADriverContextP ctx)
{
    struct i965_driver_data *i965 = i965_driver_data(ctx);
    struct i965_render_state *render_state = &i965->render_state;
    struct i965_cc_viewport *cc_viewport;

    dri_bo_map(render_state->cc.viewport, 1);
    assert(render_state->cc.viewport->virtual);
    cc_viewport = render_state->cc.viewport->virtual;
    memset(cc_viewport, 0, sizeof(*cc_viewport));

    /* a huge range effectively disables depth clamping */
    cc_viewport->min_depth = -1.e35;
    cc_viewport->max_depth = 1.e35;

    dri_bo_unmap(render_state->cc.viewport);
}

static void
i965_subpic_render_cc_unit(VADriverContextP ctx)
{
    struct i965_driver_data *i965 = i965_driver_data(ctx);
    struct i965_render_state *render_state = &i965->render_state;
    struct i965_cc_unit_state *cc_state;

    assert(render_state->cc.viewport);

    dri_bo_map(render_state->cc.state, 1);
    assert(render_state->cc.state->virtual);
    cc_state = render_state->cc.state->virtual;
    memset(cc_state, 0, sizeof(*cc_state));

    cc_state->cc0.stencil_enable = 0;   /* disable stencil */
    cc_state->cc2.depth_test = 0;       /* disable depth test */
    cc_state->cc2.logicop_enable = 0;   /* disable logic op */
    cc_state->cc3.ia_blend_enable = 0;  /* blend alpha just like colors */
    cc_state->cc3.blend_enable = 1;     /* enable color blend */
    cc_state->cc3.alpha_test = 0;       /* disable alpha test */
    cc_state->cc3.alpha_test_format = 0; /* ALPHATEST_UNORM8: store the alpha value as UNORM8 */
    cc_state->cc3.alpha_test_func = 5;  /* COMPAREFUNCTION_LESS: pass if less than the reference */
    cc_state->cc4.cc_viewport_state_offset = render_state->cc.viewport->offset >> 5;

    cc_state->cc5.dither_enable = 0;    /* disable dither */
    cc_state->cc5.logicop_func = 0xc;   /* WHITE */
    cc_state->cc5.statistics_enable = 1;
    cc_state->cc5.ia_blend_function = I965_BLENDFUNCTION_ADD;
    cc_state->cc5.ia_src_blend_factor = I965_BLENDFACTOR_DST_ALPHA;
    cc_state->cc5.ia_dest_blend_factor = I965_BLENDFACTOR_DST_ALPHA;

    cc_state->cc6.clamp_post_alpha_blend = 0;
    cc_state->cc6.clamp_pre_alpha_blend = 0;

    /* final color = src_color * src_blend_factor +/- dst_color * dest_blend_factor */
    cc_state->cc6.blend_function = I965_BLENDFUNCTION_ADD;
    cc_state->cc6.src_blend_factor = I965_BLENDFACTOR_SRC_ALPHA;
    cc_state->cc6.dest_blend_factor = I965_BLENDFACTOR_INV_SRC_ALPHA;

    /* alpha test reference */
    cc_state->cc7.alpha_ref.f = 0.0;

    dri_bo_emit_reloc(render_state->cc.state,
                      I915_GEM_DOMAIN_INSTRUCTION, 0,
                      0,
                      offsetof(struct i965_cc_unit_state, cc4),
                      render_state->cc.viewport);

    dri_bo_unmap(render_state->cc.state);
}

static void
i965_render_cc_unit(VADriverContextP ctx)
{
    struct i965_driver_data *i965 = i965_driver_data(ctx);
    struct i965_render_state *render_state = &i965->render_state;
    struct i965_cc_unit_state *cc_state;

    assert(render_state->cc.viewport);

    dri_bo_map(render_state->cc.state, 1);
    assert(render_state->cc.state->virtual);
    cc_state = render_state->cc.state->virtual;
    memset(cc_state, 0, sizeof(*cc_state));

    cc_state->cc0.stencil_enable = 0;   /* disable stencil */
    cc_state->cc2.depth_test = 0;       /* disable depth test */
    cc_state->cc2.logicop_enable = 1;   /* enable logic op */
    cc_state->cc3.ia_blend_enable = 0;  /* blend alpha just like colors */
    cc_state->cc3.blend_enable = 0;     /* disable color blend */
    cc_state->cc3.alpha_test = 0;       /* disable alpha test */
    cc_state->cc4.cc_viewport_state_offset = render_state->cc.viewport->offset >> 5;

    cc_state->cc5.dither_enable = 0;    /* disable dither */
    cc_state->cc5.logicop_func = 0xc;   /* WHITE */
    cc_state->cc5.statistics_enable = 1;
    cc_state->cc5.ia_blend_function = I965_BLENDFUNCTION_ADD;
    cc_state->cc5.ia_src_blend_factor = I965_BLENDFACTOR_ONE;
    cc_state->cc5.ia_dest_blend_factor = I965_BLENDFACTOR_ONE;

    dri_bo_emit_reloc(render_state->cc.state,
                      I915_GEM_DOMAIN_INSTRUCTION, 0,
                      0,
                      offsetof(struct i965_cc_unit_state, cc4),
                      render_state->cc.viewport);

    dri_bo_unmap(render_state->cc.state);
}

static void
i965_render_set_surface_tiling(struct i965_surface_state *ss, unsigned int tiling)
{
    switch (tiling) {
    case I915_TILING_NONE:
        ss->ss3.tiled_surface = 0;
        ss->ss3.tile_walk = 0;
        break;
    case I915_TILING_X:
        ss->ss3.tiled_surface = 1;
        ss->ss3.tile_walk = I965_TILEWALK_XMAJOR;
        break;
    case I915_TILING_Y:
        ss->ss3.tiled_surface = 1;
        ss->ss3.tile_walk = I965_TILEWALK_YMAJOR;
        break;
    }
}

static void
i965_render_set_surface_state(struct i965_surface_state *ss,
                              dri_bo *bo, unsigned long offset,
                              int width, int height,
                              int pitch, int format)
{
    unsigned int tiling;
    unsigned int swizzle;

    memset(ss, 0, sizeof(*ss));
    ss->ss0.surface_type = I965_SURFACE_2D;
    ss->ss0.surface_format = format;
    ss->ss0.color_blend = 1;

    ss->ss1.base_addr = bo->offset + offset;

    /* the hardware stores dimensions and pitch minus one */
    ss->ss2.width = width - 1;
    ss->ss2.height = height - 1;

    ss->ss3.pitch = pitch - 1;

    dri_bo_get_tiling(bo, &tiling, &swizzle);
    i965_render_set_surface_tiling(ss, tiling);
}

static void
gen7_render_set_surface_tiling(struct gen7_surface_state *ss, uint32_t tiling)
{
    switch (tiling) {
    case I915_TILING_NONE:
        ss->ss0.tiled_surface = 0;
        ss->ss0.tile_walk = 0;
        break;
    case I915_TILING_X:
        ss->ss0.tiled_surface = 1;
        ss->ss0.tile_walk = I965_TILEWALK_XMAJOR;
        break;
    case I915_TILING_Y:
        ss->ss0.tiled_surface = 1;
        ss->ss0.tile_walk = I965_TILEWALK_YMAJOR;
        break;
    }
}

static void
gen7_render_set_surface_state(struct gen7_surface_state *ss,
                              dri_bo *bo, unsigned long offset,
                              int width, int height,
                              int pitch, int format)
{
    unsigned int tiling;
    unsigned int swizzle;

    memset(ss, 0, sizeof(*ss));

    ss->ss0.surface_type = I965_SURFACE_2D;
    ss->ss0.surface_format = format;

    ss->ss1.base_addr = bo->offset + offset;

    ss->ss2.width = width - 1;
    ss->ss2.height = height - 1;

    ss->ss3.pitch = pitch - 1;

    dri_bo_get_tiling(bo, &tiling, &swizzle);
    gen7_render_set_surface_tiling(ss, tiling);
}

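/*
 * Fill one source surface state entry and point the matching binding
 * table slot at it; Gen7 uses a different surface state layout, hence
 * the two paths.
 */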
static void
i965_render_src_surface_state(VADriverContextP ctx,
                              int index,
                              dri_bo *region,
                              unsigned long offset,
                              int w, int h,
                              int pitch, int format)
{
    struct i965_driver_data *i965 = i965_driver_data(ctx);
    struct i965_render_state *render_state = &i965->render_state;
    void *ss;
    dri_bo *ss_bo = render_state->wm.surface_state_binding_table_bo;

    assert(index < MAX_RENDER_SURFACES);

    dri_bo_map(ss_bo, 1);
    assert(ss_bo->virtual);
    ss = (char *)ss_bo->virtual + SURFACE_STATE_OFFSET(index);

    if (IS_GEN7(i965->intel.device_id)) {
        gen7_render_set_surface_state(ss,
                                      region, offset,
                                      w, h,
                                      pitch, format);
        dri_bo_emit_reloc(ss_bo,
                          I915_GEM_DOMAIN_SAMPLER, 0,
                          offset,
                          SURFACE_STATE_OFFSET(index) + offsetof(struct gen7_surface_state, ss1),
                          region);
    } else {
        i965_render_set_surface_state(ss,
                                      region, offset,
                                      w, h,
                                      pitch, format);
        dri_bo_emit_reloc(ss_bo,
                          I915_GEM_DOMAIN_SAMPLER, 0,
                          offset,
                          SURFACE_STATE_OFFSET(index) + offsetof(struct i965_surface_state, ss1),
                          region);
    }

    ((unsigned int *)((char *)ss_bo->virtual + BINDING_TABLE_OFFSET))[index] = SURFACE_STATE_OFFSET(index);
    dri_bo_unmap(ss_bo);
    render_state->wm.sampler_count++; /* one sampler per bound source surface */
}

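/*
 * Bind the source planes.  Slot 0 is reserved for the destination; the
 * Y plane goes into slots 1 and 2, chroma into slots 3-6.  Each plane
 * is bound twice, which is the layout the sampling kernels expect.
 */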
static void
i965_render_src_surfaces_state(VADriverContextP ctx,
                               VASurfaceID surface)
{
    struct i965_driver_data *i965 = i965_driver_data(ctx);
    struct object_surface *obj_surface;
    int region_pitch;
    int rw, rh;
    dri_bo *region;

    obj_surface = SURFACE(surface);
    assert(obj_surface);

    region_pitch = obj_surface->width;
    rw = obj_surface->orig_width;
    rh = obj_surface->orig_height;
    region = obj_surface->bo;

    i965_render_src_surface_state(ctx, 1, region, 0, rw, rh, region_pitch, I965_SURFACEFORMAT_R8_UNORM);     /* Y */
    i965_render_src_surface_state(ctx, 2, region, 0, rw, rh, region_pitch, I965_SURFACEFORMAT_R8_UNORM);

    if (obj_surface->fourcc == VA_FOURCC('N', 'V', '1', '2')) {
        i965_render_src_surface_state(ctx, 3, region,
                                      region_pitch * obj_surface->y_cb_offset,
                                      obj_surface->cb_cr_width, obj_surface->cb_cr_height, obj_surface->cb_cr_pitch,
                                      I965_SURFACEFORMAT_R8G8_UNORM); /* UV */
        i965_render_src_surface_state(ctx, 4, region,
                                      region_pitch * obj_surface->y_cb_offset,
                                      obj_surface->cb_cr_width, obj_surface->cb_cr_height, obj_surface->cb_cr_pitch,
                                      I965_SURFACEFORMAT_R8G8_UNORM);
    } else {
        i965_render_src_surface_state(ctx, 3, region,
                                      region_pitch * obj_surface->y_cb_offset,
                                      obj_surface->cb_cr_width, obj_surface->cb_cr_height, obj_surface->cb_cr_pitch,
                                      I965_SURFACEFORMAT_R8_UNORM); /* U */
        i965_render_src_surface_state(ctx, 4, region,
                                      region_pitch * obj_surface->y_cb_offset,
                                      obj_surface->cb_cr_width, obj_surface->cb_cr_height, obj_surface->cb_cr_pitch,
                                      I965_SURFACEFORMAT_R8_UNORM);
        i965_render_src_surface_state(ctx, 5, region,
                                      region_pitch * obj_surface->y_cr_offset,
                                      obj_surface->cb_cr_width, obj_surface->cb_cr_height, obj_surface->cb_cr_pitch,
                                      I965_SURFACEFORMAT_R8_UNORM); /* V */
        i965_render_src_surface_state(ctx, 6, region,
                                      region_pitch * obj_surface->y_cr_offset,
                                      obj_surface->cb_cr_width, obj_surface->cb_cr_height, obj_surface->cb_cr_pitch,
                                      I965_SURFACEFORMAT_R8_UNORM);
    }
}

static void
i965_subpic_render_src_surfaces_state(VADriverContextP ctx,
                                      VASurfaceID surface)
{
    struct i965_driver_data *i965 = i965_driver_data(ctx);
    struct object_surface *obj_surface = SURFACE(surface);
    struct object_subpic *obj_subpic;
    struct object_image *obj_image;
    dri_bo *subpic_region;

    /* check the surface before dereferencing it */
    assert(obj_surface);
    assert(obj_surface->bo);

    obj_subpic = SUBPIC(obj_surface->subpic);
    obj_image = IMAGE(obj_subpic->image);
    subpic_region = obj_image->bo;

    /* subpicture surface */
    i965_render_src_surface_state(ctx, 1, subpic_region, 0, obj_subpic->width, obj_subpic->height, obj_subpic->pitch, obj_subpic->format);
    i965_render_src_surface_state(ctx, 2, subpic_region, 0, obj_subpic->width, obj_subpic->height, obj_subpic->pitch, obj_subpic->format);
}

static void
i965_render_dest_surface_state(VADriverContextP ctx, int index)
{
    struct i965_driver_data *i965 = i965_driver_data(ctx);
    struct i965_render_state *render_state = &i965->render_state;
    struct intel_region *dest_region = render_state->draw_region;
    void *ss;
    dri_bo *ss_bo = render_state->wm.surface_state_binding_table_bo;
    int format;

    assert(index < MAX_RENDER_SURFACES);

    if (dest_region->cpp == 2) {
        format = I965_SURFACEFORMAT_B5G6R5_UNORM;
    } else {
        format = I965_SURFACEFORMAT_B8G8R8A8_UNORM;
    }

    dri_bo_map(ss_bo, 1);
    assert(ss_bo->virtual);
    ss = (char *)ss_bo->virtual + SURFACE_STATE_OFFSET(index);

    if (IS_GEN7(i965->intel.device_id)) {
        gen7_render_set_surface_state(ss,
                                      dest_region->bo, 0,
                                      dest_region->width, dest_region->height,
                                      dest_region->pitch, format);
        dri_bo_emit_reloc(ss_bo,
                          I915_GEM_DOMAIN_RENDER, I915_GEM_DOMAIN_RENDER,
                          0,
                          SURFACE_STATE_OFFSET(index) + offsetof(struct gen7_surface_state, ss1),
                          dest_region->bo);
    } else {
        i965_render_set_surface_state(ss,
                                      dest_region->bo, 0,
                                      dest_region->width, dest_region->height,
                                      dest_region->pitch, format);
        dri_bo_emit_reloc(ss_bo,
                          I915_GEM_DOMAIN_RENDER, I915_GEM_DOMAIN_RENDER,
                          0,
                          SURFACE_STATE_OFFSET(index) + offsetof(struct i965_surface_state, ss1),
                          dest_region->bo);
    }

    ((unsigned int *)((char *)ss_bo->virtual + BINDING_TABLE_OFFSET))[index] = SURFACE_STATE_OFFSET(index);
    dri_bo_unmap(ss_bo);
}

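/*
 * Upload three vertices of a RECTLIST in {tx, ty, x, y} order: the
 * rectangle is given by its bottom-right, bottom-left and top-left
 * corners, and the hardware infers the fourth corner.
 */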
static void
i965_subpic_render_upload_vertex(VADriverContextP ctx,
                                 VASurfaceID surface,
                                 const VARectangle *output_rect)
{
    struct i965_driver_data  *i965         = i965_driver_data(ctx);
    struct i965_render_state *render_state = &i965->render_state;
    struct object_surface    *obj_surface  = SURFACE(surface);
    struct object_subpic     *obj_subpic   = SUBPIC(obj_surface->subpic);
    VARectangle dst_rect;
    float *vb, tx1, tx2, ty1, ty2, x1, x2, y1, y2;
    int i = 0;

    if (obj_subpic->flags & VA_SUBPICTURE_DESTINATION_IS_SCREEN_COORD)
        dst_rect = obj_subpic->dst_rect;
    else {
        const float sx  = (float)output_rect->width  / obj_surface->orig_width;
        const float sy  = (float)output_rect->height / obj_surface->orig_height;
        dst_rect.x      = output_rect->x + sx * obj_subpic->dst_rect.x;
        dst_rect.y      = output_rect->y + sy * obj_subpic->dst_rect.y;
        dst_rect.width  = sx * obj_subpic->dst_rect.width;
        dst_rect.height = sy * obj_subpic->dst_rect.height;
    }

    dri_bo_map(render_state->vb.vertex_buffer, 1);
    assert(render_state->vb.vertex_buffer->virtual);
    vb = render_state->vb.vertex_buffer->virtual;

    tx1 = (float)obj_subpic->src_rect.x / obj_subpic->width;
    ty1 = (float)obj_subpic->src_rect.y / obj_subpic->height;
    tx2 = (float)(obj_subpic->src_rect.x + obj_subpic->src_rect.width) / obj_subpic->width;
    ty2 = (float)(obj_subpic->src_rect.y + obj_subpic->src_rect.height) / obj_subpic->height;

    x1 = (float)dst_rect.x;
    y1 = (float)dst_rect.y;
    x2 = (float)(dst_rect.x + dst_rect.width);
    y2 = (float)(dst_rect.y + dst_rect.height);

    vb[i++] = tx2;
    vb[i++] = ty2;
    vb[i++] = x2;
    vb[i++] = y2;

    vb[i++] = tx1;
    vb[i++] = ty2;
    vb[i++] = x1;
    vb[i++] = y2;

    vb[i++] = tx1;
    vb[i++] = ty1;
    vb[i++] = x1;
    vb[i++] = y1;
    dri_bo_unmap(render_state->vb.vertex_buffer);
}

static void
i965_render_upload_vertex(
    VADriverContextP   ctx,
    VASurfaceID        surface,
    const VARectangle *src_rect,
    const VARectangle *dst_rect
)
{
    struct i965_driver_data *i965 = i965_driver_data(ctx);
    struct i965_render_state *render_state = &i965->render_state;
    struct intel_region *dest_region = render_state->draw_region;
    struct object_surface *obj_surface;
    float *vb;

    float u1, v1, u2, v2;
    int i, width, height;
    int box_x1 = dest_region->x + dst_rect->x;
    int box_y1 = dest_region->y + dst_rect->y;
    int box_x2 = box_x1 + dst_rect->width;
    int box_y2 = box_y1 + dst_rect->height;

    obj_surface = SURFACE(surface);
    assert(obj_surface);
    width = obj_surface->orig_width;
    height = obj_surface->orig_height;

    u1 = (float)src_rect->x / width;
    v1 = (float)src_rect->y / height;
    u2 = (float)(src_rect->x + src_rect->width) / width;
    v2 = (float)(src_rect->y + src_rect->height) / height;

    dri_bo_map(render_state->vb.vertex_buffer, 1);
    assert(render_state->vb.vertex_buffer->virtual);
    vb = render_state->vb.vertex_buffer->virtual;

    i = 0;
    vb[i++] = u2;
    vb[i++] = v2;
    vb[i++] = (float)box_x2;
    vb[i++] = (float)box_y2;

    vb[i++] = u1;
    vb[i++] = v2;
    vb[i++] = (float)box_x1;
    vb[i++] = (float)box_y2;

    vb[i++] = u1;
    vb[i++] = v1;
    vb[i++] = (float)box_x1;
    vb[i++] = (float)box_y1;

    dri_bo_unmap(render_state->vb.vertex_buffer);
}

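/*
 * A single CURBE constant tells the PS kernel which chroma layout to
 * sample: 0 = separate U/V planes, 1 = interleaved NV12 UV,
 * 2 = monochrome (YUV400).
 */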
static void
i965_render_upload_constants(VADriverContextP ctx,
                             VASurfaceID surface)
{
    struct i965_driver_data *i965 = i965_driver_data(ctx);
    struct i965_render_state *render_state = &i965->render_state;
    unsigned short *constant_buffer;
    struct object_surface *obj_surface = SURFACE(surface);

    dri_bo_map(render_state->curbe.bo, 1);
    assert(render_state->curbe.bo->virtual);
    constant_buffer = render_state->curbe.bo->virtual;

    if (obj_surface->subsampling == SUBSAMPLE_YUV400) {
        assert(obj_surface->fourcc == VA_FOURCC('I', 'M', 'C', '1') ||
               obj_surface->fourcc == VA_FOURCC('I', 'M', 'C', '3'));
        *constant_buffer = 2;
    } else {
        if (obj_surface->fourcc == VA_FOURCC('N', 'V', '1', '2'))
            *constant_buffer = 1;
        else
            *constant_buffer = 0;
    }

    dri_bo_unmap(render_state->curbe.bo);
}

static void
i965_surface_render_state_setup(
    VADriverContextP   ctx,
    VASurfaceID        surface,
    const VARectangle *src_rect,
    const VARectangle *dst_rect
)
{
    i965_render_vs_unit(ctx);
    i965_render_sf_unit(ctx);
    i965_render_dest_surface_state(ctx, 0);
    i965_render_src_surfaces_state(ctx, surface);
    i965_render_sampler(ctx);
    i965_render_wm_unit(ctx);
    i965_render_cc_viewport(ctx);
    i965_render_cc_unit(ctx);
    i965_render_upload_vertex(ctx, surface, src_rect, dst_rect);
    i965_render_upload_constants(ctx, surface);
}

static void
i965_subpic_render_state_setup(
    VADriverContextP   ctx,
    VASurfaceID        surface,
    const VARectangle *src_rect,
    const VARectangle *dst_rect
)
{
    i965_render_vs_unit(ctx);
    i965_render_sf_unit(ctx);
    i965_render_dest_surface_state(ctx, 0);
    i965_subpic_render_src_surfaces_state(ctx, surface);
    i965_render_sampler(ctx);
    i965_subpic_render_wm_unit(ctx);
    i965_render_cc_viewport(ctx);
    i965_subpic_render_cc_unit(ctx);
    i965_subpic_render_upload_vertex(ctx, surface, dst_rect);
}

static void
i965_render_pipeline_select(VADriverContextP ctx)
{
    struct i965_driver_data *i965 = i965_driver_data(ctx);
    struct intel_batchbuffer *batch = i965->batch;

    BEGIN_BATCH(batch, 1);
    OUT_BATCH(batch, CMD_PIPELINE_SELECT | PIPELINE_SELECT_3D);
    ADVANCE_BATCH(batch);
}

static void
i965_render_state_sip(VADriverContextP ctx)
{
    struct i965_driver_data *i965 = i965_driver_data(ctx);
    struct intel_batchbuffer *batch = i965->batch;

    BEGIN_BATCH(batch, 2);
    OUT_BATCH(batch, CMD_STATE_SIP | 0);
    OUT_BATCH(batch, 0);
    ADVANCE_BATCH(batch);
}

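/*
 * The surface state base address points at the combined surface state /
 * binding table BO, so the offsets written into the binding table are
 * relative to that buffer.
 */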
static void
i965_render_state_base_address(VADriverContextP ctx)
{
    struct i965_driver_data *i965 = i965_driver_data(ctx);
    struct intel_batchbuffer *batch = i965->batch;
    struct i965_render_state *render_state = &i965->render_state;

    if (IS_IRONLAKE(i965->intel.device_id)) {
        BEGIN_BATCH(batch, 8);
        OUT_BATCH(batch, CMD_STATE_BASE_ADDRESS | 6);
        OUT_BATCH(batch, 0 | BASE_ADDRESS_MODIFY);
        OUT_RELOC(batch, render_state->wm.surface_state_binding_table_bo, I915_GEM_DOMAIN_INSTRUCTION, 0, BASE_ADDRESS_MODIFY);
        OUT_BATCH(batch, 0 | BASE_ADDRESS_MODIFY);
        OUT_BATCH(batch, 0 | BASE_ADDRESS_MODIFY);
        OUT_BATCH(batch, 0 | BASE_ADDRESS_MODIFY);
        OUT_BATCH(batch, 0 | BASE_ADDRESS_MODIFY);
        OUT_BATCH(batch, 0 | BASE_ADDRESS_MODIFY);
        ADVANCE_BATCH(batch);
    } else {
        BEGIN_BATCH(batch, 6);
        OUT_BATCH(batch, CMD_STATE_BASE_ADDRESS | 4);
        OUT_BATCH(batch, 0 | BASE_ADDRESS_MODIFY);
        OUT_RELOC(batch, render_state->wm.surface_state_binding_table_bo, I915_GEM_DOMAIN_INSTRUCTION, 0, BASE_ADDRESS_MODIFY);
        OUT_BATCH(batch, 0 | BASE_ADDRESS_MODIFY);
        OUT_BATCH(batch, 0 | BASE_ADDRESS_MODIFY);
        OUT_BATCH(batch, 0 | BASE_ADDRESS_MODIFY);
        ADVANCE_BATCH(batch);
    }
}

static void
i965_render_binding_table_pointers(VADriverContextP ctx)
{
    struct i965_driver_data *i965 = i965_driver_data(ctx);
    struct intel_batchbuffer *batch = i965->batch;

    BEGIN_BATCH(batch, 6);
    OUT_BATCH(batch, CMD_BINDING_TABLE_POINTERS | 4);
    OUT_BATCH(batch, 0); /* vs */
    OUT_BATCH(batch, 0); /* gs */
    OUT_BATCH(batch, 0); /* clip */
    OUT_BATCH(batch, 0); /* sf */
    OUT_BATCH(batch, BINDING_TABLE_OFFSET);
    ADVANCE_BATCH(batch);
}

static void
i965_render_constant_color(VADriverContextP ctx)
{
    struct i965_driver_data *i965 = i965_driver_data(ctx);
    struct intel_batchbuffer *batch = i965->batch;

    BEGIN_BATCH(batch, 5);
    OUT_BATCH(batch, CMD_CONSTANT_COLOR | 3);
    OUT_BATCH(batch, float_to_uint(1.0));
    OUT_BATCH(batch, float_to_uint(0.0));
    OUT_BATCH(batch, float_to_uint(1.0));
    OUT_BATCH(batch, float_to_uint(1.0));
    ADVANCE_BATCH(batch);
}

static void
i965_render_pipelined_pointers(VADriverContextP ctx)
{
    struct i965_driver_data *i965 = i965_driver_data(ctx);
    struct intel_batchbuffer *batch = i965->batch;
    struct i965_render_state *render_state = &i965->render_state;

    BEGIN_BATCH(batch, 7);
    OUT_BATCH(batch, CMD_PIPELINED_POINTERS | 5);
    OUT_RELOC(batch, render_state->vs.state, I915_GEM_DOMAIN_INSTRUCTION, 0, 0);
    OUT_BATCH(batch, 0);  /* disable GS */
    OUT_BATCH(batch, 0);  /* disable CLIP */
    OUT_RELOC(batch, render_state->sf.state, I915_GEM_DOMAIN_INSTRUCTION, 0, 0);
    OUT_RELOC(batch, render_state->wm.state, I915_GEM_DOMAIN_INSTRUCTION, 0, 0);
    OUT_RELOC(batch, render_state->cc.state, I915_GEM_DOMAIN_INSTRUCTION, 0, 0);
    ADVANCE_BATCH(batch);
}

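/*
 * URB_FENCE takes cumulative end offsets: each stage's fence is the sum
 * of all allocations up to and including that stage.
 */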
static void
i965_render_urb_layout(VADriverContextP ctx)
{
    struct i965_driver_data *i965 = i965_driver_data(ctx);
    struct intel_batchbuffer *batch = i965->batch;
    int urb_vs_start, urb_vs_size;
    int urb_gs_start, urb_gs_size;
    int urb_clip_start, urb_clip_size;
    int urb_sf_start, urb_sf_size;
    int urb_cs_start, urb_cs_size;

    urb_vs_start = 0;
    urb_vs_size = URB_VS_ENTRIES * URB_VS_ENTRY_SIZE;
    urb_gs_start = urb_vs_start + urb_vs_size;
    urb_gs_size = URB_GS_ENTRIES * URB_GS_ENTRY_SIZE;
    urb_clip_start = urb_gs_start + urb_gs_size;
    urb_clip_size = URB_CLIP_ENTRIES * URB_CLIP_ENTRY_SIZE;
    urb_sf_start = urb_clip_start + urb_clip_size;
    urb_sf_size = URB_SF_ENTRIES * URB_SF_ENTRY_SIZE;
    urb_cs_start = urb_sf_start + urb_sf_size;
    urb_cs_size = URB_CS_ENTRIES * URB_CS_ENTRY_SIZE;

    BEGIN_BATCH(batch, 3);
    OUT_BATCH(batch,
              CMD_URB_FENCE |
              UF0_CS_REALLOC |
              UF0_SF_REALLOC |
              UF0_CLIP_REALLOC |
              UF0_GS_REALLOC |
              UF0_VS_REALLOC |
              1);
    OUT_BATCH(batch,
              ((urb_clip_start + urb_clip_size) << UF1_CLIP_FENCE_SHIFT) |
              ((urb_gs_start + urb_gs_size) << UF1_GS_FENCE_SHIFT) |
              ((urb_vs_start + urb_vs_size) << UF1_VS_FENCE_SHIFT));
    OUT_BATCH(batch,
              ((urb_cs_start + urb_cs_size) << UF2_CS_FENCE_SHIFT) |
              ((urb_sf_start + urb_sf_size) << UF2_SF_FENCE_SHIFT));
    ADVANCE_BATCH(batch);
}

static void
i965_render_cs_urb_layout(VADriverContextP ctx)
{
    struct i965_driver_data *i965 = i965_driver_data(ctx);
    struct intel_batchbuffer *batch = i965->batch;

    BEGIN_BATCH(batch, 2);
    OUT_BATCH(batch, CMD_CS_URB_STATE | 0);
    OUT_BATCH(batch,
              ((URB_CS_ENTRY_SIZE - 1) << 4) |  /* URB Entry Allocation Size */
              (URB_CS_ENTRIES << 0));           /* Number of URB Entries */
    ADVANCE_BATCH(batch);
}

static void
i965_render_constant_buffer(VADriverContextP ctx)
{
    struct i965_driver_data *i965 = i965_driver_data(ctx);
    struct intel_batchbuffer *batch = i965->batch;
    struct i965_render_state *render_state = &i965->render_state;

    BEGIN_BATCH(batch, 2);
    OUT_BATCH(batch, CMD_CONSTANT_BUFFER | (1 << 8) | (2 - 2)); /* bit 8: buffer address valid */
    OUT_RELOC(batch, render_state->curbe.bo,
              I915_GEM_DOMAIN_INSTRUCTION, 0,
              URB_CS_ENTRY_SIZE - 1); /* reloc delta carries the buffer length in the low bits */
    ADVANCE_BATCH(batch);
}

static void
i965_render_drawing_rectangle(VADriverContextP ctx)
{
    struct i965_driver_data *i965 = i965_driver_data(ctx);
    struct intel_batchbuffer *batch = i965->batch;
    struct i965_render_state *render_state = &i965->render_state;
    struct intel_region *dest_region = render_state->draw_region;

    BEGIN_BATCH(batch, 4);
    OUT_BATCH(batch, CMD_DRAWING_RECTANGLE | 2);
    OUT_BATCH(batch, 0x00000000);
    OUT_BATCH(batch, (dest_region->width - 1) | (dest_region->height - 1) << 16);
    OUT_BATCH(batch, 0x00000000);
    ADVANCE_BATCH(batch);
}

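/*
 * Two 2-component float vertex elements per vertex, at offsets 0 and 8,
 * each expanded to {., ., 1.0, 1.0}.  Ironlake dropped the destination
 * element offset field, hence the separate path.
 */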
static void
i965_render_vertex_elements(VADriverContextP ctx)
{
    struct i965_driver_data *i965 = i965_driver_data(ctx);
    struct intel_batchbuffer *batch = i965->batch;

    if (IS_IRONLAKE(i965->intel.device_id)) {
        BEGIN_BATCH(batch, 5);
        OUT_BATCH(batch, CMD_VERTEX_ELEMENTS | 3);
        /* offset 0: X,Y -> {X, Y, 1.0, 1.0} */
        OUT_BATCH(batch, (0 << VE0_VERTEX_BUFFER_INDEX_SHIFT) |
                  VE0_VALID |
                  (I965_SURFACEFORMAT_R32G32_FLOAT << VE0_FORMAT_SHIFT) |
                  (0 << VE0_OFFSET_SHIFT));
        OUT_BATCH(batch, (I965_VFCOMPONENT_STORE_SRC << VE1_VFCOMPONENT_0_SHIFT) |
                  (I965_VFCOMPONENT_STORE_SRC << VE1_VFCOMPONENT_1_SHIFT) |
                  (I965_VFCOMPONENT_STORE_1_FLT << VE1_VFCOMPONENT_2_SHIFT) |
                  (I965_VFCOMPONENT_STORE_1_FLT << VE1_VFCOMPONENT_3_SHIFT));
        /* offset 8: S0, T0 -> {S0, T0, 1.0, 1.0} */
        OUT_BATCH(batch, (0 << VE0_VERTEX_BUFFER_INDEX_SHIFT) |
                  VE0_VALID |
                  (I965_SURFACEFORMAT_R32G32_FLOAT << VE0_FORMAT_SHIFT) |
                  (8 << VE0_OFFSET_SHIFT));
        OUT_BATCH(batch, (I965_VFCOMPONENT_STORE_SRC << VE1_VFCOMPONENT_0_SHIFT) |
                  (I965_VFCOMPONENT_STORE_SRC << VE1_VFCOMPONENT_1_SHIFT) |
                  (I965_VFCOMPONENT_STORE_1_FLT << VE1_VFCOMPONENT_2_SHIFT) |
                  (I965_VFCOMPONENT_STORE_1_FLT << VE1_VFCOMPONENT_3_SHIFT));
        ADVANCE_BATCH(batch);
    } else {
        BEGIN_BATCH(batch, 5);
        OUT_BATCH(batch, CMD_VERTEX_ELEMENTS | 3);
        /* offset 0: X,Y -> {X, Y, 1.0, 1.0} */
        OUT_BATCH(batch, (0 << VE0_VERTEX_BUFFER_INDEX_SHIFT) |
                  VE0_VALID |
                  (I965_SURFACEFORMAT_R32G32_FLOAT << VE0_FORMAT_SHIFT) |
                  (0 << VE0_OFFSET_SHIFT));
        OUT_BATCH(batch, (I965_VFCOMPONENT_STORE_SRC << VE1_VFCOMPONENT_0_SHIFT) |
                  (I965_VFCOMPONENT_STORE_SRC << VE1_VFCOMPONENT_1_SHIFT) |
                  (I965_VFCOMPONENT_STORE_1_FLT << VE1_VFCOMPONENT_2_SHIFT) |
                  (I965_VFCOMPONENT_STORE_1_FLT << VE1_VFCOMPONENT_3_SHIFT) |
                  (0 << VE1_DESTINATION_ELEMENT_OFFSET_SHIFT));
        /* offset 8: S0, T0 -> {S0, T0, 1.0, 1.0} */
        OUT_BATCH(batch, (0 << VE0_VERTEX_BUFFER_INDEX_SHIFT) |
                  VE0_VALID |
                  (I965_SURFACEFORMAT_R32G32_FLOAT << VE0_FORMAT_SHIFT) |
                  (8 << VE0_OFFSET_SHIFT));
        OUT_BATCH(batch, (I965_VFCOMPONENT_STORE_SRC << VE1_VFCOMPONENT_0_SHIFT) |
                  (I965_VFCOMPONENT_STORE_SRC << VE1_VFCOMPONENT_1_SHIFT) |
                  (I965_VFCOMPONENT_STORE_1_FLT << VE1_VFCOMPONENT_2_SHIFT) |
                  (I965_VFCOMPONENT_STORE_1_FLT << VE1_VFCOMPONENT_3_SHIFT) |
                  (4 << VE1_DESTINATION_ELEMENT_OFFSET_SHIFT));
        ADVANCE_BATCH(batch);
    }
}

static void
i965_render_upload_image_palette(
    VADriverContextP ctx,
    VAImageID        image_id,
    unsigned int     alpha
)
{
    struct i965_driver_data *i965 = i965_driver_data(ctx);
    struct intel_batchbuffer *batch = i965->batch;
    unsigned int i;

    struct object_image *obj_image = IMAGE(image_id);
    assert(obj_image);

    if (obj_image->image.num_palette_entries == 0)
        return;

    BEGIN_BATCH(batch, 1 + obj_image->image.num_palette_entries);
    OUT_BATCH(batch, CMD_SAMPLER_PALETTE_LOAD | (obj_image->image.num_palette_entries - 1));
    /* fill palette: bits 31:24 hold alpha, bits 23:0 the color */
    for (i = 0; i < obj_image->image.num_palette_entries; i++)
        OUT_BATCH(batch, (alpha << 24) | obj_image->palette[i]);
    ADVANCE_BATCH(batch);
}

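/*
 * Emit the vertex buffer binding and a 3-vertex RECTLIST primitive;
 * this is what actually kicks off the blit through the 3D pipeline.
 */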
static void
i965_render_startup(VADriverContextP ctx)
{
    struct i965_driver_data *i965 = i965_driver_data(ctx);
    struct intel_batchbuffer *batch = i965->batch;
    struct i965_render_state *render_state = &i965->render_state;

    BEGIN_BATCH(batch, 11);
    OUT_BATCH(batch, CMD_VERTEX_BUFFERS | 3);
    OUT_BATCH(batch,
              (0 << VB0_BUFFER_INDEX_SHIFT) |
              VB0_VERTEXDATA |
              ((4 * 4) << VB0_BUFFER_PITCH_SHIFT));
    OUT_RELOC(batch, render_state->vb.vertex_buffer, I915_GEM_DOMAIN_VERTEX, 0, 0);

    if (IS_IRONLAKE(i965->intel.device_id))
        OUT_RELOC(batch, render_state->vb.vertex_buffer, I915_GEM_DOMAIN_VERTEX, 0, 12 * 4); /* end address: 3 vertices * 16 bytes */
    else
        OUT_BATCH(batch, 3); /* max index */

    OUT_BATCH(batch, 0);

    OUT_BATCH(batch,
              CMD_3DPRIMITIVE |
              _3DPRIMITIVE_VERTEX_SEQUENTIAL |
              (_3DPRIM_RECTLIST << _3DPRIMITIVE_TOPOLOGY_SHIFT) |
              (0 << 9) |
              4);
    OUT_BATCH(batch, 3); /* vertex count per instance */
    OUT_BATCH(batch, 0); /* start vertex offset */
    OUT_BATCH(batch, 1); /* single instance */
    OUT_BATCH(batch, 0); /* start instance location */
    OUT_BATCH(batch, 0); /* index buffer offset, ignored */
    ADVANCE_BATCH(batch);
}

static void
i965_clear_dest_region(VADriverContextP ctx)
{
    struct i965_driver_data *i965 = i965_driver_data(ctx);
    struct intel_batchbuffer *batch = i965->batch;
    struct i965_render_state *render_state = &i965->render_state;
    struct intel_region *dest_region = render_state->draw_region;
    unsigned int blt_cmd, br13;
    int pitch;

    blt_cmd = XY_COLOR_BLT_CMD;
    br13 = 0xf0 << 16;
    pitch = dest_region->pitch;

    if (dest_region->cpp == 4) {
        br13 |= BR13_8888;
        blt_cmd |= (XY_COLOR_BLT_WRITE_RGB | XY_COLOR_BLT_WRITE_ALPHA);
    } else {
        assert(dest_region->cpp == 2);
        br13 |= BR13_565;
    }

    if (dest_region->tiling != I915_TILING_NONE) {
        blt_cmd |= XY_COLOR_BLT_DST_TILED;
        pitch /= 4; /* the BLT pitch is in dwords for tiled surfaces */
    }

    br13 |= pitch;

    if (IS_GEN6(i965->intel.device_id) ||
        IS_GEN7(i965->intel.device_id)) {
        intel_batchbuffer_start_atomic_blt(batch, 24);
        BEGIN_BLT_BATCH(batch, 6);
    } else {
        intel_batchbuffer_start_atomic(batch, 24);
        BEGIN_BATCH(batch, 6);
    }

    OUT_BATCH(batch, blt_cmd);
    OUT_BATCH(batch, br13);
    OUT_BATCH(batch, (dest_region->y << 16) | (dest_region->x));
    OUT_BATCH(batch, ((dest_region->y + dest_region->height) << 16) |
              (dest_region->x + dest_region->width));
    OUT_RELOC(batch, dest_region->bo,
              I915_GEM_DOMAIN_RENDER, I915_GEM_DOMAIN_RENDER,
              0);
    OUT_BATCH(batch, 0x0);
    ADVANCE_BATCH(batch);
    intel_batchbuffer_end_atomic(batch);
}

static void
i965_surface_render_pipeline_setup(VADriverContextP ctx)
{
    struct i965_driver_data *i965 = i965_driver_data(ctx);
    struct intel_batchbuffer *batch = i965->batch;

    i965_clear_dest_region(ctx);
    intel_batchbuffer_start_atomic(batch, 0x1000);
    intel_batchbuffer_emit_mi_flush(batch);
    i965_render_pipeline_select(ctx);
    i965_render_state_sip(ctx);
    i965_render_state_base_address(ctx);
    i965_render_binding_table_pointers(ctx);
    i965_render_constant_color(ctx);
    i965_render_pipelined_pointers(ctx);
    i965_render_urb_layout(ctx);
    i965_render_cs_urb_layout(ctx);
    i965_render_constant_buffer(ctx);
    i965_render_drawing_rectangle(ctx);
    i965_render_vertex_elements(ctx);
    i965_render_startup(ctx);
    intel_batchbuffer_end_atomic(batch);
}

static void
i965_subpic_render_pipeline_setup(VADriverContextP ctx)
{
    struct i965_driver_data *i965 = i965_driver_data(ctx);
    struct intel_batchbuffer *batch = i965->batch;

    intel_batchbuffer_start_atomic(batch, 0x1000);
    intel_batchbuffer_emit_mi_flush(batch);
    i965_render_pipeline_select(ctx);
    i965_render_state_sip(ctx);
    i965_render_state_base_address(ctx);
    i965_render_binding_table_pointers(ctx);
    i965_render_constant_color(ctx);
    i965_render_pipelined_pointers(ctx);
    i965_render_urb_layout(ctx);
    i965_render_cs_urb_layout(ctx);
    i965_render_drawing_rectangle(ctx);
    i965_render_vertex_elements(ctx);
    i965_render_startup(ctx);
    intel_batchbuffer_end_atomic(batch);
}

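/*
 * (Re)allocate the state buffer objects used by a render pass; buffers
 * left over from a previous pass are unreferenced first.
 */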
static void
i965_render_initialize(VADriverContextP ctx)
{
    struct i965_driver_data *i965 = i965_driver_data(ctx);
    struct i965_render_state *render_state = &i965->render_state;
    dri_bo *bo;

    /* VERTEX BUFFER */
    dri_bo_unreference(render_state->vb.vertex_buffer);
    bo = dri_bo_alloc(i965->intel.bufmgr,
                      "vertex buffer",
                      4096,
                      4096);
    assert(bo);
    render_state->vb.vertex_buffer = bo;

    /* VS */
    dri_bo_unreference(render_state->vs.state);
    bo = dri_bo_alloc(i965->intel.bufmgr,
                      "vs state",
                      sizeof(struct i965_vs_unit_state),
                      64);
    assert(bo);
    render_state->vs.state = bo;

    /* GS */
    /* CLIP */
    /* SF */
    dri_bo_unreference(render_state->sf.state);
    bo = dri_bo_alloc(i965->intel.bufmgr,
                      "sf state",
                      sizeof(struct i965_sf_unit_state),
                      64);
    assert(bo);
    render_state->sf.state = bo;

    /* WM */
    dri_bo_unreference(render_state->wm.surface_state_binding_table_bo);
    bo = dri_bo_alloc(i965->intel.bufmgr,
                      "surface state & binding table",
                      (SURFACE_STATE_PADDED_SIZE + sizeof(unsigned int)) * MAX_RENDER_SURFACES,
                      4096);
    assert(bo);
    render_state->wm.surface_state_binding_table_bo = bo;

    dri_bo_unreference(render_state->wm.sampler);
    bo = dri_bo_alloc(i965->intel.bufmgr,
                      "sampler state",
                      MAX_SAMPLERS * sizeof(struct i965_sampler_state),
                      64);
    assert(bo);
    render_state->wm.sampler = bo;
    render_state->wm.sampler_count = 0;

    dri_bo_unreference(render_state->wm.state);
    bo = dri_bo_alloc(i965->intel.bufmgr,
                      "wm state",
                      sizeof(struct i965_wm_unit_state),
                      64);
    assert(bo);
    render_state->wm.state = bo;

    /* COLOR CALCULATOR */
    dri_bo_unreference(render_state->cc.state);
    bo = dri_bo_alloc(i965->intel.bufmgr,
                      "color calc state",
                      sizeof(struct i965_cc_unit_state),
                      64);
    assert(bo);
    render_state->cc.state = bo;

    dri_bo_unreference(render_state->cc.viewport);
    bo = dri_bo_alloc(i965->intel.bufmgr,
                      "cc viewport",
                      sizeof(struct i965_cc_viewport),
                      64);
    assert(bo);
    render_state->cc.viewport = bo;
}
1525
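/*
 * Top-level GEN4/GEN5 surface rendering: rebuild the state buffers,
 * fill in this frame's surface/sampler/unit state, emit the pipeline
 * and flush the batch so the commands reach the GPU.
 */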
static void
i965_render_put_surface(
    VADriverContextP   ctx,
    VASurfaceID        surface,
    const VARectangle *src_rect,
    const VARectangle *dst_rect,
    unsigned int       flags
)
{
    struct i965_driver_data *i965 = i965_driver_data(ctx);
    struct intel_batchbuffer *batch = i965->batch;

    i965_render_initialize(ctx);
    i965_surface_render_state_setup(ctx, surface, src_rect, dst_rect);
    i965_surface_render_pipeline_setup(ctx);
    intel_batchbuffer_flush(batch);
}

static void
i965_render_put_subpicture(
    VADriverContextP   ctx,
    VASurfaceID        surface,
    const VARectangle *src_rect,
    const VARectangle *dst_rect
)
{
    struct i965_driver_data *i965 = i965_driver_data(ctx);
    struct intel_batchbuffer *batch = i965->batch;
    struct object_surface *obj_surface = SURFACE(surface);
    struct object_subpic *obj_subpic;

    assert(obj_surface);
    obj_subpic = SUBPIC(obj_surface->subpic);
    assert(obj_subpic);

    i965_render_initialize(ctx);
    i965_subpic_render_state_setup(ctx, surface, src_rect, dst_rect);
    i965_subpic_render_pipeline_setup(ctx);
    i965_render_upload_image_palette(ctx, obj_subpic->image, 0xff);
    intel_batchbuffer_flush(batch);
}

/*
 * for GEN6 (Sandybridge); GEN7 has its own variants further below
 */
static void
gen6_render_initialize(VADriverContextP ctx)
{
    struct i965_driver_data *i965 = i965_driver_data(ctx);
    struct i965_render_state *render_state = &i965->render_state;
    dri_bo *bo;

    /* VERTEX BUFFER */
    dri_bo_unreference(render_state->vb.vertex_buffer);
    bo = dri_bo_alloc(i965->intel.bufmgr,
                      "vertex buffer",
                      4096,
                      4096);
    assert(bo);
    render_state->vb.vertex_buffer = bo;

    /* WM */
    dri_bo_unreference(render_state->wm.surface_state_binding_table_bo);
    bo = dri_bo_alloc(i965->intel.bufmgr,
                      "surface state & binding table",
                      (SURFACE_STATE_PADDED_SIZE + sizeof(unsigned int)) * MAX_RENDER_SURFACES,
                      4096);
    assert(bo);
    render_state->wm.surface_state_binding_table_bo = bo;

    dri_bo_unreference(render_state->wm.sampler);
    bo = dri_bo_alloc(i965->intel.bufmgr,
                      "sampler state",
                      MAX_SAMPLERS * sizeof(struct i965_sampler_state),
                      4096);
    assert(bo);
    render_state->wm.sampler = bo;
    render_state->wm.sampler_count = 0;

    /* COLOR CALCULATOR */
    dri_bo_unreference(render_state->cc.state);
    bo = dri_bo_alloc(i965->intel.bufmgr,
                      "color calc state",
                      sizeof(struct gen6_color_calc_state),
                      4096);
    assert(bo);
    render_state->cc.state = bo;

    /* CC VIEWPORT */
    dri_bo_unreference(render_state->cc.viewport);
    bo = dri_bo_alloc(i965->intel.bufmgr,
                      "cc viewport",
                      sizeof(struct i965_cc_viewport),
                      4096);
    assert(bo);
    render_state->cc.viewport = bo;

    /* BLEND STATE */
    dri_bo_unreference(render_state->cc.blend);
    bo = dri_bo_alloc(i965->intel.bufmgr,
                      "blend state",
                      sizeof(struct gen6_blend_state),
                      4096);
    assert(bo);
    render_state->cc.blend = bo;

    /* DEPTH & STENCIL STATE */
    dri_bo_unreference(render_state->cc.depth_stencil);
    bo = dri_bo_alloc(i965->intel.bufmgr,
                      "depth & stencil state",
                      sizeof(struct gen6_depth_stencil_state),
                      4096);
    assert(bo);
    render_state->cc.depth_stencil = bo;
}

static void
gen6_render_color_calc_state(VADriverContextP ctx)
{
    struct i965_driver_data *i965 = i965_driver_data(ctx);
    struct i965_render_state *render_state = &i965->render_state;
    struct gen6_color_calc_state *color_calc_state;

    dri_bo_map(render_state->cc.state, 1);
    assert(render_state->cc.state->virtual);
    color_calc_state = render_state->cc.state->virtual;
    memset(color_calc_state, 0, sizeof(*color_calc_state));
    color_calc_state->constant_r = 1.0;
    color_calc_state->constant_g = 0.0;
    color_calc_state->constant_b = 1.0;
    color_calc_state->constant_a = 1.0;
    dri_bo_unmap(render_state->cc.state);
}

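/*
 * Plain surface rendering on GEN6 uses a logic op rather than alpha
 * blending: 0xC is the COPY op, so the pixel shader output is written
 * to the render target unmodified.
 */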
static void
gen6_render_blend_state(VADriverContextP ctx)
{
    struct i965_driver_data *i965 = i965_driver_data(ctx);
    struct i965_render_state *render_state = &i965->render_state;
    struct gen6_blend_state *blend_state;

    dri_bo_map(render_state->cc.blend, 1);
    assert(render_state->cc.blend->virtual);
    blend_state = render_state->cc.blend->virtual;
    memset(blend_state, 0, sizeof(*blend_state));
    blend_state->blend1.logic_op_enable = 1;
    blend_state->blend1.logic_op_func = 0xc;
    dri_bo_unmap(render_state->cc.blend);
}

static void
gen6_render_depth_stencil_state(VADriverContextP ctx)
{
    struct i965_driver_data *i965 = i965_driver_data(ctx);
    struct i965_render_state *render_state = &i965->render_state;
    struct gen6_depth_stencil_state *depth_stencil_state;

    dri_bo_map(render_state->cc.depth_stencil, 1);
    assert(render_state->cc.depth_stencil->virtual);
    depth_stencil_state = render_state->cc.depth_stencil->virtual;
    memset(depth_stencil_state, 0, sizeof(*depth_stencil_state));
    dri_bo_unmap(render_state->cc.depth_stencil);
}

static void
gen6_render_setup_states(
    VADriverContextP   ctx,
    VASurfaceID        surface,
    const VARectangle *src_rect,
    const VARectangle *dst_rect
)
{
    i965_render_dest_surface_state(ctx, 0);
    i965_render_src_surfaces_state(ctx, surface);
    i965_render_sampler(ctx);
    i965_render_cc_viewport(ctx);
    gen6_render_color_calc_state(ctx);
    gen6_render_blend_state(ctx);
    gen6_render_depth_stencil_state(ctx);
    i965_render_upload_constants(ctx, surface);
    i965_render_upload_vertex(ctx, surface, src_rect, dst_rect);
}

static void
gen6_emit_invarient_states(VADriverContextP ctx)
{
    struct i965_driver_data *i965 = i965_driver_data(ctx);
    struct intel_batchbuffer *batch = i965->batch;

    OUT_BATCH(batch, CMD_PIPELINE_SELECT | PIPELINE_SELECT_3D);

    OUT_BATCH(batch, GEN6_3DSTATE_MULTISAMPLE | (3 - 2));
    OUT_BATCH(batch, GEN6_3DSTATE_MULTISAMPLE_PIXEL_LOCATION_CENTER |
              GEN6_3DSTATE_MULTISAMPLE_NUMSAMPLES_1); /* 1 sample/pixel */
    OUT_BATCH(batch, 0);

    OUT_BATCH(batch, GEN6_3DSTATE_SAMPLE_MASK | (2 - 2));
    OUT_BATCH(batch, 1);

    /* Set system instruction pointer */
    OUT_BATCH(batch, CMD_STATE_SIP | 0);
    OUT_BATCH(batch, 0);
}

static void
gen6_emit_state_base_address(VADriverContextP ctx)
{
    struct i965_driver_data *i965 = i965_driver_data(ctx);
    struct intel_batchbuffer *batch = i965->batch;
    struct i965_render_state *render_state = &i965->render_state;

    OUT_BATCH(batch, CMD_STATE_BASE_ADDRESS | (10 - 2));
    OUT_BATCH(batch, BASE_ADDRESS_MODIFY); /* General state base address */
    OUT_RELOC(batch, render_state->wm.surface_state_binding_table_bo, I915_GEM_DOMAIN_INSTRUCTION, 0, BASE_ADDRESS_MODIFY); /* Surface state base address */
    OUT_BATCH(batch, BASE_ADDRESS_MODIFY); /* Dynamic state base address */
    OUT_BATCH(batch, BASE_ADDRESS_MODIFY); /* Indirect object base address */
    OUT_BATCH(batch, BASE_ADDRESS_MODIFY); /* Instruction base address */
    OUT_BATCH(batch, BASE_ADDRESS_MODIFY); /* General state upper bound */
    OUT_BATCH(batch, BASE_ADDRESS_MODIFY); /* Dynamic state upper bound */
    OUT_BATCH(batch, BASE_ADDRESS_MODIFY); /* Indirect object upper bound */
    OUT_BATCH(batch, BASE_ADDRESS_MODIFY); /* Instruction access upper bound */
}

static void
gen6_emit_viewport_state_pointers(VADriverContextP ctx)
{
    struct i965_driver_data *i965 = i965_driver_data(ctx);
    struct intel_batchbuffer *batch = i965->batch;
    struct i965_render_state *render_state = &i965->render_state;

    OUT_BATCH(batch, GEN6_3DSTATE_VIEWPORT_STATE_POINTERS |
              GEN6_3DSTATE_VIEWPORT_STATE_MODIFY_CC |
              (4 - 2));
    OUT_BATCH(batch, 0);
    OUT_BATCH(batch, 0);
    OUT_RELOC(batch, render_state->cc.viewport, I915_GEM_DOMAIN_INSTRUCTION, 0, 0);
}

static void
gen6_emit_urb(VADriverContextP ctx)
{
    struct i965_driver_data *i965 = i965_driver_data(ctx);
    struct intel_batchbuffer *batch = i965->batch;

    OUT_BATCH(batch, GEN6_3DSTATE_URB | (3 - 2));
    OUT_BATCH(batch, ((1 - 1) << GEN6_3DSTATE_URB_VS_SIZE_SHIFT) |
              (24 << GEN6_3DSTATE_URB_VS_ENTRIES_SHIFT)); /* at least 24 on GEN6 */
    OUT_BATCH(batch, (0 << GEN6_3DSTATE_URB_GS_SIZE_SHIFT) |
              (0 << GEN6_3DSTATE_URB_GS_ENTRIES_SHIFT)); /* no GS thread */
}

static void
gen6_emit_cc_state_pointers(VADriverContextP ctx)
{
    struct i965_driver_data *i965 = i965_driver_data(ctx);
    struct intel_batchbuffer *batch = i965->batch;
    struct i965_render_state *render_state = &i965->render_state;

    OUT_BATCH(batch, GEN6_3DSTATE_CC_STATE_POINTERS | (4 - 2));
    OUT_RELOC(batch, render_state->cc.blend, I915_GEM_DOMAIN_INSTRUCTION, 0, 1);
    OUT_RELOC(batch, render_state->cc.depth_stencil, I915_GEM_DOMAIN_INSTRUCTION, 0, 1);
    OUT_RELOC(batch, render_state->cc.state, I915_GEM_DOMAIN_INSTRUCTION, 0, 1);
}

static void
gen6_emit_sampler_state_pointers(VADriverContextP ctx)
{
    struct i965_driver_data *i965 = i965_driver_data(ctx);
    struct intel_batchbuffer *batch = i965->batch;
    struct i965_render_state *render_state = &i965->render_state;

    OUT_BATCH(batch, GEN6_3DSTATE_SAMPLER_STATE_POINTERS |
              GEN6_3DSTATE_SAMPLER_STATE_MODIFY_PS |
              (4 - 2));
    OUT_BATCH(batch, 0); /* VS */
    OUT_BATCH(batch, 0); /* GS */
    OUT_RELOC(batch, render_state->wm.sampler, I915_GEM_DOMAIN_INSTRUCTION, 0, 0);
}

static void
gen6_emit_binding_table(VADriverContextP ctx)
{
    struct i965_driver_data *i965 = i965_driver_data(ctx);
    struct intel_batchbuffer *batch = i965->batch;

    /* Binding table pointers */
    OUT_BATCH(batch, CMD_BINDING_TABLE_POINTERS |
              GEN6_BINDING_TABLE_MODIFY_PS |
              (4 - 2));
    OUT_BATCH(batch, 0);                /* vs */
    OUT_BATCH(batch, 0);                /* gs */
    /* Only the PS uses the binding table */
    OUT_BATCH(batch, BINDING_TABLE_OFFSET);
}

static void
gen6_emit_depth_buffer_state(VADriverContextP ctx)
{
    struct i965_driver_data *i965 = i965_driver_data(ctx);
    struct intel_batchbuffer *batch = i965->batch;

    OUT_BATCH(batch, CMD_DEPTH_BUFFER | (7 - 2));
    OUT_BATCH(batch, (I965_SURFACE_NULL << CMD_DEPTH_BUFFER_TYPE_SHIFT) |
              (I965_DEPTHFORMAT_D32_FLOAT << CMD_DEPTH_BUFFER_FORMAT_SHIFT));
    OUT_BATCH(batch, 0);
    OUT_BATCH(batch, 0);
    OUT_BATCH(batch, 0);
    OUT_BATCH(batch, 0);
    OUT_BATCH(batch, 0);

    OUT_BATCH(batch, CMD_CLEAR_PARAMS | (2 - 2));
    OUT_BATCH(batch, 0);
}

static void
gen6_emit_drawing_rectangle(VADriverContextP ctx)
{
    i965_render_drawing_rectangle(ctx);
}

static void
gen6_emit_vs_state(VADriverContextP ctx)
{
    struct i965_driver_data *i965 = i965_driver_data(ctx);
    struct intel_batchbuffer *batch = i965->batch;

    /* disable VS constant buffer */
    OUT_BATCH(batch, GEN6_3DSTATE_CONSTANT_VS | (5 - 2));
    OUT_BATCH(batch, 0);
    OUT_BATCH(batch, 0);
    OUT_BATCH(batch, 0);
    OUT_BATCH(batch, 0);

    OUT_BATCH(batch, GEN6_3DSTATE_VS | (6 - 2));
    OUT_BATCH(batch, 0); /* without VS kernel */
    OUT_BATCH(batch, 0);
    OUT_BATCH(batch, 0);
    OUT_BATCH(batch, 0);
    OUT_BATCH(batch, 0); /* pass-through */
}

static void
gen6_emit_gs_state(VADriverContextP ctx)
{
    struct i965_driver_data *i965 = i965_driver_data(ctx);
    struct intel_batchbuffer *batch = i965->batch;

    /* disable GS constant buffer */
    OUT_BATCH(batch, GEN6_3DSTATE_CONSTANT_GS | (5 - 2));
    OUT_BATCH(batch, 0);
    OUT_BATCH(batch, 0);
    OUT_BATCH(batch, 0);
    OUT_BATCH(batch, 0);

    OUT_BATCH(batch, GEN6_3DSTATE_GS | (7 - 2));
    OUT_BATCH(batch, 0); /* without GS kernel */
    OUT_BATCH(batch, 0);
    OUT_BATCH(batch, 0);
    OUT_BATCH(batch, 0);
    OUT_BATCH(batch, 0);
    OUT_BATCH(batch, 0); /* pass-through */
}

static void
gen6_emit_clip_state(VADriverContextP ctx)
{
    struct i965_driver_data *i965 = i965_driver_data(ctx);
    struct intel_batchbuffer *batch = i965->batch;

    OUT_BATCH(batch, GEN6_3DSTATE_CLIP | (4 - 2));
    OUT_BATCH(batch, 0);
    OUT_BATCH(batch, 0); /* pass-through */
    OUT_BATCH(batch, 0);
}

static void
gen6_emit_sf_state(VADriverContextP ctx)
{
    struct i965_driver_data *i965 = i965_driver_data(ctx);
    struct intel_batchbuffer *batch = i965->batch;

    OUT_BATCH(batch, GEN6_3DSTATE_SF | (20 - 2));
    OUT_BATCH(batch, (1 << GEN6_3DSTATE_SF_NUM_OUTPUTS_SHIFT) |
              (1 << GEN6_3DSTATE_SF_URB_ENTRY_READ_LENGTH_SHIFT) |
              (0 << GEN6_3DSTATE_SF_URB_ENTRY_READ_OFFSET_SHIFT));
    OUT_BATCH(batch, 0);
    OUT_BATCH(batch, GEN6_3DSTATE_SF_CULL_NONE);
    OUT_BATCH(batch, 2 << GEN6_3DSTATE_SF_TRIFAN_PROVOKE_SHIFT); /* DW4 */
    OUT_BATCH(batch, 0);
    OUT_BATCH(batch, 0);
    OUT_BATCH(batch, 0);
    OUT_BATCH(batch, 0);
    OUT_BATCH(batch, 0); /* DW9 */
    OUT_BATCH(batch, 0);
    OUT_BATCH(batch, 0);
    OUT_BATCH(batch, 0);
    OUT_BATCH(batch, 0);
    OUT_BATCH(batch, 0); /* DW14 */
    OUT_BATCH(batch, 0);
    OUT_BATCH(batch, 0);
    OUT_BATCH(batch, 0);
    OUT_BATCH(batch, 0);
    OUT_BATCH(batch, 0); /* DW19 */
}

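/*
 * WM/PS setup: point the push-constant buffer at the CURBE BO filled in
 * by i965_render_upload_constants(), and bind the pixel shader kernel
 * selected by the caller (PS_KERNEL for video, PS_SUBPIC_KERNEL for
 * subpictures), dispatched in SIMD16 mode.
 */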
static void
gen6_emit_wm_state(VADriverContextP ctx, int kernel)
{
    struct i965_driver_data *i965 = i965_driver_data(ctx);
    struct intel_batchbuffer *batch = i965->batch;
    struct i965_render_state *render_state = &i965->render_state;

    OUT_BATCH(batch, GEN6_3DSTATE_CONSTANT_PS |
              GEN6_3DSTATE_CONSTANT_BUFFER_0_ENABLE |
              (5 - 2));
    OUT_RELOC(batch,
              render_state->curbe.bo,
              I915_GEM_DOMAIN_INSTRUCTION, 0,
              0);
    OUT_BATCH(batch, 0);
    OUT_BATCH(batch, 0);
    OUT_BATCH(batch, 0);

    OUT_BATCH(batch, GEN6_3DSTATE_WM | (9 - 2));
    OUT_RELOC(batch, render_state->render_kernels[kernel].bo,
              I915_GEM_DOMAIN_INSTRUCTION, 0,
              0);
    OUT_BATCH(batch, (1 << GEN6_3DSTATE_WM_SAMPLER_COUNT_SHITF) |
              (5 << GEN6_3DSTATE_WM_BINDING_TABLE_ENTRY_COUNT_SHIFT));
    OUT_BATCH(batch, 0);
    OUT_BATCH(batch, (6 << GEN6_3DSTATE_WM_DISPATCH_START_GRF_0_SHIFT)); /* DW4 */
    OUT_BATCH(batch, ((40 - 1) << GEN6_3DSTATE_WM_MAX_THREADS_SHIFT) |
              GEN6_3DSTATE_WM_DISPATCH_ENABLE |
              GEN6_3DSTATE_WM_16_DISPATCH_ENABLE);
    OUT_BATCH(batch, (1 << GEN6_3DSTATE_WM_NUM_SF_OUTPUTS_SHIFT) |
              GEN6_3DSTATE_WM_PERSPECTIVE_PIXEL_BARYCENTRIC);
    OUT_BATCH(batch, 0);
    OUT_BATCH(batch, 0);
}

static void
gen6_emit_vertex_element_state(VADriverContextP ctx)
{
    struct i965_driver_data *i965 = i965_driver_data(ctx);
    struct intel_batchbuffer *batch = i965->batch;

    /* Set up our vertex elements, sourced from the single vertex buffer. */
    OUT_BATCH(batch, CMD_VERTEX_ELEMENTS | (5 - 2));
    /* offset 0: X,Y -> {X, Y, 1.0, 1.0} */
    OUT_BATCH(batch, (0 << GEN6_VE0_VERTEX_BUFFER_INDEX_SHIFT) |
              GEN6_VE0_VALID |
              (I965_SURFACEFORMAT_R32G32_FLOAT << VE0_FORMAT_SHIFT) |
              (0 << VE0_OFFSET_SHIFT));
    OUT_BATCH(batch, (I965_VFCOMPONENT_STORE_SRC << VE1_VFCOMPONENT_0_SHIFT) |
              (I965_VFCOMPONENT_STORE_SRC << VE1_VFCOMPONENT_1_SHIFT) |
              (I965_VFCOMPONENT_STORE_1_FLT << VE1_VFCOMPONENT_2_SHIFT) |
              (I965_VFCOMPONENT_STORE_1_FLT << VE1_VFCOMPONENT_3_SHIFT));
    /* offset 8: S0, T0 -> {S0, T0, 1.0, 1.0} */
    OUT_BATCH(batch, (0 << GEN6_VE0_VERTEX_BUFFER_INDEX_SHIFT) |
              GEN6_VE0_VALID |
              (I965_SURFACEFORMAT_R32G32_FLOAT << VE0_FORMAT_SHIFT) |
              (8 << VE0_OFFSET_SHIFT));
    OUT_BATCH(batch, (I965_VFCOMPONENT_STORE_SRC << VE1_VFCOMPONENT_0_SHIFT) |
              (I965_VFCOMPONENT_STORE_SRC << VE1_VFCOMPONENT_1_SHIFT) |
              (I965_VFCOMPONENT_STORE_1_FLT << VE1_VFCOMPONENT_2_SHIFT) |
              (I965_VFCOMPONENT_STORE_1_FLT << VE1_VFCOMPONENT_3_SHIFT));
}

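/*
 * The quad is drawn as a RECTLIST of three vertices (the hardware
 * derives the fourth corner).  Each vertex is four floats -- x, y
 * followed by the s, t texture coordinates -- so the buffer pitch is
 * 16 bytes and the end-address relocation covers the three vertices
 * (12 dwords).
 */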
static void
gen6_emit_vertices(VADriverContextP ctx)
{
    struct i965_driver_data *i965 = i965_driver_data(ctx);
    struct intel_batchbuffer *batch = i965->batch;
    struct i965_render_state *render_state = &i965->render_state;

    BEGIN_BATCH(batch, 11);
    OUT_BATCH(batch, CMD_VERTEX_BUFFERS | 3);
    OUT_BATCH(batch,
              (0 << GEN6_VB0_BUFFER_INDEX_SHIFT) |
              GEN6_VB0_VERTEXDATA |
              ((4 * 4) << VB0_BUFFER_PITCH_SHIFT));
    OUT_RELOC(batch, render_state->vb.vertex_buffer, I915_GEM_DOMAIN_VERTEX, 0, 0);
    OUT_RELOC(batch, render_state->vb.vertex_buffer, I915_GEM_DOMAIN_VERTEX, 0, 12 * 4);
    OUT_BATCH(batch, 0);

    OUT_BATCH(batch,
              CMD_3DPRIMITIVE |
              _3DPRIMITIVE_VERTEX_SEQUENTIAL |
              (_3DPRIM_RECTLIST << _3DPRIMITIVE_TOPOLOGY_SHIFT) |
              (0 << 9) |
              4);
    OUT_BATCH(batch, 3); /* vertex count per instance */
    OUT_BATCH(batch, 0); /* start vertex offset */
    OUT_BATCH(batch, 1); /* single instance */
    OUT_BATCH(batch, 0); /* start instance location */
    OUT_BATCH(batch, 0); /* index buffer offset, ignored */
    ADVANCE_BATCH(batch);
}

static void
gen6_render_emit_states(VADriverContextP ctx, int kernel)
{
    struct i965_driver_data *i965 = i965_driver_data(ctx);
    struct intel_batchbuffer *batch = i965->batch;

    intel_batchbuffer_start_atomic(batch, 0x1000);
    intel_batchbuffer_emit_mi_flush(batch);
    gen6_emit_invarient_states(ctx);
    gen6_emit_state_base_address(ctx);
    gen6_emit_viewport_state_pointers(ctx);
    gen6_emit_urb(ctx);
    gen6_emit_cc_state_pointers(ctx);
    gen6_emit_sampler_state_pointers(ctx);
    gen6_emit_vs_state(ctx);
    gen6_emit_gs_state(ctx);
    gen6_emit_clip_state(ctx);
    gen6_emit_sf_state(ctx);
    gen6_emit_wm_state(ctx, kernel);
    gen6_emit_binding_table(ctx);
    gen6_emit_depth_buffer_state(ctx);
    gen6_emit_drawing_rectangle(ctx);
    gen6_emit_vertex_element_state(ctx);
    gen6_emit_vertices(ctx);
    intel_batchbuffer_end_atomic(batch);
}

static void
gen6_render_put_surface(
    VADriverContextP   ctx,
    VASurfaceID        surface,
    const VARectangle *src_rect,
    const VARectangle *dst_rect,
    unsigned int       flags
)
{
    struct i965_driver_data *i965 = i965_driver_data(ctx);
    struct intel_batchbuffer *batch = i965->batch;

    gen6_render_initialize(ctx);
    gen6_render_setup_states(ctx, surface, src_rect, dst_rect);
    i965_clear_dest_region(ctx);
    gen6_render_emit_states(ctx, PS_KERNEL);
    intel_batchbuffer_flush(batch);
}

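/*
 * Subpictures need real alpha blending instead of the COPY logic op:
 * source-over compositing with SRC_ALPHA / INV_SRC_ALPHA factors and
 * pre/post-blend clamping to [0, 1].
 */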
static void
gen6_subpicture_render_blend_state(VADriverContextP ctx)
{
    struct i965_driver_data *i965 = i965_driver_data(ctx);
    struct i965_render_state *render_state = &i965->render_state;
    struct gen6_blend_state *blend_state;

    dri_bo_map(render_state->cc.blend, 1);
    assert(render_state->cc.blend->virtual);
    blend_state = render_state->cc.blend->virtual;
    memset(blend_state, 0, sizeof(*blend_state));
    blend_state->blend0.dest_blend_factor = I965_BLENDFACTOR_INV_SRC_ALPHA;
    blend_state->blend0.source_blend_factor = I965_BLENDFACTOR_SRC_ALPHA;
    blend_state->blend0.blend_func = I965_BLENDFUNCTION_ADD;
    blend_state->blend0.blend_enable = 1;
    blend_state->blend1.post_blend_clamp_enable = 1;
    blend_state->blend1.pre_blend_clamp_enable = 1;
    blend_state->blend1.clamp_range = 0; /* clamp range [0, 1] */
    dri_bo_unmap(render_state->cc.blend);
}

static void
gen6_subpicture_render_setup_states(
    VADriverContextP   ctx,
    VASurfaceID        surface,
    const VARectangle *src_rect,
    const VARectangle *dst_rect
)
{
    i965_render_dest_surface_state(ctx, 0);
    i965_subpic_render_src_surfaces_state(ctx, surface);
    i965_render_sampler(ctx);
    i965_render_cc_viewport(ctx);
    gen6_render_color_calc_state(ctx);
    gen6_subpicture_render_blend_state(ctx);
    gen6_render_depth_stencil_state(ctx);
    i965_subpic_render_upload_vertex(ctx, surface, dst_rect);
}

static void
gen6_render_put_subpicture(
    VADriverContextP   ctx,
    VASurfaceID        surface,
    const VARectangle *src_rect,
    const VARectangle *dst_rect
)
{
    struct i965_driver_data *i965 = i965_driver_data(ctx);
    struct intel_batchbuffer *batch = i965->batch;
    struct object_surface *obj_surface = SURFACE(surface);
    struct object_subpic *obj_subpic;

    assert(obj_surface);
    obj_subpic = SUBPIC(obj_surface->subpic);
    assert(obj_subpic);
    gen6_render_initialize(ctx);
    gen6_subpicture_render_setup_states(ctx, surface, src_rect, dst_rect);
    gen6_render_emit_states(ctx, PS_SUBPIC_KERNEL);
    i965_render_upload_image_palette(ctx, obj_subpic->image, 0xff);
    intel_batchbuffer_flush(batch);
}

/*
 * for GEN7
 */
static void
gen7_render_initialize(VADriverContextP ctx)
{
    struct i965_driver_data *i965 = i965_driver_data(ctx);
    struct i965_render_state *render_state = &i965->render_state;
    dri_bo *bo;

    /* VERTEX BUFFER */
    dri_bo_unreference(render_state->vb.vertex_buffer);
    bo = dri_bo_alloc(i965->intel.bufmgr,
                      "vertex buffer",
                      4096,
                      4096);
    assert(bo);
    render_state->vb.vertex_buffer = bo;

    /* WM */
    dri_bo_unreference(render_state->wm.surface_state_binding_table_bo);
    bo = dri_bo_alloc(i965->intel.bufmgr,
                      "surface state & binding table",
                      (SURFACE_STATE_PADDED_SIZE + sizeof(unsigned int)) * MAX_RENDER_SURFACES,
                      4096);
    assert(bo);
    render_state->wm.surface_state_binding_table_bo = bo;

    dri_bo_unreference(render_state->wm.sampler);
    bo = dri_bo_alloc(i965->intel.bufmgr,
                      "sampler state",
                      MAX_SAMPLERS * sizeof(struct gen7_sampler_state),
                      4096);
    assert(bo);
    render_state->wm.sampler = bo;
    render_state->wm.sampler_count = 0;

    /* COLOR CALCULATOR */
    dri_bo_unreference(render_state->cc.state);
    bo = dri_bo_alloc(i965->intel.bufmgr,
                      "color calc state",
                      sizeof(struct gen6_color_calc_state),
                      4096);
    assert(bo);
    render_state->cc.state = bo;

    /* CC VIEWPORT */
    dri_bo_unreference(render_state->cc.viewport);
    bo = dri_bo_alloc(i965->intel.bufmgr,
                      "cc viewport",
                      sizeof(struct i965_cc_viewport),
                      4096);
    assert(bo);
    render_state->cc.viewport = bo;

    /* BLEND STATE */
    dri_bo_unreference(render_state->cc.blend);
    bo = dri_bo_alloc(i965->intel.bufmgr,
                      "blend state",
                      sizeof(struct gen6_blend_state),
                      4096);
    assert(bo);
    render_state->cc.blend = bo;

    /* DEPTH & STENCIL STATE */
    dri_bo_unreference(render_state->cc.depth_stencil);
    bo = dri_bo_alloc(i965->intel.bufmgr,
                      "depth & stencil state",
                      sizeof(struct gen6_depth_stencil_state),
                      4096);
    assert(bo);
    render_state->cc.depth_stencil = bo;
}

static void
gen7_render_color_calc_state(VADriverContextP ctx)
{
    struct i965_driver_data *i965 = i965_driver_data(ctx);
    struct i965_render_state *render_state = &i965->render_state;
    struct gen6_color_calc_state *color_calc_state;

    dri_bo_map(render_state->cc.state, 1);
    assert(render_state->cc.state->virtual);
    color_calc_state = render_state->cc.state->virtual;
    memset(color_calc_state, 0, sizeof(*color_calc_state));
    color_calc_state->constant_r = 1.0;
    color_calc_state->constant_g = 0.0;
    color_calc_state->constant_b = 1.0;
    color_calc_state->constant_a = 1.0;
    dri_bo_unmap(render_state->cc.state);
}

static void
gen7_render_blend_state(VADriverContextP ctx)
{
    struct i965_driver_data *i965 = i965_driver_data(ctx);
    struct i965_render_state *render_state = &i965->render_state;
    struct gen6_blend_state *blend_state;

    dri_bo_map(render_state->cc.blend, 1);
    assert(render_state->cc.blend->virtual);
    blend_state = render_state->cc.blend->virtual;
    memset(blend_state, 0, sizeof(*blend_state));
    blend_state->blend1.logic_op_enable = 1;
    blend_state->blend1.logic_op_func = 0xc;
    blend_state->blend1.pre_blend_clamp_enable = 1;
    dri_bo_unmap(render_state->cc.blend);
}

static void
gen7_render_depth_stencil_state(VADriverContextP ctx)
{
    struct i965_driver_data *i965 = i965_driver_data(ctx);
    struct i965_render_state *render_state = &i965->render_state;
    struct gen6_depth_stencil_state *depth_stencil_state;

    dri_bo_map(render_state->cc.depth_stencil, 1);
    assert(render_state->cc.depth_stencil->virtual);
    depth_stencil_state = render_state->cc.depth_stencil->virtual;
    memset(depth_stencil_state, 0, sizeof(*depth_stencil_state));
    dri_bo_unmap(render_state->cc.depth_stencil);
}

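/*
 * GEN7 uses its own, larger sampler state layout (struct
 * gen7_sampler_state), so the GEN4-6 i965_render_sampler() helper
 * cannot be reused here.
 */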
static void
gen7_render_sampler(VADriverContextP ctx)
{
    struct i965_driver_data *i965 = i965_driver_data(ctx);
    struct i965_render_state *render_state = &i965->render_state;
    struct gen7_sampler_state *sampler_state;
    int i;

    assert(render_state->wm.sampler_count > 0);
    assert(render_state->wm.sampler_count <= MAX_SAMPLERS);

    dri_bo_map(render_state->wm.sampler, 1);
    assert(render_state->wm.sampler->virtual);
    sampler_state = render_state->wm.sampler->virtual;
    for (i = 0; i < render_state->wm.sampler_count; i++) {
        memset(sampler_state, 0, sizeof(*sampler_state));
        sampler_state->ss0.min_filter = I965_MAPFILTER_LINEAR;
        sampler_state->ss0.mag_filter = I965_MAPFILTER_LINEAR;
        sampler_state->ss3.r_wrap_mode = I965_TEXCOORDMODE_CLAMP;
        sampler_state->ss3.s_wrap_mode = I965_TEXCOORDMODE_CLAMP;
        sampler_state->ss3.t_wrap_mode = I965_TEXCOORDMODE_CLAMP;
        sampler_state++;
    }

    dri_bo_unmap(render_state->wm.sampler);
}

static void
gen7_render_setup_states(
    VADriverContextP   ctx,
    VASurfaceID        surface,
    const VARectangle *src_rect,
    const VARectangle *dst_rect
)
{
    i965_render_dest_surface_state(ctx, 0);
    i965_render_src_surfaces_state(ctx, surface);
    gen7_render_sampler(ctx);
    i965_render_cc_viewport(ctx);
    gen7_render_color_calc_state(ctx);
    gen7_render_blend_state(ctx);
    gen7_render_depth_stencil_state(ctx);
    i965_render_upload_constants(ctx, surface);
    i965_render_upload_vertex(ctx, surface, src_rect, dst_rect);
}

static void
gen7_emit_invarient_states(VADriverContextP ctx)
{
    struct i965_driver_data *i965 = i965_driver_data(ctx);
    struct intel_batchbuffer *batch = i965->batch;

    BEGIN_BATCH(batch, 1);
    OUT_BATCH(batch, CMD_PIPELINE_SELECT | PIPELINE_SELECT_3D);
    ADVANCE_BATCH(batch);

    BEGIN_BATCH(batch, 4);
    OUT_BATCH(batch, GEN6_3DSTATE_MULTISAMPLE | (4 - 2));
    OUT_BATCH(batch, GEN6_3DSTATE_MULTISAMPLE_PIXEL_LOCATION_CENTER |
              GEN6_3DSTATE_MULTISAMPLE_NUMSAMPLES_1); /* 1 sample/pixel */
    OUT_BATCH(batch, 0);
    OUT_BATCH(batch, 0);
    ADVANCE_BATCH(batch);

    BEGIN_BATCH(batch, 2);
    OUT_BATCH(batch, GEN6_3DSTATE_SAMPLE_MASK | (2 - 2));
    OUT_BATCH(batch, 1);
    ADVANCE_BATCH(batch);

    /* Set system instruction pointer */
    BEGIN_BATCH(batch, 2);
    OUT_BATCH(batch, CMD_STATE_SIP | 0);
    OUT_BATCH(batch, 0);
    ADVANCE_BATCH(batch);
}

static void
gen7_emit_state_base_address(VADriverContextP ctx)
{
    struct i965_driver_data *i965 = i965_driver_data(ctx);
    struct intel_batchbuffer *batch = i965->batch;
    struct i965_render_state *render_state = &i965->render_state;

    OUT_BATCH(batch, CMD_STATE_BASE_ADDRESS | (10 - 2));
    OUT_BATCH(batch, BASE_ADDRESS_MODIFY); /* General state base address */
    OUT_RELOC(batch, render_state->wm.surface_state_binding_table_bo, I915_GEM_DOMAIN_INSTRUCTION, 0, BASE_ADDRESS_MODIFY); /* Surface state base address */
    OUT_BATCH(batch, BASE_ADDRESS_MODIFY); /* Dynamic state base address */
    OUT_BATCH(batch, BASE_ADDRESS_MODIFY); /* Indirect object base address */
    OUT_BATCH(batch, BASE_ADDRESS_MODIFY); /* Instruction base address */
    OUT_BATCH(batch, BASE_ADDRESS_MODIFY); /* General state upper bound */
    OUT_BATCH(batch, BASE_ADDRESS_MODIFY); /* Dynamic state upper bound */
    OUT_BATCH(batch, BASE_ADDRESS_MODIFY); /* Indirect object upper bound */
    OUT_BATCH(batch, BASE_ADDRESS_MODIFY); /* Instruction access upper bound */
}

static void
gen7_emit_viewport_state_pointers(VADriverContextP ctx)
{
    struct i965_driver_data *i965 = i965_driver_data(ctx);
    struct intel_batchbuffer *batch = i965->batch;
    struct i965_render_state *render_state = &i965->render_state;

    BEGIN_BATCH(batch, 2);
    OUT_BATCH(batch, GEN7_3DSTATE_VIEWPORT_STATE_POINTERS_CC | (2 - 2));
    OUT_RELOC(batch,
              render_state->cc.viewport,
              I915_GEM_DOMAIN_INSTRUCTION, 0,
              0);
    ADVANCE_BATCH(batch);

    BEGIN_BATCH(batch, 2);
    OUT_BATCH(batch, GEN7_3DSTATE_VIEWPORT_STATE_POINTERS_SF_CL | (2 - 2));
    OUT_BATCH(batch, 0);
    ADVANCE_BATCH(batch);
}

/*
 * URB layout on GEN7
 * ----------------------------------------
 * | PS Push Constants (8KB) | VS entries |
 * ----------------------------------------
 */
static void
gen7_emit_urb(VADriverContextP ctx)
{
    struct i965_driver_data *i965 = i965_driver_data(ctx);
    struct intel_batchbuffer *batch = i965->batch;

    BEGIN_BATCH(batch, 2);
    OUT_BATCH(batch, GEN7_3DSTATE_PUSH_CONSTANT_ALLOC_PS | (2 - 2));
    OUT_BATCH(batch, 8); /* 8KB, in units of 1KB */
    ADVANCE_BATCH(batch);

    BEGIN_BATCH(batch, 2);
    OUT_BATCH(batch, GEN7_3DSTATE_URB_VS | (2 - 2));
    OUT_BATCH(batch,
              (32 << GEN7_URB_ENTRY_NUMBER_SHIFT) | /* at least 32 */
              (2 - 1) << GEN7_URB_ENTRY_SIZE_SHIFT |
              (1 << GEN7_URB_STARTING_ADDRESS_SHIFT));
    ADVANCE_BATCH(batch);

    BEGIN_BATCH(batch, 2);
    OUT_BATCH(batch, GEN7_3DSTATE_URB_GS | (2 - 2));
    OUT_BATCH(batch,
              (0 << GEN7_URB_ENTRY_SIZE_SHIFT) |
              (1 << GEN7_URB_STARTING_ADDRESS_SHIFT));
    ADVANCE_BATCH(batch);

    BEGIN_BATCH(batch, 2);
    OUT_BATCH(batch, GEN7_3DSTATE_URB_HS | (2 - 2));
    OUT_BATCH(batch,
              (0 << GEN7_URB_ENTRY_SIZE_SHIFT) |
              (2 << GEN7_URB_STARTING_ADDRESS_SHIFT));
    ADVANCE_BATCH(batch);

    BEGIN_BATCH(batch, 2);
    OUT_BATCH(batch, GEN7_3DSTATE_URB_DS | (2 - 2));
    OUT_BATCH(batch,
              (0 << GEN7_URB_ENTRY_SIZE_SHIFT) |
              (2 << GEN7_URB_STARTING_ADDRESS_SHIFT));
    ADVANCE_BATCH(batch);
}

static void
gen7_emit_cc_state_pointers(VADriverContextP ctx)
{
    struct i965_driver_data *i965 = i965_driver_data(ctx);
    struct intel_batchbuffer *batch = i965->batch;
    struct i965_render_state *render_state = &i965->render_state;

    BEGIN_BATCH(batch, 2);
    OUT_BATCH(batch, GEN6_3DSTATE_CC_STATE_POINTERS | (2 - 2));
    OUT_RELOC(batch,
              render_state->cc.state,
              I915_GEM_DOMAIN_INSTRUCTION, 0,
              1);
    ADVANCE_BATCH(batch);

    BEGIN_BATCH(batch, 2);
    OUT_BATCH(batch, GEN7_3DSTATE_BLEND_STATE_POINTERS | (2 - 2));
    OUT_RELOC(batch,
              render_state->cc.blend,
              I915_GEM_DOMAIN_INSTRUCTION, 0,
              1);
    ADVANCE_BATCH(batch);

    BEGIN_BATCH(batch, 2);
    OUT_BATCH(batch, GEN7_3DSTATE_DEPTH_STENCIL_STATE_POINTERS | (2 - 2));
    OUT_RELOC(batch,
              render_state->cc.depth_stencil,
              I915_GEM_DOMAIN_INSTRUCTION, 0,
              1);
    ADVANCE_BATCH(batch);
}

static void
gen7_emit_sampler_state_pointers(VADriverContextP ctx)
{
    struct i965_driver_data *i965 = i965_driver_data(ctx);
    struct intel_batchbuffer *batch = i965->batch;
    struct i965_render_state *render_state = &i965->render_state;

    BEGIN_BATCH(batch, 2);
    OUT_BATCH(batch, GEN7_3DSTATE_SAMPLER_STATE_POINTERS_PS | (2 - 2));
    OUT_RELOC(batch,
              render_state->wm.sampler,
              I915_GEM_DOMAIN_INSTRUCTION, 0,
              0);
    ADVANCE_BATCH(batch);
}

static void
gen7_emit_binding_table(VADriverContextP ctx)
{
    struct i965_driver_data *i965 = i965_driver_data(ctx);
    struct intel_batchbuffer *batch = i965->batch;

    BEGIN_BATCH(batch, 2);
    OUT_BATCH(batch, GEN7_3DSTATE_BINDING_TABLE_POINTERS_PS | (2 - 2));
    OUT_BATCH(batch, BINDING_TABLE_OFFSET);
    ADVANCE_BATCH(batch);
}

static void
gen7_emit_depth_buffer_state(VADriverContextP ctx)
{
    struct i965_driver_data *i965 = i965_driver_data(ctx);
    struct intel_batchbuffer *batch = i965->batch;

    BEGIN_BATCH(batch, 7);
    OUT_BATCH(batch, GEN7_3DSTATE_DEPTH_BUFFER | (7 - 2));
    OUT_BATCH(batch,
              (I965_DEPTHFORMAT_D32_FLOAT << 18) |
              (I965_SURFACE_NULL << 29));
    OUT_BATCH(batch, 0);
    OUT_BATCH(batch, 0);
    OUT_BATCH(batch, 0);
    OUT_BATCH(batch, 0);
    OUT_BATCH(batch, 0);
    ADVANCE_BATCH(batch);

    BEGIN_BATCH(batch, 3);
    OUT_BATCH(batch, GEN7_3DSTATE_CLEAR_PARAMS | (3 - 2));
    OUT_BATCH(batch, 0);
    OUT_BATCH(batch, 0);
    ADVANCE_BATCH(batch);
}

static void
gen7_emit_drawing_rectangle(VADriverContextP ctx)
{
    i965_render_drawing_rectangle(ctx);
}

static void
gen7_emit_vs_state(VADriverContextP ctx)
{
    struct i965_driver_data *i965 = i965_driver_data(ctx);
    struct intel_batchbuffer *batch = i965->batch;

    /* disable VS constant buffer */
    OUT_BATCH(batch, GEN6_3DSTATE_CONSTANT_VS | (7 - 2));
    OUT_BATCH(batch, 0);
    OUT_BATCH(batch, 0);
    OUT_BATCH(batch, 0);
    OUT_BATCH(batch, 0);
    OUT_BATCH(batch, 0);
    OUT_BATCH(batch, 0);

    OUT_BATCH(batch, GEN6_3DSTATE_VS | (6 - 2));
    OUT_BATCH(batch, 0); /* without VS kernel */
    OUT_BATCH(batch, 0);
    OUT_BATCH(batch, 0);
    OUT_BATCH(batch, 0);
    OUT_BATCH(batch, 0); /* pass-through */
}

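/*
 * GEN7 introduced the tessellation (HS/TE/DS) and stream-output stages.
 * They are unused for video rendering, but the hardware still expects
 * each one to be programmed, so emit null/disabled state for all of
 * them along with the bypassed GS.
 */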
static void
gen7_emit_bypass_state(VADriverContextP ctx)
{
    struct i965_driver_data *i965 = i965_driver_data(ctx);
    struct intel_batchbuffer *batch = i965->batch;

    /* bypass GS */
    BEGIN_BATCH(batch, 7);
    OUT_BATCH(batch, GEN6_3DSTATE_CONSTANT_GS | (7 - 2));
    OUT_BATCH(batch, 0);
    OUT_BATCH(batch, 0);
    OUT_BATCH(batch, 0);
    OUT_BATCH(batch, 0);
    OUT_BATCH(batch, 0);
    OUT_BATCH(batch, 0);
    ADVANCE_BATCH(batch);

    BEGIN_BATCH(batch, 7);
    OUT_BATCH(batch, GEN6_3DSTATE_GS | (7 - 2));
    OUT_BATCH(batch, 0); /* without GS kernel */
    OUT_BATCH(batch, 0);
    OUT_BATCH(batch, 0);
    OUT_BATCH(batch, 0);
    OUT_BATCH(batch, 0);
    OUT_BATCH(batch, 0); /* pass-through */
    ADVANCE_BATCH(batch);

    BEGIN_BATCH(batch, 2);
    OUT_BATCH(batch, GEN7_3DSTATE_BINDING_TABLE_POINTERS_GS | (2 - 2));
    OUT_BATCH(batch, 0);
    ADVANCE_BATCH(batch);

    /* disable HS */
    BEGIN_BATCH(batch, 7);
    OUT_BATCH(batch, GEN7_3DSTATE_CONSTANT_HS | (7 - 2));
    OUT_BATCH(batch, 0);
    OUT_BATCH(batch, 0);
    OUT_BATCH(batch, 0);
    OUT_BATCH(batch, 0);
    OUT_BATCH(batch, 0);
    OUT_BATCH(batch, 0);
    ADVANCE_BATCH(batch);

    BEGIN_BATCH(batch, 7);
    OUT_BATCH(batch, GEN7_3DSTATE_HS | (7 - 2));
    OUT_BATCH(batch, 0);
    OUT_BATCH(batch, 0);
    OUT_BATCH(batch, 0);
    OUT_BATCH(batch, 0);
    OUT_BATCH(batch, 0);
    OUT_BATCH(batch, 0);
    ADVANCE_BATCH(batch);

    BEGIN_BATCH(batch, 2);
    OUT_BATCH(batch, GEN7_3DSTATE_BINDING_TABLE_POINTERS_HS | (2 - 2));
    OUT_BATCH(batch, 0);
    ADVANCE_BATCH(batch);

    /* Disable TE */
    BEGIN_BATCH(batch, 4);
    OUT_BATCH(batch, GEN7_3DSTATE_TE | (4 - 2));
    OUT_BATCH(batch, 0);
    OUT_BATCH(batch, 0);
    OUT_BATCH(batch, 0);
    ADVANCE_BATCH(batch);

    /* Disable DS */
    BEGIN_BATCH(batch, 7);
    OUT_BATCH(batch, GEN7_3DSTATE_CONSTANT_DS | (7 - 2));
    OUT_BATCH(batch, 0);
    OUT_BATCH(batch, 0);
    OUT_BATCH(batch, 0);
    OUT_BATCH(batch, 0);
    OUT_BATCH(batch, 0);
    OUT_BATCH(batch, 0);
    ADVANCE_BATCH(batch);

    BEGIN_BATCH(batch, 6);
    OUT_BATCH(batch, GEN7_3DSTATE_DS | (6 - 2));
    OUT_BATCH(batch, 0);
    OUT_BATCH(batch, 0);
    OUT_BATCH(batch, 0);
    OUT_BATCH(batch, 0);
    OUT_BATCH(batch, 0);
    ADVANCE_BATCH(batch);

    BEGIN_BATCH(batch, 2);
    OUT_BATCH(batch, GEN7_3DSTATE_BINDING_TABLE_POINTERS_DS | (2 - 2));
    OUT_BATCH(batch, 0);
    ADVANCE_BATCH(batch);

    /* Disable STREAMOUT */
    BEGIN_BATCH(batch, 3);
    OUT_BATCH(batch, GEN7_3DSTATE_STREAMOUT | (3 - 2));
    OUT_BATCH(batch, 0);
    OUT_BATCH(batch, 0);
    ADVANCE_BATCH(batch);
}

static void
gen7_emit_clip_state(VADriverContextP ctx)
{
    struct i965_driver_data *i965 = i965_driver_data(ctx);
    struct intel_batchbuffer *batch = i965->batch;

    OUT_BATCH(batch, GEN6_3DSTATE_CLIP | (4 - 2));
    OUT_BATCH(batch, 0);
    OUT_BATCH(batch, 0); /* pass-through */
    OUT_BATCH(batch, 0);
}

static void
gen7_emit_sf_state(VADriverContextP ctx)
{
    struct i965_driver_data *i965 = i965_driver_data(ctx);
    struct intel_batchbuffer *batch = i965->batch;

    BEGIN_BATCH(batch, 14);
    OUT_BATCH(batch, GEN7_3DSTATE_SBE | (14 - 2));
    OUT_BATCH(batch,
              (1 << GEN7_SBE_NUM_OUTPUTS_SHIFT) |
              (1 << GEN7_SBE_URB_ENTRY_READ_LENGTH_SHIFT) |
              (0 << GEN7_SBE_URB_ENTRY_READ_OFFSET_SHIFT));
    OUT_BATCH(batch, 0);
    OUT_BATCH(batch, 0);
    OUT_BATCH(batch, 0); /* DW4 */
    OUT_BATCH(batch, 0);
    OUT_BATCH(batch, 0);
    OUT_BATCH(batch, 0);
    OUT_BATCH(batch, 0);
    OUT_BATCH(batch, 0); /* DW9 */
    OUT_BATCH(batch, 0);
    OUT_BATCH(batch, 0);
    OUT_BATCH(batch, 0);
    OUT_BATCH(batch, 0);
    ADVANCE_BATCH(batch);

    BEGIN_BATCH(batch, 7);
    OUT_BATCH(batch, GEN6_3DSTATE_SF | (7 - 2));
    OUT_BATCH(batch, 0);
    OUT_BATCH(batch, GEN6_3DSTATE_SF_CULL_NONE);
    OUT_BATCH(batch, 2 << GEN6_3DSTATE_SF_TRIFAN_PROVOKE_SHIFT);
    OUT_BATCH(batch, 0);
    OUT_BATCH(batch, 0);
    OUT_BATCH(batch, 0);
    ADVANCE_BATCH(batch);
}

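/*
 * On GEN7 the pixel-shader dispatch controls moved from 3DSTATE_WM into
 * the new 3DSTATE_PS packet: kernel pointer, sampler/binding-table
 * counts, max thread count, push constants and SIMD16 dispatch are all
 * programmed there.
 */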
static void
gen7_emit_wm_state(VADriverContextP ctx, int kernel)
{
    struct i965_driver_data *i965 = i965_driver_data(ctx);
    struct intel_batchbuffer *batch = i965->batch;
    struct i965_render_state *render_state = &i965->render_state;

    BEGIN_BATCH(batch, 3);
    OUT_BATCH(batch, GEN6_3DSTATE_WM | (3 - 2));
    OUT_BATCH(batch,
              GEN7_WM_DISPATCH_ENABLE |
              GEN7_WM_PERSPECTIVE_PIXEL_BARYCENTRIC);
    OUT_BATCH(batch, 0);
    ADVANCE_BATCH(batch);

    BEGIN_BATCH(batch, 7);
    OUT_BATCH(batch, GEN6_3DSTATE_CONSTANT_PS | (7 - 2));
    OUT_BATCH(batch, 1);
    OUT_BATCH(batch, 0);
    OUT_RELOC(batch,
              render_state->curbe.bo,
              I915_GEM_DOMAIN_INSTRUCTION, 0,
              0);
    OUT_BATCH(batch, 0);
    OUT_BATCH(batch, 0);
    OUT_BATCH(batch, 0);
    ADVANCE_BATCH(batch);

    BEGIN_BATCH(batch, 8);
    OUT_BATCH(batch, GEN7_3DSTATE_PS | (8 - 2));
    OUT_RELOC(batch,
              render_state->render_kernels[kernel].bo,
              I915_GEM_DOMAIN_INSTRUCTION, 0,
              0);
    OUT_BATCH(batch,
              (1 << GEN7_PS_SAMPLER_COUNT_SHIFT) |
              (5 << GEN7_PS_BINDING_TABLE_ENTRY_COUNT_SHIFT));
    OUT_BATCH(batch, 0); /* scratch space base offset */
    OUT_BATCH(batch,
              ((86 - 1) << GEN7_PS_MAX_THREADS_SHIFT) |
              GEN7_PS_PUSH_CONSTANT_ENABLE |
              GEN7_PS_ATTRIBUTE_ENABLE |
              GEN7_PS_16_DISPATCH_ENABLE);
    OUT_BATCH(batch,
              (6 << GEN7_PS_DISPATCH_START_GRF_SHIFT_0));
    OUT_BATCH(batch, 0); /* kernel 1 pointer */
    OUT_BATCH(batch, 0); /* kernel 2 pointer */
    ADVANCE_BATCH(batch);
}

static void
gen7_emit_vertex_element_state(VADriverContextP ctx)
{
    struct i965_driver_data *i965 = i965_driver_data(ctx);
    struct intel_batchbuffer *batch = i965->batch;

    /* Set up our vertex elements, sourced from the single vertex buffer. */
    OUT_BATCH(batch, CMD_VERTEX_ELEMENTS | (5 - 2));
    /* offset 0: X,Y -> {X, Y, 1.0, 1.0} */
    OUT_BATCH(batch, (0 << GEN6_VE0_VERTEX_BUFFER_INDEX_SHIFT) |
              GEN6_VE0_VALID |
              (I965_SURFACEFORMAT_R32G32_FLOAT << VE0_FORMAT_SHIFT) |
              (0 << VE0_OFFSET_SHIFT));
    OUT_BATCH(batch, (I965_VFCOMPONENT_STORE_SRC << VE1_VFCOMPONENT_0_SHIFT) |
              (I965_VFCOMPONENT_STORE_SRC << VE1_VFCOMPONENT_1_SHIFT) |
              (I965_VFCOMPONENT_STORE_1_FLT << VE1_VFCOMPONENT_2_SHIFT) |
              (I965_VFCOMPONENT_STORE_1_FLT << VE1_VFCOMPONENT_3_SHIFT));
    /* offset 8: S0, T0 -> {S0, T0, 1.0, 1.0} */
    OUT_BATCH(batch, (0 << GEN6_VE0_VERTEX_BUFFER_INDEX_SHIFT) |
              GEN6_VE0_VALID |
              (I965_SURFACEFORMAT_R32G32_FLOAT << VE0_FORMAT_SHIFT) |
              (8 << VE0_OFFSET_SHIFT));
    OUT_BATCH(batch, (I965_VFCOMPONENT_STORE_SRC << VE1_VFCOMPONENT_0_SHIFT) |
              (I965_VFCOMPONENT_STORE_SRC << VE1_VFCOMPONENT_1_SHIFT) |
              (I965_VFCOMPONENT_STORE_1_FLT << VE1_VFCOMPONENT_2_SHIFT) |
              (I965_VFCOMPONENT_STORE_1_FLT << VE1_VFCOMPONENT_3_SHIFT));
}

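/*
 * Same three-vertex RECTLIST as on GEN6; GEN7 additionally requires
 * VB0_ADDRESS_MODIFYENABLE for the buffer address to take effect and
 * moves the primitive topology into DW1 of 3DPRIMITIVE.
 */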
static void
gen7_emit_vertices(VADriverContextP ctx)
{
    struct i965_driver_data *i965 = i965_driver_data(ctx);
    struct intel_batchbuffer *batch = i965->batch;
    struct i965_render_state *render_state = &i965->render_state;

    BEGIN_BATCH(batch, 5);
    OUT_BATCH(batch, CMD_VERTEX_BUFFERS | (5 - 2));
    OUT_BATCH(batch,
              (0 << GEN6_VB0_BUFFER_INDEX_SHIFT) |
              GEN6_VB0_VERTEXDATA |
              GEN7_VB0_ADDRESS_MODIFYENABLE |
              ((4 * 4) << VB0_BUFFER_PITCH_SHIFT));
    OUT_RELOC(batch, render_state->vb.vertex_buffer, I915_GEM_DOMAIN_VERTEX, 0, 0);
    OUT_RELOC(batch, render_state->vb.vertex_buffer, I915_GEM_DOMAIN_VERTEX, 0, 12 * 4);
    OUT_BATCH(batch, 0);
    ADVANCE_BATCH(batch);

    BEGIN_BATCH(batch, 7);
    OUT_BATCH(batch, CMD_3DPRIMITIVE | (7 - 2));
    OUT_BATCH(batch,
              _3DPRIM_RECTLIST |
              GEN7_3DPRIM_VERTEXBUFFER_ACCESS_SEQUENTIAL);
    OUT_BATCH(batch, 3); /* vertex count per instance */
    OUT_BATCH(batch, 0); /* start vertex offset */
    OUT_BATCH(batch, 1); /* single instance */
    OUT_BATCH(batch, 0); /* start instance location */
    OUT_BATCH(batch, 0);
    ADVANCE_BATCH(batch);
}

static void
gen7_render_emit_states(VADriverContextP ctx, int kernel)
{
    struct i965_driver_data *i965 = i965_driver_data(ctx);
    struct intel_batchbuffer *batch = i965->batch;

    intel_batchbuffer_start_atomic(batch, 0x1000);
    intel_batchbuffer_emit_mi_flush(batch);
    gen7_emit_invarient_states(ctx);
    gen7_emit_state_base_address(ctx);
    gen7_emit_viewport_state_pointers(ctx);
    gen7_emit_urb(ctx);
    gen7_emit_cc_state_pointers(ctx);
    gen7_emit_sampler_state_pointers(ctx);
    gen7_emit_bypass_state(ctx);
    gen7_emit_vs_state(ctx);
    gen7_emit_clip_state(ctx);
    gen7_emit_sf_state(ctx);
    gen7_emit_wm_state(ctx, kernel);
    gen7_emit_binding_table(ctx);
    gen7_emit_depth_buffer_state(ctx);
    gen7_emit_drawing_rectangle(ctx);
    gen7_emit_vertex_element_state(ctx);
    gen7_emit_vertices(ctx);
    intel_batchbuffer_end_atomic(batch);
}

static void
gen7_render_put_surface(
    VADriverContextP   ctx,
    VASurfaceID        surface,
    const VARectangle *src_rect,
    const VARectangle *dst_rect,
    unsigned int       flags
)
{
    struct i965_driver_data *i965 = i965_driver_data(ctx);
    struct intel_batchbuffer *batch = i965->batch;

    gen7_render_initialize(ctx);
    gen7_render_setup_states(ctx, surface, src_rect, dst_rect);
    i965_clear_dest_region(ctx);
    gen7_render_emit_states(ctx, PS_KERNEL);
    intel_batchbuffer_flush(batch);
}

static void
gen7_subpicture_render_blend_state(VADriverContextP ctx)
{
    struct i965_driver_data *i965 = i965_driver_data(ctx);
    struct i965_render_state *render_state = &i965->render_state;
    struct gen6_blend_state *blend_state;

    dri_bo_map(render_state->cc.blend, 1);
    assert(render_state->cc.blend->virtual);
    blend_state = render_state->cc.blend->virtual;
    memset(blend_state, 0, sizeof(*blend_state));
    blend_state->blend0.dest_blend_factor = I965_BLENDFACTOR_INV_SRC_ALPHA;
    blend_state->blend0.source_blend_factor = I965_BLENDFACTOR_SRC_ALPHA;
    blend_state->blend0.blend_func = I965_BLENDFUNCTION_ADD;
    blend_state->blend0.blend_enable = 1;
    blend_state->blend1.post_blend_clamp_enable = 1;
    blend_state->blend1.pre_blend_clamp_enable = 1;
    blend_state->blend1.clamp_range = 0; /* clamp range [0, 1] */
    dri_bo_unmap(render_state->cc.blend);
}

static void
gen7_subpicture_render_setup_states(
    VADriverContextP   ctx,
    VASurfaceID        surface,
    const VARectangle *src_rect,
    const VARectangle *dst_rect
)
{
    i965_render_dest_surface_state(ctx, 0);
    i965_subpic_render_src_surfaces_state(ctx, surface);
    i965_render_sampler(ctx);
    i965_render_cc_viewport(ctx);
    gen7_render_color_calc_state(ctx);
    gen7_subpicture_render_blend_state(ctx);
    gen7_render_depth_stencil_state(ctx);
    i965_subpic_render_upload_vertex(ctx, surface, dst_rect);
}

static void
gen7_render_put_subpicture(
    VADriverContextP   ctx,
    VASurfaceID        surface,
    const VARectangle *src_rect,
    const VARectangle *dst_rect
)
{
    struct i965_driver_data *i965 = i965_driver_data(ctx);
    struct intel_batchbuffer *batch = i965->batch;
    struct object_surface *obj_surface = SURFACE(surface);
    struct object_subpic *obj_subpic;

    assert(obj_surface);
    obj_subpic = SUBPIC(obj_surface->subpic);
    assert(obj_subpic);
    gen7_render_initialize(ctx);
    gen7_subpicture_render_setup_states(ctx, surface, src_rect, dst_rect);
    gen7_render_emit_states(ctx, PS_SUBPIC_KERNEL);
    i965_render_upload_image_palette(ctx, obj_subpic->image, 0xff);
    intel_batchbuffer_flush(batch);
}

/*
 * global functions
 */
VAStatus
i965_DestroySurfaces(VADriverContextP ctx,
                     VASurfaceID *surface_list,
                     int num_surfaces);
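/*
 * Entry point for vaPutSurface(): run post-processing first.  If it
 * produced a new (possibly already scaled) surface, render that one
 * instead -- using dst_rect as the source rectangle when scaling is
 * already done -- and destroy the temporary surface afterwards.
 */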
2903 void
2904 intel_render_put_surface(
2905     VADriverContextP   ctx,
2906     VASurfaceID        surface,
2907     const VARectangle *src_rect,
2908     const VARectangle *dst_rect,
2909     unsigned int       flags
2910 )
2911 {
2912     struct i965_driver_data *i965 = i965_driver_data(ctx);
2913     int has_done_scaling = 0;
2914     VASurfaceID in_surface_id = surface;
2915     VASurfaceID out_surface_id = i965_post_processing(ctx, surface, src_rect, dst_rect, flags, &has_done_scaling);
2916
2917     assert((!has_done_scaling) || (out_surface_id != VA_INVALID_ID));
2918
2919     if (out_surface_id != VA_INVALID_ID)
2920         in_surface_id = out_surface_id;
2921
2922     if (IS_GEN7(i965->intel.device_id))
2923         gen7_render_put_surface(ctx, in_surface_id, has_done_scaling ? dst_rect : src_rect, dst_rect, flags);
2924     else if (IS_GEN6(i965->intel.device_id))
2925         gen6_render_put_surface(ctx, in_surface_id, has_done_scaling ? dst_rect : src_rect, dst_rect, flags);
2926     else
2927         i965_render_put_surface(ctx, in_surface_id, has_done_scaling ? dst_rect : src_rect, dst_rect, flags);
2928
2929     if (in_surface_id != surface)
2930         i965_DestroySurfaces(ctx, &in_surface_id, 1);
2931 }

void
intel_render_put_subpicture(
    VADriverContextP   ctx,
    VASurfaceID        surface,
    const VARectangle *src_rect,
    const VARectangle *dst_rect
)
{
    struct i965_driver_data *i965 = i965_driver_data(ctx);

    if (IS_GEN7(i965->intel.device_id))
        gen7_render_put_subpicture(ctx, surface, src_rect, dst_rect);
    else if (IS_GEN6(i965->intel.device_id))
        gen6_render_put_subpicture(ctx, surface, src_rect, dst_rect);
    else
        i965_render_put_subpicture(ctx, surface, src_rect, dst_rect);
}

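/*
 * One-time render setup: pick the shader kernel table for the detected
 * GPU generation, upload each kernel into its own GEM buffer object and
 * allocate the constant (CURBE) buffer.
 */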
Bool
i965_render_init(VADriverContextP ctx)
{
    struct i965_driver_data *i965 = i965_driver_data(ctx);
    struct i965_render_state *render_state = &i965->render_state;
    int i;

    /* kernel */
    assert(NUM_RENDER_KERNEL == (sizeof(render_kernels_gen5) /
                                 sizeof(render_kernels_gen5[0])));
    assert(NUM_RENDER_KERNEL == (sizeof(render_kernels_gen6) /
                                 sizeof(render_kernels_gen6[0])));

    if (IS_GEN7(i965->intel.device_id))
        memcpy(render_state->render_kernels, render_kernels_gen7, sizeof(render_state->render_kernels));
    else if (IS_GEN6(i965->intel.device_id))
        memcpy(render_state->render_kernels, render_kernels_gen6, sizeof(render_state->render_kernels));
    else if (IS_IRONLAKE(i965->intel.device_id))
        memcpy(render_state->render_kernels, render_kernels_gen5, sizeof(render_state->render_kernels));
    else
        memcpy(render_state->render_kernels, render_kernels_gen4, sizeof(render_state->render_kernels));

    for (i = 0; i < NUM_RENDER_KERNEL; i++) {
        struct i965_kernel *kernel = &render_state->render_kernels[i];

        if (!kernel->size)
            continue;

        kernel->bo = dri_bo_alloc(i965->intel.bufmgr,
                                  kernel->name,
                                  kernel->size, 0x1000); /* 4 KiB aligned */
        assert(kernel->bo);
        dri_bo_subdata(kernel->bo, 0, kernel->size, kernel->bin);
    }

    /* constant buffer */
    render_state->curbe.bo = dri_bo_alloc(i965->intel.bufmgr,
                                          "constant buffer",
                                          4096, 64);
    assert(render_state->curbe.bo);

    return True;
}

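/*
 * Tear-down counterpart of i965_render_init(): drop every buffer object
 * owned by the render state and clear the pointers so a later re-init
 * starts from a clean slate.
 */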
Bool
i965_render_terminate(VADriverContextP ctx)
{
    int i;
    struct i965_driver_data *i965 = i965_driver_data(ctx);
    struct i965_render_state *render_state = &i965->render_state;

    dri_bo_unreference(render_state->curbe.bo);
    render_state->curbe.bo = NULL;

    for (i = 0; i < NUM_RENDER_KERNEL; i++) {
        struct i965_kernel *kernel = &render_state->render_kernels[i];

        dri_bo_unreference(kernel->bo);
        kernel->bo = NULL;
    }

    dri_bo_unreference(render_state->vb.vertex_buffer);
    render_state->vb.vertex_buffer = NULL;
    dri_bo_unreference(render_state->vs.state);
    render_state->vs.state = NULL;
    dri_bo_unreference(render_state->sf.state);
    render_state->sf.state = NULL;
    dri_bo_unreference(render_state->wm.sampler);
    render_state->wm.sampler = NULL;
    dri_bo_unreference(render_state->wm.state);
    render_state->wm.state = NULL;
    dri_bo_unreference(render_state->wm.surface_state_binding_table_bo);
    render_state->wm.surface_state_binding_table_bo = NULL;
    dri_bo_unreference(render_state->cc.viewport);
    render_state->cc.viewport = NULL;
    dri_bo_unreference(render_state->cc.state);
    render_state->cc.state = NULL;
    dri_bo_unreference(render_state->cc.blend);
    render_state->cc.blend = NULL;
    dri_bo_unreference(render_state->cc.depth_stencil);
    render_state->cc.depth_stencil = NULL;

    if (render_state->draw_region) {
        dri_bo_unreference(render_state->draw_region->bo);
        free(render_state->draw_region);
        render_state->draw_region = NULL;
    }

    return True;
}