fada70b59a10e1f7403592bb992daf436da141a9
[platform/upstream/libva-intel-driver.git] / src / i965_render.c
1 /*
2  * Copyright © 2006 Intel Corporation
3  *
4  * Permission is hereby granted, free of charge, to any person obtaining a
5  * copy of this software and associated documentation files (the "Software"),
6  * to deal in the Software without restriction, including without limitation
7  * the rights to use, copy, modify, merge, publish, distribute, sublicense,
8  * and/or sell copies of the Software, and to permit persons to whom the
9  * Software is furnished to do so, subject to the following conditions:
10  *
11  * The above copyright notice and this permission notice (including the next
12  * paragraph) shall be included in all copies or substantial portions of the
13  * Software.
14  *
15  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16  * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17  * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
18  * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19  * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
20  * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
21  * DEALINGS IN THE SOFTWARE.
22  *
23  * Authors:
24  *    Eric Anholt <eric@anholt.net>
25  *    Keith Packard <keithp@keithp.com>
26  *    Xiang Haihao <haihao.xiang@intel.com>
27  *
28  */
29
30 /*
31  * Most of rendering codes are ported from xf86-video-intel/src/i965_video.c
32  */
33
34 #include <stdio.h>
35 #include <stdlib.h>
36 #include <string.h>
37 #include <assert.h>
38
39 #include <va/va_dricommon.h>
40
41 #include "intel_batchbuffer.h"
42 #include "intel_driver.h"
43 #include "i965_defines.h"
44 #include "i965_drv_video.h"
45 #include "i965_structs.h"
46
47 #include "i965_render.h"
48
/* SF (strips & fans) kernel: 16 GRF registers, a single thread */
#define SF_KERNEL_NUM_GRF       16
#define SF_MAX_THREADS          1

/* Pre-assembled Gen4 SF kernel (ported from xf86-video-intel) */
static const uint32_t sf_kernel_static[][4] = 
{
#include "shaders/render/exa_sf.g4b"
};

#define PS_KERNEL_NUM_GRF       32
#define PS_MAX_THREADS          32

/* Number of 16-register GRF blocks, minus one, as the hardware encodes it */
#define I965_GRF_BLOCKS(nreg)   ((nreg + 15) / 16 - 1)

/* Gen4 pixel shader: sample planar YUV, convert to RGB, write out */
static const uint32_t ps_kernel_static[][4] = 
{
#include "shaders/render/exa_wm_xy.g4b"
#include "shaders/render/exa_wm_src_affine.g4b"
#include "shaders/render/exa_wm_src_sample_planar.g4b"
#include "shaders/render/exa_wm_yuv_rgb.g4b"
#include "shaders/render/exa_wm_write.g4b"
};
/* Gen4 pixel shader for ARGB subpictures (no color-space conversion step) */
static const uint32_t ps_subpic_kernel_static[][4] = 
{
#include "shaders/render/exa_wm_xy.g4b"
#include "shaders/render/exa_wm_src_affine.g4b"
#include "shaders/render/exa_wm_src_sample_argb.g4b"
#include "shaders/render/exa_wm_write.g4b"
};
77
/* On IRONLAKE (Gen5): same kernels, assembled for the Gen5 instruction
 * encoding (.gen5 shader binaries) */
static const uint32_t sf_kernel_static_gen5[][4] = 
{
#include "shaders/render/exa_sf.g4b.gen5"
};

/* Gen5 pixel shader: planar YUV -> RGB */
static const uint32_t ps_kernel_static_gen5[][4] = 
{
#include "shaders/render/exa_wm_xy.g4b.gen5"
#include "shaders/render/exa_wm_src_affine.g4b.gen5"
#include "shaders/render/exa_wm_src_sample_planar.g4b.gen5"
#include "shaders/render/exa_wm_yuv_rgb.g4b.gen5"
#include "shaders/render/exa_wm_write.g4b.gen5"
};
/* Gen5 pixel shader for ARGB subpictures */
static const uint32_t ps_subpic_kernel_static_gen5[][4] = 
{
#include "shaders/render/exa_wm_xy.g4b.gen5"
#include "shaders/render/exa_wm_src_affine.g4b.gen5"
#include "shaders/render/exa_wm_src_sample_argb.g4b.gen5"
#include "shaders/render/exa_wm_write.g4b.gen5"
};
99
/* programs for Sandybridge (Gen6).  The SF kernel table is intentionally
 * empty — no SF kernel binary is included for Gen6 (apparently unused
 * on this generation; the table exists to keep render_kernels_gen6
 * parallel with the other generations). */
static const uint32_t sf_kernel_static_gen6[][4] = 
{
};

/* Gen6 pixel shader: planar YUV -> RGB (no exa_wm_xy stage on Gen6) */
static const uint32_t ps_kernel_static_gen6[][4] = {
#include "shaders/render/exa_wm_src_affine.g6b"
#include "shaders/render/exa_wm_src_sample_planar.g6b"
#include "shaders/render/exa_wm_yuv_rgb.g6b"
#include "shaders/render/exa_wm_write.g6b"
};

/* Gen6 pixel shader for ARGB subpictures */
static const uint32_t ps_subpic_kernel_static_gen6[][4] = {
#include "shaders/render/exa_wm_src_affine.g6b"
#include "shaders/render/exa_wm_src_sample_argb.g6b"
#include "shaders/render/exa_wm_write.g6b"
};
117
/* programs for Ivybridge (Gen7).  As on Gen6, the SF kernel table is
 * intentionally empty. */
static const uint32_t sf_kernel_static_gen7[][4] = 
{
};

/* Gen7 pixel shader: planar YUV -> RGB */
static const uint32_t ps_kernel_static_gen7[][4] = {
#include "shaders/render/exa_wm_src_affine.g7b"
#include "shaders/render/exa_wm_src_sample_planar.g7b"
#include "shaders/render/exa_wm_yuv_rgb.g7b"
#include "shaders/render/exa_wm_write.g7b"
};

/* Gen7 pixel shader for ARGB subpictures */
static const uint32_t ps_subpic_kernel_static_gen7[][4] = {
#include "shaders/render/exa_wm_src_affine.g7b"
#include "shaders/render/exa_wm_src_sample_argb.g7b"
#include "shaders/render/exa_wm_write.g7b"
};
135
/*
 * One buffer holds all surface-state entries plus the binding table.
 * Each entry is padded to the larger of the Gen4 and Gen7 state sizes so
 * the same byte offsets work on every generation; the binding table
 * starts right after the last surface-state slot.
 */
#define SURFACE_STATE_PADDED_SIZE_I965  ALIGN(sizeof(struct i965_surface_state), 32)
#define SURFACE_STATE_PADDED_SIZE_GEN7  ALIGN(sizeof(struct gen7_surface_state), 32)
#define SURFACE_STATE_PADDED_SIZE       MAX(SURFACE_STATE_PADDED_SIZE_I965, SURFACE_STATE_PADDED_SIZE_GEN7)
#define SURFACE_STATE_OFFSET(index)     (SURFACE_STATE_PADDED_SIZE * index)
#define BINDING_TABLE_OFFSET            SURFACE_STATE_OFFSET(MAX_RENDER_SURFACES)
141
/* Reinterpret the bit pattern of an IEEE-754 float as a 32-bit integer. */
static uint32_t float_to_uint (float f) 
{
    uint32_t bits;

    memcpy(&bits, &f, sizeof(bits));
    return bits;
}
152
/* Indices into the render_kernels_gen* tables below. */
enum 
{
    SF_KERNEL = 0,      /* strips & fans kernel */
    PS_KERNEL,          /* pixel shader: planar YUV -> RGB */
    PS_SUBPIC_KERNEL    /* pixel shader: ARGB subpicture */
};
159
/* Kernel table for Gen4 hardware, indexed by the enum above.
 * Fields: name, interface id, binary, size in bytes, bo (NULL until the
 * kernel is uploaded). */
static struct i965_kernel render_kernels_gen4[] = {
    {
        "SF",
        SF_KERNEL,
        sf_kernel_static,
        sizeof(sf_kernel_static),
        NULL
    },
    {
        "PS",
        PS_KERNEL,
        ps_kernel_static,
        sizeof(ps_kernel_static),
        NULL
    },

    {
        "PS_SUBPIC",
        PS_SUBPIC_KERNEL,
        ps_subpic_kernel_static,
        sizeof(ps_subpic_kernel_static),
        NULL
    }
};
184
/* Kernel table for Gen5 (Ironlake); same layout as render_kernels_gen4. */
static struct i965_kernel render_kernels_gen5[] = {
    {
        "SF",
        SF_KERNEL,
        sf_kernel_static_gen5,
        sizeof(sf_kernel_static_gen5),
        NULL
    },
    {
        "PS",
        PS_KERNEL,
        ps_kernel_static_gen5,
        sizeof(ps_kernel_static_gen5),
        NULL
    },

    {
        "PS_SUBPIC",
        PS_SUBPIC_KERNEL,
        ps_subpic_kernel_static_gen5,
        sizeof(ps_subpic_kernel_static_gen5),
        NULL
    }
};
209
/* Kernel table for Gen6 (Sandybridge); the SF entry points at an empty
 * table (see sf_kernel_static_gen6). */
static struct i965_kernel render_kernels_gen6[] = {
    {
        "SF",
        SF_KERNEL,
        sf_kernel_static_gen6,
        sizeof(sf_kernel_static_gen6),
        NULL
    },
    {
        "PS",
        PS_KERNEL,
        ps_kernel_static_gen6,
        sizeof(ps_kernel_static_gen6),
        NULL
    },

    {
        "PS_SUBPIC",
        PS_SUBPIC_KERNEL,
        ps_subpic_kernel_static_gen6,
        sizeof(ps_subpic_kernel_static_gen6),
        NULL
    }
};
234
/* Kernel table for Gen7 (Ivybridge); the SF entry points at an empty
 * table (see sf_kernel_static_gen7). */
static struct i965_kernel render_kernels_gen7[] = {
    {
        "SF",
        SF_KERNEL,
        sf_kernel_static_gen7,
        sizeof(sf_kernel_static_gen7),
        NULL
    },
    {
        "PS",
        PS_KERNEL,
        ps_kernel_static_gen7,
        sizeof(ps_kernel_static_gen7),
        NULL
    },

    {
        "PS_SUBPIC",
        PS_SUBPIC_KERNEL,
        ps_subpic_kernel_static_gen7,
        sizeof(ps_subpic_kernel_static_gen7),
        NULL
    }
};
259
/*
 * Static URB (unified return buffer) partitioning for the fixed-function
 * pipeline.  Only VS, SF and CS handles are actually allocated; GS and
 * CLIP get zero entries because those stages are disabled.
 */
#define URB_VS_ENTRIES        8
#define URB_VS_ENTRY_SIZE     1

#define URB_GS_ENTRIES        0
#define URB_GS_ENTRY_SIZE     0

#define URB_CLIP_ENTRIES      0
#define URB_CLIP_ENTRY_SIZE   0

#define URB_SF_ENTRIES        1
#define URB_SF_ENTRY_SIZE     2

#define URB_CS_ENTRIES        1
#define URB_CS_ENTRY_SIZE     1
274
275 static void
276 i965_render_vs_unit(VADriverContextP ctx)
277 {
278     struct i965_driver_data *i965 = i965_driver_data(ctx);
279     struct i965_render_state *render_state = &i965->render_state;
280     struct i965_vs_unit_state *vs_state;
281
282     dri_bo_map(render_state->vs.state, 1);
283     assert(render_state->vs.state->virtual);
284     vs_state = render_state->vs.state->virtual;
285     memset(vs_state, 0, sizeof(*vs_state));
286
287     if (IS_IRONLAKE(i965->intel.device_id))
288         vs_state->thread4.nr_urb_entries = URB_VS_ENTRIES >> 2;
289     else
290         vs_state->thread4.nr_urb_entries = URB_VS_ENTRIES;
291
292     vs_state->thread4.urb_entry_allocation_size = URB_VS_ENTRY_SIZE - 1;
293     vs_state->vs6.vs_enable = 0;
294     vs_state->vs6.vert_cache_disable = 1;
295     
296     dri_bo_unmap(render_state->vs.state);
297 }
298
/*
 * Set up the SF (strips & fans) unit state: point it at the SF kernel,
 * configure the URB handles and exception bits, and skip the viewport
 * transform (vertices are already in screen space).
 */
static void
i965_render_sf_unit(VADriverContextP ctx)
{
    struct i965_driver_data *i965 = i965_driver_data(ctx);
    struct i965_render_state *render_state = &i965->render_state;
    struct i965_sf_unit_state *sf_state;

    dri_bo_map(render_state->sf.state, 1);
    assert(render_state->sf.state->virtual);
    sf_state = render_state->sf.state->virtual;
    memset(sf_state, 0, sizeof(*sf_state));

    /* kernel_start_pointer is in 64-byte units (offset >> 6) */
    sf_state->thread0.grf_reg_count = I965_GRF_BLOCKS(SF_KERNEL_NUM_GRF);
    sf_state->thread0.kernel_start_pointer = render_state->render_kernels[SF_KERNEL].bo->offset >> 6;

    sf_state->sf1.single_program_flow = 1; /* XXX */
    sf_state->sf1.binding_table_entry_count = 0;
    sf_state->sf1.thread_priority = 0;
    sf_state->sf1.floating_point_mode = 0; /* Mesa does this */
    sf_state->sf1.illegal_op_exception_enable = 1;
    sf_state->sf1.mask_stack_exception_enable = 1;
    sf_state->sf1.sw_exception_enable = 1;

    /* scratch space is not used in our kernel */
    sf_state->thread2.per_thread_scratch_space = 0;
    sf_state->thread2.scratch_space_base_pointer = 0;

    sf_state->thread3.const_urb_entry_read_length = 0; /* no const URBs */
    sf_state->thread3.const_urb_entry_read_offset = 0; /* no const URBs */
    sf_state->thread3.urb_entry_read_length = 1; /* 1 URB per vertex */
    sf_state->thread3.urb_entry_read_offset = 0;
    sf_state->thread3.dispatch_grf_start_reg = 3;

    /* max_threads is encoded minus one */
    sf_state->thread4.max_threads = SF_MAX_THREADS - 1;
    sf_state->thread4.urb_entry_allocation_size = URB_SF_ENTRY_SIZE - 1;
    sf_state->thread4.nr_urb_entries = URB_SF_ENTRIES;
    sf_state->thread4.stats_enable = 1;

    sf_state->sf5.viewport_transform = 0; /* skip viewport */

    sf_state->sf6.cull_mode = I965_CULLMODE_NONE;
    sf_state->sf6.scissor = 0;

    sf_state->sf7.trifan_pv = 2;

    /* 0x8 = half-pixel (0.5) destination origin bias */
    sf_state->sf6.dest_org_vbias = 0x8;
    sf_state->sf6.dest_org_hbias = 0x8;

    /* Patch thread0 (embeds the SF kernel address) when the kernel BO
     * moves; the reloc delta keeps grf_reg_count in the low bits. */
    dri_bo_emit_reloc(render_state->sf.state,
                      I915_GEM_DOMAIN_INSTRUCTION, 0,
                      sf_state->thread0.grf_reg_count << 1,
                      offsetof(struct i965_sf_unit_state, thread0),
                      render_state->render_kernels[SF_KERNEL].bo);

    dri_bo_unmap(render_state->sf.state);
}
355
356 static void 
357 i965_render_sampler(VADriverContextP ctx)
358 {
359     struct i965_driver_data *i965 = i965_driver_data(ctx);
360     struct i965_render_state *render_state = &i965->render_state;
361     struct i965_sampler_state *sampler_state;
362     int i;
363     
364     assert(render_state->wm.sampler_count > 0);
365     assert(render_state->wm.sampler_count <= MAX_SAMPLERS);
366
367     dri_bo_map(render_state->wm.sampler, 1);
368     assert(render_state->wm.sampler->virtual);
369     sampler_state = render_state->wm.sampler->virtual;
370     for (i = 0; i < render_state->wm.sampler_count; i++) {
371         memset(sampler_state, 0, sizeof(*sampler_state));
372         sampler_state->ss0.min_filter = I965_MAPFILTER_LINEAR;
373         sampler_state->ss0.mag_filter = I965_MAPFILTER_LINEAR;
374         sampler_state->ss1.r_wrap_mode = I965_TEXCOORDMODE_CLAMP;
375         sampler_state->ss1.s_wrap_mode = I965_TEXCOORDMODE_CLAMP;
376         sampler_state->ss1.t_wrap_mode = I965_TEXCOORDMODE_CLAMP;
377         sampler_state++;
378     }
379
380     dri_bo_unmap(render_state->wm.sampler);
381 }
/*
 * Set up the WM (windower/masker) unit state for subpicture rendering:
 * points thread0 at the ARGB subpicture pixel shader and wm4 at the
 * sampler state, then emits relocations for both buffer references.
 */
static void
i965_subpic_render_wm_unit(VADriverContextP ctx)
{
    struct i965_driver_data *i965 = i965_driver_data(ctx);
    struct i965_render_state *render_state = &i965->render_state;
    struct i965_wm_unit_state *wm_state;

    assert(render_state->wm.sampler);

    dri_bo_map(render_state->wm.state, 1);
    assert(render_state->wm.state->virtual);
    wm_state = render_state->wm.state->virtual;
    memset(wm_state, 0, sizeof(*wm_state));

    /* kernel_start_pointer is in 64-byte units (offset >> 6) */
    wm_state->thread0.grf_reg_count = I965_GRF_BLOCKS(PS_KERNEL_NUM_GRF);
    wm_state->thread0.kernel_start_pointer = render_state->render_kernels[PS_SUBPIC_KERNEL].bo->offset >> 6;

    wm_state->thread1.single_program_flow = 1; /* XXX */

    if (IS_IRONLAKE(i965->intel.device_id))
        wm_state->thread1.binding_table_entry_count = 0; /* hardware requirement */
    else
        wm_state->thread1.binding_table_entry_count = 7;

    wm_state->thread2.scratch_space_base_pointer = 0;
    wm_state->thread2.per_thread_scratch_space = 0; /* 1024 bytes */

    wm_state->thread3.dispatch_grf_start_reg = 3; /* XXX */
    wm_state->thread3.const_urb_entry_read_length = 0;
    wm_state->thread3.const_urb_entry_read_offset = 0;
    wm_state->thread3.urb_entry_read_length = 1; /* XXX */
    wm_state->thread3.urb_entry_read_offset = 0; /* XXX */

    wm_state->wm4.stats_enable = 0;
    /* sampler_state_pointer is in 32-byte units (offset >> 5) */
    wm_state->wm4.sampler_state_pointer = render_state->wm.sampler->offset >> 5; 

    if (IS_IRONLAKE(i965->intel.device_id)) {
        wm_state->wm4.sampler_count = 0;        /* hardware requirement */
        wm_state->wm5.max_threads = 12 * 6 - 1;
    } else {
        /* sampler_count is encoded in groups of four samplers */
        wm_state->wm4.sampler_count = (render_state->wm.sampler_count + 3) / 4;
        wm_state->wm5.max_threads = 10 * 5 - 1;
    }

    wm_state->wm5.thread_dispatch_enable = 1;
    wm_state->wm5.enable_16_pix = 1;
    wm_state->wm5.enable_8_pix = 0;
    wm_state->wm5.early_depth_test = 1;

    /* Patch thread0 (embeds the kernel address) when the kernel BO
     * moves; the reloc delta keeps grf_reg_count in the low bits. */
    dri_bo_emit_reloc(render_state->wm.state,
                      I915_GEM_DOMAIN_INSTRUCTION, 0,
                      wm_state->thread0.grf_reg_count << 1,
                      offsetof(struct i965_wm_unit_state, thread0),
                      render_state->render_kernels[PS_SUBPIC_KERNEL].bo);

    /* Likewise for wm4, which embeds the sampler-state address */
    dri_bo_emit_reloc(render_state->wm.state,
                      I915_GEM_DOMAIN_INSTRUCTION, 0,
                      wm_state->wm4.sampler_count << 2,
                      offsetof(struct i965_wm_unit_state, wm4),
                      render_state->wm.sampler);

    dri_bo_unmap(render_state->wm.state);
}
445
446
/*
 * Set up the WM (windower/masker) unit state for video rendering:
 * points thread0 at the planar YUV->RGB pixel shader and wm4 at the
 * sampler state, then emits relocations for both buffer references.
 */
static void
i965_render_wm_unit(VADriverContextP ctx)
{
    struct i965_driver_data *i965 = i965_driver_data(ctx);
    struct i965_render_state *render_state = &i965->render_state;
    struct i965_wm_unit_state *wm_state;

    assert(render_state->wm.sampler);

    dri_bo_map(render_state->wm.state, 1);
    assert(render_state->wm.state->virtual);
    wm_state = render_state->wm.state->virtual;
    memset(wm_state, 0, sizeof(*wm_state));

    /* kernel_start_pointer is in 64-byte units (offset >> 6) */
    wm_state->thread0.grf_reg_count = I965_GRF_BLOCKS(PS_KERNEL_NUM_GRF);
    wm_state->thread0.kernel_start_pointer = render_state->render_kernels[PS_KERNEL].bo->offset >> 6;

    wm_state->thread1.single_program_flow = 1; /* XXX */

    if (IS_IRONLAKE(i965->intel.device_id))
        wm_state->thread1.binding_table_entry_count = 0;        /* hardware requirement */
    else
        wm_state->thread1.binding_table_entry_count = 7;

    wm_state->thread2.scratch_space_base_pointer = 0;
    wm_state->thread2.per_thread_scratch_space = 0; /* 1024 bytes */

    /* Unlike the subpic variant, this kernel reads one constant URB
     * entry (NOTE(review): presumably the color-conversion constants —
     * confirm against the CS/URB setup elsewhere in this file). */
    wm_state->thread3.dispatch_grf_start_reg = 2; /* XXX */
    wm_state->thread3.const_urb_entry_read_length = 1;
    wm_state->thread3.const_urb_entry_read_offset = 0;
    wm_state->thread3.urb_entry_read_length = 1; /* XXX */
    wm_state->thread3.urb_entry_read_offset = 0; /* XXX */

    wm_state->wm4.stats_enable = 0;
    /* sampler_state_pointer is in 32-byte units (offset >> 5) */
    wm_state->wm4.sampler_state_pointer = render_state->wm.sampler->offset >> 5; 

    if (IS_IRONLAKE(i965->intel.device_id)) {
        wm_state->wm4.sampler_count = 0;        /* hardware requirement */
        wm_state->wm5.max_threads = 12 * 6 - 1;
    } else {
        /* sampler_count is encoded in groups of four samplers */
        wm_state->wm4.sampler_count = (render_state->wm.sampler_count + 3) / 4;
        wm_state->wm5.max_threads = 10 * 5 - 1;
    }

    wm_state->wm5.thread_dispatch_enable = 1;
    wm_state->wm5.enable_16_pix = 1;
    wm_state->wm5.enable_8_pix = 0;
    wm_state->wm5.early_depth_test = 1;

    /* Patch thread0 (embeds the kernel address) when the kernel BO
     * moves; the reloc delta keeps grf_reg_count in the low bits. */
    dri_bo_emit_reloc(render_state->wm.state,
                      I915_GEM_DOMAIN_INSTRUCTION, 0,
                      wm_state->thread0.grf_reg_count << 1,
                      offsetof(struct i965_wm_unit_state, thread0),
                      render_state->render_kernels[PS_KERNEL].bo);

    /* Likewise for wm4, which embeds the sampler-state address */
    dri_bo_emit_reloc(render_state->wm.state,
                      I915_GEM_DOMAIN_INSTRUCTION, 0,
                      wm_state->wm4.sampler_count << 2,
                      offsetof(struct i965_wm_unit_state, wm4),
                      render_state->wm.sampler);

    dri_bo_unmap(render_state->wm.state);
}
510
511 static void 
512 i965_render_cc_viewport(VADriverContextP ctx)
513 {
514     struct i965_driver_data *i965 = i965_driver_data(ctx);
515     struct i965_render_state *render_state = &i965->render_state;
516     struct i965_cc_viewport *cc_viewport;
517
518     dri_bo_map(render_state->cc.viewport, 1);
519     assert(render_state->cc.viewport->virtual);
520     cc_viewport = render_state->cc.viewport->virtual;
521     memset(cc_viewport, 0, sizeof(*cc_viewport));
522     
523     cc_viewport->min_depth = -1.e35;
524     cc_viewport->max_depth = 1.e35;
525
526     dri_bo_unmap(render_state->cc.viewport);
527 }
528
/*
 * CC unit state for subpicture compositing: color blend enabled with
 * source-over factors (SRC_ALPHA / INV_SRC_ALPHA); depth, stencil,
 * logic op and alpha test are all disabled.
 */
static void 
i965_subpic_render_cc_unit(VADriverContextP ctx)
{
    struct i965_driver_data *i965 = i965_driver_data(ctx);
    struct i965_render_state *render_state = &i965->render_state;
    struct i965_cc_unit_state *cc_state;

    assert(render_state->cc.viewport);

    dri_bo_map(render_state->cc.state, 1);
    assert(render_state->cc.state->virtual);
    cc_state = render_state->cc.state->virtual;
    memset(cc_state, 0, sizeof(*cc_state));

    cc_state->cc0.stencil_enable = 0;   /* disable stencil */
    cc_state->cc2.depth_test = 0;       /* disable depth test */
    cc_state->cc2.logicop_enable = 0;   /* disable logic op */
    cc_state->cc3.ia_blend_enable = 0 ;  /* blend alpha just like colors */
    cc_state->cc3.blend_enable = 1;     /* enable color blend */
    cc_state->cc3.alpha_test = 0;       /* disable alpha test */
    cc_state->cc3.alpha_test_format = 0;//0:ALPHATEST_UNORM8;       /*store alpha value with UNORM8 */
    cc_state->cc3.alpha_test_func = 5;//COMPAREFUNCTION_LESS;       /*pass if less than the reference */
    /* cc_viewport_state_offset is in 32-byte units; relocated below */
    cc_state->cc4.cc_viewport_state_offset = render_state->cc.viewport->offset >> 5;

    cc_state->cc5.dither_enable = 0;    /* disable dither */
    cc_state->cc5.logicop_func = 0xc;   /* WHITE */
    cc_state->cc5.statistics_enable = 1;
    cc_state->cc5.ia_blend_function = I965_BLENDFUNCTION_ADD;
    cc_state->cc5.ia_src_blend_factor = I965_BLENDFACTOR_DST_ALPHA;
    cc_state->cc5.ia_dest_blend_factor = I965_BLENDFACTOR_DST_ALPHA;

    cc_state->cc6.clamp_post_alpha_blend = 0; 
    cc_state->cc6.clamp_pre_alpha_blend  =0; 
    
    /*final color = src_color*src_blend_factor +/- dst_color*dest_color_blend_factor*/
    cc_state->cc6.blend_function = I965_BLENDFUNCTION_ADD;
    cc_state->cc6.src_blend_factor = I965_BLENDFACTOR_SRC_ALPHA;
    cc_state->cc6.dest_blend_factor = I965_BLENDFACTOR_INV_SRC_ALPHA;
   
    /*alpha test reference*/
    cc_state->cc7.alpha_ref.f =0.0 ;

    /* Patch cc4 (embeds the viewport address) when the viewport BO moves */
    dri_bo_emit_reloc(render_state->cc.state,
                      I915_GEM_DOMAIN_INSTRUCTION, 0,
                      0,
                      offsetof(struct i965_cc_unit_state, cc4),
                      render_state->cc.viewport);

    dri_bo_unmap(render_state->cc.state);
}
580
581
/*
 * CC unit state for plain video rendering: blending, depth, stencil and
 * alpha test are all disabled; the logic op is enabled instead.
 */
static void 
i965_render_cc_unit(VADriverContextP ctx)
{
    struct i965_driver_data *i965 = i965_driver_data(ctx);
    struct i965_render_state *render_state = &i965->render_state;
    struct i965_cc_unit_state *cc_state;

    assert(render_state->cc.viewport);

    dri_bo_map(render_state->cc.state, 1);
    assert(render_state->cc.state->virtual);
    cc_state = render_state->cc.state->virtual;
    memset(cc_state, 0, sizeof(*cc_state));

    cc_state->cc0.stencil_enable = 0;   /* disable stencil */
    cc_state->cc2.depth_test = 0;       /* disable depth test */
    cc_state->cc2.logicop_enable = 1;   /* enable logic op */
    cc_state->cc3.ia_blend_enable = 0;  /* blend alpha just like colors */
    cc_state->cc3.blend_enable = 0;     /* disable color blend */
    cc_state->cc3.alpha_test = 0;       /* disable alpha test */
    /* cc_viewport_state_offset is in 32-byte units; relocated below */
    cc_state->cc4.cc_viewport_state_offset = render_state->cc.viewport->offset >> 5;

    cc_state->cc5.dither_enable = 0;    /* disable dither */
    cc_state->cc5.logicop_func = 0xc;   /* WHITE */
    cc_state->cc5.statistics_enable = 1;
    cc_state->cc5.ia_blend_function = I965_BLENDFUNCTION_ADD;
    cc_state->cc5.ia_src_blend_factor = I965_BLENDFACTOR_ONE;
    cc_state->cc5.ia_dest_blend_factor = I965_BLENDFACTOR_ONE;

    /* Patch cc4 (embeds the viewport address) when the viewport BO moves */
    dri_bo_emit_reloc(render_state->cc.state,
                      I915_GEM_DOMAIN_INSTRUCTION, 0,
                      0,
                      offsetof(struct i965_cc_unit_state, cc4),
                      render_state->cc.viewport);

    dri_bo_unmap(render_state->cc.state);
}
619
620 static void
621 i965_render_set_surface_tiling(struct i965_surface_state *ss, unsigned int tiling)
622 {
623     switch (tiling) {
624     case I915_TILING_NONE:
625         ss->ss3.tiled_surface = 0;
626         ss->ss3.tile_walk = 0;
627         break;
628     case I915_TILING_X:
629         ss->ss3.tiled_surface = 1;
630         ss->ss3.tile_walk = I965_TILEWALK_XMAJOR;
631         break;
632     case I915_TILING_Y:
633         ss->ss3.tiled_surface = 1;
634         ss->ss3.tile_walk = I965_TILEWALK_YMAJOR;
635         break;
636     }
637 }
638
/*
 * Fill in a pre-Gen7 SURFACE_STATE for a 2D surface.  Width, height and
 * pitch are programmed minus one, as the hardware encodes them.
 * ss1.base_addr is seeded with the BO's presumed GPU offset; callers
 * emit a relocation at ss1 so it is fixed up at execution time.
 */
static void
i965_render_set_surface_state(struct i965_surface_state *ss,
                              dri_bo *bo, unsigned long offset,
                              int width, int height,
                              int pitch, int format)
{
    unsigned int tiling;
    unsigned int swizzle;

    memset(ss, 0, sizeof(*ss));
    ss->ss0.surface_type = I965_SURFACE_2D;
    ss->ss0.surface_format = format;
    ss->ss0.color_blend = 1;

    ss->ss1.base_addr = bo->offset + offset;

    ss->ss2.width = width - 1;
    ss->ss2.height = height - 1;

    ss->ss3.pitch = pitch - 1;

    /* swizzle is fetched but unused; only the tiling mode matters */
    dri_bo_get_tiling(bo, &tiling, &swizzle);
    i965_render_set_surface_tiling(ss, tiling);
}
663
664 static void
665 gen7_render_set_surface_tiling(struct gen7_surface_state *ss, uint32_t tiling)
666 {
667    switch (tiling) {
668    case I915_TILING_NONE:
669       ss->ss0.tiled_surface = 0;
670       ss->ss0.tile_walk = 0;
671       break;
672    case I915_TILING_X:
673       ss->ss0.tiled_surface = 1;
674       ss->ss0.tile_walk = I965_TILEWALK_XMAJOR;
675       break;
676    case I915_TILING_Y:
677       ss->ss0.tiled_surface = 1;
678       ss->ss0.tile_walk = I965_TILEWALK_YMAJOR;
679       break;
680    }
681 }
682
/*
 * Fill in a Gen7 (Ivybridge) SURFACE_STATE for a 2D surface.  Same
 * contract as i965_render_set_surface_state, but the Gen7 layout has no
 * color_blend bit in ss0 and keeps tiling in ss0 rather than ss3.
 * ss1.base_addr is fixed up by the caller's relocation.
 */
static void
gen7_render_set_surface_state(struct gen7_surface_state *ss,
                              dri_bo *bo, unsigned long offset,
                              int width, int height,
                              int pitch, int format)
{
    unsigned int tiling;
    unsigned int swizzle;

    memset(ss, 0, sizeof(*ss));

    ss->ss0.surface_type = I965_SURFACE_2D;
    ss->ss0.surface_format = format;

    ss->ss1.base_addr = bo->offset + offset;

    ss->ss2.width = width - 1;
    ss->ss2.height = height - 1;

    ss->ss3.pitch = pitch - 1;

    /* swizzle is fetched but unused; only the tiling mode matters */
    dri_bo_get_tiling(bo, &tiling, &swizzle);
    gen7_render_set_surface_tiling(ss, tiling);
}
707
/*
 * Program one source SURFACE_STATE (Gen7 layout on Ivybridge, pre-Gen7
 * layout otherwise) at binding-table slot 'index', emit the relocation
 * for its base address (ss1), and point the binding-table entry at it.
 * Each call also accounts for one sampler: wm.sampler_count is bumped
 * per bound source surface.
 */
static void
i965_render_src_surface_state(VADriverContextP ctx, 
                              int index,
                              dri_bo *region,
                              unsigned long offset,
                              int w, int h,
                              int pitch, int format)
{
    struct i965_driver_data *i965 = i965_driver_data(ctx);  
    struct i965_render_state *render_state = &i965->render_state;
    void *ss;
    dri_bo *ss_bo = render_state->wm.surface_state_binding_table_bo;

    assert(index < MAX_RENDER_SURFACES);

    dri_bo_map(ss_bo, 1);
    assert(ss_bo->virtual);
    ss = (char *)ss_bo->virtual + SURFACE_STATE_OFFSET(index);

    if (IS_GEN7(i965->intel.device_id)) {
        gen7_render_set_surface_state(ss,
                                      region, offset,
                                      w, h,
                                      pitch, format);
        /* Relocate the base address embedded in ss1 of this slot */
        dri_bo_emit_reloc(ss_bo,
                          I915_GEM_DOMAIN_SAMPLER, 0,
                          offset,
                          SURFACE_STATE_OFFSET(index) + offsetof(struct gen7_surface_state, ss1),
                          region);
    } else {
        i965_render_set_surface_state(ss,
                                      region, offset,
                                      w, h,
                                      pitch, format);
        /* Relocate the base address embedded in ss1 of this slot */
        dri_bo_emit_reloc(ss_bo,
                          I915_GEM_DOMAIN_SAMPLER, 0,
                          offset,
                          SURFACE_STATE_OFFSET(index) + offsetof(struct i965_surface_state, ss1),
                          region);
    }

    /* Binding-table entry holds the byte offset of the surface state */
    ((unsigned int *)((char *)ss_bo->virtual + BINDING_TABLE_OFFSET))[index] = SURFACE_STATE_OFFSET(index);
    dri_bo_unmap(ss_bo);
    render_state->wm.sampler_count++;
}
753
/*
 * Bind the planes of the video surface as source surfaces:
 *   slots 1,2 - Y plane (bound twice)
 *   slots 3,4 - interleaved UV (NV12) or U plane
 *   slots 5,6 - V plane (non-NV12 planar formats only)
 */
static void
i965_render_src_surfaces_state(VADriverContextP ctx,
                              VASurfaceID surface)
{
    struct i965_driver_data *i965 = i965_driver_data(ctx);  
    struct object_surface *obj_surface;
    int region_pitch;
    int rw, rh;
    dri_bo *region;

    obj_surface = SURFACE(surface);
    assert(obj_surface);

    /* NOTE(review): obj_surface->width is used as the Y-plane pitch
     * here — assumes width holds the aligned pitch; confirm against
     * surface allocation */
    region_pitch = obj_surface->width;
    rw = obj_surface->orig_width;
    rh = obj_surface->orig_height;
    region = obj_surface->bo;

    i965_render_src_surface_state(ctx, 1, region, 0, rw, rh, region_pitch, I965_SURFACEFORMAT_R8_UNORM);     /* Y */
    i965_render_src_surface_state(ctx, 2, region, 0, rw, rh, region_pitch, I965_SURFACEFORMAT_R8_UNORM);

    if (obj_surface->fourcc == VA_FOURCC('N', 'V', '1', '2')) {
        i965_render_src_surface_state(ctx, 3, region,
                                      region_pitch * obj_surface->y_cb_offset,
                                      obj_surface->cb_cr_width, obj_surface->cb_cr_height, obj_surface->cb_cr_pitch,
                                      I965_SURFACEFORMAT_R8G8_UNORM); /* UV */
        i965_render_src_surface_state(ctx, 4, region,
                                      region_pitch * obj_surface->y_cb_offset,
                                      obj_surface->cb_cr_width, obj_surface->cb_cr_height, obj_surface->cb_cr_pitch,
                                      I965_SURFACEFORMAT_R8G8_UNORM);
    } else {
        i965_render_src_surface_state(ctx, 3, region,
                                      region_pitch * obj_surface->y_cb_offset,
                                      obj_surface->cb_cr_width, obj_surface->cb_cr_height, obj_surface->cb_cr_pitch,
                                      I965_SURFACEFORMAT_R8_UNORM); /* U */
        i965_render_src_surface_state(ctx, 4, region,
                                      region_pitch * obj_surface->y_cb_offset,
                                      obj_surface->cb_cr_width, obj_surface->cb_cr_height, obj_surface->cb_cr_pitch,
                                      I965_SURFACEFORMAT_R8_UNORM);
        i965_render_src_surface_state(ctx, 5, region,
                                      region_pitch * obj_surface->y_cr_offset,
                                      obj_surface->cb_cr_width, obj_surface->cb_cr_height, obj_surface->cb_cr_pitch,
                                      I965_SURFACEFORMAT_R8_UNORM); /* V */
        i965_render_src_surface_state(ctx, 6, region,
                                      region_pitch * obj_surface->y_cr_offset,
                                      obj_surface->cb_cr_width, obj_surface->cb_cr_height, obj_surface->cb_cr_pitch,
                                      I965_SURFACEFORMAT_R8_UNORM);
    }
}
803
804 static void
805 i965_subpic_render_src_surfaces_state(VADriverContextP ctx,
806                               VASurfaceID surface)
807 {
808     struct i965_driver_data *i965 = i965_driver_data(ctx);  
809     struct object_surface *obj_surface = SURFACE(surface);
810     int w, h;
811     dri_bo *region;
812     dri_bo *subpic_region;
813     struct object_subpic *obj_subpic = SUBPIC(obj_surface->subpic);
814     struct object_image *obj_image = IMAGE(obj_subpic->image);
815     assert(obj_surface);
816     assert(obj_surface->bo);
817     w = obj_surface->width;
818     h = obj_surface->height;
819     region = obj_surface->bo;
820     subpic_region = obj_image->bo;
821     /*subpicture surface*/
822     i965_render_src_surface_state(ctx, 1, subpic_region, 0, obj_subpic->width, obj_subpic->height, obj_subpic->pitch, obj_subpic->format);     
823     i965_render_src_surface_state(ctx, 2, subpic_region, 0, obj_subpic->width, obj_subpic->height, obj_subpic->pitch, obj_subpic->format);     
824 }
825
/*
 * Program the render-target SURFACE_STATE for the destination drawable
 * at binding-table slot 'index'.  The surface format follows the
 * drawable depth: 16bpp -> B5G6R5, otherwise BGRA8888.  Uses a
 * read/write RENDER-domain relocation since the GPU writes this BO.
 */
static void
i965_render_dest_surface_state(VADriverContextP ctx, int index)
{
    struct i965_driver_data *i965 = i965_driver_data(ctx);  
    struct i965_render_state *render_state = &i965->render_state;
    struct intel_region *dest_region = render_state->draw_region;
    void *ss;
    dri_bo *ss_bo = render_state->wm.surface_state_binding_table_bo;
    int format;
    assert(index < MAX_RENDER_SURFACES);

    if (dest_region->cpp == 2) {
        format = I965_SURFACEFORMAT_B5G6R5_UNORM;
    } else {
        format = I965_SURFACEFORMAT_B8G8R8A8_UNORM;
    }

    dri_bo_map(ss_bo, 1);
    assert(ss_bo->virtual);
    ss = (char *)ss_bo->virtual + SURFACE_STATE_OFFSET(index);

    if (IS_GEN7(i965->intel.device_id)) {
        gen7_render_set_surface_state(ss,
                                      dest_region->bo, 0,
                                      dest_region->width, dest_region->height,
                                      dest_region->pitch, format);
        /* Relocate the base address embedded in ss1 of this slot */
        dri_bo_emit_reloc(ss_bo,
                          I915_GEM_DOMAIN_RENDER, I915_GEM_DOMAIN_RENDER,
                          0,
                          SURFACE_STATE_OFFSET(index) + offsetof(struct gen7_surface_state, ss1),
                          dest_region->bo);
    } else {
        i965_render_set_surface_state(ss,
                                      dest_region->bo, 0,
                                      dest_region->width, dest_region->height,
                                      dest_region->pitch, format);
        /* Relocate the base address embedded in ss1 of this slot */
        dri_bo_emit_reloc(ss_bo,
                          I915_GEM_DOMAIN_RENDER, I915_GEM_DOMAIN_RENDER,
                          0,
                          SURFACE_STATE_OFFSET(index) + offsetof(struct i965_surface_state, ss1),
                          dest_region->bo);
    }

    /* Binding-table entry holds the byte offset of the surface state */
    ((unsigned int *)((char *)ss_bo->virtual + BINDING_TABLE_OFFSET))[index] = SURFACE_STATE_OFFSET(index);
    dri_bo_unmap(ss_bo);
}
872
873 static void 
874 i965_subpic_render_upload_vertex(VADriverContextP ctx,
875                                  VASurfaceID surface,
876                                  const VARectangle *output_rect)
877 {    
878     struct i965_driver_data  *i965         = i965_driver_data(ctx);
879     struct i965_render_state *render_state = &i965->render_state;
880     struct object_surface    *obj_surface  = SURFACE(surface);
881     struct object_subpic     *obj_subpic   = SUBPIC(obj_surface->subpic);
882     VARectangle dst_rect;
883     float *vb, tx1, tx2, ty1, ty2, x1, x2, y1, y2;
884     int i = 0;
885
886     if (obj_subpic->flags & VA_SUBPICTURE_DESTINATION_IS_SCREEN_COORD)
887         dst_rect = obj_subpic->dst_rect;
888     else {
889         const float sx  = (float)output_rect->width  / obj_surface->orig_width;
890         const float sy  = (float)output_rect->height / obj_surface->orig_height;
891         dst_rect.x      = output_rect->x + sx * obj_subpic->dst_rect.x;
892         dst_rect.y      = output_rect->y + sy * obj_subpic->dst_rect.y;
893         dst_rect.width  = sx * obj_subpic->dst_rect.width;
894         dst_rect.height = sy * obj_subpic->dst_rect.height;
895     }
896
897     dri_bo_map(render_state->vb.vertex_buffer, 1);
898     assert(render_state->vb.vertex_buffer->virtual);
899     vb = render_state->vb.vertex_buffer->virtual;
900
901     tx1 = (float)obj_subpic->src_rect.x / obj_subpic->width;
902     ty1 = (float)obj_subpic->src_rect.y / obj_subpic->height;
903     tx2 = (float)(obj_subpic->src_rect.x + obj_subpic->src_rect.width) / obj_subpic->width;
904     ty2 = (float)(obj_subpic->src_rect.y + obj_subpic->src_rect.height) / obj_subpic->height;
905
906     x1 = (float)dst_rect.x;
907     y1 = (float)dst_rect.y;
908     x2 = (float)(dst_rect.x + dst_rect.width);
909     y2 = (float)(dst_rect.y + dst_rect.height);
910
911     vb[i++] = tx2;
912     vb[i++] = ty2;
913     vb[i++] = x2;
914     vb[i++] = y2;
915
916     vb[i++] = tx1;
917     vb[i++] = ty2;
918     vb[i++] = x1;
919     vb[i++] = y2;
920
921     vb[i++] = tx1;
922     vb[i++] = ty1;
923     vb[i++] = x1;
924     vb[i++] = y1;
925     dri_bo_unmap(render_state->vb.vertex_buffer);
926 }
927
928 static void 
929 i965_render_upload_vertex(
930     VADriverContextP   ctx,
931     VASurfaceID        surface,
932     const VARectangle *src_rect,
933     const VARectangle *dst_rect
934 )
935 {
936     struct i965_driver_data *i965 = i965_driver_data(ctx);
937     struct i965_render_state *render_state = &i965->render_state;
938     struct intel_region *dest_region = render_state->draw_region;
939     struct object_surface *obj_surface;
940     float *vb;
941
942     float u1, v1, u2, v2;
943     int i, width, height;
944     int box_x1 = dest_region->x + dst_rect->x;
945     int box_y1 = dest_region->y + dst_rect->y;
946     int box_x2 = box_x1 + dst_rect->width;
947     int box_y2 = box_y1 + dst_rect->height;
948
949     obj_surface = SURFACE(surface);
950     assert(surface);
951     width = obj_surface->orig_width;
952     height = obj_surface->orig_height;
953
954     u1 = (float)src_rect->x / width;
955     v1 = (float)src_rect->y / height;
956     u2 = (float)(src_rect->x + src_rect->width) / width;
957     v2 = (float)(src_rect->y + src_rect->height) / height;
958
959     dri_bo_map(render_state->vb.vertex_buffer, 1);
960     assert(render_state->vb.vertex_buffer->virtual);
961     vb = render_state->vb.vertex_buffer->virtual;
962
963     i = 0;
964     vb[i++] = u2;
965     vb[i++] = v2;
966     vb[i++] = (float)box_x2;
967     vb[i++] = (float)box_y2;
968     
969     vb[i++] = u1;
970     vb[i++] = v2;
971     vb[i++] = (float)box_x1;
972     vb[i++] = (float)box_y2;
973
974     vb[i++] = u1;
975     vb[i++] = v1;
976     vb[i++] = (float)box_x1;
977     vb[i++] = (float)box_y1;
978
979     dri_bo_unmap(render_state->vb.vertex_buffer);
980 }
981
982 static void
983 i965_render_upload_constants(VADriverContextP ctx,
984                              VASurfaceID surface)
985 {
986     struct i965_driver_data *i965 = i965_driver_data(ctx);
987     struct i965_render_state *render_state = &i965->render_state;
988     unsigned short *constant_buffer;
989     struct object_surface *obj_surface = SURFACE(surface);
990
991     dri_bo_map(render_state->curbe.bo, 1);
992     assert(render_state->curbe.bo->virtual);
993     constant_buffer = render_state->curbe.bo->virtual;
994
995     if (obj_surface->subsampling == SUBSAMPLE_YUV400) {
996         assert(obj_surface->fourcc == VA_FOURCC('I', 'M', 'C', '1') ||
997                obj_surface->fourcc == VA_FOURCC('I', 'M', 'C', '3'));
998         *constant_buffer = 2;
999     } else {
1000         if (obj_surface->fourcc == VA_FOURCC('N', 'V', '1', '2'))
1001             *constant_buffer = 1;
1002         else
1003             *constant_buffer = 0;
1004     }
1005
1006     dri_bo_unmap(render_state->curbe.bo);
1007 }
1008
/*
 * Build every piece of indirect state needed to render a video surface:
 * fixed-function unit states (VS/SF/WM/CC), render-target and source
 * surface states, sampler, CC viewport, vertex data and CURBE constants.
 * The batch commands that reference this state are emitted later by
 * i965_surface_render_pipeline_setup().
 */
static void
i965_surface_render_state_setup(
    VADriverContextP   ctx,
    VASurfaceID        surface,
    const VARectangle *src_rect,
    const VARectangle *dst_rect
)
{
    i965_render_vs_unit(ctx);
    i965_render_sf_unit(ctx);
    i965_render_dest_surface_state(ctx, 0);
    i965_render_src_surfaces_state(ctx, surface);
    i965_render_sampler(ctx);
    i965_render_wm_unit(ctx);
    i965_render_cc_viewport(ctx);
    i965_render_cc_unit(ctx);
    i965_render_upload_vertex(ctx, surface, src_rect, dst_rect);
    i965_render_upload_constants(ctx, surface);
}
/*
 * Subpicture counterpart of i965_surface_render_state_setup(): uses the
 * subpicture-specific WM/CC unit states, source surfaces and vertex data.
 * NOTE(review): src_rect is accepted for signature symmetry but is not
 * used here — the subpicture's own src_rect is read in the vertex upload.
 */
static void
i965_subpic_render_state_setup(
    VADriverContextP   ctx,
    VASurfaceID        surface,
    const VARectangle *src_rect,
    const VARectangle *dst_rect
)
{
    i965_render_vs_unit(ctx);
    i965_render_sf_unit(ctx);
    i965_render_dest_surface_state(ctx, 0);
    i965_subpic_render_src_surfaces_state(ctx, surface);
    i965_render_sampler(ctx);
    i965_subpic_render_wm_unit(ctx);
    i965_render_cc_viewport(ctx);
    i965_subpic_render_cc_unit(ctx);
    i965_subpic_render_upload_vertex(ctx, surface, dst_rect);
}
1046
1047
/* Select the 3D pipeline (as opposed to the media pipeline) before any
 * 3D state commands are emitted. */
static void
i965_render_pipeline_select(VADriverContextP ctx)
{
    struct i965_driver_data *i965 = i965_driver_data(ctx);
    struct intel_batchbuffer *batch = i965->batch;
 
    BEGIN_BATCH(batch, 1);
    OUT_BATCH(batch, CMD_PIPELINE_SELECT | PIPELINE_SELECT_3D);
    ADVANCE_BATCH(batch);
}
1058
/* Program the System Instruction Pointer to 0 (no exception/breakpoint
 * handler installed). */
static void
i965_render_state_sip(VADriverContextP ctx)
{
    struct i965_driver_data *i965 = i965_driver_data(ctx);
    struct intel_batchbuffer *batch = i965->batch;

    BEGIN_BATCH(batch, 2);
    OUT_BATCH(batch, CMD_STATE_SIP | 0);
    OUT_BATCH(batch, 0);
    ADVANCE_BATCH(batch);
}
1070
/*
 * Emit STATE_BASE_ADDRESS. The surface-state base is pointed at the
 * combined surface-state/binding-table bo; all other bases are left at 0
 * (with the MODIFY bit set so the zero actually takes effect).
 * Ironlake uses the longer 8-dword form of the command.
 */
static void
i965_render_state_base_address(VADriverContextP ctx)
{
    struct i965_driver_data *i965 = i965_driver_data(ctx);
    struct intel_batchbuffer *batch = i965->batch;
    struct i965_render_state *render_state = &i965->render_state;

    if (IS_IRONLAKE(i965->intel.device_id)) {
        BEGIN_BATCH(batch, 8);
        OUT_BATCH(batch, CMD_STATE_BASE_ADDRESS | 6);
        OUT_BATCH(batch, 0 | BASE_ADDRESS_MODIFY);
        OUT_RELOC(batch, render_state->wm.surface_state_binding_table_bo, I915_GEM_DOMAIN_INSTRUCTION, 0, BASE_ADDRESS_MODIFY);
        OUT_BATCH(batch, 0 | BASE_ADDRESS_MODIFY);
        OUT_BATCH(batch, 0 | BASE_ADDRESS_MODIFY);
        OUT_BATCH(batch, 0 | BASE_ADDRESS_MODIFY);
        OUT_BATCH(batch, 0 | BASE_ADDRESS_MODIFY);
        OUT_BATCH(batch, 0 | BASE_ADDRESS_MODIFY);
        ADVANCE_BATCH(batch);
    } else {
        BEGIN_BATCH(batch, 6);
        OUT_BATCH(batch, CMD_STATE_BASE_ADDRESS | 4);
        OUT_BATCH(batch, 0 | BASE_ADDRESS_MODIFY);
        OUT_RELOC(batch, render_state->wm.surface_state_binding_table_bo, I915_GEM_DOMAIN_INSTRUCTION, 0, BASE_ADDRESS_MODIFY);
        OUT_BATCH(batch, 0 | BASE_ADDRESS_MODIFY);
        OUT_BATCH(batch, 0 | BASE_ADDRESS_MODIFY);
        OUT_BATCH(batch, 0 | BASE_ADDRESS_MODIFY);
        ADVANCE_BATCH(batch);
    }
}
1100
/*
 * Point the pixel-shader stage at the binding table inside the surface
 * state bo. VS/GS/CLIP/SF use no surfaces, so their pointers stay 0.
 */
static void
i965_render_binding_table_pointers(VADriverContextP ctx)
{
    struct i965_driver_data *i965 = i965_driver_data(ctx);
    struct intel_batchbuffer *batch = i965->batch;

    BEGIN_BATCH(batch, 6);
    OUT_BATCH(batch, CMD_BINDING_TABLE_POINTERS | 4);
    OUT_BATCH(batch, 0); /* vs */
    OUT_BATCH(batch, 0); /* gs */
    OUT_BATCH(batch, 0); /* clip */
    OUT_BATCH(batch, 0); /* sf */
    OUT_BATCH(batch, BINDING_TABLE_OFFSET);
    ADVANCE_BATCH(batch);
}
1116
/*
 * Set the blend constant color to RGBA (1, 0, 1, 1).
 * float_to_uint presumably re-interprets the float bit pattern as a
 * 32-bit integer for the command stream — confirm against its definition.
 */
static void 
i965_render_constant_color(VADriverContextP ctx)
{
    struct i965_driver_data *i965 = i965_driver_data(ctx);
    struct intel_batchbuffer *batch = i965->batch;

    BEGIN_BATCH(batch, 5);
    OUT_BATCH(batch, CMD_CONSTANT_COLOR | 3);
    OUT_BATCH(batch, float_to_uint(1.0));
    OUT_BATCH(batch, float_to_uint(0.0));
    OUT_BATCH(batch, float_to_uint(1.0));
    OUT_BATCH(batch, float_to_uint(1.0));
    ADVANCE_BATCH(batch);
}
1131
/*
 * Point each fixed-function unit at its state bo built during state
 * setup. The GS and CLIP stages are disabled (pointer dword = 0).
 */
static void
i965_render_pipelined_pointers(VADriverContextP ctx)
{
    struct i965_driver_data *i965 = i965_driver_data(ctx);
    struct intel_batchbuffer *batch = i965->batch;
    struct i965_render_state *render_state = &i965->render_state;

    BEGIN_BATCH(batch, 7);
    OUT_BATCH(batch, CMD_PIPELINED_POINTERS | 5);
    OUT_RELOC(batch, render_state->vs.state, I915_GEM_DOMAIN_INSTRUCTION, 0, 0);
    OUT_BATCH(batch, 0);  /* disable GS */
    OUT_BATCH(batch, 0);  /* disable CLIP */
    OUT_RELOC(batch, render_state->sf.state, I915_GEM_DOMAIN_INSTRUCTION, 0, 0);
    OUT_RELOC(batch, render_state->wm.state, I915_GEM_DOMAIN_INSTRUCTION, 0, 0);
    OUT_RELOC(batch, render_state->cc.state, I915_GEM_DOMAIN_INSTRUCTION, 0, 0);
    ADVANCE_BATCH(batch);
}
1149
/*
 * Partition the URB sequentially among VS, GS, CLIP, SF and CS using the
 * URB_*_ENTRIES / URB_*_ENTRY_SIZE constants, then emit URB_FENCE with
 * the end ("fence") offset of each section and all REALLOC bits set.
 */
static void
i965_render_urb_layout(VADriverContextP ctx)
{
    struct i965_driver_data *i965 = i965_driver_data(ctx);
    struct intel_batchbuffer *batch = i965->batch;
    int urb_vs_start, urb_vs_size;
    int urb_gs_start, urb_gs_size;
    int urb_clip_start, urb_clip_size;
    int urb_sf_start, urb_sf_size;
    int urb_cs_start, urb_cs_size;

    /* Sections are laid out back-to-back starting at offset 0. */
    urb_vs_start = 0;
    urb_vs_size = URB_VS_ENTRIES * URB_VS_ENTRY_SIZE;
    urb_gs_start = urb_vs_start + urb_vs_size;
    urb_gs_size = URB_GS_ENTRIES * URB_GS_ENTRY_SIZE;
    urb_clip_start = urb_gs_start + urb_gs_size;
    urb_clip_size = URB_CLIP_ENTRIES * URB_CLIP_ENTRY_SIZE;
    urb_sf_start = urb_clip_start + urb_clip_size;
    urb_sf_size = URB_SF_ENTRIES * URB_SF_ENTRY_SIZE;
    urb_cs_start = urb_sf_start + urb_sf_size;
    urb_cs_size = URB_CS_ENTRIES * URB_CS_ENTRY_SIZE;

    BEGIN_BATCH(batch, 3);
    OUT_BATCH(batch, 
              CMD_URB_FENCE |
              UF0_CS_REALLOC |
              UF0_SF_REALLOC |
              UF0_CLIP_REALLOC |
              UF0_GS_REALLOC |
              UF0_VS_REALLOC |
              1);
    OUT_BATCH(batch, 
              ((urb_clip_start + urb_clip_size) << UF1_CLIP_FENCE_SHIFT) |
              ((urb_gs_start + urb_gs_size) << UF1_GS_FENCE_SHIFT) |
              ((urb_vs_start + urb_vs_size) << UF1_VS_FENCE_SHIFT));
    OUT_BATCH(batch,
              ((urb_cs_start + urb_cs_size) << UF2_CS_FENCE_SHIFT) |
              ((urb_sf_start + urb_sf_size) << UF2_SF_FENCE_SHIFT));
    ADVANCE_BATCH(batch);
}
1190
/* Configure the constant (CURBE) section of the URB: entry allocation
 * size (encoded as size - 1) and number of entries. */
static void 
i965_render_cs_urb_layout(VADriverContextP ctx)
{
    struct i965_driver_data *i965 = i965_driver_data(ctx);
    struct intel_batchbuffer *batch = i965->batch;

    BEGIN_BATCH(batch, 2);
    OUT_BATCH(batch, CMD_CS_URB_STATE | 0);
    OUT_BATCH(batch,
              ((URB_CS_ENTRY_SIZE - 1) << 4) |          /* URB Entry Allocation Size */
              (URB_CS_ENTRIES << 0));                /* Number of URB Entries */
    ADVANCE_BATCH(batch);
}
1204
/*
 * Point the CURBE at the constant buffer bo written by
 * i965_render_upload_constants(). The relocation delta carries the
 * buffer length field (URB_CS_ENTRY_SIZE - 1) in the low bits of the
 * address dword; bit 8 of the command presumably marks the buffer valid
 * — confirm against the CMD_CONSTANT_BUFFER definition.
 */
static void
i965_render_constant_buffer(VADriverContextP ctx)
{
    struct i965_driver_data *i965 = i965_driver_data(ctx);
    struct intel_batchbuffer *batch = i965->batch;
    struct i965_render_state *render_state = &i965->render_state;

    BEGIN_BATCH(batch, 2);
    OUT_BATCH(batch, CMD_CONSTANT_BUFFER | (1 << 8) | (2 - 2));
    OUT_RELOC(batch, render_state->curbe.bo,
              I915_GEM_DOMAIN_INSTRUCTION, 0,
              URB_CS_ENTRY_SIZE - 1);
    ADVANCE_BATCH(batch);    
}
1219
/*
 * Set the drawing rectangle to the full destination region:
 * origin (0,0), max corner (width-1, height-1) with ymax in bits 31:16
 * (the << binds tighter than |, so the expression is correct as written),
 * and no origin offset.
 */
static void
i965_render_drawing_rectangle(VADriverContextP ctx)
{
    struct i965_driver_data *i965 = i965_driver_data(ctx);
    struct intel_batchbuffer *batch = i965->batch;
    struct i965_render_state *render_state = &i965->render_state;
    struct intel_region *dest_region = render_state->draw_region;

    BEGIN_BATCH(batch, 4);
    OUT_BATCH(batch, CMD_DRAWING_RECTANGLE | 2);
    OUT_BATCH(batch, 0x00000000);
    OUT_BATCH(batch, (dest_region->width - 1) | (dest_region->height - 1) << 16);
    OUT_BATCH(batch, 0x00000000);         
    ADVANCE_BATCH(batch);
}
1235
/*
 * Describe the vertex layout: two elements per vertex, both R32G32_FLOAT
 * from vertex buffer 0 — (X, Y) at byte offset 0 and (S0, T0) at byte
 * offset 8 — each padded to a vec4 with constant 1.0 in the last two
 * components. Ironlake dropped the destination-element-offset field, so
 * it gets a variant without the VE1_DESTINATION_ELEMENT_OFFSET bits.
 */
static void
i965_render_vertex_elements(VADriverContextP ctx)
{
    struct i965_driver_data *i965 = i965_driver_data(ctx);
    struct intel_batchbuffer *batch = i965->batch;

    if (IS_IRONLAKE(i965->intel.device_id)) {
        BEGIN_BATCH(batch, 5);
        OUT_BATCH(batch, CMD_VERTEX_ELEMENTS | 3);
        /* offset 0: X,Y -> {X, Y, 1.0, 1.0} */
        OUT_BATCH(batch, (0 << VE0_VERTEX_BUFFER_INDEX_SHIFT) |
                  VE0_VALID |
                  (I965_SURFACEFORMAT_R32G32_FLOAT << VE0_FORMAT_SHIFT) |
                  (0 << VE0_OFFSET_SHIFT));
        OUT_BATCH(batch, (I965_VFCOMPONENT_STORE_SRC << VE1_VFCOMPONENT_0_SHIFT) |
                  (I965_VFCOMPONENT_STORE_SRC << VE1_VFCOMPONENT_1_SHIFT) |
                  (I965_VFCOMPONENT_STORE_1_FLT << VE1_VFCOMPONENT_2_SHIFT) |
                  (I965_VFCOMPONENT_STORE_1_FLT << VE1_VFCOMPONENT_3_SHIFT));
        /* offset 8: S0, T0 -> {S0, T0, 1.0, 1.0} */
        OUT_BATCH(batch, (0 << VE0_VERTEX_BUFFER_INDEX_SHIFT) |
                  VE0_VALID |
                  (I965_SURFACEFORMAT_R32G32_FLOAT << VE0_FORMAT_SHIFT) |
                  (8 << VE0_OFFSET_SHIFT));
        OUT_BATCH(batch, (I965_VFCOMPONENT_STORE_SRC << VE1_VFCOMPONENT_0_SHIFT) |
                  (I965_VFCOMPONENT_STORE_SRC << VE1_VFCOMPONENT_1_SHIFT) |
                  (I965_VFCOMPONENT_STORE_1_FLT << VE1_VFCOMPONENT_2_SHIFT) |
                  (I965_VFCOMPONENT_STORE_1_FLT << VE1_VFCOMPONENT_3_SHIFT));
        ADVANCE_BATCH(batch);
    } else {
        BEGIN_BATCH(batch, 5);
        OUT_BATCH(batch, CMD_VERTEX_ELEMENTS | 3);
        /* offset 0: X,Y -> {X, Y, 1.0, 1.0} */
        OUT_BATCH(batch, (0 << VE0_VERTEX_BUFFER_INDEX_SHIFT) |
                  VE0_VALID |
                  (I965_SURFACEFORMAT_R32G32_FLOAT << VE0_FORMAT_SHIFT) |
                  (0 << VE0_OFFSET_SHIFT));
        OUT_BATCH(batch, (I965_VFCOMPONENT_STORE_SRC << VE1_VFCOMPONENT_0_SHIFT) |
                  (I965_VFCOMPONENT_STORE_SRC << VE1_VFCOMPONENT_1_SHIFT) |
                  (I965_VFCOMPONENT_STORE_1_FLT << VE1_VFCOMPONENT_2_SHIFT) |
                  (I965_VFCOMPONENT_STORE_1_FLT << VE1_VFCOMPONENT_3_SHIFT) |
                  (0 << VE1_DESTINATION_ELEMENT_OFFSET_SHIFT));
        /* offset 8: S0, T0 -> {S0, T0, 1.0, 1.0} */
        OUT_BATCH(batch, (0 << VE0_VERTEX_BUFFER_INDEX_SHIFT) |
                  VE0_VALID |
                  (I965_SURFACEFORMAT_R32G32_FLOAT << VE0_FORMAT_SHIFT) |
                  (8 << VE0_OFFSET_SHIFT));
        OUT_BATCH(batch, (I965_VFCOMPONENT_STORE_SRC << VE1_VFCOMPONENT_0_SHIFT) |
                  (I965_VFCOMPONENT_STORE_SRC << VE1_VFCOMPONENT_1_SHIFT) |
                  (I965_VFCOMPONENT_STORE_1_FLT << VE1_VFCOMPONENT_2_SHIFT) |
                  (I965_VFCOMPONENT_STORE_1_FLT << VE1_VFCOMPONENT_3_SHIFT) |
                  (4 << VE1_DESTINATION_ELEMENT_OFFSET_SHIFT));
        ADVANCE_BATCH(batch);
    }
}
1290
/*
 * Load the image's palette into the sampler palette: one dword per entry
 * with `alpha` in bits 31:24 and the 24-bit color from the palette in
 * the low bits. No-op when the image has no palette entries.
 */
static void
i965_render_upload_image_palette(
    VADriverContextP ctx,
    VAImageID        image_id,
    unsigned int     alpha
)
{
    struct i965_driver_data *i965 = i965_driver_data(ctx);
    struct intel_batchbuffer *batch = i965->batch;
    unsigned int i;

    struct object_image *obj_image = IMAGE(image_id);
    assert(obj_image);

    if (obj_image->image.num_palette_entries == 0)
        return;

    BEGIN_BATCH(batch, 1 + obj_image->image.num_palette_entries);
    OUT_BATCH(batch, CMD_SAMPLER_PALETTE_LOAD | (obj_image->image.num_palette_entries - 1));
    /*fill palette*/
    //int32_t out[16]; //0-23:color 23-31:alpha
    for (i = 0; i < obj_image->image.num_palette_entries; i++)
        OUT_BATCH(batch, (alpha << 24) | obj_image->palette[i]);
    ADVANCE_BATCH(batch);
}
1316
/*
 * Emit the vertex buffer packet and kick off the draw: a RECTLIST
 * primitive of 3 sequential vertices, each 16 bytes (4 floats: u, v, x, y).
 * Ironlake's packet carries an end-address relocation; on other gens the
 * same dword holds the max vertex index (3) — NOTE(review): presumably
 * per the per-gen VERTEX_BUFFERS layout, confirm against the PRM.
 */
static void
i965_render_startup(VADriverContextP ctx)
{
    struct i965_driver_data *i965 = i965_driver_data(ctx);
    struct intel_batchbuffer *batch = i965->batch;
    struct i965_render_state *render_state = &i965->render_state;

    BEGIN_BATCH(batch, 11);
    OUT_BATCH(batch, CMD_VERTEX_BUFFERS | 3);
    OUT_BATCH(batch, 
              (0 << VB0_BUFFER_INDEX_SHIFT) |
              VB0_VERTEXDATA |
              ((4 * 4) << VB0_BUFFER_PITCH_SHIFT));
    OUT_RELOC(batch, render_state->vb.vertex_buffer, I915_GEM_DOMAIN_VERTEX, 0, 0);

    if (IS_IRONLAKE(i965->intel.device_id))
        OUT_RELOC(batch, render_state->vb.vertex_buffer, I915_GEM_DOMAIN_VERTEX, 0, 12 * 4);
    else
        OUT_BATCH(batch, 3);

    OUT_BATCH(batch, 0);

    OUT_BATCH(batch, 
              CMD_3DPRIMITIVE |
              _3DPRIMITIVE_VERTEX_SEQUENTIAL |
              (_3DPRIM_RECTLIST << _3DPRIMITIVE_TOPOLOGY_SHIFT) |
              (0 << 9) |
              4);
    OUT_BATCH(batch, 3); /* vertex count per instance */
    OUT_BATCH(batch, 0); /* start vertex offset */
    OUT_BATCH(batch, 1); /* single instance */
    OUT_BATCH(batch, 0); /* start instance location */
    OUT_BATCH(batch, 0); /* index buffer offset, ignored */
    ADVANCE_BATCH(batch);
}
1352
/*
 * Clear the whole destination region to black (fill color 0x0) with an
 * XY_COLOR_BLT. Picks the BR13 color-depth bits from the region's cpp,
 * quarters the pitch for tiled destinations (tiled pitch is programmed
 * in dwords), and routes the command to the BLT ring on GEN6/GEN7.
 */
static void 
i965_clear_dest_region(VADriverContextP ctx)
{
    struct i965_driver_data *i965 = i965_driver_data(ctx);
    struct intel_batchbuffer *batch = i965->batch;
    struct i965_render_state *render_state = &i965->render_state;
    struct intel_region *dest_region = render_state->draw_region;
    unsigned int blt_cmd, br13;
    int pitch;

    blt_cmd = XY_COLOR_BLT_CMD;
    br13 = 0xf0 << 16;  /* raster operation in BR13 bits 23:16 */
    pitch = dest_region->pitch;

    if (dest_region->cpp == 4) {
        br13 |= BR13_8888;
        blt_cmd |= (XY_COLOR_BLT_WRITE_RGB | XY_COLOR_BLT_WRITE_ALPHA);
    } else {
        /* only 16bpp and 32bpp destinations are supported */
        assert(dest_region->cpp == 2);
        br13 |= BR13_565;
    }

    if (dest_region->tiling != I915_TILING_NONE) {
        blt_cmd |= XY_COLOR_BLT_DST_TILED;
        pitch /= 4;
    }

    br13 |= pitch;

    if (IS_GEN6(i965->intel.device_id) ||
        IS_GEN7(i965->intel.device_id)) {
        intel_batchbuffer_start_atomic_blt(batch, 24);
        BEGIN_BLT_BATCH(batch, 6);
    } else {
        intel_batchbuffer_start_atomic(batch, 24);
        BEGIN_BATCH(batch, 6);
    }

    OUT_BATCH(batch, blt_cmd);
    OUT_BATCH(batch, br13);
    OUT_BATCH(batch, (dest_region->y << 16) | (dest_region->x));
    OUT_BATCH(batch, ((dest_region->y + dest_region->height) << 16) |
              (dest_region->x + dest_region->width));
    OUT_RELOC(batch, dest_region->bo, 
              I915_GEM_DOMAIN_RENDER, I915_GEM_DOMAIN_RENDER,
              0);
    OUT_BATCH(batch, 0x0);  /* fill color: black */
    ADVANCE_BATCH(batch);
    intel_batchbuffer_end_atomic(batch);
}
1403
/*
 * Emit the full 3D pipeline command sequence for the main surface pass:
 * clear the destination, then (atomically, in one batch section) flush,
 * select the 3D pipe and program every stage before kicking the draw via
 * i965_render_startup(). The emission order follows hardware requirements;
 * do not reorder.
 */
static void
i965_surface_render_pipeline_setup(VADriverContextP ctx)
{
    struct i965_driver_data *i965 = i965_driver_data(ctx);
    struct intel_batchbuffer *batch = i965->batch;

    i965_clear_dest_region(ctx);
    intel_batchbuffer_start_atomic(batch, 0x1000);
    intel_batchbuffer_emit_mi_flush(batch);
    i965_render_pipeline_select(ctx);
    i965_render_state_sip(ctx);
    i965_render_state_base_address(ctx);
    i965_render_binding_table_pointers(ctx);
    i965_render_constant_color(ctx);
    i965_render_pipelined_pointers(ctx);
    i965_render_urb_layout(ctx);
    i965_render_cs_urb_layout(ctx);
    i965_render_constant_buffer(ctx);
    i965_render_drawing_rectangle(ctx);
    i965_render_vertex_elements(ctx);
    i965_render_startup(ctx);
    intel_batchbuffer_end_atomic(batch);
}
1427
/*
 * Pipeline command sequence for the subpicture pass. Unlike the main
 * surface pass it neither clears the destination (the video frame was
 * just rendered there) nor emits a constant buffer (the subpicture
 * shader takes no CURBE constants). Emission order matters; do not
 * reorder.
 */
static void
i965_subpic_render_pipeline_setup(VADriverContextP ctx)
{
    struct i965_driver_data *i965 = i965_driver_data(ctx);
    struct intel_batchbuffer *batch = i965->batch;

    intel_batchbuffer_start_atomic(batch, 0x1000);
    intel_batchbuffer_emit_mi_flush(batch);
    i965_render_pipeline_select(ctx);
    i965_render_state_sip(ctx);
    i965_render_state_base_address(ctx);
    i965_render_binding_table_pointers(ctx);
    i965_render_constant_color(ctx);
    i965_render_pipelined_pointers(ctx);
    i965_render_urb_layout(ctx);
    i965_render_cs_urb_layout(ctx);
    i965_render_drawing_rectangle(ctx);
    i965_render_vertex_elements(ctx);
    i965_render_startup(ctx);
    intel_batchbuffer_end_atomic(batch);
}
1449
1450
1451 static void 
1452 i965_render_initialize(VADriverContextP ctx)
1453 {
1454     struct i965_driver_data *i965 = i965_driver_data(ctx);
1455     struct i965_render_state *render_state = &i965->render_state;
1456     dri_bo *bo;
1457
1458     /* VERTEX BUFFER */
1459     dri_bo_unreference(render_state->vb.vertex_buffer);
1460     bo = dri_bo_alloc(i965->intel.bufmgr,
1461                       "vertex buffer",
1462                       4096,
1463                       4096);
1464     assert(bo);
1465     render_state->vb.vertex_buffer = bo;
1466
1467     /* VS */
1468     dri_bo_unreference(render_state->vs.state);
1469     bo = dri_bo_alloc(i965->intel.bufmgr,
1470                       "vs state",
1471                       sizeof(struct i965_vs_unit_state),
1472                       64);
1473     assert(bo);
1474     render_state->vs.state = bo;
1475
1476     /* GS */
1477     /* CLIP */
1478     /* SF */
1479     dri_bo_unreference(render_state->sf.state);
1480     bo = dri_bo_alloc(i965->intel.bufmgr,
1481                       "sf state",
1482                       sizeof(struct i965_sf_unit_state),
1483                       64);
1484     assert(bo);
1485     render_state->sf.state = bo;
1486
1487     /* WM */
1488     dri_bo_unreference(render_state->wm.surface_state_binding_table_bo);
1489     bo = dri_bo_alloc(i965->intel.bufmgr,
1490                       "surface state & binding table",
1491                       (SURFACE_STATE_PADDED_SIZE + sizeof(unsigned int)) * MAX_RENDER_SURFACES,
1492                       4096);
1493     assert(bo);
1494     render_state->wm.surface_state_binding_table_bo = bo;
1495
1496     dri_bo_unreference(render_state->wm.sampler);
1497     bo = dri_bo_alloc(i965->intel.bufmgr,
1498                       "sampler state",
1499                       MAX_SAMPLERS * sizeof(struct i965_sampler_state),
1500                       64);
1501     assert(bo);
1502     render_state->wm.sampler = bo;
1503     render_state->wm.sampler_count = 0;
1504
1505     dri_bo_unreference(render_state->wm.state);
1506     bo = dri_bo_alloc(i965->intel.bufmgr,
1507                       "wm state",
1508                       sizeof(struct i965_wm_unit_state),
1509                       64);
1510     assert(bo);
1511     render_state->wm.state = bo;
1512
1513     /* COLOR CALCULATOR */
1514     dri_bo_unreference(render_state->cc.state);
1515     bo = dri_bo_alloc(i965->intel.bufmgr,
1516                       "color calc state",
1517                       sizeof(struct i965_cc_unit_state),
1518                       64);
1519     assert(bo);
1520     render_state->cc.state = bo;
1521
1522     dri_bo_unreference(render_state->cc.viewport);
1523     bo = dri_bo_alloc(i965->intel.bufmgr,
1524                       "cc viewport",
1525                       sizeof(struct i965_cc_viewport),
1526                       64);
1527     assert(bo);
1528     render_state->cc.viewport = bo;
1529 }
1530
/*
 * Render a video surface into the current draw region:
 * allocate/refresh state buffers, build all indirect state, emit the
 * pipeline commands, and flush the batch to the kernel.
 * NOTE(review): `flags` is accepted but unused here — presumably part of
 * the vaPutSurface-style interface; confirm against the caller.
 */
static void
i965_render_put_surface(
    VADriverContextP   ctx,
    VASurfaceID        surface,
    const VARectangle *src_rect,
    const VARectangle *dst_rect,
    unsigned int       flags
)
{
    struct i965_driver_data *i965 = i965_driver_data(ctx);
    struct intel_batchbuffer *batch = i965->batch;

    i965_render_initialize(ctx);
    i965_surface_render_state_setup(ctx, surface, src_rect, dst_rect);
    i965_surface_render_pipeline_setup(ctx);
    intel_batchbuffer_flush(batch);
}
1548
1549 static void
1550 i965_render_put_subpicture(
1551     VADriverContextP   ctx,
1552     VASurfaceID        surface,
1553     const VARectangle *src_rect,
1554     const VARectangle *dst_rect
1555 )
1556 {
1557     struct i965_driver_data *i965 = i965_driver_data(ctx);
1558     struct intel_batchbuffer *batch = i965->batch;
1559     struct object_surface *obj_surface = SURFACE(surface);
1560     struct object_subpic *obj_subpic = SUBPIC(obj_surface->subpic);
1561
1562     assert(obj_subpic);
1563
1564     i965_render_initialize(ctx);
1565     i965_subpic_render_state_setup(ctx, surface, src_rect, dst_rect);
1566     i965_subpic_render_pipeline_setup(ctx);
1567     i965_render_upload_image_palette(ctx, obj_subpic->image, 0xff);
1568     intel_batchbuffer_flush(batch);
1569 }
1570
1571 /*
1572  * for GEN6+
1573  */
1574 static void 
1575 gen6_render_initialize(VADriverContextP ctx)
1576 {
1577     struct i965_driver_data *i965 = i965_driver_data(ctx);
1578     struct i965_render_state *render_state = &i965->render_state;
1579     dri_bo *bo;
1580
1581     /* VERTEX BUFFER */
1582     dri_bo_unreference(render_state->vb.vertex_buffer);
1583     bo = dri_bo_alloc(i965->intel.bufmgr,
1584                       "vertex buffer",
1585                       4096,
1586                       4096);
1587     assert(bo);
1588     render_state->vb.vertex_buffer = bo;
1589
1590     /* WM */
1591     dri_bo_unreference(render_state->wm.surface_state_binding_table_bo);
1592     bo = dri_bo_alloc(i965->intel.bufmgr,
1593                       "surface state & binding table",
1594                       (SURFACE_STATE_PADDED_SIZE + sizeof(unsigned int)) * MAX_RENDER_SURFACES,
1595                       4096);
1596     assert(bo);
1597     render_state->wm.surface_state_binding_table_bo = bo;
1598
1599     dri_bo_unreference(render_state->wm.sampler);
1600     bo = dri_bo_alloc(i965->intel.bufmgr,
1601                       "sampler state",
1602                       MAX_SAMPLERS * sizeof(struct i965_sampler_state),
1603                       4096);
1604     assert(bo);
1605     render_state->wm.sampler = bo;
1606     render_state->wm.sampler_count = 0;
1607
1608     /* COLOR CALCULATOR */
1609     dri_bo_unreference(render_state->cc.state);
1610     bo = dri_bo_alloc(i965->intel.bufmgr,
1611                       "color calc state",
1612                       sizeof(struct gen6_color_calc_state),
1613                       4096);
1614     assert(bo);
1615     render_state->cc.state = bo;
1616
1617     /* CC VIEWPORT */
1618     dri_bo_unreference(render_state->cc.viewport);
1619     bo = dri_bo_alloc(i965->intel.bufmgr,
1620                       "cc viewport",
1621                       sizeof(struct i965_cc_viewport),
1622                       4096);
1623     assert(bo);
1624     render_state->cc.viewport = bo;
1625
1626     /* BLEND STATE */
1627     dri_bo_unreference(render_state->cc.blend);
1628     bo = dri_bo_alloc(i965->intel.bufmgr,
1629                       "blend state",
1630                       sizeof(struct gen6_blend_state),
1631                       4096);
1632     assert(bo);
1633     render_state->cc.blend = bo;
1634
1635     /* DEPTH & STENCIL STATE */
1636     dri_bo_unreference(render_state->cc.depth_stencil);
1637     bo = dri_bo_alloc(i965->intel.bufmgr,
1638                       "depth & stencil state",
1639                       sizeof(struct gen6_depth_stencil_state),
1640                       4096);
1641     assert(bo);
1642     render_state->cc.depth_stencil = bo;
1643 }
1644
1645 static void
1646 gen6_render_color_calc_state(VADriverContextP ctx)
1647 {
1648     struct i965_driver_data *i965 = i965_driver_data(ctx);
1649     struct i965_render_state *render_state = &i965->render_state;
1650     struct gen6_color_calc_state *color_calc_state;
1651     
1652     dri_bo_map(render_state->cc.state, 1);
1653     assert(render_state->cc.state->virtual);
1654     color_calc_state = render_state->cc.state->virtual;
1655     memset(color_calc_state, 0, sizeof(*color_calc_state));
1656     color_calc_state->constant_r = 1.0;
1657     color_calc_state->constant_g = 0.0;
1658     color_calc_state->constant_b = 1.0;
1659     color_calc_state->constant_a = 1.0;
1660     dri_bo_unmap(render_state->cc.state);
1661 }
1662
1663 static void
1664 gen6_render_blend_state(VADriverContextP ctx)
1665 {
1666     struct i965_driver_data *i965 = i965_driver_data(ctx);
1667     struct i965_render_state *render_state = &i965->render_state;
1668     struct gen6_blend_state *blend_state;
1669     
1670     dri_bo_map(render_state->cc.blend, 1);
1671     assert(render_state->cc.blend->virtual);
1672     blend_state = render_state->cc.blend->virtual;
1673     memset(blend_state, 0, sizeof(*blend_state));
1674     blend_state->blend1.logic_op_enable = 1;
1675     blend_state->blend1.logic_op_func = 0xc;
1676     dri_bo_unmap(render_state->cc.blend);
1677 }
1678
1679 static void
1680 gen6_render_depth_stencil_state(VADriverContextP ctx)
1681 {
1682     struct i965_driver_data *i965 = i965_driver_data(ctx);
1683     struct i965_render_state *render_state = &i965->render_state;
1684     struct gen6_depth_stencil_state *depth_stencil_state;
1685     
1686     dri_bo_map(render_state->cc.depth_stencil, 1);
1687     assert(render_state->cc.depth_stencil->virtual);
1688     depth_stencil_state = render_state->cc.depth_stencil->virtual;
1689     memset(depth_stencil_state, 0, sizeof(*depth_stencil_state));
1690     dri_bo_unmap(render_state->cc.depth_stencil);
1691 }
1692
static void
gen6_render_setup_states(
    VADriverContextP   ctx,
    VASurfaceID        surface,
    const VARectangle *src_rect,
    const VARectangle *dst_rect
)
{
    /*
     * Build all indirect state for a Gen6 render pass on 'surface':
     * destination and source surface state, samplers, CC viewport,
     * color-calc/blend/depth-stencil state, push constants and the
     * vertex data that maps src_rect onto dst_rect.
     */
    i965_render_dest_surface_state(ctx, 0);
    i965_render_src_surfaces_state(ctx, surface);
    i965_render_sampler(ctx);
    i965_render_cc_viewport(ctx);
    gen6_render_color_calc_state(ctx);
    gen6_render_blend_state(ctx);
    gen6_render_depth_stencil_state(ctx);
    i965_render_upload_constants(ctx, surface);
    i965_render_upload_vertex(ctx, surface, src_rect, dst_rect);
}
1711
static void
gen6_emit_invarient_states(VADriverContextP ctx)
{
    struct i965_driver_data *i965 = i965_driver_data(ctx);
    struct intel_batchbuffer *batch = i965->batch;

    /* Pipeline-invariant setup: select the 3D pipeline. */
    OUT_BATCH(batch, CMD_PIPELINE_SELECT | PIPELINE_SELECT_3D);

    /* 3DSTATE_MULTISAMPLE (3 dwords): single-sample rendering. */
    OUT_BATCH(batch, GEN6_3DSTATE_MULTISAMPLE | (3 - 2));
    OUT_BATCH(batch, GEN6_3DSTATE_MULTISAMPLE_PIXEL_LOCATION_CENTER |
              GEN6_3DSTATE_MULTISAMPLE_NUMSAMPLES_1); /* 1 sample/pixel */
    OUT_BATCH(batch, 0);

    /* 3DSTATE_SAMPLE_MASK (2 dwords): only sample 0 enabled. */
    OUT_BATCH(batch, GEN6_3DSTATE_SAMPLE_MASK | (2 - 2));
    OUT_BATCH(batch, 1);

    /* Set system instruction pointer */
    OUT_BATCH(batch, CMD_STATE_SIP | 0);
    OUT_BATCH(batch, 0);
}
1732
static void
gen6_emit_state_base_address(VADriverContextP ctx)
{
    struct i965_driver_data *i965 = i965_driver_data(ctx);
    struct intel_batchbuffer *batch = i965->batch;
    struct i965_render_state *render_state = &i965->render_state;

    /*
     * STATE_BASE_ADDRESS (10 dwords). Only the surface state base is
     * relocated to the surface-state/binding-table buffer; every other
     * base address and upper bound is left at zero with the modify bit
     * set, i.e. offsets are treated as graphics addresses.
     */
    OUT_BATCH(batch, CMD_STATE_BASE_ADDRESS | (10 - 2));
    OUT_BATCH(batch, BASE_ADDRESS_MODIFY); /* General state base address */
    OUT_RELOC(batch, render_state->wm.surface_state_binding_table_bo, I915_GEM_DOMAIN_INSTRUCTION, 0, BASE_ADDRESS_MODIFY); /* Surface state base address */
    OUT_BATCH(batch, BASE_ADDRESS_MODIFY); /* Dynamic state base address */
    OUT_BATCH(batch, BASE_ADDRESS_MODIFY); /* Indirect object base address */
    OUT_BATCH(batch, BASE_ADDRESS_MODIFY); /* Instruction base address */
    OUT_BATCH(batch, BASE_ADDRESS_MODIFY); /* General state upper bound */
    OUT_BATCH(batch, BASE_ADDRESS_MODIFY); /* Dynamic state upper bound */
    OUT_BATCH(batch, BASE_ADDRESS_MODIFY); /* Indirect object upper bound */
    OUT_BATCH(batch, BASE_ADDRESS_MODIFY); /* Instruction access upper bound */
}
1751
static void
gen6_emit_viewport_state_pointers(VADriverContextP ctx)
{
    struct i965_driver_data *i965 = i965_driver_data(ctx);
    struct intel_batchbuffer *batch = i965->batch;
    struct i965_render_state *render_state = &i965->render_state;

    /*
     * 3DSTATE_VIEWPORT_STATE_POINTERS (4 dwords): only the CC viewport
     * is modified; the clip and SF viewport pointers stay zero.
     */
    OUT_BATCH(batch, GEN6_3DSTATE_VIEWPORT_STATE_POINTERS |
              GEN6_3DSTATE_VIEWPORT_STATE_MODIFY_CC |
              (4 - 2));
    OUT_BATCH(batch, 0); /* clip viewport (unused) */
    OUT_BATCH(batch, 0); /* SF viewport (unused) */
    OUT_RELOC(batch, render_state->cc.viewport, I915_GEM_DOMAIN_INSTRUCTION, 0, 0);
}
1766
static void
gen6_emit_urb(VADriverContextP ctx)
{
    struct i965_driver_data *i965 = i965_driver_data(ctx);
    struct intel_batchbuffer *batch = i965->batch;

    /*
     * 3DSTATE_URB (3 dwords): allocate minimal VS URB entries and
     * give the (unused) GS nothing.
     */
    OUT_BATCH(batch, GEN6_3DSTATE_URB | (3 - 2));
    OUT_BATCH(batch, ((1 - 1) << GEN6_3DSTATE_URB_VS_SIZE_SHIFT) |
              (24 << GEN6_3DSTATE_URB_VS_ENTRIES_SHIFT)); /* at least 24 on GEN6 */
    OUT_BATCH(batch, (0 << GEN6_3DSTATE_URB_GS_SIZE_SHIFT) |
              (0 << GEN6_3DSTATE_URB_GS_ENTRIES_SHIFT)); /* no GS thread */
}
1779
static void
gen6_emit_cc_state_pointers(VADriverContextP ctx)
{
    struct i965_driver_data *i965 = i965_driver_data(ctx);
    struct intel_batchbuffer *batch = i965->batch;
    struct i965_render_state *render_state = &i965->render_state;

    /*
     * 3DSTATE_CC_STATE_POINTERS (4 dwords): blend, depth-stencil and
     * color-calc state pointers; the low bit 1 on each relocation marks
     * the pointer as valid/modified.
     */
    OUT_BATCH(batch, GEN6_3DSTATE_CC_STATE_POINTERS | (4 - 2));
    OUT_RELOC(batch, render_state->cc.blend, I915_GEM_DOMAIN_INSTRUCTION, 0, 1);
    OUT_RELOC(batch, render_state->cc.depth_stencil, I915_GEM_DOMAIN_INSTRUCTION, 0, 1);
    OUT_RELOC(batch, render_state->cc.state, I915_GEM_DOMAIN_INSTRUCTION, 0, 1);
}
1792
static void
gen6_emit_sampler_state_pointers(VADriverContextP ctx)
{
    struct i965_driver_data *i965 = i965_driver_data(ctx);
    struct intel_batchbuffer *batch = i965->batch;
    struct i965_render_state *render_state = &i965->render_state;

    /*
     * 3DSTATE_SAMPLER_STATE_POINTERS (4 dwords): only the PS sampler
     * pointer is modified; VS and GS have no samplers.
     */
    OUT_BATCH(batch, GEN6_3DSTATE_SAMPLER_STATE_POINTERS |
              GEN6_3DSTATE_SAMPLER_STATE_MODIFY_PS |
              (4 - 2));
    OUT_BATCH(batch, 0); /* VS */
    OUT_BATCH(batch, 0); /* GS */
    OUT_RELOC(batch,render_state->wm.sampler, I915_GEM_DOMAIN_INSTRUCTION, 0, 0);
}
1807
static void
gen6_emit_binding_table(VADriverContextP ctx)
{
    struct i965_driver_data *i965 = i965_driver_data(ctx);
    struct intel_batchbuffer *batch = i965->batch;

    /* Binding table pointers */
    OUT_BATCH(batch, CMD_BINDING_TABLE_POINTERS |
              GEN6_BINDING_TABLE_MODIFY_PS |
              (4 - 2));
    OUT_BATCH(batch, 0);                /* vs */
    OUT_BATCH(batch, 0);                /* gs */
    /* Only the PS uses the binding table */
    OUT_BATCH(batch, BINDING_TABLE_OFFSET);
}
1823
static void
gen6_emit_depth_buffer_state(VADriverContextP ctx)
{
    struct i965_driver_data *i965 = i965_driver_data(ctx);
    struct intel_batchbuffer *batch = i965->batch;

    /*
     * CMD_DEPTH_BUFFER (7 dwords): a NULL depth buffer — no depth
     * surface is bound, video rendering never depth-tests.
     */
    OUT_BATCH(batch, CMD_DEPTH_BUFFER | (7 - 2));
    OUT_BATCH(batch, (I965_SURFACE_NULL << CMD_DEPTH_BUFFER_TYPE_SHIFT) |
              (I965_DEPTHFORMAT_D32_FLOAT << CMD_DEPTH_BUFFER_FORMAT_SHIFT));
    OUT_BATCH(batch, 0);
    OUT_BATCH(batch, 0);
    OUT_BATCH(batch, 0);
    OUT_BATCH(batch, 0);
    OUT_BATCH(batch, 0);

    /* CMD_CLEAR_PARAMS (2 dwords): zero depth clear value. */
    OUT_BATCH(batch, CMD_CLEAR_PARAMS | (2 - 2));
    OUT_BATCH(batch, 0);
}
1842
static void
gen6_emit_drawing_rectangle(VADriverContextP ctx)
{
    /* Gen6 reuses the common drawing-rectangle emission. */
    i965_render_drawing_rectangle(ctx);
}
1848
static void 
gen6_emit_vs_state(VADriverContextP ctx)
{
    struct i965_driver_data *i965 = i965_driver_data(ctx);
    struct intel_batchbuffer *batch = i965->batch;

    /* disable VS constant buffer */
    OUT_BATCH(batch, GEN6_3DSTATE_CONSTANT_VS | (5 - 2));
    OUT_BATCH(batch, 0);
    OUT_BATCH(batch, 0);
    OUT_BATCH(batch, 0);
    OUT_BATCH(batch, 0);
        
    /* 3DSTATE_VS (6 dwords): no VS kernel, vertices pass through. */
    OUT_BATCH(batch, GEN6_3DSTATE_VS | (6 - 2));
    OUT_BATCH(batch, 0); /* without VS kernel */
    OUT_BATCH(batch, 0);
    OUT_BATCH(batch, 0);
    OUT_BATCH(batch, 0);
    OUT_BATCH(batch, 0); /* pass-through */
}
1869
static void 
gen6_emit_gs_state(VADriverContextP ctx)
{
    struct i965_driver_data *i965 = i965_driver_data(ctx);
    struct intel_batchbuffer *batch = i965->batch;

    /* disable GS constant buffer */
    OUT_BATCH(batch, GEN6_3DSTATE_CONSTANT_GS | (5 - 2));
    OUT_BATCH(batch, 0);
    OUT_BATCH(batch, 0);
    OUT_BATCH(batch, 0);
    OUT_BATCH(batch, 0);
        
    /* 3DSTATE_GS (7 dwords): no GS kernel, geometry passes through. */
    OUT_BATCH(batch, GEN6_3DSTATE_GS | (7 - 2));
    OUT_BATCH(batch, 0); /* without GS kernel */
    OUT_BATCH(batch, 0);
    OUT_BATCH(batch, 0);
    OUT_BATCH(batch, 0);
    OUT_BATCH(batch, 0);
    OUT_BATCH(batch, 0); /* pass-through */
}
1891
static void 
gen6_emit_clip_state(VADriverContextP ctx)
{
    struct i965_driver_data *i965 = i965_driver_data(ctx);
    struct intel_batchbuffer *batch = i965->batch;

    /* 3DSTATE_CLIP (4 dwords): clipper disabled, pass-through. */
    OUT_BATCH(batch, GEN6_3DSTATE_CLIP | (4 - 2));
    OUT_BATCH(batch, 0);
    OUT_BATCH(batch, 0); /* pass-through */
    OUT_BATCH(batch, 0);
}
1903
static void 
gen6_emit_sf_state(VADriverContextP ctx)
{
    struct i965_driver_data *i965 = i965_driver_data(ctx);
    struct intel_batchbuffer *batch = i965->batch;

    /*
     * 3DSTATE_SF (20 dwords): one URB output attribute (the texture
     * coordinate), culling disabled, and the rest left at zero.
     */
    OUT_BATCH(batch, GEN6_3DSTATE_SF | (20 - 2));
    OUT_BATCH(batch, (1 << GEN6_3DSTATE_SF_NUM_OUTPUTS_SHIFT) |
              (1 << GEN6_3DSTATE_SF_URB_ENTRY_READ_LENGTH_SHIFT) |
              (0 << GEN6_3DSTATE_SF_URB_ENTRY_READ_OFFSET_SHIFT));
    OUT_BATCH(batch, 0);
    OUT_BATCH(batch, GEN6_3DSTATE_SF_CULL_NONE);
    OUT_BATCH(batch, 2 << GEN6_3DSTATE_SF_TRIFAN_PROVOKE_SHIFT); /* DW4 */
    OUT_BATCH(batch, 0);
    OUT_BATCH(batch, 0);
    OUT_BATCH(batch, 0);
    OUT_BATCH(batch, 0);
    OUT_BATCH(batch, 0); /* DW9 */
    OUT_BATCH(batch, 0);
    OUT_BATCH(batch, 0);
    OUT_BATCH(batch, 0);
    OUT_BATCH(batch, 0);
    OUT_BATCH(batch, 0); /* DW14 */
    OUT_BATCH(batch, 0);
    OUT_BATCH(batch, 0);
    OUT_BATCH(batch, 0);
    OUT_BATCH(batch, 0);
    OUT_BATCH(batch, 0); /* DW19 */
}
1933
static void 
gen6_emit_wm_state(VADriverContextP ctx, int kernel)
{
    struct i965_driver_data *i965 = i965_driver_data(ctx);
    struct intel_batchbuffer *batch = i965->batch;
    struct i965_render_state *render_state = &i965->render_state;

    /*
     * Bind the push-constant buffer (CURBE) to the PS and program
     * 3DSTATE_WM with the pixel shader selected by 'kernel'
     * (an index into render_state->render_kernels).
     */
    OUT_BATCH(batch, GEN6_3DSTATE_CONSTANT_PS |
              GEN6_3DSTATE_CONSTANT_BUFFER_0_ENABLE |
              (5 - 2));
    OUT_RELOC(batch, 
              render_state->curbe.bo,
              I915_GEM_DOMAIN_INSTRUCTION, 0,
              0);
    OUT_BATCH(batch, 0);
    OUT_BATCH(batch, 0);
    OUT_BATCH(batch, 0);

    /* 3DSTATE_WM (9 dwords): SIMD16 dispatch, 40 threads max. */
    OUT_BATCH(batch, GEN6_3DSTATE_WM | (9 - 2));
    OUT_RELOC(batch, render_state->render_kernels[kernel].bo,
              I915_GEM_DOMAIN_INSTRUCTION, 0,
              0);
    OUT_BATCH(batch, (1 << GEN6_3DSTATE_WM_SAMPLER_COUNT_SHITF) |
              (5 << GEN6_3DSTATE_WM_BINDING_TABLE_ENTRY_COUNT_SHIFT));
    OUT_BATCH(batch, 0);
    OUT_BATCH(batch, (6 << GEN6_3DSTATE_WM_DISPATCH_START_GRF_0_SHIFT)); /* DW4 */
    OUT_BATCH(batch, ((40 - 1) << GEN6_3DSTATE_WM_MAX_THREADS_SHIFT) |
              GEN6_3DSTATE_WM_DISPATCH_ENABLE |
              GEN6_3DSTATE_WM_16_DISPATCH_ENABLE);
    OUT_BATCH(batch, (1 << GEN6_3DSTATE_WM_NUM_SF_OUTPUTS_SHIFT) |
              GEN6_3DSTATE_WM_PERSPECTIVE_PIXEL_BARYCENTRIC);
    OUT_BATCH(batch, 0);
    OUT_BATCH(batch, 0);
}
1968
static void
gen6_emit_vertex_element_state(VADriverContextP ctx)
{
    struct i965_driver_data *i965 = i965_driver_data(ctx);
    struct intel_batchbuffer *batch = i965->batch;

    /* Set up our vertex elements, sourced from the single vertex buffer. */
    OUT_BATCH(batch, CMD_VERTEX_ELEMENTS | (5 - 2));
    /* offset 0: X,Y -> {X, Y, 1.0, 1.0} */
    OUT_BATCH(batch, (0 << GEN6_VE0_VERTEX_BUFFER_INDEX_SHIFT) |
              GEN6_VE0_VALID |
              (I965_SURFACEFORMAT_R32G32_FLOAT << VE0_FORMAT_SHIFT) |
              (0 << VE0_OFFSET_SHIFT));
    OUT_BATCH(batch, (I965_VFCOMPONENT_STORE_SRC << VE1_VFCOMPONENT_0_SHIFT) |
              (I965_VFCOMPONENT_STORE_SRC << VE1_VFCOMPONENT_1_SHIFT) |
              (I965_VFCOMPONENT_STORE_1_FLT << VE1_VFCOMPONENT_2_SHIFT) |
              (I965_VFCOMPONENT_STORE_1_FLT << VE1_VFCOMPONENT_3_SHIFT));
    /* offset 8: S0, T0 -> {S0, T0, 1.0, 1.0} */
    OUT_BATCH(batch, (0 << GEN6_VE0_VERTEX_BUFFER_INDEX_SHIFT) |
              GEN6_VE0_VALID |
              (I965_SURFACEFORMAT_R32G32_FLOAT << VE0_FORMAT_SHIFT) |
              (8 << VE0_OFFSET_SHIFT));
    OUT_BATCH(batch, (I965_VFCOMPONENT_STORE_SRC << VE1_VFCOMPONENT_0_SHIFT) | 
              (I965_VFCOMPONENT_STORE_SRC << VE1_VFCOMPONENT_1_SHIFT) |
              (I965_VFCOMPONENT_STORE_1_FLT << VE1_VFCOMPONENT_2_SHIFT) |
              (I965_VFCOMPONENT_STORE_1_FLT << VE1_VFCOMPONENT_3_SHIFT));
}
1996
static void
gen6_emit_vertices(VADriverContextP ctx)
{
    struct i965_driver_data *i965 = i965_driver_data(ctx);
    struct intel_batchbuffer *batch = i965->batch;
    struct i965_render_state *render_state = &i965->render_state;

    /*
     * Bind the vertex buffer (pitch 16 bytes = 4 floats per vertex,
     * 12 dwords total -> 3 vertices) and kick off a single RECTLIST
     * primitive covering the destination rectangle.
     */
    BEGIN_BATCH(batch, 11);
    OUT_BATCH(batch, CMD_VERTEX_BUFFERS | 3);
    OUT_BATCH(batch, 
              (0 << GEN6_VB0_BUFFER_INDEX_SHIFT) |
              GEN6_VB0_VERTEXDATA |
              ((4 * 4) << VB0_BUFFER_PITCH_SHIFT));
    OUT_RELOC(batch, render_state->vb.vertex_buffer, I915_GEM_DOMAIN_VERTEX, 0, 0);
    OUT_RELOC(batch, render_state->vb.vertex_buffer, I915_GEM_DOMAIN_VERTEX, 0, 12 * 4);
    OUT_BATCH(batch, 0);

    OUT_BATCH(batch, 
              CMD_3DPRIMITIVE |
              _3DPRIMITIVE_VERTEX_SEQUENTIAL |
              (_3DPRIM_RECTLIST << _3DPRIMITIVE_TOPOLOGY_SHIFT) |
              (0 << 9) |
              4);
    OUT_BATCH(batch, 3); /* vertex count per instance */
    OUT_BATCH(batch, 0); /* start vertex offset */
    OUT_BATCH(batch, 1); /* single instance */
    OUT_BATCH(batch, 0); /* start instance location */
    OUT_BATCH(batch, 0); /* index buffer offset, ignored */
    ADVANCE_BATCH(batch);
}
2027
static void
gen6_render_emit_states(VADriverContextP ctx, int kernel)
{
    struct i965_driver_data *i965 = i965_driver_data(ctx);
    struct intel_batchbuffer *batch = i965->batch;

    /*
     * Emit the complete Gen6 3D pipeline programming as one atomic
     * batch section. The ordering below follows the hardware's
     * required state-setup sequence; do not reorder.
     */
    intel_batchbuffer_start_atomic(batch, 0x1000);
    intel_batchbuffer_emit_mi_flush(batch);
    gen6_emit_invarient_states(ctx);
    gen6_emit_state_base_address(ctx);
    gen6_emit_viewport_state_pointers(ctx);
    gen6_emit_urb(ctx);
    gen6_emit_cc_state_pointers(ctx);
    gen6_emit_sampler_state_pointers(ctx);
    gen6_emit_vs_state(ctx);
    gen6_emit_gs_state(ctx);
    gen6_emit_clip_state(ctx);
    gen6_emit_sf_state(ctx);
    gen6_emit_wm_state(ctx, kernel);
    gen6_emit_binding_table(ctx);
    gen6_emit_depth_buffer_state(ctx);
    gen6_emit_drawing_rectangle(ctx);
    gen6_emit_vertex_element_state(ctx);
    gen6_emit_vertices(ctx);
    intel_batchbuffer_end_atomic(batch);
}
2054
static void
gen6_render_put_surface(
    VADriverContextP   ctx,
    VASurfaceID        surface,
    const VARectangle *src_rect,
    const VARectangle *dst_rect,
    unsigned int       flags
)
{
    struct i965_driver_data *i965 = i965_driver_data(ctx);
    struct intel_batchbuffer *batch = i965->batch;

    /*
     * Gen6 entry point for rendering 'surface' onto the destination:
     * (re)allocate state buffers, fill them, clear the destination
     * region, emit the pipeline with the main PS kernel and flush.
     * NOTE(review): 'flags' is accepted but unused here — confirm
     * against the render-interface callers.
     */
    gen6_render_initialize(ctx);
    gen6_render_setup_states(ctx, surface, src_rect, dst_rect);
    i965_clear_dest_region(ctx);
    gen6_render_emit_states(ctx, PS_KERNEL);
    intel_batchbuffer_flush(batch);
}
2073
2074 static void
2075 gen6_subpicture_render_blend_state(VADriverContextP ctx)
2076 {
2077     struct i965_driver_data *i965 = i965_driver_data(ctx);
2078     struct i965_render_state *render_state = &i965->render_state;
2079     struct gen6_blend_state *blend_state;
2080
2081     dri_bo_unmap(render_state->cc.state);    
2082     dri_bo_map(render_state->cc.blend, 1);
2083     assert(render_state->cc.blend->virtual);
2084     blend_state = render_state->cc.blend->virtual;
2085     memset(blend_state, 0, sizeof(*blend_state));
2086     blend_state->blend0.dest_blend_factor = I965_BLENDFACTOR_INV_SRC_ALPHA;
2087     blend_state->blend0.source_blend_factor = I965_BLENDFACTOR_SRC_ALPHA;
2088     blend_state->blend0.blend_func = I965_BLENDFUNCTION_ADD;
2089     blend_state->blend0.blend_enable = 1;
2090     blend_state->blend1.post_blend_clamp_enable = 1;
2091     blend_state->blend1.pre_blend_clamp_enable = 1;
2092     blend_state->blend1.clamp_range = 0; /* clamp range [0, 1] */
2093     dri_bo_unmap(render_state->cc.blend);
2094 }
2095
static void
gen6_subpicture_render_setup_states(
    VADriverContextP   ctx,
    VASurfaceID        surface,
    const VARectangle *src_rect,
    const VARectangle *dst_rect
)
{
    /*
     * Like gen6_render_setup_states(), but sources the subpicture
     * surface, uses the alpha-blending blend state, and uploads only
     * destination-rect vertex data (no push constants).
     */
    i965_render_dest_surface_state(ctx, 0);
    i965_subpic_render_src_surfaces_state(ctx, surface);
    i965_render_sampler(ctx);
    i965_render_cc_viewport(ctx);
    gen6_render_color_calc_state(ctx);
    gen6_subpicture_render_blend_state(ctx);
    gen6_render_depth_stencil_state(ctx);
    i965_subpic_render_upload_vertex(ctx, surface, dst_rect);
}
2113
2114 static void
2115 gen6_render_put_subpicture(
2116     VADriverContextP   ctx,
2117     VASurfaceID        surface,
2118     const VARectangle *src_rect,
2119     const VARectangle *dst_rect
2120 )
2121 {
2122     struct i965_driver_data *i965 = i965_driver_data(ctx);
2123     struct intel_batchbuffer *batch = i965->batch;
2124     struct object_surface *obj_surface = SURFACE(surface);
2125     struct object_subpic *obj_subpic = SUBPIC(obj_surface->subpic);
2126
2127     assert(obj_subpic);
2128     gen6_render_initialize(ctx);
2129     gen6_subpicture_render_setup_states(ctx, surface, src_rect, dst_rect);
2130     gen6_render_emit_states(ctx, PS_SUBPIC_KERNEL);
2131     i965_render_upload_image_palette(ctx, obj_subpic->image, 0xff);
2132     intel_batchbuffer_flush(batch);
2133 }
2134
2135 /*
2136  * for GEN7
2137  */
static void 
gen7_render_initialize(VADriverContextP ctx)
{
    struct i965_driver_data *i965 = i965_driver_data(ctx);
    struct i965_render_state *render_state = &i965->render_state;
    dri_bo *bo;

    /*
     * (Re)allocate all per-pass state buffers for Gen7. Each old
     * buffer is unreferenced first, so repeated calls do not leak.
     * Note that the sampler buffer uses gen7_sampler_state while the
     * CC/blend/depth-stencil buffers reuse the gen6 structure layouts.
     */

    /* VERTEX BUFFER */
    dri_bo_unreference(render_state->vb.vertex_buffer);
    bo = dri_bo_alloc(i965->intel.bufmgr,
                      "vertex buffer",
                      4096,
                      4096);
    assert(bo);
    render_state->vb.vertex_buffer = bo;

    /* WM */
    dri_bo_unreference(render_state->wm.surface_state_binding_table_bo);
    bo = dri_bo_alloc(i965->intel.bufmgr,
                      "surface state & binding table",
                      (SURFACE_STATE_PADDED_SIZE + sizeof(unsigned int)) * MAX_RENDER_SURFACES,
                      4096);
    assert(bo);
    render_state->wm.surface_state_binding_table_bo = bo;

    dri_bo_unreference(render_state->wm.sampler);
    bo = dri_bo_alloc(i965->intel.bufmgr,
                      "sampler state",
                      MAX_SAMPLERS * sizeof(struct gen7_sampler_state),
                      4096);
    assert(bo);
    render_state->wm.sampler = bo;
    render_state->wm.sampler_count = 0;

    /* COLOR CALCULATOR */
    dri_bo_unreference(render_state->cc.state);
    bo = dri_bo_alloc(i965->intel.bufmgr,
                      "color calc state",
                      sizeof(struct gen6_color_calc_state),
                      4096);
    assert(bo);
    render_state->cc.state = bo;

    /* CC VIEWPORT */
    dri_bo_unreference(render_state->cc.viewport);
    bo = dri_bo_alloc(i965->intel.bufmgr,
                      "cc viewport",
                      sizeof(struct i965_cc_viewport),
                      4096);
    assert(bo);
    render_state->cc.viewport = bo;

    /* BLEND STATE */
    dri_bo_unreference(render_state->cc.blend);
    bo = dri_bo_alloc(i965->intel.bufmgr,
                      "blend state",
                      sizeof(struct gen6_blend_state),
                      4096);
    assert(bo);
    render_state->cc.blend = bo;

    /* DEPTH & STENCIL STATE */
    dri_bo_unreference(render_state->cc.depth_stencil);
    bo = dri_bo_alloc(i965->intel.bufmgr,
                      "depth & stencil state",
                      sizeof(struct gen6_depth_stencil_state),
                      4096);
    assert(bo);
    render_state->cc.depth_stencil = bo;
}
2208
2209 static void
2210 gen7_render_color_calc_state(VADriverContextP ctx)
2211 {
2212     struct i965_driver_data *i965 = i965_driver_data(ctx);
2213     struct i965_render_state *render_state = &i965->render_state;
2214     struct gen6_color_calc_state *color_calc_state;
2215     
2216     dri_bo_map(render_state->cc.state, 1);
2217     assert(render_state->cc.state->virtual);
2218     color_calc_state = render_state->cc.state->virtual;
2219     memset(color_calc_state, 0, sizeof(*color_calc_state));
2220     color_calc_state->constant_r = 1.0;
2221     color_calc_state->constant_g = 0.0;
2222     color_calc_state->constant_b = 1.0;
2223     color_calc_state->constant_a = 1.0;
2224     dri_bo_unmap(render_state->cc.state);
2225 }
2226
2227 static void
2228 gen7_render_blend_state(VADriverContextP ctx)
2229 {
2230     struct i965_driver_data *i965 = i965_driver_data(ctx);
2231     struct i965_render_state *render_state = &i965->render_state;
2232     struct gen6_blend_state *blend_state;
2233     
2234     dri_bo_map(render_state->cc.blend, 1);
2235     assert(render_state->cc.blend->virtual);
2236     blend_state = render_state->cc.blend->virtual;
2237     memset(blend_state, 0, sizeof(*blend_state));
2238     blend_state->blend1.logic_op_enable = 1;
2239     blend_state->blend1.logic_op_func = 0xc;
2240     blend_state->blend1.pre_blend_clamp_enable = 1;
2241     dri_bo_unmap(render_state->cc.blend);
2242 }
2243
2244 static void
2245 gen7_render_depth_stencil_state(VADriverContextP ctx)
2246 {
2247     struct i965_driver_data *i965 = i965_driver_data(ctx);
2248     struct i965_render_state *render_state = &i965->render_state;
2249     struct gen6_depth_stencil_state *depth_stencil_state;
2250     
2251     dri_bo_map(render_state->cc.depth_stencil, 1);
2252     assert(render_state->cc.depth_stencil->virtual);
2253     depth_stencil_state = render_state->cc.depth_stencil->virtual;
2254     memset(depth_stencil_state, 0, sizeof(*depth_stencil_state));
2255     dri_bo_unmap(render_state->cc.depth_stencil);
2256 }
2257
2258 static void 
2259 gen7_render_sampler(VADriverContextP ctx)
2260 {
2261     struct i965_driver_data *i965 = i965_driver_data(ctx);
2262     struct i965_render_state *render_state = &i965->render_state;
2263     struct gen7_sampler_state *sampler_state;
2264     int i;
2265     
2266     assert(render_state->wm.sampler_count > 0);
2267     assert(render_state->wm.sampler_count <= MAX_SAMPLERS);
2268
2269     dri_bo_map(render_state->wm.sampler, 1);
2270     assert(render_state->wm.sampler->virtual);
2271     sampler_state = render_state->wm.sampler->virtual;
2272     for (i = 0; i < render_state->wm.sampler_count; i++) {
2273         memset(sampler_state, 0, sizeof(*sampler_state));
2274         sampler_state->ss0.min_filter = I965_MAPFILTER_LINEAR;
2275         sampler_state->ss0.mag_filter = I965_MAPFILTER_LINEAR;
2276         sampler_state->ss3.r_wrap_mode = I965_TEXCOORDMODE_CLAMP;
2277         sampler_state->ss3.s_wrap_mode = I965_TEXCOORDMODE_CLAMP;
2278         sampler_state->ss3.t_wrap_mode = I965_TEXCOORDMODE_CLAMP;
2279         sampler_state++;
2280     }
2281
2282     dri_bo_unmap(render_state->wm.sampler);
2283 }
2284
static void
gen7_render_setup_states(
    VADriverContextP   ctx,
    VASurfaceID        surface,
    const VARectangle *src_rect,
    const VARectangle *dst_rect
)
{
    /*
     * Build all indirect state for a Gen7 render pass on 'surface'.
     * Mirrors gen6_render_setup_states() but uses the Gen7 sampler
     * and Gen7 CC/blend/depth-stencil fill functions.
     */
    i965_render_dest_surface_state(ctx, 0);
    i965_render_src_surfaces_state(ctx, surface);
    gen7_render_sampler(ctx);
    i965_render_cc_viewport(ctx);
    gen7_render_color_calc_state(ctx);
    gen7_render_blend_state(ctx);
    gen7_render_depth_stencil_state(ctx);
    i965_render_upload_constants(ctx, surface);
    i965_render_upload_vertex(ctx, surface, src_rect, dst_rect);
}
2303
static void
gen7_emit_invarient_states(VADriverContextP ctx)
{
    struct i965_driver_data *i965 = i965_driver_data(ctx);
    struct intel_batchbuffer *batch = i965->batch;

    /* Pipeline-invariant setup: select the 3D pipeline. */
    BEGIN_BATCH(batch, 1);
    OUT_BATCH(batch, CMD_PIPELINE_SELECT | PIPELINE_SELECT_3D);
    ADVANCE_BATCH(batch);

    /* 3DSTATE_MULTISAMPLE grew to 4 dwords on Gen7. */
    BEGIN_BATCH(batch, 4);
    OUT_BATCH(batch, GEN6_3DSTATE_MULTISAMPLE | (4 - 2));
    OUT_BATCH(batch, GEN6_3DSTATE_MULTISAMPLE_PIXEL_LOCATION_CENTER |
              GEN6_3DSTATE_MULTISAMPLE_NUMSAMPLES_1); /* 1 sample/pixel */
    OUT_BATCH(batch, 0);
    OUT_BATCH(batch, 0);
    ADVANCE_BATCH(batch);

    /* 3DSTATE_SAMPLE_MASK: only sample 0 enabled. */
    BEGIN_BATCH(batch, 2);
    OUT_BATCH(batch, GEN6_3DSTATE_SAMPLE_MASK | (2 - 2));
    OUT_BATCH(batch, 1);
    ADVANCE_BATCH(batch);

    /* Set system instruction pointer */
    BEGIN_BATCH(batch, 2);
    OUT_BATCH(batch, CMD_STATE_SIP | 0);
    OUT_BATCH(batch, 0);
    ADVANCE_BATCH(batch);
}
2333
static void
gen7_emit_state_base_address(VADriverContextP ctx)
{
    struct i965_driver_data *i965 = i965_driver_data(ctx);
    struct intel_batchbuffer *batch = i965->batch;
    struct i965_render_state *render_state = &i965->render_state;

    /*
     * STATE_BASE_ADDRESS (10 dwords): identical layout to the Gen6
     * version — only the surface state base is relocated; all other
     * bases/bounds are zero with the modify bit set.
     */
    OUT_BATCH(batch, CMD_STATE_BASE_ADDRESS | (10 - 2));
    OUT_BATCH(batch, BASE_ADDRESS_MODIFY); /* General state base address */
    OUT_RELOC(batch, render_state->wm.surface_state_binding_table_bo, I915_GEM_DOMAIN_INSTRUCTION, 0, BASE_ADDRESS_MODIFY); /* Surface state base address */
    OUT_BATCH(batch, BASE_ADDRESS_MODIFY); /* Dynamic state base address */
    OUT_BATCH(batch, BASE_ADDRESS_MODIFY); /* Indirect object base address */
    OUT_BATCH(batch, BASE_ADDRESS_MODIFY); /* Instruction base address */
    OUT_BATCH(batch, BASE_ADDRESS_MODIFY); /* General state upper bound */
    OUT_BATCH(batch, BASE_ADDRESS_MODIFY); /* Dynamic state upper bound */
    OUT_BATCH(batch, BASE_ADDRESS_MODIFY); /* Indirect object upper bound */
    OUT_BATCH(batch, BASE_ADDRESS_MODIFY); /* Instruction access upper bound */
}
2352
static void
gen7_emit_viewport_state_pointers(VADriverContextP ctx)
{
    struct i965_driver_data *i965 = i965_driver_data(ctx);
    struct intel_batchbuffer *batch = i965->batch;
    struct i965_render_state *render_state = &i965->render_state;

    /* Gen7 splits the viewport pointers into separate commands:
     * point the CC viewport at our buffer, leave SF/CLIP at zero. */
    BEGIN_BATCH(batch, 2);
    OUT_BATCH(batch, GEN7_3DSTATE_VIEWPORT_STATE_POINTERS_CC | (2 - 2));
    OUT_RELOC(batch,
              render_state->cc.viewport,
              I915_GEM_DOMAIN_INSTRUCTION, 0,
              0);
    ADVANCE_BATCH(batch);

    BEGIN_BATCH(batch, 2);
    OUT_BATCH(batch, GEN7_3DSTATE_VIEWPORT_STATE_POINTERS_SF_CL | (2 - 2));
    OUT_BATCH(batch, 0);
    ADVANCE_BATCH(batch);
}
2373
2374 /*
2375  * URB layout on GEN7 
2376  * ----------------------------------------
2377  * | PS Push Constants (8KB) | VS entries |
2378  * ----------------------------------------
2379  */
static void
gen7_emit_urb(VADriverContextP ctx)
{
    struct i965_driver_data *i965 = i965_driver_data(ctx);
    struct intel_batchbuffer *batch = i965->batch;

    /* Reserve 8KB of the URB for PS push constants. */
    BEGIN_BATCH(batch, 2);
    OUT_BATCH(batch, GEN7_3DSTATE_PUSH_CONSTANT_ALLOC_PS | (2 - 2));
    OUT_BATCH(batch, 8); /* in 1KBs */
    ADVANCE_BATCH(batch);

    /* VS entries start after the push-constant region (address 1). */
    BEGIN_BATCH(batch, 2);
    OUT_BATCH(batch, GEN7_3DSTATE_URB_VS | (2 - 2));
    OUT_BATCH(batch, 
              (32 << GEN7_URB_ENTRY_NUMBER_SHIFT) | /* at least 32 */
              (2 - 1) << GEN7_URB_ENTRY_SIZE_SHIFT |
              (1 << GEN7_URB_STARTING_ADDRESS_SHIFT));
   ADVANCE_BATCH(batch);

   /* GS/HS/DS are unused: zero-size allocations, non-zero start
    * addresses only to satisfy the command layout. */
   BEGIN_BATCH(batch, 2);
   OUT_BATCH(batch, GEN7_3DSTATE_URB_GS | (2 - 2));
   OUT_BATCH(batch,
             (0 << GEN7_URB_ENTRY_SIZE_SHIFT) |
             (1 << GEN7_URB_STARTING_ADDRESS_SHIFT));
   ADVANCE_BATCH(batch);

   BEGIN_BATCH(batch, 2);
   OUT_BATCH(batch, GEN7_3DSTATE_URB_HS | (2 - 2));
   OUT_BATCH(batch,
             (0 << GEN7_URB_ENTRY_SIZE_SHIFT) |
             (2 << GEN7_URB_STARTING_ADDRESS_SHIFT));
   ADVANCE_BATCH(batch);

   BEGIN_BATCH(batch, 2);
   OUT_BATCH(batch, GEN7_3DSTATE_URB_DS | (2 - 2));
   OUT_BATCH(batch,
             (0 << GEN7_URB_ENTRY_SIZE_SHIFT) |
             (2 << GEN7_URB_STARTING_ADDRESS_SHIFT));
   ADVANCE_BATCH(batch);
}
2420
static void
gen7_emit_cc_state_pointers(VADriverContextP ctx)
{
    struct i965_driver_data *i965 = i965_driver_data(ctx);
    struct intel_batchbuffer *batch = i965->batch;
    struct i965_render_state *render_state = &i965->render_state;

    /* Gen7 splits the single Gen6 CC_STATE_POINTERS command into three
     * separate pointer commands; the low bit 1 on each relocation marks
     * the pointer as valid/modified. */
    BEGIN_BATCH(batch, 2);
    OUT_BATCH(batch, GEN6_3DSTATE_CC_STATE_POINTERS | (2 - 2));
    OUT_RELOC(batch,
              render_state->cc.state,
              I915_GEM_DOMAIN_INSTRUCTION, 0,
              1);
    ADVANCE_BATCH(batch);

    BEGIN_BATCH(batch, 2);
    OUT_BATCH(batch, GEN7_3DSTATE_BLEND_STATE_POINTERS | (2 - 2));
    OUT_RELOC(batch,
              render_state->cc.blend,
              I915_GEM_DOMAIN_INSTRUCTION, 0,
              1);
    ADVANCE_BATCH(batch);

    BEGIN_BATCH(batch, 2);
    OUT_BATCH(batch, GEN7_3DSTATE_DEPTH_STENCIL_STATE_POINTERS | (2 - 2));
    OUT_RELOC(batch,
              render_state->cc.depth_stencil,
              I915_GEM_DOMAIN_INSTRUCTION, 0, 
              1);
    ADVANCE_BATCH(batch);
}
2452
static void
gen7_emit_sampler_state_pointers(VADriverContextP ctx)
{
    struct i965_driver_data *i965 = i965_driver_data(ctx);
    struct intel_batchbuffer *batch = i965->batch;
    struct i965_render_state *render_state = &i965->render_state;

    /* Bind the sampler state buffer for the PS stage only. */
    BEGIN_BATCH(batch, 2);
    OUT_BATCH(batch, GEN7_3DSTATE_SAMPLER_STATE_POINTERS_PS | (2 - 2));
    OUT_RELOC(batch,
              render_state->wm.sampler,
              I915_GEM_DOMAIN_INSTRUCTION, 0,
              0);
    ADVANCE_BATCH(batch);
}
2468
/*
 * Set the pixel-shader binding table pointer.  BINDING_TABLE_OFFSET
 * is an offset, presumably relative to the surface-state base address
 * programmed in gen7_emit_state_base_address() -- confirm against the
 * base-address setup.
 */
static void
gen7_emit_binding_table(VADriverContextP ctx)
{
    struct i965_driver_data *i965 = i965_driver_data(ctx);
    struct intel_batchbuffer *batch = i965->batch;

    BEGIN_BATCH(batch, 2);
    OUT_BATCH(batch, GEN7_3DSTATE_BINDING_TABLE_POINTERS_PS | (2 - 2));
    OUT_BATCH(batch, BINDING_TABLE_OFFSET);
    ADVANCE_BATCH(batch);
}
2480
/*
 * Disable depth/stencil for the render pass: emit a null depth buffer
 * (surface type NULL, D32_FLOAT format, no buffer bound) followed by
 * zeroed clear parameters.
 */
static void
gen7_emit_depth_buffer_state(VADriverContextP ctx)
{
    struct i965_driver_data *i965 = i965_driver_data(ctx);
    struct intel_batchbuffer *batch = i965->batch;

    BEGIN_BATCH(batch, 7);
    OUT_BATCH(batch, GEN7_3DSTATE_DEPTH_BUFFER | (7 - 2));
    OUT_BATCH(batch,
              (I965_DEPTHFORMAT_D32_FLOAT << 18) |
              (I965_SURFACE_NULL << 29));
    OUT_BATCH(batch, 0);
    OUT_BATCH(batch, 0);
    OUT_BATCH(batch, 0);
    OUT_BATCH(batch, 0);
    OUT_BATCH(batch, 0);
    ADVANCE_BATCH(batch);

    BEGIN_BATCH(batch, 3);
    OUT_BATCH(batch, GEN7_3DSTATE_CLEAR_PARAMS | (3 - 2));
    OUT_BATCH(batch, 0);
    OUT_BATCH(batch, 0);
    ADVANCE_BATCH(batch);
}
2505
/* Gen7 reuses the common drawing-rectangle emission path. */
static void
gen7_emit_drawing_rectangle(VADriverContextP ctx)
{
    i965_render_drawing_rectangle(ctx);
}
2511
2512 static void 
2513 gen7_emit_vs_state(VADriverContextP ctx)
2514 {
2515     struct i965_driver_data *i965 = i965_driver_data(ctx);
2516     struct intel_batchbuffer *batch = i965->batch;
2517
2518     /* disable VS constant buffer */
2519     OUT_BATCH(batch, GEN6_3DSTATE_CONSTANT_VS | (7 - 2));
2520     OUT_BATCH(batch, 0);
2521     OUT_BATCH(batch, 0);
2522     OUT_BATCH(batch, 0);
2523     OUT_BATCH(batch, 0);
2524     OUT_BATCH(batch, 0);
2525     OUT_BATCH(batch, 0);
2526         
2527     OUT_BATCH(batch, GEN6_3DSTATE_VS | (6 - 2));
2528     OUT_BATCH(batch, 0); /* without VS kernel */
2529     OUT_BATCH(batch, 0);
2530     OUT_BATCH(batch, 0);
2531     OUT_BATCH(batch, 0);
2532     OUT_BATCH(batch, 0); /* pass-through */
2533 }
2534
/*
 * Disable every geometry stage between VS and the rasterizer: GS, HS,
 * TE, DS and stream-out are all emitted with zeroed state (no kernels)
 * and their binding-table pointers cleared, so vertices bypass the
 * whole geometry pipeline.
 */
static void
gen7_emit_bypass_state(VADriverContextP ctx)
{
    struct i965_driver_data *i965 = i965_driver_data(ctx);
    struct intel_batchbuffer *batch = i965->batch;

    /* bypass GS */
    BEGIN_BATCH(batch, 7);
    OUT_BATCH(batch, GEN6_3DSTATE_CONSTANT_GS | (7 - 2));
    OUT_BATCH(batch, 0);
    OUT_BATCH(batch, 0);
    OUT_BATCH(batch, 0);
    OUT_BATCH(batch, 0);
    OUT_BATCH(batch, 0);
    OUT_BATCH(batch, 0);
    ADVANCE_BATCH(batch);

    BEGIN_BATCH(batch, 7);
    OUT_BATCH(batch, GEN6_3DSTATE_GS | (7 - 2));
    OUT_BATCH(batch, 0); /* without GS kernel */
    OUT_BATCH(batch, 0);
    OUT_BATCH(batch, 0);
    OUT_BATCH(batch, 0);
    OUT_BATCH(batch, 0);
    OUT_BATCH(batch, 0); /* pass-through */
    ADVANCE_BATCH(batch);

    BEGIN_BATCH(batch, 2);
    OUT_BATCH(batch, GEN7_3DSTATE_BINDING_TABLE_POINTERS_GS | (2 - 2));
    OUT_BATCH(batch, 0);
    ADVANCE_BATCH(batch);

    /* disable HS */
    BEGIN_BATCH(batch, 7);
    OUT_BATCH(batch, GEN7_3DSTATE_CONSTANT_HS | (7 - 2));
    OUT_BATCH(batch, 0);
    OUT_BATCH(batch, 0);
    OUT_BATCH(batch, 0);
    OUT_BATCH(batch, 0);
    OUT_BATCH(batch, 0);
    OUT_BATCH(batch, 0);
    ADVANCE_BATCH(batch);

    BEGIN_BATCH(batch, 7);
    OUT_BATCH(batch, GEN7_3DSTATE_HS | (7 - 2));
    OUT_BATCH(batch, 0);
    OUT_BATCH(batch, 0);
    OUT_BATCH(batch, 0);
    OUT_BATCH(batch, 0);
    OUT_BATCH(batch, 0);
    OUT_BATCH(batch, 0);
    ADVANCE_BATCH(batch);

    BEGIN_BATCH(batch, 2);
    OUT_BATCH(batch, GEN7_3DSTATE_BINDING_TABLE_POINTERS_HS | (2 - 2));
    OUT_BATCH(batch, 0);
    ADVANCE_BATCH(batch);

    /* Disable TE */
    BEGIN_BATCH(batch, 4);
    OUT_BATCH(batch, GEN7_3DSTATE_TE | (4 - 2));
    OUT_BATCH(batch, 0);
    OUT_BATCH(batch, 0);
    OUT_BATCH(batch, 0);
    ADVANCE_BATCH(batch);

    /* Disable DS */
    BEGIN_BATCH(batch, 7);
    OUT_BATCH(batch, GEN7_3DSTATE_CONSTANT_DS | (7 - 2));
    OUT_BATCH(batch, 0);
    OUT_BATCH(batch, 0);
    OUT_BATCH(batch, 0);
    OUT_BATCH(batch, 0);
    OUT_BATCH(batch, 0);
    OUT_BATCH(batch, 0);
    ADVANCE_BATCH(batch);

    BEGIN_BATCH(batch, 6);
    OUT_BATCH(batch, GEN7_3DSTATE_DS | (6 - 2));
    OUT_BATCH(batch, 0);
    OUT_BATCH(batch, 0);
    OUT_BATCH(batch, 0);
    OUT_BATCH(batch, 0);
    OUT_BATCH(batch, 0);
    ADVANCE_BATCH(batch);

    BEGIN_BATCH(batch, 2);
    OUT_BATCH(batch, GEN7_3DSTATE_BINDING_TABLE_POINTERS_DS | (2 - 2));
    OUT_BATCH(batch, 0);
    ADVANCE_BATCH(batch);

    /* Disable STREAMOUT */
    BEGIN_BATCH(batch, 3);
    OUT_BATCH(batch, GEN7_3DSTATE_STREAMOUT | (3 - 2));
    OUT_BATCH(batch, 0);
    OUT_BATCH(batch, 0);
    ADVANCE_BATCH(batch);
}
2633
2634 static void 
2635 gen7_emit_clip_state(VADriverContextP ctx)
2636 {
2637     struct i965_driver_data *i965 = i965_driver_data(ctx);
2638     struct intel_batchbuffer *batch = i965->batch;
2639
2640     OUT_BATCH(batch, GEN6_3DSTATE_CLIP | (4 - 2));
2641     OUT_BATCH(batch, 0);
2642     OUT_BATCH(batch, 0); /* pass-through */
2643     OUT_BATCH(batch, 0);
2644 }
2645
/*
 * Emit the setup-backend (SBE) and strips-and-fans (SF) state:
 * SBE reads one attribute from the URB (one output, read length 1,
 * offset 0); SF disables culling and selects provoking vertex 2 for
 * trifans.
 */
static void
gen7_emit_sf_state(VADriverContextP ctx)
{
    struct i965_driver_data *i965 = i965_driver_data(ctx);
    struct intel_batchbuffer *batch = i965->batch;

    BEGIN_BATCH(batch, 14);
    OUT_BATCH(batch, GEN7_3DSTATE_SBE | (14 - 2));
    OUT_BATCH(batch,
              (1 << GEN7_SBE_NUM_OUTPUTS_SHIFT) |
              (1 << GEN7_SBE_URB_ENTRY_READ_LENGTH_SHIFT) |
              (0 << GEN7_SBE_URB_ENTRY_READ_OFFSET_SHIFT));
    OUT_BATCH(batch, 0);
    OUT_BATCH(batch, 0);
    OUT_BATCH(batch, 0); /* DW4 */
    OUT_BATCH(batch, 0);
    OUT_BATCH(batch, 0);
    OUT_BATCH(batch, 0);
    OUT_BATCH(batch, 0);
    OUT_BATCH(batch, 0); /* DW9 */
    OUT_BATCH(batch, 0);
    OUT_BATCH(batch, 0);
    OUT_BATCH(batch, 0);
    OUT_BATCH(batch, 0);
    ADVANCE_BATCH(batch);

    BEGIN_BATCH(batch, 7);
    OUT_BATCH(batch, GEN6_3DSTATE_SF | (7 - 2));
    OUT_BATCH(batch, 0);
    OUT_BATCH(batch, GEN6_3DSTATE_SF_CULL_NONE);
    OUT_BATCH(batch, 2 << GEN6_3DSTATE_SF_TRIFAN_PROVOKE_SHIFT);
    OUT_BATCH(batch, 0);
    OUT_BATCH(batch, 0);
    OUT_BATCH(batch, 0);
    ADVANCE_BATCH(batch);
}
2682
/*
 * Program the windower/pixel-shader stage for the render kernel
 * selected by 'kernel' (an index into render_state->render_kernels):
 * 3DSTATE_WM enables dispatch with perspective pixel barycentrics,
 * 3DSTATE_CONSTANT_PS binds the CURBE buffer as push constants, and
 * 3DSTATE_PS binds the kernel with SIMD16 dispatch.
 */
static void
gen7_emit_wm_state(VADriverContextP ctx, int kernel)
{
    struct i965_driver_data *i965 = i965_driver_data(ctx);
    struct intel_batchbuffer *batch = i965->batch;
    struct i965_render_state *render_state = &i965->render_state;

    BEGIN_BATCH(batch, 3);
    OUT_BATCH(batch, GEN6_3DSTATE_WM | (3 - 2));
    OUT_BATCH(batch,
              GEN7_WM_DISPATCH_ENABLE |
              GEN7_WM_PERSPECTIVE_PIXEL_BARYCENTRIC);
    OUT_BATCH(batch, 0);
    ADVANCE_BATCH(batch);

    BEGIN_BATCH(batch, 7);
    OUT_BATCH(batch, GEN6_3DSTATE_CONSTANT_PS | (7 - 2));
    /* DW1 = 1: presumably the read length of constant buffer 0 --
     * confirm against the Gen7 PRM */
    OUT_BATCH(batch, 1);
    OUT_BATCH(batch, 0);
    OUT_RELOC(batch,
              render_state->curbe.bo,
              I915_GEM_DOMAIN_INSTRUCTION, 0,
              0);
    OUT_BATCH(batch, 0);
    OUT_BATCH(batch, 0);
    OUT_BATCH(batch, 0);
    ADVANCE_BATCH(batch);

    BEGIN_BATCH(batch, 8);
    OUT_BATCH(batch, GEN7_3DSTATE_PS | (8 - 2));
    OUT_RELOC(batch,
              render_state->render_kernels[kernel].bo,
              I915_GEM_DOMAIN_INSTRUCTION, 0,
              0);
    OUT_BATCH(batch,
              (1 << GEN7_PS_SAMPLER_COUNT_SHIFT) |
              (5 << GEN7_PS_BINDING_TABLE_ENTRY_COUNT_SHIFT));
    OUT_BATCH(batch, 0); /* scratch space base offset */
    /* 86 threads max (field is biased by one) */
    OUT_BATCH(batch,
              ((86 - 1) << GEN7_PS_MAX_THREADS_SHIFT) |
              GEN7_PS_PUSH_CONSTANT_ENABLE |
              GEN7_PS_ATTRIBUTE_ENABLE |
              GEN7_PS_16_DISPATCH_ENABLE);
    OUT_BATCH(batch,
              (6 << GEN7_PS_DISPATCH_START_GRF_SHIFT_0));
    OUT_BATCH(batch, 0); /* kernel 1 pointer */
    OUT_BATCH(batch, 0); /* kernel 2 pointer */
    ADVANCE_BATCH(batch);
}
2732
2733 static void
2734 gen7_emit_vertex_element_state(VADriverContextP ctx)
2735 {
2736     struct i965_driver_data *i965 = i965_driver_data(ctx);
2737     struct intel_batchbuffer *batch = i965->batch;
2738
2739     /* Set up our vertex elements, sourced from the single vertex buffer. */
2740     OUT_BATCH(batch, CMD_VERTEX_ELEMENTS | (5 - 2));
2741     /* offset 0: X,Y -> {X, Y, 1.0, 1.0} */
2742     OUT_BATCH(batch, (0 << GEN6_VE0_VERTEX_BUFFER_INDEX_SHIFT) |
2743               GEN6_VE0_VALID |
2744               (I965_SURFACEFORMAT_R32G32_FLOAT << VE0_FORMAT_SHIFT) |
2745               (0 << VE0_OFFSET_SHIFT));
2746     OUT_BATCH(batch, (I965_VFCOMPONENT_STORE_SRC << VE1_VFCOMPONENT_0_SHIFT) |
2747               (I965_VFCOMPONENT_STORE_SRC << VE1_VFCOMPONENT_1_SHIFT) |
2748               (I965_VFCOMPONENT_STORE_1_FLT << VE1_VFCOMPONENT_2_SHIFT) |
2749               (I965_VFCOMPONENT_STORE_1_FLT << VE1_VFCOMPONENT_3_SHIFT));
2750     /* offset 8: S0, T0 -> {S0, T0, 1.0, 1.0} */
2751     OUT_BATCH(batch, (0 << GEN6_VE0_VERTEX_BUFFER_INDEX_SHIFT) |
2752               GEN6_VE0_VALID |
2753               (I965_SURFACEFORMAT_R32G32_FLOAT << VE0_FORMAT_SHIFT) |
2754               (8 << VE0_OFFSET_SHIFT));
2755     OUT_BATCH(batch, (I965_VFCOMPONENT_STORE_SRC << VE1_VFCOMPONENT_0_SHIFT) | 
2756               (I965_VFCOMPONENT_STORE_SRC << VE1_VFCOMPONENT_1_SHIFT) |
2757               (I965_VFCOMPONENT_STORE_1_FLT << VE1_VFCOMPONENT_2_SHIFT) |
2758               (I965_VFCOMPONENT_STORE_1_FLT << VE1_VFCOMPONENT_3_SHIFT));
2759 }
2760
/*
 * Bind the vertex buffer and kick the draw: three vertices of four
 * floats each (x, y, s, t -- pitch 4 * 4 bytes, end address at byte
 * 12 * 4), drawn as a RECTLIST primitive with sequential vertex fetch.
 */
static void
gen7_emit_vertices(VADriverContextP ctx)
{
    struct i965_driver_data *i965 = i965_driver_data(ctx);
    struct intel_batchbuffer *batch = i965->batch;
    struct i965_render_state *render_state = &i965->render_state;

    BEGIN_BATCH(batch, 5);
    OUT_BATCH(batch, CMD_VERTEX_BUFFERS | (5 - 2));
    OUT_BATCH(batch,
              (0 << GEN6_VB0_BUFFER_INDEX_SHIFT) |
              GEN6_VB0_VERTEXDATA |
              GEN7_VB0_ADDRESS_MODIFYENABLE |
              ((4 * 4) << VB0_BUFFER_PITCH_SHIFT));
    /* start address, then end address (3 vertices * 16 bytes) */
    OUT_RELOC(batch, render_state->vb.vertex_buffer, I915_GEM_DOMAIN_VERTEX, 0, 0);
    OUT_RELOC(batch, render_state->vb.vertex_buffer, I915_GEM_DOMAIN_VERTEX, 0, 12 * 4);
    OUT_BATCH(batch, 0);
    ADVANCE_BATCH(batch);

    BEGIN_BATCH(batch, 7);
    OUT_BATCH(batch, CMD_3DPRIMITIVE | (7 - 2));
    OUT_BATCH(batch,
              _3DPRIM_RECTLIST |
              GEN7_3DPRIM_VERTEXBUFFER_ACCESS_SEQUENTIAL);
    OUT_BATCH(batch, 3); /* vertex count per instance */
    OUT_BATCH(batch, 0); /* start vertex offset */
    OUT_BATCH(batch, 1); /* single instance */
    OUT_BATCH(batch, 0); /* start instance location */
    OUT_BATCH(batch, 0);
    ADVANCE_BATCH(batch);
}
2792
/*
 * Emit the complete Gen7 3D pipeline setup and the draw for the
 * render kernel selected by 'kernel'.  The whole sequence is emitted
 * atomically (0x1000 bytes reserved up front) after an MI_FLUSH; the
 * ordering of the gen7_emit_*() calls is part of the hardware
 * programming sequence and must not be rearranged casually.
 */
static void
gen7_render_emit_states(VADriverContextP ctx, int kernel)
{
    struct i965_driver_data *i965 = i965_driver_data(ctx);
    struct intel_batchbuffer *batch = i965->batch;

    intel_batchbuffer_start_atomic(batch, 0x1000);
    intel_batchbuffer_emit_mi_flush(batch);
    gen7_emit_invarient_states(ctx);
    gen7_emit_state_base_address(ctx);
    gen7_emit_viewport_state_pointers(ctx);
    gen7_emit_urb(ctx);
    gen7_emit_cc_state_pointers(ctx);
    gen7_emit_sampler_state_pointers(ctx);
    gen7_emit_bypass_state(ctx);
    gen7_emit_vs_state(ctx);
    gen7_emit_clip_state(ctx);
    gen7_emit_sf_state(ctx);
    gen7_emit_wm_state(ctx, kernel);
    gen7_emit_binding_table(ctx);
    gen7_emit_depth_buffer_state(ctx);
    gen7_emit_drawing_rectangle(ctx);
    gen7_emit_vertex_element_state(ctx);
    gen7_emit_vertices(ctx);
    intel_batchbuffer_end_atomic(batch);
}
2819
/*
 * Gen7 entry point for blitting a video surface to the destination:
 * (re)build the indirect state, clear the destination region, emit
 * the pipeline + draw with the regular PS kernel, and flush the batch.
 *
 * 'flags' is currently unused by this path (accepted for interface
 * parity with the other generations' put_surface hooks).
 */
static void
gen7_render_put_surface(
    VADriverContextP   ctx,
    VASurfaceID        surface,
    const VARectangle *src_rect,
    const VARectangle *dst_rect,
    unsigned int       flags
)
{
    struct i965_driver_data *i965 = i965_driver_data(ctx);
    struct intel_batchbuffer *batch = i965->batch;

    gen7_render_initialize(ctx);
    gen7_render_setup_states(ctx, surface, src_rect, dst_rect);
    i965_clear_dest_region(ctx);
    gen7_render_emit_states(ctx, PS_KERNEL);
    intel_batchbuffer_flush(batch);
}
2838
2839 static void
2840 gen7_subpicture_render_blend_state(VADriverContextP ctx)
2841 {
2842     struct i965_driver_data *i965 = i965_driver_data(ctx);
2843     struct i965_render_state *render_state = &i965->render_state;
2844     struct gen6_blend_state *blend_state;
2845
2846     dri_bo_unmap(render_state->cc.state);    
2847     dri_bo_map(render_state->cc.blend, 1);
2848     assert(render_state->cc.blend->virtual);
2849     blend_state = render_state->cc.blend->virtual;
2850     memset(blend_state, 0, sizeof(*blend_state));
2851     blend_state->blend0.dest_blend_factor = I965_BLENDFACTOR_INV_SRC_ALPHA;
2852     blend_state->blend0.source_blend_factor = I965_BLENDFACTOR_SRC_ALPHA;
2853     blend_state->blend0.blend_func = I965_BLENDFUNCTION_ADD;
2854     blend_state->blend0.blend_enable = 1;
2855     blend_state->blend1.post_blend_clamp_enable = 1;
2856     blend_state->blend1.pre_blend_clamp_enable = 1;
2857     blend_state->blend1.clamp_range = 0; /* clamp range [0, 1] */
2858     dri_bo_unmap(render_state->cc.blend);
2859 }
2860
/*
 * Build all indirect state for subpicture rendering: destination and
 * subpicture source surface states, samplers, CC viewport, color-calc,
 * alpha-blend and depth/stencil state, then the subpicture vertex data.
 * src_rect is currently unused here (kept for interface symmetry).
 */
static void
gen7_subpicture_render_setup_states(
    VADriverContextP   ctx,
    VASurfaceID        surface,
    const VARectangle *src_rect,
    const VARectangle *dst_rect
)
{
    i965_render_dest_surface_state(ctx, 0);
    i965_subpic_render_src_surfaces_state(ctx, surface);
    i965_render_sampler(ctx);
    i965_render_cc_viewport(ctx);
    gen7_render_color_calc_state(ctx);
    gen7_subpicture_render_blend_state(ctx);
    gen7_render_depth_stencil_state(ctx);
    i965_subpic_render_upload_vertex(ctx, surface, dst_rect);
}
2878
2879 static void
2880 gen7_render_put_subpicture(
2881     VADriverContextP   ctx,
2882     VASurfaceID        surface,
2883     const VARectangle *src_rect,
2884     const VARectangle *dst_rect
2885 )
2886 {
2887     struct i965_driver_data *i965 = i965_driver_data(ctx);
2888     struct intel_batchbuffer *batch = i965->batch;
2889     struct object_surface *obj_surface = SURFACE(surface);
2890     struct object_subpic *obj_subpic = SUBPIC(obj_surface->subpic);
2891
2892     assert(obj_subpic);
2893     gen7_render_initialize(ctx);
2894     gen7_subpicture_render_setup_states(ctx, surface, src_rect, dst_rect);
2895     gen7_render_emit_states(ctx, PS_SUBPIC_KERNEL);
2896     i965_render_upload_image_palette(ctx, obj_subpic->image, 0xff);
2897     intel_batchbuffer_flush(batch);
2898 }
2899
2900
2901 /*
2902  * global functions
2903  */
2904 VAStatus 
2905 i965_DestroySurfaces(VADriverContextP ctx,
2906                      VASurfaceID *surface_list,
2907                      int num_surfaces);
2908 void
2909 intel_render_put_surface(
2910     VADriverContextP   ctx,
2911     VASurfaceID        surface,
2912     const VARectangle *src_rect,
2913     const VARectangle *dst_rect,
2914     unsigned int       flags
2915 )
2916 {
2917     struct i965_driver_data *i965 = i965_driver_data(ctx);
2918     int has_done_scaling = 0;
2919     VASurfaceID in_surface_id = surface;
2920     VASurfaceID out_surface_id = i965_post_processing(ctx, surface, src_rect, dst_rect, flags, &has_done_scaling);
2921
2922     assert((!has_done_scaling) || (out_surface_id != VA_INVALID_ID));
2923
2924     if (out_surface_id != VA_INVALID_ID)
2925         in_surface_id = out_surface_id;
2926
2927     if (IS_GEN7(i965->intel.device_id))
2928         gen7_render_put_surface(ctx, in_surface_id, has_done_scaling ? dst_rect : src_rect, dst_rect, flags);
2929     else if (IS_GEN6(i965->intel.device_id))
2930         gen6_render_put_surface(ctx, in_surface_id, has_done_scaling ? dst_rect : src_rect, dst_rect, flags);
2931     else
2932         i965_render_put_surface(ctx, in_surface_id, has_done_scaling ? dst_rect : src_rect, dst_rect, flags);
2933
2934     if (in_surface_id != surface)
2935         i965_DestroySurfaces(ctx, &in_surface_id, 1);
2936 }
2937
2938 void
2939 intel_render_put_subpicture(
2940     VADriverContextP   ctx,
2941     VASurfaceID        surface,
2942     const VARectangle *src_rect,
2943     const VARectangle *dst_rect
2944 )
2945 {
2946     struct i965_driver_data *i965 = i965_driver_data(ctx);
2947
2948     if (IS_GEN7(i965->intel.device_id))
2949         gen7_render_put_subpicture(ctx, surface, src_rect, dst_rect);
2950     else if (IS_GEN6(i965->intel.device_id))
2951         gen6_render_put_subpicture(ctx, surface, src_rect, dst_rect);
2952     else
2953         i965_render_put_subpicture(ctx, surface, src_rect, dst_rect);
2954 }
2955
2956 Bool 
2957 i965_render_init(VADriverContextP ctx)
2958 {
2959     struct i965_driver_data *i965 = i965_driver_data(ctx);
2960     struct i965_render_state *render_state = &i965->render_state;
2961     int i;
2962
2963     /* kernel */
2964     assert(NUM_RENDER_KERNEL == (sizeof(render_kernels_gen5) / 
2965                                  sizeof(render_kernels_gen5[0])));
2966     assert(NUM_RENDER_KERNEL == (sizeof(render_kernels_gen6) / 
2967                                  sizeof(render_kernels_gen6[0])));
2968
2969     if (IS_GEN7(i965->intel.device_id))
2970         memcpy(render_state->render_kernels, render_kernels_gen7, sizeof(render_state->render_kernels));
2971     else if (IS_GEN6(i965->intel.device_id))
2972         memcpy(render_state->render_kernels, render_kernels_gen6, sizeof(render_state->render_kernels));
2973     else if (IS_IRONLAKE(i965->intel.device_id))
2974         memcpy(render_state->render_kernels, render_kernels_gen5, sizeof(render_state->render_kernels));
2975     else
2976         memcpy(render_state->render_kernels, render_kernels_gen4, sizeof(render_state->render_kernels));
2977
2978     for (i = 0; i < NUM_RENDER_KERNEL; i++) {
2979         struct i965_kernel *kernel = &render_state->render_kernels[i];
2980
2981         if (!kernel->size)
2982             continue;
2983
2984         kernel->bo = dri_bo_alloc(i965->intel.bufmgr, 
2985                                   kernel->name, 
2986                                   kernel->size, 0x1000);
2987         assert(kernel->bo);
2988         dri_bo_subdata(kernel->bo, 0, kernel->size, kernel->bin);
2989     }
2990
2991     /* constant buffer */
2992     render_state->curbe.bo = dri_bo_alloc(i965->intel.bufmgr,
2993                       "constant buffer",
2994                       4096, 64);
2995     assert(render_state->curbe.bo);
2996
2997     return True;
2998 }
2999
3000 Bool 
3001 i965_render_terminate(VADriverContextP ctx)
3002 {
3003     int i;
3004     struct i965_driver_data *i965 = i965_driver_data(ctx);
3005     struct i965_render_state *render_state = &i965->render_state;
3006
3007     dri_bo_unreference(render_state->curbe.bo);
3008     render_state->curbe.bo = NULL;
3009
3010     for (i = 0; i < NUM_RENDER_KERNEL; i++) {
3011         struct i965_kernel *kernel = &render_state->render_kernels[i];
3012         
3013         dri_bo_unreference(kernel->bo);
3014         kernel->bo = NULL;
3015     }
3016
3017     dri_bo_unreference(render_state->vb.vertex_buffer);
3018     render_state->vb.vertex_buffer = NULL;
3019     dri_bo_unreference(render_state->vs.state);
3020     render_state->vs.state = NULL;
3021     dri_bo_unreference(render_state->sf.state);
3022     render_state->sf.state = NULL;
3023     dri_bo_unreference(render_state->wm.sampler);
3024     render_state->wm.sampler = NULL;
3025     dri_bo_unreference(render_state->wm.state);
3026     render_state->wm.state = NULL;
3027     dri_bo_unreference(render_state->wm.surface_state_binding_table_bo);
3028     dri_bo_unreference(render_state->cc.viewport);
3029     render_state->cc.viewport = NULL;
3030     dri_bo_unreference(render_state->cc.state);
3031     render_state->cc.state = NULL;
3032     dri_bo_unreference(render_state->cc.blend);
3033     render_state->cc.blend = NULL;
3034     dri_bo_unreference(render_state->cc.depth_stencil);
3035     render_state->cc.depth_stencil = NULL;
3036
3037     if (render_state->draw_region) {
3038         dri_bo_unreference(render_state->draw_region->bo);
3039         free(render_state->draw_region);
3040         render_state->draw_region = NULL;
3041     }
3042
3043     return True;
3044 }
3045