Avoid depending on va_backend.h for some files
[platform/upstream/libva-intel-driver.git] / src / i965_render.c
/*
 * Copyright © 2006 Intel Corporation
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice (including the next
 * paragraph) shall be included in all copies or substantial portions of the
 * Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
 * DEALINGS IN THE SOFTWARE.
 *
 * Authors:
 *    Eric Anholt <eric@anholt.net>
 *    Keith Packard <keithp@keithp.com>
 *    Xiang Haihao <haihao.xiang@intel.com>
 *
 */

/*
 * Most of the rendering code is ported from xf86-video-intel/src/i965_video.c
 */

#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <assert.h>

#include <va/va_dricommon.h>

#include "intel_batchbuffer.h"
#include "intel_driver.h"
#include "i965_defines.h"
#include "i965_drv_video.h"
#include "i965_structs.h"

#include "i965_render.h"

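/*
 * Pre-compiled shader kernels.  The .g4b/.g6b/.g7b files are GEN binary
 * dumps included as arrays of 128-bit instructions (four uint32_t words
 * each); one set of SF/PS kernels is provided per hardware generation.
 */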
#define SF_KERNEL_NUM_GRF       16
#define SF_MAX_THREADS          1

static const uint32_t sf_kernel_static[][4] =
{
#include "shaders/render/exa_sf.g4b"
};

#define PS_KERNEL_NUM_GRF       32
#define PS_MAX_THREADS          32

#define I965_GRF_BLOCKS(nreg)   ((nreg + 15) / 16 - 1)

static const uint32_t ps_kernel_static[][4] =
{
#include "shaders/render/exa_wm_xy.g4b"
#include "shaders/render/exa_wm_src_affine.g4b"
#include "shaders/render/exa_wm_src_sample_planar.g4b"
#include "shaders/render/exa_wm_yuv_rgb.g4b"
#include "shaders/render/exa_wm_write.g4b"
};

static const uint32_t ps_subpic_kernel_static[][4] =
{
#include "shaders/render/exa_wm_xy.g4b"
#include "shaders/render/exa_wm_src_affine.g4b"
#include "shaders/render/exa_wm_src_sample_argb.g4b"
#include "shaders/render/exa_wm_write.g4b"
};

/* On IRONLAKE */
static const uint32_t sf_kernel_static_gen5[][4] =
{
#include "shaders/render/exa_sf.g4b.gen5"
};

static const uint32_t ps_kernel_static_gen5[][4] =
{
#include "shaders/render/exa_wm_xy.g4b.gen5"
#include "shaders/render/exa_wm_src_affine.g4b.gen5"
#include "shaders/render/exa_wm_src_sample_planar.g4b.gen5"
#include "shaders/render/exa_wm_yuv_rgb.g4b.gen5"
#include "shaders/render/exa_wm_write.g4b.gen5"
};

static const uint32_t ps_subpic_kernel_static_gen5[][4] =
{
#include "shaders/render/exa_wm_xy.g4b.gen5"
#include "shaders/render/exa_wm_src_affine.g4b.gen5"
#include "shaders/render/exa_wm_src_sample_argb.g4b.gen5"
#include "shaders/render/exa_wm_write.g4b.gen5"
};

/* programs for Sandybridge */
static const uint32_t sf_kernel_static_gen6[][4] =
{
};

static const uint32_t ps_kernel_static_gen6[][4] = {
#include "shaders/render/exa_wm_src_affine.g6b"
#include "shaders/render/exa_wm_src_sample_planar.g6b"
#include "shaders/render/exa_wm_yuv_rgb.g6b"
#include "shaders/render/exa_wm_write.g6b"
};

static const uint32_t ps_subpic_kernel_static_gen6[][4] = {
#include "shaders/render/exa_wm_src_affine.g6b"
#include "shaders/render/exa_wm_src_sample_argb.g6b"
#include "shaders/render/exa_wm_write.g6b"
};

/* programs for Ivybridge */
static const uint32_t sf_kernel_static_gen7[][4] =
{
};

static const uint32_t ps_kernel_static_gen7[][4] = {
#include "shaders/render/exa_wm_src_affine.g7b"
#include "shaders/render/exa_wm_src_sample_planar.g7b"
#include "shaders/render/exa_wm_yuv_rgb.g7b"
#include "shaders/render/exa_wm_write.g7b"
};

static const uint32_t ps_subpic_kernel_static_gen7[][4] = {
#include "shaders/render/exa_wm_src_affine.g7b"
#include "shaders/render/exa_wm_src_sample_argb.g7b"
#include "shaders/render/exa_wm_write.g7b"
};

#define SURFACE_STATE_PADDED_SIZE_I965  ALIGN(sizeof(struct i965_surface_state), 32)
#define SURFACE_STATE_PADDED_SIZE_GEN7  ALIGN(sizeof(struct gen7_surface_state), 32)
#define SURFACE_STATE_PADDED_SIZE       MAX(SURFACE_STATE_PADDED_SIZE_I965, SURFACE_STATE_PADDED_SIZE_GEN7)
#define SURFACE_STATE_OFFSET(index)     (SURFACE_STATE_PADDED_SIZE * (index))
#define BINDING_TABLE_OFFSET            SURFACE_STATE_OFFSET(MAX_RENDER_SURFACES)

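/* Type-pun a float to its IEEE-754 bit pattern so it can be emitted
 * into the batch buffer as a DWORD (see i965_render_constant_color()). */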
static uint32_t float_to_uint(float f)
{
    union {
        uint32_t i;
        float f;
    } x;

    x.f = f;
    return x.i;
}

enum
{
    SF_KERNEL = 0,
    PS_KERNEL,
    PS_SUBPIC_KERNEL
};

static struct i965_kernel render_kernels_gen4[] = {
    {
        "SF",
        SF_KERNEL,
        sf_kernel_static,
        sizeof(sf_kernel_static),
        NULL
    },
    {
        "PS",
        PS_KERNEL,
        ps_kernel_static,
        sizeof(ps_kernel_static),
        NULL
    },
    {
        "PS_SUBPIC",
        PS_SUBPIC_KERNEL,
        ps_subpic_kernel_static,
        sizeof(ps_subpic_kernel_static),
        NULL
    }
};

static struct i965_kernel render_kernels_gen5[] = {
    {
        "SF",
        SF_KERNEL,
        sf_kernel_static_gen5,
        sizeof(sf_kernel_static_gen5),
        NULL
    },
    {
        "PS",
        PS_KERNEL,
        ps_kernel_static_gen5,
        sizeof(ps_kernel_static_gen5),
        NULL
    },
    {
        "PS_SUBPIC",
        PS_SUBPIC_KERNEL,
        ps_subpic_kernel_static_gen5,
        sizeof(ps_subpic_kernel_static_gen5),
        NULL
    }
};

static struct i965_kernel render_kernels_gen6[] = {
    {
        "SF",
        SF_KERNEL,
        sf_kernel_static_gen6,
        sizeof(sf_kernel_static_gen6),
        NULL
    },
    {
        "PS",
        PS_KERNEL,
        ps_kernel_static_gen6,
        sizeof(ps_kernel_static_gen6),
        NULL
    },
    {
        "PS_SUBPIC",
        PS_SUBPIC_KERNEL,
        ps_subpic_kernel_static_gen6,
        sizeof(ps_subpic_kernel_static_gen6),
        NULL
    }
};

static struct i965_kernel render_kernels_gen7[] = {
    {
        "SF",
        SF_KERNEL,
        sf_kernel_static_gen7,
        sizeof(sf_kernel_static_gen7),
        NULL
    },
    {
        "PS",
        PS_KERNEL,
        ps_kernel_static_gen7,
        sizeof(ps_kernel_static_gen7),
        NULL
    },
    {
        "PS_SUBPIC",
        PS_SUBPIC_KERNEL,
        ps_subpic_kernel_static_gen7,
        sizeof(ps_subpic_kernel_static_gen7),
        NULL
    }
};

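/*
 * Static URB partitioning.  GS and CLIP are disabled (zero entries);
 * VS, SF and CS get the minimal allocations consumed by the unit state
 * below and by i965_render_urb_layout()/i965_render_cs_urb_layout().
 */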
#define URB_VS_ENTRIES        8
#define URB_VS_ENTRY_SIZE     1

#define URB_GS_ENTRIES        0
#define URB_GS_ENTRY_SIZE     0

#define URB_CLIP_ENTRIES      0
#define URB_CLIP_ENTRY_SIZE   0

#define URB_SF_ENTRIES        1
#define URB_SF_ENTRY_SIZE     2

#define URB_CS_ENTRIES        1
#define URB_CS_ENTRY_SIZE     1

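/*
 * Set up the VS unit as pass-through: the VS thread is disabled and the
 * vertex cache is turned off, so vertices flow to the SF stage
 * unmodified.  On Ironlake the nr_urb_entries field is programmed in
 * units of four entries, hence the >> 2.
 */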
static void
i965_render_vs_unit(VADriverContextP ctx)
{
    struct i965_driver_data *i965 = i965_driver_data(ctx);
    struct i965_render_state *render_state = &i965->render_state;
    struct i965_vs_unit_state *vs_state;

    dri_bo_map(render_state->vs.state, 1);
    assert(render_state->vs.state->virtual);
    vs_state = render_state->vs.state->virtual;
    memset(vs_state, 0, sizeof(*vs_state));

    if (IS_IRONLAKE(i965->intel.device_id))
        vs_state->thread4.nr_urb_entries = URB_VS_ENTRIES >> 2;
    else
        vs_state->thread4.nr_urb_entries = URB_VS_ENTRIES;

    vs_state->thread4.urb_entry_allocation_size = URB_VS_ENTRY_SIZE - 1;
    vs_state->vs6.vs_enable = 0;
    vs_state->vs6.vert_cache_disable = 1;

    dri_bo_unmap(render_state->vs.state);
}

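/*
 * Set up the SF unit: thread0 points at the SF kernel (patched in via a
 * relocation once the kernel BO has an address), the viewport transform
 * is skipped, and the 0x8 destination origin biases place sampling at
 * pixel centers.
 */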
static void
i965_render_sf_unit(VADriverContextP ctx)
{
    struct i965_driver_data *i965 = i965_driver_data(ctx);
    struct i965_render_state *render_state = &i965->render_state;
    struct i965_sf_unit_state *sf_state;

    dri_bo_map(render_state->sf.state, 1);
    assert(render_state->sf.state->virtual);
    sf_state = render_state->sf.state->virtual;
    memset(sf_state, 0, sizeof(*sf_state));

    sf_state->thread0.grf_reg_count = I965_GRF_BLOCKS(SF_KERNEL_NUM_GRF);
    sf_state->thread0.kernel_start_pointer = render_state->render_kernels[SF_KERNEL].bo->offset >> 6;

    sf_state->sf1.single_program_flow = 1; /* XXX */
    sf_state->sf1.binding_table_entry_count = 0;
    sf_state->sf1.thread_priority = 0;
    sf_state->sf1.floating_point_mode = 0; /* Mesa does this */
    sf_state->sf1.illegal_op_exception_enable = 1;
    sf_state->sf1.mask_stack_exception_enable = 1;
    sf_state->sf1.sw_exception_enable = 1;

    /* scratch space is not used in our kernel */
    sf_state->thread2.per_thread_scratch_space = 0;
    sf_state->thread2.scratch_space_base_pointer = 0;

    sf_state->thread3.const_urb_entry_read_length = 0; /* no const URBs */
    sf_state->thread3.const_urb_entry_read_offset = 0; /* no const URBs */
    sf_state->thread3.urb_entry_read_length = 1; /* 1 URB per vertex */
    sf_state->thread3.urb_entry_read_offset = 0;
    sf_state->thread3.dispatch_grf_start_reg = 3;

    sf_state->thread4.max_threads = SF_MAX_THREADS - 1;
    sf_state->thread4.urb_entry_allocation_size = URB_SF_ENTRY_SIZE - 1;
    sf_state->thread4.nr_urb_entries = URB_SF_ENTRIES;
    sf_state->thread4.stats_enable = 1;

    sf_state->sf5.viewport_transform = 0; /* skip viewport */

    sf_state->sf6.cull_mode = I965_CULLMODE_NONE;
    sf_state->sf6.scissor = 0;

    sf_state->sf7.trifan_pv = 2;

    sf_state->sf6.dest_org_vbias = 0x8;
    sf_state->sf6.dest_org_hbias = 0x8;

    dri_bo_emit_reloc(render_state->sf.state,
                      I915_GEM_DOMAIN_INSTRUCTION, 0,
                      sf_state->thread0.grf_reg_count << 1,
                      offsetof(struct i965_sf_unit_state, thread0),
                      render_state->render_kernels[SF_KERNEL].bo);

    dri_bo_unmap(render_state->sf.state);
}

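/*
 * Program one sampler per source surface plane in use: bilinear min/mag
 * filtering with all texture coordinates clamped at the edges.
 */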
static void
i965_render_sampler(VADriverContextP ctx)
{
    struct i965_driver_data *i965 = i965_driver_data(ctx);
    struct i965_render_state *render_state = &i965->render_state;
    struct i965_sampler_state *sampler_state;
    int i;

    assert(render_state->wm.sampler_count > 0);
    assert(render_state->wm.sampler_count <= MAX_SAMPLERS);

    dri_bo_map(render_state->wm.sampler, 1);
    assert(render_state->wm.sampler->virtual);
    sampler_state = render_state->wm.sampler->virtual;

    for (i = 0; i < render_state->wm.sampler_count; i++) {
        memset(sampler_state, 0, sizeof(*sampler_state));
        sampler_state->ss0.min_filter = I965_MAPFILTER_LINEAR;
        sampler_state->ss0.mag_filter = I965_MAPFILTER_LINEAR;
        sampler_state->ss1.r_wrap_mode = I965_TEXCOORDMODE_CLAMP;
        sampler_state->ss1.s_wrap_mode = I965_TEXCOORDMODE_CLAMP;
        sampler_state->ss1.t_wrap_mode = I965_TEXCOORDMODE_CLAMP;
        sampler_state++;
    }

    dri_bo_unmap(render_state->wm.sampler);
}

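/*
 * WM (pixel shader) unit state for subpicture blending: dispatches the
 * ARGB sampling kernel.  On Ironlake, sampler_count and
 * binding_table_entry_count must be zero in the unit state (a hardware
 * requirement noted below), and a larger thread count is available.
 */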
static void
i965_subpic_render_wm_unit(VADriverContextP ctx)
{
    struct i965_driver_data *i965 = i965_driver_data(ctx);
    struct i965_render_state *render_state = &i965->render_state;
    struct i965_wm_unit_state *wm_state;

    assert(render_state->wm.sampler);

    dri_bo_map(render_state->wm.state, 1);
    assert(render_state->wm.state->virtual);
    wm_state = render_state->wm.state->virtual;
    memset(wm_state, 0, sizeof(*wm_state));

    wm_state->thread0.grf_reg_count = I965_GRF_BLOCKS(PS_KERNEL_NUM_GRF);
    wm_state->thread0.kernel_start_pointer = render_state->render_kernels[PS_SUBPIC_KERNEL].bo->offset >> 6;

    wm_state->thread1.single_program_flow = 1; /* XXX */

    if (IS_IRONLAKE(i965->intel.device_id))
        wm_state->thread1.binding_table_entry_count = 0; /* hardware requirement */
    else
        wm_state->thread1.binding_table_entry_count = 7;

    /* scratch space is not used by the kernel */
    wm_state->thread2.scratch_space_base_pointer = 0;
    wm_state->thread2.per_thread_scratch_space = 0;

    wm_state->thread3.dispatch_grf_start_reg = 3; /* XXX */
    wm_state->thread3.const_urb_entry_read_length = 0;
    wm_state->thread3.const_urb_entry_read_offset = 0;
    wm_state->thread3.urb_entry_read_length = 1; /* XXX */
    wm_state->thread3.urb_entry_read_offset = 0; /* XXX */

    wm_state->wm4.stats_enable = 0;
    wm_state->wm4.sampler_state_pointer = render_state->wm.sampler->offset >> 5;

    if (IS_IRONLAKE(i965->intel.device_id)) {
        wm_state->wm4.sampler_count = 0;        /* hardware requirement */
        wm_state->wm5.max_threads = 12 * 6 - 1;
    } else {
        wm_state->wm4.sampler_count = (render_state->wm.sampler_count + 3) / 4;
        wm_state->wm5.max_threads = 10 * 5 - 1;
    }

    wm_state->wm5.thread_dispatch_enable = 1;
    wm_state->wm5.enable_16_pix = 1;
    wm_state->wm5.enable_8_pix = 0;
    wm_state->wm5.early_depth_test = 1;

    dri_bo_emit_reloc(render_state->wm.state,
                      I915_GEM_DOMAIN_INSTRUCTION, 0,
                      wm_state->thread0.grf_reg_count << 1,
                      offsetof(struct i965_wm_unit_state, thread0),
                      render_state->render_kernels[PS_SUBPIC_KERNEL].bo);

    dri_bo_emit_reloc(render_state->wm.state,
                      I915_GEM_DOMAIN_INSTRUCTION, 0,
                      wm_state->wm4.sampler_count << 2,
                      offsetof(struct i965_wm_unit_state, wm4),
                      render_state->wm.sampler);

    dri_bo_unmap(render_state->wm.state);
}

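/*
 * WM unit state for video rendering: dispatches the planar YUV->RGB
 * kernel and reads one CURBE constant entry (the NV12 flag uploaded by
 * i965_render_upload_constants()).
 */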
static void
i965_render_wm_unit(VADriverContextP ctx)
{
    struct i965_driver_data *i965 = i965_driver_data(ctx);
    struct i965_render_state *render_state = &i965->render_state;
    struct i965_wm_unit_state *wm_state;

    assert(render_state->wm.sampler);

    dri_bo_map(render_state->wm.state, 1);
    assert(render_state->wm.state->virtual);
    wm_state = render_state->wm.state->virtual;
    memset(wm_state, 0, sizeof(*wm_state));

    wm_state->thread0.grf_reg_count = I965_GRF_BLOCKS(PS_KERNEL_NUM_GRF);
    wm_state->thread0.kernel_start_pointer = render_state->render_kernels[PS_KERNEL].bo->offset >> 6;

    wm_state->thread1.single_program_flow = 1; /* XXX */

    if (IS_IRONLAKE(i965->intel.device_id))
        wm_state->thread1.binding_table_entry_count = 0;        /* hardware requirement */
    else
        wm_state->thread1.binding_table_entry_count = 7;

    /* scratch space is not used by the kernel */
    wm_state->thread2.scratch_space_base_pointer = 0;
    wm_state->thread2.per_thread_scratch_space = 0;

    wm_state->thread3.dispatch_grf_start_reg = 2; /* XXX */
    wm_state->thread3.const_urb_entry_read_length = 1;
    wm_state->thread3.const_urb_entry_read_offset = 0;
    wm_state->thread3.urb_entry_read_length = 1; /* XXX */
    wm_state->thread3.urb_entry_read_offset = 0; /* XXX */

    wm_state->wm4.stats_enable = 0;
    wm_state->wm4.sampler_state_pointer = render_state->wm.sampler->offset >> 5;

    if (IS_IRONLAKE(i965->intel.device_id)) {
        wm_state->wm4.sampler_count = 0;        /* hardware requirement */
        wm_state->wm5.max_threads = 12 * 6 - 1;
    } else {
        wm_state->wm4.sampler_count = (render_state->wm.sampler_count + 3) / 4;
        wm_state->wm5.max_threads = 10 * 5 - 1;
    }

    wm_state->wm5.thread_dispatch_enable = 1;
    wm_state->wm5.enable_16_pix = 1;
    wm_state->wm5.enable_8_pix = 0;
    wm_state->wm5.early_depth_test = 1;

    dri_bo_emit_reloc(render_state->wm.state,
                      I915_GEM_DOMAIN_INSTRUCTION, 0,
                      wm_state->thread0.grf_reg_count << 1,
                      offsetof(struct i965_wm_unit_state, thread0),
                      render_state->render_kernels[PS_KERNEL].bo);

    dri_bo_emit_reloc(render_state->wm.state,
                      I915_GEM_DOMAIN_INSTRUCTION, 0,
                      wm_state->wm4.sampler_count << 2,
                      offsetof(struct i965_wm_unit_state, wm4),
                      render_state->wm.sampler);

    dri_bo_unmap(render_state->wm.state);
}

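/*
 * Depth is irrelevant here (depth test is disabled in the CC unit), so
 * open the viewport depth range as wide as possible.
 */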
static void
i965_render_cc_viewport(VADriverContextP ctx)
{
    struct i965_driver_data *i965 = i965_driver_data(ctx);
    struct i965_render_state *render_state = &i965->render_state;
    struct i965_cc_viewport *cc_viewport;

    dri_bo_map(render_state->cc.viewport, 1);
    assert(render_state->cc.viewport->virtual);
    cc_viewport = render_state->cc.viewport->virtual;
    memset(cc_viewport, 0, sizeof(*cc_viewport));

    cc_viewport->min_depth = -1.e35;
    cc_viewport->max_depth = 1.e35;

    dri_bo_unmap(render_state->cc.viewport);
}

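/*
 * CC unit for subpicture blending: stencil, depth, logic op and alpha
 * test are all disabled; color blending is enabled with
 * src_alpha/inv_src_alpha factors, i.e. a standard "over" composite of
 * the subpicture on top of the video frame.
 */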
static void
i965_subpic_render_cc_unit(VADriverContextP ctx)
{
    struct i965_driver_data *i965 = i965_driver_data(ctx);
    struct i965_render_state *render_state = &i965->render_state;
    struct i965_cc_unit_state *cc_state;

    assert(render_state->cc.viewport);

    dri_bo_map(render_state->cc.state, 1);
    assert(render_state->cc.state->virtual);
    cc_state = render_state->cc.state->virtual;
    memset(cc_state, 0, sizeof(*cc_state));

    cc_state->cc0.stencil_enable = 0;   /* disable stencil */
    cc_state->cc2.depth_test = 0;       /* disable depth test */
    cc_state->cc2.logicop_enable = 0;   /* disable logic op */
    cc_state->cc3.ia_blend_enable = 0;  /* blend alpha just like colors */
    cc_state->cc3.blend_enable = 1;     /* enable color blend */
    cc_state->cc3.alpha_test = 0;       /* disable alpha test */
    cc_state->cc3.alpha_test_format = 0; /* 0: ALPHATEST_UNORM8, store alpha value as UNORM8 */
    cc_state->cc3.alpha_test_func = 5;  /* COMPAREFUNCTION_LESS: pass if less than the reference */
    cc_state->cc4.cc_viewport_state_offset = render_state->cc.viewport->offset >> 5;

    cc_state->cc5.dither_enable = 0;    /* disable dither */
    cc_state->cc5.logicop_func = 0xc;   /* COPY */
    cc_state->cc5.statistics_enable = 1;
    cc_state->cc5.ia_blend_function = I965_BLENDFUNCTION_ADD;
    cc_state->cc5.ia_src_blend_factor = I965_BLENDFACTOR_DST_ALPHA;
    cc_state->cc5.ia_dest_blend_factor = I965_BLENDFACTOR_DST_ALPHA;

    cc_state->cc6.clamp_post_alpha_blend = 0;
    cc_state->cc6.clamp_pre_alpha_blend = 0;

    /* final color = src_color * src_blend_factor +/- dst_color * dest_blend_factor */
    cc_state->cc6.blend_function = I965_BLENDFUNCTION_ADD;
    cc_state->cc6.src_blend_factor = I965_BLENDFACTOR_SRC_ALPHA;
    cc_state->cc6.dest_blend_factor = I965_BLENDFACTOR_INV_SRC_ALPHA;

    /* alpha test reference */
    cc_state->cc7.alpha_ref.f = 0.0;

    dri_bo_emit_reloc(render_state->cc.state,
                      I915_GEM_DOMAIN_INSTRUCTION, 0,
                      0,
                      offsetof(struct i965_cc_unit_state, cc4),
                      render_state->cc.viewport);

    dri_bo_unmap(render_state->cc.state);
}

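/*
 * CC unit for plain video rendering: blending is disabled and the logic
 * op is set to COPY, so the pixel shader output is written out as-is.
 */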
static void
i965_render_cc_unit(VADriverContextP ctx)
{
    struct i965_driver_data *i965 = i965_driver_data(ctx);
    struct i965_render_state *render_state = &i965->render_state;
    struct i965_cc_unit_state *cc_state;

    assert(render_state->cc.viewport);

    dri_bo_map(render_state->cc.state, 1);
    assert(render_state->cc.state->virtual);
    cc_state = render_state->cc.state->virtual;
    memset(cc_state, 0, sizeof(*cc_state));

    cc_state->cc0.stencil_enable = 0;   /* disable stencil */
    cc_state->cc2.depth_test = 0;       /* disable depth test */
    cc_state->cc2.logicop_enable = 1;   /* enable logic op */
    cc_state->cc3.ia_blend_enable = 0;  /* blend alpha just like colors */
    cc_state->cc3.blend_enable = 0;     /* disable color blend */
    cc_state->cc3.alpha_test = 0;       /* disable alpha test */
    cc_state->cc4.cc_viewport_state_offset = render_state->cc.viewport->offset >> 5;

    cc_state->cc5.dither_enable = 0;    /* disable dither */
    cc_state->cc5.logicop_func = 0xc;   /* COPY */
    cc_state->cc5.statistics_enable = 1;
    cc_state->cc5.ia_blend_function = I965_BLENDFUNCTION_ADD;
    cc_state->cc5.ia_src_blend_factor = I965_BLENDFACTOR_ONE;
    cc_state->cc5.ia_dest_blend_factor = I965_BLENDFACTOR_ONE;

    dri_bo_emit_reloc(render_state->cc.state,
                      I915_GEM_DOMAIN_INSTRUCTION, 0,
                      0,
                      offsetof(struct i965_cc_unit_state, cc4),
                      render_state->cc.viewport);

    dri_bo_unmap(render_state->cc.state);
}

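/*
 * Translate the buffer's GEM tiling mode into SURFACE_STATE fields;
 * the pre-GEN7 layout keeps these bits in ss3, GEN7 moved them to ss0.
 */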
static void
i965_render_set_surface_tiling(struct i965_surface_state *ss, unsigned int tiling)
{
    switch (tiling) {
    case I915_TILING_NONE:
        ss->ss3.tiled_surface = 0;
        ss->ss3.tile_walk = 0;
        break;
    case I915_TILING_X:
        ss->ss3.tiled_surface = 1;
        ss->ss3.tile_walk = I965_TILEWALK_XMAJOR;
        break;
    case I915_TILING_Y:
        ss->ss3.tiled_surface = 1;
        ss->ss3.tile_walk = I965_TILEWALK_YMAJOR;
        break;
    }
}

static void
i965_render_set_surface_state(struct i965_surface_state *ss,
                              dri_bo *bo, unsigned long offset,
                              int width, int height,
                              int pitch, int format)
{
    unsigned int tiling;
    unsigned int swizzle;

    memset(ss, 0, sizeof(*ss));
    ss->ss0.surface_type = I965_SURFACE_2D;
    ss->ss0.surface_format = format;
    ss->ss0.color_blend = 1;

    ss->ss1.base_addr = bo->offset + offset;

    ss->ss2.width = width - 1;
    ss->ss2.height = height - 1;

    ss->ss3.pitch = pitch - 1;

    dri_bo_get_tiling(bo, &tiling, &swizzle);
    i965_render_set_surface_tiling(ss, tiling);
}

static void
gen7_render_set_surface_tiling(struct gen7_surface_state *ss, uint32_t tiling)
{
    switch (tiling) {
    case I915_TILING_NONE:
        ss->ss0.tiled_surface = 0;
        ss->ss0.tile_walk = 0;
        break;
    case I915_TILING_X:
        ss->ss0.tiled_surface = 1;
        ss->ss0.tile_walk = I965_TILEWALK_XMAJOR;
        break;
    case I915_TILING_Y:
        ss->ss0.tiled_surface = 1;
        ss->ss0.tile_walk = I965_TILEWALK_YMAJOR;
        break;
    }
}

static void
gen7_render_set_surface_state(struct gen7_surface_state *ss,
                              dri_bo *bo, unsigned long offset,
                              int width, int height,
                              int pitch, int format)
{
    unsigned int tiling;
    unsigned int swizzle;

    memset(ss, 0, sizeof(*ss));

    ss->ss0.surface_type = I965_SURFACE_2D;
    ss->ss0.surface_format = format;

    ss->ss1.base_addr = bo->offset + offset;

    ss->ss2.width = width - 1;
    ss->ss2.height = height - 1;

    ss->ss3.pitch = pitch - 1;

    dri_bo_get_tiling(bo, &tiling, &swizzle);
    gen7_render_set_surface_tiling(ss, tiling);
}

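/*
 * Fill the SURFACE_STATE entry for one source plane and point the
 * matching binding-table slot at it.  A relocation keeps ss1.base_addr
 * valid across BO moves.  sampler_count is bumped so that
 * i965_render_sampler() later creates one sampler per bound plane.
 */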
static void
i965_render_src_surface_state(VADriverContextP ctx,
                              int index,
                              dri_bo *region,
                              unsigned long offset,
                              int w, int h,
                              int pitch, int format)
{
    struct i965_driver_data *i965 = i965_driver_data(ctx);
    struct i965_render_state *render_state = &i965->render_state;
    void *ss;
    dri_bo *ss_bo = render_state->wm.surface_state_binding_table_bo;

    assert(index < MAX_RENDER_SURFACES);

    dri_bo_map(ss_bo, 1);
    assert(ss_bo->virtual);
    ss = (char *)ss_bo->virtual + SURFACE_STATE_OFFSET(index);

    if (IS_GEN7(i965->intel.device_id)) {
        gen7_render_set_surface_state(ss,
                                      region, offset,
                                      w, h,
                                      pitch, format);
        dri_bo_emit_reloc(ss_bo,
                          I915_GEM_DOMAIN_SAMPLER, 0,
                          offset,
                          SURFACE_STATE_OFFSET(index) + offsetof(struct gen7_surface_state, ss1),
                          region);
    } else {
        i965_render_set_surface_state(ss,
                                      region, offset,
                                      w, h,
                                      pitch, format);
        dri_bo_emit_reloc(ss_bo,
                          I915_GEM_DOMAIN_SAMPLER, 0,
                          offset,
                          SURFACE_STATE_OFFSET(index) + offsetof(struct i965_surface_state, ss1),
                          region);
    }

    ((unsigned int *)((char *)ss_bo->virtual + BINDING_TABLE_OFFSET))[index] = SURFACE_STATE_OFFSET(index);
    dri_bo_unmap(ss_bo);
    render_state->wm.sampler_count++;
}

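/*
 * Bind the source planes: Y at binding-table entries 1 and 2, chroma at
 * 3..6.  For YV12 the U/V binding indices are swapped instead of the
 * plane offsets, since V precedes U in that layout; NV12 binds the
 * interleaved UV plane as R8G8.
 */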
static void
i965_render_src_surfaces_state(VADriverContextP ctx,
                               VASurfaceID surface)
{
    struct i965_driver_data *i965 = i965_driver_data(ctx);
    struct object_surface *obj_surface;
    int w, h;
    int rw, rh;
    dri_bo *region;

    obj_surface = SURFACE(surface);
    assert(obj_surface);

    w = obj_surface->width;
    h = obj_surface->height;
    rw = obj_surface->orig_width;
    rh = obj_surface->orig_height;
    region = obj_surface->bo;

    i965_render_src_surface_state(ctx, 1, region, 0, rw, rh, w, I965_SURFACEFORMAT_R8_UNORM);     /* Y */
    i965_render_src_surface_state(ctx, 2, region, 0, rw, rh, w, I965_SURFACEFORMAT_R8_UNORM);

    if (obj_surface->fourcc == VA_FOURCC('Y','V','1','2')) {
        int u3 = 5, u4 = 6, v5 = 3, v6 = 4;

        i965_render_src_surface_state(ctx, u3, region, w * h, rw / 2, rh / 2, w / 2, I965_SURFACEFORMAT_R8_UNORM); /* U */
        i965_render_src_surface_state(ctx, u4, region, w * h, rw / 2, rh / 2, w / 2, I965_SURFACEFORMAT_R8_UNORM);
        i965_render_src_surface_state(ctx, v5, region, w * h + w * h / 4, rw / 2, rh / 2, w / 2, I965_SURFACEFORMAT_R8_UNORM);     /* V */
        i965_render_src_surface_state(ctx, v6, region, w * h + w * h / 4, rw / 2, rh / 2, w / 2, I965_SURFACEFORMAT_R8_UNORM);
    } else {
        if (obj_surface->fourcc == VA_FOURCC('N','V','1','2')) {
            i965_render_src_surface_state(ctx, 3, region, w * h, rw / 2, rh / 2, w, I965_SURFACEFORMAT_R8G8_UNORM); /* UV */
            i965_render_src_surface_state(ctx, 4, region, w * h, rw / 2, rh / 2, w, I965_SURFACEFORMAT_R8G8_UNORM);
        } else {
            int u3 = 3, u4 = 4, v5 = 5, v6 = 6;

            i965_render_src_surface_state(ctx, u3, region, w * h, rw / 2, rh / 2, w / 2, I965_SURFACEFORMAT_R8_UNORM); /* U */
            i965_render_src_surface_state(ctx, u4, region, w * h, rw / 2, rh / 2, w / 2, I965_SURFACEFORMAT_R8_UNORM);
            i965_render_src_surface_state(ctx, v5, region, w * h + w * h / 4, rw / 2, rh / 2, w / 2, I965_SURFACEFORMAT_R8_UNORM);     /* V */
            i965_render_src_surface_state(ctx, v6, region, w * h + w * h / 4, rw / 2, rh / 2, w / 2, I965_SURFACEFORMAT_R8_UNORM);
        }
    }
}

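/* Bind the subpicture image itself (entries 1 and 2) for the ARGB kernel. */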
static void
i965_subpic_render_src_surfaces_state(VADriverContextP ctx,
                                      VASurfaceID surface)
{
    struct i965_driver_data *i965 = i965_driver_data(ctx);
    struct object_surface *obj_surface = SURFACE(surface);
    struct object_subpic *obj_subpic;
    struct object_image *obj_image;
    dri_bo *subpic_region;

    assert(obj_surface);
    assert(obj_surface->bo);

    obj_subpic = SUBPIC(obj_surface->subpic);
    obj_image = IMAGE(obj_subpic->image);
    subpic_region = obj_image->bo;

    /* subpicture surface */
    i965_render_src_surface_state(ctx, 1, subpic_region, 0, obj_subpic->width, obj_subpic->height, obj_subpic->pitch, obj_subpic->format);
    i965_render_src_surface_state(ctx, 2, subpic_region, 0, obj_subpic->width, obj_subpic->height, obj_subpic->pitch, obj_subpic->format);
}

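/*
 * SURFACE_STATE for the render target: the DRI drawable region, either
 * 16-bit (B5G6R5) or 32-bit (B8G8R8A8) depending on the pixel size.
 */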
static void
i965_render_dest_surface_state(VADriverContextP ctx, int index)
{
    struct i965_driver_data *i965 = i965_driver_data(ctx);
    struct i965_render_state *render_state = &i965->render_state;
    struct intel_region *dest_region = render_state->draw_region;
    void *ss;
    dri_bo *ss_bo = render_state->wm.surface_state_binding_table_bo;
    int format;

    assert(index < MAX_RENDER_SURFACES);

    if (dest_region->cpp == 2) {
        format = I965_SURFACEFORMAT_B5G6R5_UNORM;
    } else {
        format = I965_SURFACEFORMAT_B8G8R8A8_UNORM;
    }

    dri_bo_map(ss_bo, 1);
    assert(ss_bo->virtual);
    ss = (char *)ss_bo->virtual + SURFACE_STATE_OFFSET(index);

    if (IS_GEN7(i965->intel.device_id)) {
        gen7_render_set_surface_state(ss,
                                      dest_region->bo, 0,
                                      dest_region->width, dest_region->height,
                                      dest_region->pitch, format);
        dri_bo_emit_reloc(ss_bo,
                          I915_GEM_DOMAIN_RENDER, I915_GEM_DOMAIN_RENDER,
                          0,
                          SURFACE_STATE_OFFSET(index) + offsetof(struct gen7_surface_state, ss1),
                          dest_region->bo);
    } else {
        i965_render_set_surface_state(ss,
                                      dest_region->bo, 0,
                                      dest_region->width, dest_region->height,
                                      dest_region->pitch, format);
        dri_bo_emit_reloc(ss_bo,
                          I915_GEM_DOMAIN_RENDER, I915_GEM_DOMAIN_RENDER,
                          0,
                          SURFACE_STATE_OFFSET(index) + offsetof(struct i965_surface_state, ss1),
                          dest_region->bo);
    }

    ((unsigned int *)((char *)ss_bo->virtual + BINDING_TABLE_OFFSET))[index] = SURFACE_STATE_OFFSET(index);
    dri_bo_unmap(ss_bo);
}

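/*
 * Upload the three RECTLIST vertices for the subpicture: each vertex is
 * (tx, ty, x, y).  Unless VA_SUBPICTURE_DESTINATION_IS_SCREEN_COORD is
 * set, the subpicture dst rect is first scaled from surface to output
 * coordinates.
 */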
static void
i965_subpic_render_upload_vertex(VADriverContextP ctx,
                                 VASurfaceID surface,
                                 const VARectangle *output_rect)
{
    struct i965_driver_data  *i965         = i965_driver_data(ctx);
    struct i965_render_state *render_state = &i965->render_state;
    struct object_surface    *obj_surface  = SURFACE(surface);
    struct object_subpic     *obj_subpic   = SUBPIC(obj_surface->subpic);
    VARectangle dst_rect;
    float *vb, tx1, tx2, ty1, ty2, x1, x2, y1, y2;
    int i = 0;

    if (obj_subpic->flags & VA_SUBPICTURE_DESTINATION_IS_SCREEN_COORD)
        dst_rect = obj_subpic->dst_rect;
    else {
        const float sx  = (float)output_rect->width  / obj_surface->orig_width;
        const float sy  = (float)output_rect->height / obj_surface->orig_height;
        dst_rect.x      = output_rect->x + sx * obj_subpic->dst_rect.x;
        dst_rect.y      = output_rect->y + sy * obj_subpic->dst_rect.y;
        dst_rect.width  = sx * obj_subpic->dst_rect.width;
        dst_rect.height = sy * obj_subpic->dst_rect.height;
    }

    dri_bo_map(render_state->vb.vertex_buffer, 1);
    assert(render_state->vb.vertex_buffer->virtual);
    vb = render_state->vb.vertex_buffer->virtual;

    tx1 = (float)obj_subpic->src_rect.x / obj_subpic->width;
    ty1 = (float)obj_subpic->src_rect.y / obj_subpic->height;
    tx2 = (float)(obj_subpic->src_rect.x + obj_subpic->src_rect.width) / obj_subpic->width;
    ty2 = (float)(obj_subpic->src_rect.y + obj_subpic->src_rect.height) / obj_subpic->height;

    x1 = (float)dst_rect.x;
    y1 = (float)dst_rect.y;
    x2 = (float)(dst_rect.x + dst_rect.width);
    y2 = (float)(dst_rect.y + dst_rect.height);

    vb[i++] = tx2;
    vb[i++] = ty2;
    vb[i++] = x2;
    vb[i++] = y2;

    vb[i++] = tx1;
    vb[i++] = ty2;
    vb[i++] = x1;
    vb[i++] = y2;

    vb[i++] = tx1;
    vb[i++] = ty1;
    vb[i++] = x1;
    vb[i++] = y1;

    dri_bo_unmap(render_state->vb.vertex_buffer);
}

static void
i965_render_upload_vertex(
    VADriverContextP   ctx,
    VASurfaceID        surface,
    const VARectangle *src_rect,
    const VARectangle *dst_rect
)
{
    struct i965_driver_data *i965 = i965_driver_data(ctx);
    struct i965_render_state *render_state = &i965->render_state;
    struct intel_region *dest_region = render_state->draw_region;
    struct object_surface *obj_surface;
    float *vb;

    float u1, v1, u2, v2;
    int i, width, height;
    int box_x1 = dest_region->x + dst_rect->x;
    int box_y1 = dest_region->y + dst_rect->y;
    int box_x2 = box_x1 + dst_rect->width;
    int box_y2 = box_y1 + dst_rect->height;

    obj_surface = SURFACE(surface);
    assert(obj_surface);
    width = obj_surface->orig_width;
    height = obj_surface->orig_height;

    u1 = (float)src_rect->x / width;
    v1 = (float)src_rect->y / height;
    u2 = (float)(src_rect->x + src_rect->width) / width;
    v2 = (float)(src_rect->y + src_rect->height) / height;

    dri_bo_map(render_state->vb.vertex_buffer, 1);
    assert(render_state->vb.vertex_buffer->virtual);
    vb = render_state->vb.vertex_buffer->virtual;

    i = 0;
    vb[i++] = u2;
    vb[i++] = v2;
    vb[i++] = (float)box_x2;
    vb[i++] = (float)box_y2;

    vb[i++] = u1;
    vb[i++] = v2;
    vb[i++] = (float)box_x1;
    vb[i++] = (float)box_y2;

    vb[i++] = u1;
    vb[i++] = v1;
    vb[i++] = (float)box_x1;
    vb[i++] = (float)box_y1;

    dri_bo_unmap(render_state->vb.vertex_buffer);
}

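/*
 * Upload the single CURBE constant read by the planar PS kernel: a flag
 * telling it whether the source is NV12 (interleaved UV) or fully
 * planar.
 */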
static void
i965_render_upload_constants(VADriverContextP ctx,
                             VASurfaceID surface)
{
    struct i965_driver_data *i965 = i965_driver_data(ctx);
    struct i965_render_state *render_state = &i965->render_state;
    unsigned short *constant_buffer;
    struct object_surface *obj_surface = SURFACE(surface);

    assert(obj_surface);

    dri_bo_map(render_state->curbe.bo, 1);
    assert(render_state->curbe.bo->virtual);
    constant_buffer = render_state->curbe.bo->virtual;

    if (obj_surface->fourcc == VA_FOURCC('N','V','1','2'))
        *constant_buffer = 1;
    else
        *constant_buffer = 0;

    dri_bo_unmap(render_state->curbe.bo);
}

static void
i965_surface_render_state_setup(
    VADriverContextP   ctx,
    VASurfaceID        surface,
    const VARectangle *src_rect,
    const VARectangle *dst_rect
)
{
    i965_render_vs_unit(ctx);
    i965_render_sf_unit(ctx);
    i965_render_dest_surface_state(ctx, 0);
    i965_render_src_surfaces_state(ctx, surface);
    i965_render_sampler(ctx);
    i965_render_wm_unit(ctx);
    i965_render_cc_viewport(ctx);
    i965_render_cc_unit(ctx);
    i965_render_upload_vertex(ctx, surface, src_rect, dst_rect);
    i965_render_upload_constants(ctx, surface);
}

static void
i965_subpic_render_state_setup(
    VADriverContextP   ctx,
    VASurfaceID        surface,
    const VARectangle *src_rect,
    const VARectangle *dst_rect
)
{
    i965_render_vs_unit(ctx);
    i965_render_sf_unit(ctx);
    i965_render_dest_surface_state(ctx, 0);
    i965_subpic_render_src_surfaces_state(ctx, surface);
    i965_render_sampler(ctx);
    i965_subpic_render_wm_unit(ctx);
    i965_render_cc_viewport(ctx);
    i965_subpic_render_cc_unit(ctx);
    i965_subpic_render_upload_vertex(ctx, surface, dst_rect);
}

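/*
 * The helpers below emit the individual GEN4/GEN5 3D pipeline commands
 * into the batch buffer, in the order required by
 * i965_surface_render_pipeline_setup().
 */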
static void
i965_render_pipeline_select(VADriverContextP ctx)
{
    struct i965_driver_data *i965 = i965_driver_data(ctx);
    struct intel_batchbuffer *batch = i965->batch;

    BEGIN_BATCH(batch, 1);
    OUT_BATCH(batch, CMD_PIPELINE_SELECT | PIPELINE_SELECT_3D);
    ADVANCE_BATCH(batch);
}

static void
i965_render_state_sip(VADriverContextP ctx)
{
    struct i965_driver_data *i965 = i965_driver_data(ctx);
    struct intel_batchbuffer *batch = i965->batch;

    BEGIN_BATCH(batch, 2);
    OUT_BATCH(batch, CMD_STATE_SIP | 0);
    OUT_BATCH(batch, 0);
    ADVANCE_BATCH(batch);
}

static void
i965_render_state_base_address(VADriverContextP ctx)
{
    struct i965_driver_data *i965 = i965_driver_data(ctx);
    struct intel_batchbuffer *batch = i965->batch;
    struct i965_render_state *render_state = &i965->render_state;

    if (IS_IRONLAKE(i965->intel.device_id)) {
        BEGIN_BATCH(batch, 8);
        OUT_BATCH(batch, CMD_STATE_BASE_ADDRESS | 6);
        OUT_BATCH(batch, 0 | BASE_ADDRESS_MODIFY);
        OUT_RELOC(batch, render_state->wm.surface_state_binding_table_bo, I915_GEM_DOMAIN_INSTRUCTION, 0, BASE_ADDRESS_MODIFY);
        OUT_BATCH(batch, 0 | BASE_ADDRESS_MODIFY);
        OUT_BATCH(batch, 0 | BASE_ADDRESS_MODIFY);
        OUT_BATCH(batch, 0 | BASE_ADDRESS_MODIFY);
        OUT_BATCH(batch, 0 | BASE_ADDRESS_MODIFY);
        OUT_BATCH(batch, 0 | BASE_ADDRESS_MODIFY);
        ADVANCE_BATCH(batch);
    } else {
        BEGIN_BATCH(batch, 6);
        OUT_BATCH(batch, CMD_STATE_BASE_ADDRESS | 4);
        OUT_BATCH(batch, 0 | BASE_ADDRESS_MODIFY);
        OUT_RELOC(batch, render_state->wm.surface_state_binding_table_bo, I915_GEM_DOMAIN_INSTRUCTION, 0, BASE_ADDRESS_MODIFY);
        OUT_BATCH(batch, 0 | BASE_ADDRESS_MODIFY);
        OUT_BATCH(batch, 0 | BASE_ADDRESS_MODIFY);
        OUT_BATCH(batch, 0 | BASE_ADDRESS_MODIFY);
        ADVANCE_BATCH(batch);
    }
}

static void
i965_render_binding_table_pointers(VADriverContextP ctx)
{
    struct i965_driver_data *i965 = i965_driver_data(ctx);
    struct intel_batchbuffer *batch = i965->batch;

    BEGIN_BATCH(batch, 6);
    OUT_BATCH(batch, CMD_BINDING_TABLE_POINTERS | 4);
    OUT_BATCH(batch, 0); /* vs */
    OUT_BATCH(batch, 0); /* gs */
    OUT_BATCH(batch, 0); /* clip */
    OUT_BATCH(batch, 0); /* sf */
    OUT_BATCH(batch, BINDING_TABLE_OFFSET);
    ADVANCE_BATCH(batch);
}

static void
i965_render_constant_color(VADriverContextP ctx)
{
    struct i965_driver_data *i965 = i965_driver_data(ctx);
    struct intel_batchbuffer *batch = i965->batch;

    BEGIN_BATCH(batch, 5);
    OUT_BATCH(batch, CMD_CONSTANT_COLOR | 3);
    OUT_BATCH(batch, float_to_uint(1.0));
    OUT_BATCH(batch, float_to_uint(0.0));
    OUT_BATCH(batch, float_to_uint(1.0));
    OUT_BATCH(batch, float_to_uint(1.0));
    ADVANCE_BATCH(batch);
}

static void
i965_render_pipelined_pointers(VADriverContextP ctx)
{
    struct i965_driver_data *i965 = i965_driver_data(ctx);
    struct intel_batchbuffer *batch = i965->batch;
    struct i965_render_state *render_state = &i965->render_state;

    BEGIN_BATCH(batch, 7);
    OUT_BATCH(batch, CMD_PIPELINED_POINTERS | 5);
    OUT_RELOC(batch, render_state->vs.state, I915_GEM_DOMAIN_INSTRUCTION, 0, 0);
    OUT_BATCH(batch, 0);  /* disable GS */
    OUT_BATCH(batch, 0);  /* disable CLIP */
    OUT_RELOC(batch, render_state->sf.state, I915_GEM_DOMAIN_INSTRUCTION, 0, 0);
    OUT_RELOC(batch, render_state->wm.state, I915_GEM_DOMAIN_INSTRUCTION, 0, 0);
    OUT_RELOC(batch, render_state->cc.state, I915_GEM_DOMAIN_INSTRUCTION, 0, 0);
    ADVANCE_BATCH(batch);
}

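/*
 * Carve the URB into back-to-back regions (VS | GS | CLIP | SF | CS)
 * and program the fence registers with each region's end offset.
 */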
static void
i965_render_urb_layout(VADriverContextP ctx)
{
    struct i965_driver_data *i965 = i965_driver_data(ctx);
    struct intel_batchbuffer *batch = i965->batch;
    int urb_vs_start, urb_vs_size;
    int urb_gs_start, urb_gs_size;
    int urb_clip_start, urb_clip_size;
    int urb_sf_start, urb_sf_size;
    int urb_cs_start, urb_cs_size;

    urb_vs_start = 0;
    urb_vs_size = URB_VS_ENTRIES * URB_VS_ENTRY_SIZE;
    urb_gs_start = urb_vs_start + urb_vs_size;
    urb_gs_size = URB_GS_ENTRIES * URB_GS_ENTRY_SIZE;
    urb_clip_start = urb_gs_start + urb_gs_size;
    urb_clip_size = URB_CLIP_ENTRIES * URB_CLIP_ENTRY_SIZE;
    urb_sf_start = urb_clip_start + urb_clip_size;
    urb_sf_size = URB_SF_ENTRIES * URB_SF_ENTRY_SIZE;
    urb_cs_start = urb_sf_start + urb_sf_size;
    urb_cs_size = URB_CS_ENTRIES * URB_CS_ENTRY_SIZE;

    BEGIN_BATCH(batch, 3);
    OUT_BATCH(batch,
              CMD_URB_FENCE |
              UF0_CS_REALLOC |
              UF0_SF_REALLOC |
              UF0_CLIP_REALLOC |
              UF0_GS_REALLOC |
              UF0_VS_REALLOC |
              1);
    OUT_BATCH(batch,
              ((urb_clip_start + urb_clip_size) << UF1_CLIP_FENCE_SHIFT) |
              ((urb_gs_start + urb_gs_size) << UF1_GS_FENCE_SHIFT) |
              ((urb_vs_start + urb_vs_size) << UF1_VS_FENCE_SHIFT));
    OUT_BATCH(batch,
              ((urb_cs_start + urb_cs_size) << UF2_CS_FENCE_SHIFT) |
              ((urb_sf_start + urb_sf_size) << UF2_SF_FENCE_SHIFT));
    ADVANCE_BATCH(batch);
}

static void
i965_render_cs_urb_layout(VADriverContextP ctx)
{
    struct i965_driver_data *i965 = i965_driver_data(ctx);
    struct intel_batchbuffer *batch = i965->batch;

    BEGIN_BATCH(batch, 2);
    OUT_BATCH(batch, CMD_CS_URB_STATE | 0);
    OUT_BATCH(batch,
              ((URB_CS_ENTRY_SIZE - 1) << 4) |  /* URB Entry Allocation Size */
              (URB_CS_ENTRIES << 0));           /* Number of URB Entries */
    ADVANCE_BATCH(batch);
}

static void
i965_render_constant_buffer(VADriverContextP ctx)
{
    struct i965_driver_data *i965 = i965_driver_data(ctx);
    struct intel_batchbuffer *batch = i965->batch;
    struct i965_render_state *render_state = &i965->render_state;

    BEGIN_BATCH(batch, 2);
    OUT_BATCH(batch, CMD_CONSTANT_BUFFER | (1 << 8) | (2 - 2));
    OUT_RELOC(batch, render_state->curbe.bo,
              I915_GEM_DOMAIN_INSTRUCTION, 0,
              URB_CS_ENTRY_SIZE - 1);
    ADVANCE_BATCH(batch);
}

static void
i965_render_drawing_rectangle(VADriverContextP ctx)
{
    struct i965_driver_data *i965 = i965_driver_data(ctx);
    struct intel_batchbuffer *batch = i965->batch;
    struct i965_render_state *render_state = &i965->render_state;
    struct intel_region *dest_region = render_state->draw_region;

    BEGIN_BATCH(batch, 4);
    OUT_BATCH(batch, CMD_DRAWING_RECTANGLE | 2);
    OUT_BATCH(batch, 0x00000000);
    OUT_BATCH(batch, (dest_region->width - 1) | ((dest_region->height - 1) << 16));
    OUT_BATCH(batch, 0x00000000);
    ADVANCE_BATCH(batch);
}

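/*
 * Vertex layout: two VERTEX_ELEMENTs per vertex, (x, y) at offset 0 and
 * (s0, t0) at offset 8, each padded out to four components with 1.0.
 * Ironlake drops the destination-element-offset fields from VE1.
 */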
static void
i965_render_vertex_elements(VADriverContextP ctx)
{
    struct i965_driver_data *i965 = i965_driver_data(ctx);
    struct intel_batchbuffer *batch = i965->batch;

    if (IS_IRONLAKE(i965->intel.device_id)) {
        BEGIN_BATCH(batch, 5);
        OUT_BATCH(batch, CMD_VERTEX_ELEMENTS | 3);
        /* offset 0: X,Y -> {X, Y, 1.0, 1.0} */
        OUT_BATCH(batch, (0 << VE0_VERTEX_BUFFER_INDEX_SHIFT) |
                  VE0_VALID |
                  (I965_SURFACEFORMAT_R32G32_FLOAT << VE0_FORMAT_SHIFT) |
                  (0 << VE0_OFFSET_SHIFT));
        OUT_BATCH(batch, (I965_VFCOMPONENT_STORE_SRC << VE1_VFCOMPONENT_0_SHIFT) |
                  (I965_VFCOMPONENT_STORE_SRC << VE1_VFCOMPONENT_1_SHIFT) |
                  (I965_VFCOMPONENT_STORE_1_FLT << VE1_VFCOMPONENT_2_SHIFT) |
                  (I965_VFCOMPONENT_STORE_1_FLT << VE1_VFCOMPONENT_3_SHIFT));
        /* offset 8: S0, T0 -> {S0, T0, 1.0, 1.0} */
        OUT_BATCH(batch, (0 << VE0_VERTEX_BUFFER_INDEX_SHIFT) |
                  VE0_VALID |
                  (I965_SURFACEFORMAT_R32G32_FLOAT << VE0_FORMAT_SHIFT) |
                  (8 << VE0_OFFSET_SHIFT));
        OUT_BATCH(batch, (I965_VFCOMPONENT_STORE_SRC << VE1_VFCOMPONENT_0_SHIFT) |
                  (I965_VFCOMPONENT_STORE_SRC << VE1_VFCOMPONENT_1_SHIFT) |
                  (I965_VFCOMPONENT_STORE_1_FLT << VE1_VFCOMPONENT_2_SHIFT) |
                  (I965_VFCOMPONENT_STORE_1_FLT << VE1_VFCOMPONENT_3_SHIFT));
        ADVANCE_BATCH(batch);
    } else {
        BEGIN_BATCH(batch, 5);
        OUT_BATCH(batch, CMD_VERTEX_ELEMENTS | 3);
        /* offset 0: X,Y -> {X, Y, 1.0, 1.0} */
        OUT_BATCH(batch, (0 << VE0_VERTEX_BUFFER_INDEX_SHIFT) |
                  VE0_VALID |
                  (I965_SURFACEFORMAT_R32G32_FLOAT << VE0_FORMAT_SHIFT) |
                  (0 << VE0_OFFSET_SHIFT));
        OUT_BATCH(batch, (I965_VFCOMPONENT_STORE_SRC << VE1_VFCOMPONENT_0_SHIFT) |
                  (I965_VFCOMPONENT_STORE_SRC << VE1_VFCOMPONENT_1_SHIFT) |
                  (I965_VFCOMPONENT_STORE_1_FLT << VE1_VFCOMPONENT_2_SHIFT) |
                  (I965_VFCOMPONENT_STORE_1_FLT << VE1_VFCOMPONENT_3_SHIFT) |
                  (0 << VE1_DESTINATION_ELEMENT_OFFSET_SHIFT));
        /* offset 8: S0, T0 -> {S0, T0, 1.0, 1.0} */
        OUT_BATCH(batch, (0 << VE0_VERTEX_BUFFER_INDEX_SHIFT) |
                  VE0_VALID |
                  (I965_SURFACEFORMAT_R32G32_FLOAT << VE0_FORMAT_SHIFT) |
                  (8 << VE0_OFFSET_SHIFT));
        OUT_BATCH(batch, (I965_VFCOMPONENT_STORE_SRC << VE1_VFCOMPONENT_0_SHIFT) |
                  (I965_VFCOMPONENT_STORE_SRC << VE1_VFCOMPONENT_1_SHIFT) |
                  (I965_VFCOMPONENT_STORE_1_FLT << VE1_VFCOMPONENT_2_SHIFT) |
                  (I965_VFCOMPONENT_STORE_1_FLT << VE1_VFCOMPONENT_3_SHIFT) |
                  (4 << VE1_DESTINATION_ELEMENT_OFFSET_SHIFT));
        ADVANCE_BATCH(batch);
    }
}

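/*
 * Load the image palette into the sampler palette for indexed-color
 * subpictures; each entry packs the caller's alpha into bits 31:24 and
 * the palette color into bits 23:0.
 */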
static void
i965_render_upload_image_palette(
    VADriverContextP ctx,
    VAImageID        image_id,
    unsigned int     alpha
)
{
    struct i965_driver_data *i965 = i965_driver_data(ctx);
    struct intel_batchbuffer *batch = i965->batch;
    unsigned int i;

    struct object_image *obj_image = IMAGE(image_id);
    assert(obj_image);

    if (obj_image->image.num_palette_entries == 0)
        return;

    BEGIN_BATCH(batch, 1 + obj_image->image.num_palette_entries);
    OUT_BATCH(batch, CMD_SAMPLER_PALETTE_LOAD | (obj_image->image.num_palette_entries - 1));
    /* fill palette: alpha in bits 31:24, color in bits 23:0 */
    for (i = 0; i < obj_image->image.num_palette_entries; i++)
        OUT_BATCH(batch, (alpha << 24) | obj_image->palette[i]);
    ADVANCE_BATCH(batch);
}

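/*
 * Emit the vertex buffer state and the 3DPRIMITIVE command: one
 * RECTLIST of three sequential vertices, 16 bytes each.  Ironlake takes
 * an end-address relocation where older parts take a max-index DWORD.
 */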
static void
i965_render_startup(VADriverContextP ctx)
{
    struct i965_driver_data *i965 = i965_driver_data(ctx);
    struct intel_batchbuffer *batch = i965->batch;
    struct i965_render_state *render_state = &i965->render_state;

    BEGIN_BATCH(batch, 11);
    OUT_BATCH(batch, CMD_VERTEX_BUFFERS | 3);
    OUT_BATCH(batch,
              (0 << VB0_BUFFER_INDEX_SHIFT) |
              VB0_VERTEXDATA |
              ((4 * 4) << VB0_BUFFER_PITCH_SHIFT));
    OUT_RELOC(batch, render_state->vb.vertex_buffer, I915_GEM_DOMAIN_VERTEX, 0, 0);

    if (IS_IRONLAKE(i965->intel.device_id))
        OUT_RELOC(batch, render_state->vb.vertex_buffer, I915_GEM_DOMAIN_VERTEX, 0, 12 * 4);
    else
        OUT_BATCH(batch, 3);

    OUT_BATCH(batch, 0);

    OUT_BATCH(batch,
              CMD_3DPRIMITIVE |
              _3DPRIMITIVE_VERTEX_SEQUENTIAL |
              (_3DPRIM_RECTLIST << _3DPRIMITIVE_TOPOLOGY_SHIFT) |
              (0 << 9) |
              4);
    OUT_BATCH(batch, 3); /* vertex count per instance */
    OUT_BATCH(batch, 0); /* start vertex offset */
    OUT_BATCH(batch, 1); /* single instance */
    OUT_BATCH(batch, 0); /* start instance location */
    OUT_BATCH(batch, 0); /* index buffer offset, ignored */
    ADVANCE_BATCH(batch);
}

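/*
 * Clear the destination region with XY_COLOR_BLT.  On GEN6+ the blitter
 * lives on its own ring, so the command goes into a BLT batch; tiled
 * destinations are programmed with a DWORD pitch.
 */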
static void
i965_clear_dest_region(VADriverContextP ctx)
{
    struct i965_driver_data *i965 = i965_driver_data(ctx);
    struct intel_batchbuffer *batch = i965->batch;
    struct i965_render_state *render_state = &i965->render_state;
    struct intel_region *dest_region = render_state->draw_region;
    unsigned int blt_cmd, br13;
    int pitch;

    blt_cmd = XY_COLOR_BLT_CMD;
    br13 = 0xf0 << 16;
    pitch = dest_region->pitch;

    if (dest_region->cpp == 4) {
        br13 |= BR13_8888;
        blt_cmd |= (XY_COLOR_BLT_WRITE_RGB | XY_COLOR_BLT_WRITE_ALPHA);
    } else {
        assert(dest_region->cpp == 2);
        br13 |= BR13_565;
    }

    if (dest_region->tiling != I915_TILING_NONE) {
        blt_cmd |= XY_COLOR_BLT_DST_TILED;
        pitch /= 4;
    }

    br13 |= pitch;

    if (IS_GEN6(i965->intel.device_id) ||
        IS_GEN7(i965->intel.device_id)) {
        intel_batchbuffer_start_atomic_blt(batch, 24);
        BEGIN_BLT_BATCH(batch, 6);
    } else {
        intel_batchbuffer_start_atomic(batch, 24);
        BEGIN_BATCH(batch, 6);
    }

    OUT_BATCH(batch, blt_cmd);
    OUT_BATCH(batch, br13);
    OUT_BATCH(batch, (dest_region->y << 16) | (dest_region->x));
    OUT_BATCH(batch, ((dest_region->y + dest_region->height) << 16) |
              (dest_region->x + dest_region->width));
    OUT_RELOC(batch, dest_region->bo,
              I915_GEM_DOMAIN_RENDER, I915_GEM_DOMAIN_RENDER,
              0);
    OUT_BATCH(batch, 0x0);
    ADVANCE_BATCH(batch);
    intel_batchbuffer_end_atomic(batch);
}

static void
i965_surface_render_pipeline_setup(VADriverContextP ctx)
{
    struct i965_driver_data *i965 = i965_driver_data(ctx);
    struct intel_batchbuffer *batch = i965->batch;

    i965_clear_dest_region(ctx);
    intel_batchbuffer_start_atomic(batch, 0x1000);
    intel_batchbuffer_emit_mi_flush(batch);
    i965_render_pipeline_select(ctx);
    i965_render_state_sip(ctx);
    i965_render_state_base_address(ctx);
    i965_render_binding_table_pointers(ctx);
    i965_render_constant_color(ctx);
    i965_render_pipelined_pointers(ctx);
    i965_render_urb_layout(ctx);
    i965_render_cs_urb_layout(ctx);
    i965_render_constant_buffer(ctx);
    i965_render_drawing_rectangle(ctx);
    i965_render_vertex_elements(ctx);
    i965_render_startup(ctx);
    intel_batchbuffer_end_atomic(batch);
}

static void
i965_subpic_render_pipeline_setup(VADriverContextP ctx)
{
    struct i965_driver_data *i965 = i965_driver_data(ctx);
    struct intel_batchbuffer *batch = i965->batch;

    intel_batchbuffer_start_atomic(batch, 0x1000);
    intel_batchbuffer_emit_mi_flush(batch);
    i965_render_pipeline_select(ctx);
    i965_render_state_sip(ctx);
    i965_render_state_base_address(ctx);
    i965_render_binding_table_pointers(ctx);
    i965_render_constant_color(ctx);
    i965_render_pipelined_pointers(ctx);
    i965_render_urb_layout(ctx);
    i965_render_cs_urb_layout(ctx);
    i965_render_drawing_rectangle(ctx);
    i965_render_vertex_elements(ctx);
    i965_render_startup(ctx);
    intel_batchbuffer_end_atomic(batch);
}

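/*
 * (Re)allocate all state buffer objects used by one render pass; any
 * previous BO is unreferenced first, so this is safe to call per frame.
 */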
static void
i965_render_initialize(VADriverContextP ctx)
{
    struct i965_driver_data *i965 = i965_driver_data(ctx);
    struct i965_render_state *render_state = &i965->render_state;
    dri_bo *bo;

    /* VERTEX BUFFER */
    dri_bo_unreference(render_state->vb.vertex_buffer);
    bo = dri_bo_alloc(i965->intel.bufmgr,
                      "vertex buffer",
                      4096,
                      4096);
    assert(bo);
    render_state->vb.vertex_buffer = bo;

    /* VS */
    dri_bo_unreference(render_state->vs.state);
    bo = dri_bo_alloc(i965->intel.bufmgr,
                      "vs state",
                      sizeof(struct i965_vs_unit_state),
                      64);
    assert(bo);
    render_state->vs.state = bo;

    /* GS */
    /* CLIP */
    /* SF */
    dri_bo_unreference(render_state->sf.state);
    bo = dri_bo_alloc(i965->intel.bufmgr,
                      "sf state",
                      sizeof(struct i965_sf_unit_state),
                      64);
    assert(bo);
    render_state->sf.state = bo;

    /* WM */
    dri_bo_unreference(render_state->wm.surface_state_binding_table_bo);
    bo = dri_bo_alloc(i965->intel.bufmgr,
                      "surface state & binding table",
                      (SURFACE_STATE_PADDED_SIZE + sizeof(unsigned int)) * MAX_RENDER_SURFACES,
                      4096);
    assert(bo);
    render_state->wm.surface_state_binding_table_bo = bo;

    dri_bo_unreference(render_state->wm.sampler);
    bo = dri_bo_alloc(i965->intel.bufmgr,
                      "sampler state",
                      MAX_SAMPLERS * sizeof(struct i965_sampler_state),
                      64);
    assert(bo);
    render_state->wm.sampler = bo;
    render_state->wm.sampler_count = 0;

    dri_bo_unreference(render_state->wm.state);
    bo = dri_bo_alloc(i965->intel.bufmgr,
                      "wm state",
                      sizeof(struct i965_wm_unit_state),
                      64);
    assert(bo);
    render_state->wm.state = bo;

    /* COLOR CALCULATOR */
    dri_bo_unreference(render_state->cc.state);
    bo = dri_bo_alloc(i965->intel.bufmgr,
                      "color calc state",
                      sizeof(struct i965_cc_unit_state),
                      64);
    assert(bo);
    render_state->cc.state = bo;

    dri_bo_unreference(render_state->cc.viewport);
    bo = dri_bo_alloc(i965->intel.bufmgr,
                      "cc viewport",
                      sizeof(struct i965_cc_viewport),
                      64);
    assert(bo);
    render_state->cc.viewport = bo;
}

1519 static void
1520 i965_render_put_surface(
1521     VADriverContextP   ctx,
1522     VASurfaceID        surface,
1523     const VARectangle *src_rect,
1524     const VARectangle *dst_rect,
1525     unsigned int       flags
1526 )
1527 {
1528     struct i965_driver_data *i965 = i965_driver_data(ctx);
1529     struct intel_batchbuffer *batch = i965->batch;
1530
1531     i965_render_initialize(ctx);
1532     i965_surface_render_state_setup(ctx, surface, src_rect, dst_rect);
1533     i965_surface_render_pipeline_setup(ctx);
1534     intel_batchbuffer_flush(batch);
1535 }
1536
static void
i965_render_put_subpicture(
    VADriverContextP   ctx,
    VASurfaceID        surface,
    const VARectangle *src_rect,
    const VARectangle *dst_rect
)
{
    struct i965_driver_data *i965 = i965_driver_data(ctx);
    struct intel_batchbuffer *batch = i965->batch;
    struct object_surface *obj_surface = SURFACE(surface);
    struct object_subpic *obj_subpic;

    assert(obj_surface);
    obj_subpic = SUBPIC(obj_surface->subpic);
    assert(obj_subpic);

    i965_render_initialize(ctx);
    i965_subpic_render_state_setup(ctx, surface, src_rect, dst_rect);
    i965_subpic_render_pipeline_setup(ctx);
    i965_render_upload_image_palette(ctx, obj_subpic->image, 0xff);
    intel_batchbuffer_flush(batch);
}

/*
 * for GEN6+
 */
static void
gen6_render_initialize(VADriverContextP ctx)
{
    struct i965_driver_data *i965 = i965_driver_data(ctx);
    struct i965_render_state *render_state = &i965->render_state;
    dri_bo *bo;

    /* VERTEX BUFFER */
    dri_bo_unreference(render_state->vb.vertex_buffer);
    bo = dri_bo_alloc(i965->intel.bufmgr,
                      "vertex buffer",
                      4096,
                      4096);
    assert(bo);
    render_state->vb.vertex_buffer = bo;

    /* WM */
    dri_bo_unreference(render_state->wm.surface_state_binding_table_bo);
    bo = dri_bo_alloc(i965->intel.bufmgr,
                      "surface state & binding table",
                      (SURFACE_STATE_PADDED_SIZE + sizeof(unsigned int)) * MAX_RENDER_SURFACES,
                      4096);
    assert(bo);
    render_state->wm.surface_state_binding_table_bo = bo;

    dri_bo_unreference(render_state->wm.sampler);
    bo = dri_bo_alloc(i965->intel.bufmgr,
                      "sampler state",
                      MAX_SAMPLERS * sizeof(struct i965_sampler_state),
                      4096);
    assert(bo);
    render_state->wm.sampler = bo;
    render_state->wm.sampler_count = 0;

    /* COLOR CALCULATOR */
    dri_bo_unreference(render_state->cc.state);
    bo = dri_bo_alloc(i965->intel.bufmgr,
                      "color calc state",
                      sizeof(struct gen6_color_calc_state),
                      4096);
    assert(bo);
    render_state->cc.state = bo;

    /* CC VIEWPORT */
    dri_bo_unreference(render_state->cc.viewport);
    bo = dri_bo_alloc(i965->intel.bufmgr,
                      "cc viewport",
                      sizeof(struct i965_cc_viewport),
                      4096);
    assert(bo);
    render_state->cc.viewport = bo;

    /* BLEND STATE */
    dri_bo_unreference(render_state->cc.blend);
    bo = dri_bo_alloc(i965->intel.bufmgr,
                      "blend state",
                      sizeof(struct gen6_blend_state),
                      4096);
    assert(bo);
    render_state->cc.blend = bo;

    /* DEPTH & STENCIL STATE */
    dri_bo_unreference(render_state->cc.depth_stencil);
    bo = dri_bo_alloc(i965->intel.bufmgr,
                      "depth & stencil state",
                      sizeof(struct gen6_depth_stencil_state),
                      4096);
    assert(bo);
    render_state->cc.depth_stencil = bo;
}

static void
gen6_render_color_calc_state(VADriverContextP ctx)
{
    struct i965_driver_data *i965 = i965_driver_data(ctx);
    struct i965_render_state *render_state = &i965->render_state;
    struct gen6_color_calc_state *color_calc_state;

    dri_bo_map(render_state->cc.state, 1);
    assert(render_state->cc.state->virtual);
    color_calc_state = render_state->cc.state->virtual;
    memset(color_calc_state, 0, sizeof(*color_calc_state));
    color_calc_state->constant_r = 1.0;
    color_calc_state->constant_g = 0.0;
    color_calc_state->constant_b = 1.0;
    color_calc_state->constant_a = 1.0;
    dri_bo_unmap(render_state->cc.state);
}

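/*
 * Straight copy: logic op function 0xc is COPY, so the render target is
 * simply overwritten with the shader output (no alpha blending).
 */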
static void
gen6_render_blend_state(VADriverContextP ctx)
{
    struct i965_driver_data *i965 = i965_driver_data(ctx);
    struct i965_render_state *render_state = &i965->render_state;
    struct gen6_blend_state *blend_state;

    dri_bo_map(render_state->cc.blend, 1);
    assert(render_state->cc.blend->virtual);
    blend_state = render_state->cc.blend->virtual;
    memset(blend_state, 0, sizeof(*blend_state));
    blend_state->blend1.logic_op_enable = 1;
    blend_state->blend1.logic_op_func = 0xc;
    dri_bo_unmap(render_state->cc.blend);
}

static void
gen6_render_depth_stencil_state(VADriverContextP ctx)
{
    struct i965_driver_data *i965 = i965_driver_data(ctx);
    struct i965_render_state *render_state = &i965->render_state;
    struct gen6_depth_stencil_state *depth_stencil_state;

    dri_bo_map(render_state->cc.depth_stencil, 1);
    assert(render_state->cc.depth_stencil->virtual);
    depth_stencil_state = render_state->cc.depth_stencil->virtual;
    memset(depth_stencil_state, 0, sizeof(*depth_stencil_state));
    dri_bo_unmap(render_state->cc.depth_stencil);
}

static void
gen6_render_setup_states(
    VADriverContextP   ctx,
    VASurfaceID        surface,
    const VARectangle *src_rect,
    const VARectangle *dst_rect
)
{
    i965_render_dest_surface_state(ctx, 0);
    i965_render_src_surfaces_state(ctx, surface);
    i965_render_sampler(ctx);
    i965_render_cc_viewport(ctx);
    gen6_render_color_calc_state(ctx);
    gen6_render_blend_state(ctx);
    gen6_render_depth_stencil_state(ctx);
    i965_render_upload_constants(ctx, surface);
    i965_render_upload_vertex(ctx, surface, src_rect, dst_rect);
}

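/*
 * States that never change for this pipeline: select the 3D pipe,
 * force 1 sample/pixel and leave the system instruction pointer null.
 */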
static void
gen6_emit_invariant_states(VADriverContextP ctx)
{
    struct i965_driver_data *i965 = i965_driver_data(ctx);
    struct intel_batchbuffer *batch = i965->batch;

    OUT_BATCH(batch, CMD_PIPELINE_SELECT | PIPELINE_SELECT_3D);

    OUT_BATCH(batch, GEN6_3DSTATE_MULTISAMPLE | (3 - 2));
    OUT_BATCH(batch, GEN6_3DSTATE_MULTISAMPLE_PIXEL_LOCATION_CENTER |
              GEN6_3DSTATE_MULTISAMPLE_NUMSAMPLES_1); /* 1 sample/pixel */
    OUT_BATCH(batch, 0);

    OUT_BATCH(batch, GEN6_3DSTATE_SAMPLE_MASK | (2 - 2));
    OUT_BATCH(batch, 1);

    /* Set system instruction pointer */
    OUT_BATCH(batch, CMD_STATE_SIP | 0);
    OUT_BATCH(batch, 0);
}

static void
gen6_emit_state_base_address(VADriverContextP ctx)
{
    struct i965_driver_data *i965 = i965_driver_data(ctx);
    struct intel_batchbuffer *batch = i965->batch;
    struct i965_render_state *render_state = &i965->render_state;

    OUT_BATCH(batch, CMD_STATE_BASE_ADDRESS | (10 - 2));
    OUT_BATCH(batch, BASE_ADDRESS_MODIFY); /* General state base address */
    OUT_RELOC(batch, render_state->wm.surface_state_binding_table_bo, I915_GEM_DOMAIN_INSTRUCTION, 0, BASE_ADDRESS_MODIFY); /* Surface state base address */
    OUT_BATCH(batch, BASE_ADDRESS_MODIFY); /* Dynamic state base address */
    OUT_BATCH(batch, BASE_ADDRESS_MODIFY); /* Indirect object base address */
    OUT_BATCH(batch, BASE_ADDRESS_MODIFY); /* Instruction base address */
    OUT_BATCH(batch, BASE_ADDRESS_MODIFY); /* General state upper bound */
    OUT_BATCH(batch, BASE_ADDRESS_MODIFY); /* Dynamic state upper bound */
    OUT_BATCH(batch, BASE_ADDRESS_MODIFY); /* Indirect object upper bound */
    OUT_BATCH(batch, BASE_ADDRESS_MODIFY); /* Instruction access upper bound */
}

static void
gen6_emit_viewport_state_pointers(VADriverContextP ctx)
{
    struct i965_driver_data *i965 = i965_driver_data(ctx);
    struct intel_batchbuffer *batch = i965->batch;
    struct i965_render_state *render_state = &i965->render_state;

    OUT_BATCH(batch, GEN6_3DSTATE_VIEWPORT_STATE_POINTERS |
              GEN6_3DSTATE_VIEWPORT_STATE_MODIFY_CC |
              (4 - 2));
    OUT_BATCH(batch, 0);
    OUT_BATCH(batch, 0);
    OUT_RELOC(batch, render_state->cc.viewport, I915_GEM_DOMAIN_INSTRUCTION, 0, 0);
}

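/* Minimal URB split: 24 VS entries (the GEN6 minimum) and no GS space. */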
static void
gen6_emit_urb(VADriverContextP ctx)
{
    struct i965_driver_data *i965 = i965_driver_data(ctx);
    struct intel_batchbuffer *batch = i965->batch;

    OUT_BATCH(batch, GEN6_3DSTATE_URB | (3 - 2));
    OUT_BATCH(batch, ((1 - 1) << GEN6_3DSTATE_URB_VS_SIZE_SHIFT) |
              (24 << GEN6_3DSTATE_URB_VS_ENTRIES_SHIFT)); /* at least 24 on GEN6 */
    OUT_BATCH(batch, (0 << GEN6_3DSTATE_URB_GS_SIZE_SHIFT) |
              (0 << GEN6_3DSTATE_URB_GS_ENTRIES_SHIFT)); /* no GS thread */
}

static void
gen6_emit_cc_state_pointers(VADriverContextP ctx)
{
    struct i965_driver_data *i965 = i965_driver_data(ctx);
    struct intel_batchbuffer *batch = i965->batch;
    struct i965_render_state *render_state = &i965->render_state;

    OUT_BATCH(batch, GEN6_3DSTATE_CC_STATE_POINTERS | (4 - 2));
    OUT_RELOC(batch, render_state->cc.blend, I915_GEM_DOMAIN_INSTRUCTION, 0, 1);
    OUT_RELOC(batch, render_state->cc.depth_stencil, I915_GEM_DOMAIN_INSTRUCTION, 0, 1);
    OUT_RELOC(batch, render_state->cc.state, I915_GEM_DOMAIN_INSTRUCTION, 0, 1);
}

static void
gen6_emit_sampler_state_pointers(VADriverContextP ctx)
{
    struct i965_driver_data *i965 = i965_driver_data(ctx);
    struct intel_batchbuffer *batch = i965->batch;
    struct i965_render_state *render_state = &i965->render_state;

    OUT_BATCH(batch, GEN6_3DSTATE_SAMPLER_STATE_POINTERS |
              GEN6_3DSTATE_SAMPLER_STATE_MODIFY_PS |
              (4 - 2));
    OUT_BATCH(batch, 0); /* VS */
    OUT_BATCH(batch, 0); /* GS */
    OUT_RELOC(batch, render_state->wm.sampler, I915_GEM_DOMAIN_INSTRUCTION, 0, 0);
}

static void
gen6_emit_binding_table(VADriverContextP ctx)
{
    struct i965_driver_data *i965 = i965_driver_data(ctx);
    struct intel_batchbuffer *batch = i965->batch;

    /* Binding table pointers */
    OUT_BATCH(batch, CMD_BINDING_TABLE_POINTERS |
              GEN6_BINDING_TABLE_MODIFY_PS |
              (4 - 2));
    OUT_BATCH(batch, 0);                /* vs */
    OUT_BATCH(batch, 0);                /* gs */
    /* Only the PS uses the binding table */
    OUT_BATCH(batch, BINDING_TABLE_OFFSET);
}

static void
gen6_emit_depth_buffer_state(VADriverContextP ctx)
{
    struct i965_driver_data *i965 = i965_driver_data(ctx);
    struct intel_batchbuffer *batch = i965->batch;

    OUT_BATCH(batch, CMD_DEPTH_BUFFER | (7 - 2));
    OUT_BATCH(batch, (I965_SURFACE_NULL << CMD_DEPTH_BUFFER_TYPE_SHIFT) |
              (I965_DEPTHFORMAT_D32_FLOAT << CMD_DEPTH_BUFFER_FORMAT_SHIFT));
    OUT_BATCH(batch, 0);
    OUT_BATCH(batch, 0);
    OUT_BATCH(batch, 0);
    OUT_BATCH(batch, 0);
    OUT_BATCH(batch, 0);

    OUT_BATCH(batch, CMD_CLEAR_PARAMS | (2 - 2));
    OUT_BATCH(batch, 0);
}

static void
gen6_emit_drawing_rectangle(VADriverContextP ctx)
{
    i965_render_drawing_rectangle(ctx);
}

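/*
 * VS and GS run without kernels: vertices pass straight through to the
 * SF unit, which is all a screen-aligned rectangle needs.
 */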
static void
gen6_emit_vs_state(VADriverContextP ctx)
{
    struct i965_driver_data *i965 = i965_driver_data(ctx);
    struct intel_batchbuffer *batch = i965->batch;

    /* disable VS constant buffer */
    OUT_BATCH(batch, GEN6_3DSTATE_CONSTANT_VS | (5 - 2));
    OUT_BATCH(batch, 0);
    OUT_BATCH(batch, 0);
    OUT_BATCH(batch, 0);
    OUT_BATCH(batch, 0);

    OUT_BATCH(batch, GEN6_3DSTATE_VS | (6 - 2));
    OUT_BATCH(batch, 0); /* without VS kernel */
    OUT_BATCH(batch, 0);
    OUT_BATCH(batch, 0);
    OUT_BATCH(batch, 0);
    OUT_BATCH(batch, 0); /* pass-through */
}

static void
gen6_emit_gs_state(VADriverContextP ctx)
{
    struct i965_driver_data *i965 = i965_driver_data(ctx);
    struct intel_batchbuffer *batch = i965->batch;

    /* disable GS constant buffer */
    OUT_BATCH(batch, GEN6_3DSTATE_CONSTANT_GS | (5 - 2));
    OUT_BATCH(batch, 0);
    OUT_BATCH(batch, 0);
    OUT_BATCH(batch, 0);
    OUT_BATCH(batch, 0);

    OUT_BATCH(batch, GEN6_3DSTATE_GS | (7 - 2));
    OUT_BATCH(batch, 0); /* without GS kernel */
    OUT_BATCH(batch, 0);
    OUT_BATCH(batch, 0);
    OUT_BATCH(batch, 0);
    OUT_BATCH(batch, 0);
    OUT_BATCH(batch, 0); /* pass-through */
}

static void
gen6_emit_clip_state(VADriverContextP ctx)
{
    struct i965_driver_data *i965 = i965_driver_data(ctx);
    struct intel_batchbuffer *batch = i965->batch;

    OUT_BATCH(batch, GEN6_3DSTATE_CLIP | (4 - 2));
    OUT_BATCH(batch, 0);
    OUT_BATCH(batch, 0); /* pass-through */
    OUT_BATCH(batch, 0);
}

static void
gen6_emit_sf_state(VADriverContextP ctx)
{
    struct i965_driver_data *i965 = i965_driver_data(ctx);
    struct intel_batchbuffer *batch = i965->batch;

    OUT_BATCH(batch, GEN6_3DSTATE_SF | (20 - 2));
    OUT_BATCH(batch, (1 << GEN6_3DSTATE_SF_NUM_OUTPUTS_SHIFT) |
              (1 << GEN6_3DSTATE_SF_URB_ENTRY_READ_LENGTH_SHIFT) |
              (0 << GEN6_3DSTATE_SF_URB_ENTRY_READ_OFFSET_SHIFT));
    OUT_BATCH(batch, 0);
    OUT_BATCH(batch, GEN6_3DSTATE_SF_CULL_NONE);
    OUT_BATCH(batch, 2 << GEN6_3DSTATE_SF_TRIFAN_PROVOKE_SHIFT); /* DW4 */
    OUT_BATCH(batch, 0);
    OUT_BATCH(batch, 0);
    OUT_BATCH(batch, 0);
    OUT_BATCH(batch, 0);
    OUT_BATCH(batch, 0); /* DW9 */
    OUT_BATCH(batch, 0);
    OUT_BATCH(batch, 0);
    OUT_BATCH(batch, 0);
    OUT_BATCH(batch, 0);
    OUT_BATCH(batch, 0); /* DW14 */
    OUT_BATCH(batch, 0);
    OUT_BATCH(batch, 0);
    OUT_BATCH(batch, 0);
    OUT_BATCH(batch, 0);
    OUT_BATCH(batch, 0); /* DW19 */
}

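/*
 * Pixel shader setup: push constants are sourced from the CURBE buffer
 * and the kernel is chosen by the caller (PS_KERNEL for video surfaces,
 * PS_SUBPIC_KERNEL for subpictures).
 */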
static void
gen6_emit_wm_state(VADriverContextP ctx, int kernel)
{
    struct i965_driver_data *i965 = i965_driver_data(ctx);
    struct intel_batchbuffer *batch = i965->batch;
    struct i965_render_state *render_state = &i965->render_state;

    OUT_BATCH(batch, GEN6_3DSTATE_CONSTANT_PS |
              GEN6_3DSTATE_CONSTANT_BUFFER_0_ENABLE |
              (5 - 2));
    OUT_RELOC(batch,
              render_state->curbe.bo,
              I915_GEM_DOMAIN_INSTRUCTION, 0,
              0);
    OUT_BATCH(batch, 0);
    OUT_BATCH(batch, 0);
    OUT_BATCH(batch, 0);

    OUT_BATCH(batch, GEN6_3DSTATE_WM | (9 - 2));
    OUT_RELOC(batch, render_state->render_kernels[kernel].bo,
              I915_GEM_DOMAIN_INSTRUCTION, 0,
              0);
    OUT_BATCH(batch, (1 << GEN6_3DSTATE_WM_SAMPLER_COUNT_SHITF) |
              (5 << GEN6_3DSTATE_WM_BINDING_TABLE_ENTRY_COUNT_SHIFT));
    OUT_BATCH(batch, 0);
    OUT_BATCH(batch, (6 << GEN6_3DSTATE_WM_DISPATCH_START_GRF_0_SHIFT)); /* DW4 */
    OUT_BATCH(batch, ((40 - 1) << GEN6_3DSTATE_WM_MAX_THREADS_SHIFT) |
              GEN6_3DSTATE_WM_DISPATCH_ENABLE |
              GEN6_3DSTATE_WM_16_DISPATCH_ENABLE);
    OUT_BATCH(batch, (1 << GEN6_3DSTATE_WM_NUM_SF_OUTPUTS_SHIFT) |
              GEN6_3DSTATE_WM_PERSPECTIVE_PIXEL_BARYCENTRIC);
    OUT_BATCH(batch, 0);
    OUT_BATCH(batch, 0);
}

static void
gen6_emit_vertex_element_state(VADriverContextP ctx)
{
    struct i965_driver_data *i965 = i965_driver_data(ctx);
    struct intel_batchbuffer *batch = i965->batch;

    /* Set up our vertex elements, sourced from the single vertex buffer. */
    OUT_BATCH(batch, CMD_VERTEX_ELEMENTS | (5 - 2));
    /* offset 0: X,Y -> {X, Y, 1.0, 1.0} */
    OUT_BATCH(batch, (0 << GEN6_VE0_VERTEX_BUFFER_INDEX_SHIFT) |
              GEN6_VE0_VALID |
              (I965_SURFACEFORMAT_R32G32_FLOAT << VE0_FORMAT_SHIFT) |
              (0 << VE0_OFFSET_SHIFT));
    OUT_BATCH(batch, (I965_VFCOMPONENT_STORE_SRC << VE1_VFCOMPONENT_0_SHIFT) |
              (I965_VFCOMPONENT_STORE_SRC << VE1_VFCOMPONENT_1_SHIFT) |
              (I965_VFCOMPONENT_STORE_1_FLT << VE1_VFCOMPONENT_2_SHIFT) |
              (I965_VFCOMPONENT_STORE_1_FLT << VE1_VFCOMPONENT_3_SHIFT));
    /* offset 8: S0, T0 -> {S0, T0, 1.0, 1.0} */
    OUT_BATCH(batch, (0 << GEN6_VE0_VERTEX_BUFFER_INDEX_SHIFT) |
              GEN6_VE0_VALID |
              (I965_SURFACEFORMAT_R32G32_FLOAT << VE0_FORMAT_SHIFT) |
              (8 << VE0_OFFSET_SHIFT));
    OUT_BATCH(batch, (I965_VFCOMPONENT_STORE_SRC << VE1_VFCOMPONENT_0_SHIFT) |
              (I965_VFCOMPONENT_STORE_SRC << VE1_VFCOMPONENT_1_SHIFT) |
              (I965_VFCOMPONENT_STORE_1_FLT << VE1_VFCOMPONENT_2_SHIFT) |
              (I965_VFCOMPONENT_STORE_1_FLT << VE1_VFCOMPONENT_3_SHIFT));
}

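/*
 * The destination rectangle is drawn as a 3-vertex RECTLIST; each
 * vertex is four floats (x, y, s, t), hence the 16-byte pitch below.
 */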
static void
gen6_emit_vertices(VADriverContextP ctx)
{
    struct i965_driver_data *i965 = i965_driver_data(ctx);
    struct intel_batchbuffer *batch = i965->batch;
    struct i965_render_state *render_state = &i965->render_state;

    BEGIN_BATCH(batch, 11);
    OUT_BATCH(batch, CMD_VERTEX_BUFFERS | 3);
    OUT_BATCH(batch,
              (0 << GEN6_VB0_BUFFER_INDEX_SHIFT) |
              GEN6_VB0_VERTEXDATA |
              ((4 * 4) << VB0_BUFFER_PITCH_SHIFT));
    OUT_RELOC(batch, render_state->vb.vertex_buffer, I915_GEM_DOMAIN_VERTEX, 0, 0);
    OUT_RELOC(batch, render_state->vb.vertex_buffer, I915_GEM_DOMAIN_VERTEX, 0, 12 * 4);
    OUT_BATCH(batch, 0);

    OUT_BATCH(batch,
              CMD_3DPRIMITIVE |
              _3DPRIMITIVE_VERTEX_SEQUENTIAL |
              (_3DPRIM_RECTLIST << _3DPRIMITIVE_TOPOLOGY_SHIFT) |
              (0 << 9) |
              4);
    OUT_BATCH(batch, 3); /* vertex count per instance */
    OUT_BATCH(batch, 0); /* start vertex offset */
    OUT_BATCH(batch, 1); /* single instance */
    OUT_BATCH(batch, 0); /* start instance location */
    OUT_BATCH(batch, 0); /* index buffer offset, ignored */
    ADVANCE_BATCH(batch);
}

static void
gen6_render_emit_states(VADriverContextP ctx, int kernel)
{
    struct i965_driver_data *i965 = i965_driver_data(ctx);
    struct intel_batchbuffer *batch = i965->batch;

    intel_batchbuffer_start_atomic(batch, 0x1000);
    intel_batchbuffer_emit_mi_flush(batch);
    gen6_emit_invariant_states(ctx);
    gen6_emit_state_base_address(ctx);
    gen6_emit_viewport_state_pointers(ctx);
    gen6_emit_urb(ctx);
    gen6_emit_cc_state_pointers(ctx);
    gen6_emit_sampler_state_pointers(ctx);
    gen6_emit_vs_state(ctx);
    gen6_emit_gs_state(ctx);
    gen6_emit_clip_state(ctx);
    gen6_emit_sf_state(ctx);
    gen6_emit_wm_state(ctx, kernel);
    gen6_emit_binding_table(ctx);
    gen6_emit_depth_buffer_state(ctx);
    gen6_emit_drawing_rectangle(ctx);
    gen6_emit_vertex_element_state(ctx);
    gen6_emit_vertices(ctx);
    intel_batchbuffer_end_atomic(batch);
}

static void
gen6_render_put_surface(
    VADriverContextP   ctx,
    VASurfaceID        surface,
    const VARectangle *src_rect,
    const VARectangle *dst_rect,
    unsigned int       flags
)
{
    struct i965_driver_data *i965 = i965_driver_data(ctx);
    struct intel_batchbuffer *batch = i965->batch;

    gen6_render_initialize(ctx);
    gen6_render_setup_states(ctx, surface, src_rect, dst_rect);
    i965_clear_dest_region(ctx);
    gen6_render_emit_states(ctx, PS_KERNEL);
    intel_batchbuffer_flush(batch);
}

static void
gen6_subpicture_render_blend_state(VADriverContextP ctx)
{
    struct i965_driver_data *i965 = i965_driver_data(ctx);
    struct i965_render_state *render_state = &i965->render_state;
    struct gen6_blend_state *blend_state;

    dri_bo_map(render_state->cc.blend, 1);
    assert(render_state->cc.blend->virtual);
    blend_state = render_state->cc.blend->virtual;
    memset(blend_state, 0, sizeof(*blend_state));
    blend_state->blend0.dest_blend_factor = I965_BLENDFACTOR_INV_SRC_ALPHA;
    blend_state->blend0.source_blend_factor = I965_BLENDFACTOR_SRC_ALPHA;
    blend_state->blend0.blend_func = I965_BLENDFUNCTION_ADD;
    blend_state->blend0.blend_enable = 1;
    blend_state->blend1.post_blend_clamp_enable = 1;
    blend_state->blend1.pre_blend_clamp_enable = 1;
    blend_state->blend1.clamp_range = 0; /* clamp range [0, 1] */
    dri_bo_unmap(render_state->cc.blend);
}

static void
gen6_subpicture_render_setup_states(
    VADriverContextP   ctx,
    VASurfaceID        surface,
    const VARectangle *src_rect,
    const VARectangle *dst_rect
)
{
    i965_render_dest_surface_state(ctx, 0);
    i965_subpic_render_src_surfaces_state(ctx, surface);
    i965_render_sampler(ctx);
    i965_render_cc_viewport(ctx);
    gen6_render_color_calc_state(ctx);
    gen6_subpicture_render_blend_state(ctx);
    gen6_render_depth_stencil_state(ctx);
    i965_subpic_render_upload_vertex(ctx, surface, dst_rect);
}

static void
gen6_render_put_subpicture(
    VADriverContextP   ctx,
    VASurfaceID        surface,
    const VARectangle *src_rect,
    const VARectangle *dst_rect
)
{
    struct i965_driver_data *i965 = i965_driver_data(ctx);
    struct intel_batchbuffer *batch = i965->batch;
    struct object_surface *obj_surface = SURFACE(surface);
    struct object_subpic *obj_subpic;

    assert(obj_surface);
    obj_subpic = SUBPIC(obj_surface->subpic);
    assert(obj_subpic);
    gen6_render_initialize(ctx);
    gen6_subpicture_render_setup_states(ctx, surface, src_rect, dst_rect);
    gen6_render_emit_states(ctx, PS_SUBPIC_KERNEL);
    i965_render_upload_image_palette(ctx, obj_subpic->image, 0xff);
    intel_batchbuffer_flush(batch);
}

/*
 * for GEN7
 */
static void
gen7_render_initialize(VADriverContextP ctx)
{
    struct i965_driver_data *i965 = i965_driver_data(ctx);
    struct i965_render_state *render_state = &i965->render_state;
    dri_bo *bo;

    /* VERTEX BUFFER */
    dri_bo_unreference(render_state->vb.vertex_buffer);
    bo = dri_bo_alloc(i965->intel.bufmgr,
                      "vertex buffer",
                      4096,
                      4096);
    assert(bo);
    render_state->vb.vertex_buffer = bo;

    /* WM */
    dri_bo_unreference(render_state->wm.surface_state_binding_table_bo);
    bo = dri_bo_alloc(i965->intel.bufmgr,
                      "surface state & binding table",
                      (SURFACE_STATE_PADDED_SIZE + sizeof(unsigned int)) * MAX_RENDER_SURFACES,
                      4096);
    assert(bo);
    render_state->wm.surface_state_binding_table_bo = bo;

    dri_bo_unreference(render_state->wm.sampler);
    bo = dri_bo_alloc(i965->intel.bufmgr,
                      "sampler state",
                      MAX_SAMPLERS * sizeof(struct gen7_sampler_state),
                      4096);
    assert(bo);
    render_state->wm.sampler = bo;
    render_state->wm.sampler_count = 0;

    /* COLOR CALCULATOR */
    dri_bo_unreference(render_state->cc.state);
    bo = dri_bo_alloc(i965->intel.bufmgr,
                      "color calc state",
                      sizeof(struct gen6_color_calc_state),
                      4096);
    assert(bo);
    render_state->cc.state = bo;

    /* CC VIEWPORT */
    dri_bo_unreference(render_state->cc.viewport);
    bo = dri_bo_alloc(i965->intel.bufmgr,
                      "cc viewport",
                      sizeof(struct i965_cc_viewport),
                      4096);
    assert(bo);
    render_state->cc.viewport = bo;

    /* BLEND STATE */
    dri_bo_unreference(render_state->cc.blend);
    bo = dri_bo_alloc(i965->intel.bufmgr,
                      "blend state",
                      sizeof(struct gen6_blend_state),
                      4096);
    assert(bo);
    render_state->cc.blend = bo;

    /* DEPTH & STENCIL STATE */
    dri_bo_unreference(render_state->cc.depth_stencil);
    bo = dri_bo_alloc(i965->intel.bufmgr,
                      "depth & stencil state",
                      sizeof(struct gen6_depth_stencil_state),
                      4096);
    assert(bo);
    render_state->cc.depth_stencil = bo;
}

static void
gen7_render_color_calc_state(VADriverContextP ctx)
{
    struct i965_driver_data *i965 = i965_driver_data(ctx);
    struct i965_render_state *render_state = &i965->render_state;
    struct gen6_color_calc_state *color_calc_state;

    dri_bo_map(render_state->cc.state, 1);
    assert(render_state->cc.state->virtual);
    color_calc_state = render_state->cc.state->virtual;
    memset(color_calc_state, 0, sizeof(*color_calc_state));
    color_calc_state->constant_r = 1.0;
    color_calc_state->constant_g = 0.0;
    color_calc_state->constant_b = 1.0;
    color_calc_state->constant_a = 1.0;
    dri_bo_unmap(render_state->cc.state);
}

static void
gen7_render_blend_state(VADriverContextP ctx)
{
    struct i965_driver_data *i965 = i965_driver_data(ctx);
    struct i965_render_state *render_state = &i965->render_state;
    struct gen6_blend_state *blend_state;

    dri_bo_map(render_state->cc.blend, 1);
    assert(render_state->cc.blend->virtual);
    blend_state = render_state->cc.blend->virtual;
    memset(blend_state, 0, sizeof(*blend_state));
    blend_state->blend1.logic_op_enable = 1;
    blend_state->blend1.logic_op_func = 0xc;
    blend_state->blend1.pre_blend_clamp_enable = 1;
    dri_bo_unmap(render_state->cc.blend);
}

static void
gen7_render_depth_stencil_state(VADriverContextP ctx)
{
    struct i965_driver_data *i965 = i965_driver_data(ctx);
    struct i965_render_state *render_state = &i965->render_state;
    struct gen6_depth_stencil_state *depth_stencil_state;

    dri_bo_map(render_state->cc.depth_stencil, 1);
    assert(render_state->cc.depth_stencil->virtual);
    depth_stencil_state = render_state->cc.depth_stencil->virtual;
    memset(depth_stencil_state, 0, sizeof(*depth_stencil_state));
    dri_bo_unmap(render_state->cc.depth_stencil);
}

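/*
 * GEN7 has its own sampler state layout; the bilinear filtering and
 * coordinate clamping below mirror what i965_render_sampler() programs
 * for the earlier generations.
 */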
static void
gen7_render_sampler(VADriverContextP ctx)
{
    struct i965_driver_data *i965 = i965_driver_data(ctx);
    struct i965_render_state *render_state = &i965->render_state;
    struct gen7_sampler_state *sampler_state;
    int i;

    assert(render_state->wm.sampler_count > 0);
    assert(render_state->wm.sampler_count <= MAX_SAMPLERS);

    dri_bo_map(render_state->wm.sampler, 1);
    assert(render_state->wm.sampler->virtual);
    sampler_state = render_state->wm.sampler->virtual;
    for (i = 0; i < render_state->wm.sampler_count; i++) {
        memset(sampler_state, 0, sizeof(*sampler_state));
        sampler_state->ss0.min_filter = I965_MAPFILTER_LINEAR;
        sampler_state->ss0.mag_filter = I965_MAPFILTER_LINEAR;
        sampler_state->ss3.r_wrap_mode = I965_TEXCOORDMODE_CLAMP;
        sampler_state->ss3.s_wrap_mode = I965_TEXCOORDMODE_CLAMP;
        sampler_state->ss3.t_wrap_mode = I965_TEXCOORDMODE_CLAMP;
        sampler_state++;
    }

    dri_bo_unmap(render_state->wm.sampler);
}

static void
gen7_render_setup_states(
    VADriverContextP   ctx,
    VASurfaceID        surface,
    const VARectangle *src_rect,
    const VARectangle *dst_rect
)
{
    i965_render_dest_surface_state(ctx, 0);
    i965_render_src_surfaces_state(ctx, surface);
    gen7_render_sampler(ctx);
    i965_render_cc_viewport(ctx);
    gen7_render_color_calc_state(ctx);
    gen7_render_blend_state(ctx);
    gen7_render_depth_stencil_state(ctx);
    i965_render_upload_constants(ctx, surface);
    i965_render_upload_vertex(ctx, surface, src_rect, dst_rect);
}

static void
gen7_emit_invariant_states(VADriverContextP ctx)
{
    struct i965_driver_data *i965 = i965_driver_data(ctx);
    struct intel_batchbuffer *batch = i965->batch;

    BEGIN_BATCH(batch, 1);
    OUT_BATCH(batch, CMD_PIPELINE_SELECT | PIPELINE_SELECT_3D);
    ADVANCE_BATCH(batch);

    BEGIN_BATCH(batch, 4);
    OUT_BATCH(batch, GEN6_3DSTATE_MULTISAMPLE | (4 - 2));
    OUT_BATCH(batch, GEN6_3DSTATE_MULTISAMPLE_PIXEL_LOCATION_CENTER |
              GEN6_3DSTATE_MULTISAMPLE_NUMSAMPLES_1); /* 1 sample/pixel */
    OUT_BATCH(batch, 0);
    OUT_BATCH(batch, 0);
    ADVANCE_BATCH(batch);

    BEGIN_BATCH(batch, 2);
    OUT_BATCH(batch, GEN6_3DSTATE_SAMPLE_MASK | (2 - 2));
    OUT_BATCH(batch, 1);
    ADVANCE_BATCH(batch);

    /* Set system instruction pointer */
    BEGIN_BATCH(batch, 2);
    OUT_BATCH(batch, CMD_STATE_SIP | 0);
    OUT_BATCH(batch, 0);
    ADVANCE_BATCH(batch);
}

static void
gen7_emit_state_base_address(VADriverContextP ctx)
{
    struct i965_driver_data *i965 = i965_driver_data(ctx);
    struct intel_batchbuffer *batch = i965->batch;
    struct i965_render_state *render_state = &i965->render_state;

    OUT_BATCH(batch, CMD_STATE_BASE_ADDRESS | (10 - 2));
    OUT_BATCH(batch, BASE_ADDRESS_MODIFY); /* General state base address */
    OUT_RELOC(batch, render_state->wm.surface_state_binding_table_bo, I915_GEM_DOMAIN_INSTRUCTION, 0, BASE_ADDRESS_MODIFY); /* Surface state base address */
    OUT_BATCH(batch, BASE_ADDRESS_MODIFY); /* Dynamic state base address */
    OUT_BATCH(batch, BASE_ADDRESS_MODIFY); /* Indirect object base address */
    OUT_BATCH(batch, BASE_ADDRESS_MODIFY); /* Instruction base address */
    OUT_BATCH(batch, BASE_ADDRESS_MODIFY); /* General state upper bound */
    OUT_BATCH(batch, BASE_ADDRESS_MODIFY); /* Dynamic state upper bound */
    OUT_BATCH(batch, BASE_ADDRESS_MODIFY); /* Indirect object upper bound */
    OUT_BATCH(batch, BASE_ADDRESS_MODIFY); /* Instruction access upper bound */
}

static void
gen7_emit_viewport_state_pointers(VADriverContextP ctx)
{
    struct i965_driver_data *i965 = i965_driver_data(ctx);
    struct intel_batchbuffer *batch = i965->batch;
    struct i965_render_state *render_state = &i965->render_state;

    BEGIN_BATCH(batch, 2);
    OUT_BATCH(batch, GEN7_3DSTATE_VIEWPORT_STATE_POINTERS_CC | (2 - 2));
    OUT_RELOC(batch,
              render_state->cc.viewport,
              I915_GEM_DOMAIN_INSTRUCTION, 0,
              0);
    ADVANCE_BATCH(batch);

    BEGIN_BATCH(batch, 2);
    OUT_BATCH(batch, GEN7_3DSTATE_VIEWPORT_STATE_POINTERS_SF_CL | (2 - 2));
    OUT_BATCH(batch, 0);
    ADVANCE_BATCH(batch);
}

/*
 * URB layout on GEN7
 * ----------------------------------------
 * | PS Push Constants (8KB) | VS entries |
 * ----------------------------------------
 */
static void
gen7_emit_urb(VADriverContextP ctx)
{
    struct i965_driver_data *i965 = i965_driver_data(ctx);
    struct intel_batchbuffer *batch = i965->batch;

    BEGIN_BATCH(batch, 2);
    OUT_BATCH(batch, GEN7_3DSTATE_PUSH_CONSTANT_ALLOC_PS | (2 - 2));
    OUT_BATCH(batch, 8); /* in 1KBs */
    ADVANCE_BATCH(batch);

    BEGIN_BATCH(batch, 2);
    OUT_BATCH(batch, GEN7_3DSTATE_URB_VS | (2 - 2));
    OUT_BATCH(batch,
              (32 << GEN7_URB_ENTRY_NUMBER_SHIFT) | /* at least 32 */
              ((2 - 1) << GEN7_URB_ENTRY_SIZE_SHIFT) |
              (1 << GEN7_URB_STARTING_ADDRESS_SHIFT));
    ADVANCE_BATCH(batch);

    BEGIN_BATCH(batch, 2);
    OUT_BATCH(batch, GEN7_3DSTATE_URB_GS | (2 - 2));
    OUT_BATCH(batch,
              (0 << GEN7_URB_ENTRY_SIZE_SHIFT) |
              (1 << GEN7_URB_STARTING_ADDRESS_SHIFT));
    ADVANCE_BATCH(batch);

    BEGIN_BATCH(batch, 2);
    OUT_BATCH(batch, GEN7_3DSTATE_URB_HS | (2 - 2));
    OUT_BATCH(batch,
              (0 << GEN7_URB_ENTRY_SIZE_SHIFT) |
              (2 << GEN7_URB_STARTING_ADDRESS_SHIFT));
    ADVANCE_BATCH(batch);

    BEGIN_BATCH(batch, 2);
    OUT_BATCH(batch, GEN7_3DSTATE_URB_DS | (2 - 2));
    OUT_BATCH(batch,
              (0 << GEN7_URB_ENTRY_SIZE_SHIFT) |
              (2 << GEN7_URB_STARTING_ADDRESS_SHIFT));
    ADVANCE_BATCH(batch);
}


static void
gen7_emit_cc_state_pointers(VADriverContextP ctx)
{
    struct i965_driver_data *i965 = i965_driver_data(ctx);
    struct intel_batchbuffer *batch = i965->batch;
    struct i965_render_state *render_state = &i965->render_state;

    BEGIN_BATCH(batch, 2);
    OUT_BATCH(batch, GEN6_3DSTATE_CC_STATE_POINTERS | (2 - 2));
    OUT_RELOC(batch,
              render_state->cc.state,
              I915_GEM_DOMAIN_INSTRUCTION, 0,
              1);
    ADVANCE_BATCH(batch);

    BEGIN_BATCH(batch, 2);
    OUT_BATCH(batch, GEN7_3DSTATE_BLEND_STATE_POINTERS | (2 - 2));
    OUT_RELOC(batch,
              render_state->cc.blend,
              I915_GEM_DOMAIN_INSTRUCTION, 0,
              1);
    ADVANCE_BATCH(batch);

    BEGIN_BATCH(batch, 2);
    OUT_BATCH(batch, GEN7_3DSTATE_DEPTH_STENCIL_STATE_POINTERS | (2 - 2));
    OUT_RELOC(batch,
              render_state->cc.depth_stencil,
              I915_GEM_DOMAIN_INSTRUCTION, 0,
              1);
    ADVANCE_BATCH(batch);
}

static void
gen7_emit_sampler_state_pointers(VADriverContextP ctx)
{
    struct i965_driver_data *i965 = i965_driver_data(ctx);
    struct intel_batchbuffer *batch = i965->batch;
    struct i965_render_state *render_state = &i965->render_state;

    BEGIN_BATCH(batch, 2);
    OUT_BATCH(batch, GEN7_3DSTATE_SAMPLER_STATE_POINTERS_PS | (2 - 2));
    OUT_RELOC(batch,
              render_state->wm.sampler,
              I915_GEM_DOMAIN_INSTRUCTION, 0,
              0);
    ADVANCE_BATCH(batch);
}

static void
gen7_emit_binding_table(VADriverContextP ctx)
{
    struct i965_driver_data *i965 = i965_driver_data(ctx);
    struct intel_batchbuffer *batch = i965->batch;

    BEGIN_BATCH(batch, 2);
    OUT_BATCH(batch, GEN7_3DSTATE_BINDING_TABLE_POINTERS_PS | (2 - 2));
    OUT_BATCH(batch, BINDING_TABLE_OFFSET);
    ADVANCE_BATCH(batch);
}

static void
gen7_emit_depth_buffer_state(VADriverContextP ctx)
{
    struct i965_driver_data *i965 = i965_driver_data(ctx);
    struct intel_batchbuffer *batch = i965->batch;

    BEGIN_BATCH(batch, 7);
    OUT_BATCH(batch, GEN7_3DSTATE_DEPTH_BUFFER | (7 - 2));
    OUT_BATCH(batch,
              (I965_DEPTHFORMAT_D32_FLOAT << 18) |
              (I965_SURFACE_NULL << 29));
    OUT_BATCH(batch, 0);
    OUT_BATCH(batch, 0);
    OUT_BATCH(batch, 0);
    OUT_BATCH(batch, 0);
    OUT_BATCH(batch, 0);
    ADVANCE_BATCH(batch);

    BEGIN_BATCH(batch, 3);
    OUT_BATCH(batch, GEN7_3DSTATE_CLEAR_PARAMS | (3 - 2));
    OUT_BATCH(batch, 0);
    OUT_BATCH(batch, 0);
    ADVANCE_BATCH(batch);
}

static void
gen7_emit_drawing_rectangle(VADriverContextP ctx)
{
    i965_render_drawing_rectangle(ctx);
}

static void
gen7_emit_vs_state(VADriverContextP ctx)
{
    struct i965_driver_data *i965 = i965_driver_data(ctx);
    struct intel_batchbuffer *batch = i965->batch;

    /* disable VS constant buffer */
    OUT_BATCH(batch, GEN6_3DSTATE_CONSTANT_VS | (7 - 2));
    OUT_BATCH(batch, 0);
    OUT_BATCH(batch, 0);
    OUT_BATCH(batch, 0);
    OUT_BATCH(batch, 0);
    OUT_BATCH(batch, 0);
    OUT_BATCH(batch, 0);

    OUT_BATCH(batch, GEN6_3DSTATE_VS | (6 - 2));
    OUT_BATCH(batch, 0); /* without VS kernel */
    OUT_BATCH(batch, 0);
    OUT_BATCH(batch, 0);
    OUT_BATCH(batch, 0);
    OUT_BATCH(batch, 0); /* pass-through */
}

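/*
 * GEN7 exposes the full geometry pipeline (GS, HS, TE, DS and stream
 * output); none of it is needed for a textured blit, so every stage is
 * explicitly disabled here.
 */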
static void
gen7_emit_bypass_state(VADriverContextP ctx)
{
    struct i965_driver_data *i965 = i965_driver_data(ctx);
    struct intel_batchbuffer *batch = i965->batch;

    /* bypass GS */
    BEGIN_BATCH(batch, 7);
    OUT_BATCH(batch, GEN6_3DSTATE_CONSTANT_GS | (7 - 2));
    OUT_BATCH(batch, 0);
    OUT_BATCH(batch, 0);
    OUT_BATCH(batch, 0);
    OUT_BATCH(batch, 0);
    OUT_BATCH(batch, 0);
    OUT_BATCH(batch, 0);
    ADVANCE_BATCH(batch);

    BEGIN_BATCH(batch, 7);
    OUT_BATCH(batch, GEN6_3DSTATE_GS | (7 - 2));
    OUT_BATCH(batch, 0); /* without GS kernel */
    OUT_BATCH(batch, 0);
    OUT_BATCH(batch, 0);
    OUT_BATCH(batch, 0);
    OUT_BATCH(batch, 0);
    OUT_BATCH(batch, 0); /* pass-through */
    ADVANCE_BATCH(batch);

    BEGIN_BATCH(batch, 2);
    OUT_BATCH(batch, GEN7_3DSTATE_BINDING_TABLE_POINTERS_GS | (2 - 2));
    OUT_BATCH(batch, 0);
    ADVANCE_BATCH(batch);

    /* disable HS */
    BEGIN_BATCH(batch, 7);
    OUT_BATCH(batch, GEN7_3DSTATE_CONSTANT_HS | (7 - 2));
    OUT_BATCH(batch, 0);
    OUT_BATCH(batch, 0);
    OUT_BATCH(batch, 0);
    OUT_BATCH(batch, 0);
    OUT_BATCH(batch, 0);
    OUT_BATCH(batch, 0);
    ADVANCE_BATCH(batch);

    BEGIN_BATCH(batch, 7);
    OUT_BATCH(batch, GEN7_3DSTATE_HS | (7 - 2));
    OUT_BATCH(batch, 0);
    OUT_BATCH(batch, 0);
    OUT_BATCH(batch, 0);
    OUT_BATCH(batch, 0);
    OUT_BATCH(batch, 0);
    OUT_BATCH(batch, 0);
    ADVANCE_BATCH(batch);

    BEGIN_BATCH(batch, 2);
    OUT_BATCH(batch, GEN7_3DSTATE_BINDING_TABLE_POINTERS_HS | (2 - 2));
    OUT_BATCH(batch, 0);
    ADVANCE_BATCH(batch);

    /* Disable TE */
    BEGIN_BATCH(batch, 4);
    OUT_BATCH(batch, GEN7_3DSTATE_TE | (4 - 2));
    OUT_BATCH(batch, 0);
    OUT_BATCH(batch, 0);
    OUT_BATCH(batch, 0);
    ADVANCE_BATCH(batch);

    /* Disable DS */
    BEGIN_BATCH(batch, 7);
    OUT_BATCH(batch, GEN7_3DSTATE_CONSTANT_DS | (7 - 2));
    OUT_BATCH(batch, 0);
    OUT_BATCH(batch, 0);
    OUT_BATCH(batch, 0);
    OUT_BATCH(batch, 0);
    OUT_BATCH(batch, 0);
    OUT_BATCH(batch, 0);
    ADVANCE_BATCH(batch);

    BEGIN_BATCH(batch, 6);
    OUT_BATCH(batch, GEN7_3DSTATE_DS | (6 - 2));
    OUT_BATCH(batch, 0);
    OUT_BATCH(batch, 0);
    OUT_BATCH(batch, 0);
    OUT_BATCH(batch, 0);
    OUT_BATCH(batch, 0);
    ADVANCE_BATCH(batch);

    BEGIN_BATCH(batch, 2);
    OUT_BATCH(batch, GEN7_3DSTATE_BINDING_TABLE_POINTERS_DS | (2 - 2));
    OUT_BATCH(batch, 0);
    ADVANCE_BATCH(batch);

    /* Disable STREAMOUT */
    BEGIN_BATCH(batch, 3);
    OUT_BATCH(batch, GEN7_3DSTATE_STREAMOUT | (3 - 2));
    OUT_BATCH(batch, 0);
    OUT_BATCH(batch, 0);
    ADVANCE_BATCH(batch);
}

static void
gen7_emit_clip_state(VADriverContextP ctx)
{
    struct i965_driver_data *i965 = i965_driver_data(ctx);
    struct intel_batchbuffer *batch = i965->batch;

    OUT_BATCH(batch, GEN6_3DSTATE_CLIP | (4 - 2));
    OUT_BATCH(batch, 0);
    OUT_BATCH(batch, 0); /* pass-through */
    OUT_BATCH(batch, 0);
}

static void
gen7_emit_sf_state(VADriverContextP ctx)
{
    struct i965_driver_data *i965 = i965_driver_data(ctx);
    struct intel_batchbuffer *batch = i965->batch;

    BEGIN_BATCH(batch, 14);
    OUT_BATCH(batch, GEN7_3DSTATE_SBE | (14 - 2));
    OUT_BATCH(batch,
              (1 << GEN7_SBE_NUM_OUTPUTS_SHIFT) |
              (1 << GEN7_SBE_URB_ENTRY_READ_LENGTH_SHIFT) |
              (0 << GEN7_SBE_URB_ENTRY_READ_OFFSET_SHIFT));
    OUT_BATCH(batch, 0);
    OUT_BATCH(batch, 0);
    OUT_BATCH(batch, 0); /* DW4 */
    OUT_BATCH(batch, 0);
    OUT_BATCH(batch, 0);
    OUT_BATCH(batch, 0);
    OUT_BATCH(batch, 0);
    OUT_BATCH(batch, 0); /* DW9 */
    OUT_BATCH(batch, 0);
    OUT_BATCH(batch, 0);
    OUT_BATCH(batch, 0);
    OUT_BATCH(batch, 0);
    ADVANCE_BATCH(batch);

    BEGIN_BATCH(batch, 7);
    OUT_BATCH(batch, GEN6_3DSTATE_SF | (7 - 2));
    OUT_BATCH(batch, 0);
    OUT_BATCH(batch, GEN6_3DSTATE_SF_CULL_NONE);
    OUT_BATCH(batch, 2 << GEN6_3DSTATE_SF_TRIFAN_PROVOKE_SHIFT);
    OUT_BATCH(batch, 0);
    OUT_BATCH(batch, 0);
    OUT_BATCH(batch, 0);
    ADVANCE_BATCH(batch);
}

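/*
 * On GEN7 the WM unit is split into 3DSTATE_WM and 3DSTATE_PS: WM turns
 * on dispatch and barycentrics, CONSTANT_PS points at the CURBE buffer,
 * and PS carries the kernel pointer and thread limits.
 */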
static void
gen7_emit_wm_state(VADriverContextP ctx, int kernel)
{
    struct i965_driver_data *i965 = i965_driver_data(ctx);
    struct intel_batchbuffer *batch = i965->batch;
    struct i965_render_state *render_state = &i965->render_state;

    BEGIN_BATCH(batch, 3);
    OUT_BATCH(batch, GEN6_3DSTATE_WM | (3 - 2));
    OUT_BATCH(batch,
              GEN7_WM_DISPATCH_ENABLE |
              GEN7_WM_PERSPECTIVE_PIXEL_BARYCENTRIC);
    OUT_BATCH(batch, 0);
    ADVANCE_BATCH(batch);

    BEGIN_BATCH(batch, 7);
    OUT_BATCH(batch, GEN6_3DSTATE_CONSTANT_PS | (7 - 2));
    OUT_BATCH(batch, 1);
    OUT_BATCH(batch, 0);
    OUT_RELOC(batch,
              render_state->curbe.bo,
              I915_GEM_DOMAIN_INSTRUCTION, 0,
              0);
    OUT_BATCH(batch, 0);
    OUT_BATCH(batch, 0);
    OUT_BATCH(batch, 0);
    ADVANCE_BATCH(batch);

    BEGIN_BATCH(batch, 8);
    OUT_BATCH(batch, GEN7_3DSTATE_PS | (8 - 2));
    OUT_RELOC(batch,
              render_state->render_kernels[kernel].bo,
              I915_GEM_DOMAIN_INSTRUCTION, 0,
              0);
    OUT_BATCH(batch,
              (1 << GEN7_PS_SAMPLER_COUNT_SHIFT) |
              (5 << GEN7_PS_BINDING_TABLE_ENTRY_COUNT_SHIFT));
    OUT_BATCH(batch, 0); /* scratch space base offset */
    OUT_BATCH(batch,
              ((86 - 1) << GEN7_PS_MAX_THREADS_SHIFT) |
              GEN7_PS_PUSH_CONSTANT_ENABLE |
              GEN7_PS_ATTRIBUTE_ENABLE |
              GEN7_PS_16_DISPATCH_ENABLE);
    OUT_BATCH(batch,
              (6 << GEN7_PS_DISPATCH_START_GRF_SHIFT_0));
    OUT_BATCH(batch, 0); /* kernel 1 pointer */
    OUT_BATCH(batch, 0); /* kernel 2 pointer */
    ADVANCE_BATCH(batch);
}

static void
gen7_emit_vertex_element_state(VADriverContextP ctx)
{
    struct i965_driver_data *i965 = i965_driver_data(ctx);
    struct intel_batchbuffer *batch = i965->batch;

    /* Set up our vertex elements, sourced from the single vertex buffer. */
    OUT_BATCH(batch, CMD_VERTEX_ELEMENTS | (5 - 2));
    /* offset 0: X,Y -> {X, Y, 1.0, 1.0} */
    OUT_BATCH(batch, (0 << GEN6_VE0_VERTEX_BUFFER_INDEX_SHIFT) |
              GEN6_VE0_VALID |
              (I965_SURFACEFORMAT_R32G32_FLOAT << VE0_FORMAT_SHIFT) |
              (0 << VE0_OFFSET_SHIFT));
    OUT_BATCH(batch, (I965_VFCOMPONENT_STORE_SRC << VE1_VFCOMPONENT_0_SHIFT) |
              (I965_VFCOMPONENT_STORE_SRC << VE1_VFCOMPONENT_1_SHIFT) |
              (I965_VFCOMPONENT_STORE_1_FLT << VE1_VFCOMPONENT_2_SHIFT) |
              (I965_VFCOMPONENT_STORE_1_FLT << VE1_VFCOMPONENT_3_SHIFT));
    /* offset 8: S0, T0 -> {S0, T0, 1.0, 1.0} */
    OUT_BATCH(batch, (0 << GEN6_VE0_VERTEX_BUFFER_INDEX_SHIFT) |
              GEN6_VE0_VALID |
              (I965_SURFACEFORMAT_R32G32_FLOAT << VE0_FORMAT_SHIFT) |
              (8 << VE0_OFFSET_SHIFT));
    OUT_BATCH(batch, (I965_VFCOMPONENT_STORE_SRC << VE1_VFCOMPONENT_0_SHIFT) |
              (I965_VFCOMPONENT_STORE_SRC << VE1_VFCOMPONENT_1_SHIFT) |
              (I965_VFCOMPONENT_STORE_1_FLT << VE1_VFCOMPONENT_2_SHIFT) |
              (I965_VFCOMPONENT_STORE_1_FLT << VE1_VFCOMPONENT_3_SHIFT));
}

static void
gen7_emit_vertices(VADriverContextP ctx)
{
    struct i965_driver_data *i965 = i965_driver_data(ctx);
    struct intel_batchbuffer *batch = i965->batch;
    struct i965_render_state *render_state = &i965->render_state;

    BEGIN_BATCH(batch, 5);
    OUT_BATCH(batch, CMD_VERTEX_BUFFERS | (5 - 2));
    OUT_BATCH(batch,
              (0 << GEN6_VB0_BUFFER_INDEX_SHIFT) |
              GEN6_VB0_VERTEXDATA |
              GEN7_VB0_ADDRESS_MODIFYENABLE |
              ((4 * 4) << VB0_BUFFER_PITCH_SHIFT));
    OUT_RELOC(batch, render_state->vb.vertex_buffer, I915_GEM_DOMAIN_VERTEX, 0, 0);
    OUT_RELOC(batch, render_state->vb.vertex_buffer, I915_GEM_DOMAIN_VERTEX, 0, 12 * 4);
    OUT_BATCH(batch, 0);
    ADVANCE_BATCH(batch);

    BEGIN_BATCH(batch, 7);
    OUT_BATCH(batch, CMD_3DPRIMITIVE | (7 - 2));
    OUT_BATCH(batch,
              _3DPRIM_RECTLIST |
              GEN7_3DPRIM_VERTEXBUFFER_ACCESS_SEQUENTIAL);
    OUT_BATCH(batch, 3); /* vertex count per instance */
    OUT_BATCH(batch, 0); /* start vertex offset */
    OUT_BATCH(batch, 1); /* single instance */
    OUT_BATCH(batch, 0); /* start instance location */
    OUT_BATCH(batch, 0);
    ADVANCE_BATCH(batch);
}

static void
gen7_render_emit_states(VADriverContextP ctx, int kernel)
{
    struct i965_driver_data *i965 = i965_driver_data(ctx);
    struct intel_batchbuffer *batch = i965->batch;

    intel_batchbuffer_start_atomic(batch, 0x1000);
    intel_batchbuffer_emit_mi_flush(batch);
    gen7_emit_invariant_states(ctx);
    gen7_emit_state_base_address(ctx);
    gen7_emit_viewport_state_pointers(ctx);
    gen7_emit_urb(ctx);
    gen7_emit_cc_state_pointers(ctx);
    gen7_emit_sampler_state_pointers(ctx);
    gen7_emit_bypass_state(ctx);
    gen7_emit_vs_state(ctx);
    gen7_emit_clip_state(ctx);
    gen7_emit_sf_state(ctx);
    gen7_emit_wm_state(ctx, kernel);
    gen7_emit_binding_table(ctx);
    gen7_emit_depth_buffer_state(ctx);
    gen7_emit_drawing_rectangle(ctx);
    gen7_emit_vertex_element_state(ctx);
    gen7_emit_vertices(ctx);
    intel_batchbuffer_end_atomic(batch);
}

static void
gen7_render_put_surface(
    VADriverContextP   ctx,
    VASurfaceID        surface,
    const VARectangle *src_rect,
    const VARectangle *dst_rect,
    unsigned int       flags
)
{
    struct i965_driver_data *i965 = i965_driver_data(ctx);
    struct intel_batchbuffer *batch = i965->batch;

    gen7_render_initialize(ctx);
    gen7_render_setup_states(ctx, surface, src_rect, dst_rect);
    i965_clear_dest_region(ctx);
    gen7_render_emit_states(ctx, PS_KERNEL);
    intel_batchbuffer_flush(batch);
}

static void
gen7_subpicture_render_blend_state(VADriverContextP ctx)
{
    struct i965_driver_data *i965 = i965_driver_data(ctx);
    struct i965_render_state *render_state = &i965->render_state;
    struct gen6_blend_state *blend_state;

    dri_bo_map(render_state->cc.blend, 1);
    assert(render_state->cc.blend->virtual);
    blend_state = render_state->cc.blend->virtual;
    memset(blend_state, 0, sizeof(*blend_state));
    blend_state->blend0.dest_blend_factor = I965_BLENDFACTOR_INV_SRC_ALPHA;
    blend_state->blend0.source_blend_factor = I965_BLENDFACTOR_SRC_ALPHA;
    blend_state->blend0.blend_func = I965_BLENDFUNCTION_ADD;
    blend_state->blend0.blend_enable = 1;
    blend_state->blend1.post_blend_clamp_enable = 1;
    blend_state->blend1.pre_blend_clamp_enable = 1;
    blend_state->blend1.clamp_range = 0; /* clamp range [0, 1] */
    dri_bo_unmap(render_state->cc.blend);
}

static void
gen7_subpicture_render_setup_states(
    VADriverContextP   ctx,
    VASurfaceID        surface,
    const VARectangle *src_rect,
    const VARectangle *dst_rect
)
{
    i965_render_dest_surface_state(ctx, 0);
    i965_subpic_render_src_surfaces_state(ctx, surface);
    i965_render_sampler(ctx);
    i965_render_cc_viewport(ctx);
    gen7_render_color_calc_state(ctx);
    gen7_subpicture_render_blend_state(ctx);
    gen7_render_depth_stencil_state(ctx);
    i965_subpic_render_upload_vertex(ctx, surface, dst_rect);
}

static void
gen7_render_put_subpicture(
    VADriverContextP   ctx,
    VASurfaceID        surface,
    const VARectangle *src_rect,
    const VARectangle *dst_rect
)
{
    struct i965_driver_data *i965 = i965_driver_data(ctx);
    struct intel_batchbuffer *batch = i965->batch;
    struct object_surface *obj_surface = SURFACE(surface);
    struct object_subpic *obj_subpic;

    assert(obj_surface);
    obj_subpic = SUBPIC(obj_surface->subpic);
    assert(obj_subpic);
    gen7_render_initialize(ctx);
    gen7_subpicture_render_setup_states(ctx, surface, src_rect, dst_rect);
    gen7_render_emit_states(ctx, PS_SUBPIC_KERNEL);
    i965_render_upload_image_palette(ctx, obj_subpic->image, 0xff);
    intel_batchbuffer_flush(batch);
}


/*
 * global functions
 */
VAStatus
i965_DestroySurfaces(VADriverContextP ctx,
                     VASurfaceID *surface_list,
                     int num_surfaces);
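
/*
 * Called from the driver's PutSurface path: optionally run the surface
 * through post-processing first, then hand it to the render path
 * matching the hardware generation.  Roughly, a caller does:
 *
 *     intel_render_put_surface(ctx, surface, &src_rect, &dst_rect, flags);
 *
 * with src_rect/dst_rect given as VARectangle crops in source and
 * destination coordinates.
 */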
void
intel_render_put_surface(
    VADriverContextP   ctx,
    VASurfaceID        surface,
    const VARectangle *src_rect,
    const VARectangle *dst_rect,
    unsigned int       flags
)
{
    struct i965_driver_data *i965 = i965_driver_data(ctx);
    int has_done_scaling = 0;
    VASurfaceID in_surface_id = surface;
    VASurfaceID out_surface_id = i965_post_processing(ctx, surface, src_rect, dst_rect, flags, &has_done_scaling);

    assert((!has_done_scaling) || (out_surface_id != VA_INVALID_ID));

    if (out_surface_id != VA_INVALID_ID)
        in_surface_id = out_surface_id;

    if (IS_GEN7(i965->intel.device_id))
        gen7_render_put_surface(ctx, in_surface_id, has_done_scaling ? dst_rect : src_rect, dst_rect, flags);
    else if (IS_GEN6(i965->intel.device_id))
        gen6_render_put_surface(ctx, in_surface_id, has_done_scaling ? dst_rect : src_rect, dst_rect, flags);
    else
        i965_render_put_surface(ctx, in_surface_id, has_done_scaling ? dst_rect : src_rect, dst_rect, flags);

    if (in_surface_id != surface)
        i965_DestroySurfaces(ctx, &in_surface_id, 1);
}

void
intel_render_put_subpicture(
    VADriverContextP   ctx,
    VASurfaceID        surface,
    const VARectangle *src_rect,
    const VARectangle *dst_rect
)
{
    struct i965_driver_data *i965 = i965_driver_data(ctx);

    if (IS_GEN7(i965->intel.device_id))
        gen7_render_put_subpicture(ctx, surface, src_rect, dst_rect);
    else if (IS_GEN6(i965->intel.device_id))
        gen6_render_put_subpicture(ctx, surface, src_rect, dst_rect);
    else
        i965_render_put_subpicture(ctx, surface, src_rect, dst_rect);
}

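/*
 * One-time setup: pick the shader kernel set for the detected hardware
 * generation, upload the kernels into GEM buffers and allocate the
 * shared constant (CURBE) buffer.
 */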
Bool
i965_render_init(VADriverContextP ctx)
{
    struct i965_driver_data *i965 = i965_driver_data(ctx);
    struct i965_render_state *render_state = &i965->render_state;
    int i;

    /* kernel */
    assert(NUM_RENDER_KERNEL == (sizeof(render_kernels_gen5) /
                                 sizeof(render_kernels_gen5[0])));
    assert(NUM_RENDER_KERNEL == (sizeof(render_kernels_gen6) /
                                 sizeof(render_kernels_gen6[0])));

    if (IS_GEN7(i965->intel.device_id))
        memcpy(render_state->render_kernels, render_kernels_gen7, sizeof(render_state->render_kernels));
    else if (IS_GEN6(i965->intel.device_id))
        memcpy(render_state->render_kernels, render_kernels_gen6, sizeof(render_state->render_kernels));
    else if (IS_IRONLAKE(i965->intel.device_id))
        memcpy(render_state->render_kernels, render_kernels_gen5, sizeof(render_state->render_kernels));
    else
        memcpy(render_state->render_kernels, render_kernels_gen4, sizeof(render_state->render_kernels));

    for (i = 0; i < NUM_RENDER_KERNEL; i++) {
        struct i965_kernel *kernel = &render_state->render_kernels[i];

        if (!kernel->size)
            continue;

        kernel->bo = dri_bo_alloc(i965->intel.bufmgr,
                                  kernel->name,
                                  kernel->size, 0x1000);
        assert(kernel->bo);
        dri_bo_subdata(kernel->bo, 0, kernel->size, kernel->bin);
    }

    /* constant buffer */
    render_state->curbe.bo = dri_bo_alloc(i965->intel.bufmgr,
                                          "constant buffer",
                                          4096, 64);
    assert(render_state->curbe.bo);

    return True;
}

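/* Release everything i965_render_init() and the per-frame setup allocated. */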
Bool
i965_render_terminate(VADriverContextP ctx)
{
    int i;
    struct i965_driver_data *i965 = i965_driver_data(ctx);
    struct i965_render_state *render_state = &i965->render_state;

    dri_bo_unreference(render_state->curbe.bo);
    render_state->curbe.bo = NULL;

    for (i = 0; i < NUM_RENDER_KERNEL; i++) {
        struct i965_kernel *kernel = &render_state->render_kernels[i];

        dri_bo_unreference(kernel->bo);
        kernel->bo = NULL;
    }

    dri_bo_unreference(render_state->vb.vertex_buffer);
    render_state->vb.vertex_buffer = NULL;
    dri_bo_unreference(render_state->vs.state);
    render_state->vs.state = NULL;
    dri_bo_unreference(render_state->sf.state);
    render_state->sf.state = NULL;
    dri_bo_unreference(render_state->wm.sampler);
    render_state->wm.sampler = NULL;
    dri_bo_unreference(render_state->wm.state);
    render_state->wm.state = NULL;
    dri_bo_unreference(render_state->wm.surface_state_binding_table_bo);
    render_state->wm.surface_state_binding_table_bo = NULL;
    dri_bo_unreference(render_state->cc.viewport);
    render_state->cc.viewport = NULL;
    dri_bo_unreference(render_state->cc.state);
    render_state->cc.state = NULL;
    dri_bo_unreference(render_state->cc.blend);
    render_state->cc.blend = NULL;
    dri_bo_unreference(render_state->cc.depth_stencil);
    render_state->cc.depth_stencil = NULL;

    if (render_state->draw_region) {
        dri_bo_unreference(render_state->draw_region->bo);
        free(render_state->draw_region);
        render_state->draw_region = NULL;
    }

    return True;
}