7a9ea135e8b667be87480d6cc08a65c872a9fe0a
[platform/upstream/libva-intel-driver.git] / src / i965_render.c
1 /*
2  * Copyright © 2006 Intel Corporation
3  *
4  * Permission is hereby granted, free of charge, to any person obtaining a
5  * copy of this software and associated documentation files (the "Software"),
6  * to deal in the Software without restriction, including without limitation
7  * the rights to use, copy, modify, merge, publish, distribute, sublicense,
8  * and/or sell copies of the Software, and to permit persons to whom the
9  * Software is furnished to do so, subject to the following conditions:
10  *
11  * The above copyright notice and this permission notice (including the next
12  * paragraph) shall be included in all copies or substantial portions of the
13  * Software.
14  *
15  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16  * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17  * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
18  * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19  * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
20  * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
21  * DEALINGS IN THE SOFTWARE.
22  *
23  * Authors:
24  *    Eric Anholt <eric@anholt.net>
25  *    Keith Packard <keithp@keithp.com>
26  *    Xiang Haihao <haihao.xiang@intel.com>
27  *
28  */
29
30 /*
31  * Most of rendering codes are ported from xf86-video-intel/src/i965_video.c
32  */
33
34 #include <stdio.h>
35 #include <stdlib.h>
36 #include <string.h>
37 #include <assert.h>
38
39 #include "intel_batchbuffer.h"
40 #include "intel_driver.h"
41 #include "i965_defines.h"
42 #include "i965_drv_video.h"
43 #include "i965_structs.h"
44
45 #include "i965_render.h"
46
47 #define SF_KERNEL_NUM_GRF       16
48 #define SF_MAX_THREADS          1
49
50 static const uint32_t sf_kernel_static[][4] = 
51 {
52 #include "shaders/render/exa_sf.g4b"
53 };
54
55 #define PS_KERNEL_NUM_GRF       32
56 #define PS_MAX_THREADS          32
57
58 #define I965_GRF_BLOCKS(nreg)   ((nreg + 15) / 16 - 1)
59
60 static const uint32_t ps_kernel_static[][4] = 
61 {
62 #include "shaders/render/exa_wm_xy.g4b"
63 #include "shaders/render/exa_wm_src_affine.g4b"
64 #include "shaders/render/exa_wm_src_sample_planar.g4b"
65 #include "shaders/render/exa_wm_yuv_rgb.g4b"
66 #include "shaders/render/exa_wm_write.g4b"
67 };
68 static const uint32_t ps_subpic_kernel_static[][4] = 
69 {
70 #include "shaders/render/exa_wm_xy.g4b"
71 #include "shaders/render/exa_wm_src_affine.g4b"
72 #include "shaders/render/exa_wm_src_sample_argb.g4b"
73 #include "shaders/render/exa_wm_write.g4b"
74 };
75
76 /* On IRONLAKE */
77 static const uint32_t sf_kernel_static_gen5[][4] = 
78 {
79 #include "shaders/render/exa_sf.g4b.gen5"
80 };
81
82 static const uint32_t ps_kernel_static_gen5[][4] = 
83 {
84 #include "shaders/render/exa_wm_xy.g4b.gen5"
85 #include "shaders/render/exa_wm_src_affine.g4b.gen5"
86 #include "shaders/render/exa_wm_src_sample_planar.g4b.gen5"
87 #include "shaders/render/exa_wm_yuv_rgb.g4b.gen5"
88 #include "shaders/render/exa_wm_write.g4b.gen5"
89 };
90 static const uint32_t ps_subpic_kernel_static_gen5[][4] = 
91 {
92 #include "shaders/render/exa_wm_xy.g4b.gen5"
93 #include "shaders/render/exa_wm_src_affine.g4b.gen5"
94 #include "shaders/render/exa_wm_src_sample_argb.g4b.gen5"
95 #include "shaders/render/exa_wm_write.g4b.gen5"
96 };
97
98 /* programs for Sandybridge */
99 static const uint32_t sf_kernel_static_gen6[][4] = 
100 {
101 };
102
103 static const uint32_t ps_kernel_static_gen6[][4] = {
104 #include "shaders/render/exa_wm_src_affine.g6b"
105 #include "shaders/render/exa_wm_src_sample_planar.g6b"
106 #include "shaders/render/exa_wm_yuv_rgb.g6b"
107 #include "shaders/render/exa_wm_write.g6b"
108 };
109
110 static const uint32_t ps_subpic_kernel_static_gen6[][4] = {
111 #include "shaders/render/exa_wm_src_affine.g6b"
112 #include "shaders/render/exa_wm_src_sample_argb.g6b"
113 #include "shaders/render/exa_wm_write.g6b"
114 };
115
116 /* programs for Ivybridge */
117 static const uint32_t sf_kernel_static_gen7[][4] = 
118 {
119 };
120
121 static const uint32_t ps_kernel_static_gen7[][4] = {
122 #include "shaders/render/exa_wm_src_affine.g7b"
123 #include "shaders/render/exa_wm_src_sample_planar.g7b"
124 #include "shaders/render/exa_wm_yuv_rgb.g7b"
125 #include "shaders/render/exa_wm_write.g7b"
126 };
127
128 static const uint32_t ps_subpic_kernel_static_gen7[][4] = {
129 #include "shaders/render/exa_wm_src_affine.g7b"
130 #include "shaders/render/exa_wm_src_sample_argb.g7b"
131 #include "shaders/render/exa_wm_write.g7b"
132 };
133
134 #define SURFACE_STATE_PADDED_SIZE_I965  ALIGN(sizeof(struct i965_surface_state), 32)
135 #define SURFACE_STATE_PADDED_SIZE_GEN7  ALIGN(sizeof(struct gen7_surface_state), 32)
136 #define SURFACE_STATE_PADDED_SIZE       MAX(SURFACE_STATE_PADDED_SIZE_I965, SURFACE_STATE_PADDED_SIZE_GEN7)
137 #define SURFACE_STATE_OFFSET(index)     (SURFACE_STATE_PADDED_SIZE * index)
138 #define BINDING_TABLE_OFFSET            SURFACE_STATE_OFFSET(MAX_RENDER_SURFACES)
139
140 static uint32_t float_to_uint (float f) 
141 {
142     union {
143         uint32_t i; 
144         float f;
145     } x;
146
147     x.f = f;
148     return x.i;
149 }
150
151 enum 
152 {
153     SF_KERNEL = 0,
154     PS_KERNEL,
155     PS_SUBPIC_KERNEL
156 };
157
158 static struct i965_kernel render_kernels_gen4[] = {
159     {
160         "SF",
161         SF_KERNEL,
162         sf_kernel_static,
163         sizeof(sf_kernel_static),
164         NULL
165     },
166     {
167         "PS",
168         PS_KERNEL,
169         ps_kernel_static,
170         sizeof(ps_kernel_static),
171         NULL
172     },
173
174     {
175         "PS_SUBPIC",
176         PS_SUBPIC_KERNEL,
177         ps_subpic_kernel_static,
178         sizeof(ps_subpic_kernel_static),
179         NULL
180     }
181 };
182
183 static struct i965_kernel render_kernels_gen5[] = {
184     {
185         "SF",
186         SF_KERNEL,
187         sf_kernel_static_gen5,
188         sizeof(sf_kernel_static_gen5),
189         NULL
190     },
191     {
192         "PS",
193         PS_KERNEL,
194         ps_kernel_static_gen5,
195         sizeof(ps_kernel_static_gen5),
196         NULL
197     },
198
199     {
200         "PS_SUBPIC",
201         PS_SUBPIC_KERNEL,
202         ps_subpic_kernel_static_gen5,
203         sizeof(ps_subpic_kernel_static_gen5),
204         NULL
205     }
206 };
207
208 static struct i965_kernel render_kernels_gen6[] = {
209     {
210         "SF",
211         SF_KERNEL,
212         sf_kernel_static_gen6,
213         sizeof(sf_kernel_static_gen6),
214         NULL
215     },
216     {
217         "PS",
218         PS_KERNEL,
219         ps_kernel_static_gen6,
220         sizeof(ps_kernel_static_gen6),
221         NULL
222     },
223
224     {
225         "PS_SUBPIC",
226         PS_SUBPIC_KERNEL,
227         ps_subpic_kernel_static_gen6,
228         sizeof(ps_subpic_kernel_static_gen6),
229         NULL
230     }
231 };
232
233 static struct i965_kernel render_kernels_gen7[] = {
234     {
235         "SF",
236         SF_KERNEL,
237         sf_kernel_static_gen7,
238         sizeof(sf_kernel_static_gen7),
239         NULL
240     },
241     {
242         "PS",
243         PS_KERNEL,
244         ps_kernel_static_gen7,
245         sizeof(ps_kernel_static_gen7),
246         NULL
247     },
248
249     {
250         "PS_SUBPIC",
251         PS_SUBPIC_KERNEL,
252         ps_subpic_kernel_static_gen7,
253         sizeof(ps_subpic_kernel_static_gen7),
254         NULL
255     }
256 };
257
258 #define URB_VS_ENTRIES        8
259 #define URB_VS_ENTRY_SIZE     1
260
261 #define URB_GS_ENTRIES        0
262 #define URB_GS_ENTRY_SIZE     0
263
264 #define URB_CLIP_ENTRIES      0
265 #define URB_CLIP_ENTRY_SIZE   0
266
267 #define URB_SF_ENTRIES        1
268 #define URB_SF_ENTRY_SIZE     2
269
270 #define URB_CS_ENTRIES        1
271 #define URB_CS_ENTRY_SIZE     1
272
273 static void
274 i965_render_vs_unit(VADriverContextP ctx)
275 {
276     struct i965_driver_data *i965 = i965_driver_data(ctx);
277     struct i965_render_state *render_state = &i965->render_state;
278     struct i965_vs_unit_state *vs_state;
279
280     dri_bo_map(render_state->vs.state, 1);
281     assert(render_state->vs.state->virtual);
282     vs_state = render_state->vs.state->virtual;
283     memset(vs_state, 0, sizeof(*vs_state));
284
285     if (IS_IRONLAKE(i965->intel.device_id))
286         vs_state->thread4.nr_urb_entries = URB_VS_ENTRIES >> 2;
287     else
288         vs_state->thread4.nr_urb_entries = URB_VS_ENTRIES;
289
290     vs_state->thread4.urb_entry_allocation_size = URB_VS_ENTRY_SIZE - 1;
291     vs_state->vs6.vs_enable = 0;
292     vs_state->vs6.vert_cache_disable = 1;
293     
294     dri_bo_unmap(render_state->vs.state);
295 }
296
297 static void
298 i965_render_sf_unit(VADriverContextP ctx)
299 {
300     struct i965_driver_data *i965 = i965_driver_data(ctx);
301     struct i965_render_state *render_state = &i965->render_state;
302     struct i965_sf_unit_state *sf_state;
303
304     dri_bo_map(render_state->sf.state, 1);
305     assert(render_state->sf.state->virtual);
306     sf_state = render_state->sf.state->virtual;
307     memset(sf_state, 0, sizeof(*sf_state));
308
309     sf_state->thread0.grf_reg_count = I965_GRF_BLOCKS(SF_KERNEL_NUM_GRF);
310     sf_state->thread0.kernel_start_pointer = render_state->render_kernels[SF_KERNEL].bo->offset >> 6;
311
312     sf_state->sf1.single_program_flow = 1; /* XXX */
313     sf_state->sf1.binding_table_entry_count = 0;
314     sf_state->sf1.thread_priority = 0;
315     sf_state->sf1.floating_point_mode = 0; /* Mesa does this */
316     sf_state->sf1.illegal_op_exception_enable = 1;
317     sf_state->sf1.mask_stack_exception_enable = 1;
318     sf_state->sf1.sw_exception_enable = 1;
319
320     /* scratch space is not used in our kernel */
321     sf_state->thread2.per_thread_scratch_space = 0;
322     sf_state->thread2.scratch_space_base_pointer = 0;
323
324     sf_state->thread3.const_urb_entry_read_length = 0; /* no const URBs */
325     sf_state->thread3.const_urb_entry_read_offset = 0; /* no const URBs */
326     sf_state->thread3.urb_entry_read_length = 1; /* 1 URB per vertex */
327     sf_state->thread3.urb_entry_read_offset = 0;
328     sf_state->thread3.dispatch_grf_start_reg = 3;
329
330     sf_state->thread4.max_threads = SF_MAX_THREADS - 1;
331     sf_state->thread4.urb_entry_allocation_size = URB_SF_ENTRY_SIZE - 1;
332     sf_state->thread4.nr_urb_entries = URB_SF_ENTRIES;
333     sf_state->thread4.stats_enable = 1;
334
335     sf_state->sf5.viewport_transform = 0; /* skip viewport */
336
337     sf_state->sf6.cull_mode = I965_CULLMODE_NONE;
338     sf_state->sf6.scissor = 0;
339
340     sf_state->sf7.trifan_pv = 2;
341
342     sf_state->sf6.dest_org_vbias = 0x8;
343     sf_state->sf6.dest_org_hbias = 0x8;
344
345     dri_bo_emit_reloc(render_state->sf.state,
346                       I915_GEM_DOMAIN_INSTRUCTION, 0,
347                       sf_state->thread0.grf_reg_count << 1,
348                       offsetof(struct i965_sf_unit_state, thread0),
349                       render_state->render_kernels[SF_KERNEL].bo);
350
351     dri_bo_unmap(render_state->sf.state);
352 }
353
354 static void 
355 i965_render_sampler(VADriverContextP ctx)
356 {
357     struct i965_driver_data *i965 = i965_driver_data(ctx);
358     struct i965_render_state *render_state = &i965->render_state;
359     struct i965_sampler_state *sampler_state;
360     int i;
361     
362     assert(render_state->wm.sampler_count > 0);
363     assert(render_state->wm.sampler_count <= MAX_SAMPLERS);
364
365     dri_bo_map(render_state->wm.sampler, 1);
366     assert(render_state->wm.sampler->virtual);
367     sampler_state = render_state->wm.sampler->virtual;
368     for (i = 0; i < render_state->wm.sampler_count; i++) {
369         memset(sampler_state, 0, sizeof(*sampler_state));
370         sampler_state->ss0.min_filter = I965_MAPFILTER_LINEAR;
371         sampler_state->ss0.mag_filter = I965_MAPFILTER_LINEAR;
372         sampler_state->ss1.r_wrap_mode = I965_TEXCOORDMODE_CLAMP;
373         sampler_state->ss1.s_wrap_mode = I965_TEXCOORDMODE_CLAMP;
374         sampler_state->ss1.t_wrap_mode = I965_TEXCOORDMODE_CLAMP;
375         sampler_state++;
376     }
377
378     dri_bo_unmap(render_state->wm.sampler);
379 }
380 static void
381 i965_subpic_render_wm_unit(VADriverContextP ctx)
382 {
383     struct i965_driver_data *i965 = i965_driver_data(ctx);
384     struct i965_render_state *render_state = &i965->render_state;
385     struct i965_wm_unit_state *wm_state;
386
387     assert(render_state->wm.sampler);
388
389     dri_bo_map(render_state->wm.state, 1);
390     assert(render_state->wm.state->virtual);
391     wm_state = render_state->wm.state->virtual;
392     memset(wm_state, 0, sizeof(*wm_state));
393
394     wm_state->thread0.grf_reg_count = I965_GRF_BLOCKS(PS_KERNEL_NUM_GRF);
395     wm_state->thread0.kernel_start_pointer = render_state->render_kernels[PS_SUBPIC_KERNEL].bo->offset >> 6;
396
397     wm_state->thread1.single_program_flow = 1; /* XXX */
398
399     if (IS_IRONLAKE(i965->intel.device_id))
400         wm_state->thread1.binding_table_entry_count = 0; /* hardware requirement */
401     else
402         wm_state->thread1.binding_table_entry_count = 7;
403
404     wm_state->thread2.scratch_space_base_pointer = 0;
405     wm_state->thread2.per_thread_scratch_space = 0; /* 1024 bytes */
406
407     wm_state->thread3.dispatch_grf_start_reg = 3; /* XXX */
408     wm_state->thread3.const_urb_entry_read_length = 0;
409     wm_state->thread3.const_urb_entry_read_offset = 0;
410     wm_state->thread3.urb_entry_read_length = 1; /* XXX */
411     wm_state->thread3.urb_entry_read_offset = 0; /* XXX */
412
413     wm_state->wm4.stats_enable = 0;
414     wm_state->wm4.sampler_state_pointer = render_state->wm.sampler->offset >> 5; 
415
416     if (IS_IRONLAKE(i965->intel.device_id)) {
417         wm_state->wm4.sampler_count = 0;        /* hardware requirement */
418         wm_state->wm5.max_threads = 12 * 6 - 1;
419     } else {
420         wm_state->wm4.sampler_count = (render_state->wm.sampler_count + 3) / 4;
421         wm_state->wm5.max_threads = 10 * 5 - 1;
422     }
423
424     wm_state->wm5.thread_dispatch_enable = 1;
425     wm_state->wm5.enable_16_pix = 1;
426     wm_state->wm5.enable_8_pix = 0;
427     wm_state->wm5.early_depth_test = 1;
428
429     dri_bo_emit_reloc(render_state->wm.state,
430                       I915_GEM_DOMAIN_INSTRUCTION, 0,
431                       wm_state->thread0.grf_reg_count << 1,
432                       offsetof(struct i965_wm_unit_state, thread0),
433                       render_state->render_kernels[PS_SUBPIC_KERNEL].bo);
434
435     dri_bo_emit_reloc(render_state->wm.state,
436                       I915_GEM_DOMAIN_INSTRUCTION, 0,
437                       wm_state->wm4.sampler_count << 2,
438                       offsetof(struct i965_wm_unit_state, wm4),
439                       render_state->wm.sampler);
440
441     dri_bo_unmap(render_state->wm.state);
442 }
443
444
445 static void
446 i965_render_wm_unit(VADriverContextP ctx)
447 {
448     struct i965_driver_data *i965 = i965_driver_data(ctx);
449     struct i965_render_state *render_state = &i965->render_state;
450     struct i965_wm_unit_state *wm_state;
451
452     assert(render_state->wm.sampler);
453
454     dri_bo_map(render_state->wm.state, 1);
455     assert(render_state->wm.state->virtual);
456     wm_state = render_state->wm.state->virtual;
457     memset(wm_state, 0, sizeof(*wm_state));
458
459     wm_state->thread0.grf_reg_count = I965_GRF_BLOCKS(PS_KERNEL_NUM_GRF);
460     wm_state->thread0.kernel_start_pointer = render_state->render_kernels[PS_KERNEL].bo->offset >> 6;
461
462     wm_state->thread1.single_program_flow = 1; /* XXX */
463
464     if (IS_IRONLAKE(i965->intel.device_id))
465         wm_state->thread1.binding_table_entry_count = 0;        /* hardware requirement */
466     else
467         wm_state->thread1.binding_table_entry_count = 7;
468
469     wm_state->thread2.scratch_space_base_pointer = 0;
470     wm_state->thread2.per_thread_scratch_space = 0; /* 1024 bytes */
471
472     wm_state->thread3.dispatch_grf_start_reg = 2; /* XXX */
473     wm_state->thread3.const_urb_entry_read_length = 1;
474     wm_state->thread3.const_urb_entry_read_offset = 0;
475     wm_state->thread3.urb_entry_read_length = 1; /* XXX */
476     wm_state->thread3.urb_entry_read_offset = 0; /* XXX */
477
478     wm_state->wm4.stats_enable = 0;
479     wm_state->wm4.sampler_state_pointer = render_state->wm.sampler->offset >> 5; 
480
481     if (IS_IRONLAKE(i965->intel.device_id)) {
482         wm_state->wm4.sampler_count = 0;        /* hardware requirement */
483         wm_state->wm5.max_threads = 12 * 6 - 1;
484     } else {
485         wm_state->wm4.sampler_count = (render_state->wm.sampler_count + 3) / 4;
486         wm_state->wm5.max_threads = 10 * 5 - 1;
487     }
488
489     wm_state->wm5.thread_dispatch_enable = 1;
490     wm_state->wm5.enable_16_pix = 1;
491     wm_state->wm5.enable_8_pix = 0;
492     wm_state->wm5.early_depth_test = 1;
493
494     dri_bo_emit_reloc(render_state->wm.state,
495                       I915_GEM_DOMAIN_INSTRUCTION, 0,
496                       wm_state->thread0.grf_reg_count << 1,
497                       offsetof(struct i965_wm_unit_state, thread0),
498                       render_state->render_kernels[PS_KERNEL].bo);
499
500     dri_bo_emit_reloc(render_state->wm.state,
501                       I915_GEM_DOMAIN_INSTRUCTION, 0,
502                       wm_state->wm4.sampler_count << 2,
503                       offsetof(struct i965_wm_unit_state, wm4),
504                       render_state->wm.sampler);
505
506     dri_bo_unmap(render_state->wm.state);
507 }
508
509 static void 
510 i965_render_cc_viewport(VADriverContextP ctx)
511 {
512     struct i965_driver_data *i965 = i965_driver_data(ctx);
513     struct i965_render_state *render_state = &i965->render_state;
514     struct i965_cc_viewport *cc_viewport;
515
516     dri_bo_map(render_state->cc.viewport, 1);
517     assert(render_state->cc.viewport->virtual);
518     cc_viewport = render_state->cc.viewport->virtual;
519     memset(cc_viewport, 0, sizeof(*cc_viewport));
520     
521     cc_viewport->min_depth = -1.e35;
522     cc_viewport->max_depth = 1.e35;
523
524     dri_bo_unmap(render_state->cc.viewport);
525 }
526
527 static void 
528 i965_subpic_render_cc_unit(VADriverContextP ctx)
529 {
530     struct i965_driver_data *i965 = i965_driver_data(ctx);
531     struct i965_render_state *render_state = &i965->render_state;
532     struct i965_cc_unit_state *cc_state;
533
534     assert(render_state->cc.viewport);
535
536     dri_bo_map(render_state->cc.state, 1);
537     assert(render_state->cc.state->virtual);
538     cc_state = render_state->cc.state->virtual;
539     memset(cc_state, 0, sizeof(*cc_state));
540
541     cc_state->cc0.stencil_enable = 0;   /* disable stencil */
542     cc_state->cc2.depth_test = 0;       /* disable depth test */
543     cc_state->cc2.logicop_enable = 0;   /* disable logic op */
544     cc_state->cc3.ia_blend_enable = 0 ;  /* blend alpha just like colors */
545     cc_state->cc3.blend_enable = 1;     /* enable color blend */
546     cc_state->cc3.alpha_test = 0;       /* disable alpha test */
547     cc_state->cc3.alpha_test_format = 0;//0:ALPHATEST_UNORM8;       /*store alpha value with UNORM8 */
548     cc_state->cc3.alpha_test_func = 5;//COMPAREFUNCTION_LESS;       /*pass if less than the reference */
549     cc_state->cc4.cc_viewport_state_offset = render_state->cc.viewport->offset >> 5;
550
551     cc_state->cc5.dither_enable = 0;    /* disable dither */
552     cc_state->cc5.logicop_func = 0xc;   /* WHITE */
553     cc_state->cc5.statistics_enable = 1;
554     cc_state->cc5.ia_blend_function = I965_BLENDFUNCTION_ADD;
555     cc_state->cc5.ia_src_blend_factor = I965_BLENDFACTOR_DST_ALPHA;
556     cc_state->cc5.ia_dest_blend_factor = I965_BLENDFACTOR_DST_ALPHA;
557
558     cc_state->cc6.clamp_post_alpha_blend = 0; 
559     cc_state->cc6.clamp_pre_alpha_blend  =0; 
560     
561     /*final color = src_color*src_blend_factor +/- dst_color*dest_color_blend_factor*/
562     cc_state->cc6.blend_function = I965_BLENDFUNCTION_ADD;
563     cc_state->cc6.src_blend_factor = I965_BLENDFACTOR_SRC_ALPHA;
564     cc_state->cc6.dest_blend_factor = I965_BLENDFACTOR_INV_SRC_ALPHA;
565    
566     /*alpha test reference*/
567     cc_state->cc7.alpha_ref.f =0.0 ;
568
569
570     dri_bo_emit_reloc(render_state->cc.state,
571                       I915_GEM_DOMAIN_INSTRUCTION, 0,
572                       0,
573                       offsetof(struct i965_cc_unit_state, cc4),
574                       render_state->cc.viewport);
575
576     dri_bo_unmap(render_state->cc.state);
577 }
578
579
580 static void 
581 i965_render_cc_unit(VADriverContextP ctx)
582 {
583     struct i965_driver_data *i965 = i965_driver_data(ctx);
584     struct i965_render_state *render_state = &i965->render_state;
585     struct i965_cc_unit_state *cc_state;
586
587     assert(render_state->cc.viewport);
588
589     dri_bo_map(render_state->cc.state, 1);
590     assert(render_state->cc.state->virtual);
591     cc_state = render_state->cc.state->virtual;
592     memset(cc_state, 0, sizeof(*cc_state));
593
594     cc_state->cc0.stencil_enable = 0;   /* disable stencil */
595     cc_state->cc2.depth_test = 0;       /* disable depth test */
596     cc_state->cc2.logicop_enable = 1;   /* enable logic op */
597     cc_state->cc3.ia_blend_enable = 0;  /* blend alpha just like colors */
598     cc_state->cc3.blend_enable = 0;     /* disable color blend */
599     cc_state->cc3.alpha_test = 0;       /* disable alpha test */
600     cc_state->cc4.cc_viewport_state_offset = render_state->cc.viewport->offset >> 5;
601
602     cc_state->cc5.dither_enable = 0;    /* disable dither */
603     cc_state->cc5.logicop_func = 0xc;   /* WHITE */
604     cc_state->cc5.statistics_enable = 1;
605     cc_state->cc5.ia_blend_function = I965_BLENDFUNCTION_ADD;
606     cc_state->cc5.ia_src_blend_factor = I965_BLENDFACTOR_ONE;
607     cc_state->cc5.ia_dest_blend_factor = I965_BLENDFACTOR_ONE;
608
609     dri_bo_emit_reloc(render_state->cc.state,
610                       I915_GEM_DOMAIN_INSTRUCTION, 0,
611                       0,
612                       offsetof(struct i965_cc_unit_state, cc4),
613                       render_state->cc.viewport);
614
615     dri_bo_unmap(render_state->cc.state);
616 }
617
618 static void
619 i965_render_set_surface_tiling(struct i965_surface_state *ss, unsigned int tiling)
620 {
621     switch (tiling) {
622     case I915_TILING_NONE:
623         ss->ss3.tiled_surface = 0;
624         ss->ss3.tile_walk = 0;
625         break;
626     case I915_TILING_X:
627         ss->ss3.tiled_surface = 1;
628         ss->ss3.tile_walk = I965_TILEWALK_XMAJOR;
629         break;
630     case I915_TILING_Y:
631         ss->ss3.tiled_surface = 1;
632         ss->ss3.tile_walk = I965_TILEWALK_YMAJOR;
633         break;
634     }
635 }
636
637 static void
638 i965_render_set_surface_state(
639     struct i965_surface_state *ss,
640     dri_bo                    *bo,
641     unsigned long              offset,
642     unsigned int               width,
643     unsigned int               height,
644     unsigned int               pitch,
645     unsigned int               format,
646     unsigned int               flags
647 )
648 {
649     unsigned int tiling;
650     unsigned int swizzle;
651
652     memset(ss, 0, sizeof(*ss));
653
654     switch (flags & (I965_PP_FLAG_TOP_FIELD|I965_PP_FLAG_BOTTOM_FIELD)) {
655     case I965_PP_FLAG_BOTTOM_FIELD:
656         ss->ss0.vert_line_stride_ofs = 1;
657         /* fall-through */
658     case I965_PP_FLAG_TOP_FIELD:
659         ss->ss0.vert_line_stride = 1;
660         height /= 2;
661         break;
662     }
663
664     ss->ss0.surface_type = I965_SURFACE_2D;
665     ss->ss0.surface_format = format;
666     ss->ss0.color_blend = 1;
667
668     ss->ss1.base_addr = bo->offset + offset;
669
670     ss->ss2.width = width - 1;
671     ss->ss2.height = height - 1;
672
673     ss->ss3.pitch = pitch - 1;
674
675     dri_bo_get_tiling(bo, &tiling, &swizzle);
676     i965_render_set_surface_tiling(ss, tiling);
677 }
678
679 static void
680 gen7_render_set_surface_tiling(struct gen7_surface_state *ss, uint32_t tiling)
681 {
682    switch (tiling) {
683    case I915_TILING_NONE:
684       ss->ss0.tiled_surface = 0;
685       ss->ss0.tile_walk = 0;
686       break;
687    case I915_TILING_X:
688       ss->ss0.tiled_surface = 1;
689       ss->ss0.tile_walk = I965_TILEWALK_XMAJOR;
690       break;
691    case I915_TILING_Y:
692       ss->ss0.tiled_surface = 1;
693       ss->ss0.tile_walk = I965_TILEWALK_YMAJOR;
694       break;
695    }
696 }
697
698 static void
699 gen7_render_set_surface_state(
700     struct gen7_surface_state *ss,
701     dri_bo                    *bo,
702     unsigned long              offset,
703     int                        width,
704     int                        height,
705     int                        pitch,
706     int                        format,
707     unsigned int               flags
708 )
709 {
710     unsigned int tiling;
711     unsigned int swizzle;
712
713     memset(ss, 0, sizeof(*ss));
714
715     switch (flags & (I965_PP_FLAG_TOP_FIELD|I965_PP_FLAG_BOTTOM_FIELD)) {
716     case I965_PP_FLAG_BOTTOM_FIELD:
717         ss->ss0.vert_line_stride_ofs = 1;
718         /* fall-through */
719     case I965_PP_FLAG_TOP_FIELD:
720         ss->ss0.vert_line_stride = 1;
721         height /= 2;
722         break;
723     }
724
725     ss->ss0.surface_type = I965_SURFACE_2D;
726     ss->ss0.surface_format = format;
727
728     ss->ss1.base_addr = bo->offset + offset;
729
730     ss->ss2.width = width - 1;
731     ss->ss2.height = height - 1;
732
733     ss->ss3.pitch = pitch - 1;
734
735     dri_bo_get_tiling(bo, &tiling, &swizzle);
736     gen7_render_set_surface_tiling(ss, tiling);
737 }
738
739 static void
740 i965_render_src_surface_state(
741     VADriverContextP ctx, 
742     int              index,
743     dri_bo          *region,
744     unsigned long    offset,
745     int              w,
746     int              h,
747     int              pitch,
748     int              format,
749     unsigned int     flags
750 )
751 {
752     struct i965_driver_data *i965 = i965_driver_data(ctx);  
753     struct i965_render_state *render_state = &i965->render_state;
754     void *ss;
755     dri_bo *ss_bo = render_state->wm.surface_state_binding_table_bo;
756
757     assert(index < MAX_RENDER_SURFACES);
758
759     dri_bo_map(ss_bo, 1);
760     assert(ss_bo->virtual);
761     ss = (char *)ss_bo->virtual + SURFACE_STATE_OFFSET(index);
762
763     if (IS_GEN7(i965->intel.device_id)) {
764         gen7_render_set_surface_state(ss,
765                                       region, offset,
766                                       w, h,
767                                       pitch, format, flags);
768         dri_bo_emit_reloc(ss_bo,
769                           I915_GEM_DOMAIN_SAMPLER, 0,
770                           offset,
771                           SURFACE_STATE_OFFSET(index) + offsetof(struct gen7_surface_state, ss1),
772                           region);
773     } else {
774         i965_render_set_surface_state(ss,
775                                       region, offset,
776                                       w, h,
777                                       pitch, format, flags);
778         dri_bo_emit_reloc(ss_bo,
779                           I915_GEM_DOMAIN_SAMPLER, 0,
780                           offset,
781                           SURFACE_STATE_OFFSET(index) + offsetof(struct i965_surface_state, ss1),
782                           region);
783     }
784
785     ((unsigned int *)((char *)ss_bo->virtual + BINDING_TABLE_OFFSET))[index] = SURFACE_STATE_OFFSET(index);
786     dri_bo_unmap(ss_bo);
787     render_state->wm.sampler_count++;
788 }
789
790 static void
791 i965_render_src_surfaces_state(
792     VADriverContextP ctx,
793     VASurfaceID      surface,
794     unsigned int     flags
795 )
796 {
797     struct i965_driver_data *i965 = i965_driver_data(ctx);  
798     struct object_surface *obj_surface;
799     int region_pitch;
800     int rw, rh;
801     dri_bo *region;
802
803     obj_surface = SURFACE(surface);
804     assert(obj_surface);
805
806     region_pitch = obj_surface->width;
807     rw = obj_surface->orig_width;
808     rh = obj_surface->orig_height;
809     region = obj_surface->bo;
810
811     i965_render_src_surface_state(ctx, 1, region, 0, rw, rh, region_pitch, I965_SURFACEFORMAT_R8_UNORM, flags);     /* Y */
812     i965_render_src_surface_state(ctx, 2, region, 0, rw, rh, region_pitch, I965_SURFACEFORMAT_R8_UNORM, flags);
813
814     if (obj_surface->fourcc == VA_FOURCC('N', 'V', '1', '2')) {
815         i965_render_src_surface_state(ctx, 3, region,
816                                       region_pitch * obj_surface->y_cb_offset,
817                                       obj_surface->cb_cr_width, obj_surface->cb_cr_height, obj_surface->cb_cr_pitch,
818                                       I965_SURFACEFORMAT_R8G8_UNORM, flags); /* UV */
819         i965_render_src_surface_state(ctx, 4, region,
820                                       region_pitch * obj_surface->y_cb_offset,
821                                       obj_surface->cb_cr_width, obj_surface->cb_cr_height, obj_surface->cb_cr_pitch,
822                                       I965_SURFACEFORMAT_R8G8_UNORM, flags);
823     } else {
824         i965_render_src_surface_state(ctx, 3, region,
825                                       region_pitch * obj_surface->y_cb_offset,
826                                       obj_surface->cb_cr_width, obj_surface->cb_cr_height, obj_surface->cb_cr_pitch,
827                                       I965_SURFACEFORMAT_R8_UNORM, flags); /* U */
828         i965_render_src_surface_state(ctx, 4, region,
829                                       region_pitch * obj_surface->y_cb_offset,
830                                       obj_surface->cb_cr_width, obj_surface->cb_cr_height, obj_surface->cb_cr_pitch,
831                                       I965_SURFACEFORMAT_R8_UNORM, flags);
832         i965_render_src_surface_state(ctx, 5, region,
833                                       region_pitch * obj_surface->y_cr_offset,
834                                       obj_surface->cb_cr_width, obj_surface->cb_cr_height, obj_surface->cb_cr_pitch,
835                                       I965_SURFACEFORMAT_R8_UNORM, flags); /* V */
836         i965_render_src_surface_state(ctx, 6, region,
837                                       region_pitch * obj_surface->y_cr_offset,
838                                       obj_surface->cb_cr_width, obj_surface->cb_cr_height, obj_surface->cb_cr_pitch,
839                                       I965_SURFACEFORMAT_R8_UNORM, flags);
840     }
841 }
842
843 static void
844 i965_subpic_render_src_surfaces_state(VADriverContextP ctx,
845                               VASurfaceID surface)
846 {
847     struct i965_driver_data *i965 = i965_driver_data(ctx);  
848     struct object_surface *obj_surface = SURFACE(surface);
849     int w, h;
850     dri_bo *region;
851     dri_bo *subpic_region;
852     struct object_subpic *obj_subpic = SUBPIC(obj_surface->subpic);
853     struct object_image *obj_image = IMAGE(obj_subpic->image);
854     assert(obj_surface);
855     assert(obj_surface->bo);
856     w = obj_surface->width;
857     h = obj_surface->height;
858     region = obj_surface->bo;
859     subpic_region = obj_image->bo;
860     /*subpicture surface*/
861     i965_render_src_surface_state(ctx, 1, subpic_region, 0, obj_subpic->width, obj_subpic->height, obj_subpic->pitch, obj_subpic->format, 0);     
862     i965_render_src_surface_state(ctx, 2, subpic_region, 0, obj_subpic->width, obj_subpic->height, obj_subpic->pitch, obj_subpic->format, 0);     
863 }
864
865 static void
866 i965_render_dest_surface_state(VADriverContextP ctx, int index)
867 {
868     struct i965_driver_data *i965 = i965_driver_data(ctx);  
869     struct i965_render_state *render_state = &i965->render_state;
870     struct intel_region *dest_region = render_state->draw_region;
871     void *ss;
872     dri_bo *ss_bo = render_state->wm.surface_state_binding_table_bo;
873     int format;
874     assert(index < MAX_RENDER_SURFACES);
875
876     if (dest_region->cpp == 2) {
877         format = I965_SURFACEFORMAT_B5G6R5_UNORM;
878     } else {
879         format = I965_SURFACEFORMAT_B8G8R8A8_UNORM;
880     }
881
882     dri_bo_map(ss_bo, 1);
883     assert(ss_bo->virtual);
884     ss = (char *)ss_bo->virtual + SURFACE_STATE_OFFSET(index);
885
886     if (IS_GEN7(i965->intel.device_id)) {
887         gen7_render_set_surface_state(ss,
888                                       dest_region->bo, 0,
889                                       dest_region->width, dest_region->height,
890                                       dest_region->pitch, format, 0);
891         dri_bo_emit_reloc(ss_bo,
892                           I915_GEM_DOMAIN_RENDER, I915_GEM_DOMAIN_RENDER,
893                           0,
894                           SURFACE_STATE_OFFSET(index) + offsetof(struct gen7_surface_state, ss1),
895                           dest_region->bo);
896     } else {
897         i965_render_set_surface_state(ss,
898                                       dest_region->bo, 0,
899                                       dest_region->width, dest_region->height,
900                                       dest_region->pitch, format, 0);
901         dri_bo_emit_reloc(ss_bo,
902                           I915_GEM_DOMAIN_RENDER, I915_GEM_DOMAIN_RENDER,
903                           0,
904                           SURFACE_STATE_OFFSET(index) + offsetof(struct i965_surface_state, ss1),
905                           dest_region->bo);
906     }
907
908     ((unsigned int *)((char *)ss_bo->virtual + BINDING_TABLE_OFFSET))[index] = SURFACE_STATE_OFFSET(index);
909     dri_bo_unmap(ss_bo);
910 }
911
912 static void
913 i965_fill_vertex_buffer(
914     VADriverContextP ctx,
915     float tex_coords[4], /* [(u1,v1);(u2,v2)] */
916     float vid_coords[4]  /* [(x1,y1);(x2,y2)] */
917 )
918 {
919     struct i965_driver_data * const i965 = i965_driver_data(ctx);
920     float vb[12];
921
922     enum { X1, Y1, X2, Y2 };
923
924     static const unsigned int g_rotation_indices[][6] = {
925         [VA_ROTATION_NONE] = { X2, Y2, X1, Y2, X1, Y1 },
926         [VA_ROTATION_90]   = { X2, Y1, X2, Y2, X1, Y2 },
927         [VA_ROTATION_180]  = { X1, Y1, X2, Y1, X2, Y2 },
928         [VA_ROTATION_270]  = { X1, Y2, X1, Y1, X2, Y1 },
929     };
930
931     const unsigned int * const rotation_indices =
932         g_rotation_indices[i965->rotation_attrib->value];
933
934     vb[0]  = tex_coords[rotation_indices[0]]; /* bottom-right corner */
935     vb[1]  = tex_coords[rotation_indices[1]];
936     vb[2]  = vid_coords[X2];
937     vb[3]  = vid_coords[Y2];
938
939     vb[4]  = tex_coords[rotation_indices[2]]; /* bottom-left corner */
940     vb[5]  = tex_coords[rotation_indices[3]];
941     vb[6]  = vid_coords[X1];
942     vb[7]  = vid_coords[Y2];
943
944     vb[8]  = tex_coords[rotation_indices[4]]; /* top-left corner */
945     vb[9]  = tex_coords[rotation_indices[5]];
946     vb[10] = vid_coords[X1];
947     vb[11] = vid_coords[Y1];
948
949     dri_bo_subdata(i965->render_state.vb.vertex_buffer, 0, sizeof(vb), vb);
950 }
951
952 static void 
953 i965_subpic_render_upload_vertex(VADriverContextP ctx,
954                                  VASurfaceID surface,
955                                  const VARectangle *output_rect)
956 {    
957     struct i965_driver_data  *i965         = i965_driver_data(ctx);
958     struct object_surface    *obj_surface  = SURFACE(surface);
959     struct object_subpic     *obj_subpic   = SUBPIC(obj_surface->subpic);
960     float tex_coords[4], vid_coords[4];
961     VARectangle dst_rect;
962
963     if (obj_subpic->flags & VA_SUBPICTURE_DESTINATION_IS_SCREEN_COORD)
964         dst_rect = obj_subpic->dst_rect;
965     else {
966         const float sx  = (float)output_rect->width  / obj_surface->orig_width;
967         const float sy  = (float)output_rect->height / obj_surface->orig_height;
968         dst_rect.x      = output_rect->x + sx * obj_subpic->dst_rect.x;
969         dst_rect.y      = output_rect->y + sy * obj_subpic->dst_rect.y;
970         dst_rect.width  = sx * obj_subpic->dst_rect.width;
971         dst_rect.height = sy * obj_subpic->dst_rect.height;
972     }
973
974     tex_coords[0] = (float)obj_subpic->src_rect.x / obj_subpic->width;
975     tex_coords[1] = (float)obj_subpic->src_rect.y / obj_subpic->height;
976     tex_coords[2] = (float)(obj_subpic->src_rect.x + obj_subpic->src_rect.width) / obj_subpic->width;
977     tex_coords[3] = (float)(obj_subpic->src_rect.y + obj_subpic->src_rect.height) / obj_subpic->height;
978
979     vid_coords[0] = dst_rect.x;
980     vid_coords[1] = dst_rect.y;
981     vid_coords[2] = (float)(dst_rect.x + dst_rect.width);
982     vid_coords[3] = (float)(dst_rect.y + dst_rect.height);
983
984     i965_fill_vertex_buffer(ctx, tex_coords, vid_coords);
985 }
986
987 static void 
988 i965_render_upload_vertex(
989     VADriverContextP   ctx,
990     VASurfaceID        surface,
991     const VARectangle *src_rect,
992     const VARectangle *dst_rect
993 )
994 {
995     struct i965_driver_data *i965 = i965_driver_data(ctx);
996     struct i965_render_state *render_state = &i965->render_state;
997     struct intel_region *dest_region = render_state->draw_region;
998     struct object_surface *obj_surface;
999     float tex_coords[4], vid_coords[4];
1000     int width, height;
1001
1002     obj_surface = SURFACE(surface);
1003     assert(surface);
1004
1005     width  = obj_surface->orig_width;
1006     height = obj_surface->orig_height;
1007
1008     tex_coords[0] = (float)src_rect->x / width;
1009     tex_coords[1] = (float)src_rect->y / height;
1010     tex_coords[2] = (float)(src_rect->x + src_rect->width) / width;
1011     tex_coords[3] = (float)(src_rect->y + src_rect->height) / height;
1012
1013     vid_coords[0] = dest_region->x + dst_rect->x;
1014     vid_coords[1] = dest_region->y + dst_rect->y;
1015     vid_coords[2] = vid_coords[0] + dst_rect->width;
1016     vid_coords[3] = vid_coords[1] + dst_rect->height;
1017
1018     i965_fill_vertex_buffer(ctx, tex_coords, vid_coords);
1019 }
1020
1021 static void
1022 i965_render_upload_constants(VADriverContextP ctx,
1023                              VASurfaceID surface)
1024 {
1025     struct i965_driver_data *i965 = i965_driver_data(ctx);
1026     struct i965_render_state *render_state = &i965->render_state;
1027     unsigned short *constant_buffer;
1028     struct object_surface *obj_surface = SURFACE(surface);
1029
1030     dri_bo_map(render_state->curbe.bo, 1);
1031     assert(render_state->curbe.bo->virtual);
1032     constant_buffer = render_state->curbe.bo->virtual;
1033
1034     if (obj_surface->subsampling == SUBSAMPLE_YUV400) {
1035         assert(obj_surface->fourcc == VA_FOURCC('I', 'M', 'C', '1') ||
1036                obj_surface->fourcc == VA_FOURCC('I', 'M', 'C', '3'));
1037         *constant_buffer = 2;
1038     } else {
1039         if (obj_surface->fourcc == VA_FOURCC('N', 'V', '1', '2'))
1040             *constant_buffer = 1;
1041         else
1042             *constant_buffer = 0;
1043     }
1044
1045     dri_bo_unmap(render_state->curbe.bo);
1046 }
1047
1048 static void
1049 i965_surface_render_state_setup(
1050     VADriverContextP   ctx,
1051     VASurfaceID        surface,
1052     const VARectangle *src_rect,
1053     const VARectangle *dst_rect,
1054     unsigned int       flags
1055 )
1056 {
1057     i965_render_vs_unit(ctx);
1058     i965_render_sf_unit(ctx);
1059     i965_render_dest_surface_state(ctx, 0);
1060     i965_render_src_surfaces_state(ctx, surface, flags);
1061     i965_render_sampler(ctx);
1062     i965_render_wm_unit(ctx);
1063     i965_render_cc_viewport(ctx);
1064     i965_render_cc_unit(ctx);
1065     i965_render_upload_vertex(ctx, surface, src_rect, dst_rect);
1066     i965_render_upload_constants(ctx, surface);
1067 }
1068
1069 static void
1070 i965_subpic_render_state_setup(
1071     VADriverContextP   ctx,
1072     VASurfaceID        surface,
1073     const VARectangle *src_rect,
1074     const VARectangle *dst_rect
1075 )
1076 {
1077     i965_render_vs_unit(ctx);
1078     i965_render_sf_unit(ctx);
1079     i965_render_dest_surface_state(ctx, 0);
1080     i965_subpic_render_src_surfaces_state(ctx, surface);
1081     i965_render_sampler(ctx);
1082     i965_subpic_render_wm_unit(ctx);
1083     i965_render_cc_viewport(ctx);
1084     i965_subpic_render_cc_unit(ctx);
1085     i965_subpic_render_upload_vertex(ctx, surface, dst_rect);
1086 }
1087
1088
1089 static void
1090 i965_render_pipeline_select(VADriverContextP ctx)
1091 {
1092     struct i965_driver_data *i965 = i965_driver_data(ctx);
1093     struct intel_batchbuffer *batch = i965->batch;
1094  
1095     BEGIN_BATCH(batch, 1);
1096     OUT_BATCH(batch, CMD_PIPELINE_SELECT | PIPELINE_SELECT_3D);
1097     ADVANCE_BATCH(batch);
1098 }
1099
1100 static void
1101 i965_render_state_sip(VADriverContextP ctx)
1102 {
1103     struct i965_driver_data *i965 = i965_driver_data(ctx);
1104     struct intel_batchbuffer *batch = i965->batch;
1105
1106     BEGIN_BATCH(batch, 2);
1107     OUT_BATCH(batch, CMD_STATE_SIP | 0);
1108     OUT_BATCH(batch, 0);
1109     ADVANCE_BATCH(batch);
1110 }
1111
1112 static void
1113 i965_render_state_base_address(VADriverContextP ctx)
1114 {
1115     struct i965_driver_data *i965 = i965_driver_data(ctx);
1116     struct intel_batchbuffer *batch = i965->batch;
1117     struct i965_render_state *render_state = &i965->render_state;
1118
1119     if (IS_IRONLAKE(i965->intel.device_id)) {
1120         BEGIN_BATCH(batch, 8);
1121         OUT_BATCH(batch, CMD_STATE_BASE_ADDRESS | 6);
1122         OUT_BATCH(batch, 0 | BASE_ADDRESS_MODIFY);
1123         OUT_RELOC(batch, render_state->wm.surface_state_binding_table_bo, I915_GEM_DOMAIN_INSTRUCTION, 0, BASE_ADDRESS_MODIFY);
1124         OUT_BATCH(batch, 0 | BASE_ADDRESS_MODIFY);
1125         OUT_BATCH(batch, 0 | BASE_ADDRESS_MODIFY);
1126         OUT_BATCH(batch, 0 | BASE_ADDRESS_MODIFY);
1127         OUT_BATCH(batch, 0 | BASE_ADDRESS_MODIFY);
1128         OUT_BATCH(batch, 0 | BASE_ADDRESS_MODIFY);
1129         ADVANCE_BATCH(batch);
1130     } else {
1131         BEGIN_BATCH(batch, 6);
1132         OUT_BATCH(batch, CMD_STATE_BASE_ADDRESS | 4);
1133         OUT_BATCH(batch, 0 | BASE_ADDRESS_MODIFY);
1134         OUT_RELOC(batch, render_state->wm.surface_state_binding_table_bo, I915_GEM_DOMAIN_INSTRUCTION, 0, BASE_ADDRESS_MODIFY);
1135         OUT_BATCH(batch, 0 | BASE_ADDRESS_MODIFY);
1136         OUT_BATCH(batch, 0 | BASE_ADDRESS_MODIFY);
1137         OUT_BATCH(batch, 0 | BASE_ADDRESS_MODIFY);
1138         ADVANCE_BATCH(batch);
1139     }
1140 }
1141
1142 static void
1143 i965_render_binding_table_pointers(VADriverContextP ctx)
1144 {
1145     struct i965_driver_data *i965 = i965_driver_data(ctx);
1146     struct intel_batchbuffer *batch = i965->batch;
1147
1148     BEGIN_BATCH(batch, 6);
1149     OUT_BATCH(batch, CMD_BINDING_TABLE_POINTERS | 4);
1150     OUT_BATCH(batch, 0); /* vs */
1151     OUT_BATCH(batch, 0); /* gs */
1152     OUT_BATCH(batch, 0); /* clip */
1153     OUT_BATCH(batch, 0); /* sf */
1154     OUT_BATCH(batch, BINDING_TABLE_OFFSET);
1155     ADVANCE_BATCH(batch);
1156 }
1157
1158 static void 
1159 i965_render_constant_color(VADriverContextP ctx)
1160 {
1161     struct i965_driver_data *i965 = i965_driver_data(ctx);
1162     struct intel_batchbuffer *batch = i965->batch;
1163
1164     BEGIN_BATCH(batch, 5);
1165     OUT_BATCH(batch, CMD_CONSTANT_COLOR | 3);
1166     OUT_BATCH(batch, float_to_uint(1.0));
1167     OUT_BATCH(batch, float_to_uint(0.0));
1168     OUT_BATCH(batch, float_to_uint(1.0));
1169     OUT_BATCH(batch, float_to_uint(1.0));
1170     ADVANCE_BATCH(batch);
1171 }
1172
1173 static void
1174 i965_render_pipelined_pointers(VADriverContextP ctx)
1175 {
1176     struct i965_driver_data *i965 = i965_driver_data(ctx);
1177     struct intel_batchbuffer *batch = i965->batch;
1178     struct i965_render_state *render_state = &i965->render_state;
1179
1180     BEGIN_BATCH(batch, 7);
1181     OUT_BATCH(batch, CMD_PIPELINED_POINTERS | 5);
1182     OUT_RELOC(batch, render_state->vs.state, I915_GEM_DOMAIN_INSTRUCTION, 0, 0);
1183     OUT_BATCH(batch, 0);  /* disable GS */
1184     OUT_BATCH(batch, 0);  /* disable CLIP */
1185     OUT_RELOC(batch, render_state->sf.state, I915_GEM_DOMAIN_INSTRUCTION, 0, 0);
1186     OUT_RELOC(batch, render_state->wm.state, I915_GEM_DOMAIN_INSTRUCTION, 0, 0);
1187     OUT_RELOC(batch, render_state->cc.state, I915_GEM_DOMAIN_INSTRUCTION, 0, 0);
1188     ADVANCE_BATCH(batch);
1189 }
1190
1191 static void
1192 i965_render_urb_layout(VADriverContextP ctx)
1193 {
1194     struct i965_driver_data *i965 = i965_driver_data(ctx);
1195     struct intel_batchbuffer *batch = i965->batch;
1196     int urb_vs_start, urb_vs_size;
1197     int urb_gs_start, urb_gs_size;
1198     int urb_clip_start, urb_clip_size;
1199     int urb_sf_start, urb_sf_size;
1200     int urb_cs_start, urb_cs_size;
1201
1202     urb_vs_start = 0;
1203     urb_vs_size = URB_VS_ENTRIES * URB_VS_ENTRY_SIZE;
1204     urb_gs_start = urb_vs_start + urb_vs_size;
1205     urb_gs_size = URB_GS_ENTRIES * URB_GS_ENTRY_SIZE;
1206     urb_clip_start = urb_gs_start + urb_gs_size;
1207     urb_clip_size = URB_CLIP_ENTRIES * URB_CLIP_ENTRY_SIZE;
1208     urb_sf_start = urb_clip_start + urb_clip_size;
1209     urb_sf_size = URB_SF_ENTRIES * URB_SF_ENTRY_SIZE;
1210     urb_cs_start = urb_sf_start + urb_sf_size;
1211     urb_cs_size = URB_CS_ENTRIES * URB_CS_ENTRY_SIZE;
1212
1213     BEGIN_BATCH(batch, 3);
1214     OUT_BATCH(batch, 
1215               CMD_URB_FENCE |
1216               UF0_CS_REALLOC |
1217               UF0_SF_REALLOC |
1218               UF0_CLIP_REALLOC |
1219               UF0_GS_REALLOC |
1220               UF0_VS_REALLOC |
1221               1);
1222     OUT_BATCH(batch, 
1223               ((urb_clip_start + urb_clip_size) << UF1_CLIP_FENCE_SHIFT) |
1224               ((urb_gs_start + urb_gs_size) << UF1_GS_FENCE_SHIFT) |
1225               ((urb_vs_start + urb_vs_size) << UF1_VS_FENCE_SHIFT));
1226     OUT_BATCH(batch,
1227               ((urb_cs_start + urb_cs_size) << UF2_CS_FENCE_SHIFT) |
1228               ((urb_sf_start + urb_sf_size) << UF2_SF_FENCE_SHIFT));
1229     ADVANCE_BATCH(batch);
1230 }
1231
1232 static void 
1233 i965_render_cs_urb_layout(VADriverContextP ctx)
1234 {
1235     struct i965_driver_data *i965 = i965_driver_data(ctx);
1236     struct intel_batchbuffer *batch = i965->batch;
1237
1238     BEGIN_BATCH(batch, 2);
1239     OUT_BATCH(batch, CMD_CS_URB_STATE | 0);
1240     OUT_BATCH(batch,
1241               ((URB_CS_ENTRY_SIZE - 1) << 4) |          /* URB Entry Allocation Size */
1242               (URB_CS_ENTRIES << 0));                /* Number of URB Entries */
1243     ADVANCE_BATCH(batch);
1244 }
1245
1246 static void
1247 i965_render_constant_buffer(VADriverContextP ctx)
1248 {
1249     struct i965_driver_data *i965 = i965_driver_data(ctx);
1250     struct intel_batchbuffer *batch = i965->batch;
1251     struct i965_render_state *render_state = &i965->render_state;
1252
1253     BEGIN_BATCH(batch, 2);
1254     OUT_BATCH(batch, CMD_CONSTANT_BUFFER | (1 << 8) | (2 - 2));
1255     OUT_RELOC(batch, render_state->curbe.bo,
1256               I915_GEM_DOMAIN_INSTRUCTION, 0,
1257               URB_CS_ENTRY_SIZE - 1);
1258     ADVANCE_BATCH(batch);    
1259 }
1260
1261 static void
1262 i965_render_drawing_rectangle(VADriverContextP ctx)
1263 {
1264     struct i965_driver_data *i965 = i965_driver_data(ctx);
1265     struct intel_batchbuffer *batch = i965->batch;
1266     struct i965_render_state *render_state = &i965->render_state;
1267     struct intel_region *dest_region = render_state->draw_region;
1268
1269     BEGIN_BATCH(batch, 4);
1270     OUT_BATCH(batch, CMD_DRAWING_RECTANGLE | 2);
1271     OUT_BATCH(batch, 0x00000000);
1272     OUT_BATCH(batch, (dest_region->width - 1) | (dest_region->height - 1) << 16);
1273     OUT_BATCH(batch, 0x00000000);         
1274     ADVANCE_BATCH(batch);
1275 }
1276
1277 static void
1278 i965_render_vertex_elements(VADriverContextP ctx)
1279 {
1280     struct i965_driver_data *i965 = i965_driver_data(ctx);
1281     struct intel_batchbuffer *batch = i965->batch;
1282
1283     if (IS_IRONLAKE(i965->intel.device_id)) {
1284         BEGIN_BATCH(batch, 5);
1285         OUT_BATCH(batch, CMD_VERTEX_ELEMENTS | 3);
1286         /* offset 0: X,Y -> {X, Y, 1.0, 1.0} */
1287         OUT_BATCH(batch, (0 << VE0_VERTEX_BUFFER_INDEX_SHIFT) |
1288                   VE0_VALID |
1289                   (I965_SURFACEFORMAT_R32G32_FLOAT << VE0_FORMAT_SHIFT) |
1290                   (0 << VE0_OFFSET_SHIFT));
1291         OUT_BATCH(batch, (I965_VFCOMPONENT_STORE_SRC << VE1_VFCOMPONENT_0_SHIFT) |
1292                   (I965_VFCOMPONENT_STORE_SRC << VE1_VFCOMPONENT_1_SHIFT) |
1293                   (I965_VFCOMPONENT_STORE_1_FLT << VE1_VFCOMPONENT_2_SHIFT) |
1294                   (I965_VFCOMPONENT_STORE_1_FLT << VE1_VFCOMPONENT_3_SHIFT));
1295         /* offset 8: S0, T0 -> {S0, T0, 1.0, 1.0} */
1296         OUT_BATCH(batch, (0 << VE0_VERTEX_BUFFER_INDEX_SHIFT) |
1297                   VE0_VALID |
1298                   (I965_SURFACEFORMAT_R32G32_FLOAT << VE0_FORMAT_SHIFT) |
1299                   (8 << VE0_OFFSET_SHIFT));
1300         OUT_BATCH(batch, (I965_VFCOMPONENT_STORE_SRC << VE1_VFCOMPONENT_0_SHIFT) |
1301                   (I965_VFCOMPONENT_STORE_SRC << VE1_VFCOMPONENT_1_SHIFT) |
1302                   (I965_VFCOMPONENT_STORE_1_FLT << VE1_VFCOMPONENT_2_SHIFT) |
1303                   (I965_VFCOMPONENT_STORE_1_FLT << VE1_VFCOMPONENT_3_SHIFT));
1304         ADVANCE_BATCH(batch);
1305     } else {
1306         BEGIN_BATCH(batch, 5);
1307         OUT_BATCH(batch, CMD_VERTEX_ELEMENTS | 3);
1308         /* offset 0: X,Y -> {X, Y, 1.0, 1.0} */
1309         OUT_BATCH(batch, (0 << VE0_VERTEX_BUFFER_INDEX_SHIFT) |
1310                   VE0_VALID |
1311                   (I965_SURFACEFORMAT_R32G32_FLOAT << VE0_FORMAT_SHIFT) |
1312                   (0 << VE0_OFFSET_SHIFT));
1313         OUT_BATCH(batch, (I965_VFCOMPONENT_STORE_SRC << VE1_VFCOMPONENT_0_SHIFT) |
1314                   (I965_VFCOMPONENT_STORE_SRC << VE1_VFCOMPONENT_1_SHIFT) |
1315                   (I965_VFCOMPONENT_STORE_1_FLT << VE1_VFCOMPONENT_2_SHIFT) |
1316                   (I965_VFCOMPONENT_STORE_1_FLT << VE1_VFCOMPONENT_3_SHIFT) |
1317                   (0 << VE1_DESTINATION_ELEMENT_OFFSET_SHIFT));
1318         /* offset 8: S0, T0 -> {S0, T0, 1.0, 1.0} */
1319         OUT_BATCH(batch, (0 << VE0_VERTEX_BUFFER_INDEX_SHIFT) |
1320                   VE0_VALID |
1321                   (I965_SURFACEFORMAT_R32G32_FLOAT << VE0_FORMAT_SHIFT) |
1322                   (8 << VE0_OFFSET_SHIFT));
1323         OUT_BATCH(batch, (I965_VFCOMPONENT_STORE_SRC << VE1_VFCOMPONENT_0_SHIFT) |
1324                   (I965_VFCOMPONENT_STORE_SRC << VE1_VFCOMPONENT_1_SHIFT) |
1325                   (I965_VFCOMPONENT_STORE_1_FLT << VE1_VFCOMPONENT_2_SHIFT) |
1326                   (I965_VFCOMPONENT_STORE_1_FLT << VE1_VFCOMPONENT_3_SHIFT) |
1327                   (4 << VE1_DESTINATION_ELEMENT_OFFSET_SHIFT));
1328         ADVANCE_BATCH(batch);
1329     }
1330 }
1331
1332 static void
1333 i965_render_upload_image_palette(
1334     VADriverContextP ctx,
1335     VAImageID        image_id,
1336     unsigned int     alpha
1337 )
1338 {
1339     struct i965_driver_data *i965 = i965_driver_data(ctx);
1340     struct intel_batchbuffer *batch = i965->batch;
1341     unsigned int i;
1342
1343     struct object_image *obj_image = IMAGE(image_id);
1344     assert(obj_image);
1345
1346     if (obj_image->image.num_palette_entries == 0)
1347         return;
1348
1349     BEGIN_BATCH(batch, 1 + obj_image->image.num_palette_entries);
1350     OUT_BATCH(batch, CMD_SAMPLER_PALETTE_LOAD | (obj_image->image.num_palette_entries - 1));
1351     /*fill palette*/
1352     //int32_t out[16]; //0-23:color 23-31:alpha
1353     for (i = 0; i < obj_image->image.num_palette_entries; i++)
1354         OUT_BATCH(batch, (alpha << 24) | obj_image->palette[i]);
1355     ADVANCE_BATCH(batch);
1356 }
1357
1358 static void
1359 i965_render_startup(VADriverContextP ctx)
1360 {
1361     struct i965_driver_data *i965 = i965_driver_data(ctx);
1362     struct intel_batchbuffer *batch = i965->batch;
1363     struct i965_render_state *render_state = &i965->render_state;
1364
1365     BEGIN_BATCH(batch, 11);
1366     OUT_BATCH(batch, CMD_VERTEX_BUFFERS | 3);
1367     OUT_BATCH(batch, 
1368               (0 << VB0_BUFFER_INDEX_SHIFT) |
1369               VB0_VERTEXDATA |
1370               ((4 * 4) << VB0_BUFFER_PITCH_SHIFT));
1371     OUT_RELOC(batch, render_state->vb.vertex_buffer, I915_GEM_DOMAIN_VERTEX, 0, 0);
1372
1373     if (IS_IRONLAKE(i965->intel.device_id))
1374         OUT_RELOC(batch, render_state->vb.vertex_buffer, I915_GEM_DOMAIN_VERTEX, 0, 12 * 4);
1375     else
1376         OUT_BATCH(batch, 3);
1377
1378     OUT_BATCH(batch, 0);
1379
1380     OUT_BATCH(batch, 
1381               CMD_3DPRIMITIVE |
1382               _3DPRIMITIVE_VERTEX_SEQUENTIAL |
1383               (_3DPRIM_RECTLIST << _3DPRIMITIVE_TOPOLOGY_SHIFT) |
1384               (0 << 9) |
1385               4);
1386     OUT_BATCH(batch, 3); /* vertex count per instance */
1387     OUT_BATCH(batch, 0); /* start vertex offset */
1388     OUT_BATCH(batch, 1); /* single instance */
1389     OUT_BATCH(batch, 0); /* start instance location */
1390     OUT_BATCH(batch, 0); /* index buffer offset, ignored */
1391     ADVANCE_BATCH(batch);
1392 }
1393
1394 static void 
1395 i965_clear_dest_region(VADriverContextP ctx)
1396 {
1397     struct i965_driver_data *i965 = i965_driver_data(ctx);
1398     struct intel_batchbuffer *batch = i965->batch;
1399     struct i965_render_state *render_state = &i965->render_state;
1400     struct intel_region *dest_region = render_state->draw_region;
1401     unsigned int blt_cmd, br13;
1402     int pitch;
1403
1404     blt_cmd = XY_COLOR_BLT_CMD;
1405     br13 = 0xf0 << 16;
1406     pitch = dest_region->pitch;
1407
1408     if (dest_region->cpp == 4) {
1409         br13 |= BR13_8888;
1410         blt_cmd |= (XY_COLOR_BLT_WRITE_RGB | XY_COLOR_BLT_WRITE_ALPHA);
1411     } else {
1412         assert(dest_region->cpp == 2);
1413         br13 |= BR13_565;
1414     }
1415
1416     if (dest_region->tiling != I915_TILING_NONE) {
1417         blt_cmd |= XY_COLOR_BLT_DST_TILED;
1418         pitch /= 4;
1419     }
1420
1421     br13 |= pitch;
1422
1423     if (IS_GEN6(i965->intel.device_id) ||
1424         IS_GEN7(i965->intel.device_id)) {
1425         intel_batchbuffer_start_atomic_blt(batch, 24);
1426         BEGIN_BLT_BATCH(batch, 6);
1427     } else {
1428         intel_batchbuffer_start_atomic(batch, 24);
1429         BEGIN_BATCH(batch, 6);
1430     }
1431
1432     OUT_BATCH(batch, blt_cmd);
1433     OUT_BATCH(batch, br13);
1434     OUT_BATCH(batch, (dest_region->y << 16) | (dest_region->x));
1435     OUT_BATCH(batch, ((dest_region->y + dest_region->height) << 16) |
1436               (dest_region->x + dest_region->width));
1437     OUT_RELOC(batch, dest_region->bo, 
1438               I915_GEM_DOMAIN_RENDER, I915_GEM_DOMAIN_RENDER,
1439               0);
1440     OUT_BATCH(batch, 0x0);
1441     ADVANCE_BATCH(batch);
1442     intel_batchbuffer_end_atomic(batch);
1443 }
1444
1445 static void
1446 i965_surface_render_pipeline_setup(VADriverContextP ctx)
1447 {
1448     struct i965_driver_data *i965 = i965_driver_data(ctx);
1449     struct intel_batchbuffer *batch = i965->batch;
1450
1451     i965_clear_dest_region(ctx);
1452     intel_batchbuffer_start_atomic(batch, 0x1000);
1453     intel_batchbuffer_emit_mi_flush(batch);
1454     i965_render_pipeline_select(ctx);
1455     i965_render_state_sip(ctx);
1456     i965_render_state_base_address(ctx);
1457     i965_render_binding_table_pointers(ctx);
1458     i965_render_constant_color(ctx);
1459     i965_render_pipelined_pointers(ctx);
1460     i965_render_urb_layout(ctx);
1461     i965_render_cs_urb_layout(ctx);
1462     i965_render_constant_buffer(ctx);
1463     i965_render_drawing_rectangle(ctx);
1464     i965_render_vertex_elements(ctx);
1465     i965_render_startup(ctx);
1466     intel_batchbuffer_end_atomic(batch);
1467 }
1468
1469 static void
1470 i965_subpic_render_pipeline_setup(VADriverContextP ctx)
1471 {
1472     struct i965_driver_data *i965 = i965_driver_data(ctx);
1473     struct intel_batchbuffer *batch = i965->batch;
1474
1475     intel_batchbuffer_start_atomic(batch, 0x1000);
1476     intel_batchbuffer_emit_mi_flush(batch);
1477     i965_render_pipeline_select(ctx);
1478     i965_render_state_sip(ctx);
1479     i965_render_state_base_address(ctx);
1480     i965_render_binding_table_pointers(ctx);
1481     i965_render_constant_color(ctx);
1482     i965_render_pipelined_pointers(ctx);
1483     i965_render_urb_layout(ctx);
1484     i965_render_cs_urb_layout(ctx);
1485     i965_render_drawing_rectangle(ctx);
1486     i965_render_vertex_elements(ctx);
1487     i965_render_startup(ctx);
1488     intel_batchbuffer_end_atomic(batch);
1489 }
1490
1491
1492 static void 
1493 i965_render_initialize(VADriverContextP ctx)
1494 {
1495     struct i965_driver_data *i965 = i965_driver_data(ctx);
1496     struct i965_render_state *render_state = &i965->render_state;
1497     dri_bo *bo;
1498
1499     /* VERTEX BUFFER */
1500     dri_bo_unreference(render_state->vb.vertex_buffer);
1501     bo = dri_bo_alloc(i965->intel.bufmgr,
1502                       "vertex buffer",
1503                       4096,
1504                       4096);
1505     assert(bo);
1506     render_state->vb.vertex_buffer = bo;
1507
1508     /* VS */
1509     dri_bo_unreference(render_state->vs.state);
1510     bo = dri_bo_alloc(i965->intel.bufmgr,
1511                       "vs state",
1512                       sizeof(struct i965_vs_unit_state),
1513                       64);
1514     assert(bo);
1515     render_state->vs.state = bo;
1516
1517     /* GS */
1518     /* CLIP */
1519     /* SF */
1520     dri_bo_unreference(render_state->sf.state);
1521     bo = dri_bo_alloc(i965->intel.bufmgr,
1522                       "sf state",
1523                       sizeof(struct i965_sf_unit_state),
1524                       64);
1525     assert(bo);
1526     render_state->sf.state = bo;
1527
1528     /* WM */
1529     dri_bo_unreference(render_state->wm.surface_state_binding_table_bo);
1530     bo = dri_bo_alloc(i965->intel.bufmgr,
1531                       "surface state & binding table",
1532                       (SURFACE_STATE_PADDED_SIZE + sizeof(unsigned int)) * MAX_RENDER_SURFACES,
1533                       4096);
1534     assert(bo);
1535     render_state->wm.surface_state_binding_table_bo = bo;
1536
1537     dri_bo_unreference(render_state->wm.sampler);
1538     bo = dri_bo_alloc(i965->intel.bufmgr,
1539                       "sampler state",
1540                       MAX_SAMPLERS * sizeof(struct i965_sampler_state),
1541                       64);
1542     assert(bo);
1543     render_state->wm.sampler = bo;
1544     render_state->wm.sampler_count = 0;
1545
1546     dri_bo_unreference(render_state->wm.state);
1547     bo = dri_bo_alloc(i965->intel.bufmgr,
1548                       "wm state",
1549                       sizeof(struct i965_wm_unit_state),
1550                       64);
1551     assert(bo);
1552     render_state->wm.state = bo;
1553
1554     /* COLOR CALCULATOR */
1555     dri_bo_unreference(render_state->cc.state);
1556     bo = dri_bo_alloc(i965->intel.bufmgr,
1557                       "color calc state",
1558                       sizeof(struct i965_cc_unit_state),
1559                       64);
1560     assert(bo);
1561     render_state->cc.state = bo;
1562
1563     dri_bo_unreference(render_state->cc.viewport);
1564     bo = dri_bo_alloc(i965->intel.bufmgr,
1565                       "cc viewport",
1566                       sizeof(struct i965_cc_viewport),
1567                       64);
1568     assert(bo);
1569     render_state->cc.viewport = bo;
1570 }
1571
1572 static void
1573 i965_render_put_surface(
1574     VADriverContextP   ctx,
1575     VASurfaceID        surface,
1576     const VARectangle *src_rect,
1577     const VARectangle *dst_rect,
1578     unsigned int       flags
1579 )
1580 {
1581     struct i965_driver_data *i965 = i965_driver_data(ctx);
1582     struct intel_batchbuffer *batch = i965->batch;
1583
1584     i965_render_initialize(ctx);
1585     i965_surface_render_state_setup(ctx, surface, src_rect, dst_rect, flags);
1586     i965_surface_render_pipeline_setup(ctx);
1587     intel_batchbuffer_flush(batch);
1588 }
1589
1590 static void
1591 i965_render_put_subpicture(
1592     VADriverContextP   ctx,
1593     VASurfaceID        surface,
1594     const VARectangle *src_rect,
1595     const VARectangle *dst_rect
1596 )
1597 {
1598     struct i965_driver_data *i965 = i965_driver_data(ctx);
1599     struct intel_batchbuffer *batch = i965->batch;
1600     struct object_surface *obj_surface = SURFACE(surface);
1601     struct object_subpic *obj_subpic = SUBPIC(obj_surface->subpic);
1602
1603     assert(obj_subpic);
1604
1605     i965_render_initialize(ctx);
1606     i965_subpic_render_state_setup(ctx, surface, src_rect, dst_rect);
1607     i965_subpic_render_pipeline_setup(ctx);
1608     i965_render_upload_image_palette(ctx, obj_subpic->image, 0xff);
1609     intel_batchbuffer_flush(batch);
1610 }
1611
1612 /*
1613  * for GEN6+
1614  */
1615 static void 
1616 gen6_render_initialize(VADriverContextP ctx)
1617 {
1618     struct i965_driver_data *i965 = i965_driver_data(ctx);
1619     struct i965_render_state *render_state = &i965->render_state;
1620     dri_bo *bo;
1621
1622     /* VERTEX BUFFER */
1623     dri_bo_unreference(render_state->vb.vertex_buffer);
1624     bo = dri_bo_alloc(i965->intel.bufmgr,
1625                       "vertex buffer",
1626                       4096,
1627                       4096);
1628     assert(bo);
1629     render_state->vb.vertex_buffer = bo;
1630
1631     /* WM */
1632     dri_bo_unreference(render_state->wm.surface_state_binding_table_bo);
1633     bo = dri_bo_alloc(i965->intel.bufmgr,
1634                       "surface state & binding table",
1635                       (SURFACE_STATE_PADDED_SIZE + sizeof(unsigned int)) * MAX_RENDER_SURFACES,
1636                       4096);
1637     assert(bo);
1638     render_state->wm.surface_state_binding_table_bo = bo;
1639
1640     dri_bo_unreference(render_state->wm.sampler);
1641     bo = dri_bo_alloc(i965->intel.bufmgr,
1642                       "sampler state",
1643                       MAX_SAMPLERS * sizeof(struct i965_sampler_state),
1644                       4096);
1645     assert(bo);
1646     render_state->wm.sampler = bo;
1647     render_state->wm.sampler_count = 0;
1648
1649     /* COLOR CALCULATOR */
1650     dri_bo_unreference(render_state->cc.state);
1651     bo = dri_bo_alloc(i965->intel.bufmgr,
1652                       "color calc state",
1653                       sizeof(struct gen6_color_calc_state),
1654                       4096);
1655     assert(bo);
1656     render_state->cc.state = bo;
1657
1658     /* CC VIEWPORT */
1659     dri_bo_unreference(render_state->cc.viewport);
1660     bo = dri_bo_alloc(i965->intel.bufmgr,
1661                       "cc viewport",
1662                       sizeof(struct i965_cc_viewport),
1663                       4096);
1664     assert(bo);
1665     render_state->cc.viewport = bo;
1666
1667     /* BLEND STATE */
1668     dri_bo_unreference(render_state->cc.blend);
1669     bo = dri_bo_alloc(i965->intel.bufmgr,
1670                       "blend state",
1671                       sizeof(struct gen6_blend_state),
1672                       4096);
1673     assert(bo);
1674     render_state->cc.blend = bo;
1675
1676     /* DEPTH & STENCIL STATE */
1677     dri_bo_unreference(render_state->cc.depth_stencil);
1678     bo = dri_bo_alloc(i965->intel.bufmgr,
1679                       "depth & stencil state",
1680                       sizeof(struct gen6_depth_stencil_state),
1681                       4096);
1682     assert(bo);
1683     render_state->cc.depth_stencil = bo;
1684 }
1685
1686 static void
1687 gen6_render_color_calc_state(VADriverContextP ctx)
1688 {
1689     struct i965_driver_data *i965 = i965_driver_data(ctx);
1690     struct i965_render_state *render_state = &i965->render_state;
1691     struct gen6_color_calc_state *color_calc_state;
1692     
1693     dri_bo_map(render_state->cc.state, 1);
1694     assert(render_state->cc.state->virtual);
1695     color_calc_state = render_state->cc.state->virtual;
1696     memset(color_calc_state, 0, sizeof(*color_calc_state));
1697     color_calc_state->constant_r = 1.0;
1698     color_calc_state->constant_g = 0.0;
1699     color_calc_state->constant_b = 1.0;
1700     color_calc_state->constant_a = 1.0;
1701     dri_bo_unmap(render_state->cc.state);
1702 }
1703
1704 static void
1705 gen6_render_blend_state(VADriverContextP ctx)
1706 {
1707     struct i965_driver_data *i965 = i965_driver_data(ctx);
1708     struct i965_render_state *render_state = &i965->render_state;
1709     struct gen6_blend_state *blend_state;
1710     
1711     dri_bo_map(render_state->cc.blend, 1);
1712     assert(render_state->cc.blend->virtual);
1713     blend_state = render_state->cc.blend->virtual;
1714     memset(blend_state, 0, sizeof(*blend_state));
1715     blend_state->blend1.logic_op_enable = 1;
1716     blend_state->blend1.logic_op_func = 0xc;
1717     dri_bo_unmap(render_state->cc.blend);
1718 }
1719
1720 static void
1721 gen6_render_depth_stencil_state(VADriverContextP ctx)
1722 {
1723     struct i965_driver_data *i965 = i965_driver_data(ctx);
1724     struct i965_render_state *render_state = &i965->render_state;
1725     struct gen6_depth_stencil_state *depth_stencil_state;
1726     
1727     dri_bo_map(render_state->cc.depth_stencil, 1);
1728     assert(render_state->cc.depth_stencil->virtual);
1729     depth_stencil_state = render_state->cc.depth_stencil->virtual;
1730     memset(depth_stencil_state, 0, sizeof(*depth_stencil_state));
1731     dri_bo_unmap(render_state->cc.depth_stencil);
1732 }
1733
1734 static void
1735 gen6_render_setup_states(
1736     VADriverContextP   ctx,
1737     VASurfaceID        surface,
1738     const VARectangle *src_rect,
1739     const VARectangle *dst_rect,
1740     unsigned int       flags
1741 )
1742 {
1743     i965_render_dest_surface_state(ctx, 0);
1744     i965_render_src_surfaces_state(ctx, surface, flags);
1745     i965_render_sampler(ctx);
1746     i965_render_cc_viewport(ctx);
1747     gen6_render_color_calc_state(ctx);
1748     gen6_render_blend_state(ctx);
1749     gen6_render_depth_stencil_state(ctx);
1750     i965_render_upload_constants(ctx, surface);
1751     i965_render_upload_vertex(ctx, surface, src_rect, dst_rect);
1752 }
1753
1754 static void
1755 gen6_emit_invarient_states(VADriverContextP ctx)
1756 {
1757     struct i965_driver_data *i965 = i965_driver_data(ctx);
1758     struct intel_batchbuffer *batch = i965->batch;
1759
1760     OUT_BATCH(batch, CMD_PIPELINE_SELECT | PIPELINE_SELECT_3D);
1761
1762     OUT_BATCH(batch, GEN6_3DSTATE_MULTISAMPLE | (3 - 2));
1763     OUT_BATCH(batch, GEN6_3DSTATE_MULTISAMPLE_PIXEL_LOCATION_CENTER |
1764               GEN6_3DSTATE_MULTISAMPLE_NUMSAMPLES_1); /* 1 sample/pixel */
1765     OUT_BATCH(batch, 0);
1766
1767     OUT_BATCH(batch, GEN6_3DSTATE_SAMPLE_MASK | (2 - 2));
1768     OUT_BATCH(batch, 1);
1769
1770     /* Set system instruction pointer */
1771     OUT_BATCH(batch, CMD_STATE_SIP | 0);
1772     OUT_BATCH(batch, 0);
1773 }
1774
1775 static void
1776 gen6_emit_state_base_address(VADriverContextP ctx)
1777 {
1778     struct i965_driver_data *i965 = i965_driver_data(ctx);
1779     struct intel_batchbuffer *batch = i965->batch;
1780     struct i965_render_state *render_state = &i965->render_state;
1781
1782     OUT_BATCH(batch, CMD_STATE_BASE_ADDRESS | (10 - 2));
1783     OUT_BATCH(batch, BASE_ADDRESS_MODIFY); /* General state base address */
1784     OUT_RELOC(batch, render_state->wm.surface_state_binding_table_bo, I915_GEM_DOMAIN_INSTRUCTION, 0, BASE_ADDRESS_MODIFY); /* Surface state base address */
1785     OUT_BATCH(batch, BASE_ADDRESS_MODIFY); /* Dynamic state base address */
1786     OUT_BATCH(batch, BASE_ADDRESS_MODIFY); /* Indirect object base address */
1787     OUT_BATCH(batch, BASE_ADDRESS_MODIFY); /* Instruction base address */
1788     OUT_BATCH(batch, BASE_ADDRESS_MODIFY); /* General state upper bound */
1789     OUT_BATCH(batch, BASE_ADDRESS_MODIFY); /* Dynamic state upper bound */
1790     OUT_BATCH(batch, BASE_ADDRESS_MODIFY); /* Indirect object upper bound */
1791     OUT_BATCH(batch, BASE_ADDRESS_MODIFY); /* Instruction access upper bound */
1792 }
1793
1794 static void
1795 gen6_emit_viewport_state_pointers(VADriverContextP ctx)
1796 {
1797     struct i965_driver_data *i965 = i965_driver_data(ctx);
1798     struct intel_batchbuffer *batch = i965->batch;
1799     struct i965_render_state *render_state = &i965->render_state;
1800
1801     OUT_BATCH(batch, GEN6_3DSTATE_VIEWPORT_STATE_POINTERS |
1802               GEN6_3DSTATE_VIEWPORT_STATE_MODIFY_CC |
1803               (4 - 2));
1804     OUT_BATCH(batch, 0);
1805     OUT_BATCH(batch, 0);
1806     OUT_RELOC(batch, render_state->cc.viewport, I915_GEM_DOMAIN_INSTRUCTION, 0, 0);
1807 }
1808
1809 static void
1810 gen6_emit_urb(VADriverContextP ctx)
1811 {
1812     struct i965_driver_data *i965 = i965_driver_data(ctx);
1813     struct intel_batchbuffer *batch = i965->batch;
1814
1815     OUT_BATCH(batch, GEN6_3DSTATE_URB | (3 - 2));
1816     OUT_BATCH(batch, ((1 - 1) << GEN6_3DSTATE_URB_VS_SIZE_SHIFT) |
1817               (24 << GEN6_3DSTATE_URB_VS_ENTRIES_SHIFT)); /* at least 24 on GEN6 */
1818     OUT_BATCH(batch, (0 << GEN6_3DSTATE_URB_GS_SIZE_SHIFT) |
1819               (0 << GEN6_3DSTATE_URB_GS_ENTRIES_SHIFT)); /* no GS thread */
1820 }
1821
1822 static void
1823 gen6_emit_cc_state_pointers(VADriverContextP ctx)
1824 {
1825     struct i965_driver_data *i965 = i965_driver_data(ctx);
1826     struct intel_batchbuffer *batch = i965->batch;
1827     struct i965_render_state *render_state = &i965->render_state;
1828
1829     OUT_BATCH(batch, GEN6_3DSTATE_CC_STATE_POINTERS | (4 - 2));
1830     OUT_RELOC(batch, render_state->cc.blend, I915_GEM_DOMAIN_INSTRUCTION, 0, 1);
1831     OUT_RELOC(batch, render_state->cc.depth_stencil, I915_GEM_DOMAIN_INSTRUCTION, 0, 1);
1832     OUT_RELOC(batch, render_state->cc.state, I915_GEM_DOMAIN_INSTRUCTION, 0, 1);
1833 }
1834
1835 static void
1836 gen6_emit_sampler_state_pointers(VADriverContextP ctx)
1837 {
1838     struct i965_driver_data *i965 = i965_driver_data(ctx);
1839     struct intel_batchbuffer *batch = i965->batch;
1840     struct i965_render_state *render_state = &i965->render_state;
1841
1842     OUT_BATCH(batch, GEN6_3DSTATE_SAMPLER_STATE_POINTERS |
1843               GEN6_3DSTATE_SAMPLER_STATE_MODIFY_PS |
1844               (4 - 2));
1845     OUT_BATCH(batch, 0); /* VS */
1846     OUT_BATCH(batch, 0); /* GS */
1847     OUT_RELOC(batch,render_state->wm.sampler, I915_GEM_DOMAIN_INSTRUCTION, 0, 0);
1848 }
1849
1850 static void
1851 gen6_emit_binding_table(VADriverContextP ctx)
1852 {
1853     struct i965_driver_data *i965 = i965_driver_data(ctx);
1854     struct intel_batchbuffer *batch = i965->batch;
1855
1856     /* Binding table pointers */
1857     OUT_BATCH(batch, CMD_BINDING_TABLE_POINTERS |
1858               GEN6_BINDING_TABLE_MODIFY_PS |
1859               (4 - 2));
1860     OUT_BATCH(batch, 0);                /* vs */
1861     OUT_BATCH(batch, 0);                /* gs */
1862     /* Only the PS uses the binding table */
1863     OUT_BATCH(batch, BINDING_TABLE_OFFSET);
1864 }
1865
1866 static void
1867 gen6_emit_depth_buffer_state(VADriverContextP ctx)
1868 {
1869     struct i965_driver_data *i965 = i965_driver_data(ctx);
1870     struct intel_batchbuffer *batch = i965->batch;
1871
1872     OUT_BATCH(batch, CMD_DEPTH_BUFFER | (7 - 2));
1873     OUT_BATCH(batch, (I965_SURFACE_NULL << CMD_DEPTH_BUFFER_TYPE_SHIFT) |
1874               (I965_DEPTHFORMAT_D32_FLOAT << CMD_DEPTH_BUFFER_FORMAT_SHIFT));
1875     OUT_BATCH(batch, 0);
1876     OUT_BATCH(batch, 0);
1877     OUT_BATCH(batch, 0);
1878     OUT_BATCH(batch, 0);
1879     OUT_BATCH(batch, 0);
1880
1881     OUT_BATCH(batch, CMD_CLEAR_PARAMS | (2 - 2));
1882     OUT_BATCH(batch, 0);
1883 }
1884
1885 static void
1886 gen6_emit_drawing_rectangle(VADriverContextP ctx)
1887 {
1888     i965_render_drawing_rectangle(ctx);
1889 }
1890
1891 static void 
1892 gen6_emit_vs_state(VADriverContextP ctx)
1893 {
1894     struct i965_driver_data *i965 = i965_driver_data(ctx);
1895     struct intel_batchbuffer *batch = i965->batch;
1896
1897     /* disable VS constant buffer */
1898     OUT_BATCH(batch, GEN6_3DSTATE_CONSTANT_VS | (5 - 2));
1899     OUT_BATCH(batch, 0);
1900     OUT_BATCH(batch, 0);
1901     OUT_BATCH(batch, 0);
1902     OUT_BATCH(batch, 0);
1903         
1904     OUT_BATCH(batch, GEN6_3DSTATE_VS | (6 - 2));
1905     OUT_BATCH(batch, 0); /* without VS kernel */
1906     OUT_BATCH(batch, 0);
1907     OUT_BATCH(batch, 0);
1908     OUT_BATCH(batch, 0);
1909     OUT_BATCH(batch, 0); /* pass-through */
1910 }
1911
1912 static void 
1913 gen6_emit_gs_state(VADriverContextP ctx)
1914 {
1915     struct i965_driver_data *i965 = i965_driver_data(ctx);
1916     struct intel_batchbuffer *batch = i965->batch;
1917
1918     /* disable GS constant buffer */
1919     OUT_BATCH(batch, GEN6_3DSTATE_CONSTANT_GS | (5 - 2));
1920     OUT_BATCH(batch, 0);
1921     OUT_BATCH(batch, 0);
1922     OUT_BATCH(batch, 0);
1923     OUT_BATCH(batch, 0);
1924         
1925     OUT_BATCH(batch, GEN6_3DSTATE_GS | (7 - 2));
1926     OUT_BATCH(batch, 0); /* without GS kernel */
1927     OUT_BATCH(batch, 0);
1928     OUT_BATCH(batch, 0);
1929     OUT_BATCH(batch, 0);
1930     OUT_BATCH(batch, 0);
1931     OUT_BATCH(batch, 0); /* pass-through */
1932 }
1933
1934 static void 
1935 gen6_emit_clip_state(VADriverContextP ctx)
1936 {
1937     struct i965_driver_data *i965 = i965_driver_data(ctx);
1938     struct intel_batchbuffer *batch = i965->batch;
1939
1940     OUT_BATCH(batch, GEN6_3DSTATE_CLIP | (4 - 2));
1941     OUT_BATCH(batch, 0);
1942     OUT_BATCH(batch, 0); /* pass-through */
1943     OUT_BATCH(batch, 0);
1944 }
1945
1946 static void 
1947 gen6_emit_sf_state(VADriverContextP ctx)
1948 {
1949     struct i965_driver_data *i965 = i965_driver_data(ctx);
1950     struct intel_batchbuffer *batch = i965->batch;
1951
1952     OUT_BATCH(batch, GEN6_3DSTATE_SF | (20 - 2));
1953     OUT_BATCH(batch, (1 << GEN6_3DSTATE_SF_NUM_OUTPUTS_SHIFT) |
1954               (1 << GEN6_3DSTATE_SF_URB_ENTRY_READ_LENGTH_SHIFT) |
1955               (0 << GEN6_3DSTATE_SF_URB_ENTRY_READ_OFFSET_SHIFT));
1956     OUT_BATCH(batch, 0);
1957     OUT_BATCH(batch, GEN6_3DSTATE_SF_CULL_NONE);
1958     OUT_BATCH(batch, 2 << GEN6_3DSTATE_SF_TRIFAN_PROVOKE_SHIFT); /* DW4 */
1959     OUT_BATCH(batch, 0);
1960     OUT_BATCH(batch, 0);
1961     OUT_BATCH(batch, 0);
1962     OUT_BATCH(batch, 0);
1963     OUT_BATCH(batch, 0); /* DW9 */
1964     OUT_BATCH(batch, 0);
1965     OUT_BATCH(batch, 0);
1966     OUT_BATCH(batch, 0);
1967     OUT_BATCH(batch, 0);
1968     OUT_BATCH(batch, 0); /* DW14 */
1969     OUT_BATCH(batch, 0);
1970     OUT_BATCH(batch, 0);
1971     OUT_BATCH(batch, 0);
1972     OUT_BATCH(batch, 0);
1973     OUT_BATCH(batch, 0); /* DW19 */
1974 }
1975
1976 static void 
1977 gen6_emit_wm_state(VADriverContextP ctx, int kernel)
1978 {
1979     struct i965_driver_data *i965 = i965_driver_data(ctx);
1980     struct intel_batchbuffer *batch = i965->batch;
1981     struct i965_render_state *render_state = &i965->render_state;
1982
1983     OUT_BATCH(batch, GEN6_3DSTATE_CONSTANT_PS |
1984               GEN6_3DSTATE_CONSTANT_BUFFER_0_ENABLE |
1985               (5 - 2));
1986     OUT_RELOC(batch, 
1987               render_state->curbe.bo,
1988               I915_GEM_DOMAIN_INSTRUCTION, 0,
1989               0);
1990     OUT_BATCH(batch, 0);
1991     OUT_BATCH(batch, 0);
1992     OUT_BATCH(batch, 0);
1993
1994     OUT_BATCH(batch, GEN6_3DSTATE_WM | (9 - 2));
1995     OUT_RELOC(batch, render_state->render_kernels[kernel].bo,
1996               I915_GEM_DOMAIN_INSTRUCTION, 0,
1997               0);
1998     OUT_BATCH(batch, (1 << GEN6_3DSTATE_WM_SAMPLER_COUNT_SHITF) |
1999               (5 << GEN6_3DSTATE_WM_BINDING_TABLE_ENTRY_COUNT_SHIFT));
2000     OUT_BATCH(batch, 0);
2001     OUT_BATCH(batch, (6 << GEN6_3DSTATE_WM_DISPATCH_START_GRF_0_SHIFT)); /* DW4 */
2002     OUT_BATCH(batch, ((40 - 1) << GEN6_3DSTATE_WM_MAX_THREADS_SHIFT) |
2003               GEN6_3DSTATE_WM_DISPATCH_ENABLE |
2004               GEN6_3DSTATE_WM_16_DISPATCH_ENABLE);
2005     OUT_BATCH(batch, (1 << GEN6_3DSTATE_WM_NUM_SF_OUTPUTS_SHIFT) |
2006               GEN6_3DSTATE_WM_PERSPECTIVE_PIXEL_BARYCENTRIC);
2007     OUT_BATCH(batch, 0);
2008     OUT_BATCH(batch, 0);
2009 }
2010
2011 static void
2012 gen6_emit_vertex_element_state(VADriverContextP ctx)
2013 {
2014     struct i965_driver_data *i965 = i965_driver_data(ctx);
2015     struct intel_batchbuffer *batch = i965->batch;
2016
2017     /* Set up our vertex elements, sourced from the single vertex buffer. */
2018     OUT_BATCH(batch, CMD_VERTEX_ELEMENTS | (5 - 2));
2019     /* offset 0: X,Y -> {X, Y, 1.0, 1.0} */
2020     OUT_BATCH(batch, (0 << GEN6_VE0_VERTEX_BUFFER_INDEX_SHIFT) |
2021               GEN6_VE0_VALID |
2022               (I965_SURFACEFORMAT_R32G32_FLOAT << VE0_FORMAT_SHIFT) |
2023               (0 << VE0_OFFSET_SHIFT));
2024     OUT_BATCH(batch, (I965_VFCOMPONENT_STORE_SRC << VE1_VFCOMPONENT_0_SHIFT) |
2025               (I965_VFCOMPONENT_STORE_SRC << VE1_VFCOMPONENT_1_SHIFT) |
2026               (I965_VFCOMPONENT_STORE_1_FLT << VE1_VFCOMPONENT_2_SHIFT) |
2027               (I965_VFCOMPONENT_STORE_1_FLT << VE1_VFCOMPONENT_3_SHIFT));
2028     /* offset 8: S0, T0 -> {S0, T0, 1.0, 1.0} */
2029     OUT_BATCH(batch, (0 << GEN6_VE0_VERTEX_BUFFER_INDEX_SHIFT) |
2030               GEN6_VE0_VALID |
2031               (I965_SURFACEFORMAT_R32G32_FLOAT << VE0_FORMAT_SHIFT) |
2032               (8 << VE0_OFFSET_SHIFT));
2033     OUT_BATCH(batch, (I965_VFCOMPONENT_STORE_SRC << VE1_VFCOMPONENT_0_SHIFT) | 
2034               (I965_VFCOMPONENT_STORE_SRC << VE1_VFCOMPONENT_1_SHIFT) |
2035               (I965_VFCOMPONENT_STORE_1_FLT << VE1_VFCOMPONENT_2_SHIFT) |
2036               (I965_VFCOMPONENT_STORE_1_FLT << VE1_VFCOMPONENT_3_SHIFT));
2037 }
2038
2039 static void
2040 gen6_emit_vertices(VADriverContextP ctx)
2041 {
2042     struct i965_driver_data *i965 = i965_driver_data(ctx);
2043     struct intel_batchbuffer *batch = i965->batch;
2044     struct i965_render_state *render_state = &i965->render_state;
2045
2046     BEGIN_BATCH(batch, 11);
2047     OUT_BATCH(batch, CMD_VERTEX_BUFFERS | 3);
2048     OUT_BATCH(batch, 
2049               (0 << GEN6_VB0_BUFFER_INDEX_SHIFT) |
2050               GEN6_VB0_VERTEXDATA |
2051               ((4 * 4) << VB0_BUFFER_PITCH_SHIFT));
2052     OUT_RELOC(batch, render_state->vb.vertex_buffer, I915_GEM_DOMAIN_VERTEX, 0, 0);
2053     OUT_RELOC(batch, render_state->vb.vertex_buffer, I915_GEM_DOMAIN_VERTEX, 0, 12 * 4);
2054     OUT_BATCH(batch, 0);
2055
2056     OUT_BATCH(batch, 
2057               CMD_3DPRIMITIVE |
2058               _3DPRIMITIVE_VERTEX_SEQUENTIAL |
2059               (_3DPRIM_RECTLIST << _3DPRIMITIVE_TOPOLOGY_SHIFT) |
2060               (0 << 9) |
2061               4);
2062     OUT_BATCH(batch, 3); /* vertex count per instance */
2063     OUT_BATCH(batch, 0); /* start vertex offset */
2064     OUT_BATCH(batch, 1); /* single instance */
2065     OUT_BATCH(batch, 0); /* start instance location */
2066     OUT_BATCH(batch, 0); /* index buffer offset, ignored */
2067     ADVANCE_BATCH(batch);
2068 }
2069
2070 static void
2071 gen6_render_emit_states(VADriverContextP ctx, int kernel)
2072 {
2073     struct i965_driver_data *i965 = i965_driver_data(ctx);
2074     struct intel_batchbuffer *batch = i965->batch;
2075
2076     intel_batchbuffer_start_atomic(batch, 0x1000);
2077     intel_batchbuffer_emit_mi_flush(batch);
2078     gen6_emit_invarient_states(ctx);
2079     gen6_emit_state_base_address(ctx);
2080     gen6_emit_viewport_state_pointers(ctx);
2081     gen6_emit_urb(ctx);
2082     gen6_emit_cc_state_pointers(ctx);
2083     gen6_emit_sampler_state_pointers(ctx);
2084     gen6_emit_vs_state(ctx);
2085     gen6_emit_gs_state(ctx);
2086     gen6_emit_clip_state(ctx);
2087     gen6_emit_sf_state(ctx);
2088     gen6_emit_wm_state(ctx, kernel);
2089     gen6_emit_binding_table(ctx);
2090     gen6_emit_depth_buffer_state(ctx);
2091     gen6_emit_drawing_rectangle(ctx);
2092     gen6_emit_vertex_element_state(ctx);
2093     gen6_emit_vertices(ctx);
2094     intel_batchbuffer_end_atomic(batch);
2095 }
2096
2097 static void
2098 gen6_render_put_surface(
2099     VADriverContextP   ctx,
2100     VASurfaceID        surface,
2101     const VARectangle *src_rect,
2102     const VARectangle *dst_rect,
2103     unsigned int       flags
2104 )
2105 {
2106     struct i965_driver_data *i965 = i965_driver_data(ctx);
2107     struct intel_batchbuffer *batch = i965->batch;
2108
2109     gen6_render_initialize(ctx);
2110     gen6_render_setup_states(ctx, surface, src_rect, dst_rect, flags);
2111     i965_clear_dest_region(ctx);
2112     gen6_render_emit_states(ctx, PS_KERNEL);
2113     intel_batchbuffer_flush(batch);
2114 }
2115
2116 static void
2117 gen6_subpicture_render_blend_state(VADriverContextP ctx)
2118 {
2119     struct i965_driver_data *i965 = i965_driver_data(ctx);
2120     struct i965_render_state *render_state = &i965->render_state;
2121     struct gen6_blend_state *blend_state;
2122
2123     dri_bo_unmap(render_state->cc.state);    
2124     dri_bo_map(render_state->cc.blend, 1);
2125     assert(render_state->cc.blend->virtual);
2126     blend_state = render_state->cc.blend->virtual;
2127     memset(blend_state, 0, sizeof(*blend_state));
2128     blend_state->blend0.dest_blend_factor = I965_BLENDFACTOR_INV_SRC_ALPHA;
2129     blend_state->blend0.source_blend_factor = I965_BLENDFACTOR_SRC_ALPHA;
2130     blend_state->blend0.blend_func = I965_BLENDFUNCTION_ADD;
2131     blend_state->blend0.blend_enable = 1;
2132     blend_state->blend1.post_blend_clamp_enable = 1;
2133     blend_state->blend1.pre_blend_clamp_enable = 1;
2134     blend_state->blend1.clamp_range = 0; /* clamp range [0, 1] */
2135     dri_bo_unmap(render_state->cc.blend);
2136 }
2137
2138 static void
2139 gen6_subpicture_render_setup_states(
2140     VADriverContextP   ctx,
2141     VASurfaceID        surface,
2142     const VARectangle *src_rect,
2143     const VARectangle *dst_rect
2144 )
2145 {
2146     i965_render_dest_surface_state(ctx, 0);
2147     i965_subpic_render_src_surfaces_state(ctx, surface);
2148     i965_render_sampler(ctx);
2149     i965_render_cc_viewport(ctx);
2150     gen6_render_color_calc_state(ctx);
2151     gen6_subpicture_render_blend_state(ctx);
2152     gen6_render_depth_stencil_state(ctx);
2153     i965_subpic_render_upload_vertex(ctx, surface, dst_rect);
2154 }
2155
2156 static void
2157 gen6_render_put_subpicture(
2158     VADriverContextP   ctx,
2159     VASurfaceID        surface,
2160     const VARectangle *src_rect,
2161     const VARectangle *dst_rect
2162 )
2163 {
2164     struct i965_driver_data *i965 = i965_driver_data(ctx);
2165     struct intel_batchbuffer *batch = i965->batch;
2166     struct object_surface *obj_surface = SURFACE(surface);
2167     struct object_subpic *obj_subpic = SUBPIC(obj_surface->subpic);
2168
2169     assert(obj_subpic);
2170     gen6_render_initialize(ctx);
2171     gen6_subpicture_render_setup_states(ctx, surface, src_rect, dst_rect);
2172     gen6_render_emit_states(ctx, PS_SUBPIC_KERNEL);
2173     i965_render_upload_image_palette(ctx, obj_subpic->image, 0xff);
2174     intel_batchbuffer_flush(batch);
2175 }
2176
2177 /*
2178  * for GEN7
2179  */
2180 static void 
2181 gen7_render_initialize(VADriverContextP ctx)
2182 {
2183     struct i965_driver_data *i965 = i965_driver_data(ctx);
2184     struct i965_render_state *render_state = &i965->render_state;
2185     dri_bo *bo;
2186
2187     /* VERTEX BUFFER */
2188     dri_bo_unreference(render_state->vb.vertex_buffer);
2189     bo = dri_bo_alloc(i965->intel.bufmgr,
2190                       "vertex buffer",
2191                       4096,
2192                       4096);
2193     assert(bo);
2194     render_state->vb.vertex_buffer = bo;
2195
2196     /* WM */
2197     dri_bo_unreference(render_state->wm.surface_state_binding_table_bo);
2198     bo = dri_bo_alloc(i965->intel.bufmgr,
2199                       "surface state & binding table",
2200                       (SURFACE_STATE_PADDED_SIZE + sizeof(unsigned int)) * MAX_RENDER_SURFACES,
2201                       4096);
2202     assert(bo);
2203     render_state->wm.surface_state_binding_table_bo = bo;
2204
2205     dri_bo_unreference(render_state->wm.sampler);
2206     bo = dri_bo_alloc(i965->intel.bufmgr,
2207                       "sampler state",
2208                       MAX_SAMPLERS * sizeof(struct gen7_sampler_state),
2209                       4096);
2210     assert(bo);
2211     render_state->wm.sampler = bo;
2212     render_state->wm.sampler_count = 0;
2213
2214     /* COLOR CALCULATOR */
2215     dri_bo_unreference(render_state->cc.state);
2216     bo = dri_bo_alloc(i965->intel.bufmgr,
2217                       "color calc state",
2218                       sizeof(struct gen6_color_calc_state),
2219                       4096);
2220     assert(bo);
2221     render_state->cc.state = bo;
2222
2223     /* CC VIEWPORT */
2224     dri_bo_unreference(render_state->cc.viewport);
2225     bo = dri_bo_alloc(i965->intel.bufmgr,
2226                       "cc viewport",
2227                       sizeof(struct i965_cc_viewport),
2228                       4096);
2229     assert(bo);
2230     render_state->cc.viewport = bo;
2231
2232     /* BLEND STATE */
2233     dri_bo_unreference(render_state->cc.blend);
2234     bo = dri_bo_alloc(i965->intel.bufmgr,
2235                       "blend state",
2236                       sizeof(struct gen6_blend_state),
2237                       4096);
2238     assert(bo);
2239     render_state->cc.blend = bo;
2240
2241     /* DEPTH & STENCIL STATE */
2242     dri_bo_unreference(render_state->cc.depth_stencil);
2243     bo = dri_bo_alloc(i965->intel.bufmgr,
2244                       "depth & stencil state",
2245                       sizeof(struct gen6_depth_stencil_state),
2246                       4096);
2247     assert(bo);
2248     render_state->cc.depth_stencil = bo;
2249 }
2250
2251 static void
2252 gen7_render_color_calc_state(VADriverContextP ctx)
2253 {
2254     struct i965_driver_data *i965 = i965_driver_data(ctx);
2255     struct i965_render_state *render_state = &i965->render_state;
2256     struct gen6_color_calc_state *color_calc_state;
2257     
2258     dri_bo_map(render_state->cc.state, 1);
2259     assert(render_state->cc.state->virtual);
2260     color_calc_state = render_state->cc.state->virtual;
2261     memset(color_calc_state, 0, sizeof(*color_calc_state));
2262     color_calc_state->constant_r = 1.0;
2263     color_calc_state->constant_g = 0.0;
2264     color_calc_state->constant_b = 1.0;
2265     color_calc_state->constant_a = 1.0;
2266     dri_bo_unmap(render_state->cc.state);
2267 }
2268
2269 static void
2270 gen7_render_blend_state(VADriverContextP ctx)
2271 {
2272     struct i965_driver_data *i965 = i965_driver_data(ctx);
2273     struct i965_render_state *render_state = &i965->render_state;
2274     struct gen6_blend_state *blend_state;
2275     
2276     dri_bo_map(render_state->cc.blend, 1);
2277     assert(render_state->cc.blend->virtual);
2278     blend_state = render_state->cc.blend->virtual;
2279     memset(blend_state, 0, sizeof(*blend_state));
2280     blend_state->blend1.logic_op_enable = 1;
2281     blend_state->blend1.logic_op_func = 0xc;
2282     blend_state->blend1.pre_blend_clamp_enable = 1;
2283     dri_bo_unmap(render_state->cc.blend);
2284 }
2285
2286 static void
2287 gen7_render_depth_stencil_state(VADriverContextP ctx)
2288 {
2289     struct i965_driver_data *i965 = i965_driver_data(ctx);
2290     struct i965_render_state *render_state = &i965->render_state;
2291     struct gen6_depth_stencil_state *depth_stencil_state;
2292     
2293     dri_bo_map(render_state->cc.depth_stencil, 1);
2294     assert(render_state->cc.depth_stencil->virtual);
2295     depth_stencil_state = render_state->cc.depth_stencil->virtual;
2296     memset(depth_stencil_state, 0, sizeof(*depth_stencil_state));
2297     dri_bo_unmap(render_state->cc.depth_stencil);
2298 }
2299
2300 static void 
2301 gen7_render_sampler(VADriverContextP ctx)
2302 {
2303     struct i965_driver_data *i965 = i965_driver_data(ctx);
2304     struct i965_render_state *render_state = &i965->render_state;
2305     struct gen7_sampler_state *sampler_state;
2306     int i;
2307     
2308     assert(render_state->wm.sampler_count > 0);
2309     assert(render_state->wm.sampler_count <= MAX_SAMPLERS);
2310
2311     dri_bo_map(render_state->wm.sampler, 1);
2312     assert(render_state->wm.sampler->virtual);
2313     sampler_state = render_state->wm.sampler->virtual;
2314     for (i = 0; i < render_state->wm.sampler_count; i++) {
2315         memset(sampler_state, 0, sizeof(*sampler_state));
2316         sampler_state->ss0.min_filter = I965_MAPFILTER_LINEAR;
2317         sampler_state->ss0.mag_filter = I965_MAPFILTER_LINEAR;
2318         sampler_state->ss3.r_wrap_mode = I965_TEXCOORDMODE_CLAMP;
2319         sampler_state->ss3.s_wrap_mode = I965_TEXCOORDMODE_CLAMP;
2320         sampler_state->ss3.t_wrap_mode = I965_TEXCOORDMODE_CLAMP;
2321         sampler_state++;
2322     }
2323
2324     dri_bo_unmap(render_state->wm.sampler);
2325 }
2326
2327 static void
2328 gen7_render_setup_states(
2329     VADriverContextP   ctx,
2330     VASurfaceID        surface,
2331     const VARectangle *src_rect,
2332     const VARectangle *dst_rect,
2333     unsigned int       flags
2334 )
2335 {
2336     i965_render_dest_surface_state(ctx, 0);
2337     i965_render_src_surfaces_state(ctx, surface, flags);
2338     gen7_render_sampler(ctx);
2339     i965_render_cc_viewport(ctx);
2340     gen7_render_color_calc_state(ctx);
2341     gen7_render_blend_state(ctx);
2342     gen7_render_depth_stencil_state(ctx);
2343     i965_render_upload_constants(ctx, surface);
2344     i965_render_upload_vertex(ctx, surface, src_rect, dst_rect);
2345 }
2346
2347 static void
2348 gen7_emit_invarient_states(VADriverContextP ctx)
2349 {
2350     struct i965_driver_data *i965 = i965_driver_data(ctx);
2351     struct intel_batchbuffer *batch = i965->batch;
2352
2353     BEGIN_BATCH(batch, 1);
2354     OUT_BATCH(batch, CMD_PIPELINE_SELECT | PIPELINE_SELECT_3D);
2355     ADVANCE_BATCH(batch);
2356
2357     BEGIN_BATCH(batch, 4);
2358     OUT_BATCH(batch, GEN6_3DSTATE_MULTISAMPLE | (4 - 2));
2359     OUT_BATCH(batch, GEN6_3DSTATE_MULTISAMPLE_PIXEL_LOCATION_CENTER |
2360               GEN6_3DSTATE_MULTISAMPLE_NUMSAMPLES_1); /* 1 sample/pixel */
2361     OUT_BATCH(batch, 0);
2362     OUT_BATCH(batch, 0);
2363     ADVANCE_BATCH(batch);
2364
2365     BEGIN_BATCH(batch, 2);
2366     OUT_BATCH(batch, GEN6_3DSTATE_SAMPLE_MASK | (2 - 2));
2367     OUT_BATCH(batch, 1);
2368     ADVANCE_BATCH(batch);
2369
2370     /* Set system instruction pointer */
2371     BEGIN_BATCH(batch, 2);
2372     OUT_BATCH(batch, CMD_STATE_SIP | 0);
2373     OUT_BATCH(batch, 0);
2374     ADVANCE_BATCH(batch);
2375 }
2376
2377 static void
2378 gen7_emit_state_base_address(VADriverContextP ctx)
2379 {
2380     struct i965_driver_data *i965 = i965_driver_data(ctx);
2381     struct intel_batchbuffer *batch = i965->batch;
2382     struct i965_render_state *render_state = &i965->render_state;
2383
2384     OUT_BATCH(batch, CMD_STATE_BASE_ADDRESS | (10 - 2));
2385     OUT_BATCH(batch, BASE_ADDRESS_MODIFY); /* General state base address */
2386     OUT_RELOC(batch, render_state->wm.surface_state_binding_table_bo, I915_GEM_DOMAIN_INSTRUCTION, 0, BASE_ADDRESS_MODIFY); /* Surface state base address */
2387     OUT_BATCH(batch, BASE_ADDRESS_MODIFY); /* Dynamic state base address */
2388     OUT_BATCH(batch, BASE_ADDRESS_MODIFY); /* Indirect object base address */
2389     OUT_BATCH(batch, BASE_ADDRESS_MODIFY); /* Instruction base address */
2390     OUT_BATCH(batch, BASE_ADDRESS_MODIFY); /* General state upper bound */
2391     OUT_BATCH(batch, BASE_ADDRESS_MODIFY); /* Dynamic state upper bound */
2392     OUT_BATCH(batch, BASE_ADDRESS_MODIFY); /* Indirect object upper bound */
2393     OUT_BATCH(batch, BASE_ADDRESS_MODIFY); /* Instruction access upper bound */
2394 }
2395
2396 static void
2397 gen7_emit_viewport_state_pointers(VADriverContextP ctx)
2398 {
2399     struct i965_driver_data *i965 = i965_driver_data(ctx);
2400     struct intel_batchbuffer *batch = i965->batch;
2401     struct i965_render_state *render_state = &i965->render_state;
2402
2403     BEGIN_BATCH(batch, 2);
2404     OUT_BATCH(batch, GEN7_3DSTATE_VIEWPORT_STATE_POINTERS_CC | (2 - 2));
2405     OUT_RELOC(batch,
2406               render_state->cc.viewport,
2407               I915_GEM_DOMAIN_INSTRUCTION, 0,
2408               0);
2409     ADVANCE_BATCH(batch);
2410
2411     BEGIN_BATCH(batch, 2);
2412     OUT_BATCH(batch, GEN7_3DSTATE_VIEWPORT_STATE_POINTERS_SF_CL | (2 - 2));
2413     OUT_BATCH(batch, 0);
2414     ADVANCE_BATCH(batch);
2415 }
2416
2417 /*
2418  * URB layout on GEN7 
2419  * ----------------------------------------
2420  * | PS Push Constants (8KB) | VS entries |
2421  * ----------------------------------------
2422  */
2423 static void
2424 gen7_emit_urb(VADriverContextP ctx)
2425 {
2426     struct i965_driver_data *i965 = i965_driver_data(ctx);
2427     struct intel_batchbuffer *batch = i965->batch;
2428     unsigned int num_urb_entries = 32;
2429
2430     if (IS_HASWELL(i965->intel.device_id))
2431         num_urb_entries = 64;
2432
2433     BEGIN_BATCH(batch, 2);
2434     OUT_BATCH(batch, GEN7_3DSTATE_PUSH_CONSTANT_ALLOC_PS | (2 - 2));
2435     OUT_BATCH(batch, 8); /* in 1KBs */
2436     ADVANCE_BATCH(batch);
2437
2438     BEGIN_BATCH(batch, 2);
2439     OUT_BATCH(batch, GEN7_3DSTATE_URB_VS | (2 - 2));
2440     OUT_BATCH(batch, 
2441               (num_urb_entries << GEN7_URB_ENTRY_NUMBER_SHIFT) |
2442               (2 - 1) << GEN7_URB_ENTRY_SIZE_SHIFT |
2443               (1 << GEN7_URB_STARTING_ADDRESS_SHIFT));
2444    ADVANCE_BATCH(batch);
2445
2446    BEGIN_BATCH(batch, 2);
2447    OUT_BATCH(batch, GEN7_3DSTATE_URB_GS | (2 - 2));
2448    OUT_BATCH(batch,
2449              (0 << GEN7_URB_ENTRY_SIZE_SHIFT) |
2450              (1 << GEN7_URB_STARTING_ADDRESS_SHIFT));
2451    ADVANCE_BATCH(batch);
2452
2453    BEGIN_BATCH(batch, 2);
2454    OUT_BATCH(batch, GEN7_3DSTATE_URB_HS | (2 - 2));
2455    OUT_BATCH(batch,
2456              (0 << GEN7_URB_ENTRY_SIZE_SHIFT) |
2457              (2 << GEN7_URB_STARTING_ADDRESS_SHIFT));
2458    ADVANCE_BATCH(batch);
2459
2460    BEGIN_BATCH(batch, 2);
2461    OUT_BATCH(batch, GEN7_3DSTATE_URB_DS | (2 - 2));
2462    OUT_BATCH(batch,
2463              (0 << GEN7_URB_ENTRY_SIZE_SHIFT) |
2464              (2 << GEN7_URB_STARTING_ADDRESS_SHIFT));
2465    ADVANCE_BATCH(batch);
2466 }
2467
2468 static void
2469 gen7_emit_cc_state_pointers(VADriverContextP ctx)
2470 {
2471     struct i965_driver_data *i965 = i965_driver_data(ctx);
2472     struct intel_batchbuffer *batch = i965->batch;
2473     struct i965_render_state *render_state = &i965->render_state;
2474
2475     BEGIN_BATCH(batch, 2);
2476     OUT_BATCH(batch, GEN6_3DSTATE_CC_STATE_POINTERS | (2 - 2));
2477     OUT_RELOC(batch,
2478               render_state->cc.state,
2479               I915_GEM_DOMAIN_INSTRUCTION, 0,
2480               1);
2481     ADVANCE_BATCH(batch);
2482
2483     BEGIN_BATCH(batch, 2);
2484     OUT_BATCH(batch, GEN7_3DSTATE_BLEND_STATE_POINTERS | (2 - 2));
2485     OUT_RELOC(batch,
2486               render_state->cc.blend,
2487               I915_GEM_DOMAIN_INSTRUCTION, 0,
2488               1);
2489     ADVANCE_BATCH(batch);
2490
2491     BEGIN_BATCH(batch, 2);
2492     OUT_BATCH(batch, GEN7_3DSTATE_DEPTH_STENCIL_STATE_POINTERS | (2 - 2));
2493     OUT_RELOC(batch,
2494               render_state->cc.depth_stencil,
2495               I915_GEM_DOMAIN_INSTRUCTION, 0, 
2496               1);
2497     ADVANCE_BATCH(batch);
2498 }
2499
2500 static void
2501 gen7_emit_sampler_state_pointers(VADriverContextP ctx)
2502 {
2503     struct i965_driver_data *i965 = i965_driver_data(ctx);
2504     struct intel_batchbuffer *batch = i965->batch;
2505     struct i965_render_state *render_state = &i965->render_state;
2506
2507     BEGIN_BATCH(batch, 2);
2508     OUT_BATCH(batch, GEN7_3DSTATE_SAMPLER_STATE_POINTERS_PS | (2 - 2));
2509     OUT_RELOC(batch,
2510               render_state->wm.sampler,
2511               I915_GEM_DOMAIN_INSTRUCTION, 0,
2512               0);
2513     ADVANCE_BATCH(batch);
2514 }
2515
2516 static void
2517 gen7_emit_binding_table(VADriverContextP ctx)
2518 {
2519     struct i965_driver_data *i965 = i965_driver_data(ctx);
2520     struct intel_batchbuffer *batch = i965->batch;
2521
2522     BEGIN_BATCH(batch, 2);
2523     OUT_BATCH(batch, GEN7_3DSTATE_BINDING_TABLE_POINTERS_PS | (2 - 2));
2524     OUT_BATCH(batch, BINDING_TABLE_OFFSET);
2525     ADVANCE_BATCH(batch);
2526 }
2527
2528 static void
2529 gen7_emit_depth_buffer_state(VADriverContextP ctx)
2530 {
2531     struct i965_driver_data *i965 = i965_driver_data(ctx);
2532     struct intel_batchbuffer *batch = i965->batch;
2533
2534     BEGIN_BATCH(batch, 7);
2535     OUT_BATCH(batch, GEN7_3DSTATE_DEPTH_BUFFER | (7 - 2));
2536     OUT_BATCH(batch,
2537               (I965_DEPTHFORMAT_D32_FLOAT << 18) |
2538               (I965_SURFACE_NULL << 29));
2539     OUT_BATCH(batch, 0);
2540     OUT_BATCH(batch, 0);
2541     OUT_BATCH(batch, 0);
2542     OUT_BATCH(batch, 0);
2543     OUT_BATCH(batch, 0);
2544     ADVANCE_BATCH(batch);
2545
2546     BEGIN_BATCH(batch, 3);
2547     OUT_BATCH(batch, GEN7_3DSTATE_CLEAR_PARAMS | (3 - 2));
2548     OUT_BATCH(batch, 0);
2549     OUT_BATCH(batch, 0);
2550     ADVANCE_BATCH(batch);
2551 }
2552
2553 static void
2554 gen7_emit_drawing_rectangle(VADriverContextP ctx)
2555 {
2556     i965_render_drawing_rectangle(ctx);
2557 }
2558
2559 static void 
2560 gen7_emit_vs_state(VADriverContextP ctx)
2561 {
2562     struct i965_driver_data *i965 = i965_driver_data(ctx);
2563     struct intel_batchbuffer *batch = i965->batch;
2564
2565     /* disable VS constant buffer */
2566     OUT_BATCH(batch, GEN6_3DSTATE_CONSTANT_VS | (7 - 2));
2567     OUT_BATCH(batch, 0);
2568     OUT_BATCH(batch, 0);
2569     OUT_BATCH(batch, 0);
2570     OUT_BATCH(batch, 0);
2571     OUT_BATCH(batch, 0);
2572     OUT_BATCH(batch, 0);
2573         
2574     OUT_BATCH(batch, GEN6_3DSTATE_VS | (6 - 2));
2575     OUT_BATCH(batch, 0); /* without VS kernel */
2576     OUT_BATCH(batch, 0);
2577     OUT_BATCH(batch, 0);
2578     OUT_BATCH(batch, 0);
2579     OUT_BATCH(batch, 0); /* pass-through */
2580 }
2581
2582 static void 
2583 gen7_emit_bypass_state(VADriverContextP ctx)
2584 {
2585     struct i965_driver_data *i965 = i965_driver_data(ctx);
2586     struct intel_batchbuffer *batch = i965->batch;
2587
2588     /* bypass GS */
2589     BEGIN_BATCH(batch, 7);
2590     OUT_BATCH(batch, GEN6_3DSTATE_CONSTANT_GS | (7 - 2));
2591     OUT_BATCH(batch, 0);
2592     OUT_BATCH(batch, 0);
2593     OUT_BATCH(batch, 0);
2594     OUT_BATCH(batch, 0);
2595     OUT_BATCH(batch, 0);
2596     OUT_BATCH(batch, 0);
2597     ADVANCE_BATCH(batch);
2598
2599     BEGIN_BATCH(batch, 7);      
2600     OUT_BATCH(batch, GEN6_3DSTATE_GS | (7 - 2));
2601     OUT_BATCH(batch, 0); /* without GS kernel */
2602     OUT_BATCH(batch, 0);
2603     OUT_BATCH(batch, 0);
2604     OUT_BATCH(batch, 0);
2605     OUT_BATCH(batch, 0);
2606     OUT_BATCH(batch, 0); /* pass-through */
2607     ADVANCE_BATCH(batch);
2608
2609     BEGIN_BATCH(batch, 2);
2610     OUT_BATCH(batch, GEN7_3DSTATE_BINDING_TABLE_POINTERS_GS | (2 - 2));
2611     OUT_BATCH(batch, 0);
2612     ADVANCE_BATCH(batch);
2613
2614     /* disable HS */
2615     BEGIN_BATCH(batch, 7);
2616     OUT_BATCH(batch, GEN7_3DSTATE_CONSTANT_HS | (7 - 2));
2617     OUT_BATCH(batch, 0);
2618     OUT_BATCH(batch, 0);
2619     OUT_BATCH(batch, 0);
2620     OUT_BATCH(batch, 0);
2621     OUT_BATCH(batch, 0);
2622     OUT_BATCH(batch, 0);
2623     ADVANCE_BATCH(batch);
2624
2625     BEGIN_BATCH(batch, 7);
2626     OUT_BATCH(batch, GEN7_3DSTATE_HS | (7 - 2));
2627     OUT_BATCH(batch, 0);
2628     OUT_BATCH(batch, 0);
2629     OUT_BATCH(batch, 0);
2630     OUT_BATCH(batch, 0);
2631     OUT_BATCH(batch, 0);
2632     OUT_BATCH(batch, 0);
2633     ADVANCE_BATCH(batch);
2634
2635     BEGIN_BATCH(batch, 2);
2636     OUT_BATCH(batch, GEN7_3DSTATE_BINDING_TABLE_POINTERS_HS | (2 - 2));
2637     OUT_BATCH(batch, 0);
2638     ADVANCE_BATCH(batch);
2639
2640     /* Disable TE */
2641     BEGIN_BATCH(batch, 4);
2642     OUT_BATCH(batch, GEN7_3DSTATE_TE | (4 - 2));
2643     OUT_BATCH(batch, 0);
2644     OUT_BATCH(batch, 0);
2645     OUT_BATCH(batch, 0);
2646     ADVANCE_BATCH(batch);
2647
2648     /* Disable DS */
2649     BEGIN_BATCH(batch, 7);
2650     OUT_BATCH(batch, GEN7_3DSTATE_CONSTANT_DS | (7 - 2));
2651     OUT_BATCH(batch, 0);
2652     OUT_BATCH(batch, 0);
2653     OUT_BATCH(batch, 0);
2654     OUT_BATCH(batch, 0);
2655     OUT_BATCH(batch, 0);
2656     OUT_BATCH(batch, 0);
2657     ADVANCE_BATCH(batch);
2658
2659     BEGIN_BATCH(batch, 6);
2660     OUT_BATCH(batch, GEN7_3DSTATE_DS | (6 - 2));
2661     OUT_BATCH(batch, 0);
2662     OUT_BATCH(batch, 0);
2663     OUT_BATCH(batch, 0);
2664     OUT_BATCH(batch, 0);
2665     OUT_BATCH(batch, 0);
2666     ADVANCE_BATCH(batch);
2667
2668     BEGIN_BATCH(batch, 2);
2669     OUT_BATCH(batch, GEN7_3DSTATE_BINDING_TABLE_POINTERS_DS | (2 - 2));
2670     OUT_BATCH(batch, 0);
2671     ADVANCE_BATCH(batch);
2672
2673     /* Disable STREAMOUT */
2674     BEGIN_BATCH(batch, 3);
2675     OUT_BATCH(batch, GEN7_3DSTATE_STREAMOUT | (3 - 2));
2676     OUT_BATCH(batch, 0);
2677     OUT_BATCH(batch, 0);
2678     ADVANCE_BATCH(batch);
2679 }
2680
2681 static void 
2682 gen7_emit_clip_state(VADriverContextP ctx)
2683 {
2684     struct i965_driver_data *i965 = i965_driver_data(ctx);
2685     struct intel_batchbuffer *batch = i965->batch;
2686
2687     OUT_BATCH(batch, GEN6_3DSTATE_CLIP | (4 - 2));
2688     OUT_BATCH(batch, 0);
2689     OUT_BATCH(batch, 0); /* pass-through */
2690     OUT_BATCH(batch, 0);
2691 }
2692
2693 static void 
2694 gen7_emit_sf_state(VADriverContextP ctx)
2695 {
2696     struct i965_driver_data *i965 = i965_driver_data(ctx);
2697     struct intel_batchbuffer *batch = i965->batch;
2698
2699     BEGIN_BATCH(batch, 14);
2700     OUT_BATCH(batch, GEN7_3DSTATE_SBE | (14 - 2));
2701     OUT_BATCH(batch,
2702               (1 << GEN7_SBE_NUM_OUTPUTS_SHIFT) |
2703               (1 << GEN7_SBE_URB_ENTRY_READ_LENGTH_SHIFT) |
2704               (0 << GEN7_SBE_URB_ENTRY_READ_OFFSET_SHIFT));
2705     OUT_BATCH(batch, 0);
2706     OUT_BATCH(batch, 0);
2707     OUT_BATCH(batch, 0); /* DW4 */
2708     OUT_BATCH(batch, 0);
2709     OUT_BATCH(batch, 0);
2710     OUT_BATCH(batch, 0);
2711     OUT_BATCH(batch, 0);
2712     OUT_BATCH(batch, 0); /* DW9 */
2713     OUT_BATCH(batch, 0);
2714     OUT_BATCH(batch, 0);
2715     OUT_BATCH(batch, 0);
2716     OUT_BATCH(batch, 0);
2717     ADVANCE_BATCH(batch);
2718
2719     BEGIN_BATCH(batch, 7);
2720     OUT_BATCH(batch, GEN6_3DSTATE_SF | (7 - 2));
2721     OUT_BATCH(batch, 0);
2722     OUT_BATCH(batch, GEN6_3DSTATE_SF_CULL_NONE);
2723     OUT_BATCH(batch, 2 << GEN6_3DSTATE_SF_TRIFAN_PROVOKE_SHIFT);
2724     OUT_BATCH(batch, 0);
2725     OUT_BATCH(batch, 0);
2726     OUT_BATCH(batch, 0);
2727     ADVANCE_BATCH(batch);
2728 }
2729
2730 static void 
2731 gen7_emit_wm_state(VADriverContextP ctx, int kernel)
2732 {
2733     struct i965_driver_data *i965 = i965_driver_data(ctx);
2734     struct intel_batchbuffer *batch = i965->batch;
2735     struct i965_render_state *render_state = &i965->render_state;
2736     unsigned int max_threads_shift = GEN7_PS_MAX_THREADS_SHIFT_IVB;
2737     unsigned int num_samples = 0;
2738
2739     if (IS_HASWELL(i965->intel.device_id)) {
2740         max_threads_shift = GEN7_PS_MAX_THREADS_SHIFT_HSW;
2741         num_samples = 1 << GEN7_PS_SAMPLE_MASK_SHIFT_HSW;
2742     }
2743
2744     BEGIN_BATCH(batch, 3);
2745     OUT_BATCH(batch, GEN6_3DSTATE_WM | (3 - 2));
2746     OUT_BATCH(batch,
2747               GEN7_WM_DISPATCH_ENABLE |
2748               GEN7_WM_PERSPECTIVE_PIXEL_BARYCENTRIC);
2749     OUT_BATCH(batch, 0);
2750     ADVANCE_BATCH(batch);
2751
2752     BEGIN_BATCH(batch, 7);
2753     OUT_BATCH(batch, GEN6_3DSTATE_CONSTANT_PS | (7 - 2));
2754     OUT_BATCH(batch, 1);
2755     OUT_BATCH(batch, 0);
2756     OUT_RELOC(batch, 
2757               render_state->curbe.bo,
2758               I915_GEM_DOMAIN_INSTRUCTION, 0,
2759               0);
2760     OUT_BATCH(batch, 0);
2761     OUT_BATCH(batch, 0);
2762     OUT_BATCH(batch, 0);
2763     ADVANCE_BATCH(batch);
2764
2765     BEGIN_BATCH(batch, 8);
2766     OUT_BATCH(batch, GEN7_3DSTATE_PS | (8 - 2));
2767     OUT_RELOC(batch, 
2768               render_state->render_kernels[kernel].bo,
2769               I915_GEM_DOMAIN_INSTRUCTION, 0,
2770               0);
2771     OUT_BATCH(batch, 
2772               (1 << GEN7_PS_SAMPLER_COUNT_SHIFT) |
2773               (5 << GEN7_PS_BINDING_TABLE_ENTRY_COUNT_SHIFT));
2774     OUT_BATCH(batch, 0); /* scratch space base offset */
2775     OUT_BATCH(batch, 
2776               ((86 - 1) << max_threads_shift) | num_samples |
2777               GEN7_PS_PUSH_CONSTANT_ENABLE |
2778               GEN7_PS_ATTRIBUTE_ENABLE |
2779               GEN7_PS_16_DISPATCH_ENABLE);
2780     OUT_BATCH(batch, 
2781               (6 << GEN7_PS_DISPATCH_START_GRF_SHIFT_0));
2782     OUT_BATCH(batch, 0); /* kernel 1 pointer */
2783     OUT_BATCH(batch, 0); /* kernel 2 pointer */
2784     ADVANCE_BATCH(batch);
2785 }
2786
2787 static void
2788 gen7_emit_vertex_element_state(VADriverContextP ctx)
2789 {
2790     struct i965_driver_data *i965 = i965_driver_data(ctx);
2791     struct intel_batchbuffer *batch = i965->batch;
2792
2793     /* Set up our vertex elements, sourced from the single vertex buffer. */
2794     OUT_BATCH(batch, CMD_VERTEX_ELEMENTS | (5 - 2));
2795     /* offset 0: X,Y -> {X, Y, 1.0, 1.0} */
2796     OUT_BATCH(batch, (0 << GEN6_VE0_VERTEX_BUFFER_INDEX_SHIFT) |
2797               GEN6_VE0_VALID |
2798               (I965_SURFACEFORMAT_R32G32_FLOAT << VE0_FORMAT_SHIFT) |
2799               (0 << VE0_OFFSET_SHIFT));
2800     OUT_BATCH(batch, (I965_VFCOMPONENT_STORE_SRC << VE1_VFCOMPONENT_0_SHIFT) |
2801               (I965_VFCOMPONENT_STORE_SRC << VE1_VFCOMPONENT_1_SHIFT) |
2802               (I965_VFCOMPONENT_STORE_1_FLT << VE1_VFCOMPONENT_2_SHIFT) |
2803               (I965_VFCOMPONENT_STORE_1_FLT << VE1_VFCOMPONENT_3_SHIFT));
2804     /* offset 8: S0, T0 -> {S0, T0, 1.0, 1.0} */
2805     OUT_BATCH(batch, (0 << GEN6_VE0_VERTEX_BUFFER_INDEX_SHIFT) |
2806               GEN6_VE0_VALID |
2807               (I965_SURFACEFORMAT_R32G32_FLOAT << VE0_FORMAT_SHIFT) |
2808               (8 << VE0_OFFSET_SHIFT));
2809     OUT_BATCH(batch, (I965_VFCOMPONENT_STORE_SRC << VE1_VFCOMPONENT_0_SHIFT) | 
2810               (I965_VFCOMPONENT_STORE_SRC << VE1_VFCOMPONENT_1_SHIFT) |
2811               (I965_VFCOMPONENT_STORE_1_FLT << VE1_VFCOMPONENT_2_SHIFT) |
2812               (I965_VFCOMPONENT_STORE_1_FLT << VE1_VFCOMPONENT_3_SHIFT));
2813 }
2814
2815 static void
2816 gen7_emit_vertices(VADriverContextP ctx)
2817 {
2818     struct i965_driver_data *i965 = i965_driver_data(ctx);
2819     struct intel_batchbuffer *batch = i965->batch;
2820     struct i965_render_state *render_state = &i965->render_state;
2821
2822     BEGIN_BATCH(batch, 5);
2823     OUT_BATCH(batch, CMD_VERTEX_BUFFERS | (5 - 2));
2824     OUT_BATCH(batch, 
2825               (0 << GEN6_VB0_BUFFER_INDEX_SHIFT) |
2826               GEN6_VB0_VERTEXDATA |
2827               GEN7_VB0_ADDRESS_MODIFYENABLE |
2828               ((4 * 4) << VB0_BUFFER_PITCH_SHIFT));
2829     OUT_RELOC(batch, render_state->vb.vertex_buffer, I915_GEM_DOMAIN_VERTEX, 0, 0);
2830     OUT_RELOC(batch, render_state->vb.vertex_buffer, I915_GEM_DOMAIN_VERTEX, 0, 12 * 4);
2831     OUT_BATCH(batch, 0);
2832     ADVANCE_BATCH(batch);
2833
2834     BEGIN_BATCH(batch, 7);
2835     OUT_BATCH(batch, CMD_3DPRIMITIVE | (7 - 2));
2836     OUT_BATCH(batch,
2837               _3DPRIM_RECTLIST |
2838               GEN7_3DPRIM_VERTEXBUFFER_ACCESS_SEQUENTIAL);
2839     OUT_BATCH(batch, 3); /* vertex count per instance */
2840     OUT_BATCH(batch, 0); /* start vertex offset */
2841     OUT_BATCH(batch, 1); /* single instance */
2842     OUT_BATCH(batch, 0); /* start instance location */
2843     OUT_BATCH(batch, 0);
2844     ADVANCE_BATCH(batch);
2845 }
2846
2847 static void
2848 gen7_render_emit_states(VADriverContextP ctx, int kernel)
2849 {
2850     struct i965_driver_data *i965 = i965_driver_data(ctx);
2851     struct intel_batchbuffer *batch = i965->batch;
2852
2853     intel_batchbuffer_start_atomic(batch, 0x1000);
2854     intel_batchbuffer_emit_mi_flush(batch);
2855     gen7_emit_invarient_states(ctx);
2856     gen7_emit_state_base_address(ctx);
2857     gen7_emit_viewport_state_pointers(ctx);
2858     gen7_emit_urb(ctx);
2859     gen7_emit_cc_state_pointers(ctx);
2860     gen7_emit_sampler_state_pointers(ctx);
2861     gen7_emit_bypass_state(ctx);
2862     gen7_emit_vs_state(ctx);
2863     gen7_emit_clip_state(ctx);
2864     gen7_emit_sf_state(ctx);
2865     gen7_emit_wm_state(ctx, kernel);
2866     gen7_emit_binding_table(ctx);
2867     gen7_emit_depth_buffer_state(ctx);
2868     gen7_emit_drawing_rectangle(ctx);
2869     gen7_emit_vertex_element_state(ctx);
2870     gen7_emit_vertices(ctx);
2871     intel_batchbuffer_end_atomic(batch);
2872 }
2873
2874 static void
2875 gen7_render_put_surface(
2876     VADriverContextP   ctx,
2877     VASurfaceID        surface,
2878     const VARectangle *src_rect,
2879     const VARectangle *dst_rect,
2880     unsigned int       flags
2881 )
2882 {
2883     struct i965_driver_data *i965 = i965_driver_data(ctx);
2884     struct intel_batchbuffer *batch = i965->batch;
2885
2886     gen7_render_initialize(ctx);
2887     gen7_render_setup_states(ctx, surface, src_rect, dst_rect, flags);
2888     i965_clear_dest_region(ctx);
2889     gen7_render_emit_states(ctx, PS_KERNEL);
2890     intel_batchbuffer_flush(batch);
2891 }
2892
2893 static void
2894 gen7_subpicture_render_blend_state(VADriverContextP ctx)
2895 {
2896     struct i965_driver_data *i965 = i965_driver_data(ctx);
2897     struct i965_render_state *render_state = &i965->render_state;
2898     struct gen6_blend_state *blend_state;
2899
2900     dri_bo_unmap(render_state->cc.state);    
2901     dri_bo_map(render_state->cc.blend, 1);
2902     assert(render_state->cc.blend->virtual);
2903     blend_state = render_state->cc.blend->virtual;
2904     memset(blend_state, 0, sizeof(*blend_state));
2905     blend_state->blend0.dest_blend_factor = I965_BLENDFACTOR_INV_SRC_ALPHA;
2906     blend_state->blend0.source_blend_factor = I965_BLENDFACTOR_SRC_ALPHA;
2907     blend_state->blend0.blend_func = I965_BLENDFUNCTION_ADD;
2908     blend_state->blend0.blend_enable = 1;
2909     blend_state->blend1.post_blend_clamp_enable = 1;
2910     blend_state->blend1.pre_blend_clamp_enable = 1;
2911     blend_state->blend1.clamp_range = 0; /* clamp range [0, 1] */
2912     dri_bo_unmap(render_state->cc.blend);
2913 }
2914
2915 static void
2916 gen7_subpicture_render_setup_states(
2917     VADriverContextP   ctx,
2918     VASurfaceID        surface,
2919     const VARectangle *src_rect,
2920     const VARectangle *dst_rect
2921 )
2922 {
2923     i965_render_dest_surface_state(ctx, 0);
2924     i965_subpic_render_src_surfaces_state(ctx, surface);
2925     i965_render_sampler(ctx);
2926     i965_render_cc_viewport(ctx);
2927     gen7_render_color_calc_state(ctx);
2928     gen7_subpicture_render_blend_state(ctx);
2929     gen7_render_depth_stencil_state(ctx);
2930     i965_subpic_render_upload_vertex(ctx, surface, dst_rect);
2931 }
2932
2933 static void
2934 gen7_render_put_subpicture(
2935     VADriverContextP   ctx,
2936     VASurfaceID        surface,
2937     const VARectangle *src_rect,
2938     const VARectangle *dst_rect
2939 )
2940 {
2941     struct i965_driver_data *i965 = i965_driver_data(ctx);
2942     struct intel_batchbuffer *batch = i965->batch;
2943     struct object_surface *obj_surface = SURFACE(surface);
2944     struct object_subpic *obj_subpic = SUBPIC(obj_surface->subpic);
2945
2946     assert(obj_subpic);
2947     gen7_render_initialize(ctx);
2948     gen7_subpicture_render_setup_states(ctx, surface, src_rect, dst_rect);
2949     gen7_render_emit_states(ctx, PS_SUBPIC_KERNEL);
2950     i965_render_upload_image_palette(ctx, obj_subpic->image, 0xff);
2951     intel_batchbuffer_flush(batch);
2952 }
2953
2954
2955 /*
2956  * global functions
2957  */
2958 VAStatus 
2959 i965_DestroySurfaces(VADriverContextP ctx,
2960                      VASurfaceID *surface_list,
2961                      int num_surfaces);
2962 void
2963 intel_render_put_surface(
2964     VADriverContextP   ctx,
2965     VASurfaceID        surface,
2966     const VARectangle *src_rect,
2967     const VARectangle *dst_rect,
2968     unsigned int       flags
2969 )
2970 {
2971     struct i965_driver_data *i965 = i965_driver_data(ctx);
2972     int has_done_scaling = 0;
2973     VASurfaceID in_surface_id = surface;
2974     VASurfaceID out_surface_id = i965_post_processing(ctx, surface, src_rect, dst_rect, flags, &has_done_scaling);
2975
2976     assert((!has_done_scaling) || (out_surface_id != VA_INVALID_ID));
2977
2978     if (out_surface_id != VA_INVALID_ID)
2979         in_surface_id = out_surface_id;
2980
2981     if (IS_GEN7(i965->intel.device_id))
2982         gen7_render_put_surface(ctx, in_surface_id, has_done_scaling ? dst_rect : src_rect, dst_rect, flags);
2983     else if (IS_GEN6(i965->intel.device_id))
2984         gen6_render_put_surface(ctx, in_surface_id, has_done_scaling ? dst_rect : src_rect, dst_rect, flags);
2985     else
2986         i965_render_put_surface(ctx, in_surface_id, has_done_scaling ? dst_rect : src_rect, dst_rect, flags);
2987
2988     if (in_surface_id != surface)
2989         i965_DestroySurfaces(ctx, &in_surface_id, 1);
2990 }
2991
2992 void
2993 intel_render_put_subpicture(
2994     VADriverContextP   ctx,
2995     VASurfaceID        surface,
2996     const VARectangle *src_rect,
2997     const VARectangle *dst_rect
2998 )
2999 {
3000     struct i965_driver_data *i965 = i965_driver_data(ctx);
3001
3002     if (IS_GEN7(i965->intel.device_id))
3003         gen7_render_put_subpicture(ctx, surface, src_rect, dst_rect);
3004     else if (IS_GEN6(i965->intel.device_id))
3005         gen6_render_put_subpicture(ctx, surface, src_rect, dst_rect);
3006     else
3007         i965_render_put_subpicture(ctx, surface, src_rect, dst_rect);
3008 }
3009
3010 Bool 
3011 i965_render_init(VADriverContextP ctx)
3012 {
3013     struct i965_driver_data *i965 = i965_driver_data(ctx);
3014     struct i965_render_state *render_state = &i965->render_state;
3015     int i;
3016
3017     /* kernel */
3018     assert(NUM_RENDER_KERNEL == (sizeof(render_kernels_gen5) / 
3019                                  sizeof(render_kernels_gen5[0])));
3020     assert(NUM_RENDER_KERNEL == (sizeof(render_kernels_gen6) / 
3021                                  sizeof(render_kernels_gen6[0])));
3022
3023     if (IS_GEN7(i965->intel.device_id))
3024         memcpy(render_state->render_kernels, render_kernels_gen7, sizeof(render_state->render_kernels));
3025     else if (IS_GEN6(i965->intel.device_id))
3026         memcpy(render_state->render_kernels, render_kernels_gen6, sizeof(render_state->render_kernels));
3027     else if (IS_IRONLAKE(i965->intel.device_id))
3028         memcpy(render_state->render_kernels, render_kernels_gen5, sizeof(render_state->render_kernels));
3029     else
3030         memcpy(render_state->render_kernels, render_kernels_gen4, sizeof(render_state->render_kernels));
3031
3032     for (i = 0; i < NUM_RENDER_KERNEL; i++) {
3033         struct i965_kernel *kernel = &render_state->render_kernels[i];
3034
3035         if (!kernel->size)
3036             continue;
3037
3038         kernel->bo = dri_bo_alloc(i965->intel.bufmgr, 
3039                                   kernel->name, 
3040                                   kernel->size, 0x1000);
3041         assert(kernel->bo);
3042         dri_bo_subdata(kernel->bo, 0, kernel->size, kernel->bin);
3043     }
3044
3045     /* constant buffer */
3046     render_state->curbe.bo = dri_bo_alloc(i965->intel.bufmgr,
3047                       "constant buffer",
3048                       4096, 64);
3049     assert(render_state->curbe.bo);
3050
3051     return True;
3052 }
3053
3054 Bool 
3055 i965_render_terminate(VADriverContextP ctx)
3056 {
3057     int i;
3058     struct i965_driver_data *i965 = i965_driver_data(ctx);
3059     struct i965_render_state *render_state = &i965->render_state;
3060
3061     dri_bo_unreference(render_state->curbe.bo);
3062     render_state->curbe.bo = NULL;
3063
3064     for (i = 0; i < NUM_RENDER_KERNEL; i++) {
3065         struct i965_kernel *kernel = &render_state->render_kernels[i];
3066         
3067         dri_bo_unreference(kernel->bo);
3068         kernel->bo = NULL;
3069     }
3070
3071     dri_bo_unreference(render_state->vb.vertex_buffer);
3072     render_state->vb.vertex_buffer = NULL;
3073     dri_bo_unreference(render_state->vs.state);
3074     render_state->vs.state = NULL;
3075     dri_bo_unreference(render_state->sf.state);
3076     render_state->sf.state = NULL;
3077     dri_bo_unreference(render_state->wm.sampler);
3078     render_state->wm.sampler = NULL;
3079     dri_bo_unreference(render_state->wm.state);
3080     render_state->wm.state = NULL;
3081     dri_bo_unreference(render_state->wm.surface_state_binding_table_bo);
3082     dri_bo_unreference(render_state->cc.viewport);
3083     render_state->cc.viewport = NULL;
3084     dri_bo_unreference(render_state->cc.state);
3085     render_state->cc.state = NULL;
3086     dri_bo_unreference(render_state->cc.blend);
3087     render_state->cc.blend = NULL;
3088     dri_bo_unreference(render_state->cc.depth_stencil);
3089     render_state->cc.depth_stencil = NULL;
3090
3091     if (render_state->draw_region) {
3092         dri_bo_unreference(render_state->draw_region->bo);
3093         free(render_state->draw_region);
3094         render_state->draw_region = NULL;
3095     }
3096
3097     return True;
3098 }
3099