a05e40b5001f39e9633d301cf91d0952a13d86fa
[platform/upstream/libva.git] / i965_drv_video / i965_render.c
1 /*
2  * Copyright © 2006 Intel Corporation
3  *
4  * Permission is hereby granted, free of charge, to any person obtaining a
5  * copy of this software and associated documentation files (the "Software"),
6  * to deal in the Software without restriction, including without limitation
7  * the rights to use, copy, modify, merge, publish, distribute, sublicense,
8  * and/or sell copies of the Software, and to permit persons to whom the
9  * Software is furnished to do so, subject to the following conditions:
10  *
11  * The above copyright notice and this permission notice (including the next
12  * paragraph) shall be included in all copies or substantial portions of the
13  * Software.
14  *
15  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16  * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17  * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
18  * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19  * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
20  * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
21  * DEALINGS IN THE SOFTWARE.
22  *
23  * Authors:
24  *    Eric Anholt <eric@anholt.net>
25  *    Keith Packard <keithp@keithp.com>
26  *    Xiang Haihao <haihao.xiang@intel.com>
27  *
28  */
29
30 /*
31  * Most of rendering codes are ported from xf86-video-intel/src/i965_video.c
32  */
33
34 #include <stdio.h>
35 #include <stdlib.h>
36 #include <string.h>
37 #include <assert.h>
38
39 #include <va/va_backend.h>
40 #include "va/x11/va_dricommon.h"
41
42 #include "intel_batchbuffer.h"
43 #include "intel_driver.h"
44 #include "i965_defines.h"
45 #include "i965_drv_video.h"
46 #include "i965_structs.h"
47
48 #include "i965_render.h"
49
50 #define SF_KERNEL_NUM_GRF       16
51 #define SF_MAX_THREADS          1
52
53 static const uint32_t sf_kernel_static[][4] = 
54 {
55 #include "shaders/render/exa_sf.g4b"
56 };
57
58 #define PS_KERNEL_NUM_GRF       32
59 #define PS_MAX_THREADS          32
60
61 #define I965_GRF_BLOCKS(nreg)   ((nreg + 15) / 16 - 1)
62
63 static const uint32_t ps_kernel_static[][4] = 
64 {
65 #include "shaders/render/exa_wm_xy.g4b"
66 #include "shaders/render/exa_wm_src_affine.g4b"
67 #include "shaders/render/exa_wm_src_sample_planar.g4b"
68 #include "shaders/render/exa_wm_yuv_rgb.g4b"
69 #include "shaders/render/exa_wm_write.g4b"
70 };
71 static const uint32_t ps_subpic_kernel_static[][4] = 
72 {
73 #include "shaders/render/exa_wm_xy.g4b"
74 #include "shaders/render/exa_wm_src_affine.g4b"
75 #include "shaders/render/exa_wm_src_sample_argb.g4b"
76 #include "shaders/render/exa_wm_write.g4b"
77 };
78
/* On IRONLAKE */
/* Gen5 SF (strips & fans) kernel binary. */
static const uint32_t sf_kernel_static_gen5[][4] = 
{
#include "shaders/render/exa_sf.g4b.gen5"
};

/* Gen5 pixel shader for video: sample planar YUV, convert to RGB, write. */
static const uint32_t ps_kernel_static_gen5[][4] = 
{
#include "shaders/render/exa_wm_xy.g4b.gen5"
#include "shaders/render/exa_wm_src_affine.g4b.gen5"
#include "shaders/render/exa_wm_src_sample_planar.g4b.gen5"
#include "shaders/render/exa_wm_yuv_rgb.g4b.gen5"
#include "shaders/render/exa_wm_write.g4b.gen5"
};
/* Gen5 pixel shader for subpictures: sample ARGB and write. */
static const uint32_t ps_subpic_kernel_static_gen5[][4] = 
{
#include "shaders/render/exa_wm_xy.g4b.gen5"
#include "shaders/render/exa_wm_src_affine.g4b.gen5"
#include "shaders/render/exa_wm_src_sample_argb.g4b.gen5"
#include "shaders/render/exa_wm_write.g4b.gen5"
};
100
/* programs for Sandybridge */
/* Empty SF kernel — presumably the SF stage is not programmed with a
 * kernel on Gen6; table kept for index parity with earlier gens.
 * TODO(review): confirm against the Gen6 pipeline setup. */
static const uint32_t sf_kernel_static_gen6[][4] = 
{
};

/* Gen6 pixel shader for video: sample planar YUV, convert to RGB, write. */
static const uint32_t ps_kernel_static_gen6[][4] = {
#include "shaders/render/exa_wm_src_affine.g6b"
#include "shaders/render/exa_wm_src_sample_planar.g6b"
#include "shaders/render/exa_wm_yuv_rgb.g6b"
#include "shaders/render/exa_wm_write.g6b"
};

/* Gen6 pixel shader for subpictures: sample ARGB and write. */
static const uint32_t ps_subpic_kernel_static_gen6[][4] = {
#include "shaders/render/exa_wm_src_affine.g6b"
#include "shaders/render/exa_wm_src_sample_argb.g6b"
#include "shaders/render/exa_wm_write.g6b"
};
118
/* programs for Ivybridge */
/* Empty SF kernel — as on Gen6, kept only for index parity. */
static const uint32_t sf_kernel_static_gen7[][4] = 
{
};

/* Gen7 pixel shader for video: sample planar YUV, convert to RGB, write. */
static const uint32_t ps_kernel_static_gen7[][4] = {
#include "shaders/render/exa_wm_src_affine.g7b"
#include "shaders/render/exa_wm_src_sample_planar.g7b"
#include "shaders/render/exa_wm_yuv_rgb.g7b"
#include "shaders/render/exa_wm_write.g7b"
};

/* Gen7 pixel shader for subpictures: sample ARGB and write. */
static const uint32_t ps_subpic_kernel_static_gen7[][4] = {
#include "shaders/render/exa_wm_src_affine.g7b"
#include "shaders/render/exa_wm_src_sample_argb.g7b"
#include "shaders/render/exa_wm_write.g7b"
};
136
/* Surface-state entries are padded to a stride large enough for either the
 * pre-Gen7 or the Gen7 layout, so one buffer layout works for all gens.
 * The binding table sits immediately after the last surface-state slot. */
#define SURFACE_STATE_PADDED_SIZE_I965  ALIGN(sizeof(struct i965_surface_state), 32)
#define SURFACE_STATE_PADDED_SIZE_GEN7  ALIGN(sizeof(struct gen7_surface_state), 32)
#define SURFACE_STATE_PADDED_SIZE       MAX(SURFACE_STATE_PADDED_SIZE_I965, SURFACE_STATE_PADDED_SIZE_GEN7)
/* Parenthesize the argument so expressions like (i + 1) expand correctly. */
#define SURFACE_STATE_OFFSET(index)     (SURFACE_STATE_PADDED_SIZE * (index))
#define BINDING_TABLE_OFFSET            SURFACE_STATE_OFFSET(MAX_RENDER_SURFACES)
142
/* Reinterpret the bits of an IEEE-754 float as a 32-bit unsigned integer
 * (no numeric conversion). */
static uint32_t float_to_uint (float f) 
{
    uint32_t bits;

    memcpy(&bits, &f, sizeof(bits));
    return bits;
}
153
/* Indices into the render_kernels_gen* tables below. */
enum 
{
    SF_KERNEL = 0,   /* strips & fans kernel */
    PS_KERNEL,       /* pixel shader: planar YUV video */
    PS_SUBPIC_KERNEL /* pixel shader: ARGB subpicture */
};
160
/* Kernel table for Gen4, indexed by the SF_KERNEL/PS_KERNEL/PS_SUBPIC_KERNEL
 * enum.  Each entry holds (name, index, binary, size, bo); the bo is NULL
 * here and presumably allocated when the kernels are uploaded —
 * confirm against struct i965_kernel's definition. */
static struct i965_kernel render_kernels_gen4[] = {
    {
        "SF",
        SF_KERNEL,
        sf_kernel_static,
        sizeof(sf_kernel_static),
        NULL
    },
    {
        "PS",
        PS_KERNEL,
        ps_kernel_static,
        sizeof(ps_kernel_static),
        NULL
    },

    {
        "PS_SUBPIC",
        PS_SUBPIC_KERNEL,
        ps_subpic_kernel_static,
        sizeof(ps_subpic_kernel_static),
        NULL
    }
};
185
/* Kernel table for Gen5 (Ironlake); same layout as render_kernels_gen4. */
static struct i965_kernel render_kernels_gen5[] = {
    {
        "SF",
        SF_KERNEL,
        sf_kernel_static_gen5,
        sizeof(sf_kernel_static_gen5),
        NULL
    },
    {
        "PS",
        PS_KERNEL,
        ps_kernel_static_gen5,
        sizeof(ps_kernel_static_gen5),
        NULL
    },

    {
        "PS_SUBPIC",
        PS_SUBPIC_KERNEL,
        ps_subpic_kernel_static_gen5,
        sizeof(ps_subpic_kernel_static_gen5),
        NULL
    }
};
210
/* Kernel table for Gen6 (Sandybridge); the SF entry is an empty binary. */
static struct i965_kernel render_kernels_gen6[] = {
    {
        "SF",
        SF_KERNEL,
        sf_kernel_static_gen6,
        sizeof(sf_kernel_static_gen6),
        NULL
    },
    {
        "PS",
        PS_KERNEL,
        ps_kernel_static_gen6,
        sizeof(ps_kernel_static_gen6),
        NULL
    },

    {
        "PS_SUBPIC",
        PS_SUBPIC_KERNEL,
        ps_subpic_kernel_static_gen6,
        sizeof(ps_subpic_kernel_static_gen6),
        NULL
    }
};
235
/* Kernel table for Gen7 (Ivybridge); the SF entry is an empty binary. */
static struct i965_kernel render_kernels_gen7[] = {
    {
        "SF",
        SF_KERNEL,
        sf_kernel_static_gen7,
        sizeof(sf_kernel_static_gen7),
        NULL
    },
    {
        "PS",
        PS_KERNEL,
        ps_kernel_static_gen7,
        sizeof(ps_kernel_static_gen7),
        NULL
    },

    {
        "PS_SUBPIC",
        PS_SUBPIC_KERNEL,
        ps_subpic_kernel_static_gen7,
        sizeof(ps_subpic_kernel_static_gen7),
        NULL
    }
};
260
/* URB (Unified Return Buffer) allocation per fixed-function stage.
 * GS and CLIP get zero entries — those stages are disabled for this
 * render path. */
#define URB_VS_ENTRIES        8
#define URB_VS_ENTRY_SIZE     1

#define URB_GS_ENTRIES        0
#define URB_GS_ENTRY_SIZE     0

#define URB_CLIP_ENTRIES      0
#define URB_CLIP_ENTRY_SIZE   0

#define URB_SF_ENTRIES        1
#define URB_SF_ENTRY_SIZE     2

#define URB_CS_ENTRIES        1
#define URB_CS_ENTRY_SIZE     1
275
276 static void
277 i965_render_vs_unit(VADriverContextP ctx)
278 {
279     struct i965_driver_data *i965 = i965_driver_data(ctx);
280     struct i965_render_state *render_state = &i965->render_state;
281     struct i965_vs_unit_state *vs_state;
282
283     dri_bo_map(render_state->vs.state, 1);
284     assert(render_state->vs.state->virtual);
285     vs_state = render_state->vs.state->virtual;
286     memset(vs_state, 0, sizeof(*vs_state));
287
288     if (IS_IRONLAKE(i965->intel.device_id))
289         vs_state->thread4.nr_urb_entries = URB_VS_ENTRIES >> 2;
290     else
291         vs_state->thread4.nr_urb_entries = URB_VS_ENTRIES;
292
293     vs_state->thread4.urb_entry_allocation_size = URB_VS_ENTRY_SIZE - 1;
294     vs_state->vs6.vs_enable = 0;
295     vs_state->vs6.vert_cache_disable = 1;
296     
297     dri_bo_unmap(render_state->vs.state);
298 }
299
/*
 * Fill in the SF (strips & fans) fixed-function unit state: point it at
 * the SF kernel and describe its thread, URB and rasterizer setup.
 */
static void
i965_render_sf_unit(VADriverContextP ctx)
{
    struct i965_driver_data *i965 = i965_driver_data(ctx);
    struct i965_render_state *render_state = &i965->render_state;
    struct i965_sf_unit_state *sf_state;

    dri_bo_map(render_state->sf.state, 1);
    assert(render_state->sf.state->virtual);
    sf_state = render_state->sf.state->virtual;
    memset(sf_state, 0, sizeof(*sf_state));

    sf_state->thread0.grf_reg_count = I965_GRF_BLOCKS(SF_KERNEL_NUM_GRF);
    /* kernel start pointer is stored in 64-byte units, hence the >> 6 */
    sf_state->thread0.kernel_start_pointer = render_state->render_kernels[SF_KERNEL].bo->offset >> 6;

    sf_state->sf1.single_program_flow = 1; /* XXX */
    sf_state->sf1.binding_table_entry_count = 0;
    sf_state->sf1.thread_priority = 0;
    sf_state->sf1.floating_point_mode = 0; /* Mesa does this */
    sf_state->sf1.illegal_op_exception_enable = 1;
    sf_state->sf1.mask_stack_exception_enable = 1;
    sf_state->sf1.sw_exception_enable = 1;

    /* scratch space is not used in our kernel */
    sf_state->thread2.per_thread_scratch_space = 0;
    sf_state->thread2.scratch_space_base_pointer = 0;

    sf_state->thread3.const_urb_entry_read_length = 0; /* no const URBs */
    sf_state->thread3.const_urb_entry_read_offset = 0; /* no const URBs */
    sf_state->thread3.urb_entry_read_length = 1; /* 1 URB per vertex */
    sf_state->thread3.urb_entry_read_offset = 0;
    sf_state->thread3.dispatch_grf_start_reg = 3;

    sf_state->thread4.max_threads = SF_MAX_THREADS - 1; /* field holds threads - 1 */
    sf_state->thread4.urb_entry_allocation_size = URB_SF_ENTRY_SIZE - 1;
    sf_state->thread4.nr_urb_entries = URB_SF_ENTRIES;
    sf_state->thread4.stats_enable = 1;

    sf_state->sf5.viewport_transform = 0; /* skip viewport */

    sf_state->sf6.cull_mode = I965_CULLMODE_NONE;
    sf_state->sf6.scissor = 0;

    sf_state->sf7.trifan_pv = 2;

    /* half-pixel destination origin bias */
    sf_state->sf6.dest_org_vbias = 0x8;
    sf_state->sf6.dest_org_hbias = 0x8;

    /* Relocation so thread0's kernel pointer follows the kernel BO; the
     * delta re-packs grf_reg_count into the low bits of the same dword. */
    dri_bo_emit_reloc(render_state->sf.state,
                      I915_GEM_DOMAIN_INSTRUCTION, 0,
                      sf_state->thread0.grf_reg_count << 1,
                      offsetof(struct i965_sf_unit_state, thread0),
                      render_state->render_kernels[SF_KERNEL].bo);

    dri_bo_unmap(render_state->sf.state);
}
356
357 static void 
358 i965_render_sampler(VADriverContextP ctx)
359 {
360     struct i965_driver_data *i965 = i965_driver_data(ctx);
361     struct i965_render_state *render_state = &i965->render_state;
362     struct i965_sampler_state *sampler_state;
363     int i;
364     
365     assert(render_state->wm.sampler_count > 0);
366     assert(render_state->wm.sampler_count <= MAX_SAMPLERS);
367
368     dri_bo_map(render_state->wm.sampler, 1);
369     assert(render_state->wm.sampler->virtual);
370     sampler_state = render_state->wm.sampler->virtual;
371     for (i = 0; i < render_state->wm.sampler_count; i++) {
372         memset(sampler_state, 0, sizeof(*sampler_state));
373         sampler_state->ss0.min_filter = I965_MAPFILTER_LINEAR;
374         sampler_state->ss0.mag_filter = I965_MAPFILTER_LINEAR;
375         sampler_state->ss1.r_wrap_mode = I965_TEXCOORDMODE_CLAMP;
376         sampler_state->ss1.s_wrap_mode = I965_TEXCOORDMODE_CLAMP;
377         sampler_state->ss1.t_wrap_mode = I965_TEXCOORDMODE_CLAMP;
378         sampler_state++;
379     }
380
381     dri_bo_unmap(render_state->wm.sampler);
382 }
/*
 * Fill in the WM (windower / pixel shader) unit state for subpicture
 * blending: points at the PS_SUBPIC kernel and the shared sampler state.
 */
static void
i965_subpic_render_wm_unit(VADriverContextP ctx)
{
    struct i965_driver_data *i965 = i965_driver_data(ctx);
    struct i965_render_state *render_state = &i965->render_state;
    struct i965_wm_unit_state *wm_state;

    assert(render_state->wm.sampler);

    dri_bo_map(render_state->wm.state, 1);
    assert(render_state->wm.state->virtual);
    wm_state = render_state->wm.state->virtual;
    memset(wm_state, 0, sizeof(*wm_state));

    wm_state->thread0.grf_reg_count = I965_GRF_BLOCKS(PS_KERNEL_NUM_GRF);
    /* kernel start pointer is stored in 64-byte units */
    wm_state->thread0.kernel_start_pointer = render_state->render_kernels[PS_SUBPIC_KERNEL].bo->offset >> 6;

    wm_state->thread1.single_program_flow = 1; /* XXX */

    if (IS_IRONLAKE(i965->intel.device_id))
        wm_state->thread1.binding_table_entry_count = 0; /* hardware requirement */
    else
        wm_state->thread1.binding_table_entry_count = 7;

    /* no scratch space needed by this kernel */
    wm_state->thread2.scratch_space_base_pointer = 0;
    wm_state->thread2.per_thread_scratch_space = 0; /* 1024 bytes */

    wm_state->thread3.dispatch_grf_start_reg = 3; /* XXX */
    wm_state->thread3.const_urb_entry_read_length = 0;
    wm_state->thread3.const_urb_entry_read_offset = 0;
    wm_state->thread3.urb_entry_read_length = 1; /* XXX */
    wm_state->thread3.urb_entry_read_offset = 0; /* XXX */

    wm_state->wm4.stats_enable = 0;
    /* sampler state pointer is stored in 32-byte units */
    wm_state->wm4.sampler_state_pointer = render_state->wm.sampler->offset >> 5; 

    if (IS_IRONLAKE(i965->intel.device_id)) {
        wm_state->wm4.sampler_count = 0;        /* hardware requirement */
        wm_state->wm5.max_threads = 12 * 6 - 1;
    } else {
        /* sampler_count is in groups of four samplers */
        wm_state->wm4.sampler_count = (render_state->wm.sampler_count + 3) / 4;
        wm_state->wm5.max_threads = 10 * 5 - 1;
    }

    wm_state->wm5.thread_dispatch_enable = 1;
    wm_state->wm5.enable_16_pix = 1;
    wm_state->wm5.enable_8_pix = 0;
    wm_state->wm5.early_depth_test = 1;

    /* Reloc: thread0 kernel pointer tracks the PS_SUBPIC kernel BO; the
     * delta re-packs grf_reg_count into the same dword. */
    dri_bo_emit_reloc(render_state->wm.state,
                      I915_GEM_DOMAIN_INSTRUCTION, 0,
                      wm_state->thread0.grf_reg_count << 1,
                      offsetof(struct i965_wm_unit_state, thread0),
                      render_state->render_kernels[PS_SUBPIC_KERNEL].bo);

    /* Reloc: wm4 sampler state pointer tracks the sampler BO. */
    dri_bo_emit_reloc(render_state->wm.state,
                      I915_GEM_DOMAIN_INSTRUCTION, 0,
                      wm_state->wm4.sampler_count << 2,
                      offsetof(struct i965_wm_unit_state, wm4),
                      render_state->wm.sampler);

    dri_bo_unmap(render_state->wm.state);
}
446
447
/*
 * Fill in the WM (windower / pixel shader) unit state for video rendering:
 * points at the PS (planar YUV -> RGB) kernel and the sampler state.
 * Differs from the subpic variant in the kernel used, GRF start register,
 * and a const URB read of one entry.
 */
static void
i965_render_wm_unit(VADriverContextP ctx)
{
    struct i965_driver_data *i965 = i965_driver_data(ctx);
    struct i965_render_state *render_state = &i965->render_state;
    struct i965_wm_unit_state *wm_state;

    assert(render_state->wm.sampler);

    dri_bo_map(render_state->wm.state, 1);
    assert(render_state->wm.state->virtual);
    wm_state = render_state->wm.state->virtual;
    memset(wm_state, 0, sizeof(*wm_state));

    wm_state->thread0.grf_reg_count = I965_GRF_BLOCKS(PS_KERNEL_NUM_GRF);
    /* kernel start pointer is stored in 64-byte units */
    wm_state->thread0.kernel_start_pointer = render_state->render_kernels[PS_KERNEL].bo->offset >> 6;

    wm_state->thread1.single_program_flow = 1; /* XXX */

    if (IS_IRONLAKE(i965->intel.device_id))
        wm_state->thread1.binding_table_entry_count = 0;        /* hardware requirement */
    else
        wm_state->thread1.binding_table_entry_count = 7;

    /* no scratch space needed by this kernel */
    wm_state->thread2.scratch_space_base_pointer = 0;
    wm_state->thread2.per_thread_scratch_space = 0; /* 1024 bytes */

    wm_state->thread3.dispatch_grf_start_reg = 2; /* XXX */
    wm_state->thread3.const_urb_entry_read_length = 1;
    wm_state->thread3.const_urb_entry_read_offset = 0;
    wm_state->thread3.urb_entry_read_length = 1; /* XXX */
    wm_state->thread3.urb_entry_read_offset = 0; /* XXX */

    wm_state->wm4.stats_enable = 0;
    /* sampler state pointer is stored in 32-byte units */
    wm_state->wm4.sampler_state_pointer = render_state->wm.sampler->offset >> 5; 

    if (IS_IRONLAKE(i965->intel.device_id)) {
        wm_state->wm4.sampler_count = 0;        /* hardware requirement */
        wm_state->wm5.max_threads = 12 * 6 - 1;
    } else {
        /* sampler_count is in groups of four samplers */
        wm_state->wm4.sampler_count = (render_state->wm.sampler_count + 3) / 4;
        wm_state->wm5.max_threads = 10 * 5 - 1;
    }

    wm_state->wm5.thread_dispatch_enable = 1;
    wm_state->wm5.enable_16_pix = 1;
    wm_state->wm5.enable_8_pix = 0;
    wm_state->wm5.early_depth_test = 1;

    /* Reloc: thread0 kernel pointer tracks the PS kernel BO. */
    dri_bo_emit_reloc(render_state->wm.state,
                      I915_GEM_DOMAIN_INSTRUCTION, 0,
                      wm_state->thread0.grf_reg_count << 1,
                      offsetof(struct i965_wm_unit_state, thread0),
                      render_state->render_kernels[PS_KERNEL].bo);

    /* Reloc: wm4 sampler state pointer tracks the sampler BO. */
    dri_bo_emit_reloc(render_state->wm.state,
                      I915_GEM_DOMAIN_INSTRUCTION, 0,
                      wm_state->wm4.sampler_count << 2,
                      offsetof(struct i965_wm_unit_state, wm4),
                      render_state->wm.sampler);

    dri_bo_unmap(render_state->wm.state);
}
511
512 static void 
513 i965_render_cc_viewport(VADriverContextP ctx)
514 {
515     struct i965_driver_data *i965 = i965_driver_data(ctx);
516     struct i965_render_state *render_state = &i965->render_state;
517     struct i965_cc_viewport *cc_viewport;
518
519     dri_bo_map(render_state->cc.viewport, 1);
520     assert(render_state->cc.viewport->virtual);
521     cc_viewport = render_state->cc.viewport->virtual;
522     memset(cc_viewport, 0, sizeof(*cc_viewport));
523     
524     cc_viewport->min_depth = -1.e35;
525     cc_viewport->max_depth = 1.e35;
526
527     dri_bo_unmap(render_state->cc.viewport);
528 }
529
/*
 * Fill in the CC (color calculator) unit state for subpicture rendering.
 * Enables standard "over" alpha blending (src_alpha / inv_src_alpha) so
 * the ARGB subpicture composites onto the video frame.
 */
static void 
i965_subpic_render_cc_unit(VADriverContextP ctx)
{
    struct i965_driver_data *i965 = i965_driver_data(ctx);
    struct i965_render_state *render_state = &i965->render_state;
    struct i965_cc_unit_state *cc_state;

    assert(render_state->cc.viewport);

    dri_bo_map(render_state->cc.state, 1);
    assert(render_state->cc.state->virtual);
    cc_state = render_state->cc.state->virtual;
    memset(cc_state, 0, sizeof(*cc_state));

    cc_state->cc0.stencil_enable = 0;   /* disable stencil */
    cc_state->cc2.depth_test = 0;       /* disable depth test */
    cc_state->cc2.logicop_enable = 0;   /* disable logic op */
    cc_state->cc3.ia_blend_enable = 0 ;  /* blend alpha just like colors */
    cc_state->cc3.blend_enable = 1;     /* enable color blend */
    cc_state->cc3.alpha_test = 0;       /* disable alpha test */
    cc_state->cc3.alpha_test_format = 0;//0:ALPHATEST_UNORM8;       /*store alpha value with UNORM8 */
    cc_state->cc3.alpha_test_func = 5;//COMPAREFUNCTION_LESS;       /*pass if less than the reference */
    /* viewport state pointer is stored in 32-byte units */
    cc_state->cc4.cc_viewport_state_offset = render_state->cc.viewport->offset >> 5;

    cc_state->cc5.dither_enable = 0;    /* disable dither */
    cc_state->cc5.logicop_func = 0xc;   /* WHITE */
    cc_state->cc5.statistics_enable = 1;
    /* independent-alpha factors (unused: ia_blend_enable is 0 above) */
    cc_state->cc5.ia_blend_function = I965_BLENDFUNCTION_ADD;
    cc_state->cc5.ia_src_blend_factor = I965_BLENDFACTOR_DST_ALPHA;
    cc_state->cc5.ia_dest_blend_factor = I965_BLENDFACTOR_DST_ALPHA;

    cc_state->cc6.clamp_post_alpha_blend = 0; 
    cc_state->cc6.clamp_pre_alpha_blend  =0; 
    
    /*final color = src_color*src_blend_factor +/- dst_color*dest_color_blend_factor*/
    cc_state->cc6.blend_function = I965_BLENDFUNCTION_ADD;
    cc_state->cc6.src_blend_factor = I965_BLENDFACTOR_SRC_ALPHA;
    cc_state->cc6.dest_blend_factor = I965_BLENDFACTOR_INV_SRC_ALPHA;
   
    /*alpha test reference*/
    cc_state->cc7.alpha_ref.f =0.0 ;

    /* Reloc so cc4's viewport pointer tracks the viewport BO. */
    dri_bo_emit_reloc(render_state->cc.state,
                      I915_GEM_DOMAIN_INSTRUCTION, 0,
                      0,
                      offsetof(struct i965_cc_unit_state, cc4),
                      render_state->cc.viewport);

    dri_bo_unmap(render_state->cc.state);
}
581
582
/*
 * Fill in the CC (color calculator) unit state for plain video rendering:
 * blending disabled, logic op enabled so pixels are written as-is.
 */
static void 
i965_render_cc_unit(VADriverContextP ctx)
{
    struct i965_driver_data *i965 = i965_driver_data(ctx);
    struct i965_render_state *render_state = &i965->render_state;
    struct i965_cc_unit_state *cc_state;

    assert(render_state->cc.viewport);

    dri_bo_map(render_state->cc.state, 1);
    assert(render_state->cc.state->virtual);
    cc_state = render_state->cc.state->virtual;
    memset(cc_state, 0, sizeof(*cc_state));

    cc_state->cc0.stencil_enable = 0;   /* disable stencil */
    cc_state->cc2.depth_test = 0;       /* disable depth test */
    cc_state->cc2.logicop_enable = 1;   /* enable logic op */
    cc_state->cc3.ia_blend_enable = 0;  /* blend alpha just like colors */
    cc_state->cc3.blend_enable = 0;     /* disable color blend */
    cc_state->cc3.alpha_test = 0;       /* disable alpha test */
    /* viewport state pointer is stored in 32-byte units */
    cc_state->cc4.cc_viewport_state_offset = render_state->cc.viewport->offset >> 5;

    cc_state->cc5.dither_enable = 0;    /* disable dither */
    /* NOTE(review): 0xc is labeled WHITE here, but GL logic op 0xC is COPY
     * (pass source unchanged) — confirm the intended operation. */
    cc_state->cc5.logicop_func = 0xc;   /* WHITE */
    cc_state->cc5.statistics_enable = 1;
    /* independent-alpha factors (unused: blending disabled above) */
    cc_state->cc5.ia_blend_function = I965_BLENDFUNCTION_ADD;
    cc_state->cc5.ia_src_blend_factor = I965_BLENDFACTOR_ONE;
    cc_state->cc5.ia_dest_blend_factor = I965_BLENDFACTOR_ONE;

    /* Reloc so cc4's viewport pointer tracks the viewport BO. */
    dri_bo_emit_reloc(render_state->cc.state,
                      I915_GEM_DOMAIN_INSTRUCTION, 0,
                      0,
                      offsetof(struct i965_cc_unit_state, cc4),
                      render_state->cc.viewport);

    dri_bo_unmap(render_state->cc.state);
}
620
621 static void
622 i965_render_set_surface_tiling(struct i965_surface_state *ss, unsigned int tiling)
623 {
624     switch (tiling) {
625     case I915_TILING_NONE:
626         ss->ss3.tiled_surface = 0;
627         ss->ss3.tile_walk = 0;
628         break;
629     case I915_TILING_X:
630         ss->ss3.tiled_surface = 1;
631         ss->ss3.tile_walk = I965_TILEWALK_XMAJOR;
632         break;
633     case I915_TILING_Y:
634         ss->ss3.tiled_surface = 1;
635         ss->ss3.tile_walk = I965_TILEWALK_YMAJOR;
636         break;
637     }
638 }
639
640 static void
641 i965_render_set_surface_state(struct i965_surface_state *ss,
642                               dri_bo *bo, unsigned long offset,
643                               int width, int height,
644                               int pitch, int format)
645 {
646     unsigned int tiling;
647     unsigned int swizzle;
648
649     memset(ss, 0, sizeof(*ss));
650     ss->ss0.surface_type = I965_SURFACE_2D;
651     ss->ss0.surface_format = format;
652     ss->ss0.color_blend = 1;
653
654     ss->ss1.base_addr = bo->offset + offset;
655
656     ss->ss2.width = width - 1;
657     ss->ss2.height = height - 1;
658
659     ss->ss3.pitch = pitch - 1;
660
661     dri_bo_get_tiling(bo, &tiling, &swizzle);
662     i965_render_set_surface_tiling(ss, tiling);
663 }
664
665 static void
666 gen7_render_set_surface_tiling(struct gen7_surface_state *ss, uint32_t tiling)
667 {
668    switch (tiling) {
669    case I915_TILING_NONE:
670       ss->ss0.tiled_surface = 0;
671       ss->ss0.tile_walk = 0;
672       break;
673    case I915_TILING_X:
674       ss->ss0.tiled_surface = 1;
675       ss->ss0.tile_walk = I965_TILEWALK_XMAJOR;
676       break;
677    case I915_TILING_Y:
678       ss->ss0.tiled_surface = 1;
679       ss->ss0.tile_walk = I965_TILEWALK_YMAJOR;
680       break;
681    }
682 }
683
684 static void
685 gen7_render_set_surface_state(struct gen7_surface_state *ss,
686                               dri_bo *bo, unsigned long offset,
687                               int width, int height,
688                               int pitch, int format)
689 {
690     unsigned int tiling;
691     unsigned int swizzle;
692
693     memset(ss, 0, sizeof(*ss));
694
695     ss->ss0.surface_type = I965_SURFACE_2D;
696     ss->ss0.surface_format = format;
697
698     ss->ss1.base_addr = bo->offset + offset;
699
700     ss->ss2.width = width - 1;
701     ss->ss2.height = height - 1;
702
703     ss->ss3.pitch = pitch - 1;
704
705     dri_bo_get_tiling(bo, &tiling, &swizzle);
706     gen7_render_set_surface_tiling(ss, tiling);
707 }
708
/*
 * Bind one source region as a sampled surface: write the (gen-specific)
 * surface state into slot `index` of the combined surface-state/binding-
 * table BO, emit a relocation for its base address, and point the binding
 * table entry at it.  Also bumps wm.sampler_count — one sampler per bound
 * source surface.
 */
static void
i965_render_src_surface_state(VADriverContextP ctx, 
                              int index,
                              dri_bo *region,
                              unsigned long offset,
                              int w, int h,
                              int pitch, int format)
{
    struct i965_driver_data *i965 = i965_driver_data(ctx);  
    struct i965_render_state *render_state = &i965->render_state;
    void *ss;
    dri_bo *ss_bo = render_state->wm.surface_state_binding_table_bo;

    assert(index < MAX_RENDER_SURFACES);

    dri_bo_map(ss_bo, 1);
    assert(ss_bo->virtual);
    ss = (char *)ss_bo->virtual + SURFACE_STATE_OFFSET(index);

    if (IS_GEN7(i965->intel.device_id)) {
        gen7_render_set_surface_state(ss,
                                      region, offset,
                                      w, h,
                                      pitch, format);
        /* reloc targets the ss1 (base address) dword; delta = plane offset */
        dri_bo_emit_reloc(ss_bo,
                          I915_GEM_DOMAIN_SAMPLER, 0,
                          offset,
                          SURFACE_STATE_OFFSET(index) + offsetof(struct gen7_surface_state, ss1),
                          region);
    } else {
        i965_render_set_surface_state(ss,
                                      region, offset,
                                      w, h,
                                      pitch, format);
        /* reloc targets the ss1 (base address) dword; delta = plane offset */
        dri_bo_emit_reloc(ss_bo,
                          I915_GEM_DOMAIN_SAMPLER, 0,
                          offset,
                          SURFACE_STATE_OFFSET(index) + offsetof(struct i965_surface_state, ss1),
                          region);
    }

    /* binding table entry points at the surface state just written */
    ((unsigned int *)((char *)ss_bo->virtual + BINDING_TABLE_OFFSET))[index] = SURFACE_STATE_OFFSET(index);
    dri_bo_unmap(ss_bo);
    render_state->wm.sampler_count++;
}
754
/*
 * Bind the Y/U/V (or Y/UV) planes of a video surface as sampled source
 * surfaces.  The post-processed output BO is preferred over the raw
 * surface BO when one exists.  Each plane is bound twice (two consecutive
 * binding-table slots), matching what the sampling kernels expect.
 */
static void
i965_render_src_surfaces_state(VADriverContextP ctx,
                              VASurfaceID surface)
{
    struct i965_driver_data *i965 = i965_driver_data(ctx);  
    struct i965_render_state *render_state = &i965->render_state;
    struct object_surface *obj_surface;
    int w, h;      /* padded width/height (also used as pitch) */
    int rw, rh;    /* real (original) width/height */
    dri_bo *region;

    obj_surface = SURFACE(surface);
    assert(obj_surface);

    if (obj_surface->pp_out_bo) {
        w = obj_surface->pp_out_width;
        h = obj_surface->pp_out_height;
        rw = obj_surface->orig_pp_out_width;
        rh = obj_surface->orig_pp_out_height;
        region = obj_surface->pp_out_bo;
    } else {
        w = obj_surface->width;
        h = obj_surface->height;
        rw = obj_surface->orig_width;
        rh = obj_surface->orig_height;
        region = obj_surface->bo;
    }

    i965_render_src_surface_state(ctx, 1, region, 0, rw, rh, w, I965_SURFACEFORMAT_R8_UNORM);     /* Y */
    i965_render_src_surface_state(ctx, 2, region, 0, rw, rh, w, I965_SURFACEFORMAT_R8_UNORM);

    if (obj_surface->fourcc == VA_FOURCC('Y','V','1','2')) {
        /* Binding-table indices are swapped relative to the I420 case
         * below — presumably because YV12 stores the V plane before the
         * U plane; confirm against the surface allocation code. */
        int u3 = 5, u4 = 6, v5 = 3, v6 = 4;

        i965_render_src_surface_state(ctx, u3, region, w * h, rw / 2, rh / 2, w / 2, I965_SURFACEFORMAT_R8_UNORM); /* U */
        i965_render_src_surface_state(ctx, u4, region, w * h, rw / 2, rh / 2, w / 2, I965_SURFACEFORMAT_R8_UNORM);
        i965_render_src_surface_state(ctx, v5, region, w * h + w * h / 4, rw / 2, rh / 2, w / 2, I965_SURFACEFORMAT_R8_UNORM);     /* V */
        i965_render_src_surface_state(ctx, v6, region, w * h + w * h / 4, rw / 2, rh / 2, w / 2, I965_SURFACEFORMAT_R8_UNORM);
    } else {
        if (obj_surface->fourcc == VA_FOURCC('N','V','1','2')) {
            /* NV12: one interleaved UV plane right after Y */
            i965_render_src_surface_state(ctx, 3, region, w * h, rw / 2, rh / 2, w, I965_SURFACEFORMAT_R8G8_UNORM); /* UV */
            i965_render_src_surface_state(ctx, 4, region, w * h, rw / 2, rh / 2, w, I965_SURFACEFORMAT_R8G8_UNORM);
        } else {
            /* I420-style layout: U plane then V plane */
            int u3 = 3, u4 = 4, v5 = 5, v6 = 6;
            
            i965_render_src_surface_state(ctx, u3, region, w * h, rw / 2, rh / 2, w / 2, I965_SURFACEFORMAT_R8_UNORM); /* U */
            i965_render_src_surface_state(ctx, u4, region, w * h, rw / 2, rh / 2, w / 2, I965_SURFACEFORMAT_R8_UNORM);
            i965_render_src_surface_state(ctx, v5, region, w * h + w * h / 4, rw / 2, rh / 2, w / 2, I965_SURFACEFORMAT_R8_UNORM);     /* V */
            i965_render_src_surface_state(ctx, v6, region, w * h + w * h / 4, rw / 2, rh / 2, w / 2, I965_SURFACEFORMAT_R8_UNORM);
        }
    }
}
807
808 static void
809 i965_subpic_render_src_surfaces_state(VADriverContextP ctx,
810                               VASurfaceID surface)
811 {
812     struct i965_driver_data *i965 = i965_driver_data(ctx);  
813     struct object_surface *obj_surface = SURFACE(surface);
814     int w, h;
815     dri_bo *region;
816     dri_bo *subpic_region;
817     struct object_subpic *obj_subpic = SUBPIC(obj_surface->subpic);
818     struct object_image *obj_image = IMAGE(obj_subpic->image);
819     assert(obj_surface);
820     assert(obj_surface->bo);
821     w = obj_surface->width;
822     h = obj_surface->height;
823     region = obj_surface->bo;
824     subpic_region = obj_image->bo;
825     /*subpicture surface*/
826     i965_render_src_surface_state(ctx, 1, subpic_region, 0, obj_subpic->width, obj_subpic->height, obj_subpic->pitch, obj_subpic->format);     
827     i965_render_src_surface_state(ctx, 2, subpic_region, 0, obj_subpic->width, obj_subpic->height, obj_subpic->pitch, obj_subpic->format);     
828 }
829
/*
 * Bind the drawable's region as the render-target surface in slot `index`:
 * choose the surface format from the drawable depth (16bpp -> B5G6R5,
 * otherwise B8G8R8A8), write the gen-specific surface state, emit a
 * read/write render-domain relocation, and fill the binding table entry.
 */
static void
i965_render_dest_surface_state(VADriverContextP ctx, int index)
{
    struct i965_driver_data *i965 = i965_driver_data(ctx);  
    struct i965_render_state *render_state = &i965->render_state;
    struct intel_region *dest_region = render_state->draw_region;
    void *ss;
    dri_bo *ss_bo = render_state->wm.surface_state_binding_table_bo;
    int format;
    assert(index < MAX_RENDER_SURFACES);

    if (dest_region->cpp == 2) {
        format = I965_SURFACEFORMAT_B5G6R5_UNORM;
    } else {
        format = I965_SURFACEFORMAT_B8G8R8A8_UNORM;
    }

    dri_bo_map(ss_bo, 1);
    assert(ss_bo->virtual);
    ss = (char *)ss_bo->virtual + SURFACE_STATE_OFFSET(index);

    if (IS_GEN7(i965->intel.device_id)) {
        gen7_render_set_surface_state(ss,
                                      dest_region->bo, 0,
                                      dest_region->width, dest_region->height,
                                      dest_region->pitch, format);
        /* render target is both read and written -> RENDER/RENDER domains */
        dri_bo_emit_reloc(ss_bo,
                          I915_GEM_DOMAIN_RENDER, I915_GEM_DOMAIN_RENDER,
                          0,
                          SURFACE_STATE_OFFSET(index) + offsetof(struct gen7_surface_state, ss1),
                          dest_region->bo);
    } else {
        i965_render_set_surface_state(ss,
                                      dest_region->bo, 0,
                                      dest_region->width, dest_region->height,
                                      dest_region->pitch, format);
        /* render target is both read and written -> RENDER/RENDER domains */
        dri_bo_emit_reloc(ss_bo,
                          I915_GEM_DOMAIN_RENDER, I915_GEM_DOMAIN_RENDER,
                          0,
                          SURFACE_STATE_OFFSET(index) + offsetof(struct i965_surface_state, ss1),
                          dest_region->bo);
    }

    /* binding table entry points at the surface state just written */
    ((unsigned int *)((char *)ss_bo->virtual + BINDING_TABLE_OFFSET))[index] = SURFACE_STATE_OFFSET(index);
    dri_bo_unmap(ss_bo);
}
876
877 static void 
878 i965_subpic_render_upload_vertex(VADriverContextP ctx,
879                                  VASurfaceID surface,
880                                  const VARectangle *output_rect)
881 {    
882     struct i965_driver_data  *i965         = i965_driver_data(ctx);
883     struct i965_render_state *render_state = &i965->render_state;
884     struct object_surface    *obj_surface  = SURFACE(surface);
885     struct object_subpic     *obj_subpic   = SUBPIC(obj_surface->subpic);
886
887     const float sx = (float)output_rect->width  / (float)obj_surface->orig_width;
888     const float sy = (float)output_rect->height / (float)obj_surface->orig_height;
889     float *vb, tx1, tx2, ty1, ty2, x1, x2, y1, y2;
890     int i = 0;
891
892     VARectangle dst_rect;
893     dst_rect.x      = output_rect->x + sx * (float)obj_subpic->dst_rect.x;
894     dst_rect.y      = output_rect->y + sx * (float)obj_subpic->dst_rect.y;
895     dst_rect.width  = sx * (float)obj_subpic->dst_rect.width;
896     dst_rect.height = sy * (float)obj_subpic->dst_rect.height;
897
898     dri_bo_map(render_state->vb.vertex_buffer, 1);
899     assert(render_state->vb.vertex_buffer->virtual);
900     vb = render_state->vb.vertex_buffer->virtual;
901
902     tx1 = (float)obj_subpic->src_rect.x / (float)obj_subpic->width;
903     ty1 = (float)obj_subpic->src_rect.y / (float)obj_subpic->height;
904     tx2 = (float)(obj_subpic->src_rect.x + obj_subpic->src_rect.width) / (float)obj_subpic->width;
905     ty2 = (float)(obj_subpic->src_rect.y + obj_subpic->src_rect.height) / (float)obj_subpic->height;
906
907     x1 = (float)dst_rect.x;
908     y1 = (float)dst_rect.y;
909     x2 = (float)(dst_rect.x + dst_rect.width);
910     y2 = (float)(dst_rect.y + dst_rect.height);
911
912     vb[i++] = tx2;
913     vb[i++] = ty2;
914     vb[i++] = x2;
915     vb[i++] = y2;
916
917     vb[i++] = tx1;
918     vb[i++] = ty2;
919     vb[i++] = x1;
920     vb[i++] = y2;
921
922     vb[i++] = tx1;
923     vb[i++] = ty1;
924     vb[i++] = x1;
925     vb[i++] = y1;
926     dri_bo_unmap(render_state->vb.vertex_buffer);
927 }
928
929 static void 
930 i965_render_upload_vertex(VADriverContextP ctx,
931                           VASurfaceID surface,
932                           short srcx,
933                           short srcy,
934                           unsigned short srcw,
935                           unsigned short srch,
936                           short destx,
937                           short desty,
938                           unsigned short destw,
939                           unsigned short desth)
940 {
941     struct i965_driver_data *i965 = i965_driver_data(ctx);
942     struct i965_render_state *render_state = &i965->render_state;
943     struct intel_region *dest_region = render_state->draw_region;
944     struct object_surface *obj_surface;
945     float *vb;
946
947     float u1, v1, u2, v2;
948     int i, width, height;
949     int box_x1 = dest_region->x + destx;
950     int box_y1 = dest_region->y + desty;
951     int box_x2 = box_x1 + destw;
952     int box_y2 = box_y1 + desth;
953
954     obj_surface = SURFACE(surface);
955     assert(surface);
956     width = obj_surface->orig_width;
957     height = obj_surface->orig_height;
958
959     u1 = (float)srcx / width;
960     v1 = (float)srcy / height;
961     u2 = (float)(srcx + srcw) / width;
962     v2 = (float)(srcy + srch) / height;
963
964     dri_bo_map(render_state->vb.vertex_buffer, 1);
965     assert(render_state->vb.vertex_buffer->virtual);
966     vb = render_state->vb.vertex_buffer->virtual;
967
968     i = 0;
969     vb[i++] = u2;
970     vb[i++] = v2;
971     vb[i++] = (float)box_x2;
972     vb[i++] = (float)box_y2;
973     
974     vb[i++] = u1;
975     vb[i++] = v2;
976     vb[i++] = (float)box_x1;
977     vb[i++] = (float)box_y2;
978
979     vb[i++] = u1;
980     vb[i++] = v1;
981     vb[i++] = (float)box_x1;
982     vb[i++] = (float)box_y1;
983
984     dri_bo_unmap(render_state->vb.vertex_buffer);
985 }
986
987 static void
988 i965_render_upload_constants(VADriverContextP ctx)
989 {
990     struct i965_driver_data *i965 = i965_driver_data(ctx);
991     struct i965_render_state *render_state = &i965->render_state;
992     unsigned short *constant_buffer;
993
994     if (render_state->curbe.upload)
995         return;
996
997     dri_bo_map(render_state->curbe.bo, 1);
998     assert(render_state->curbe.bo->virtual);
999     constant_buffer = render_state->curbe.bo->virtual;
1000
1001     if (render_state->interleaved_uv)
1002         *constant_buffer = 1;
1003     else
1004         *constant_buffer = 0;
1005
1006     dri_bo_unmap(render_state->curbe.bo);
1007     render_state->curbe.upload = 1;
1008 }
1009
/*
 * Prepare all indirect render state for drawing SURFACE's source
 * rectangle into the destination rectangle: unit states, surface
 * states, samplers, viewport, vertices and PS constants.  The calls
 * are order-dependent (destination surface must occupy binding-table
 * slot 0 before the sources are bound).
 */
static void
i965_surface_render_state_setup(VADriverContextP ctx,
                        VASurfaceID surface,
                        short srcx,
                        short srcy,
                        unsigned short srcw,
                        unsigned short srch,
                        short destx,
                        short desty,
                        unsigned short destw,
                        unsigned short desth)
{
    i965_render_vs_unit(ctx);
    i965_render_sf_unit(ctx);
    i965_render_dest_surface_state(ctx, 0);
    i965_render_src_surfaces_state(ctx, surface);
    i965_render_sampler(ctx);
    i965_render_wm_unit(ctx);
    i965_render_cc_viewport(ctx);
    i965_render_cc_unit(ctx);
    i965_render_upload_vertex(ctx, surface,
                              srcx, srcy, srcw, srch,
                              destx, desty, destw, desth);
    i965_render_upload_constants(ctx);
}
1035 static void
1036 i965_subpic_render_state_setup(VADriverContextP ctx,
1037                         VASurfaceID surface,
1038                         short srcx,
1039                         short srcy,
1040                         unsigned short srcw,
1041                         unsigned short srch,
1042                         short destx,
1043                         short desty,
1044                         unsigned short destw,
1045                         unsigned short desth)
1046 {
1047     i965_render_vs_unit(ctx);
1048     i965_render_sf_unit(ctx);
1049     i965_render_dest_surface_state(ctx, 0);
1050     i965_subpic_render_src_surfaces_state(ctx, surface);
1051     i965_render_sampler(ctx);
1052     i965_subpic_render_wm_unit(ctx);
1053     i965_render_cc_viewport(ctx);
1054     i965_subpic_render_cc_unit(ctx);
1055
1056     VARectangle output_rect;
1057     output_rect.x      = destx;
1058     output_rect.y      = desty;
1059     output_rect.width  = destw;
1060     output_rect.height = desth;
1061     i965_subpic_render_upload_vertex(ctx, surface, &output_rect);
1062 }
1063
1064
/* Route subsequent state commands to the 3D pipeline (vs. the media
 * pipeline). */
static void
i965_render_pipeline_select(VADriverContextP ctx)
{
    struct i965_driver_data *i965 = i965_driver_data(ctx);
    struct intel_batchbuffer *batch = i965->batch;
 
    BEGIN_BATCH(batch, 1);
    OUT_BATCH(batch, CMD_PIPELINE_SELECT | PIPELINE_SELECT_3D);
    ADVANCE_BATCH(batch);
}
1075
/* Program a zero System Instruction Pointer (no SIP handler used). */
static void
i965_render_state_sip(VADriverContextP ctx)
{
    struct i965_driver_data *i965 = i965_driver_data(ctx);
    struct intel_batchbuffer *batch = i965->batch;

    BEGIN_BATCH(batch, 2);
    OUT_BATCH(batch, CMD_STATE_SIP | 0);
    OUT_BATCH(batch, 0);
    ADVANCE_BATCH(batch);
}
1087
/*
 * Program STATE_BASE_ADDRESS: the surface-state base points at the
 * buffer holding the surface states and binding table, so that
 * SURFACE_STATE_OFFSET()/BINDING_TABLE_OFFSET are relative to it; all
 * other bases remain zero.  Ironlake's version of the command takes
 * two additional dwords.
 */
static void
i965_render_state_base_address(VADriverContextP ctx)
{
    struct i965_driver_data *i965 = i965_driver_data(ctx);
    struct intel_batchbuffer *batch = i965->batch;
    struct i965_render_state *render_state = &i965->render_state;

    if (IS_IRONLAKE(i965->intel.device_id)) {
        BEGIN_BATCH(batch, 8);
        OUT_BATCH(batch, CMD_STATE_BASE_ADDRESS | 6);
        OUT_BATCH(batch, 0 | BASE_ADDRESS_MODIFY); /* general state base */
        OUT_RELOC(batch, render_state->wm.surface_state_binding_table_bo, I915_GEM_DOMAIN_INSTRUCTION, 0, BASE_ADDRESS_MODIFY); /* surface state base */
        OUT_BATCH(batch, 0 | BASE_ADDRESS_MODIFY);
        OUT_BATCH(batch, 0 | BASE_ADDRESS_MODIFY);
        OUT_BATCH(batch, 0 | BASE_ADDRESS_MODIFY);
        OUT_BATCH(batch, 0 | BASE_ADDRESS_MODIFY);
        OUT_BATCH(batch, 0 | BASE_ADDRESS_MODIFY);
        ADVANCE_BATCH(batch);
    } else {
        BEGIN_BATCH(batch, 6);
        OUT_BATCH(batch, CMD_STATE_BASE_ADDRESS | 4);
        OUT_BATCH(batch, 0 | BASE_ADDRESS_MODIFY); /* general state base */
        OUT_RELOC(batch, render_state->wm.surface_state_binding_table_bo, I915_GEM_DOMAIN_INSTRUCTION, 0, BASE_ADDRESS_MODIFY); /* surface state base */
        OUT_BATCH(batch, 0 | BASE_ADDRESS_MODIFY);
        OUT_BATCH(batch, 0 | BASE_ADDRESS_MODIFY);
        OUT_BATCH(batch, 0 | BASE_ADDRESS_MODIFY);
        ADVANCE_BATCH(batch);
    }
}
1117
/* Only the WM/PS stage uses a binding table here; VS/GS/CLIP/SF get
 * none. */
static void
i965_render_binding_table_pointers(VADriverContextP ctx)
{
    struct i965_driver_data *i965 = i965_driver_data(ctx);
    struct intel_batchbuffer *batch = i965->batch;

    BEGIN_BATCH(batch, 6);
    OUT_BATCH(batch, CMD_BINDING_TABLE_POINTERS | 4);
    OUT_BATCH(batch, 0); /* vs */
    OUT_BATCH(batch, 0); /* gs */
    OUT_BATCH(batch, 0); /* clip */
    OUT_BATCH(batch, 0); /* sf */
    OUT_BATCH(batch, BINDING_TABLE_OFFSET);
    ADVANCE_BATCH(batch);
}
1133
/* Set the pipeline constant color to (R=1, G=0, B=1, A=1). */
static void 
i965_render_constant_color(VADriverContextP ctx)
{
    struct i965_driver_data *i965 = i965_driver_data(ctx);
    struct intel_batchbuffer *batch = i965->batch;

    BEGIN_BATCH(batch, 5);
    OUT_BATCH(batch, CMD_CONSTANT_COLOR | 3);
    OUT_BATCH(batch, float_to_uint(1.0)); /* R */
    OUT_BATCH(batch, float_to_uint(0.0)); /* G */
    OUT_BATCH(batch, float_to_uint(1.0)); /* B */
    OUT_BATCH(batch, float_to_uint(1.0)); /* A */
    ADVANCE_BATCH(batch);
}
1148
/*
 * Point the fixed-function stages at their indirect unit state
 * buffers.  GS and CLIP are disabled; VS, SF, WM and CC state were
 * filled in by the i965_render_*_unit() helpers.
 */
static void
i965_render_pipelined_pointers(VADriverContextP ctx)
{
    struct i965_driver_data *i965 = i965_driver_data(ctx);
    struct intel_batchbuffer *batch = i965->batch;
    struct i965_render_state *render_state = &i965->render_state;

    BEGIN_BATCH(batch, 7);
    OUT_BATCH(batch, CMD_PIPELINED_POINTERS | 5);
    OUT_RELOC(batch, render_state->vs.state, I915_GEM_DOMAIN_INSTRUCTION, 0, 0);
    OUT_BATCH(batch, 0);  /* disable GS */
    OUT_BATCH(batch, 0);  /* disable CLIP */
    OUT_RELOC(batch, render_state->sf.state, I915_GEM_DOMAIN_INSTRUCTION, 0, 0);
    OUT_RELOC(batch, render_state->wm.state, I915_GEM_DOMAIN_INSTRUCTION, 0, 0);
    OUT_RELOC(batch, render_state->cc.state, I915_GEM_DOMAIN_INSTRUCTION, 0, 0);
    ADVANCE_BATCH(batch);
}
1166
/*
 * Partition the URB between the fixed-function stages (VS, GS, CLIP,
 * SF, CS) as consecutive regions sized by the URB_*_ENTRIES /
 * URB_*_ENTRY_SIZE constants, then program the fence registers with
 * each region's end offset.
 */
static void
i965_render_urb_layout(VADriverContextP ctx)
{
    struct i965_driver_data *i965 = i965_driver_data(ctx);
    struct intel_batchbuffer *batch = i965->batch;
    int urb_vs_start, urb_vs_size;
    int urb_gs_start, urb_gs_size;
    int urb_clip_start, urb_clip_size;
    int urb_sf_start, urb_sf_size;
    int urb_cs_start, urb_cs_size;

    /* lay the regions out back to back, starting at offset 0 */
    urb_vs_start = 0;
    urb_vs_size = URB_VS_ENTRIES * URB_VS_ENTRY_SIZE;
    urb_gs_start = urb_vs_start + urb_vs_size;
    urb_gs_size = URB_GS_ENTRIES * URB_GS_ENTRY_SIZE;
    urb_clip_start = urb_gs_start + urb_gs_size;
    urb_clip_size = URB_CLIP_ENTRIES * URB_CLIP_ENTRY_SIZE;
    urb_sf_start = urb_clip_start + urb_clip_size;
    urb_sf_size = URB_SF_ENTRIES * URB_SF_ENTRY_SIZE;
    urb_cs_start = urb_sf_start + urb_sf_size;
    urb_cs_size = URB_CS_ENTRIES * URB_CS_ENTRY_SIZE;

    BEGIN_BATCH(batch, 3);
    OUT_BATCH(batch, 
              CMD_URB_FENCE |
              UF0_CS_REALLOC |
              UF0_SF_REALLOC |
              UF0_CLIP_REALLOC |
              UF0_GS_REALLOC |
              UF0_VS_REALLOC |
              1);
    OUT_BATCH(batch, 
              ((urb_clip_start + urb_clip_size) << UF1_CLIP_FENCE_SHIFT) |
              ((urb_gs_start + urb_gs_size) << UF1_GS_FENCE_SHIFT) |
              ((urb_vs_start + urb_vs_size) << UF1_VS_FENCE_SHIFT));
    OUT_BATCH(batch,
              ((urb_cs_start + urb_cs_size) << UF2_CS_FENCE_SHIFT) |
              ((urb_sf_start + urb_sf_size) << UF2_SF_FENCE_SHIFT));
    ADVANCE_BATCH(batch);
}
1207
/* Describe the constant-buffer (CURBE) allocation within the CS URB
 * region: entry size and number of entries. */
static void 
i965_render_cs_urb_layout(VADriverContextP ctx)
{
    struct i965_driver_data *i965 = i965_driver_data(ctx);
    struct intel_batchbuffer *batch = i965->batch;

    BEGIN_BATCH(batch, 2);
    OUT_BATCH(batch, CMD_CS_URB_STATE | 0);
    OUT_BATCH(batch,
              ((URB_CS_ENTRY_SIZE - 1) << 4) |          /* URB Entry Allocation Size */
              (URB_CS_ENTRIES << 0));                /* Number of URB Entries */
    ADVANCE_BATCH(batch);
}
1221
/*
 * Point the pipeline at the CURBE buffer uploaded by
 * i965_render_upload_constants().  Bit 8 marks the buffer valid; the
 * relocation delta carries the buffer length (URB_CS_ENTRY_SIZE - 1)
 * in the low bits of the address dword.
 */
static void
i965_render_constant_buffer(VADriverContextP ctx)
{
    struct i965_driver_data *i965 = i965_driver_data(ctx);
    struct intel_batchbuffer *batch = i965->batch;
    struct i965_render_state *render_state = &i965->render_state;

    BEGIN_BATCH(batch, 2);
    OUT_BATCH(batch, CMD_CONSTANT_BUFFER | (1 << 8) | (2 - 2));
    OUT_RELOC(batch, render_state->curbe.bo,
              I915_GEM_DOMAIN_INSTRUCTION, 0,
              URB_CS_ENTRY_SIZE - 1);
    ADVANCE_BATCH(batch);    
}
1236
/* Set the drawing rectangle to cover the entire destination region
 * (clip max is inclusive, hence width/height - 1). */
static void
i965_render_drawing_rectangle(VADriverContextP ctx)
{
    struct i965_driver_data *i965 = i965_driver_data(ctx);
    struct intel_batchbuffer *batch = i965->batch;
    struct i965_render_state *render_state = &i965->render_state;
    struct intel_region *dest_region = render_state->draw_region;

    BEGIN_BATCH(batch, 4);
    OUT_BATCH(batch, CMD_DRAWING_RECTANGLE | 2);
    OUT_BATCH(batch, 0x00000000); /* clip min: (0, 0) */
    OUT_BATCH(batch, (dest_region->width - 1) | (dest_region->height - 1) << 16); /* clip max */
    OUT_BATCH(batch, 0x00000000); /* origin */
    ADVANCE_BATCH(batch);
}
1252
/*
 * Describe the vertex layout: two elements of two floats each, at
 * offsets 0 and 8 within the 16-byte vertex.  Missing components are
 * filled with 1.0.  Ironlake dropped the destination-element-offset
 * field from VE1, hence the two variants.
 */
static void
i965_render_vertex_elements(VADriverContextP ctx)
{
    struct i965_driver_data *i965 = i965_driver_data(ctx);
    struct intel_batchbuffer *batch = i965->batch;

    if (IS_IRONLAKE(i965->intel.device_id)) {
        BEGIN_BATCH(batch, 5);
        OUT_BATCH(batch, CMD_VERTEX_ELEMENTS | 3);
        /* offset 0: X,Y -> {X, Y, 1.0, 1.0} */
        OUT_BATCH(batch, (0 << VE0_VERTEX_BUFFER_INDEX_SHIFT) |
                  VE0_VALID |
                  (I965_SURFACEFORMAT_R32G32_FLOAT << VE0_FORMAT_SHIFT) |
                  (0 << VE0_OFFSET_SHIFT));
        OUT_BATCH(batch, (I965_VFCOMPONENT_STORE_SRC << VE1_VFCOMPONENT_0_SHIFT) |
                  (I965_VFCOMPONENT_STORE_SRC << VE1_VFCOMPONENT_1_SHIFT) |
                  (I965_VFCOMPONENT_STORE_1_FLT << VE1_VFCOMPONENT_2_SHIFT) |
                  (I965_VFCOMPONENT_STORE_1_FLT << VE1_VFCOMPONENT_3_SHIFT));
        /* offset 8: S0, T0 -> {S0, T0, 1.0, 1.0} */
        OUT_BATCH(batch, (0 << VE0_VERTEX_BUFFER_INDEX_SHIFT) |
                  VE0_VALID |
                  (I965_SURFACEFORMAT_R32G32_FLOAT << VE0_FORMAT_SHIFT) |
                  (8 << VE0_OFFSET_SHIFT));
        OUT_BATCH(batch, (I965_VFCOMPONENT_STORE_SRC << VE1_VFCOMPONENT_0_SHIFT) |
                  (I965_VFCOMPONENT_STORE_SRC << VE1_VFCOMPONENT_1_SHIFT) |
                  (I965_VFCOMPONENT_STORE_1_FLT << VE1_VFCOMPONENT_2_SHIFT) |
                  (I965_VFCOMPONENT_STORE_1_FLT << VE1_VFCOMPONENT_3_SHIFT));
        ADVANCE_BATCH(batch);
    } else {
        BEGIN_BATCH(batch, 5);
        OUT_BATCH(batch, CMD_VERTEX_ELEMENTS | 3);
        /* offset 0: X,Y -> {X, Y, 1.0, 1.0} */
        OUT_BATCH(batch, (0 << VE0_VERTEX_BUFFER_INDEX_SHIFT) |
                  VE0_VALID |
                  (I965_SURFACEFORMAT_R32G32_FLOAT << VE0_FORMAT_SHIFT) |
                  (0 << VE0_OFFSET_SHIFT));
        OUT_BATCH(batch, (I965_VFCOMPONENT_STORE_SRC << VE1_VFCOMPONENT_0_SHIFT) |
                  (I965_VFCOMPONENT_STORE_SRC << VE1_VFCOMPONENT_1_SHIFT) |
                  (I965_VFCOMPONENT_STORE_1_FLT << VE1_VFCOMPONENT_2_SHIFT) |
                  (I965_VFCOMPONENT_STORE_1_FLT << VE1_VFCOMPONENT_3_SHIFT) |
                  (0 << VE1_DESTINATION_ELEMENT_OFFSET_SHIFT));
        /* offset 8: S0, T0 -> {S0, T0, 1.0, 1.0} */
        OUT_BATCH(batch, (0 << VE0_VERTEX_BUFFER_INDEX_SHIFT) |
                  VE0_VALID |
                  (I965_SURFACEFORMAT_R32G32_FLOAT << VE0_FORMAT_SHIFT) |
                  (8 << VE0_OFFSET_SHIFT));
        OUT_BATCH(batch, (I965_VFCOMPONENT_STORE_SRC << VE1_VFCOMPONENT_0_SHIFT) |
                  (I965_VFCOMPONENT_STORE_SRC << VE1_VFCOMPONENT_1_SHIFT) |
                  (I965_VFCOMPONENT_STORE_1_FLT << VE1_VFCOMPONENT_2_SHIFT) |
                  (I965_VFCOMPONENT_STORE_1_FLT << VE1_VFCOMPONENT_3_SHIFT) |
                  (4 << VE1_DESTINATION_ELEMENT_OFFSET_SHIFT));
        ADVANCE_BATCH(batch);
    }
}
1307
/*
 * Load the sampler palette from IMAGE_ID's palette, applying ALPHA to
 * every entry.  No-op for images without a palette.
 */
static void
i965_render_upload_image_palette(
    VADriverContextP ctx,
    VAImageID        image_id,
    unsigned int     alpha
)
{
    struct i965_driver_data *i965 = i965_driver_data(ctx);
    struct intel_batchbuffer *batch = i965->batch;
    unsigned int i;

    struct object_image *obj_image = IMAGE(image_id);
    assert(obj_image);

    /* nothing to do for non-paletted formats */
    if (obj_image->image.num_palette_entries == 0)
        return;

    BEGIN_BATCH(batch, 1 + obj_image->image.num_palette_entries);
    OUT_BATCH(batch, CMD_SAMPLER_PALETTE_LOAD | (obj_image->image.num_palette_entries - 1));
    /* each entry: alpha in bits 31:24, color from the image palette in bits 23:0 */
    for (i = 0; i < obj_image->image.num_palette_entries; i++)
        OUT_BATCH(batch, (alpha << 24) | obj_image->palette[i]);
    ADVANCE_BATCH(batch);
}
1333
/*
 * Bind the vertex buffer (4 floats / 16 bytes per vertex) and emit a
 * 3DPRIMITIVE RECTLIST using the 3 vertices uploaded by the
 * *_upload_vertex() helpers.
 */
static void
i965_render_startup(VADriverContextP ctx)
{
    struct i965_driver_data *i965 = i965_driver_data(ctx);
    struct intel_batchbuffer *batch = i965->batch;
    struct i965_render_state *render_state = &i965->render_state;

    BEGIN_BATCH(batch, 11);
    OUT_BATCH(batch, CMD_VERTEX_BUFFERS | 3);
    OUT_BATCH(batch, 
              (0 << VB0_BUFFER_INDEX_SHIFT) |
              VB0_VERTEXDATA |
              ((4 * 4) << VB0_BUFFER_PITCH_SHIFT));
    OUT_RELOC(batch, render_state->vb.vertex_buffer, I915_GEM_DOMAIN_VERTEX, 0, 0);

    /* Ironlake takes a buffer end address; older parts a max index */
    if (IS_IRONLAKE(i965->intel.device_id))
        OUT_RELOC(batch, render_state->vb.vertex_buffer, I915_GEM_DOMAIN_VERTEX, 0, 12 * 4);
    else
        OUT_BATCH(batch, 3);

    OUT_BATCH(batch, 0);

    OUT_BATCH(batch, 
              CMD_3DPRIMITIVE |
              _3DPRIMITIVE_VERTEX_SEQUENTIAL |
              (_3DPRIM_RECTLIST << _3DPRIMITIVE_TOPOLOGY_SHIFT) |
              (0 << 9) |
              4);
    OUT_BATCH(batch, 3); /* vertex count per instance */
    OUT_BATCH(batch, 0); /* start vertex offset */
    OUT_BATCH(batch, 1); /* single instance */
    OUT_BATCH(batch, 0); /* start instance location */
    OUT_BATCH(batch, 0); /* index buffer offset, ignored */
    ADVANCE_BATCH(batch);
}
1369
/*
 * Clear the whole destination region to color 0 with an XY_COLOR_BLT
 * before the 3D pipeline draws the video frame over it.
 */
static void 
i965_clear_dest_region(VADriverContextP ctx)
{
    struct i965_driver_data *i965 = i965_driver_data(ctx);
    struct intel_batchbuffer *batch = i965->batch;
    struct i965_render_state *render_state = &i965->render_state;
    struct intel_region *dest_region = render_state->draw_region;
    unsigned int blt_cmd, br13;
    int pitch;

    blt_cmd = XY_COLOR_BLT_CMD;
    br13 = 0xf0 << 16; /* raster operation PATCOPY (0xf0) */
    pitch = dest_region->pitch;

    if (dest_region->cpp == 4) {
        br13 |= BR13_8888;
        blt_cmd |= (XY_COLOR_BLT_WRITE_RGB | XY_COLOR_BLT_WRITE_ALPHA);
    } else {
        assert(dest_region->cpp == 2);
        br13 |= BR13_565;
    }

    if (dest_region->tiling != I915_TILING_NONE) {
        blt_cmd |= XY_COLOR_BLT_DST_TILED;
        pitch /= 4; /* tiled destinations program the pitch in dwords */
    }

    br13 |= pitch;

    /* GEN6+ executes blits on the separate BLT ring */
    if (IS_GEN6(i965->intel.device_id) ||
        IS_GEN7(i965->intel.device_id)) {
        intel_batchbuffer_start_atomic_blt(batch, 24);
        BEGIN_BLT_BATCH(batch, 6);
    } else {
        intel_batchbuffer_start_atomic(batch, 24);
        BEGIN_BATCH(batch, 6);
    }

    OUT_BATCH(batch, blt_cmd);
    OUT_BATCH(batch, br13);
    OUT_BATCH(batch, (dest_region->y << 16) | (dest_region->x)); /* top-left */
    OUT_BATCH(batch, ((dest_region->y + dest_region->height) << 16) |
              (dest_region->x + dest_region->width)); /* bottom-right (exclusive) */
    OUT_RELOC(batch, dest_region->bo, 
              I915_GEM_DOMAIN_RENDER, I915_GEM_DOMAIN_RENDER,
              0);
    OUT_BATCH(batch, 0x0); /* fill color */
    ADVANCE_BATCH(batch);
    intel_batchbuffer_end_atomic(batch);
}
1420
/*
 * Emit the full 3D pipeline command sequence for a surface render.
 * The destination is cleared first; the batch commands are emitted
 * atomically and their order is mandated by the hardware programming
 * sequence.
 */
static void
i965_surface_render_pipeline_setup(VADriverContextP ctx)
{
    struct i965_driver_data *i965 = i965_driver_data(ctx);
    struct intel_batchbuffer *batch = i965->batch;

    i965_clear_dest_region(ctx);
    intel_batchbuffer_start_atomic(batch, 0x1000);
    intel_batchbuffer_emit_mi_flush(batch);
    i965_render_pipeline_select(ctx);
    i965_render_state_sip(ctx);
    i965_render_state_base_address(ctx);
    i965_render_binding_table_pointers(ctx);
    i965_render_constant_color(ctx);
    i965_render_pipelined_pointers(ctx);
    i965_render_urb_layout(ctx);
    i965_render_cs_urb_layout(ctx);
    i965_render_constant_buffer(ctx);
    i965_render_drawing_rectangle(ctx);
    i965_render_vertex_elements(ctx);
    i965_render_startup(ctx);
    intel_batchbuffer_end_atomic(batch);
}
1444
/*
 * Emit the 3D pipeline command sequence for subpicture blending.
 * Same as i965_surface_render_pipeline_setup() except the destination
 * is not cleared (the video is already there) and no constant buffer
 * is bound.
 */
static void
i965_subpic_render_pipeline_setup(VADriverContextP ctx)
{
    struct i965_driver_data *i965 = i965_driver_data(ctx);
    struct intel_batchbuffer *batch = i965->batch;

    intel_batchbuffer_start_atomic(batch, 0x1000);
    intel_batchbuffer_emit_mi_flush(batch);
    i965_render_pipeline_select(ctx);
    i965_render_state_sip(ctx);
    i965_render_state_base_address(ctx);
    i965_render_binding_table_pointers(ctx);
    i965_render_constant_color(ctx);
    i965_render_pipelined_pointers(ctx);
    i965_render_urb_layout(ctx);
    i965_render_cs_urb_layout(ctx);
    i965_render_drawing_rectangle(ctx);
    i965_render_vertex_elements(ctx);
    i965_render_startup(ctx);
    intel_batchbuffer_end_atomic(batch);
}
1466
1467
1468 static void 
1469 i965_render_initialize(VADriverContextP ctx)
1470 {
1471     struct i965_driver_data *i965 = i965_driver_data(ctx);
1472     struct i965_render_state *render_state = &i965->render_state;
1473     dri_bo *bo;
1474
1475     /* VERTEX BUFFER */
1476     dri_bo_unreference(render_state->vb.vertex_buffer);
1477     bo = dri_bo_alloc(i965->intel.bufmgr,
1478                       "vertex buffer",
1479                       4096,
1480                       4096);
1481     assert(bo);
1482     render_state->vb.vertex_buffer = bo;
1483
1484     /* VS */
1485     dri_bo_unreference(render_state->vs.state);
1486     bo = dri_bo_alloc(i965->intel.bufmgr,
1487                       "vs state",
1488                       sizeof(struct i965_vs_unit_state),
1489                       64);
1490     assert(bo);
1491     render_state->vs.state = bo;
1492
1493     /* GS */
1494     /* CLIP */
1495     /* SF */
1496     dri_bo_unreference(render_state->sf.state);
1497     bo = dri_bo_alloc(i965->intel.bufmgr,
1498                       "sf state",
1499                       sizeof(struct i965_sf_unit_state),
1500                       64);
1501     assert(bo);
1502     render_state->sf.state = bo;
1503
1504     /* WM */
1505     dri_bo_unreference(render_state->wm.surface_state_binding_table_bo);
1506     bo = dri_bo_alloc(i965->intel.bufmgr,
1507                       "surface state & binding table",
1508                       (SURFACE_STATE_PADDED_SIZE + sizeof(unsigned int)) * MAX_RENDER_SURFACES,
1509                       4096);
1510     assert(bo);
1511     render_state->wm.surface_state_binding_table_bo = bo;
1512
1513     dri_bo_unreference(render_state->wm.sampler);
1514     bo = dri_bo_alloc(i965->intel.bufmgr,
1515                       "sampler state",
1516                       MAX_SAMPLERS * sizeof(struct i965_sampler_state),
1517                       64);
1518     assert(bo);
1519     render_state->wm.sampler = bo;
1520     render_state->wm.sampler_count = 0;
1521
1522     dri_bo_unreference(render_state->wm.state);
1523     bo = dri_bo_alloc(i965->intel.bufmgr,
1524                       "wm state",
1525                       sizeof(struct i965_wm_unit_state),
1526                       64);
1527     assert(bo);
1528     render_state->wm.state = bo;
1529
1530     /* COLOR CALCULATOR */
1531     dri_bo_unreference(render_state->cc.state);
1532     bo = dri_bo_alloc(i965->intel.bufmgr,
1533                       "color calc state",
1534                       sizeof(struct i965_cc_unit_state),
1535                       64);
1536     assert(bo);
1537     render_state->cc.state = bo;
1538
1539     dri_bo_unreference(render_state->cc.viewport);
1540     bo = dri_bo_alloc(i965->intel.bufmgr,
1541                       "cc viewport",
1542                       sizeof(struct i965_cc_viewport),
1543                       64);
1544     assert(bo);
1545     render_state->cc.viewport = bo;
1546 }
1547
/*
 * Render SURFACE's source rectangle (srcx/srcy/srcw/srch) into the
 * current draw region at destx/desty/destw/desth, then flush the
 * batch.  The `flag` argument is currently unused.
 */
static void
i965_render_put_surface(VADriverContextP ctx,
                        VASurfaceID surface,
                        short srcx,
                        short srcy,
                        unsigned short srcw,
                        unsigned short srch,
                        short destx,
                        short desty,
                        unsigned short destw,
                        unsigned short desth,
                        unsigned int flag)
{
    struct i965_driver_data *i965 = i965_driver_data(ctx);
    struct intel_batchbuffer *batch = i965->batch;

    i965_render_initialize(ctx);
    i965_surface_render_state_setup(ctx, surface,
                            srcx, srcy, srcw, srch,
                            destx, desty, destw, desth);
    i965_surface_render_pipeline_setup(ctx);
    intel_batchbuffer_flush(batch);
}
1571
1572 static void
1573 i965_render_put_subpicture(VADriverContextP ctx,
1574                            VASurfaceID surface,
1575                            short srcx,
1576                            short srcy,
1577                            unsigned short srcw,
1578                            unsigned short srch,
1579                            short destx,
1580                            short desty,
1581                            unsigned short destw,
1582                            unsigned short desth)
1583 {
1584     struct i965_driver_data *i965 = i965_driver_data(ctx);
1585     struct intel_batchbuffer *batch = i965->batch;
1586     struct object_surface *obj_surface = SURFACE(surface);
1587     struct object_subpic *obj_subpic = SUBPIC(obj_surface->subpic);
1588
1589     assert(obj_subpic);
1590
1591     i965_render_initialize(ctx);
1592     i965_subpic_render_state_setup(ctx, surface,
1593                                    srcx, srcy, srcw, srch,
1594                                    destx, desty, destw, desth);
1595     i965_subpic_render_pipeline_setup(ctx);
1596     i965_render_upload_image_palette(ctx, obj_subpic->image, 0xff);
1597     intel_batchbuffer_flush(batch);
1598 }
1599
1600 /*
1601  * for GEN6+
1602  */
1603 static void 
1604 gen6_render_initialize(VADriverContextP ctx)
1605 {
1606     struct i965_driver_data *i965 = i965_driver_data(ctx);
1607     struct i965_render_state *render_state = &i965->render_state;
1608     dri_bo *bo;
1609
1610     /* VERTEX BUFFER */
1611     dri_bo_unreference(render_state->vb.vertex_buffer);
1612     bo = dri_bo_alloc(i965->intel.bufmgr,
1613                       "vertex buffer",
1614                       4096,
1615                       4096);
1616     assert(bo);
1617     render_state->vb.vertex_buffer = bo;
1618
1619     /* WM */
1620     dri_bo_unreference(render_state->wm.surface_state_binding_table_bo);
1621     bo = dri_bo_alloc(i965->intel.bufmgr,
1622                       "surface state & binding table",
1623                       (SURFACE_STATE_PADDED_SIZE + sizeof(unsigned int)) * MAX_RENDER_SURFACES,
1624                       4096);
1625     assert(bo);
1626     render_state->wm.surface_state_binding_table_bo = bo;
1627
1628     dri_bo_unreference(render_state->wm.sampler);
1629     bo = dri_bo_alloc(i965->intel.bufmgr,
1630                       "sampler state",
1631                       MAX_SAMPLERS * sizeof(struct i965_sampler_state),
1632                       4096);
1633     assert(bo);
1634     render_state->wm.sampler = bo;
1635     render_state->wm.sampler_count = 0;
1636
1637     /* COLOR CALCULATOR */
1638     dri_bo_unreference(render_state->cc.state);
1639     bo = dri_bo_alloc(i965->intel.bufmgr,
1640                       "color calc state",
1641                       sizeof(struct gen6_color_calc_state),
1642                       4096);
1643     assert(bo);
1644     render_state->cc.state = bo;
1645
1646     /* CC VIEWPORT */
1647     dri_bo_unreference(render_state->cc.viewport);
1648     bo = dri_bo_alloc(i965->intel.bufmgr,
1649                       "cc viewport",
1650                       sizeof(struct i965_cc_viewport),
1651                       4096);
1652     assert(bo);
1653     render_state->cc.viewport = bo;
1654
1655     /* BLEND STATE */
1656     dri_bo_unreference(render_state->cc.blend);
1657     bo = dri_bo_alloc(i965->intel.bufmgr,
1658                       "blend state",
1659                       sizeof(struct gen6_blend_state),
1660                       4096);
1661     assert(bo);
1662     render_state->cc.blend = bo;
1663
1664     /* DEPTH & STENCIL STATE */
1665     dri_bo_unreference(render_state->cc.depth_stencil);
1666     bo = dri_bo_alloc(i965->intel.bufmgr,
1667                       "depth & stencil state",
1668                       sizeof(struct gen6_depth_stencil_state),
1669                       4096);
1670     assert(bo);
1671     render_state->cc.depth_stencil = bo;
1672 }
1673
1674 static void
1675 gen6_render_color_calc_state(VADriverContextP ctx)
1676 {
1677     struct i965_driver_data *i965 = i965_driver_data(ctx);
1678     struct i965_render_state *render_state = &i965->render_state;
1679     struct gen6_color_calc_state *color_calc_state;
1680     
1681     dri_bo_map(render_state->cc.state, 1);
1682     assert(render_state->cc.state->virtual);
1683     color_calc_state = render_state->cc.state->virtual;
1684     memset(color_calc_state, 0, sizeof(*color_calc_state));
1685     color_calc_state->constant_r = 1.0;
1686     color_calc_state->constant_g = 0.0;
1687     color_calc_state->constant_b = 1.0;
1688     color_calc_state->constant_a = 1.0;
1689     dri_bo_unmap(render_state->cc.state);
1690 }
1691
1692 static void
1693 gen6_render_blend_state(VADriverContextP ctx)
1694 {
1695     struct i965_driver_data *i965 = i965_driver_data(ctx);
1696     struct i965_render_state *render_state = &i965->render_state;
1697     struct gen6_blend_state *blend_state;
1698     
1699     dri_bo_map(render_state->cc.blend, 1);
1700     assert(render_state->cc.blend->virtual);
1701     blend_state = render_state->cc.blend->virtual;
1702     memset(blend_state, 0, sizeof(*blend_state));
1703     blend_state->blend1.logic_op_enable = 1;
1704     blend_state->blend1.logic_op_func = 0xc;
1705     dri_bo_unmap(render_state->cc.blend);
1706 }
1707
1708 static void
1709 gen6_render_depth_stencil_state(VADriverContextP ctx)
1710 {
1711     struct i965_driver_data *i965 = i965_driver_data(ctx);
1712     struct i965_render_state *render_state = &i965->render_state;
1713     struct gen6_depth_stencil_state *depth_stencil_state;
1714     
1715     dri_bo_map(render_state->cc.depth_stencil, 1);
1716     assert(render_state->cc.depth_stencil->virtual);
1717     depth_stencil_state = render_state->cc.depth_stencil->virtual;
1718     memset(depth_stencil_state, 0, sizeof(*depth_stencil_state));
1719     dri_bo_unmap(render_state->cc.depth_stencil);
1720 }
1721
/*
 * Build all GEN6 indirect state needed to composite a video surface:
 * destination/source surface states, samplers, CC viewport, color-calc,
 * blend and depth-stencil state, the PS constant buffer, and the vertex
 * data for the src->dest rectangle mapping.  Called before
 * gen6_render_emit_states(), which references the BOs filled in here.
 */
static void
gen6_render_setup_states(VADriverContextP ctx,
                         VASurfaceID surface,
                         short srcx,
                         short srcy,
                         unsigned short srcw,
                         unsigned short srch,
                         short destx,
                         short desty,
                         unsigned short destw,
                         unsigned short desth)
{
    i965_render_dest_surface_state(ctx, 0);
    i965_render_src_surfaces_state(ctx, surface);
    i965_render_sampler(ctx);
    i965_render_cc_viewport(ctx);
    gen6_render_color_calc_state(ctx);
    gen6_render_blend_state(ctx);
    gen6_render_depth_stencil_state(ctx);
    i965_render_upload_constants(ctx);
    i965_render_upload_vertex(ctx, surface,
                              srcx, srcy, srcw, srch,
                              destx, desty, destw, desth);
}
1746
/*
 * Emit GEN6 state that is constant for all rendering: select the 3D
 * pipeline, disable multisampling (1 sample/pixel, sample mask 0x1) and
 * clear the system instruction pointer.
 * NOTE(review): unlike the GEN7 variant these dwords are emitted without
 * BEGIN_BATCH/ADVANCE_BATCH bracketing -- presumably safe inside the
 * atomic batch started by gen6_render_emit_states(); confirm against
 * intel_batchbuffer.
 */
static void
gen6_emit_invarient_states(VADriverContextP ctx)
{
    struct i965_driver_data *i965 = i965_driver_data(ctx);
    struct intel_batchbuffer *batch = i965->batch;

    OUT_BATCH(batch, CMD_PIPELINE_SELECT | PIPELINE_SELECT_3D);

    OUT_BATCH(batch, GEN6_3DSTATE_MULTISAMPLE | (3 - 2));
    OUT_BATCH(batch, GEN6_3DSTATE_MULTISAMPLE_PIXEL_LOCATION_CENTER |
              GEN6_3DSTATE_MULTISAMPLE_NUMSAMPLES_1); /* 1 sample/pixel */
    OUT_BATCH(batch, 0);

    OUT_BATCH(batch, GEN6_3DSTATE_SAMPLE_MASK | (2 - 2));
    OUT_BATCH(batch, 1);

    /* Set system instruction pointer */
    OUT_BATCH(batch, CMD_STATE_SIP | 0);
    OUT_BATCH(batch, 0);
}
1767
/*
 * Program CMD_STATE_BASE_ADDRESS (10 dwords): surface state is made
 * relative to the surface-state/binding-table BO; all other bases and
 * upper bounds are left at zero with only the modify-enable bit set.
 */
static void
gen6_emit_state_base_address(VADriverContextP ctx)
{
    struct i965_driver_data *i965 = i965_driver_data(ctx);
    struct intel_batchbuffer *batch = i965->batch;
    struct i965_render_state *render_state = &i965->render_state;

    OUT_BATCH(batch, CMD_STATE_BASE_ADDRESS | (10 - 2));
    OUT_BATCH(batch, BASE_ADDRESS_MODIFY); /* General state base address */
    OUT_RELOC(batch, render_state->wm.surface_state_binding_table_bo, I915_GEM_DOMAIN_INSTRUCTION, 0, BASE_ADDRESS_MODIFY); /* Surface state base address */
    OUT_BATCH(batch, BASE_ADDRESS_MODIFY); /* Dynamic state base address */
    OUT_BATCH(batch, BASE_ADDRESS_MODIFY); /* Indirect object base address */
    OUT_BATCH(batch, BASE_ADDRESS_MODIFY); /* Instruction base address */
    OUT_BATCH(batch, BASE_ADDRESS_MODIFY); /* General state upper bound */
    OUT_BATCH(batch, BASE_ADDRESS_MODIFY); /* Dynamic state upper bound */
    OUT_BATCH(batch, BASE_ADDRESS_MODIFY); /* Indirect object upper bound */
    OUT_BATCH(batch, BASE_ADDRESS_MODIFY); /* Instruction access upper bound */
}
1786
/*
 * Point the hardware at the CC viewport state BO.  Only the CC viewport
 * is modified (clip/SF viewport pointers stay zero).
 */
static void
gen6_emit_viewport_state_pointers(VADriverContextP ctx)
{
    struct i965_driver_data *i965 = i965_driver_data(ctx);
    struct intel_batchbuffer *batch = i965->batch;
    struct i965_render_state *render_state = &i965->render_state;

    OUT_BATCH(batch, GEN6_3DSTATE_VIEWPORT_STATE_POINTERS |
              GEN6_3DSTATE_VIEWPORT_STATE_MODIFY_CC |
              (4 - 2));
    OUT_BATCH(batch, 0);
    OUT_BATCH(batch, 0);
    OUT_RELOC(batch, render_state->cc.viewport, I915_GEM_DOMAIN_INSTRUCTION, 0, 0);
}
1801
/*
 * Configure the GEN6 URB: 24 VS entries of minimal size (the hardware
 * minimum on GEN6) and no GS entries, since no GS thread is used.
 */
static void
gen6_emit_urb(VADriverContextP ctx)
{
    struct i965_driver_data *i965 = i965_driver_data(ctx);
    struct intel_batchbuffer *batch = i965->batch;

    OUT_BATCH(batch, GEN6_3DSTATE_URB | (3 - 2));
    OUT_BATCH(batch, ((1 - 1) << GEN6_3DSTATE_URB_VS_SIZE_SHIFT) |
              (24 << GEN6_3DSTATE_URB_VS_ENTRIES_SHIFT)); /* at least 24 on GEN6 */
    OUT_BATCH(batch, (0 << GEN6_3DSTATE_URB_GS_SIZE_SHIFT) |
              (0 << GEN6_3DSTATE_URB_GS_ENTRIES_SHIFT)); /* no GS thread */
}
1814
/*
 * Point the hardware at the blend, depth/stencil and color-calc state
 * BOs.  The reloc delta of 1 sets the low "pointer valid" bit of each
 * state pointer, as required by 3DSTATE_CC_STATE_POINTERS.
 */
static void
gen6_emit_cc_state_pointers(VADriverContextP ctx)
{
    struct i965_driver_data *i965 = i965_driver_data(ctx);
    struct intel_batchbuffer *batch = i965->batch;
    struct i965_render_state *render_state = &i965->render_state;

    OUT_BATCH(batch, GEN6_3DSTATE_CC_STATE_POINTERS | (4 - 2));
    OUT_RELOC(batch, render_state->cc.blend, I915_GEM_DOMAIN_INSTRUCTION, 0, 1);
    OUT_RELOC(batch, render_state->cc.depth_stencil, I915_GEM_DOMAIN_INSTRUCTION, 0, 1);
    OUT_RELOC(batch, render_state->cc.state, I915_GEM_DOMAIN_INSTRUCTION, 0, 1);
}
1827
/*
 * Point the pixel shader stage at the sampler state BO.  VS and GS
 * sampler pointers are left at zero; only the PS pointer is modified.
 */
static void
gen6_emit_sampler_state_pointers(VADriverContextP ctx)
{
    struct i965_driver_data *i965 = i965_driver_data(ctx);
    struct intel_batchbuffer *batch = i965->batch;
    struct i965_render_state *render_state = &i965->render_state;

    OUT_BATCH(batch, GEN6_3DSTATE_SAMPLER_STATE_POINTERS |
              GEN6_3DSTATE_SAMPLER_STATE_MODIFY_PS |
              (4 - 2));
    OUT_BATCH(batch, 0); /* VS */
    OUT_BATCH(batch, 0); /* GS */
    OUT_RELOC(batch,render_state->wm.sampler, I915_GEM_DOMAIN_INSTRUCTION, 0, 0);
}
1842
/*
 * Set the PS binding table offset (relative to the surface state base
 * address programmed in gen6_emit_state_base_address()).
 */
static void
gen6_emit_binding_table(VADriverContextP ctx)
{
    struct i965_driver_data *i965 = i965_driver_data(ctx);
    struct intel_batchbuffer *batch = i965->batch;

    /* Binding table pointers */
    OUT_BATCH(batch, CMD_BINDING_TABLE_POINTERS |
              GEN6_BINDING_TABLE_MODIFY_PS |
              (4 - 2));
    OUT_BATCH(batch, 0);                /* vs */
    OUT_BATCH(batch, 0);                /* gs */
    /* Only the PS uses the binding table */
    OUT_BATCH(batch, BINDING_TABLE_OFFSET);
}
1858
/*
 * Program a NULL depth buffer (surface type NULL, D32_FLOAT format) and
 * zero clear params -- video compositing needs no depth testing.
 */
static void
gen6_emit_depth_buffer_state(VADriverContextP ctx)
{
    struct i965_driver_data *i965 = i965_driver_data(ctx);
    struct intel_batchbuffer *batch = i965->batch;

    OUT_BATCH(batch, CMD_DEPTH_BUFFER | (7 - 2));
    OUT_BATCH(batch, (I965_SURFACE_NULL << CMD_DEPTH_BUFFER_TYPE_SHIFT) |
              (I965_DEPTHFORMAT_D32_FLOAT << CMD_DEPTH_BUFFER_FORMAT_SHIFT));
    OUT_BATCH(batch, 0);
    OUT_BATCH(batch, 0);
    OUT_BATCH(batch, 0);
    OUT_BATCH(batch, 0);
    OUT_BATCH(batch, 0);

    OUT_BATCH(batch, CMD_CLEAR_PARAMS | (2 - 2));
    OUT_BATCH(batch, 0);
}
1877
/* The drawing rectangle command is unchanged from earlier GENs, so the
 * common i965 helper is reused as-is. */
static void
gen6_emit_drawing_rectangle(VADriverContextP ctx)
{
    i965_render_drawing_rectangle(ctx);
}
1883
/*
 * Disable the vertex shader stage: no VS constant buffer and no VS
 * kernel, so vertices pass through to the next stage unmodified.
 */
static void 
gen6_emit_vs_state(VADriverContextP ctx)
{
    struct i965_driver_data *i965 = i965_driver_data(ctx);
    struct intel_batchbuffer *batch = i965->batch;

    /* disable VS constant buffer */
    OUT_BATCH(batch, GEN6_3DSTATE_CONSTANT_VS | (5 - 2));
    OUT_BATCH(batch, 0);
    OUT_BATCH(batch, 0);
    OUT_BATCH(batch, 0);
    OUT_BATCH(batch, 0);
        
    OUT_BATCH(batch, GEN6_3DSTATE_VS | (6 - 2));
    OUT_BATCH(batch, 0); /* without VS kernel */
    OUT_BATCH(batch, 0);
    OUT_BATCH(batch, 0);
    OUT_BATCH(batch, 0);
    OUT_BATCH(batch, 0); /* pass-through */
}
1904
/*
 * Disable the geometry shader stage: no GS constant buffer and no GS
 * kernel, so primitives pass through unmodified.
 */
static void 
gen6_emit_gs_state(VADriverContextP ctx)
{
    struct i965_driver_data *i965 = i965_driver_data(ctx);
    struct intel_batchbuffer *batch = i965->batch;

    /* disable GS constant buffer */
    OUT_BATCH(batch, GEN6_3DSTATE_CONSTANT_GS | (5 - 2));
    OUT_BATCH(batch, 0);
    OUT_BATCH(batch, 0);
    OUT_BATCH(batch, 0);
    OUT_BATCH(batch, 0);
        
    OUT_BATCH(batch, GEN6_3DSTATE_GS | (7 - 2));
    OUT_BATCH(batch, 0); /* without GS kernel */
    OUT_BATCH(batch, 0);
    OUT_BATCH(batch, 0);
    OUT_BATCH(batch, 0);
    OUT_BATCH(batch, 0);
    OUT_BATCH(batch, 0); /* pass-through */
}
1926
/* Disable the clip stage entirely; the rectlist is already screen-space. */
static void 
gen6_emit_clip_state(VADriverContextP ctx)
{
    struct i965_driver_data *i965 = i965_driver_data(ctx);
    struct intel_batchbuffer *batch = i965->batch;

    OUT_BATCH(batch, GEN6_3DSTATE_CLIP | (4 - 2));
    OUT_BATCH(batch, 0);
    OUT_BATCH(batch, 0); /* pass-through */
    OUT_BATCH(batch, 0);
}
1938
/*
 * Program the 20-dword SF (strips & fans) state: one URB output
 * attribute (the texture coordinate), culling disabled, and vertex 2 as
 * the provoking vertex for TRIFAN/RECTLIST.  All viewport transform and
 * attribute swizzle dwords remain zero.
 */
static void 
gen6_emit_sf_state(VADriverContextP ctx)
{
    struct i965_driver_data *i965 = i965_driver_data(ctx);
    struct intel_batchbuffer *batch = i965->batch;

    OUT_BATCH(batch, GEN6_3DSTATE_SF | (20 - 2));
    OUT_BATCH(batch, (1 << GEN6_3DSTATE_SF_NUM_OUTPUTS_SHIFT) |
              (1 << GEN6_3DSTATE_SF_URB_ENTRY_READ_LENGTH_SHIFT) |
              (0 << GEN6_3DSTATE_SF_URB_ENTRY_READ_OFFSET_SHIFT));
    OUT_BATCH(batch, 0);
    OUT_BATCH(batch, GEN6_3DSTATE_SF_CULL_NONE);
    OUT_BATCH(batch, 2 << GEN6_3DSTATE_SF_TRIFAN_PROVOKE_SHIFT); /* DW4 */
    OUT_BATCH(batch, 0);
    OUT_BATCH(batch, 0);
    OUT_BATCH(batch, 0);
    OUT_BATCH(batch, 0);
    OUT_BATCH(batch, 0); /* DW9 */
    OUT_BATCH(batch, 0);
    OUT_BATCH(batch, 0);
    OUT_BATCH(batch, 0);
    OUT_BATCH(batch, 0);
    OUT_BATCH(batch, 0); /* DW14 */
    OUT_BATCH(batch, 0);
    OUT_BATCH(batch, 0);
    OUT_BATCH(batch, 0);
    OUT_BATCH(batch, 0);
    OUT_BATCH(batch, 0); /* DW19 */
}
1968
/*
 * Program the pixel shader (WM) stage: bind the CURBE BO as PS constant
 * buffer 0 and point the WM at the selected render kernel.  The kernel
 * runs with SIMD16 dispatch, up to 40 threads, dispatch GRF start 6,
 * one SF output and perspective pixel barycentric interpolation.
 *
 * @param kernel  index into render_state->render_kernels (e.g. PS_KERNEL
 *                or PS_SUBPIC_KERNEL)
 */
static void 
gen6_emit_wm_state(VADriverContextP ctx, int kernel)
{
    struct i965_driver_data *i965 = i965_driver_data(ctx);
    struct intel_batchbuffer *batch = i965->batch;
    struct i965_render_state *render_state = &i965->render_state;

    OUT_BATCH(batch, GEN6_3DSTATE_CONSTANT_PS |
              GEN6_3DSTATE_CONSTANT_BUFFER_0_ENABLE |
              (5 - 2));
    OUT_RELOC(batch, 
              render_state->curbe.bo,
              I915_GEM_DOMAIN_INSTRUCTION, 0,
              0);
    OUT_BATCH(batch, 0);
    OUT_BATCH(batch, 0);
    OUT_BATCH(batch, 0);

    OUT_BATCH(batch, GEN6_3DSTATE_WM | (9 - 2));
    OUT_RELOC(batch, render_state->render_kernels[kernel].bo,
              I915_GEM_DOMAIN_INSTRUCTION, 0,
              0);
    OUT_BATCH(batch, (1 << GEN6_3DSTATE_WM_SAMPLER_COUNT_SHITF) |
              (5 << GEN6_3DSTATE_WM_BINDING_TABLE_ENTRY_COUNT_SHIFT));
    OUT_BATCH(batch, 0);
    OUT_BATCH(batch, (6 << GEN6_3DSTATE_WM_DISPATCH_START_GRF_0_SHIFT)); /* DW4 */
    OUT_BATCH(batch, ((40 - 1) << GEN6_3DSTATE_WM_MAX_THREADS_SHIFT) |
              GEN6_3DSTATE_WM_DISPATCH_ENABLE |
              GEN6_3DSTATE_WM_16_DISPATCH_ENABLE);
    OUT_BATCH(batch, (1 << GEN6_3DSTATE_WM_NUM_SF_OUTPUTS_SHIFT) |
              GEN6_3DSTATE_WM_PERSPECTIVE_PIXEL_BARYCENTRIC);
    OUT_BATCH(batch, 0);
    OUT_BATCH(batch, 0);
}
2003
/*
 * Describe the vertex layout: two R32G32_FLOAT elements per vertex from
 * buffer 0 -- position (X, Y) at byte offset 0 and texture coordinate
 * (S0, T0) at byte offset 8 -- each expanded to a vec4 by storing 1.0
 * in the Z and W components.
 */
static void
gen6_emit_vertex_element_state(VADriverContextP ctx)
{
    struct i965_driver_data *i965 = i965_driver_data(ctx);
    struct intel_batchbuffer *batch = i965->batch;

    /* Set up our vertex elements, sourced from the single vertex buffer. */
    OUT_BATCH(batch, CMD_VERTEX_ELEMENTS | (5 - 2));
    /* offset 0: X,Y -> {X, Y, 1.0, 1.0} */
    OUT_BATCH(batch, (0 << GEN6_VE0_VERTEX_BUFFER_INDEX_SHIFT) |
              GEN6_VE0_VALID |
              (I965_SURFACEFORMAT_R32G32_FLOAT << VE0_FORMAT_SHIFT) |
              (0 << VE0_OFFSET_SHIFT));
    OUT_BATCH(batch, (I965_VFCOMPONENT_STORE_SRC << VE1_VFCOMPONENT_0_SHIFT) |
              (I965_VFCOMPONENT_STORE_SRC << VE1_VFCOMPONENT_1_SHIFT) |
              (I965_VFCOMPONENT_STORE_1_FLT << VE1_VFCOMPONENT_2_SHIFT) |
              (I965_VFCOMPONENT_STORE_1_FLT << VE1_VFCOMPONENT_3_SHIFT));
    /* offset 8: S0, T0 -> {S0, T0, 1.0, 1.0} */
    OUT_BATCH(batch, (0 << GEN6_VE0_VERTEX_BUFFER_INDEX_SHIFT) |
              GEN6_VE0_VALID |
              (I965_SURFACEFORMAT_R32G32_FLOAT << VE0_FORMAT_SHIFT) |
              (8 << VE0_OFFSET_SHIFT));
    OUT_BATCH(batch, (I965_VFCOMPONENT_STORE_SRC << VE1_VFCOMPONENT_0_SHIFT) | 
              (I965_VFCOMPONENT_STORE_SRC << VE1_VFCOMPONENT_1_SHIFT) |
              (I965_VFCOMPONENT_STORE_1_FLT << VE1_VFCOMPONENT_2_SHIFT) |
              (I965_VFCOMPONENT_STORE_1_FLT << VE1_VFCOMPONENT_3_SHIFT));
}
2031
/*
 * Bind the vertex buffer (pitch 16 bytes = 4 floats per vertex, end
 * address at 12 dwords = 3 vertices) and draw a single 3-vertex
 * RECTLIST instance with sequential vertex fetch.
 */
static void
gen6_emit_vertices(VADriverContextP ctx)
{
    struct i965_driver_data *i965 = i965_driver_data(ctx);
    struct intel_batchbuffer *batch = i965->batch;
    struct i965_render_state *render_state = &i965->render_state;

    BEGIN_BATCH(batch, 11);
    OUT_BATCH(batch, CMD_VERTEX_BUFFERS | 3);
    OUT_BATCH(batch, 
              (0 << GEN6_VB0_BUFFER_INDEX_SHIFT) |
              GEN6_VB0_VERTEXDATA |
              ((4 * 4) << VB0_BUFFER_PITCH_SHIFT));
    OUT_RELOC(batch, render_state->vb.vertex_buffer, I915_GEM_DOMAIN_VERTEX, 0, 0);
    OUT_RELOC(batch, render_state->vb.vertex_buffer, I915_GEM_DOMAIN_VERTEX, 0, 12 * 4);
    OUT_BATCH(batch, 0);

    OUT_BATCH(batch, 
              CMD_3DPRIMITIVE |
              _3DPRIMITIVE_VERTEX_SEQUENTIAL |
              (_3DPRIM_RECTLIST << _3DPRIMITIVE_TOPOLOGY_SHIFT) |
              (0 << 9) |
              4);
    OUT_BATCH(batch, 3); /* vertex count per instance */
    OUT_BATCH(batch, 0); /* start vertex offset */
    OUT_BATCH(batch, 1); /* single instance */
    OUT_BATCH(batch, 0); /* start instance location */
    OUT_BATCH(batch, 0); /* index buffer offset, ignored */
    ADVANCE_BATCH(batch);
}
2062
/*
 * Emit the complete GEN6 command sequence for one composite: flush,
 * invariant state, base addresses, pipeline stage setup (VS/GS disabled,
 * clip pass-through, SF, WM with the given kernel), binding table,
 * null depth buffer, drawing rectangle and finally the draw itself.
 * The whole sequence is emitted atomically so it cannot be split across
 * batch buffers.  Emission order follows hardware requirements -- do
 * not reorder.
 */
static void
gen6_render_emit_states(VADriverContextP ctx, int kernel)
{
    struct i965_driver_data *i965 = i965_driver_data(ctx);
    struct intel_batchbuffer *batch = i965->batch;

    intel_batchbuffer_start_atomic(batch, 0x1000);
    intel_batchbuffer_emit_mi_flush(batch);
    gen6_emit_invarient_states(ctx);
    gen6_emit_state_base_address(ctx);
    gen6_emit_viewport_state_pointers(ctx);
    gen6_emit_urb(ctx);
    gen6_emit_cc_state_pointers(ctx);
    gen6_emit_sampler_state_pointers(ctx);
    gen6_emit_vs_state(ctx);
    gen6_emit_gs_state(ctx);
    gen6_emit_clip_state(ctx);
    gen6_emit_sf_state(ctx);
    gen6_emit_wm_state(ctx, kernel);
    gen6_emit_binding_table(ctx);
    gen6_emit_depth_buffer_state(ctx);
    gen6_emit_drawing_rectangle(ctx);
    gen6_emit_vertex_element_state(ctx);
    gen6_emit_vertices(ctx);
    intel_batchbuffer_end_atomic(batch);
}
2089
/*
 * GEN6 PutSurface entry point: (re)allocate render buffers, build all
 * indirect state for the src->dest rectangle, clear the destination
 * region, emit the rendering commands with the main PS kernel and
 * submit the batch.
 * NOTE(review): the 'flag' parameter is currently unused here.
 */
static void
gen6_render_put_surface(VADriverContextP ctx,
                        VASurfaceID surface,
                        short srcx,
                        short srcy,
                        unsigned short srcw,
                        unsigned short srch,
                        short destx,
                        short desty,
                        unsigned short destw,
                        unsigned short desth,
                        unsigned int flag)
{
    struct i965_driver_data *i965 = i965_driver_data(ctx);
    struct intel_batchbuffer *batch = i965->batch;

    gen6_render_initialize(ctx);
    gen6_render_setup_states(ctx, surface,
                             srcx, srcy, srcw, srch,
                             destx, desty, destw, desth);
    i965_clear_dest_region(ctx);
    gen6_render_emit_states(ctx, PS_KERNEL);
    intel_batchbuffer_flush(batch);
}
2114
2115 static void
2116 gen6_subpicture_render_blend_state(VADriverContextP ctx)
2117 {
2118     struct i965_driver_data *i965 = i965_driver_data(ctx);
2119     struct i965_render_state *render_state = &i965->render_state;
2120     struct gen6_blend_state *blend_state;
2121
2122     dri_bo_unmap(render_state->cc.state);    
2123     dri_bo_map(render_state->cc.blend, 1);
2124     assert(render_state->cc.blend->virtual);
2125     blend_state = render_state->cc.blend->virtual;
2126     memset(blend_state, 0, sizeof(*blend_state));
2127     blend_state->blend0.dest_blend_factor = I965_BLENDFACTOR_INV_SRC_ALPHA;
2128     blend_state->blend0.source_blend_factor = I965_BLENDFACTOR_SRC_ALPHA;
2129     blend_state->blend0.blend_func = I965_BLENDFUNCTION_ADD;
2130     blend_state->blend0.blend_enable = 1;
2131     blend_state->blend1.post_blend_clamp_enable = 1;
2132     blend_state->blend1.pre_blend_clamp_enable = 1;
2133     blend_state->blend1.clamp_range = 0; /* clamp range [0, 1] */
2134     dri_bo_unmap(render_state->cc.blend);
2135 }
2136
/*
 * Build all GEN6 indirect state for compositing a subpicture onto a
 * surface: like gen6_render_setup_states() but with subpicture source
 * surfaces, alpha blending instead of a logic-op copy, and vertex data
 * derived from the subpicture's output rectangle.
 * NOTE(review): the src/dest parameters other than destx/desty/destw/
 * desth are currently unused; the output rectangle alone drives the
 * vertex upload.
 */
static void
gen6_subpicture_render_setup_states(VADriverContextP ctx,
                                    VASurfaceID surface,
                                    short srcx,
                                    short srcy,
                                    unsigned short srcw,
                                    unsigned short srch,
                                    short destx,
                                    short desty,
                                    unsigned short destw,
                                    unsigned short desth)
{
    VARectangle output_rect;

    output_rect.x      = destx;
    output_rect.y      = desty;
    output_rect.width  = destw;
    output_rect.height = desth;

    i965_render_dest_surface_state(ctx, 0);
    i965_subpic_render_src_surfaces_state(ctx, surface);
    i965_render_sampler(ctx);
    i965_render_cc_viewport(ctx);
    gen6_render_color_calc_state(ctx);
    gen6_subpicture_render_blend_state(ctx);
    gen6_render_depth_stencil_state(ctx);
    i965_subpic_render_upload_vertex(ctx, surface, &output_rect);
}
2165
2166 static void
2167 gen6_render_put_subpicture(VADriverContextP ctx,
2168                            VASurfaceID surface,
2169                            short srcx,
2170                            short srcy,
2171                            unsigned short srcw,
2172                            unsigned short srch,
2173                            short destx,
2174                            short desty,
2175                            unsigned short destw,
2176                            unsigned short desth)
2177 {
2178     struct i965_driver_data *i965 = i965_driver_data(ctx);
2179     struct intel_batchbuffer *batch = i965->batch;
2180     struct object_surface *obj_surface = SURFACE(surface);
2181     struct object_subpic *obj_subpic = SUBPIC(obj_surface->subpic);
2182
2183     assert(obj_subpic);
2184     gen6_render_initialize(ctx);
2185     gen6_subpicture_render_setup_states(ctx, surface,
2186                                         srcx, srcy, srcw, srch,
2187                                         destx, desty, destw, desth);
2188     gen6_render_emit_states(ctx, PS_SUBPIC_KERNEL);
2189     i965_render_upload_image_palette(ctx, obj_subpic->image, 0xff);
2190     intel_batchbuffer_flush(batch);
2191 }
2192
2193 /*
2194  * for GEN7
2195  */
2196 static void 
2197 gen7_render_initialize(VADriverContextP ctx)
2198 {
2199     struct i965_driver_data *i965 = i965_driver_data(ctx);
2200     struct i965_render_state *render_state = &i965->render_state;
2201     dri_bo *bo;
2202
2203     /* VERTEX BUFFER */
2204     dri_bo_unreference(render_state->vb.vertex_buffer);
2205     bo = dri_bo_alloc(i965->intel.bufmgr,
2206                       "vertex buffer",
2207                       4096,
2208                       4096);
2209     assert(bo);
2210     render_state->vb.vertex_buffer = bo;
2211
2212     /* WM */
2213     dri_bo_unreference(render_state->wm.surface_state_binding_table_bo);
2214     bo = dri_bo_alloc(i965->intel.bufmgr,
2215                       "surface state & binding table",
2216                       (SURFACE_STATE_PADDED_SIZE + sizeof(unsigned int)) * MAX_RENDER_SURFACES,
2217                       4096);
2218     assert(bo);
2219     render_state->wm.surface_state_binding_table_bo = bo;
2220
2221     dri_bo_unreference(render_state->wm.sampler);
2222     bo = dri_bo_alloc(i965->intel.bufmgr,
2223                       "sampler state",
2224                       MAX_SAMPLERS * sizeof(struct gen7_sampler_state),
2225                       4096);
2226     assert(bo);
2227     render_state->wm.sampler = bo;
2228     render_state->wm.sampler_count = 0;
2229
2230     /* COLOR CALCULATOR */
2231     dri_bo_unreference(render_state->cc.state);
2232     bo = dri_bo_alloc(i965->intel.bufmgr,
2233                       "color calc state",
2234                       sizeof(struct gen6_color_calc_state),
2235                       4096);
2236     assert(bo);
2237     render_state->cc.state = bo;
2238
2239     /* CC VIEWPORT */
2240     dri_bo_unreference(render_state->cc.viewport);
2241     bo = dri_bo_alloc(i965->intel.bufmgr,
2242                       "cc viewport",
2243                       sizeof(struct i965_cc_viewport),
2244                       4096);
2245     assert(bo);
2246     render_state->cc.viewport = bo;
2247
2248     /* BLEND STATE */
2249     dri_bo_unreference(render_state->cc.blend);
2250     bo = dri_bo_alloc(i965->intel.bufmgr,
2251                       "blend state",
2252                       sizeof(struct gen6_blend_state),
2253                       4096);
2254     assert(bo);
2255     render_state->cc.blend = bo;
2256
2257     /* DEPTH & STENCIL STATE */
2258     dri_bo_unreference(render_state->cc.depth_stencil);
2259     bo = dri_bo_alloc(i965->intel.bufmgr,
2260                       "depth & stencil state",
2261                       sizeof(struct gen6_depth_stencil_state),
2262                       4096);
2263     assert(bo);
2264     render_state->cc.depth_stencil = bo;
2265 }
2266
2267 static void
2268 gen7_render_color_calc_state(VADriverContextP ctx)
2269 {
2270     struct i965_driver_data *i965 = i965_driver_data(ctx);
2271     struct i965_render_state *render_state = &i965->render_state;
2272     struct gen6_color_calc_state *color_calc_state;
2273     
2274     dri_bo_map(render_state->cc.state, 1);
2275     assert(render_state->cc.state->virtual);
2276     color_calc_state = render_state->cc.state->virtual;
2277     memset(color_calc_state, 0, sizeof(*color_calc_state));
2278     color_calc_state->constant_r = 1.0;
2279     color_calc_state->constant_g = 0.0;
2280     color_calc_state->constant_b = 1.0;
2281     color_calc_state->constant_a = 1.0;
2282     dri_bo_unmap(render_state->cc.state);
2283 }
2284
2285 static void
2286 gen7_render_blend_state(VADriverContextP ctx)
2287 {
2288     struct i965_driver_data *i965 = i965_driver_data(ctx);
2289     struct i965_render_state *render_state = &i965->render_state;
2290     struct gen6_blend_state *blend_state;
2291     
2292     dri_bo_map(render_state->cc.blend, 1);
2293     assert(render_state->cc.blend->virtual);
2294     blend_state = render_state->cc.blend->virtual;
2295     memset(blend_state, 0, sizeof(*blend_state));
2296     blend_state->blend1.logic_op_enable = 1;
2297     blend_state->blend1.logic_op_func = 0xc;
2298     blend_state->blend1.pre_blend_clamp_enable = 1;
2299     dri_bo_unmap(render_state->cc.blend);
2300 }
2301
2302 static void
2303 gen7_render_depth_stencil_state(VADriverContextP ctx)
2304 {
2305     struct i965_driver_data *i965 = i965_driver_data(ctx);
2306     struct i965_render_state *render_state = &i965->render_state;
2307     struct gen6_depth_stencil_state *depth_stencil_state;
2308     
2309     dri_bo_map(render_state->cc.depth_stencil, 1);
2310     assert(render_state->cc.depth_stencil->virtual);
2311     depth_stencil_state = render_state->cc.depth_stencil->virtual;
2312     memset(depth_stencil_state, 0, sizeof(*depth_stencil_state));
2313     dri_bo_unmap(render_state->cc.depth_stencil);
2314 }
2315
2316 static void 
2317 gen7_render_sampler(VADriverContextP ctx)
2318 {
2319     struct i965_driver_data *i965 = i965_driver_data(ctx);
2320     struct i965_render_state *render_state = &i965->render_state;
2321     struct gen7_sampler_state *sampler_state;
2322     int i;
2323     
2324     assert(render_state->wm.sampler_count > 0);
2325     assert(render_state->wm.sampler_count <= MAX_SAMPLERS);
2326
2327     dri_bo_map(render_state->wm.sampler, 1);
2328     assert(render_state->wm.sampler->virtual);
2329     sampler_state = render_state->wm.sampler->virtual;
2330     for (i = 0; i < render_state->wm.sampler_count; i++) {
2331         memset(sampler_state, 0, sizeof(*sampler_state));
2332         sampler_state->ss0.min_filter = I965_MAPFILTER_LINEAR;
2333         sampler_state->ss0.mag_filter = I965_MAPFILTER_LINEAR;
2334         sampler_state->ss3.r_wrap_mode = I965_TEXCOORDMODE_CLAMP;
2335         sampler_state->ss3.s_wrap_mode = I965_TEXCOORDMODE_CLAMP;
2336         sampler_state->ss3.t_wrap_mode = I965_TEXCOORDMODE_CLAMP;
2337         sampler_state++;
2338     }
2339
2340     dri_bo_unmap(render_state->wm.sampler);
2341 }
2342
/*
 * Build all GEN7 indirect state needed to composite a video surface:
 * destination/source surface states, GEN7-layout samplers, CC viewport,
 * color-calc, blend and depth-stencil state, the PS constant buffer and
 * the vertex data for the src->dest rectangle mapping.
 */
static void
gen7_render_setup_states(VADriverContextP ctx,
                         VASurfaceID surface,
                         short srcx,
                         short srcy,
                         unsigned short srcw,
                         unsigned short srch,
                         short destx,
                         short desty,
                         unsigned short destw,
                         unsigned short desth)
{
    i965_render_dest_surface_state(ctx, 0);
    i965_render_src_surfaces_state(ctx, surface);
    gen7_render_sampler(ctx);
    i965_render_cc_viewport(ctx);
    gen7_render_color_calc_state(ctx);
    gen7_render_blend_state(ctx);
    gen7_render_depth_stencil_state(ctx);
    i965_render_upload_constants(ctx);
    i965_render_upload_vertex(ctx, surface,
                              srcx, srcy, srcw, srch,
                              destx, desty, destw, desth);
}
2367
/*
 * Emit GEN7 state that is constant for all rendering: select the 3D
 * pipeline, disable multisampling (note 3DSTATE_MULTISAMPLE grew to 4
 * dwords on GEN7), set a sample mask of 0x1 and clear the system
 * instruction pointer.
 */
static void
gen7_emit_invarient_states(VADriverContextP ctx)
{
    struct i965_driver_data *i965 = i965_driver_data(ctx);
    struct intel_batchbuffer *batch = i965->batch;

    BEGIN_BATCH(batch, 1);
    OUT_BATCH(batch, CMD_PIPELINE_SELECT | PIPELINE_SELECT_3D);
    ADVANCE_BATCH(batch);

    BEGIN_BATCH(batch, 4);
    OUT_BATCH(batch, GEN6_3DSTATE_MULTISAMPLE | (4 - 2));
    OUT_BATCH(batch, GEN6_3DSTATE_MULTISAMPLE_PIXEL_LOCATION_CENTER |
              GEN6_3DSTATE_MULTISAMPLE_NUMSAMPLES_1); /* 1 sample/pixel */
    OUT_BATCH(batch, 0);
    OUT_BATCH(batch, 0);
    ADVANCE_BATCH(batch);

    BEGIN_BATCH(batch, 2);
    OUT_BATCH(batch, GEN6_3DSTATE_SAMPLE_MASK | (2 - 2));
    OUT_BATCH(batch, 1);
    ADVANCE_BATCH(batch);

    /* Set system instruction pointer */
    BEGIN_BATCH(batch, 2);
    OUT_BATCH(batch, CMD_STATE_SIP | 0);
    OUT_BATCH(batch, 0);
    ADVANCE_BATCH(batch);
}
2397
/*
 * Program CMD_STATE_BASE_ADDRESS for GEN7 (10 dwords): surface state is
 * made relative to the surface-state/binding-table BO; all other bases
 * and upper bounds are left at zero with only the modify-enable bit set.
 */
static void
gen7_emit_state_base_address(VADriverContextP ctx)
{
    struct i965_driver_data *i965 = i965_driver_data(ctx);
    struct intel_batchbuffer *batch = i965->batch;
    struct i965_render_state *render_state = &i965->render_state;

    OUT_BATCH(batch, CMD_STATE_BASE_ADDRESS | (10 - 2));
    OUT_BATCH(batch, BASE_ADDRESS_MODIFY); /* General state base address */
    OUT_RELOC(batch, render_state->wm.surface_state_binding_table_bo, I915_GEM_DOMAIN_INSTRUCTION, 0, BASE_ADDRESS_MODIFY); /* Surface state base address */
    OUT_BATCH(batch, BASE_ADDRESS_MODIFY); /* Dynamic state base address */
    OUT_BATCH(batch, BASE_ADDRESS_MODIFY); /* Indirect object base address */
    OUT_BATCH(batch, BASE_ADDRESS_MODIFY); /* Instruction base address */
    OUT_BATCH(batch, BASE_ADDRESS_MODIFY); /* General state upper bound */
    OUT_BATCH(batch, BASE_ADDRESS_MODIFY); /* Dynamic state upper bound */
    OUT_BATCH(batch, BASE_ADDRESS_MODIFY); /* Indirect object upper bound */
    OUT_BATCH(batch, BASE_ADDRESS_MODIFY); /* Instruction access upper bound */
}
2416
/*
 * Point the hardware at the CC viewport BO and clear the SF/CLIP
 * viewport pointer.  On GEN7 these are two separate commands rather
 * than the single GEN6 3DSTATE_VIEWPORT_STATE_POINTERS.
 */
static void
gen7_emit_viewport_state_pointers(VADriverContextP ctx)
{
    struct i965_driver_data *i965 = i965_driver_data(ctx);
    struct intel_batchbuffer *batch = i965->batch;
    struct i965_render_state *render_state = &i965->render_state;

    BEGIN_BATCH(batch, 2);
    OUT_BATCH(batch, GEN7_3DSTATE_VIEWPORT_STATE_POINTERS_CC | (2 - 2));
    OUT_RELOC(batch,
              render_state->cc.viewport,
              I915_GEM_DOMAIN_INSTRUCTION, 0,
              0);
    ADVANCE_BATCH(batch);

    BEGIN_BATCH(batch, 2);
    OUT_BATCH(batch, GEN7_3DSTATE_VIEWPORT_STATE_POINTERS_SF_CL | (2 - 2));
    OUT_BATCH(batch, 0);
    ADVANCE_BATCH(batch);
}
2437
2438 /*
2439  * URB layout on GEN7 
2440  * ----------------------------------------
2441  * | PS Push Constants (8KB) | VS entries |
2442  * ----------------------------------------
2443  */
2444 static void
2445 gen7_emit_urb(VADriverContextP ctx)
2446 {
2447     struct i965_driver_data *i965 = i965_driver_data(ctx);
2448     struct intel_batchbuffer *batch = i965->batch;
2449
2450     BEGIN_BATCH(batch, 2);
2451     OUT_BATCH(batch, GEN7_3DSTATE_PUSH_CONSTANT_ALLOC_PS | (2 - 2));
2452     OUT_BATCH(batch, 8); /* in 1KBs */
2453     ADVANCE_BATCH(batch);
2454
2455     BEGIN_BATCH(batch, 2);
2456     OUT_BATCH(batch, GEN7_3DSTATE_URB_VS | (2 - 2));
2457     OUT_BATCH(batch, 
2458               (32 << GEN7_URB_ENTRY_NUMBER_SHIFT) | /* at least 32 */
2459               (2 - 1) << GEN7_URB_ENTRY_SIZE_SHIFT |
2460               (1 << GEN7_URB_STARTING_ADDRESS_SHIFT));
2461    ADVANCE_BATCH(batch);
2462
2463    BEGIN_BATCH(batch, 2);
2464    OUT_BATCH(batch, GEN7_3DSTATE_URB_GS | (2 - 2));
2465    OUT_BATCH(batch,
2466              (0 << GEN7_URB_ENTRY_SIZE_SHIFT) |
2467              (1 << GEN7_URB_STARTING_ADDRESS_SHIFT));
2468    ADVANCE_BATCH(batch);
2469
2470    BEGIN_BATCH(batch, 2);
2471    OUT_BATCH(batch, GEN7_3DSTATE_URB_HS | (2 - 2));
2472    OUT_BATCH(batch,
2473              (0 << GEN7_URB_ENTRY_SIZE_SHIFT) |
2474              (2 << GEN7_URB_STARTING_ADDRESS_SHIFT));
2475    ADVANCE_BATCH(batch);
2476
2477    BEGIN_BATCH(batch, 2);
2478    OUT_BATCH(batch, GEN7_3DSTATE_URB_DS | (2 - 2));
2479    OUT_BATCH(batch,
2480              (0 << GEN7_URB_ENTRY_SIZE_SHIFT) |
2481              (2 << GEN7_URB_STARTING_ADDRESS_SHIFT));
2482    ADVANCE_BATCH(batch);
2483 }
2484
2485 static void
2486 gen7_emit_cc_state_pointers(VADriverContextP ctx)
2487 {
2488     struct i965_driver_data *i965 = i965_driver_data(ctx);
2489     struct intel_batchbuffer *batch = i965->batch;
2490     struct i965_render_state *render_state = &i965->render_state;
2491
2492     BEGIN_BATCH(batch, 2);
2493     OUT_BATCH(batch, GEN6_3DSTATE_CC_STATE_POINTERS | (2 - 2));
2494     OUT_RELOC(batch,
2495               render_state->cc.state,
2496               I915_GEM_DOMAIN_INSTRUCTION, 0,
2497               1);
2498     ADVANCE_BATCH(batch);
2499
2500     BEGIN_BATCH(batch, 2);
2501     OUT_BATCH(batch, GEN7_3DSTATE_BLEND_STATE_POINTERS | (2 - 2));
2502     OUT_RELOC(batch,
2503               render_state->cc.blend,
2504               I915_GEM_DOMAIN_INSTRUCTION, 0,
2505               1);
2506     ADVANCE_BATCH(batch);
2507
2508     BEGIN_BATCH(batch, 2);
2509     OUT_BATCH(batch, GEN7_3DSTATE_DEPTH_STENCIL_STATE_POINTERS | (2 - 2));
2510     OUT_RELOC(batch,
2511               render_state->cc.depth_stencil,
2512               I915_GEM_DOMAIN_INSTRUCTION, 0, 
2513               1);
2514     ADVANCE_BATCH(batch);
2515 }
2516
2517 static void
2518 gen7_emit_sampler_state_pointers(VADriverContextP ctx)
2519 {
2520     struct i965_driver_data *i965 = i965_driver_data(ctx);
2521     struct intel_batchbuffer *batch = i965->batch;
2522     struct i965_render_state *render_state = &i965->render_state;
2523
2524     BEGIN_BATCH(batch, 2);
2525     OUT_BATCH(batch, GEN7_3DSTATE_SAMPLER_STATE_POINTERS_PS | (2 - 2));
2526     OUT_RELOC(batch,
2527               render_state->wm.sampler,
2528               I915_GEM_DOMAIN_INSTRUCTION, 0,
2529               0);
2530     ADVANCE_BATCH(batch);
2531 }
2532
2533 static void
2534 gen7_emit_binding_table(VADriverContextP ctx)
2535 {
2536     struct i965_driver_data *i965 = i965_driver_data(ctx);
2537     struct intel_batchbuffer *batch = i965->batch;
2538
2539     BEGIN_BATCH(batch, 2);
2540     OUT_BATCH(batch, GEN7_3DSTATE_BINDING_TABLE_POINTERS_PS | (2 - 2));
2541     OUT_BATCH(batch, BINDING_TABLE_OFFSET);
2542     ADVANCE_BATCH(batch);
2543 }
2544
2545 static void
2546 gen7_emit_depth_buffer_state(VADriverContextP ctx)
2547 {
2548     struct i965_driver_data *i965 = i965_driver_data(ctx);
2549     struct intel_batchbuffer *batch = i965->batch;
2550
2551     BEGIN_BATCH(batch, 7);
2552     OUT_BATCH(batch, GEN7_3DSTATE_DEPTH_BUFFER | (7 - 2));
2553     OUT_BATCH(batch,
2554               (I965_DEPTHFORMAT_D32_FLOAT << 18) |
2555               (I965_SURFACE_NULL << 29));
2556     OUT_BATCH(batch, 0);
2557     OUT_BATCH(batch, 0);
2558     OUT_BATCH(batch, 0);
2559     OUT_BATCH(batch, 0);
2560     OUT_BATCH(batch, 0);
2561     ADVANCE_BATCH(batch);
2562
2563     BEGIN_BATCH(batch, 3);
2564     OUT_BATCH(batch, GEN7_3DSTATE_CLEAR_PARAMS | (3 - 2));
2565     OUT_BATCH(batch, 0);
2566     OUT_BATCH(batch, 0);
2567     ADVANCE_BATCH(batch);
2568 }
2569
static void
gen7_emit_drawing_rectangle(VADriverContextP ctx)
{
    /* The drawing-rectangle command is unchanged from earlier gens,
     * so reuse the common emitter. */
    i965_render_drawing_rectangle(ctx);
}
2575
2576 static void 
2577 gen7_emit_vs_state(VADriverContextP ctx)
2578 {
2579     struct i965_driver_data *i965 = i965_driver_data(ctx);
2580     struct intel_batchbuffer *batch = i965->batch;
2581
2582     /* disable VS constant buffer */
2583     OUT_BATCH(batch, GEN6_3DSTATE_CONSTANT_VS | (7 - 2));
2584     OUT_BATCH(batch, 0);
2585     OUT_BATCH(batch, 0);
2586     OUT_BATCH(batch, 0);
2587     OUT_BATCH(batch, 0);
2588     OUT_BATCH(batch, 0);
2589     OUT_BATCH(batch, 0);
2590         
2591     OUT_BATCH(batch, GEN6_3DSTATE_VS | (6 - 2));
2592     OUT_BATCH(batch, 0); /* without VS kernel */
2593     OUT_BATCH(batch, 0);
2594     OUT_BATCH(batch, 0);
2595     OUT_BATCH(batch, 0);
2596     OUT_BATCH(batch, 0); /* pass-through */
2597 }
2598
/*
 * Disable every geometry stage the render path does not use: GS, HS,
 * TE, DS and stream-out.  Each stage gets its constant buffer, its
 * main state command (no kernel, zeroed fields) and, where applicable,
 * a zeroed binding-table pointer.
 */
static void 
gen7_emit_bypass_state(VADriverContextP ctx)
{
    struct i965_driver_data *i965 = i965_driver_data(ctx);
    struct intel_batchbuffer *batch = i965->batch;

    /* bypass GS */
    BEGIN_BATCH(batch, 7);
    OUT_BATCH(batch, GEN6_3DSTATE_CONSTANT_GS | (7 - 2));
    OUT_BATCH(batch, 0);
    OUT_BATCH(batch, 0);
    OUT_BATCH(batch, 0);
    OUT_BATCH(batch, 0);
    OUT_BATCH(batch, 0);
    OUT_BATCH(batch, 0);
    ADVANCE_BATCH(batch);

    BEGIN_BATCH(batch, 7);
    OUT_BATCH(batch, GEN6_3DSTATE_GS | (7 - 2));
    OUT_BATCH(batch, 0); /* without GS kernel */
    OUT_BATCH(batch, 0);
    OUT_BATCH(batch, 0);
    OUT_BATCH(batch, 0);
    OUT_BATCH(batch, 0);
    OUT_BATCH(batch, 0); /* pass-through */
    ADVANCE_BATCH(batch);

    BEGIN_BATCH(batch, 2);
    OUT_BATCH(batch, GEN7_3DSTATE_BINDING_TABLE_POINTERS_GS | (2 - 2));
    OUT_BATCH(batch, 0);
    ADVANCE_BATCH(batch);

    /* disable HS */
    BEGIN_BATCH(batch, 7);
    OUT_BATCH(batch, GEN7_3DSTATE_CONSTANT_HS | (7 - 2));
    OUT_BATCH(batch, 0);
    OUT_BATCH(batch, 0);
    OUT_BATCH(batch, 0);
    OUT_BATCH(batch, 0);
    OUT_BATCH(batch, 0);
    OUT_BATCH(batch, 0);
    ADVANCE_BATCH(batch);

    BEGIN_BATCH(batch, 7);
    OUT_BATCH(batch, GEN7_3DSTATE_HS | (7 - 2));
    OUT_BATCH(batch, 0);
    OUT_BATCH(batch, 0);
    OUT_BATCH(batch, 0);
    OUT_BATCH(batch, 0);
    OUT_BATCH(batch, 0);
    OUT_BATCH(batch, 0);
    ADVANCE_BATCH(batch);

    BEGIN_BATCH(batch, 2);
    OUT_BATCH(batch, GEN7_3DSTATE_BINDING_TABLE_POINTERS_HS | (2 - 2));
    OUT_BATCH(batch, 0);
    ADVANCE_BATCH(batch);

    /* Disable TE */
    BEGIN_BATCH(batch, 4);
    OUT_BATCH(batch, GEN7_3DSTATE_TE | (4 - 2));
    OUT_BATCH(batch, 0);
    OUT_BATCH(batch, 0);
    OUT_BATCH(batch, 0);
    ADVANCE_BATCH(batch);

    /* Disable DS */
    BEGIN_BATCH(batch, 7);
    OUT_BATCH(batch, GEN7_3DSTATE_CONSTANT_DS | (7 - 2));
    OUT_BATCH(batch, 0);
    OUT_BATCH(batch, 0);
    OUT_BATCH(batch, 0);
    OUT_BATCH(batch, 0);
    OUT_BATCH(batch, 0);
    OUT_BATCH(batch, 0);
    ADVANCE_BATCH(batch);

    BEGIN_BATCH(batch, 6);
    OUT_BATCH(batch, GEN7_3DSTATE_DS | (6 - 2));
    OUT_BATCH(batch, 0);
    OUT_BATCH(batch, 0);
    OUT_BATCH(batch, 0);
    OUT_BATCH(batch, 0);
    OUT_BATCH(batch, 0);
    ADVANCE_BATCH(batch);

    BEGIN_BATCH(batch, 2);
    OUT_BATCH(batch, GEN7_3DSTATE_BINDING_TABLE_POINTERS_DS | (2 - 2));
    OUT_BATCH(batch, 0);
    ADVANCE_BATCH(batch);

    /* Disable STREAMOUT */
    BEGIN_BATCH(batch, 3);
    OUT_BATCH(batch, GEN7_3DSTATE_STREAMOUT | (3 - 2));
    OUT_BATCH(batch, 0);
    OUT_BATCH(batch, 0);
    ADVANCE_BATCH(batch);
}
2697
2698 static void 
2699 gen7_emit_clip_state(VADriverContextP ctx)
2700 {
2701     struct i965_driver_data *i965 = i965_driver_data(ctx);
2702     struct intel_batchbuffer *batch = i965->batch;
2703
2704     OUT_BATCH(batch, GEN6_3DSTATE_CLIP | (4 - 2));
2705     OUT_BATCH(batch, 0);
2706     OUT_BATCH(batch, 0); /* pass-through */
2707     OUT_BATCH(batch, 0);
2708 }
2709
2710 static void 
2711 gen7_emit_sf_state(VADriverContextP ctx)
2712 {
2713     struct i965_driver_data *i965 = i965_driver_data(ctx);
2714     struct intel_batchbuffer *batch = i965->batch;
2715
2716     BEGIN_BATCH(batch, 14);
2717     OUT_BATCH(batch, GEN7_3DSTATE_SBE | (14 - 2));
2718     OUT_BATCH(batch,
2719               (1 << GEN7_SBE_NUM_OUTPUTS_SHIFT) |
2720               (1 << GEN7_SBE_URB_ENTRY_READ_LENGTH_SHIFT) |
2721               (0 << GEN7_SBE_URB_ENTRY_READ_OFFSET_SHIFT));
2722     OUT_BATCH(batch, 0);
2723     OUT_BATCH(batch, 0);
2724     OUT_BATCH(batch, 0); /* DW4 */
2725     OUT_BATCH(batch, 0);
2726     OUT_BATCH(batch, 0);
2727     OUT_BATCH(batch, 0);
2728     OUT_BATCH(batch, 0);
2729     OUT_BATCH(batch, 0); /* DW9 */
2730     OUT_BATCH(batch, 0);
2731     OUT_BATCH(batch, 0);
2732     OUT_BATCH(batch, 0);
2733     OUT_BATCH(batch, 0);
2734     ADVANCE_BATCH(batch);
2735
2736     BEGIN_BATCH(batch, 7);
2737     OUT_BATCH(batch, GEN6_3DSTATE_SF | (7 - 2));
2738     OUT_BATCH(batch, 0);
2739     OUT_BATCH(batch, GEN6_3DSTATE_SF_CULL_NONE);
2740     OUT_BATCH(batch, 2 << GEN6_3DSTATE_SF_TRIFAN_PROVOKE_SHIFT);
2741     OUT_BATCH(batch, 0);
2742     OUT_BATCH(batch, 0);
2743     OUT_BATCH(batch, 0);
2744     ADVANCE_BATCH(batch);
2745 }
2746
/*
 * Program the WM/PS stages: enable pixel dispatch, point the PS push
 * constants at the CURBE buffer and select the render kernel given by
 * 'kernel' (an index into render_state->render_kernels).
 */
static void 
gen7_emit_wm_state(VADriverContextP ctx, int kernel)
{
    struct i965_driver_data *i965 = i965_driver_data(ctx);
    struct intel_batchbuffer *batch = i965->batch;
    struct i965_render_state *render_state = &i965->render_state;

    BEGIN_BATCH(batch, 3);
    OUT_BATCH(batch, GEN6_3DSTATE_WM | (3 - 2));
    OUT_BATCH(batch,
              GEN7_WM_DISPATCH_ENABLE |
              GEN7_WM_PERSPECTIVE_PIXEL_BARYCENTRIC);
    OUT_BATCH(batch, 0);
    ADVANCE_BATCH(batch);

    /* PS constants sourced from the CURBE bo.  The '1' presumably is the
     * constant-buffer-0 read length field -- TODO confirm against the
     * gen7 PRM. */
    BEGIN_BATCH(batch, 7);
    OUT_BATCH(batch, GEN6_3DSTATE_CONSTANT_PS | (7 - 2));
    OUT_BATCH(batch, 1);
    OUT_BATCH(batch, 0);
    OUT_RELOC(batch, 
              render_state->curbe.bo,
              I915_GEM_DOMAIN_INSTRUCTION, 0,
              0);
    OUT_BATCH(batch, 0);
    OUT_BATCH(batch, 0);
    OUT_BATCH(batch, 0);
    ADVANCE_BATCH(batch);

    /* PS state: kernel 0 only, 1 sampler group, 5 binding-table entries,
     * SIMD16 dispatch starting at GRF 6. */
    BEGIN_BATCH(batch, 8);
    OUT_BATCH(batch, GEN7_3DSTATE_PS | (8 - 2));
    OUT_RELOC(batch, 
              render_state->render_kernels[kernel].bo,
              I915_GEM_DOMAIN_INSTRUCTION, 0,
              0);
    OUT_BATCH(batch, 
              (1 << GEN7_PS_SAMPLER_COUNT_SHIFT) |
              (5 << GEN7_PS_BINDING_TABLE_ENTRY_COUNT_SHIFT));
    OUT_BATCH(batch, 0); /* scratch space base offset */
    OUT_BATCH(batch, 
              ((86 - 1) << GEN7_PS_MAX_THREADS_SHIFT) |
              GEN7_PS_PUSH_CONSTANT_ENABLE |
              GEN7_PS_ATTRIBUTE_ENABLE |
              GEN7_PS_16_DISPATCH_ENABLE);
    OUT_BATCH(batch, 
              (6 << GEN7_PS_DISPATCH_START_GRF_SHIFT_0));
    OUT_BATCH(batch, 0); /* kernel 1 pointer */
    OUT_BATCH(batch, 0); /* kernel 2 pointer */
    ADVANCE_BATCH(batch);
}
2796
2797 static void
2798 gen7_emit_vertex_element_state(VADriverContextP ctx)
2799 {
2800     struct i965_driver_data *i965 = i965_driver_data(ctx);
2801     struct intel_batchbuffer *batch = i965->batch;
2802
2803     /* Set up our vertex elements, sourced from the single vertex buffer. */
2804     OUT_BATCH(batch, CMD_VERTEX_ELEMENTS | (5 - 2));
2805     /* offset 0: X,Y -> {X, Y, 1.0, 1.0} */
2806     OUT_BATCH(batch, (0 << GEN6_VE0_VERTEX_BUFFER_INDEX_SHIFT) |
2807               GEN6_VE0_VALID |
2808               (I965_SURFACEFORMAT_R32G32_FLOAT << VE0_FORMAT_SHIFT) |
2809               (0 << VE0_OFFSET_SHIFT));
2810     OUT_BATCH(batch, (I965_VFCOMPONENT_STORE_SRC << VE1_VFCOMPONENT_0_SHIFT) |
2811               (I965_VFCOMPONENT_STORE_SRC << VE1_VFCOMPONENT_1_SHIFT) |
2812               (I965_VFCOMPONENT_STORE_1_FLT << VE1_VFCOMPONENT_2_SHIFT) |
2813               (I965_VFCOMPONENT_STORE_1_FLT << VE1_VFCOMPONENT_3_SHIFT));
2814     /* offset 8: S0, T0 -> {S0, T0, 1.0, 1.0} */
2815     OUT_BATCH(batch, (0 << GEN6_VE0_VERTEX_BUFFER_INDEX_SHIFT) |
2816               GEN6_VE0_VALID |
2817               (I965_SURFACEFORMAT_R32G32_FLOAT << VE0_FORMAT_SHIFT) |
2818               (8 << VE0_OFFSET_SHIFT));
2819     OUT_BATCH(batch, (I965_VFCOMPONENT_STORE_SRC << VE1_VFCOMPONENT_0_SHIFT) | 
2820               (I965_VFCOMPONENT_STORE_SRC << VE1_VFCOMPONENT_1_SHIFT) |
2821               (I965_VFCOMPONENT_STORE_1_FLT << VE1_VFCOMPONENT_2_SHIFT) |
2822               (I965_VFCOMPONENT_STORE_1_FLT << VE1_VFCOMPONENT_3_SHIFT));
2823 }
2824
/*
 * Bind the vertex buffer (4 floats per vertex: X, Y, S, T -- pitch 16
 * bytes) and kick off a 3-vertex RECTLIST primitive.
 */
static void
gen7_emit_vertices(VADriverContextP ctx)
{
    struct i965_driver_data *i965 = i965_driver_data(ctx);
    struct intel_batchbuffer *batch = i965->batch;
    struct i965_render_state *render_state = &i965->render_state;

    BEGIN_BATCH(batch, 5);
    OUT_BATCH(batch, CMD_VERTEX_BUFFERS | (5 - 2));
    OUT_BATCH(batch, 
              (0 << GEN6_VB0_BUFFER_INDEX_SHIFT) |
              GEN6_VB0_VERTEXDATA |
              GEN7_VB0_ADDRESS_MODIFYENABLE |
              ((4 * 4) << VB0_BUFFER_PITCH_SHIFT));
    /* start of buffer, then (presumably) the end address at 12 dwords
     * in -- 3 vertices * 4 floats; TODO confirm end-address semantics */
    OUT_RELOC(batch, render_state->vb.vertex_buffer, I915_GEM_DOMAIN_VERTEX, 0, 0);
    OUT_RELOC(batch, render_state->vb.vertex_buffer, I915_GEM_DOMAIN_VERTEX, 0, 12 * 4);
    OUT_BATCH(batch, 0);
    ADVANCE_BATCH(batch);

    BEGIN_BATCH(batch, 7);
    OUT_BATCH(batch, CMD_3DPRIMITIVE | (7 - 2));
    OUT_BATCH(batch,
              _3DPRIM_RECTLIST |
              GEN7_3DPRIM_VERTEXBUFFER_ACCESS_SEQUENTIAL);
    OUT_BATCH(batch, 3); /* vertex count per instance */
    OUT_BATCH(batch, 0); /* start vertex offset */
    OUT_BATCH(batch, 1); /* single instance */
    OUT_BATCH(batch, 0); /* start instance location */
    OUT_BATCH(batch, 0);
    ADVANCE_BATCH(batch);
}
2856
2857 static void
2858 gen7_render_emit_states(VADriverContextP ctx, int kernel)
2859 {
2860     struct i965_driver_data *i965 = i965_driver_data(ctx);
2861     struct intel_batchbuffer *batch = i965->batch;
2862
2863     intel_batchbuffer_start_atomic(batch, 0x1000);
2864     intel_batchbuffer_emit_mi_flush(batch);
2865     gen7_emit_invarient_states(ctx);
2866     gen7_emit_state_base_address(ctx);
2867     gen7_emit_viewport_state_pointers(ctx);
2868     gen7_emit_urb(ctx);
2869     gen7_emit_cc_state_pointers(ctx);
2870     gen7_emit_sampler_state_pointers(ctx);
2871     gen7_emit_bypass_state(ctx);
2872     gen7_emit_vs_state(ctx);
2873     gen7_emit_clip_state(ctx);
2874     gen7_emit_sf_state(ctx);
2875     gen7_emit_wm_state(ctx, kernel);
2876     gen7_emit_binding_table(ctx);
2877     gen7_emit_depth_buffer_state(ctx);
2878     gen7_emit_drawing_rectangle(ctx);
2879     gen7_emit_vertex_element_state(ctx);
2880     gen7_emit_vertices(ctx);
2881     intel_batchbuffer_end_atomic(batch);
2882 }
2883
2884 static void
2885 gen7_render_put_surface(VADriverContextP ctx,
2886                         VASurfaceID surface,
2887                         short srcx,
2888                         short srcy,
2889                         unsigned short srcw,
2890                         unsigned short srch,
2891                         short destx,
2892                         short desty,
2893                         unsigned short destw,
2894                         unsigned short desth,
2895                         unsigned int flag)
2896 {
2897     struct i965_driver_data *i965 = i965_driver_data(ctx);
2898     struct intel_batchbuffer *batch = i965->batch;
2899
2900     gen7_render_initialize(ctx);
2901     gen7_render_setup_states(ctx, surface,
2902                              srcx, srcy, srcw, srch,
2903                              destx, desty, destw, desth);
2904     i965_clear_dest_region(ctx);
2905     gen7_render_emit_states(ctx, PS_KERNEL);
2906     intel_batchbuffer_flush(batch);
2907 }
2908
/*
 * Build the gen7 blend state used for subpicture rendering: standard
 * source-over alpha blending (src_alpha / inv_src_alpha, ADD) with
 * pre/post-blend clamping to [0, 1].
 */
static void
gen7_subpicture_render_blend_state(VADriverContextP ctx)
{
    struct i965_driver_data *i965 = i965_driver_data(ctx);
    struct i965_render_state *render_state = &i965->render_state;
    struct gen6_blend_state *blend_state;

    /* NOTE(review): this unmaps cc.state, which is never mapped in this
     * function -- presumably an earlier color-calc setup leaves it
     * mapped; confirm, otherwise this line is a leftover. */
    dri_bo_unmap(render_state->cc.state);
    dri_bo_map(render_state->cc.blend, 1); /* map writable */
    assert(render_state->cc.blend->virtual);
    blend_state = render_state->cc.blend->virtual;
    memset(blend_state, 0, sizeof(*blend_state));
    blend_state->blend0.dest_blend_factor = I965_BLENDFACTOR_INV_SRC_ALPHA;
    blend_state->blend0.source_blend_factor = I965_BLENDFACTOR_SRC_ALPHA;
    blend_state->blend0.blend_func = I965_BLENDFUNCTION_ADD;
    blend_state->blend0.blend_enable = 1;
    blend_state->blend1.post_blend_clamp_enable = 1;
    blend_state->blend1.pre_blend_clamp_enable = 1;
    blend_state->blend1.clamp_range = 0; /* clamp range [0, 1] */
    dri_bo_unmap(render_state->cc.blend);
}
2930
2931 static void
2932 gen7_subpicture_render_setup_states(VADriverContextP ctx,
2933                                     VASurfaceID surface,
2934                                     short srcx,
2935                                     short srcy,
2936                                     unsigned short srcw,
2937                                     unsigned short srch,
2938                                     short destx,
2939                                     short desty,
2940                                     unsigned short destw,
2941                                     unsigned short desth)
2942 {
2943     VARectangle output_rect;
2944
2945     output_rect.x      = destx;
2946     output_rect.y      = desty;
2947     output_rect.width  = destw;
2948     output_rect.height = desth;
2949
2950     i965_render_dest_surface_state(ctx, 0);
2951     i965_subpic_render_src_surfaces_state(ctx, surface);
2952     i965_render_sampler(ctx);
2953     i965_render_cc_viewport(ctx);
2954     gen7_render_color_calc_state(ctx);
2955     gen7_subpicture_render_blend_state(ctx);
2956     gen7_render_depth_stencil_state(ctx);
2957     i965_subpic_render_upload_vertex(ctx, surface, &output_rect);
2958 }
2959
2960 static void
2961 gen7_render_put_subpicture(VADriverContextP ctx,
2962                            VASurfaceID surface,
2963                            short srcx,
2964                            short srcy,
2965                            unsigned short srcw,
2966                            unsigned short srch,
2967                            short destx,
2968                            short desty,
2969                            unsigned short destw,
2970                            unsigned short desth)
2971 {
2972     struct i965_driver_data *i965 = i965_driver_data(ctx);
2973     struct intel_batchbuffer *batch = i965->batch;
2974     struct object_surface *obj_surface = SURFACE(surface);
2975     struct object_subpic *obj_subpic = SUBPIC(obj_surface->subpic);
2976
2977     assert(obj_subpic);
2978     gen7_render_initialize(ctx);
2979     gen7_subpicture_render_setup_states(ctx, surface,
2980                                         srcx, srcy, srcw, srch,
2981                                         destx, desty, destw, desth);
2982     gen7_render_emit_states(ctx, PS_SUBPIC_KERNEL);
2983     i965_render_upload_image_palette(ctx, obj_subpic->image, 0xff);
2984     intel_batchbuffer_flush(batch);
2985 }
2986
2987
2988 /*
2989  * global functions
2990  */
2991 void
2992 intel_render_put_surface(VADriverContextP ctx,
2993                         VASurfaceID surface,
2994                         short srcx,
2995                         short srcy,
2996                         unsigned short srcw,
2997                         unsigned short srch,
2998                         short destx,
2999                         short desty,
3000                         unsigned short destw,
3001                         unsigned short desth,
3002                         unsigned int flag)
3003 {
3004     struct i965_driver_data *i965 = i965_driver_data(ctx);
3005
3006     i965_post_processing(ctx, surface,
3007                          srcx, srcy, srcw, srch,
3008                          destx, desty, destw, desth,
3009                          flag);
3010
3011     if (IS_GEN7(i965->intel.device_id))
3012         gen7_render_put_surface(ctx, surface,
3013                                 srcx, srcy, srcw, srch,
3014                                 destx, desty, destw, desth,
3015                                 flag);
3016     else if (IS_GEN6(i965->intel.device_id))
3017         gen6_render_put_surface(ctx, surface,
3018                                 srcx, srcy, srcw, srch,
3019                                 destx, desty, destw, desth,
3020                                 flag);
3021     else
3022         i965_render_put_surface(ctx, surface,
3023                                 srcx, srcy, srcw, srch,
3024                                 destx, desty, destw, desth,
3025                                 flag);
3026 }
3027
3028 void
3029 intel_render_put_subpicture(VADriverContextP ctx,
3030                            VASurfaceID surface,
3031                            short srcx,
3032                            short srcy,
3033                            unsigned short srcw,
3034                            unsigned short srch,
3035                            short destx,
3036                            short desty,
3037                            unsigned short destw,
3038                            unsigned short desth)
3039 {
3040     struct i965_driver_data *i965 = i965_driver_data(ctx);
3041
3042     if (IS_GEN7(i965->intel.device_id))
3043         gen7_render_put_subpicture(ctx, surface,
3044                                    srcx, srcy, srcw, srch,
3045                                    destx, desty, destw, desth);
3046     else if (IS_GEN6(i965->intel.device_id))
3047         gen6_render_put_subpicture(ctx, surface,
3048                                    srcx, srcy, srcw, srch,
3049                                    destx, desty, destw, desth);
3050     else
3051         i965_render_put_subpicture(ctx, surface,
3052                                    srcx, srcy, srcw, srch,
3053                                    destx, desty, destw, desth);
3054 }
3055
3056 Bool 
3057 i965_render_init(VADriverContextP ctx)
3058 {
3059     struct i965_driver_data *i965 = i965_driver_data(ctx);
3060     struct i965_render_state *render_state = &i965->render_state;
3061     int i;
3062
3063     /* kernel */
3064     assert(NUM_RENDER_KERNEL == (sizeof(render_kernels_gen5) / 
3065                                  sizeof(render_kernels_gen5[0])));
3066     assert(NUM_RENDER_KERNEL == (sizeof(render_kernels_gen6) / 
3067                                  sizeof(render_kernels_gen6[0])));
3068
3069     if (IS_GEN7(i965->intel.device_id))
3070         memcpy(render_state->render_kernels, render_kernels_gen7, sizeof(render_state->render_kernels));
3071     else if (IS_GEN6(i965->intel.device_id))
3072         memcpy(render_state->render_kernels, render_kernels_gen6, sizeof(render_state->render_kernels));
3073     else if (IS_IRONLAKE(i965->intel.device_id))
3074         memcpy(render_state->render_kernels, render_kernels_gen5, sizeof(render_state->render_kernels));
3075     else
3076         memcpy(render_state->render_kernels, render_kernels_gen4, sizeof(render_state->render_kernels));
3077
3078     for (i = 0; i < NUM_RENDER_KERNEL; i++) {
3079         struct i965_kernel *kernel = &render_state->render_kernels[i];
3080
3081         if (!kernel->size)
3082             continue;
3083
3084         kernel->bo = dri_bo_alloc(i965->intel.bufmgr, 
3085                                   kernel->name, 
3086                                   kernel->size, 0x1000);
3087         assert(kernel->bo);
3088         dri_bo_subdata(kernel->bo, 0, kernel->size, kernel->bin);
3089     }
3090
3091     /* constant buffer */
3092     render_state->curbe.bo = dri_bo_alloc(i965->intel.bufmgr,
3093                       "constant buffer",
3094                       4096, 64);
3095     assert(render_state->curbe.bo);
3096     render_state->curbe.upload = 0;
3097
3098     return True;
3099 }
3100
3101 Bool 
3102 i965_render_terminate(VADriverContextP ctx)
3103 {
3104     int i;
3105     struct i965_driver_data *i965 = i965_driver_data(ctx);
3106     struct i965_render_state *render_state = &i965->render_state;
3107
3108     dri_bo_unreference(render_state->curbe.bo);
3109     render_state->curbe.bo = NULL;
3110
3111     for (i = 0; i < NUM_RENDER_KERNEL; i++) {
3112         struct i965_kernel *kernel = &render_state->render_kernels[i];
3113         
3114         dri_bo_unreference(kernel->bo);
3115         kernel->bo = NULL;
3116     }
3117
3118     dri_bo_unreference(render_state->vb.vertex_buffer);
3119     render_state->vb.vertex_buffer = NULL;
3120     dri_bo_unreference(render_state->vs.state);
3121     render_state->vs.state = NULL;
3122     dri_bo_unreference(render_state->sf.state);
3123     render_state->sf.state = NULL;
3124     dri_bo_unreference(render_state->wm.sampler);
3125     render_state->wm.sampler = NULL;
3126     dri_bo_unreference(render_state->wm.state);
3127     render_state->wm.state = NULL;
3128     dri_bo_unreference(render_state->wm.surface_state_binding_table_bo);
3129     dri_bo_unreference(render_state->cc.viewport);
3130     render_state->cc.viewport = NULL;
3131     dri_bo_unreference(render_state->cc.state);
3132     render_state->cc.state = NULL;
3133     dri_bo_unreference(render_state->cc.blend);
3134     render_state->cc.blend = NULL;
3135     dri_bo_unreference(render_state->cc.depth_stencil);
3136     render_state->cc.depth_stencil = NULL;
3137
3138     if (render_state->draw_region) {
3139         dri_bo_unreference(render_state->draw_region->bo);
3140         free(render_state->draw_region);
3141         render_state->draw_region = NULL;
3142     }
3143
3144     return True;
3145 }
3146