/*
 * Copyright © 2006 Intel Corporation
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice (including the next
 * paragraph) shall be included in all copies or substantial portions of the
 * Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
 * DEALINGS IN THE SOFTWARE.
 *
 * Authors:
 *    Eric Anholt <eric@anholt.net>
 *    Keith Packard <keithp@keithp.com>
 *    Xiang Haihao <haihao.xiang@intel.com>
 *
 */

/*
 * Most of the rendering code is ported from xf86-video-intel/src/i965_video.c
 */

#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <assert.h>

#include <va/va_drmcommon.h>

#include "intel_batchbuffer.h"
#include "intel_driver.h"
#include "i965_defines.h"
#include "i965_drv_video.h"
#include "i965_structs.h"

#include "i965_render.h"

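/*
 * The render kernels below are precompiled GEN shader binaries generated
 * from the sources under shaders/render/; each array row holds one
 * 128-bit instruction (four uint32_t words).
 */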
#define SF_KERNEL_NUM_GRF       16
#define SF_MAX_THREADS          1

static const uint32_t sf_kernel_static[][4] =
{
#include "shaders/render/exa_sf.g4b"
};

#define PS_KERNEL_NUM_GRF       32
#define PS_MAX_THREADS          32

#define I965_GRF_BLOCKS(nreg)   ((nreg + 15) / 16 - 1)

static const uint32_t ps_kernel_static[][4] =
{
#include "shaders/render/exa_wm_xy.g4b"
#include "shaders/render/exa_wm_src_affine.g4b"
#include "shaders/render/exa_wm_src_sample_planar.g4b"
#include "shaders/render/exa_wm_yuv_rgb.g4b"
#include "shaders/render/exa_wm_write.g4b"
};
static const uint32_t ps_subpic_kernel_static[][4] =
{
#include "shaders/render/exa_wm_xy.g4b"
#include "shaders/render/exa_wm_src_affine.g4b"
#include "shaders/render/exa_wm_src_sample_argb.g4b"
#include "shaders/render/exa_wm_write.g4b"
};

/* On IRONLAKE */
static const uint32_t sf_kernel_static_gen5[][4] =
{
#include "shaders/render/exa_sf.g4b.gen5"
};

static const uint32_t ps_kernel_static_gen5[][4] =
{
#include "shaders/render/exa_wm_xy.g4b.gen5"
#include "shaders/render/exa_wm_src_affine.g4b.gen5"
#include "shaders/render/exa_wm_src_sample_planar.g4b.gen5"
#include "shaders/render/exa_wm_yuv_rgb.g4b.gen5"
#include "shaders/render/exa_wm_write.g4b.gen5"
};
static const uint32_t ps_subpic_kernel_static_gen5[][4] =
{
#include "shaders/render/exa_wm_xy.g4b.gen5"
#include "shaders/render/exa_wm_src_affine.g4b.gen5"
#include "shaders/render/exa_wm_src_sample_argb.g4b.gen5"
#include "shaders/render/exa_wm_write.g4b.gen5"
};

/* programs for Sandybridge */
static const uint32_t sf_kernel_static_gen6[][4] =
{
};
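/* GEN6+ uses the fixed-function SF stage, so no SF kernel is needed;
 * the array is presumably kept empty so the per-generation kernel
 * tables below stay uniform. */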

static const uint32_t ps_kernel_static_gen6[][4] = {
#include "shaders/render/exa_wm_src_affine.g6b"
#include "shaders/render/exa_wm_src_sample_planar.g6b"
#include "shaders/render/exa_wm_yuv_rgb.g6b"
#include "shaders/render/exa_wm_write.g6b"
};

static const uint32_t ps_subpic_kernel_static_gen6[][4] = {
#include "shaders/render/exa_wm_src_affine.g6b"
#include "shaders/render/exa_wm_src_sample_argb.g6b"
#include "shaders/render/exa_wm_write.g6b"
};

/* programs for Ivybridge */
static const uint32_t sf_kernel_static_gen7[][4] =
{
};

static const uint32_t ps_kernel_static_gen7[][4] = {
#include "shaders/render/exa_wm_src_affine.g7b"
#include "shaders/render/exa_wm_src_sample_planar.g7b"
#include "shaders/render/exa_wm_yuv_rgb.g7b"
#include "shaders/render/exa_wm_write.g7b"
};

static const uint32_t ps_subpic_kernel_static_gen7[][4] = {
#include "shaders/render/exa_wm_src_affine.g7b"
#include "shaders/render/exa_wm_src_sample_argb.g7b"
#include "shaders/render/exa_wm_write.g7b"
};

/* Programs for Haswell */
static const uint32_t ps_kernel_static_gen7_haswell[][4] = {
#include "shaders/render/exa_wm_src_affine.g7b"
#include "shaders/render/exa_wm_src_sample_planar.g7b.haswell"
#include "shaders/render/exa_wm_yuv_rgb.g7b"
#include "shaders/render/exa_wm_write.g7b"
};

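/*
 * Surface state entries are padded to the larger of the pre-GEN7 and
 * GEN7 layouts so a single buffer layout works on every generation;
 * the binding table starts right after the MAX_RENDER_SURFACES entries.
 */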
#define SURFACE_STATE_PADDED_SIZE_I965  ALIGN(sizeof(struct i965_surface_state), 32)
#define SURFACE_STATE_PADDED_SIZE_GEN7  ALIGN(sizeof(struct gen7_surface_state), 32)
#define SURFACE_STATE_PADDED_SIZE       MAX(SURFACE_STATE_PADDED_SIZE_I965, SURFACE_STATE_PADDED_SIZE_GEN7)
#define SURFACE_STATE_OFFSET(index)     (SURFACE_STATE_PADDED_SIZE * (index))
#define BINDING_TABLE_OFFSET            SURFACE_STATE_OFFSET(MAX_RENDER_SURFACES)

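/*
 * Reinterpret a float's IEEE-754 bit pattern as a uint32_t through a
 * union (avoiding the strict-aliasing problems of a pointer cast) so
 * that floating-point constants can be emitted with OUT_BATCH().
 */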
static uint32_t float_to_uint(float f)
{
    union {
        uint32_t i;
        float f;
    } x;

    x.f = f;
    return x.i;
}

enum
{
    SF_KERNEL = 0,
    PS_KERNEL,
    PS_SUBPIC_KERNEL
};

static struct i965_kernel render_kernels_gen4[] = {
    {
        "SF",
        SF_KERNEL,
        sf_kernel_static,
        sizeof(sf_kernel_static),
        NULL
    },
    {
        "PS",
        PS_KERNEL,
        ps_kernel_static,
        sizeof(ps_kernel_static),
        NULL
    },
    {
        "PS_SUBPIC",
        PS_SUBPIC_KERNEL,
        ps_subpic_kernel_static,
        sizeof(ps_subpic_kernel_static),
        NULL
    }
};

static struct i965_kernel render_kernels_gen5[] = {
    {
        "SF",
        SF_KERNEL,
        sf_kernel_static_gen5,
        sizeof(sf_kernel_static_gen5),
        NULL
    },
    {
        "PS",
        PS_KERNEL,
        ps_kernel_static_gen5,
        sizeof(ps_kernel_static_gen5),
        NULL
    },
    {
        "PS_SUBPIC",
        PS_SUBPIC_KERNEL,
        ps_subpic_kernel_static_gen5,
        sizeof(ps_subpic_kernel_static_gen5),
        NULL
    }
};

static struct i965_kernel render_kernels_gen6[] = {
    {
        "SF",
        SF_KERNEL,
        sf_kernel_static_gen6,
        sizeof(sf_kernel_static_gen6),
        NULL
    },
    {
        "PS",
        PS_KERNEL,
        ps_kernel_static_gen6,
        sizeof(ps_kernel_static_gen6),
        NULL
    },
    {
        "PS_SUBPIC",
        PS_SUBPIC_KERNEL,
        ps_subpic_kernel_static_gen6,
        sizeof(ps_subpic_kernel_static_gen6),
        NULL
    }
};

static struct i965_kernel render_kernels_gen7[] = {
    {
        "SF",
        SF_KERNEL,
        sf_kernel_static_gen7,
        sizeof(sf_kernel_static_gen7),
        NULL
    },
    {
        "PS",
        PS_KERNEL,
        ps_kernel_static_gen7,
        sizeof(ps_kernel_static_gen7),
        NULL
    },
    {
        "PS_SUBPIC",
        PS_SUBPIC_KERNEL,
        ps_subpic_kernel_static_gen7,
        sizeof(ps_subpic_kernel_static_gen7),
        NULL
    }
};

static struct i965_kernel render_kernels_gen7_haswell[] = {
    {
        "SF",
        SF_KERNEL,
        sf_kernel_static_gen7,
        sizeof(sf_kernel_static_gen7),
        NULL
    },
    {
        "PS",
        PS_KERNEL,
        ps_kernel_static_gen7_haswell,
        sizeof(ps_kernel_static_gen7_haswell),
        NULL
    },
    {
        "PS_SUBPIC",
        PS_SUBPIC_KERNEL,
        ps_subpic_kernel_static_gen7,
        sizeof(ps_subpic_kernel_static_gen7),
        NULL
    }
};

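/*
 * Static URB partition for the fixed-function pipeline.  Only the VS,
 * SF and CS (CURBE) stages get URB entries; GS and CLIP are disabled.
 * Each stage's fence is later programmed as the cumulative end offset
 * of its region (see i965_render_urb_layout()).
 */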
#define URB_VS_ENTRIES        8
#define URB_VS_ENTRY_SIZE     1

#define URB_GS_ENTRIES        0
#define URB_GS_ENTRY_SIZE     0

#define URB_CLIP_ENTRIES      0
#define URB_CLIP_ENTRY_SIZE   0

#define URB_SF_ENTRIES        1
#define URB_SF_ENTRY_SIZE     2

#define URB_CS_ENTRIES        1
#define URB_CS_ENTRY_SIZE     1

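/*
 * The VS unit is configured as pass-through: no kernel is dispatched
 * (vs_enable = 0) and the vertex cache is disabled, since the RECTLIST
 * vertices are consumed as-is.
 */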
static void
i965_render_vs_unit(VADriverContextP ctx)
{
    struct i965_driver_data *i965 = i965_driver_data(ctx);
    struct i965_render_state *render_state = &i965->render_state;
    struct i965_vs_unit_state *vs_state;

    dri_bo_map(render_state->vs.state, 1);
    assert(render_state->vs.state->virtual);
    vs_state = render_state->vs.state->virtual;
    memset(vs_state, 0, sizeof(*vs_state));

    if (IS_IRONLAKE(i965->intel.device_id))
        vs_state->thread4.nr_urb_entries = URB_VS_ENTRIES >> 2;
    else
        vs_state->thread4.nr_urb_entries = URB_VS_ENTRIES;

    vs_state->thread4.urb_entry_allocation_size = URB_VS_ENTRY_SIZE - 1;
    vs_state->vs6.vs_enable = 0;
    vs_state->vs6.vert_cache_disable = 1;

    dri_bo_unmap(render_state->vs.state);
}

static void
i965_render_sf_unit(VADriverContextP ctx)
{
    struct i965_driver_data *i965 = i965_driver_data(ctx);
    struct i965_render_state *render_state = &i965->render_state;
    struct i965_sf_unit_state *sf_state;

    dri_bo_map(render_state->sf.state, 1);
    assert(render_state->sf.state->virtual);
    sf_state = render_state->sf.state->virtual;
    memset(sf_state, 0, sizeof(*sf_state));

    sf_state->thread0.grf_reg_count = I965_GRF_BLOCKS(SF_KERNEL_NUM_GRF);
    sf_state->thread0.kernel_start_pointer = render_state->render_kernels[SF_KERNEL].bo->offset >> 6;

    sf_state->sf1.single_program_flow = 1; /* XXX */
    sf_state->sf1.binding_table_entry_count = 0;
    sf_state->sf1.thread_priority = 0;
    sf_state->sf1.floating_point_mode = 0; /* Mesa does this */
    sf_state->sf1.illegal_op_exception_enable = 1;
    sf_state->sf1.mask_stack_exception_enable = 1;
    sf_state->sf1.sw_exception_enable = 1;

    /* scratch space is not used in our kernel */
    sf_state->thread2.per_thread_scratch_space = 0;
    sf_state->thread2.scratch_space_base_pointer = 0;

    sf_state->thread3.const_urb_entry_read_length = 0; /* no const URBs */
    sf_state->thread3.const_urb_entry_read_offset = 0; /* no const URBs */
    sf_state->thread3.urb_entry_read_length = 1; /* 1 URB per vertex */
    sf_state->thread3.urb_entry_read_offset = 0;
    sf_state->thread3.dispatch_grf_start_reg = 3;

    sf_state->thread4.max_threads = SF_MAX_THREADS - 1;
    sf_state->thread4.urb_entry_allocation_size = URB_SF_ENTRY_SIZE - 1;
    sf_state->thread4.nr_urb_entries = URB_SF_ENTRIES;
    sf_state->thread4.stats_enable = 1;

    sf_state->sf5.viewport_transform = 0; /* skip viewport */

    sf_state->sf6.cull_mode = I965_CULLMODE_NONE;
    sf_state->sf6.scissor = 0;

    sf_state->sf7.trifan_pv = 2;

    sf_state->sf6.dest_org_vbias = 0x8;
    sf_state->sf6.dest_org_hbias = 0x8;

    dri_bo_emit_reloc(render_state->sf.state,
                      I915_GEM_DOMAIN_INSTRUCTION, 0,
                      sf_state->thread0.grf_reg_count << 1,
                      offsetof(struct i965_sf_unit_state, thread0),
                      render_state->render_kernels[SF_KERNEL].bo);

    dri_bo_unmap(render_state->sf.state);
}

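/*
 * All samplers use bilinear filtering with coordinates clamped to the
 * surface edges; one sampler is programmed per bound source surface.
 */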
static void
i965_render_sampler(VADriverContextP ctx)
{
    struct i965_driver_data *i965 = i965_driver_data(ctx);
    struct i965_render_state *render_state = &i965->render_state;
    struct i965_sampler_state *sampler_state;
    int i;

    assert(render_state->wm.sampler_count > 0);
    assert(render_state->wm.sampler_count <= MAX_SAMPLERS);

    dri_bo_map(render_state->wm.sampler, 1);
    assert(render_state->wm.sampler->virtual);
    sampler_state = render_state->wm.sampler->virtual;
    for (i = 0; i < render_state->wm.sampler_count; i++) {
        memset(sampler_state, 0, sizeof(*sampler_state));
        sampler_state->ss0.min_filter = I965_MAPFILTER_LINEAR;
        sampler_state->ss0.mag_filter = I965_MAPFILTER_LINEAR;
        sampler_state->ss1.r_wrap_mode = I965_TEXCOORDMODE_CLAMP;
        sampler_state->ss1.s_wrap_mode = I965_TEXCOORDMODE_CLAMP;
        sampler_state->ss1.t_wrap_mode = I965_TEXCOORDMODE_CLAMP;
        sampler_state++;
    }

    dri_bo_unmap(render_state->wm.sampler);
}

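/*
 * WM (pixel shader) unit for subpicture blending.  It mirrors
 * i965_render_wm_unit() below but points at the PS_SUBPIC kernel and
 * programs thread3 for a kernel that reads no CURBE constants.
 */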
static void
i965_subpic_render_wm_unit(VADriverContextP ctx)
{
    struct i965_driver_data *i965 = i965_driver_data(ctx);
    struct i965_render_state *render_state = &i965->render_state;
    struct i965_wm_unit_state *wm_state;

    assert(render_state->wm.sampler);

    dri_bo_map(render_state->wm.state, 1);
    assert(render_state->wm.state->virtual);
    wm_state = render_state->wm.state->virtual;
    memset(wm_state, 0, sizeof(*wm_state));

    wm_state->thread0.grf_reg_count = I965_GRF_BLOCKS(PS_KERNEL_NUM_GRF);
    wm_state->thread0.kernel_start_pointer = render_state->render_kernels[PS_SUBPIC_KERNEL].bo->offset >> 6;

    wm_state->thread1.single_program_flow = 1; /* XXX */

    if (IS_IRONLAKE(i965->intel.device_id))
        wm_state->thread1.binding_table_entry_count = 0; /* hardware requirement */
    else
        wm_state->thread1.binding_table_entry_count = 7;

    wm_state->thread2.scratch_space_base_pointer = 0;
    wm_state->thread2.per_thread_scratch_space = 0; /* 1024 bytes */

    wm_state->thread3.dispatch_grf_start_reg = 3; /* XXX */
    wm_state->thread3.const_urb_entry_read_length = 0;
    wm_state->thread3.const_urb_entry_read_offset = 0;
    wm_state->thread3.urb_entry_read_length = 1; /* XXX */
    wm_state->thread3.urb_entry_read_offset = 0; /* XXX */

    wm_state->wm4.stats_enable = 0;
    wm_state->wm4.sampler_state_pointer = render_state->wm.sampler->offset >> 5;

    if (IS_IRONLAKE(i965->intel.device_id)) {
        wm_state->wm4.sampler_count = 0;        /* hardware requirement */
    } else {
        wm_state->wm4.sampler_count = (render_state->wm.sampler_count + 3) / 4;
    }

    wm_state->wm5.max_threads = render_state->max_wm_threads - 1;
    wm_state->wm5.thread_dispatch_enable = 1;
    wm_state->wm5.enable_16_pix = 1;
    wm_state->wm5.enable_8_pix = 0;
    wm_state->wm5.early_depth_test = 1;

    dri_bo_emit_reloc(render_state->wm.state,
                      I915_GEM_DOMAIN_INSTRUCTION, 0,
                      wm_state->thread0.grf_reg_count << 1,
                      offsetof(struct i965_wm_unit_state, thread0),
                      render_state->render_kernels[PS_SUBPIC_KERNEL].bo);

    dri_bo_emit_reloc(render_state->wm.state,
                      I915_GEM_DOMAIN_INSTRUCTION, 0,
                      wm_state->wm4.sampler_count << 2,
                      offsetof(struct i965_wm_unit_state, wm4),
                      render_state->wm.sampler);

    dri_bo_unmap(render_state->wm.state);
}

static void
i965_render_wm_unit(VADriverContextP ctx)
{
    struct i965_driver_data *i965 = i965_driver_data(ctx);
    struct i965_render_state *render_state = &i965->render_state;
    struct i965_wm_unit_state *wm_state;

    assert(render_state->wm.sampler);

    dri_bo_map(render_state->wm.state, 1);
    assert(render_state->wm.state->virtual);
    wm_state = render_state->wm.state->virtual;
    memset(wm_state, 0, sizeof(*wm_state));

    wm_state->thread0.grf_reg_count = I965_GRF_BLOCKS(PS_KERNEL_NUM_GRF);
    wm_state->thread0.kernel_start_pointer = render_state->render_kernels[PS_KERNEL].bo->offset >> 6;

    wm_state->thread1.single_program_flow = 1; /* XXX */

    if (IS_IRONLAKE(i965->intel.device_id))
        wm_state->thread1.binding_table_entry_count = 0;        /* hardware requirement */
    else
        wm_state->thread1.binding_table_entry_count = 7;

    wm_state->thread2.scratch_space_base_pointer = 0;
    wm_state->thread2.per_thread_scratch_space = 0; /* 1024 bytes */

    wm_state->thread3.dispatch_grf_start_reg = 2; /* XXX */
    wm_state->thread3.const_urb_entry_read_length = 1;
    wm_state->thread3.const_urb_entry_read_offset = 0;
    wm_state->thread3.urb_entry_read_length = 1; /* XXX */
    wm_state->thread3.urb_entry_read_offset = 0; /* XXX */

    wm_state->wm4.stats_enable = 0;
    wm_state->wm4.sampler_state_pointer = render_state->wm.sampler->offset >> 5;

    if (IS_IRONLAKE(i965->intel.device_id)) {
        wm_state->wm4.sampler_count = 0;        /* hardware requirement */
    } else {
        wm_state->wm4.sampler_count = (render_state->wm.sampler_count + 3) / 4;
    }

    wm_state->wm5.max_threads = render_state->max_wm_threads - 1;
    wm_state->wm5.thread_dispatch_enable = 1;
    wm_state->wm5.enable_16_pix = 1;
    wm_state->wm5.enable_8_pix = 0;
    wm_state->wm5.early_depth_test = 1;

    dri_bo_emit_reloc(render_state->wm.state,
                      I915_GEM_DOMAIN_INSTRUCTION, 0,
                      wm_state->thread0.grf_reg_count << 1,
                      offsetof(struct i965_wm_unit_state, thread0),
                      render_state->render_kernels[PS_KERNEL].bo);

    dri_bo_emit_reloc(render_state->wm.state,
                      I915_GEM_DOMAIN_INSTRUCTION, 0,
                      wm_state->wm4.sampler_count << 2,
                      offsetof(struct i965_wm_unit_state, wm4),
                      render_state->wm.sampler);

    dri_bo_unmap(render_state->wm.state);
}

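/*
 * The color calculator viewport only supplies a depth range; it is set
 * wide open here since depth is unused in this pipeline.
 */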
static void
i965_render_cc_viewport(VADriverContextP ctx)
{
    struct i965_driver_data *i965 = i965_driver_data(ctx);
    struct i965_render_state *render_state = &i965->render_state;
    struct i965_cc_viewport *cc_viewport;

    dri_bo_map(render_state->cc.viewport, 1);
    assert(render_state->cc.viewport->virtual);
    cc_viewport = render_state->cc.viewport->virtual;
    memset(cc_viewport, 0, sizeof(*cc_viewport));

    cc_viewport->min_depth = -1.e35;
    cc_viewport->max_depth = 1.e35;

    dri_bo_unmap(render_state->cc.viewport);
}

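/*
 * CC unit for subpicture blending: standard "over" compositing,
 * src_color * src_alpha + dst_color * (1 - src_alpha), with alpha test
 * and logic ops disabled.
 */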
static void
i965_subpic_render_cc_unit(VADriverContextP ctx)
{
    struct i965_driver_data *i965 = i965_driver_data(ctx);
    struct i965_render_state *render_state = &i965->render_state;
    struct i965_cc_unit_state *cc_state;

    assert(render_state->cc.viewport);

    dri_bo_map(render_state->cc.state, 1);
    assert(render_state->cc.state->virtual);
    cc_state = render_state->cc.state->virtual;
    memset(cc_state, 0, sizeof(*cc_state));

    cc_state->cc0.stencil_enable = 0;   /* disable stencil */
    cc_state->cc2.depth_test = 0;       /* disable depth test */
    cc_state->cc2.logicop_enable = 0;   /* disable logic op */
    cc_state->cc3.ia_blend_enable = 0;  /* blend alpha just like colors */
    cc_state->cc3.blend_enable = 1;     /* enable color blend */
    cc_state->cc3.alpha_test = 0;       /* disable alpha test */
    cc_state->cc3.alpha_test_format = 0; /* ALPHATEST_UNORM8: store alpha value with UNORM8 */
    cc_state->cc3.alpha_test_func = 5;  /* COMPAREFUNCTION_LESS: pass if less than the reference */
    cc_state->cc4.cc_viewport_state_offset = render_state->cc.viewport->offset >> 5;

    cc_state->cc5.dither_enable = 0;    /* disable dither */
    cc_state->cc5.logicop_func = 0xc;   /* COPY */
    cc_state->cc5.statistics_enable = 1;
    cc_state->cc5.ia_blend_function = I965_BLENDFUNCTION_ADD;
    cc_state->cc5.ia_src_blend_factor = I965_BLENDFACTOR_DST_ALPHA;
    cc_state->cc5.ia_dest_blend_factor = I965_BLENDFACTOR_DST_ALPHA;

    cc_state->cc6.clamp_post_alpha_blend = 0;
    cc_state->cc6.clamp_pre_alpha_blend = 0;

    /* final color = src_color * src_blend_factor +/- dst_color * dest_blend_factor */
    cc_state->cc6.blend_function = I965_BLENDFUNCTION_ADD;
    cc_state->cc6.src_blend_factor = I965_BLENDFACTOR_SRC_ALPHA;
    cc_state->cc6.dest_blend_factor = I965_BLENDFACTOR_INV_SRC_ALPHA;

    /* alpha test reference */
    cc_state->cc7.alpha_ref.f = 0.0;

    dri_bo_emit_reloc(render_state->cc.state,
                      I915_GEM_DOMAIN_INSTRUCTION, 0,
                      0,
                      offsetof(struct i965_cc_unit_state, cc4),
                      render_state->cc.viewport);

    dri_bo_unmap(render_state->cc.state);
}

static void
i965_render_cc_unit(VADriverContextP ctx)
{
    struct i965_driver_data *i965 = i965_driver_data(ctx);
    struct i965_render_state *render_state = &i965->render_state;
    struct i965_cc_unit_state *cc_state;

    assert(render_state->cc.viewport);

    dri_bo_map(render_state->cc.state, 1);
    assert(render_state->cc.state->virtual);
    cc_state = render_state->cc.state->virtual;
    memset(cc_state, 0, sizeof(*cc_state));

    cc_state->cc0.stencil_enable = 0;   /* disable stencil */
    cc_state->cc2.depth_test = 0;       /* disable depth test */
    cc_state->cc2.logicop_enable = 1;   /* enable logic op */
    cc_state->cc3.ia_blend_enable = 0;  /* blend alpha just like colors */
    cc_state->cc3.blend_enable = 0;     /* disable color blend */
    cc_state->cc3.alpha_test = 0;       /* disable alpha test */
    cc_state->cc4.cc_viewport_state_offset = render_state->cc.viewport->offset >> 5;

    cc_state->cc5.dither_enable = 0;    /* disable dither */
    cc_state->cc5.logicop_func = 0xc;   /* COPY */
    cc_state->cc5.statistics_enable = 1;
    cc_state->cc5.ia_blend_function = I965_BLENDFUNCTION_ADD;
    cc_state->cc5.ia_src_blend_factor = I965_BLENDFACTOR_ONE;
    cc_state->cc5.ia_dest_blend_factor = I965_BLENDFACTOR_ONE;

    dri_bo_emit_reloc(render_state->cc.state,
                      I915_GEM_DOMAIN_INSTRUCTION, 0,
                      0,
                      offsetof(struct i965_cc_unit_state, cc4),
                      render_state->cc.viewport);

    dri_bo_unmap(render_state->cc.state);
}

static void
i965_render_set_surface_tiling(struct i965_surface_state *ss, unsigned int tiling)
{
    switch (tiling) {
    case I915_TILING_NONE:
        ss->ss3.tiled_surface = 0;
        ss->ss3.tile_walk = 0;
        break;
    case I915_TILING_X:
        ss->ss3.tiled_surface = 1;
        ss->ss3.tile_walk = I965_TILEWALK_XMAJOR;
        break;
    case I915_TILING_Y:
        ss->ss3.tiled_surface = 1;
        ss->ss3.tile_walk = I965_TILEWALK_YMAJOR;
        break;
    }
}

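/*
 * For field rendering (top/bottom), vert_line_stride skips every other
 * line and the surface height is halved; the bottom field additionally
 * starts one line down (vert_line_stride_ofs).
 */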
static void
i965_render_set_surface_state(
    struct i965_surface_state *ss,
    dri_bo                    *bo,
    unsigned long              offset,
    unsigned int               width,
    unsigned int               height,
    unsigned int               pitch,
    unsigned int               format,
    unsigned int               flags
)
{
    unsigned int tiling;
    unsigned int swizzle;

    memset(ss, 0, sizeof(*ss));

    switch (flags & (I965_PP_FLAG_TOP_FIELD|I965_PP_FLAG_BOTTOM_FIELD)) {
    case I965_PP_FLAG_BOTTOM_FIELD:
        ss->ss0.vert_line_stride_ofs = 1;
        /* fall-through */
    case I965_PP_FLAG_TOP_FIELD:
        ss->ss0.vert_line_stride = 1;
        height /= 2;
        break;
    }

    ss->ss0.surface_type = I965_SURFACE_2D;
    ss->ss0.surface_format = format;
    ss->ss0.color_blend = 1;

    ss->ss1.base_addr = bo->offset + offset;

    ss->ss2.width = width - 1;
    ss->ss2.height = height - 1;

    ss->ss3.pitch = pitch - 1;

    dri_bo_get_tiling(bo, &tiling, &swizzle);
    i965_render_set_surface_tiling(ss, tiling);
}

static void
gen7_render_set_surface_tiling(struct gen7_surface_state *ss, uint32_t tiling)
{
    switch (tiling) {
    case I915_TILING_NONE:
        ss->ss0.tiled_surface = 0;
        ss->ss0.tile_walk = 0;
        break;
    case I915_TILING_X:
        ss->ss0.tiled_surface = 1;
        ss->ss0.tile_walk = I965_TILEWALK_XMAJOR;
        break;
    case I915_TILING_Y:
        ss->ss0.tiled_surface = 1;
        ss->ss0.tile_walk = I965_TILEWALK_YMAJOR;
        break;
    }
}

/* Set "Shader Channel Select" */
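/* Note: the "chanel" spelling in the ss7 field names is a typo
 * inherited from struct gen7_surface_state and must match that
 * definition. */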
void
gen7_render_set_surface_scs(struct gen7_surface_state *ss)
{
    ss->ss7.shader_chanel_select_r = HSW_SCS_RED;
    ss->ss7.shader_chanel_select_g = HSW_SCS_GREEN;
    ss->ss7.shader_chanel_select_b = HSW_SCS_BLUE;
    ss->ss7.shader_chanel_select_a = HSW_SCS_ALPHA;
}

static void
gen7_render_set_surface_state(
    struct gen7_surface_state *ss,
    dri_bo                    *bo,
    unsigned long              offset,
    int                        width,
    int                        height,
    int                        pitch,
    int                        format,
    unsigned int               flags
)
{
    unsigned int tiling;
    unsigned int swizzle;

    memset(ss, 0, sizeof(*ss));

    switch (flags & (I965_PP_FLAG_TOP_FIELD|I965_PP_FLAG_BOTTOM_FIELD)) {
    case I965_PP_FLAG_BOTTOM_FIELD:
        ss->ss0.vert_line_stride_ofs = 1;
        /* fall-through */
    case I965_PP_FLAG_TOP_FIELD:
        ss->ss0.vert_line_stride = 1;
        height /= 2;
        break;
    }

    ss->ss0.surface_type = I965_SURFACE_2D;
    ss->ss0.surface_format = format;

    ss->ss1.base_addr = bo->offset + offset;

    ss->ss2.width = width - 1;
    ss->ss2.height = height - 1;

    ss->ss3.pitch = pitch - 1;

    dri_bo_get_tiling(bo, &tiling, &swizzle);
    gen7_render_set_surface_tiling(ss, tiling);
}

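/*
 * Write one surface state entry and its binding-table slot for a source
 * plane, and emit a relocation so the kernel can patch the surface base
 * address at execution time.
 */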
static void
i965_render_src_surface_state(
    VADriverContextP ctx,
    int              index,
    dri_bo          *region,
    unsigned long    offset,
    int              w,
    int              h,
    int              pitch,
    int              format,
    unsigned int     flags
)
{
    struct i965_driver_data *i965 = i965_driver_data(ctx);
    struct i965_render_state *render_state = &i965->render_state;
    void *ss;
    dri_bo *ss_bo = render_state->wm.surface_state_binding_table_bo;

    assert(index < MAX_RENDER_SURFACES);

    dri_bo_map(ss_bo, 1);
    assert(ss_bo->virtual);
    ss = (char *)ss_bo->virtual + SURFACE_STATE_OFFSET(index);

    if (IS_GEN7(i965->intel.device_id)) {
        gen7_render_set_surface_state(ss,
                                      region, offset,
                                      w, h,
                                      pitch, format, flags);
        if (IS_HASWELL(i965->intel.device_id))
            gen7_render_set_surface_scs(ss);
        dri_bo_emit_reloc(ss_bo,
                          I915_GEM_DOMAIN_SAMPLER, 0,
                          offset,
                          SURFACE_STATE_OFFSET(index) + offsetof(struct gen7_surface_state, ss1),
                          region);
    } else {
        i965_render_set_surface_state(ss,
                                      region, offset,
                                      w, h,
                                      pitch, format, flags);
        dri_bo_emit_reloc(ss_bo,
                          I915_GEM_DOMAIN_SAMPLER, 0,
                          offset,
                          SURFACE_STATE_OFFSET(index) + offsetof(struct i965_surface_state, ss1),
                          region);
    }

    ((unsigned int *)((char *)ss_bo->virtual + BINDING_TABLE_OFFSET))[index] = SURFACE_STATE_OFFSET(index);
    dri_bo_unmap(ss_bo);
    render_state->wm.sampler_count++;
}

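/*
 * Bind the source planes: the Y plane twice (slots 1-2), then either
 * the interleaved UV plane twice (NV12, slots 3-4) or the separate U
 * and V planes twice each (slots 3-6); each binding also accounts for
 * one sampler.
 */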
static void
i965_render_src_surfaces_state(
    VADriverContextP ctx,
    struct object_surface *obj_surface,
    unsigned int     flags
)
{
    int region_pitch;
    int rw, rh;
    dri_bo *region;

    region_pitch = obj_surface->width;
    rw = obj_surface->orig_width;
    rh = obj_surface->orig_height;
    region = obj_surface->bo;

    i965_render_src_surface_state(ctx, 1, region, 0, rw, rh, region_pitch, I965_SURFACEFORMAT_R8_UNORM, flags);     /* Y */
    i965_render_src_surface_state(ctx, 2, region, 0, rw, rh, region_pitch, I965_SURFACEFORMAT_R8_UNORM, flags);

    if (obj_surface->fourcc == VA_FOURCC('N', 'V', '1', '2')) {
        i965_render_src_surface_state(ctx, 3, region,
                                      region_pitch * obj_surface->y_cb_offset,
                                      obj_surface->cb_cr_width, obj_surface->cb_cr_height, obj_surface->cb_cr_pitch,
                                      I965_SURFACEFORMAT_R8G8_UNORM, flags); /* UV */
        i965_render_src_surface_state(ctx, 4, region,
                                      region_pitch * obj_surface->y_cb_offset,
                                      obj_surface->cb_cr_width, obj_surface->cb_cr_height, obj_surface->cb_cr_pitch,
                                      I965_SURFACEFORMAT_R8G8_UNORM, flags);
    } else {
        i965_render_src_surface_state(ctx, 3, region,
                                      region_pitch * obj_surface->y_cb_offset,
                                      obj_surface->cb_cr_width, obj_surface->cb_cr_height, obj_surface->cb_cr_pitch,
                                      I965_SURFACEFORMAT_R8_UNORM, flags); /* U */
        i965_render_src_surface_state(ctx, 4, region,
                                      region_pitch * obj_surface->y_cb_offset,
                                      obj_surface->cb_cr_width, obj_surface->cb_cr_height, obj_surface->cb_cr_pitch,
                                      I965_SURFACEFORMAT_R8_UNORM, flags);
        i965_render_src_surface_state(ctx, 5, region,
                                      region_pitch * obj_surface->y_cr_offset,
                                      obj_surface->cb_cr_width, obj_surface->cb_cr_height, obj_surface->cb_cr_pitch,
                                      I965_SURFACEFORMAT_R8_UNORM, flags); /* V */
        i965_render_src_surface_state(ctx, 6, region,
                                      region_pitch * obj_surface->y_cr_offset,
                                      obj_surface->cb_cr_width, obj_surface->cb_cr_height, obj_surface->cb_cr_pitch,
                                      I965_SURFACEFORMAT_R8_UNORM, flags);
    }
}

static void
i965_subpic_render_src_surfaces_state(VADriverContextP ctx,
                                      struct object_surface *obj_surface)
{
    dri_bo *subpic_region;
    unsigned int index = obj_surface->subpic_render_idx;
    struct object_subpic *obj_subpic = obj_surface->obj_subpic[index];
    struct object_image *obj_image = obj_subpic->obj_image;

    assert(obj_surface);
    assert(obj_surface->bo);
    subpic_region = obj_image->bo;
    /* subpicture surface */
    i965_render_src_surface_state(ctx, 1, subpic_region, 0, obj_subpic->width, obj_subpic->height, obj_subpic->pitch, obj_subpic->format, 0);
    i965_render_src_surface_state(ctx, 2, subpic_region, 0, obj_subpic->width, obj_subpic->height, obj_subpic->pitch, obj_subpic->format, 0);
}

static void
i965_render_dest_surface_state(VADriverContextP ctx, int index)
{
    struct i965_driver_data *i965 = i965_driver_data(ctx);
    struct i965_render_state *render_state = &i965->render_state;
    struct intel_region *dest_region = render_state->draw_region;
    void *ss;
    dri_bo *ss_bo = render_state->wm.surface_state_binding_table_bo;
    int format;

    assert(index < MAX_RENDER_SURFACES);

    if (dest_region->cpp == 2) {
        format = I965_SURFACEFORMAT_B5G6R5_UNORM;
    } else {
        format = I965_SURFACEFORMAT_B8G8R8A8_UNORM;
    }

    dri_bo_map(ss_bo, 1);
    assert(ss_bo->virtual);
    ss = (char *)ss_bo->virtual + SURFACE_STATE_OFFSET(index);

    if (IS_GEN7(i965->intel.device_id)) {
        gen7_render_set_surface_state(ss,
                                      dest_region->bo, 0,
                                      dest_region->width, dest_region->height,
                                      dest_region->pitch, format, 0);
        if (IS_HASWELL(i965->intel.device_id))
            gen7_render_set_surface_scs(ss);
        dri_bo_emit_reloc(ss_bo,
                          I915_GEM_DOMAIN_RENDER, I915_GEM_DOMAIN_RENDER,
                          0,
                          SURFACE_STATE_OFFSET(index) + offsetof(struct gen7_surface_state, ss1),
                          dest_region->bo);
    } else {
        i965_render_set_surface_state(ss,
                                      dest_region->bo, 0,
                                      dest_region->width, dest_region->height,
                                      dest_region->pitch, format, 0);
        dri_bo_emit_reloc(ss_bo,
                          I915_GEM_DOMAIN_RENDER, I915_GEM_DOMAIN_RENDER,
                          0,
                          SURFACE_STATE_OFFSET(index) + offsetof(struct i965_surface_state, ss1),
                          dest_region->bo);
    }

    ((unsigned int *)((char *)ss_bo->virtual + BINDING_TABLE_OFFSET))[index] = SURFACE_STATE_OFFSET(index);
    dri_bo_unmap(ss_bo);
}

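/*
 * Emit three vertices of a RECTLIST primitive, each as (u, v, x, y);
 * the hardware derives the fourth corner.  The texture coordinates are
 * permuted according to the surface's rotation attribute.
 */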
static void
i965_fill_vertex_buffer(
    VADriverContextP ctx,
    float tex_coords[4], /* [(u1,v1);(u2,v2)] */
    float vid_coords[4]  /* [(x1,y1);(x2,y2)] */
)
{
    struct i965_driver_data * const i965 = i965_driver_data(ctx);
    float vb[12];

    enum { X1, Y1, X2, Y2 };

    static const unsigned int g_rotation_indices[][6] = {
        [VA_ROTATION_NONE] = { X2, Y2, X1, Y2, X1, Y1 },
        [VA_ROTATION_90]   = { X2, Y1, X2, Y2, X1, Y2 },
        [VA_ROTATION_180]  = { X1, Y1, X2, Y1, X2, Y2 },
        [VA_ROTATION_270]  = { X1, Y2, X1, Y1, X2, Y1 },
    };

    const unsigned int * const rotation_indices =
        g_rotation_indices[i965->rotation_attrib->value];

    vb[0]  = tex_coords[rotation_indices[0]]; /* bottom-right corner */
    vb[1]  = tex_coords[rotation_indices[1]];
    vb[2]  = vid_coords[X2];
    vb[3]  = vid_coords[Y2];

    vb[4]  = tex_coords[rotation_indices[2]]; /* bottom-left corner */
    vb[5]  = tex_coords[rotation_indices[3]];
    vb[6]  = vid_coords[X1];
    vb[7]  = vid_coords[Y2];

    vb[8]  = tex_coords[rotation_indices[4]]; /* top-left corner */
    vb[9]  = tex_coords[rotation_indices[5]];
    vb[10] = vid_coords[X1];
    vb[11] = vid_coords[Y1];

    dri_bo_subdata(i965->render_state.vb.vertex_buffer, 0, sizeof(vb), vb);
}

static void
i965_subpic_render_upload_vertex(VADriverContextP ctx,
                                 struct object_surface *obj_surface,
                                 const VARectangle *output_rect)
{
    unsigned int index = obj_surface->subpic_render_idx;
    struct object_subpic *obj_subpic = obj_surface->obj_subpic[index];
    float tex_coords[4], vid_coords[4];
    VARectangle dst_rect;

    if (obj_subpic->flags & VA_SUBPICTURE_DESTINATION_IS_SCREEN_COORD)
        dst_rect = obj_subpic->dst_rect;
    else {
        const float sx  = (float)output_rect->width  / obj_surface->orig_width;
        const float sy  = (float)output_rect->height / obj_surface->orig_height;
        dst_rect.x      = output_rect->x + sx * obj_subpic->dst_rect.x;
        dst_rect.y      = output_rect->y + sy * obj_subpic->dst_rect.y;
        dst_rect.width  = sx * obj_subpic->dst_rect.width;
        dst_rect.height = sy * obj_subpic->dst_rect.height;
    }

    tex_coords[0] = (float)obj_subpic->src_rect.x / obj_subpic->width;
    tex_coords[1] = (float)obj_subpic->src_rect.y / obj_subpic->height;
    tex_coords[2] = (float)(obj_subpic->src_rect.x + obj_subpic->src_rect.width) / obj_subpic->width;
    tex_coords[3] = (float)(obj_subpic->src_rect.y + obj_subpic->src_rect.height) / obj_subpic->height;

    vid_coords[0] = dst_rect.x;
    vid_coords[1] = dst_rect.y;
    vid_coords[2] = (float)(dst_rect.x + dst_rect.width);
    vid_coords[3] = (float)(dst_rect.y + dst_rect.height);

    i965_fill_vertex_buffer(ctx, tex_coords, vid_coords);
}

static void
i965_render_upload_vertex(
    VADriverContextP   ctx,
    struct object_surface *obj_surface,
    const VARectangle *src_rect,
    const VARectangle *dst_rect
)
{
    struct i965_driver_data *i965 = i965_driver_data(ctx);
    struct i965_render_state *render_state = &i965->render_state;
    struct intel_region *dest_region = render_state->draw_region;
    float tex_coords[4], vid_coords[4];
    int width, height;

    width  = obj_surface->orig_width;
    height = obj_surface->orig_height;

    tex_coords[0] = (float)src_rect->x / width;
    tex_coords[1] = (float)src_rect->y / height;
    tex_coords[2] = (float)(src_rect->x + src_rect->width) / width;
    tex_coords[3] = (float)(src_rect->y + src_rect->height) / height;

    vid_coords[0] = dest_region->x + dst_rect->x;
    vid_coords[1] = dest_region->y + dst_rect->y;
    vid_coords[2] = vid_coords[0] + dst_rect->width;
    vid_coords[3] = vid_coords[1] + dst_rect->height;

    i965_fill_vertex_buffer(ctx, tex_coords, vid_coords);
}

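/*
 * Upload the CURBE constant that tells the planar PS kernel how to
 * fetch chroma: 0 = separate U/V planes, 1 = interleaved NV12 UV,
 * 2 = no chroma (grayscale YUV400).
 */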
static void
i965_render_upload_constants(VADriverContextP ctx,
                             struct object_surface *obj_surface)
{
    struct i965_driver_data *i965 = i965_driver_data(ctx);
    struct i965_render_state *render_state = &i965->render_state;
    unsigned short *constant_buffer;

    dri_bo_map(render_state->curbe.bo, 1);
    assert(render_state->curbe.bo->virtual);
    constant_buffer = render_state->curbe.bo->virtual;

    if (obj_surface->subsampling == SUBSAMPLE_YUV400) {
        assert(obj_surface->fourcc == VA_FOURCC('I', 'M', 'C', '1') ||
               obj_surface->fourcc == VA_FOURCC('I', 'M', 'C', '3'));
        *constant_buffer = 2;
    } else {
        if (obj_surface->fourcc == VA_FOURCC('N', 'V', '1', '2'))
            *constant_buffer = 1;
        else
            *constant_buffer = 0;
    }

    dri_bo_unmap(render_state->curbe.bo);
}

static void
i965_subpic_render_upload_constants(VADriverContextP ctx,
                                    struct object_surface *obj_surface)
{
    struct i965_driver_data *i965 = i965_driver_data(ctx);
    struct i965_render_state *render_state = &i965->render_state;
    float *constant_buffer;
    float global_alpha = 1.0;
    unsigned int index = obj_surface->subpic_render_idx;
    struct object_subpic *obj_subpic = obj_surface->obj_subpic[index];

    if (obj_subpic->flags & VA_SUBPICTURE_GLOBAL_ALPHA) {
        global_alpha = obj_subpic->global_alpha;
    }

    dri_bo_map(render_state->curbe.bo, 1);

    assert(render_state->curbe.bo->virtual);
    constant_buffer = render_state->curbe.bo->virtual;
    *constant_buffer = global_alpha;

    dri_bo_unmap(render_state->curbe.bo);
}

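/*
 * Build all pipeline state needed to render a video surface: the
 * fixed-function units, surface states, samplers, vertex data and
 * CURBE constants.
 */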
static void
i965_surface_render_state_setup(
    VADriverContextP   ctx,
    struct object_surface *obj_surface,
    const VARectangle *src_rect,
    const VARectangle *dst_rect,
    unsigned int       flags
)
{
    i965_render_vs_unit(ctx);
    i965_render_sf_unit(ctx);
    i965_render_dest_surface_state(ctx, 0);
    i965_render_src_surfaces_state(ctx, obj_surface, flags);
    i965_render_sampler(ctx);
    i965_render_wm_unit(ctx);
    i965_render_cc_viewport(ctx);
    i965_render_cc_unit(ctx);
    i965_render_upload_vertex(ctx, obj_surface, src_rect, dst_rect);
    i965_render_upload_constants(ctx, obj_surface);
}

static void
i965_subpic_render_state_setup(
    VADriverContextP   ctx,
    struct object_surface *obj_surface,
    const VARectangle *src_rect,
    const VARectangle *dst_rect
)
{
    i965_render_vs_unit(ctx);
    i965_render_sf_unit(ctx);
    i965_render_dest_surface_state(ctx, 0);
    i965_subpic_render_src_surfaces_state(ctx, obj_surface);
    i965_render_sampler(ctx);
    i965_subpic_render_wm_unit(ctx);
    i965_render_cc_viewport(ctx);
    i965_subpic_render_cc_unit(ctx);
    i965_subpic_render_upload_constants(ctx, obj_surface);
    i965_subpic_render_upload_vertex(ctx, obj_surface, dst_rect);
}

static void
i965_render_pipeline_select(VADriverContextP ctx)
{
    struct i965_driver_data *i965 = i965_driver_data(ctx);
    struct intel_batchbuffer *batch = i965->batch;

    BEGIN_BATCH(batch, 1);
    OUT_BATCH(batch, CMD_PIPELINE_SELECT | PIPELINE_SELECT_3D);
    ADVANCE_BATCH(batch);
}

static void
i965_render_state_sip(VADriverContextP ctx)
{
    struct i965_driver_data *i965 = i965_driver_data(ctx);
    struct intel_batchbuffer *batch = i965->batch;

    BEGIN_BATCH(batch, 2);
    OUT_BATCH(batch, CMD_STATE_SIP | 0);
    OUT_BATCH(batch, 0);
    ADVANCE_BATCH(batch);
}

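/*
 * STATE_BASE_ADDRESS: only the surface state base points at the
 * surface-state/binding-table buffer; everything else stays at zero.
 * Ironlake takes a longer packet, apparently for the added instruction
 * base address and bound.
 */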
static void
i965_render_state_base_address(VADriverContextP ctx)
{
    struct i965_driver_data *i965 = i965_driver_data(ctx);
    struct intel_batchbuffer *batch = i965->batch;
    struct i965_render_state *render_state = &i965->render_state;

    if (IS_IRONLAKE(i965->intel.device_id)) {
        BEGIN_BATCH(batch, 8);
        OUT_BATCH(batch, CMD_STATE_BASE_ADDRESS | 6);
        OUT_BATCH(batch, 0 | BASE_ADDRESS_MODIFY);
        OUT_RELOC(batch, render_state->wm.surface_state_binding_table_bo, I915_GEM_DOMAIN_INSTRUCTION, 0, BASE_ADDRESS_MODIFY);
        OUT_BATCH(batch, 0 | BASE_ADDRESS_MODIFY);
        OUT_BATCH(batch, 0 | BASE_ADDRESS_MODIFY);
        OUT_BATCH(batch, 0 | BASE_ADDRESS_MODIFY);
        OUT_BATCH(batch, 0 | BASE_ADDRESS_MODIFY);
        OUT_BATCH(batch, 0 | BASE_ADDRESS_MODIFY);
        ADVANCE_BATCH(batch);
    } else {
        BEGIN_BATCH(batch, 6);
        OUT_BATCH(batch, CMD_STATE_BASE_ADDRESS | 4);
        OUT_BATCH(batch, 0 | BASE_ADDRESS_MODIFY);
        OUT_RELOC(batch, render_state->wm.surface_state_binding_table_bo, I915_GEM_DOMAIN_INSTRUCTION, 0, BASE_ADDRESS_MODIFY);
        OUT_BATCH(batch, 0 | BASE_ADDRESS_MODIFY);
        OUT_BATCH(batch, 0 | BASE_ADDRESS_MODIFY);
        OUT_BATCH(batch, 0 | BASE_ADDRESS_MODIFY);
        ADVANCE_BATCH(batch);
    }
}

static void
i965_render_binding_table_pointers(VADriverContextP ctx)
{
    struct i965_driver_data *i965 = i965_driver_data(ctx);
    struct intel_batchbuffer *batch = i965->batch;

    BEGIN_BATCH(batch, 6);
    OUT_BATCH(batch, CMD_BINDING_TABLE_POINTERS | 4);
    OUT_BATCH(batch, 0); /* vs */
    OUT_BATCH(batch, 0); /* gs */
    OUT_BATCH(batch, 0); /* clip */
    OUT_BATCH(batch, 0); /* sf */
    OUT_BATCH(batch, BINDING_TABLE_OFFSET);
    ADVANCE_BATCH(batch);
}

static void
i965_render_constant_color(VADriverContextP ctx)
{
    struct i965_driver_data *i965 = i965_driver_data(ctx);
    struct intel_batchbuffer *batch = i965->batch;

    BEGIN_BATCH(batch, 5);
    OUT_BATCH(batch, CMD_CONSTANT_COLOR | 3);
    OUT_BATCH(batch, float_to_uint(1.0));
    OUT_BATCH(batch, float_to_uint(0.0));
    OUT_BATCH(batch, float_to_uint(1.0));
    OUT_BATCH(batch, float_to_uint(1.0));
    ADVANCE_BATCH(batch);
}

static void
i965_render_pipelined_pointers(VADriverContextP ctx)
{
    struct i965_driver_data *i965 = i965_driver_data(ctx);
    struct intel_batchbuffer *batch = i965->batch;
    struct i965_render_state *render_state = &i965->render_state;

    BEGIN_BATCH(batch, 7);
    OUT_BATCH(batch, CMD_PIPELINED_POINTERS | 5);
    OUT_RELOC(batch, render_state->vs.state, I915_GEM_DOMAIN_INSTRUCTION, 0, 0);
    OUT_BATCH(batch, 0);  /* disable GS */
    OUT_BATCH(batch, 0);  /* disable CLIP */
    OUT_RELOC(batch, render_state->sf.state, I915_GEM_DOMAIN_INSTRUCTION, 0, 0);
    OUT_RELOC(batch, render_state->wm.state, I915_GEM_DOMAIN_INSTRUCTION, 0, 0);
    OUT_RELOC(batch, render_state->cc.state, I915_GEM_DOMAIN_INSTRUCTION, 0, 0);
    ADVANCE_BATCH(batch);
}

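/*
 * Program the URB fences; each fence value is the cumulative end offset
 * of that stage's region, laid out VS | GS | CLIP | SF | CS.
 */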
static void
i965_render_urb_layout(VADriverContextP ctx)
{
    struct i965_driver_data *i965 = i965_driver_data(ctx);
    struct intel_batchbuffer *batch = i965->batch;
    int urb_vs_start, urb_vs_size;
    int urb_gs_start, urb_gs_size;
    int urb_clip_start, urb_clip_size;
    int urb_sf_start, urb_sf_size;
    int urb_cs_start, urb_cs_size;

    urb_vs_start = 0;
    urb_vs_size = URB_VS_ENTRIES * URB_VS_ENTRY_SIZE;
    urb_gs_start = urb_vs_start + urb_vs_size;
    urb_gs_size = URB_GS_ENTRIES * URB_GS_ENTRY_SIZE;
    urb_clip_start = urb_gs_start + urb_gs_size;
    urb_clip_size = URB_CLIP_ENTRIES * URB_CLIP_ENTRY_SIZE;
    urb_sf_start = urb_clip_start + urb_clip_size;
    urb_sf_size = URB_SF_ENTRIES * URB_SF_ENTRY_SIZE;
    urb_cs_start = urb_sf_start + urb_sf_size;
    urb_cs_size = URB_CS_ENTRIES * URB_CS_ENTRY_SIZE;

    BEGIN_BATCH(batch, 3);
    OUT_BATCH(batch,
              CMD_URB_FENCE |
              UF0_CS_REALLOC |
              UF0_SF_REALLOC |
              UF0_CLIP_REALLOC |
              UF0_GS_REALLOC |
              UF0_VS_REALLOC |
              1);
    OUT_BATCH(batch,
              ((urb_clip_start + urb_clip_size) << UF1_CLIP_FENCE_SHIFT) |
              ((urb_gs_start + urb_gs_size) << UF1_GS_FENCE_SHIFT) |
              ((urb_vs_start + urb_vs_size) << UF1_VS_FENCE_SHIFT));
    OUT_BATCH(batch,
              ((urb_cs_start + urb_cs_size) << UF2_CS_FENCE_SHIFT) |
              ((urb_sf_start + urb_sf_size) << UF2_SF_FENCE_SHIFT));
    ADVANCE_BATCH(batch);
}

static void
i965_render_cs_urb_layout(VADriverContextP ctx)
{
    struct i965_driver_data *i965 = i965_driver_data(ctx);
    struct intel_batchbuffer *batch = i965->batch;

    BEGIN_BATCH(batch, 2);
    OUT_BATCH(batch, CMD_CS_URB_STATE | 0);
    OUT_BATCH(batch,
              ((URB_CS_ENTRY_SIZE - 1) << 4) |  /* URB Entry Allocation Size */
              (URB_CS_ENTRIES << 0));           /* Number of URB Entries */
    ADVANCE_BATCH(batch);
}

static void
i965_render_constant_buffer(VADriverContextP ctx)
{
    struct i965_driver_data *i965 = i965_driver_data(ctx);
    struct intel_batchbuffer *batch = i965->batch;
    struct i965_render_state *render_state = &i965->render_state;

    BEGIN_BATCH(batch, 2);
    OUT_BATCH(batch, CMD_CONSTANT_BUFFER | (1 << 8) | (2 - 2));
    OUT_RELOC(batch, render_state->curbe.bo,
              I915_GEM_DOMAIN_INSTRUCTION, 0,
              URB_CS_ENTRY_SIZE - 1);
    ADVANCE_BATCH(batch);
}

static void
i965_render_drawing_rectangle(VADriverContextP ctx)
{
    struct i965_driver_data *i965 = i965_driver_data(ctx);
    struct intel_batchbuffer *batch = i965->batch;
    struct i965_render_state *render_state = &i965->render_state;
    struct intel_region *dest_region = render_state->draw_region;

    BEGIN_BATCH(batch, 4);
    OUT_BATCH(batch, CMD_DRAWING_RECTANGLE | 2);
    OUT_BATCH(batch, 0x00000000);
    OUT_BATCH(batch, (dest_region->width - 1) | (dest_region->height - 1) << 16);
    OUT_BATCH(batch, 0x00000000);
    ADVANCE_BATCH(batch);
}

static void
i965_render_vertex_elements(VADriverContextP ctx)
{
    struct i965_driver_data *i965 = i965_driver_data(ctx);
    struct intel_batchbuffer *batch = i965->batch;

    if (IS_IRONLAKE(i965->intel.device_id)) {
        BEGIN_BATCH(batch, 5);
        OUT_BATCH(batch, CMD_VERTEX_ELEMENTS | 3);
        /* offset 0: X,Y -> {X, Y, 1.0, 1.0} */
        OUT_BATCH(batch, (0 << VE0_VERTEX_BUFFER_INDEX_SHIFT) |
                  VE0_VALID |
                  (I965_SURFACEFORMAT_R32G32_FLOAT << VE0_FORMAT_SHIFT) |
                  (0 << VE0_OFFSET_SHIFT));
        OUT_BATCH(batch, (I965_VFCOMPONENT_STORE_SRC << VE1_VFCOMPONENT_0_SHIFT) |
                  (I965_VFCOMPONENT_STORE_SRC << VE1_VFCOMPONENT_1_SHIFT) |
                  (I965_VFCOMPONENT_STORE_1_FLT << VE1_VFCOMPONENT_2_SHIFT) |
                  (I965_VFCOMPONENT_STORE_1_FLT << VE1_VFCOMPONENT_3_SHIFT));
        /* offset 8: S0, T0 -> {S0, T0, 1.0, 1.0} */
        OUT_BATCH(batch, (0 << VE0_VERTEX_BUFFER_INDEX_SHIFT) |
                  VE0_VALID |
                  (I965_SURFACEFORMAT_R32G32_FLOAT << VE0_FORMAT_SHIFT) |
                  (8 << VE0_OFFSET_SHIFT));
        OUT_BATCH(batch, (I965_VFCOMPONENT_STORE_SRC << VE1_VFCOMPONENT_0_SHIFT) |
                  (I965_VFCOMPONENT_STORE_SRC << VE1_VFCOMPONENT_1_SHIFT) |
                  (I965_VFCOMPONENT_STORE_1_FLT << VE1_VFCOMPONENT_2_SHIFT) |
                  (I965_VFCOMPONENT_STORE_1_FLT << VE1_VFCOMPONENT_3_SHIFT));
        ADVANCE_BATCH(batch);
    } else {
        BEGIN_BATCH(batch, 5);
        OUT_BATCH(batch, CMD_VERTEX_ELEMENTS | 3);
        /* offset 0: X,Y -> {X, Y, 1.0, 1.0} */
        OUT_BATCH(batch, (0 << VE0_VERTEX_BUFFER_INDEX_SHIFT) |
                  VE0_VALID |
                  (I965_SURFACEFORMAT_R32G32_FLOAT << VE0_FORMAT_SHIFT) |
                  (0 << VE0_OFFSET_SHIFT));
        OUT_BATCH(batch, (I965_VFCOMPONENT_STORE_SRC << VE1_VFCOMPONENT_0_SHIFT) |
                  (I965_VFCOMPONENT_STORE_SRC << VE1_VFCOMPONENT_1_SHIFT) |
                  (I965_VFCOMPONENT_STORE_1_FLT << VE1_VFCOMPONENT_2_SHIFT) |
                  (I965_VFCOMPONENT_STORE_1_FLT << VE1_VFCOMPONENT_3_SHIFT) |
                  (0 << VE1_DESTINATION_ELEMENT_OFFSET_SHIFT));
        /* offset 8: S0, T0 -> {S0, T0, 1.0, 1.0} */
        OUT_BATCH(batch, (0 << VE0_VERTEX_BUFFER_INDEX_SHIFT) |
                  VE0_VALID |
                  (I965_SURFACEFORMAT_R32G32_FLOAT << VE0_FORMAT_SHIFT) |
                  (8 << VE0_OFFSET_SHIFT));
        OUT_BATCH(batch, (I965_VFCOMPONENT_STORE_SRC << VE1_VFCOMPONENT_0_SHIFT) |
                  (I965_VFCOMPONENT_STORE_SRC << VE1_VFCOMPONENT_1_SHIFT) |
                  (I965_VFCOMPONENT_STORE_1_FLT << VE1_VFCOMPONENT_2_SHIFT) |
                  (I965_VFCOMPONENT_STORE_1_FLT << VE1_VFCOMPONENT_3_SHIFT) |
                  (4 << VE1_DESTINATION_ELEMENT_OFFSET_SHIFT));
        ADVANCE_BATCH(batch);
    }
}

static void
i965_render_upload_image_palette(
    VADriverContextP ctx,
    struct object_image *obj_image,
    unsigned int     alpha
)
{
    struct i965_driver_data *i965 = i965_driver_data(ctx);
    struct intel_batchbuffer *batch = i965->batch;
    unsigned int i;

    assert(obj_image);

    if (!obj_image)
        return;

    if (obj_image->image.num_palette_entries == 0)
        return;

    BEGIN_BATCH(batch, 1 + obj_image->image.num_palette_entries);
    OUT_BATCH(batch, CMD_SAMPLER_PALETTE_LOAD | (obj_image->image.num_palette_entries - 1));
    /* fill palette: bits 0-23 hold the color, bits 24-31 the alpha */
    for (i = 0; i < obj_image->image.num_palette_entries; i++)
        OUT_BATCH(batch, (alpha << 24) | obj_image->palette[i]);
    ADVANCE_BATCH(batch);
}

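/*
 * Emit the vertex buffer packet and a 3DPRIMITIVE RECTLIST draw using
 * the three vertices uploaded by i965_fill_vertex_buffer().
 */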
static void
i965_render_startup(VADriverContextP ctx)
{
    struct i965_driver_data *i965 = i965_driver_data(ctx);
    struct intel_batchbuffer *batch = i965->batch;
    struct i965_render_state *render_state = &i965->render_state;

    BEGIN_BATCH(batch, 11);
    OUT_BATCH(batch, CMD_VERTEX_BUFFERS | 3);
    OUT_BATCH(batch,
              (0 << VB0_BUFFER_INDEX_SHIFT) |
              VB0_VERTEXDATA |
              ((4 * 4) << VB0_BUFFER_PITCH_SHIFT));
    OUT_RELOC(batch, render_state->vb.vertex_buffer, I915_GEM_DOMAIN_VERTEX, 0, 0);

    if (IS_IRONLAKE(i965->intel.device_id))
        OUT_RELOC(batch, render_state->vb.vertex_buffer, I915_GEM_DOMAIN_VERTEX, 0, 12 * 4);
    else
        OUT_BATCH(batch, 3);

    OUT_BATCH(batch, 0);

    OUT_BATCH(batch,
              CMD_3DPRIMITIVE |
              _3DPRIMITIVE_VERTEX_SEQUENTIAL |
              (_3DPRIM_RECTLIST << _3DPRIMITIVE_TOPOLOGY_SHIFT) |
              (0 << 9) |
              4);
    OUT_BATCH(batch, 3); /* vertex count per instance */
    OUT_BATCH(batch, 0); /* start vertex offset */
    OUT_BATCH(batch, 1); /* single instance */
    OUT_BATCH(batch, 0); /* start instance location */
    OUT_BATCH(batch, 0); /* index buffer offset, ignored */
    ADVANCE_BATCH(batch);
}

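/*
 * Clear the destination with an XY_COLOR_BLT (raster op 0xf0 = PATCOPY)
 * using fill color 0; on GEN6+ the blit has to go to the BLT ring.
 */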
1451 static void 
1452 i965_clear_dest_region(VADriverContextP ctx)
1453 {
1454     struct i965_driver_data *i965 = i965_driver_data(ctx);
1455     struct intel_batchbuffer *batch = i965->batch;
1456     struct i965_render_state *render_state = &i965->render_state;
1457     struct intel_region *dest_region = render_state->draw_region;
1458     unsigned int blt_cmd, br13;
1459     int pitch;
1460
1461     blt_cmd = XY_COLOR_BLT_CMD;
1462     br13 = 0xf0 << 16;
1463     pitch = dest_region->pitch;
1464
1465     if (dest_region->cpp == 4) {
1466         br13 |= BR13_8888;
1467         blt_cmd |= (XY_COLOR_BLT_WRITE_RGB | XY_COLOR_BLT_WRITE_ALPHA);
1468     } else {
1469         assert(dest_region->cpp == 2);
1470         br13 |= BR13_565;
1471     }
1472
1473     if (dest_region->tiling != I915_TILING_NONE) {
1474         blt_cmd |= XY_COLOR_BLT_DST_TILED;
1475         pitch /= 4;
1476     }
1477
1478     br13 |= pitch;
1479
1480     if (IS_GEN6(i965->intel.device_id) ||
1481         IS_GEN7(i965->intel.device_id)) {
1482         intel_batchbuffer_start_atomic_blt(batch, 24);
1483         BEGIN_BLT_BATCH(batch, 6);
1484     } else {
1485         intel_batchbuffer_start_atomic(batch, 24);
1486         BEGIN_BATCH(batch, 6);
1487     }
1488
1489     OUT_BATCH(batch, blt_cmd);
1490     OUT_BATCH(batch, br13);
1491     OUT_BATCH(batch, (dest_region->y << 16) | (dest_region->x));
1492     OUT_BATCH(batch, ((dest_region->y + dest_region->height) << 16) |
1493               (dest_region->x + dest_region->width));
1494     OUT_RELOC(batch, dest_region->bo, 
1495               I915_GEM_DOMAIN_RENDER, I915_GEM_DOMAIN_RENDER,
1496               0);
1497     OUT_BATCH(batch, 0x0);
1498     ADVANCE_BATCH(batch);
1499     intel_batchbuffer_end_atomic(batch);
1500 }
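
/*
 * Worked example of the BR13 programming above, with hypothetical
 * numbers: a 1920x1080 ARGB destination (cpp == 4) with a 7680-byte
 * pitch.
 *
 *     br13  = 0xf0 << 16;      // raster op 0xf0 (PATCOPY): write fill color
 *     br13 |= BR13_8888;       // 32bpp format
 *     br13 |= 7680;            // linear surface: pitch in bytes
 *     // tiled surface: the pitch field is in dwords, i.e. 7680 / 4
 *
 * The trailing OUT_BATCH(batch, 0x0) supplies the solid fill color
 * (black, all channels zero), clearing the destination before the video
 * frame is composited onto it.
 */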
1501
1502 static void
1503 i965_surface_render_pipeline_setup(VADriverContextP ctx)
1504 {
1505     struct i965_driver_data *i965 = i965_driver_data(ctx);
1506     struct intel_batchbuffer *batch = i965->batch;
1507
1508     i965_clear_dest_region(ctx);
1509     intel_batchbuffer_start_atomic(batch, 0x1000);
1510     intel_batchbuffer_emit_mi_flush(batch);
1511     i965_render_pipeline_select(ctx);
1512     i965_render_state_sip(ctx);
1513     i965_render_state_base_address(ctx);
1514     i965_render_binding_table_pointers(ctx);
1515     i965_render_constant_color(ctx);
1516     i965_render_pipelined_pointers(ctx);
1517     i965_render_urb_layout(ctx);
1518     i965_render_cs_urb_layout(ctx);
1519     i965_render_constant_buffer(ctx);
1520     i965_render_drawing_rectangle(ctx);
1521     i965_render_vertex_elements(ctx);
1522     i965_render_startup(ctx);
1523     intel_batchbuffer_end_atomic(batch);
1524 }
1525
1526 static void
1527 i965_subpic_render_pipeline_setup(VADriverContextP ctx)
1528 {
1529     struct i965_driver_data *i965 = i965_driver_data(ctx);
1530     struct intel_batchbuffer *batch = i965->batch;
1531
1532     intel_batchbuffer_start_atomic(batch, 0x1000);
1533     intel_batchbuffer_emit_mi_flush(batch);
1534     i965_render_pipeline_select(ctx);
1535     i965_render_state_sip(ctx);
1536     i965_render_state_base_address(ctx);
1537     i965_render_binding_table_pointers(ctx);
1538     i965_render_constant_color(ctx);
1539     i965_render_pipelined_pointers(ctx);
1540     i965_render_urb_layout(ctx);
1541     i965_render_cs_urb_layout(ctx);
1542     i965_render_drawing_rectangle(ctx);
1543     i965_render_vertex_elements(ctx);
1544     i965_render_startup(ctx);
1545     intel_batchbuffer_end_atomic(batch);
1546 }
1547
1549 static void 
1550 i965_render_initialize(VADriverContextP ctx)
1551 {
1552     struct i965_driver_data *i965 = i965_driver_data(ctx);
1553     struct i965_render_state *render_state = &i965->render_state;
1554     dri_bo *bo;
1555
1556     /* VERTEX BUFFER */
1557     dri_bo_unreference(render_state->vb.vertex_buffer);
1558     bo = dri_bo_alloc(i965->intel.bufmgr,
1559                       "vertex buffer",
1560                       4096,
1561                       4096);
1562     assert(bo);
1563     render_state->vb.vertex_buffer = bo;
1564
1565     /* VS */
1566     dri_bo_unreference(render_state->vs.state);
1567     bo = dri_bo_alloc(i965->intel.bufmgr,
1568                       "vs state",
1569                       sizeof(struct i965_vs_unit_state),
1570                       64);
1571     assert(bo);
1572     render_state->vs.state = bo;
1573
1574     /* GS */
1575     /* CLIP */
1576     /* SF */
1577     dri_bo_unreference(render_state->sf.state);
1578     bo = dri_bo_alloc(i965->intel.bufmgr,
1579                       "sf state",
1580                       sizeof(struct i965_sf_unit_state),
1581                       64);
1582     assert(bo);
1583     render_state->sf.state = bo;
1584
1585     /* WM */
1586     dri_bo_unreference(render_state->wm.surface_state_binding_table_bo);
1587     bo = dri_bo_alloc(i965->intel.bufmgr,
1588                       "surface state & binding table",
1589                       (SURFACE_STATE_PADDED_SIZE + sizeof(unsigned int)) * MAX_RENDER_SURFACES,
1590                       4096);
1591     assert(bo);
1592     render_state->wm.surface_state_binding_table_bo = bo;
1593
1594     dri_bo_unreference(render_state->wm.sampler);
1595     bo = dri_bo_alloc(i965->intel.bufmgr,
1596                       "sampler state",
1597                       MAX_SAMPLERS * sizeof(struct i965_sampler_state),
1598                       64);
1599     assert(bo);
1600     render_state->wm.sampler = bo;
1601     render_state->wm.sampler_count = 0;
1602
1603     dri_bo_unreference(render_state->wm.state);
1604     bo = dri_bo_alloc(i965->intel.bufmgr,
1605                       "wm state",
1606                       sizeof(struct i965_wm_unit_state),
1607                       64);
1608     assert(bo);
1609     render_state->wm.state = bo;
1610
1611     /* COLOR CALCULATOR */
1612     dri_bo_unreference(render_state->cc.state);
1613     bo = dri_bo_alloc(i965->intel.bufmgr,
1614                       "color calc state",
1615                       sizeof(struct i965_cc_unit_state),
1616                       64);
1617     assert(bo);
1618     render_state->cc.state = bo;
1619
1620     dri_bo_unreference(render_state->cc.viewport);
1621     bo = dri_bo_alloc(i965->intel.bufmgr,
1622                       "cc viewport",
1623                       sizeof(struct i965_cc_viewport),
1624                       64);
1625     assert(bo);
1626     render_state->cc.viewport = bo;
1627 }
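
/*
 * Layout note for the combined "surface state & binding table" bo
 * allocated above: MAX_RENDER_SURFACES padded surface-state blocks come
 * first, immediately followed by the binding table (one unsigned int per
 * surface).  Sketch of the offset arithmetic implied by the allocation
 * size (BINDING_TABLE_OFFSET is the macro used when the table is
 * pointed at later in this file):
 *
 *     surface_state(i)  at i * SURFACE_STATE_PADDED_SIZE
 *     binding_table     at MAX_RENDER_SURFACES * SURFACE_STATE_PADDED_SIZE
 *     binding_table[i]  = offset of surface_state(i)
 */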
1628
1629 static void
1630 i965_render_put_surface(
1631     VADriverContextP   ctx,
1632     struct object_surface *obj_surface,
1633     const VARectangle *src_rect,
1634     const VARectangle *dst_rect,
1635     unsigned int       flags
1636 )
1637 {
1638     struct i965_driver_data *i965 = i965_driver_data(ctx);
1639     struct intel_batchbuffer *batch = i965->batch;
1640
1641     i965_render_initialize(ctx);
1642     i965_surface_render_state_setup(ctx, obj_surface, src_rect, dst_rect, flags);
1643     i965_surface_render_pipeline_setup(ctx);
1644     intel_batchbuffer_flush(batch);
1645 }
1646
1647 static void
1648 i965_render_put_subpicture(
1649     VADriverContextP   ctx,
1650     struct object_surface *obj_surface,
1651     const VARectangle *src_rect,
1652     const VARectangle *dst_rect
1653 )
1654 {
1655     struct i965_driver_data *i965 = i965_driver_data(ctx);
1656     struct intel_batchbuffer *batch = i965->batch;
1657     unsigned int index = obj_surface->subpic_render_idx;
1658     struct object_subpic *obj_subpic = obj_surface->obj_subpic[index];
1659
1660     assert(obj_subpic);
1661
1662     i965_render_initialize(ctx);
1663     i965_subpic_render_state_setup(ctx, obj_surface, src_rect, dst_rect);
1664     i965_subpic_render_pipeline_setup(ctx);
1665     i965_render_upload_image_palette(ctx, obj_subpic->obj_image, 0xff);
1666     intel_batchbuffer_flush(batch);
1667 }
1668
1669 /*
1670  * for GEN6+
1671  */
1672 static void 
1673 gen6_render_initialize(VADriverContextP ctx)
1674 {
1675     struct i965_driver_data *i965 = i965_driver_data(ctx);
1676     struct i965_render_state *render_state = &i965->render_state;
1677     dri_bo *bo;
1678
1679     /* VERTEX BUFFER */
1680     dri_bo_unreference(render_state->vb.vertex_buffer);
1681     bo = dri_bo_alloc(i965->intel.bufmgr,
1682                       "vertex buffer",
1683                       4096,
1684                       4096);
1685     assert(bo);
1686     render_state->vb.vertex_buffer = bo;
1687
1688     /* WM */
1689     dri_bo_unreference(render_state->wm.surface_state_binding_table_bo);
1690     bo = dri_bo_alloc(i965->intel.bufmgr,
1691                       "surface state & binding table",
1692                       (SURFACE_STATE_PADDED_SIZE + sizeof(unsigned int)) * MAX_RENDER_SURFACES,
1693                       4096);
1694     assert(bo);
1695     render_state->wm.surface_state_binding_table_bo = bo;
1696
1697     dri_bo_unreference(render_state->wm.sampler);
1698     bo = dri_bo_alloc(i965->intel.bufmgr,
1699                       "sampler state",
1700                       MAX_SAMPLERS * sizeof(struct i965_sampler_state),
1701                       4096);
1702     assert(bo);
1703     render_state->wm.sampler = bo;
1704     render_state->wm.sampler_count = 0;
1705
1706     /* COLOR CALCULATOR */
1707     dri_bo_unreference(render_state->cc.state);
1708     bo = dri_bo_alloc(i965->intel.bufmgr,
1709                       "color calc state",
1710                       sizeof(struct gen6_color_calc_state),
1711                       4096);
1712     assert(bo);
1713     render_state->cc.state = bo;
1714
1715     /* CC VIEWPORT */
1716     dri_bo_unreference(render_state->cc.viewport);
1717     bo = dri_bo_alloc(i965->intel.bufmgr,
1718                       "cc viewport",
1719                       sizeof(struct i965_cc_viewport),
1720                       4096);
1721     assert(bo);
1722     render_state->cc.viewport = bo;
1723
1724     /* BLEND STATE */
1725     dri_bo_unreference(render_state->cc.blend);
1726     bo = dri_bo_alloc(i965->intel.bufmgr,
1727                       "blend state",
1728                       sizeof(struct gen6_blend_state),
1729                       4096);
1730     assert(bo);
1731     render_state->cc.blend = bo;
1732
1733     /* DEPTH & STENCIL STATE */
1734     dri_bo_unreference(render_state->cc.depth_stencil);
1735     bo = dri_bo_alloc(i965->intel.bufmgr,
1736                       "depth & stencil state",
1737                       sizeof(struct gen6_depth_stencil_state),
1738                       4096);
1739     assert(bo);
1740     render_state->cc.depth_stencil = bo;
1741 }
1742
1743 static void
1744 gen6_render_color_calc_state(VADriverContextP ctx)
1745 {
1746     struct i965_driver_data *i965 = i965_driver_data(ctx);
1747     struct i965_render_state *render_state = &i965->render_state;
1748     struct gen6_color_calc_state *color_calc_state;
1749     
1750     dri_bo_map(render_state->cc.state, 1);
1751     assert(render_state->cc.state->virtual);
1752     color_calc_state = render_state->cc.state->virtual;
1753     memset(color_calc_state, 0, sizeof(*color_calc_state));
1754     color_calc_state->constant_r = 1.0;
1755     color_calc_state->constant_g = 0.0;
1756     color_calc_state->constant_b = 1.0;
1757     color_calc_state->constant_a = 1.0;
1758     dri_bo_unmap(render_state->cc.state);
1759 }
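
/*
 * gen6_render_color_calc_state() is the template all the gen6/gen7
 * state builders below follow: map the bo writable, zero the structure,
 * set only the non-default fields, unmap.  Generic sketch (hypothetical
 * state type):
 *
 *     struct some_state *s;
 *
 *     dri_bo_map(bo, 1);          // second argument: map for writing
 *     assert(bo->virtual);
 *     s = bo->virtual;
 *     memset(s, 0, sizeof(*s));
 *     s->field = value;
 *     dri_bo_unmap(bo);
 */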
1760
1761 static void
1762 gen6_render_blend_state(VADriverContextP ctx)
1763 {
1764     struct i965_driver_data *i965 = i965_driver_data(ctx);
1765     struct i965_render_state *render_state = &i965->render_state;
1766     struct gen6_blend_state *blend_state;
1767     
1768     dri_bo_map(render_state->cc.blend, 1);
1769     assert(render_state->cc.blend->virtual);
1770     blend_state = render_state->cc.blend->virtual;
1771     memset(blend_state, 0, sizeof(*blend_state));
1772     blend_state->blend1.logic_op_enable = 1;
    blend_state->blend1.logic_op_func = 0xc; /* COPY: pass source through unchanged */
1774     dri_bo_unmap(render_state->cc.blend);
1775 }
1776
1777 static void
1778 gen6_render_depth_stencil_state(VADriverContextP ctx)
1779 {
1780     struct i965_driver_data *i965 = i965_driver_data(ctx);
1781     struct i965_render_state *render_state = &i965->render_state;
1782     struct gen6_depth_stencil_state *depth_stencil_state;
1783     
1784     dri_bo_map(render_state->cc.depth_stencil, 1);
1785     assert(render_state->cc.depth_stencil->virtual);
1786     depth_stencil_state = render_state->cc.depth_stencil->virtual;
1787     memset(depth_stencil_state, 0, sizeof(*depth_stencil_state));
1788     dri_bo_unmap(render_state->cc.depth_stencil);
1789 }
1790
1791 static void
1792 gen6_render_setup_states(
1793     VADriverContextP   ctx,
1794     struct object_surface *obj_surface,
1795     const VARectangle *src_rect,
1796     const VARectangle *dst_rect,
1797     unsigned int       flags
1798 )
1799 {
1800     i965_render_dest_surface_state(ctx, 0);
1801     i965_render_src_surfaces_state(ctx, obj_surface, flags);
1802     i965_render_sampler(ctx);
1803     i965_render_cc_viewport(ctx);
1804     gen6_render_color_calc_state(ctx);
1805     gen6_render_blend_state(ctx);
1806     gen6_render_depth_stencil_state(ctx);
1807     i965_render_upload_constants(ctx, obj_surface);
1808     i965_render_upload_vertex(ctx, obj_surface, src_rect, dst_rect);
1809 }
1810
1811 static void
gen6_emit_invariant_states(VADriverContextP ctx)
1813 {
1814     struct i965_driver_data *i965 = i965_driver_data(ctx);
1815     struct intel_batchbuffer *batch = i965->batch;
1816
1817     OUT_BATCH(batch, CMD_PIPELINE_SELECT | PIPELINE_SELECT_3D);
1818
1819     OUT_BATCH(batch, GEN6_3DSTATE_MULTISAMPLE | (3 - 2));
1820     OUT_BATCH(batch, GEN6_3DSTATE_MULTISAMPLE_PIXEL_LOCATION_CENTER |
1821               GEN6_3DSTATE_MULTISAMPLE_NUMSAMPLES_1); /* 1 sample/pixel */
1822     OUT_BATCH(batch, 0);
1823
1824     OUT_BATCH(batch, GEN6_3DSTATE_SAMPLE_MASK | (2 - 2));
1825     OUT_BATCH(batch, 1);
1826
1827     /* Set system instruction pointer */
1828     OUT_BATCH(batch, CMD_STATE_SIP | 0);
1829     OUT_BATCH(batch, 0);
1830 }
1831
1832 static void
1833 gen6_emit_state_base_address(VADriverContextP ctx)
1834 {
1835     struct i965_driver_data *i965 = i965_driver_data(ctx);
1836     struct intel_batchbuffer *batch = i965->batch;
1837     struct i965_render_state *render_state = &i965->render_state;
1838
1839     OUT_BATCH(batch, CMD_STATE_BASE_ADDRESS | (10 - 2));
1840     OUT_BATCH(batch, BASE_ADDRESS_MODIFY); /* General state base address */
1841     OUT_RELOC(batch, render_state->wm.surface_state_binding_table_bo, I915_GEM_DOMAIN_INSTRUCTION, 0, BASE_ADDRESS_MODIFY); /* Surface state base address */
1842     OUT_BATCH(batch, BASE_ADDRESS_MODIFY); /* Dynamic state base address */
1843     OUT_BATCH(batch, BASE_ADDRESS_MODIFY); /* Indirect object base address */
1844     OUT_BATCH(batch, BASE_ADDRESS_MODIFY); /* Instruction base address */
1845     OUT_BATCH(batch, BASE_ADDRESS_MODIFY); /* General state upper bound */
1846     OUT_BATCH(batch, BASE_ADDRESS_MODIFY); /* Dynamic state upper bound */
1847     OUT_BATCH(batch, BASE_ADDRESS_MODIFY); /* Indirect object upper bound */
1848     OUT_BATCH(batch, BASE_ADDRESS_MODIFY); /* Instruction access upper bound */
1849 }
1850
1851 static void
1852 gen6_emit_viewport_state_pointers(VADriverContextP ctx)
1853 {
1854     struct i965_driver_data *i965 = i965_driver_data(ctx);
1855     struct intel_batchbuffer *batch = i965->batch;
1856     struct i965_render_state *render_state = &i965->render_state;
1857
1858     OUT_BATCH(batch, GEN6_3DSTATE_VIEWPORT_STATE_POINTERS |
1859               GEN6_3DSTATE_VIEWPORT_STATE_MODIFY_CC |
1860               (4 - 2));
1861     OUT_BATCH(batch, 0);
1862     OUT_BATCH(batch, 0);
1863     OUT_RELOC(batch, render_state->cc.viewport, I915_GEM_DOMAIN_INSTRUCTION, 0, 0);
1864 }
1865
1866 static void
1867 gen6_emit_urb(VADriverContextP ctx)
1868 {
1869     struct i965_driver_data *i965 = i965_driver_data(ctx);
1870     struct intel_batchbuffer *batch = i965->batch;
1871
1872     OUT_BATCH(batch, GEN6_3DSTATE_URB | (3 - 2));
1873     OUT_BATCH(batch, ((1 - 1) << GEN6_3DSTATE_URB_VS_SIZE_SHIFT) |
1874               (24 << GEN6_3DSTATE_URB_VS_ENTRIES_SHIFT)); /* at least 24 on GEN6 */
1875     OUT_BATCH(batch, (0 << GEN6_3DSTATE_URB_GS_SIZE_SHIFT) |
1876               (0 << GEN6_3DSTATE_URB_GS_ENTRIES_SHIFT)); /* no GS thread */
1877 }
1878
1879 static void
1880 gen6_emit_cc_state_pointers(VADriverContextP ctx)
1881 {
1882     struct i965_driver_data *i965 = i965_driver_data(ctx);
1883     struct intel_batchbuffer *batch = i965->batch;
1884     struct i965_render_state *render_state = &i965->render_state;
1885
1886     OUT_BATCH(batch, GEN6_3DSTATE_CC_STATE_POINTERS | (4 - 2));
1887     OUT_RELOC(batch, render_state->cc.blend, I915_GEM_DOMAIN_INSTRUCTION, 0, 1);
1888     OUT_RELOC(batch, render_state->cc.depth_stencil, I915_GEM_DOMAIN_INSTRUCTION, 0, 1);
1889     OUT_RELOC(batch, render_state->cc.state, I915_GEM_DOMAIN_INSTRUCTION, 0, 1);
1890 }
1891
1892 static void
1893 gen6_emit_sampler_state_pointers(VADriverContextP ctx)
1894 {
1895     struct i965_driver_data *i965 = i965_driver_data(ctx);
1896     struct intel_batchbuffer *batch = i965->batch;
1897     struct i965_render_state *render_state = &i965->render_state;
1898
1899     OUT_BATCH(batch, GEN6_3DSTATE_SAMPLER_STATE_POINTERS |
1900               GEN6_3DSTATE_SAMPLER_STATE_MODIFY_PS |
1901               (4 - 2));
1902     OUT_BATCH(batch, 0); /* VS */
1903     OUT_BATCH(batch, 0); /* GS */
    OUT_RELOC(batch, render_state->wm.sampler, I915_GEM_DOMAIN_INSTRUCTION, 0, 0);
1905 }
1906
1907 static void
1908 gen6_emit_binding_table(VADriverContextP ctx)
1909 {
1910     struct i965_driver_data *i965 = i965_driver_data(ctx);
1911     struct intel_batchbuffer *batch = i965->batch;
1912
1913     /* Binding table pointers */
1914     OUT_BATCH(batch, CMD_BINDING_TABLE_POINTERS |
1915               GEN6_BINDING_TABLE_MODIFY_PS |
1916               (4 - 2));
1917     OUT_BATCH(batch, 0);                /* vs */
1918     OUT_BATCH(batch, 0);                /* gs */
1919     /* Only the PS uses the binding table */
1920     OUT_BATCH(batch, BINDING_TABLE_OFFSET);
1921 }
1922
1923 static void
1924 gen6_emit_depth_buffer_state(VADriverContextP ctx)
1925 {
1926     struct i965_driver_data *i965 = i965_driver_data(ctx);
1927     struct intel_batchbuffer *batch = i965->batch;
1928
1929     OUT_BATCH(batch, CMD_DEPTH_BUFFER | (7 - 2));
1930     OUT_BATCH(batch, (I965_SURFACE_NULL << CMD_DEPTH_BUFFER_TYPE_SHIFT) |
1931               (I965_DEPTHFORMAT_D32_FLOAT << CMD_DEPTH_BUFFER_FORMAT_SHIFT));
1932     OUT_BATCH(batch, 0);
1933     OUT_BATCH(batch, 0);
1934     OUT_BATCH(batch, 0);
1935     OUT_BATCH(batch, 0);
1936     OUT_BATCH(batch, 0);
1937
1938     OUT_BATCH(batch, CMD_CLEAR_PARAMS | (2 - 2));
1939     OUT_BATCH(batch, 0);
1940 }
1941
1942 static void
1943 gen6_emit_drawing_rectangle(VADriverContextP ctx)
1944 {
1945     i965_render_drawing_rectangle(ctx);
1946 }
1947
1948 static void 
1949 gen6_emit_vs_state(VADriverContextP ctx)
1950 {
1951     struct i965_driver_data *i965 = i965_driver_data(ctx);
1952     struct intel_batchbuffer *batch = i965->batch;
1953
1954     /* disable VS constant buffer */
1955     OUT_BATCH(batch, GEN6_3DSTATE_CONSTANT_VS | (5 - 2));
1956     OUT_BATCH(batch, 0);
1957     OUT_BATCH(batch, 0);
1958     OUT_BATCH(batch, 0);
1959     OUT_BATCH(batch, 0);
1960         
1961     OUT_BATCH(batch, GEN6_3DSTATE_VS | (6 - 2));
1962     OUT_BATCH(batch, 0); /* without VS kernel */
1963     OUT_BATCH(batch, 0);
1964     OUT_BATCH(batch, 0);
1965     OUT_BATCH(batch, 0);
1966     OUT_BATCH(batch, 0); /* pass-through */
1967 }
1968
1969 static void 
1970 gen6_emit_gs_state(VADriverContextP ctx)
1971 {
1972     struct i965_driver_data *i965 = i965_driver_data(ctx);
1973     struct intel_batchbuffer *batch = i965->batch;
1974
1975     /* disable GS constant buffer */
1976     OUT_BATCH(batch, GEN6_3DSTATE_CONSTANT_GS | (5 - 2));
1977     OUT_BATCH(batch, 0);
1978     OUT_BATCH(batch, 0);
1979     OUT_BATCH(batch, 0);
1980     OUT_BATCH(batch, 0);
1981         
1982     OUT_BATCH(batch, GEN6_3DSTATE_GS | (7 - 2));
1983     OUT_BATCH(batch, 0); /* without GS kernel */
1984     OUT_BATCH(batch, 0);
1985     OUT_BATCH(batch, 0);
1986     OUT_BATCH(batch, 0);
1987     OUT_BATCH(batch, 0);
1988     OUT_BATCH(batch, 0); /* pass-through */
1989 }
1990
1991 static void 
1992 gen6_emit_clip_state(VADriverContextP ctx)
1993 {
1994     struct i965_driver_data *i965 = i965_driver_data(ctx);
1995     struct intel_batchbuffer *batch = i965->batch;
1996
1997     OUT_BATCH(batch, GEN6_3DSTATE_CLIP | (4 - 2));
1998     OUT_BATCH(batch, 0);
1999     OUT_BATCH(batch, 0); /* pass-through */
2000     OUT_BATCH(batch, 0);
2001 }
2002
2003 static void 
2004 gen6_emit_sf_state(VADriverContextP ctx)
2005 {
2006     struct i965_driver_data *i965 = i965_driver_data(ctx);
2007     struct intel_batchbuffer *batch = i965->batch;
2008
2009     OUT_BATCH(batch, GEN6_3DSTATE_SF | (20 - 2));
2010     OUT_BATCH(batch, (1 << GEN6_3DSTATE_SF_NUM_OUTPUTS_SHIFT) |
2011               (1 << GEN6_3DSTATE_SF_URB_ENTRY_READ_LENGTH_SHIFT) |
2012               (0 << GEN6_3DSTATE_SF_URB_ENTRY_READ_OFFSET_SHIFT));
2013     OUT_BATCH(batch, 0);
2014     OUT_BATCH(batch, GEN6_3DSTATE_SF_CULL_NONE);
2015     OUT_BATCH(batch, 2 << GEN6_3DSTATE_SF_TRIFAN_PROVOKE_SHIFT); /* DW4 */
2016     OUT_BATCH(batch, 0);
2017     OUT_BATCH(batch, 0);
2018     OUT_BATCH(batch, 0);
2019     OUT_BATCH(batch, 0);
2020     OUT_BATCH(batch, 0); /* DW9 */
2021     OUT_BATCH(batch, 0);
2022     OUT_BATCH(batch, 0);
2023     OUT_BATCH(batch, 0);
2024     OUT_BATCH(batch, 0);
2025     OUT_BATCH(batch, 0); /* DW14 */
2026     OUT_BATCH(batch, 0);
2027     OUT_BATCH(batch, 0);
2028     OUT_BATCH(batch, 0);
2029     OUT_BATCH(batch, 0);
2030     OUT_BATCH(batch, 0); /* DW19 */
2031 }
2032
2033 static void 
2034 gen6_emit_wm_state(VADriverContextP ctx, int kernel)
2035 {
2036     struct i965_driver_data *i965 = i965_driver_data(ctx);
2037     struct intel_batchbuffer *batch = i965->batch;
2038     struct i965_render_state *render_state = &i965->render_state;
2039
2040     OUT_BATCH(batch, GEN6_3DSTATE_CONSTANT_PS |
2041               GEN6_3DSTATE_CONSTANT_BUFFER_0_ENABLE |
2042               (5 - 2));
2043     OUT_RELOC(batch, 
2044               render_state->curbe.bo,
2045               I915_GEM_DOMAIN_INSTRUCTION, 0,
2046               0);
2047     OUT_BATCH(batch, 0);
2048     OUT_BATCH(batch, 0);
2049     OUT_BATCH(batch, 0);
2050
2051     OUT_BATCH(batch, GEN6_3DSTATE_WM | (9 - 2));
2052     OUT_RELOC(batch, render_state->render_kernels[kernel].bo,
2053               I915_GEM_DOMAIN_INSTRUCTION, 0,
2054               0);
2055     OUT_BATCH(batch, (1 << GEN6_3DSTATE_WM_SAMPLER_COUNT_SHITF) |
2056               (5 << GEN6_3DSTATE_WM_BINDING_TABLE_ENTRY_COUNT_SHIFT));
2057     OUT_BATCH(batch, 0);
2058     OUT_BATCH(batch, (6 << GEN6_3DSTATE_WM_DISPATCH_START_GRF_0_SHIFT)); /* DW4 */
2059     OUT_BATCH(batch, ((render_state->max_wm_threads - 1) << GEN6_3DSTATE_WM_MAX_THREADS_SHIFT) |
2060               GEN6_3DSTATE_WM_DISPATCH_ENABLE |
2061               GEN6_3DSTATE_WM_16_DISPATCH_ENABLE);
2062     OUT_BATCH(batch, (1 << GEN6_3DSTATE_WM_NUM_SF_OUTPUTS_SHIFT) |
2063               GEN6_3DSTATE_WM_PERSPECTIVE_PIXEL_BARYCENTRIC);
2064     OUT_BATCH(batch, 0);
2065     OUT_BATCH(batch, 0);
2066 }
2067
2068 static void
2069 gen6_emit_vertex_element_state(VADriverContextP ctx)
2070 {
2071     struct i965_driver_data *i965 = i965_driver_data(ctx);
2072     struct intel_batchbuffer *batch = i965->batch;
2073
2074     /* Set up our vertex elements, sourced from the single vertex buffer. */
2075     OUT_BATCH(batch, CMD_VERTEX_ELEMENTS | (5 - 2));
2076     /* offset 0: X,Y -> {X, Y, 1.0, 1.0} */
2077     OUT_BATCH(batch, (0 << GEN6_VE0_VERTEX_BUFFER_INDEX_SHIFT) |
2078               GEN6_VE0_VALID |
2079               (I965_SURFACEFORMAT_R32G32_FLOAT << VE0_FORMAT_SHIFT) |
2080               (0 << VE0_OFFSET_SHIFT));
2081     OUT_BATCH(batch, (I965_VFCOMPONENT_STORE_SRC << VE1_VFCOMPONENT_0_SHIFT) |
2082               (I965_VFCOMPONENT_STORE_SRC << VE1_VFCOMPONENT_1_SHIFT) |
2083               (I965_VFCOMPONENT_STORE_1_FLT << VE1_VFCOMPONENT_2_SHIFT) |
2084               (I965_VFCOMPONENT_STORE_1_FLT << VE1_VFCOMPONENT_3_SHIFT));
2085     /* offset 8: S0, T0 -> {S0, T0, 1.0, 1.0} */
2086     OUT_BATCH(batch, (0 << GEN6_VE0_VERTEX_BUFFER_INDEX_SHIFT) |
2087               GEN6_VE0_VALID |
2088               (I965_SURFACEFORMAT_R32G32_FLOAT << VE0_FORMAT_SHIFT) |
2089               (8 << VE0_OFFSET_SHIFT));
2090     OUT_BATCH(batch, (I965_VFCOMPONENT_STORE_SRC << VE1_VFCOMPONENT_0_SHIFT) | 
2091               (I965_VFCOMPONENT_STORE_SRC << VE1_VFCOMPONENT_1_SHIFT) |
2092               (I965_VFCOMPONENT_STORE_1_FLT << VE1_VFCOMPONENT_2_SHIFT) |
2093               (I965_VFCOMPONENT_STORE_1_FLT << VE1_VFCOMPONENT_3_SHIFT));
2094 }
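
/*
 * Each vertex element above expands a two-float fetch into a full
 * 4-component VS input: STORE_SRC passes a fetched component through,
 * STORE_1_FLT substitutes a constant 1.0f.  So for the two elements:
 *
 *     fetch (x, y)   at offset 0  ->  (x, y, 1.0, 1.0)
 *     fetch (s0, t0) at offset 8  ->  (s0, t0, 1.0, 1.0)
 */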
2095
2096 static void
2097 gen6_emit_vertices(VADriverContextP ctx)
2098 {
2099     struct i965_driver_data *i965 = i965_driver_data(ctx);
2100     struct intel_batchbuffer *batch = i965->batch;
2101     struct i965_render_state *render_state = &i965->render_state;
2102
2103     BEGIN_BATCH(batch, 11);
2104     OUT_BATCH(batch, CMD_VERTEX_BUFFERS | 3);
2105     OUT_BATCH(batch, 
2106               (0 << GEN6_VB0_BUFFER_INDEX_SHIFT) |
2107               GEN6_VB0_VERTEXDATA |
2108               ((4 * 4) << VB0_BUFFER_PITCH_SHIFT));
2109     OUT_RELOC(batch, render_state->vb.vertex_buffer, I915_GEM_DOMAIN_VERTEX, 0, 0);
2110     OUT_RELOC(batch, render_state->vb.vertex_buffer, I915_GEM_DOMAIN_VERTEX, 0, 12 * 4);
2111     OUT_BATCH(batch, 0);
2112
2113     OUT_BATCH(batch, 
2114               CMD_3DPRIMITIVE |
2115               _3DPRIMITIVE_VERTEX_SEQUENTIAL |
2116               (_3DPRIM_RECTLIST << _3DPRIMITIVE_TOPOLOGY_SHIFT) |
2117               (0 << 9) |
2118               4);
2119     OUT_BATCH(batch, 3); /* vertex count per instance */
2120     OUT_BATCH(batch, 0); /* start vertex offset */
2121     OUT_BATCH(batch, 1); /* single instance */
2122     OUT_BATCH(batch, 0); /* start instance location */
2123     OUT_BATCH(batch, 0); /* index buffer offset, ignored */
2124     ADVANCE_BATCH(batch);
2125 }
2126
2127 static void
2128 gen6_render_emit_states(VADriverContextP ctx, int kernel)
2129 {
2130     struct i965_driver_data *i965 = i965_driver_data(ctx);
2131     struct intel_batchbuffer *batch = i965->batch;
2132
2133     intel_batchbuffer_start_atomic(batch, 0x1000);
2134     intel_batchbuffer_emit_mi_flush(batch);
    gen6_emit_invariant_states(ctx);
2136     gen6_emit_state_base_address(ctx);
2137     gen6_emit_viewport_state_pointers(ctx);
2138     gen6_emit_urb(ctx);
2139     gen6_emit_cc_state_pointers(ctx);
2140     gen6_emit_sampler_state_pointers(ctx);
2141     gen6_emit_vs_state(ctx);
2142     gen6_emit_gs_state(ctx);
2143     gen6_emit_clip_state(ctx);
2144     gen6_emit_sf_state(ctx);
2145     gen6_emit_wm_state(ctx, kernel);
2146     gen6_emit_binding_table(ctx);
2147     gen6_emit_depth_buffer_state(ctx);
2148     gen6_emit_drawing_rectangle(ctx);
2149     gen6_emit_vertex_element_state(ctx);
2150     gen6_emit_vertices(ctx);
2151     intel_batchbuffer_end_atomic(batch);
2152 }
2153
2154 static void
2155 gen6_render_put_surface(
2156     VADriverContextP   ctx,
2157     struct object_surface *obj_surface,
2158     const VARectangle *src_rect,
2159     const VARectangle *dst_rect,
2160     unsigned int       flags
2161 )
2162 {
2163     struct i965_driver_data *i965 = i965_driver_data(ctx);
2164     struct intel_batchbuffer *batch = i965->batch;
2165
2166     gen6_render_initialize(ctx);
2167     gen6_render_setup_states(ctx, obj_surface, src_rect, dst_rect, flags);
2168     i965_clear_dest_region(ctx);
2169     gen6_render_emit_states(ctx, PS_KERNEL);
2170     intel_batchbuffer_flush(batch);
2171 }
2172
2173 static void
2174 gen6_subpicture_render_blend_state(VADriverContextP ctx)
2175 {
2176     struct i965_driver_data *i965 = i965_driver_data(ctx);
2177     struct i965_render_state *render_state = &i965->render_state;
2178     struct gen6_blend_state *blend_state;
2179
2181     dri_bo_map(render_state->cc.blend, 1);
2182     assert(render_state->cc.blend->virtual);
2183     blend_state = render_state->cc.blend->virtual;
2184     memset(blend_state, 0, sizeof(*blend_state));
2185     blend_state->blend0.dest_blend_factor = I965_BLENDFACTOR_INV_SRC_ALPHA;
2186     blend_state->blend0.source_blend_factor = I965_BLENDFACTOR_SRC_ALPHA;
2187     blend_state->blend0.blend_func = I965_BLENDFUNCTION_ADD;
2188     blend_state->blend0.blend_enable = 1;
2189     blend_state->blend1.post_blend_clamp_enable = 1;
2190     blend_state->blend1.pre_blend_clamp_enable = 1;
2191     blend_state->blend1.clamp_range = 0; /* clamp range [0, 1] */
2192     dri_bo_unmap(render_state->cc.blend);
2193 }
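
/*
 * With SRC_ALPHA / INV_SRC_ALPHA and BLENDFUNCTION_ADD selected above,
 * the hardware computes, per channel and clamped to [0, 1]:
 *
 *     out = src * src_alpha + dst * (1 - src_alpha)
 *
 * so an opaque subpicture pixel replaces the video pixel underneath and
 * a fully transparent one leaves it untouched.
 */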
2194
2195 static void
2196 gen6_subpicture_render_setup_states(
2197     VADriverContextP   ctx,
2198     struct object_surface *obj_surface,
2199     const VARectangle *src_rect,
2200     const VARectangle *dst_rect
2201 )
2202 {
2203     i965_render_dest_surface_state(ctx, 0);
2204     i965_subpic_render_src_surfaces_state(ctx, obj_surface);
2205     i965_render_sampler(ctx);
2206     i965_render_cc_viewport(ctx);
2207     gen6_render_color_calc_state(ctx);
2208     gen6_subpicture_render_blend_state(ctx);
2209     gen6_render_depth_stencil_state(ctx);
2210     i965_subpic_render_upload_constants(ctx, obj_surface);
2211     i965_subpic_render_upload_vertex(ctx, obj_surface, dst_rect);
2212 }
2213
2214 static void
2215 gen6_render_put_subpicture(
2216     VADriverContextP   ctx,
2217     struct object_surface *obj_surface,
2218     const VARectangle *src_rect,
2219     const VARectangle *dst_rect
2220 )
2221 {
2222     struct i965_driver_data *i965 = i965_driver_data(ctx);
2223     struct intel_batchbuffer *batch = i965->batch;
2224     unsigned int index = obj_surface->subpic_render_idx;
2225     struct object_subpic *obj_subpic = obj_surface->obj_subpic[index];
2226
2227     assert(obj_subpic);
2228     gen6_render_initialize(ctx);
2229     gen6_subpicture_render_setup_states(ctx, obj_surface, src_rect, dst_rect);
2230     gen6_render_emit_states(ctx, PS_SUBPIC_KERNEL);
2231     i965_render_upload_image_palette(ctx, obj_subpic->obj_image, 0xff);
2232     intel_batchbuffer_flush(batch);
2233 }
2234
2235 /*
2236  * for GEN7
2237  */
2238 static void 
2239 gen7_render_initialize(VADriverContextP ctx)
2240 {
2241     struct i965_driver_data *i965 = i965_driver_data(ctx);
2242     struct i965_render_state *render_state = &i965->render_state;
2243     dri_bo *bo;
2244
2245     /* VERTEX BUFFER */
2246     dri_bo_unreference(render_state->vb.vertex_buffer);
2247     bo = dri_bo_alloc(i965->intel.bufmgr,
2248                       "vertex buffer",
2249                       4096,
2250                       4096);
2251     assert(bo);
2252     render_state->vb.vertex_buffer = bo;
2253
2254     /* WM */
2255     dri_bo_unreference(render_state->wm.surface_state_binding_table_bo);
2256     bo = dri_bo_alloc(i965->intel.bufmgr,
2257                       "surface state & binding table",
2258                       (SURFACE_STATE_PADDED_SIZE + sizeof(unsigned int)) * MAX_RENDER_SURFACES,
2259                       4096);
2260     assert(bo);
2261     render_state->wm.surface_state_binding_table_bo = bo;
2262
2263     dri_bo_unreference(render_state->wm.sampler);
2264     bo = dri_bo_alloc(i965->intel.bufmgr,
2265                       "sampler state",
2266                       MAX_SAMPLERS * sizeof(struct gen7_sampler_state),
2267                       4096);
2268     assert(bo);
2269     render_state->wm.sampler = bo;
2270     render_state->wm.sampler_count = 0;
2271
2272     /* COLOR CALCULATOR */
2273     dri_bo_unreference(render_state->cc.state);
2274     bo = dri_bo_alloc(i965->intel.bufmgr,
2275                       "color calc state",
2276                       sizeof(struct gen6_color_calc_state),
2277                       4096);
2278     assert(bo);
2279     render_state->cc.state = bo;
2280
2281     /* CC VIEWPORT */
2282     dri_bo_unreference(render_state->cc.viewport);
2283     bo = dri_bo_alloc(i965->intel.bufmgr,
2284                       "cc viewport",
2285                       sizeof(struct i965_cc_viewport),
2286                       4096);
2287     assert(bo);
2288     render_state->cc.viewport = bo;
2289
2290     /* BLEND STATE */
2291     dri_bo_unreference(render_state->cc.blend);
2292     bo = dri_bo_alloc(i965->intel.bufmgr,
2293                       "blend state",
2294                       sizeof(struct gen6_blend_state),
2295                       4096);
2296     assert(bo);
2297     render_state->cc.blend = bo;
2298
2299     /* DEPTH & STENCIL STATE */
2300     dri_bo_unreference(render_state->cc.depth_stencil);
2301     bo = dri_bo_alloc(i965->intel.bufmgr,
2302                       "depth & stencil state",
2303                       sizeof(struct gen6_depth_stencil_state),
2304                       4096);
2305     assert(bo);
2306     render_state->cc.depth_stencil = bo;
2307 }
2308
2309 static void
2310 gen7_render_color_calc_state(VADriverContextP ctx)
2311 {
2312     struct i965_driver_data *i965 = i965_driver_data(ctx);
2313     struct i965_render_state *render_state = &i965->render_state;
2314     struct gen6_color_calc_state *color_calc_state;
2315     
2316     dri_bo_map(render_state->cc.state, 1);
2317     assert(render_state->cc.state->virtual);
2318     color_calc_state = render_state->cc.state->virtual;
2319     memset(color_calc_state, 0, sizeof(*color_calc_state));
2320     color_calc_state->constant_r = 1.0;
2321     color_calc_state->constant_g = 0.0;
2322     color_calc_state->constant_b = 1.0;
2323     color_calc_state->constant_a = 1.0;
2324     dri_bo_unmap(render_state->cc.state);
2325 }
2326
2327 static void
2328 gen7_render_blend_state(VADriverContextP ctx)
2329 {
2330     struct i965_driver_data *i965 = i965_driver_data(ctx);
2331     struct i965_render_state *render_state = &i965->render_state;
2332     struct gen6_blend_state *blend_state;
2333     
2334     dri_bo_map(render_state->cc.blend, 1);
2335     assert(render_state->cc.blend->virtual);
2336     blend_state = render_state->cc.blend->virtual;
2337     memset(blend_state, 0, sizeof(*blend_state));
2338     blend_state->blend1.logic_op_enable = 1;
    blend_state->blend1.logic_op_func = 0xc; /* COPY: pass source through unchanged */
2340     blend_state->blend1.pre_blend_clamp_enable = 1;
2341     dri_bo_unmap(render_state->cc.blend);
2342 }
2343
2344 static void
2345 gen7_render_depth_stencil_state(VADriverContextP ctx)
2346 {
2347     struct i965_driver_data *i965 = i965_driver_data(ctx);
2348     struct i965_render_state *render_state = &i965->render_state;
2349     struct gen6_depth_stencil_state *depth_stencil_state;
2350     
2351     dri_bo_map(render_state->cc.depth_stencil, 1);
2352     assert(render_state->cc.depth_stencil->virtual);
2353     depth_stencil_state = render_state->cc.depth_stencil->virtual;
2354     memset(depth_stencil_state, 0, sizeof(*depth_stencil_state));
2355     dri_bo_unmap(render_state->cc.depth_stencil);
2356 }
2357
2358 static void 
2359 gen7_render_sampler(VADriverContextP ctx)
2360 {
2361     struct i965_driver_data *i965 = i965_driver_data(ctx);
2362     struct i965_render_state *render_state = &i965->render_state;
2363     struct gen7_sampler_state *sampler_state;
2364     int i;
2365     
2366     assert(render_state->wm.sampler_count > 0);
2367     assert(render_state->wm.sampler_count <= MAX_SAMPLERS);
2368
2369     dri_bo_map(render_state->wm.sampler, 1);
2370     assert(render_state->wm.sampler->virtual);
2371     sampler_state = render_state->wm.sampler->virtual;
2372     for (i = 0; i < render_state->wm.sampler_count; i++) {
2373         memset(sampler_state, 0, sizeof(*sampler_state));
2374         sampler_state->ss0.min_filter = I965_MAPFILTER_LINEAR;
2375         sampler_state->ss0.mag_filter = I965_MAPFILTER_LINEAR;
2376         sampler_state->ss3.r_wrap_mode = I965_TEXCOORDMODE_CLAMP;
2377         sampler_state->ss3.s_wrap_mode = I965_TEXCOORDMODE_CLAMP;
2378         sampler_state->ss3.t_wrap_mode = I965_TEXCOORDMODE_CLAMP;
2379         sampler_state++;
2380     }
2381
2382     dri_bo_unmap(render_state->wm.sampler);
2383 }
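
/*
 * The samplers programmed above select bilinear min/mag filtering with
 * all three texture coordinates clamped at the surface edge.  For
 * reference, bilinear filtering weights the four nearest texels by the
 * fractional parts (fx, fy) of the sample position:
 *
 *     c = (1-fx)*(1-fy)*t00 + fx*(1-fy)*t10
 *       + (1-fx)*fy*t01     + fx*fy*t11
 */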
2384
2385 static void
2386 gen7_render_setup_states(
2387     VADriverContextP   ctx,
2388     struct object_surface *obj_surface,
2389     const VARectangle *src_rect,
2390     const VARectangle *dst_rect,
2391     unsigned int       flags
2392 )
2393 {
2394     i965_render_dest_surface_state(ctx, 0);
2395     i965_render_src_surfaces_state(ctx, obj_surface, flags);
2396     gen7_render_sampler(ctx);
2397     i965_render_cc_viewport(ctx);
2398     gen7_render_color_calc_state(ctx);
2399     gen7_render_blend_state(ctx);
2400     gen7_render_depth_stencil_state(ctx);
2401     i965_render_upload_constants(ctx, obj_surface);
2402     i965_render_upload_vertex(ctx, obj_surface, src_rect, dst_rect);
2403 }
2404
2405 static void
gen7_emit_invariant_states(VADriverContextP ctx)
2407 {
2408     struct i965_driver_data *i965 = i965_driver_data(ctx);
2409     struct intel_batchbuffer *batch = i965->batch;
2410
2411     BEGIN_BATCH(batch, 1);
2412     OUT_BATCH(batch, CMD_PIPELINE_SELECT | PIPELINE_SELECT_3D);
2413     ADVANCE_BATCH(batch);
2414
2415     BEGIN_BATCH(batch, 4);
2416     OUT_BATCH(batch, GEN6_3DSTATE_MULTISAMPLE | (4 - 2));
2417     OUT_BATCH(batch, GEN6_3DSTATE_MULTISAMPLE_PIXEL_LOCATION_CENTER |
2418               GEN6_3DSTATE_MULTISAMPLE_NUMSAMPLES_1); /* 1 sample/pixel */
2419     OUT_BATCH(batch, 0);
2420     OUT_BATCH(batch, 0);
2421     ADVANCE_BATCH(batch);
2422
2423     BEGIN_BATCH(batch, 2);
2424     OUT_BATCH(batch, GEN6_3DSTATE_SAMPLE_MASK | (2 - 2));
2425     OUT_BATCH(batch, 1);
2426     ADVANCE_BATCH(batch);
2427
2428     /* Set system instruction pointer */
2429     BEGIN_BATCH(batch, 2);
2430     OUT_BATCH(batch, CMD_STATE_SIP | 0);
2431     OUT_BATCH(batch, 0);
2432     ADVANCE_BATCH(batch);
2433 }
2434
2435 static void
2436 gen7_emit_state_base_address(VADriverContextP ctx)
2437 {
2438     struct i965_driver_data *i965 = i965_driver_data(ctx);
2439     struct intel_batchbuffer *batch = i965->batch;
2440     struct i965_render_state *render_state = &i965->render_state;
2441
2442     OUT_BATCH(batch, CMD_STATE_BASE_ADDRESS | (10 - 2));
2443     OUT_BATCH(batch, BASE_ADDRESS_MODIFY); /* General state base address */
2444     OUT_RELOC(batch, render_state->wm.surface_state_binding_table_bo, I915_GEM_DOMAIN_INSTRUCTION, 0, BASE_ADDRESS_MODIFY); /* Surface state base address */
2445     OUT_BATCH(batch, BASE_ADDRESS_MODIFY); /* Dynamic state base address */
2446     OUT_BATCH(batch, BASE_ADDRESS_MODIFY); /* Indirect object base address */
2447     OUT_BATCH(batch, BASE_ADDRESS_MODIFY); /* Instruction base address */
2448     OUT_BATCH(batch, BASE_ADDRESS_MODIFY); /* General state upper bound */
2449     OUT_BATCH(batch, BASE_ADDRESS_MODIFY); /* Dynamic state upper bound */
2450     OUT_BATCH(batch, BASE_ADDRESS_MODIFY); /* Indirect object upper bound */
2451     OUT_BATCH(batch, BASE_ADDRESS_MODIFY); /* Instruction access upper bound */
2452 }
2453
2454 static void
2455 gen7_emit_viewport_state_pointers(VADriverContextP ctx)
2456 {
2457     struct i965_driver_data *i965 = i965_driver_data(ctx);
2458     struct intel_batchbuffer *batch = i965->batch;
2459     struct i965_render_state *render_state = &i965->render_state;
2460
2461     BEGIN_BATCH(batch, 2);
2462     OUT_BATCH(batch, GEN7_3DSTATE_VIEWPORT_STATE_POINTERS_CC | (2 - 2));
2463     OUT_RELOC(batch,
2464               render_state->cc.viewport,
2465               I915_GEM_DOMAIN_INSTRUCTION, 0,
2466               0);
2467     ADVANCE_BATCH(batch);
2468
2469     BEGIN_BATCH(batch, 2);
2470     OUT_BATCH(batch, GEN7_3DSTATE_VIEWPORT_STATE_POINTERS_SF_CL | (2 - 2));
2471     OUT_BATCH(batch, 0);
2472     ADVANCE_BATCH(batch);
2473 }
2474
2475 /*
2476  * URB layout on GEN7 
2477  * ----------------------------------------
2478  * | PS Push Constants (8KB) | VS entries |
2479  * ----------------------------------------
2480  */
2481 static void
2482 gen7_emit_urb(VADriverContextP ctx)
2483 {
2484     struct i965_driver_data *i965 = i965_driver_data(ctx);
2485     struct intel_batchbuffer *batch = i965->batch;
2486     unsigned int num_urb_entries = 32;
2487
2488     if (IS_HASWELL(i965->intel.device_id))
2489         num_urb_entries = 64;
2490
2491     BEGIN_BATCH(batch, 2);
2492     OUT_BATCH(batch, GEN7_3DSTATE_PUSH_CONSTANT_ALLOC_PS | (2 - 2));
    OUT_BATCH(batch, 8); /* 8KB of PS push-constant space, in 1KB units */
2494     ADVANCE_BATCH(batch);
2495
2496     BEGIN_BATCH(batch, 2);
2497     OUT_BATCH(batch, GEN7_3DSTATE_URB_VS | (2 - 2));
2498     OUT_BATCH(batch, 
2499               (num_urb_entries << GEN7_URB_ENTRY_NUMBER_SHIFT) |
2500               (2 - 1) << GEN7_URB_ENTRY_SIZE_SHIFT |
2501               (1 << GEN7_URB_STARTING_ADDRESS_SHIFT));
    ADVANCE_BATCH(batch);

    BEGIN_BATCH(batch, 2);
    OUT_BATCH(batch, GEN7_3DSTATE_URB_GS | (2 - 2));
    OUT_BATCH(batch,
              (0 << GEN7_URB_ENTRY_SIZE_SHIFT) |
              (1 << GEN7_URB_STARTING_ADDRESS_SHIFT));
    ADVANCE_BATCH(batch);

    BEGIN_BATCH(batch, 2);
    OUT_BATCH(batch, GEN7_3DSTATE_URB_HS | (2 - 2));
    OUT_BATCH(batch,
              (0 << GEN7_URB_ENTRY_SIZE_SHIFT) |
              (2 << GEN7_URB_STARTING_ADDRESS_SHIFT));
    ADVANCE_BATCH(batch);

    BEGIN_BATCH(batch, 2);
    OUT_BATCH(batch, GEN7_3DSTATE_URB_DS | (2 - 2));
    OUT_BATCH(batch,
              (0 << GEN7_URB_ENTRY_SIZE_SHIFT) |
              (2 << GEN7_URB_STARTING_ADDRESS_SHIFT));
    ADVANCE_BATCH(batch);
2524 }
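
/*
 * The arithmetic behind the layout pictured above, assuming the units
 * documented for these GEN7 commands (push-constant allocation in 1KB
 * blocks, URB starting addresses in 8KB blocks, entry sizes in 64-byte
 * rows programmed as N - 1):
 *
 *     PS push constants: 8 * 1KB            -> bytes 0K .. 8K of the URB
 *     VS entries:        start 1 * 8KB = 8K, entry size (2 - 1) -> 128B,
 *                        32 entries (64 on Haswell)
 *     GS/HS/DS:          0 entries (those stages are bypassed)
 */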
2525
2526 static void
2527 gen7_emit_cc_state_pointers(VADriverContextP ctx)
2528 {
2529     struct i965_driver_data *i965 = i965_driver_data(ctx);
2530     struct intel_batchbuffer *batch = i965->batch;
2531     struct i965_render_state *render_state = &i965->render_state;
2532
2533     BEGIN_BATCH(batch, 2);
2534     OUT_BATCH(batch, GEN6_3DSTATE_CC_STATE_POINTERS | (2 - 2));
2535     OUT_RELOC(batch,
2536               render_state->cc.state,
2537               I915_GEM_DOMAIN_INSTRUCTION, 0,
2538               1);
2539     ADVANCE_BATCH(batch);
2540
2541     BEGIN_BATCH(batch, 2);
2542     OUT_BATCH(batch, GEN7_3DSTATE_BLEND_STATE_POINTERS | (2 - 2));
2543     OUT_RELOC(batch,
2544               render_state->cc.blend,
2545               I915_GEM_DOMAIN_INSTRUCTION, 0,
2546               1);
2547     ADVANCE_BATCH(batch);
2548
2549     BEGIN_BATCH(batch, 2);
2550     OUT_BATCH(batch, GEN7_3DSTATE_DEPTH_STENCIL_STATE_POINTERS | (2 - 2));
2551     OUT_RELOC(batch,
2552               render_state->cc.depth_stencil,
2553               I915_GEM_DOMAIN_INSTRUCTION, 0, 
2554               1);
2555     ADVANCE_BATCH(batch);
2556 }
2557
2558 static void
2559 gen7_emit_sampler_state_pointers(VADriverContextP ctx)
2560 {
2561     struct i965_driver_data *i965 = i965_driver_data(ctx);
2562     struct intel_batchbuffer *batch = i965->batch;
2563     struct i965_render_state *render_state = &i965->render_state;
2564
2565     BEGIN_BATCH(batch, 2);
2566     OUT_BATCH(batch, GEN7_3DSTATE_SAMPLER_STATE_POINTERS_PS | (2 - 2));
2567     OUT_RELOC(batch,
2568               render_state->wm.sampler,
2569               I915_GEM_DOMAIN_INSTRUCTION, 0,
2570               0);
2571     ADVANCE_BATCH(batch);
2572 }
2573
2574 static void
2575 gen7_emit_binding_table(VADriverContextP ctx)
2576 {
2577     struct i965_driver_data *i965 = i965_driver_data(ctx);
2578     struct intel_batchbuffer *batch = i965->batch;
2579
2580     BEGIN_BATCH(batch, 2);
2581     OUT_BATCH(batch, GEN7_3DSTATE_BINDING_TABLE_POINTERS_PS | (2 - 2));
2582     OUT_BATCH(batch, BINDING_TABLE_OFFSET);
2583     ADVANCE_BATCH(batch);
2584 }
2585
2586 static void
2587 gen7_emit_depth_buffer_state(VADriverContextP ctx)
2588 {
2589     struct i965_driver_data *i965 = i965_driver_data(ctx);
2590     struct intel_batchbuffer *batch = i965->batch;
2591
2592     BEGIN_BATCH(batch, 7);
2593     OUT_BATCH(batch, GEN7_3DSTATE_DEPTH_BUFFER | (7 - 2));
2594     OUT_BATCH(batch,
2595               (I965_DEPTHFORMAT_D32_FLOAT << 18) |
2596               (I965_SURFACE_NULL << 29));
2597     OUT_BATCH(batch, 0);
2598     OUT_BATCH(batch, 0);
2599     OUT_BATCH(batch, 0);
2600     OUT_BATCH(batch, 0);
2601     OUT_BATCH(batch, 0);
2602     ADVANCE_BATCH(batch);
2603
2604     BEGIN_BATCH(batch, 3);
2605     OUT_BATCH(batch, GEN7_3DSTATE_CLEAR_PARAMS | (3 - 2));
2606     OUT_BATCH(batch, 0);
2607     OUT_BATCH(batch, 0);
2608     ADVANCE_BATCH(batch);
2609 }
2610
2611 static void
2612 gen7_emit_drawing_rectangle(VADriverContextP ctx)
2613 {
2614     i965_render_drawing_rectangle(ctx);
2615 }
2616
2617 static void 
2618 gen7_emit_vs_state(VADriverContextP ctx)
2619 {
2620     struct i965_driver_data *i965 = i965_driver_data(ctx);
2621     struct intel_batchbuffer *batch = i965->batch;
2622
2623     /* disable VS constant buffer */
2624     OUT_BATCH(batch, GEN6_3DSTATE_CONSTANT_VS | (7 - 2));
2625     OUT_BATCH(batch, 0);
2626     OUT_BATCH(batch, 0);
2627     OUT_BATCH(batch, 0);
2628     OUT_BATCH(batch, 0);
2629     OUT_BATCH(batch, 0);
2630     OUT_BATCH(batch, 0);
2631         
2632     OUT_BATCH(batch, GEN6_3DSTATE_VS | (6 - 2));
2633     OUT_BATCH(batch, 0); /* without VS kernel */
2634     OUT_BATCH(batch, 0);
2635     OUT_BATCH(batch, 0);
2636     OUT_BATCH(batch, 0);
2637     OUT_BATCH(batch, 0); /* pass-through */
2638 }
2639
2640 static void 
2641 gen7_emit_bypass_state(VADriverContextP ctx)
2642 {
2643     struct i965_driver_data *i965 = i965_driver_data(ctx);
2644     struct intel_batchbuffer *batch = i965->batch;
2645
2646     /* bypass GS */
2647     BEGIN_BATCH(batch, 7);
2648     OUT_BATCH(batch, GEN6_3DSTATE_CONSTANT_GS | (7 - 2));
2649     OUT_BATCH(batch, 0);
2650     OUT_BATCH(batch, 0);
2651     OUT_BATCH(batch, 0);
2652     OUT_BATCH(batch, 0);
2653     OUT_BATCH(batch, 0);
2654     OUT_BATCH(batch, 0);
2655     ADVANCE_BATCH(batch);
2656
2657     BEGIN_BATCH(batch, 7);      
2658     OUT_BATCH(batch, GEN6_3DSTATE_GS | (7 - 2));
2659     OUT_BATCH(batch, 0); /* without GS kernel */
2660     OUT_BATCH(batch, 0);
2661     OUT_BATCH(batch, 0);
2662     OUT_BATCH(batch, 0);
2663     OUT_BATCH(batch, 0);
2664     OUT_BATCH(batch, 0); /* pass-through */
2665     ADVANCE_BATCH(batch);
2666
2667     BEGIN_BATCH(batch, 2);
2668     OUT_BATCH(batch, GEN7_3DSTATE_BINDING_TABLE_POINTERS_GS | (2 - 2));
2669     OUT_BATCH(batch, 0);
2670     ADVANCE_BATCH(batch);
2671
2672     /* disable HS */
2673     BEGIN_BATCH(batch, 7);
2674     OUT_BATCH(batch, GEN7_3DSTATE_CONSTANT_HS | (7 - 2));
2675     OUT_BATCH(batch, 0);
2676     OUT_BATCH(batch, 0);
2677     OUT_BATCH(batch, 0);
2678     OUT_BATCH(batch, 0);
2679     OUT_BATCH(batch, 0);
2680     OUT_BATCH(batch, 0);
2681     ADVANCE_BATCH(batch);
2682
2683     BEGIN_BATCH(batch, 7);
2684     OUT_BATCH(batch, GEN7_3DSTATE_HS | (7 - 2));
2685     OUT_BATCH(batch, 0);
2686     OUT_BATCH(batch, 0);
2687     OUT_BATCH(batch, 0);
2688     OUT_BATCH(batch, 0);
2689     OUT_BATCH(batch, 0);
2690     OUT_BATCH(batch, 0);
2691     ADVANCE_BATCH(batch);
2692
2693     BEGIN_BATCH(batch, 2);
2694     OUT_BATCH(batch, GEN7_3DSTATE_BINDING_TABLE_POINTERS_HS | (2 - 2));
2695     OUT_BATCH(batch, 0);
2696     ADVANCE_BATCH(batch);
2697
2698     /* Disable TE */
2699     BEGIN_BATCH(batch, 4);
2700     OUT_BATCH(batch, GEN7_3DSTATE_TE | (4 - 2));
2701     OUT_BATCH(batch, 0);
2702     OUT_BATCH(batch, 0);
2703     OUT_BATCH(batch, 0);
2704     ADVANCE_BATCH(batch);
2705
2706     /* Disable DS */
2707     BEGIN_BATCH(batch, 7);
2708     OUT_BATCH(batch, GEN7_3DSTATE_CONSTANT_DS | (7 - 2));
2709     OUT_BATCH(batch, 0);
2710     OUT_BATCH(batch, 0);
2711     OUT_BATCH(batch, 0);
2712     OUT_BATCH(batch, 0);
2713     OUT_BATCH(batch, 0);
2714     OUT_BATCH(batch, 0);
2715     ADVANCE_BATCH(batch);
2716
2717     BEGIN_BATCH(batch, 6);
2718     OUT_BATCH(batch, GEN7_3DSTATE_DS | (6 - 2));
2719     OUT_BATCH(batch, 0);
2720     OUT_BATCH(batch, 0);
2721     OUT_BATCH(batch, 0);
2722     OUT_BATCH(batch, 0);
2723     OUT_BATCH(batch, 0);
2724     ADVANCE_BATCH(batch);
2725
2726     BEGIN_BATCH(batch, 2);
2727     OUT_BATCH(batch, GEN7_3DSTATE_BINDING_TABLE_POINTERS_DS | (2 - 2));
2728     OUT_BATCH(batch, 0);
2729     ADVANCE_BATCH(batch);
2730
2731     /* Disable STREAMOUT */
2732     BEGIN_BATCH(batch, 3);
2733     OUT_BATCH(batch, GEN7_3DSTATE_STREAMOUT | (3 - 2));
2734     OUT_BATCH(batch, 0);
2735     OUT_BATCH(batch, 0);
2736     ADVANCE_BATCH(batch);
2737 }
2738
2739 static void 
2740 gen7_emit_clip_state(VADriverContextP ctx)
2741 {
2742     struct i965_driver_data *i965 = i965_driver_data(ctx);
2743     struct intel_batchbuffer *batch = i965->batch;
2744
2745     OUT_BATCH(batch, GEN6_3DSTATE_CLIP | (4 - 2));
2746     OUT_BATCH(batch, 0);
2747     OUT_BATCH(batch, 0); /* pass-through */
2748     OUT_BATCH(batch, 0);
2749 }
2750
2751 static void 
2752 gen7_emit_sf_state(VADriverContextP ctx)
2753 {
2754     struct i965_driver_data *i965 = i965_driver_data(ctx);
2755     struct intel_batchbuffer *batch = i965->batch;
2756
2757     BEGIN_BATCH(batch, 14);
2758     OUT_BATCH(batch, GEN7_3DSTATE_SBE | (14 - 2));
2759     OUT_BATCH(batch,
2760               (1 << GEN7_SBE_NUM_OUTPUTS_SHIFT) |
2761               (1 << GEN7_SBE_URB_ENTRY_READ_LENGTH_SHIFT) |
2762               (0 << GEN7_SBE_URB_ENTRY_READ_OFFSET_SHIFT));
2763     OUT_BATCH(batch, 0);
2764     OUT_BATCH(batch, 0);
2765     OUT_BATCH(batch, 0); /* DW4 */
2766     OUT_BATCH(batch, 0);
2767     OUT_BATCH(batch, 0);
2768     OUT_BATCH(batch, 0);
2769     OUT_BATCH(batch, 0);
2770     OUT_BATCH(batch, 0); /* DW9 */
2771     OUT_BATCH(batch, 0);
2772     OUT_BATCH(batch, 0);
2773     OUT_BATCH(batch, 0);
2774     OUT_BATCH(batch, 0);
2775     ADVANCE_BATCH(batch);
2776
2777     BEGIN_BATCH(batch, 7);
2778     OUT_BATCH(batch, GEN6_3DSTATE_SF | (7 - 2));
2779     OUT_BATCH(batch, 0);
2780     OUT_BATCH(batch, GEN6_3DSTATE_SF_CULL_NONE);
2781     OUT_BATCH(batch, 2 << GEN6_3DSTATE_SF_TRIFAN_PROVOKE_SHIFT);
2782     OUT_BATCH(batch, 0);
2783     OUT_BATCH(batch, 0);
2784     OUT_BATCH(batch, 0);
2785     ADVANCE_BATCH(batch);
2786 }
2787
2788 static void 
2789 gen7_emit_wm_state(VADriverContextP ctx, int kernel)
2790 {
2791     struct i965_driver_data *i965 = i965_driver_data(ctx);
2792     struct intel_batchbuffer *batch = i965->batch;
2793     struct i965_render_state *render_state = &i965->render_state;
2794     unsigned int max_threads_shift = GEN7_PS_MAX_THREADS_SHIFT_IVB;
2795     unsigned int num_samples = 0;
2796
2797     if (IS_HASWELL(i965->intel.device_id)) {
2798         max_threads_shift = GEN7_PS_MAX_THREADS_SHIFT_HSW;
2799         num_samples = 1 << GEN7_PS_SAMPLE_MASK_SHIFT_HSW;
2800     }
2801
2802     BEGIN_BATCH(batch, 3);
2803     OUT_BATCH(batch, GEN6_3DSTATE_WM | (3 - 2));
2804     OUT_BATCH(batch,
2805               GEN7_WM_DISPATCH_ENABLE |
2806               GEN7_WM_PERSPECTIVE_PIXEL_BARYCENTRIC);
2807     OUT_BATCH(batch, 0);
2808     ADVANCE_BATCH(batch);
2809
2810     BEGIN_BATCH(batch, 7);
2811     OUT_BATCH(batch, GEN6_3DSTATE_CONSTANT_PS | (7 - 2));
2812     OUT_BATCH(batch, 1);
2813     OUT_BATCH(batch, 0);
2814     OUT_RELOC(batch, 
2815               render_state->curbe.bo,
2816               I915_GEM_DOMAIN_INSTRUCTION, 0,
2817               0);
2818     OUT_BATCH(batch, 0);
2819     OUT_BATCH(batch, 0);
2820     OUT_BATCH(batch, 0);
2821     ADVANCE_BATCH(batch);
2822
2823     BEGIN_BATCH(batch, 8);
2824     OUT_BATCH(batch, GEN7_3DSTATE_PS | (8 - 2));
2825     OUT_RELOC(batch, 
2826               render_state->render_kernels[kernel].bo,
2827               I915_GEM_DOMAIN_INSTRUCTION, 0,
2828               0);
2829     OUT_BATCH(batch, 
2830               (1 << GEN7_PS_SAMPLER_COUNT_SHIFT) |
2831               (5 << GEN7_PS_BINDING_TABLE_ENTRY_COUNT_SHIFT));
2832     OUT_BATCH(batch, 0); /* scratch space base offset */
2833     OUT_BATCH(batch, 
2834               ((render_state->max_wm_threads - 1) << max_threads_shift) | num_samples |
2835               GEN7_PS_PUSH_CONSTANT_ENABLE |
2836               GEN7_PS_ATTRIBUTE_ENABLE |
2837               GEN7_PS_16_DISPATCH_ENABLE);
2838     OUT_BATCH(batch, 
2839               (6 << GEN7_PS_DISPATCH_START_GRF_SHIFT_0));
2840     OUT_BATCH(batch, 0); /* kernel 1 pointer */
2841     OUT_BATCH(batch, 0); /* kernel 2 pointer */
2842     ADVANCE_BATCH(batch);
2843 }
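
/*
 * Sketch of the 3DSTATE_PS thread-control dword built above, with a
 * hypothetical thread count: the field stores N - 1, and Haswell both
 * moved the field (hence the shift selected at the top of the function)
 * and added a sample-mask field (num_samples):
 *
 *     dw4 = ((max_wm_threads - 1) << max_threads_shift) | num_samples
 *         | GEN7_PS_PUSH_CONSTANT_ENABLE
 *         | GEN7_PS_ATTRIBUTE_ENABLE
 *         | GEN7_PS_16_DISPATCH_ENABLE;
 */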
2844
2845 static void
2846 gen7_emit_vertex_element_state(VADriverContextP ctx)
2847 {
2848     struct i965_driver_data *i965 = i965_driver_data(ctx);
2849     struct intel_batchbuffer *batch = i965->batch;
2850
2851     /* Set up our vertex elements, sourced from the single vertex buffer. */
2852     OUT_BATCH(batch, CMD_VERTEX_ELEMENTS | (5 - 2));
2853     /* offset 0: X,Y -> {X, Y, 1.0, 1.0} */
2854     OUT_BATCH(batch, (0 << GEN6_VE0_VERTEX_BUFFER_INDEX_SHIFT) |
2855               GEN6_VE0_VALID |
2856               (I965_SURFACEFORMAT_R32G32_FLOAT << VE0_FORMAT_SHIFT) |
2857               (0 << VE0_OFFSET_SHIFT));
2858     OUT_BATCH(batch, (I965_VFCOMPONENT_STORE_SRC << VE1_VFCOMPONENT_0_SHIFT) |
2859               (I965_VFCOMPONENT_STORE_SRC << VE1_VFCOMPONENT_1_SHIFT) |
2860               (I965_VFCOMPONENT_STORE_1_FLT << VE1_VFCOMPONENT_2_SHIFT) |
2861               (I965_VFCOMPONENT_STORE_1_FLT << VE1_VFCOMPONENT_3_SHIFT));
2862     /* offset 8: S0, T0 -> {S0, T0, 1.0, 1.0} */
2863     OUT_BATCH(batch, (0 << GEN6_VE0_VERTEX_BUFFER_INDEX_SHIFT) |
2864               GEN6_VE0_VALID |
2865               (I965_SURFACEFORMAT_R32G32_FLOAT << VE0_FORMAT_SHIFT) |
2866               (8 << VE0_OFFSET_SHIFT));
2867     OUT_BATCH(batch, (I965_VFCOMPONENT_STORE_SRC << VE1_VFCOMPONENT_0_SHIFT) | 
2868               (I965_VFCOMPONENT_STORE_SRC << VE1_VFCOMPONENT_1_SHIFT) |
2869               (I965_VFCOMPONENT_STORE_1_FLT << VE1_VFCOMPONENT_2_SHIFT) |
2870               (I965_VFCOMPONENT_STORE_1_FLT << VE1_VFCOMPONENT_3_SHIFT));
2871 }
2872
2873 static void
2874 gen7_emit_vertices(VADriverContextP ctx)
2875 {
2876     struct i965_driver_data *i965 = i965_driver_data(ctx);
2877     struct intel_batchbuffer *batch = i965->batch;
2878     struct i965_render_state *render_state = &i965->render_state;
2879
2880     BEGIN_BATCH(batch, 5);
2881     OUT_BATCH(batch, CMD_VERTEX_BUFFERS | (5 - 2));
2882     OUT_BATCH(batch, 
2883               (0 << GEN6_VB0_BUFFER_INDEX_SHIFT) |
2884               GEN6_VB0_VERTEXDATA |
2885               GEN7_VB0_ADDRESS_MODIFYENABLE |
2886               ((4 * 4) << VB0_BUFFER_PITCH_SHIFT));
2887     OUT_RELOC(batch, render_state->vb.vertex_buffer, I915_GEM_DOMAIN_VERTEX, 0, 0);
2888     OUT_RELOC(batch, render_state->vb.vertex_buffer, I915_GEM_DOMAIN_VERTEX, 0, 12 * 4);
2889     OUT_BATCH(batch, 0);
2890     ADVANCE_BATCH(batch);
2891
2892     BEGIN_BATCH(batch, 7);
2893     OUT_BATCH(batch, CMD_3DPRIMITIVE | (7 - 2));
2894     OUT_BATCH(batch,
2895               _3DPRIM_RECTLIST |
2896               GEN7_3DPRIM_VERTEXBUFFER_ACCESS_SEQUENTIAL);
2897     OUT_BATCH(batch, 3); /* vertex count per instance */
2898     OUT_BATCH(batch, 0); /* start vertex offset */
2899     OUT_BATCH(batch, 1); /* single instance */
2900     OUT_BATCH(batch, 0); /* start instance location */
2901     OUT_BATCH(batch, 0);
2902     ADVANCE_BATCH(batch);
2903 }
2904
2905 static void
2906 gen7_render_emit_states(VADriverContextP ctx, int kernel)
2907 {
2908     struct i965_driver_data *i965 = i965_driver_data(ctx);
2909     struct intel_batchbuffer *batch = i965->batch;
2910
2911     intel_batchbuffer_start_atomic(batch, 0x1000);
2912     intel_batchbuffer_emit_mi_flush(batch);
    gen7_emit_invariant_states(ctx);
2914     gen7_emit_state_base_address(ctx);
2915     gen7_emit_viewport_state_pointers(ctx);
2916     gen7_emit_urb(ctx);
2917     gen7_emit_cc_state_pointers(ctx);
2918     gen7_emit_sampler_state_pointers(ctx);
2919     gen7_emit_bypass_state(ctx);
2920     gen7_emit_vs_state(ctx);
2921     gen7_emit_clip_state(ctx);
2922     gen7_emit_sf_state(ctx);
2923     gen7_emit_wm_state(ctx, kernel);
2924     gen7_emit_binding_table(ctx);
2925     gen7_emit_depth_buffer_state(ctx);
2926     gen7_emit_drawing_rectangle(ctx);
2927     gen7_emit_vertex_element_state(ctx);
2928     gen7_emit_vertices(ctx);
2929     intel_batchbuffer_end_atomic(batch);
2930 }
2931
static void
gen7_render_put_surface(
    VADriverContextP   ctx,
    struct object_surface *obj_surface,
    const VARectangle *src_rect,
    const VARectangle *dst_rect,
    unsigned int       flags
)
{
    struct i965_driver_data *i965 = i965_driver_data(ctx);
    struct intel_batchbuffer *batch = i965->batch;

    gen7_render_initialize(ctx);
    gen7_render_setup_states(ctx, obj_surface, src_rect, dst_rect, flags);
    i965_clear_dest_region(ctx);
    gen7_render_emit_states(ctx, PS_KERNEL);
    intel_batchbuffer_flush(batch);
}

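/*
 * Blend state for subpicture composition: classic "source over"
 * alpha blending,
 *
 *     dst' = src.alpha * src + (1 - src.alpha) * dst
 *
 * with both pre- and post-blend outputs clamped to [0, 1].
 */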
static void
gen7_subpicture_render_blend_state(VADriverContextP ctx)
{
    struct i965_driver_data *i965 = i965_driver_data(ctx);
    struct i965_render_state *render_state = &i965->render_state;
    struct gen6_blend_state *blend_state;

    dri_bo_map(render_state->cc.blend, 1);
    assert(render_state->cc.blend->virtual);
    blend_state = render_state->cc.blend->virtual;
    memset(blend_state, 0, sizeof(*blend_state));
    blend_state->blend0.dest_blend_factor = I965_BLENDFACTOR_INV_SRC_ALPHA;
    blend_state->blend0.source_blend_factor = I965_BLENDFACTOR_SRC_ALPHA;
    blend_state->blend0.blend_func = I965_BLENDFUNCTION_ADD;
    blend_state->blend0.blend_enable = 1;
    blend_state->blend1.post_blend_clamp_enable = 1;
    blend_state->blend1.pre_blend_clamp_enable = 1;
    blend_state->blend1.clamp_range = 0; /* clamp range [0, 1] */
    dri_bo_unmap(render_state->cc.blend);
}

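/*
 * Gather all state needed to blend a subpicture (e.g. a subtitle or
 * OSD overlay) over the destination: destination and source surface
 * state, sampler, viewport, color-calculator, blend and depth/stencil
 * state, plus the shader constants and vertices for the overlay
 * rectangle.
 */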
static void
gen7_subpicture_render_setup_states(
    VADriverContextP   ctx,
    struct object_surface *obj_surface,
    const VARectangle *src_rect,
    const VARectangle *dst_rect
)
{
    i965_render_dest_surface_state(ctx, 0);
    i965_subpic_render_src_surfaces_state(ctx, obj_surface);
    i965_render_sampler(ctx);
    i965_render_cc_viewport(ctx);
    gen7_render_color_calc_state(ctx);
    gen7_subpicture_render_blend_state(ctx);
    gen7_render_depth_stencil_state(ctx);
    i965_subpic_render_upload_constants(ctx, obj_surface);
    i965_subpic_render_upload_vertex(ctx, obj_surface, dst_rect);
}

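/*
 * Gen7 top-level subpicture path; uses the ARGB sampling kernel
 * (PS_SUBPIC_KERNEL) and uploads the subpicture's image palette so
 * that paletted subpicture formats resolve to the right colors.
 */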
static void
gen7_render_put_subpicture(
    VADriverContextP   ctx,
    struct object_surface *obj_surface,
    const VARectangle *src_rect,
    const VARectangle *dst_rect
)
{
    struct i965_driver_data *i965 = i965_driver_data(ctx);
    struct intel_batchbuffer *batch = i965->batch;
    unsigned int index = obj_surface->subpic_render_idx;
    struct object_subpic *obj_subpic = obj_surface->obj_subpic[index];

    assert(obj_subpic);
    gen7_render_initialize(ctx);
    gen7_subpicture_render_setup_states(ctx, obj_surface, src_rect, dst_rect);
    gen7_render_emit_states(ctx, PS_SUBPIC_KERNEL);
    i965_render_upload_image_palette(ctx, obj_subpic->obj_image, 0xff);
    intel_batchbuffer_flush(batch);
}


/*
 * global functions
 */
VAStatus
i965_DestroySurfaces(VADriverContextP ctx,
                     VASurfaceID *surface_list,
                     int num_surfaces);
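
/*
 * Generation dispatch for rendering a surface to the current drawable.
 * Post-processing runs first and may substitute a new (possibly
 * already-scaled) surface; when scaling has been done, the source
 * rectangle is replaced by the destination rectangle so the renderer
 * performs a 1:1 copy.  A minimal usage sketch (hypothetical caller;
 * 'width'/'height' and the drawable dimensions are assumed to be known):
 *
 *     VARectangle src = { 0, 0, width, height };
 *     VARectangle dst = { 0, 0, drawable_width, drawable_height };
 *
 *     intel_render_put_surface(ctx, obj_surface, &src, &dst,
 *                              VA_FRAME_PICTURE);
 */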
void
intel_render_put_surface(
    VADriverContextP   ctx,
    struct object_surface *obj_surface,
    const VARectangle *src_rect,
    const VARectangle *dst_rect,
    unsigned int       flags
)
{
    struct i965_driver_data *i965 = i965_driver_data(ctx);
    int has_done_scaling = 0;
    VASurfaceID out_surface_id = i965_post_processing(ctx,
                                                      obj_surface,
                                                      src_rect,
                                                      dst_rect,
                                                      flags,
                                                      &has_done_scaling);

    assert((!has_done_scaling) || (out_surface_id != VA_INVALID_ID));

    if (out_surface_id != VA_INVALID_ID) {
        struct object_surface *new_obj_surface = SURFACE(out_surface_id);

        if (new_obj_surface && new_obj_surface->bo)
            obj_surface = new_obj_surface;

        if (has_done_scaling)
            src_rect = dst_rect;
    }

    if (IS_GEN7(i965->intel.device_id))
        gen7_render_put_surface(ctx, obj_surface, src_rect, dst_rect, flags);
    else if (IS_GEN6(i965->intel.device_id))
        gen6_render_put_surface(ctx, obj_surface, src_rect, dst_rect, flags);
    else
        i965_render_put_surface(ctx, obj_surface, src_rect, dst_rect, flags);

    if (out_surface_id != VA_INVALID_ID)
        i965_DestroySurfaces(ctx, &out_surface_id, 1);
}

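/* Generation dispatch for subpicture composition; unlike the surface
 * path above, no post-processing pass is involved. */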
void
intel_render_put_subpicture(
    VADriverContextP   ctx,
    struct object_surface *obj_surface,
    const VARectangle *src_rect,
    const VARectangle *dst_rect
)
{
    struct i965_driver_data *i965 = i965_driver_data(ctx);

    if (IS_GEN7(i965->intel.device_id))
        gen7_render_put_subpicture(ctx, obj_surface, src_rect, dst_rect);
    else if (IS_GEN6(i965->intel.device_id))
        gen6_render_put_subpicture(ctx, obj_surface, src_rect, dst_rect);
    else
        i965_render_put_subpicture(ctx, obj_surface, src_rect, dst_rect);
}

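/*
 * One-time renderer initialization: select the shader kernel table
 * matching the GPU generation, upload each kernel into its own buffer
 * object, allocate the constant (CURBE) buffer, and record the maximum
 * number of WM threads the hardware supports.
 */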
bool
i965_render_init(VADriverContextP ctx)
{
    struct i965_driver_data *i965 = i965_driver_data(ctx);
    struct i965_render_state *render_state = &i965->render_state;
    int i;

    /* kernel */
    assert(NUM_RENDER_KERNEL == (sizeof(render_kernels_gen5) /
                                 sizeof(render_kernels_gen5[0])));
    assert(NUM_RENDER_KERNEL == (sizeof(render_kernels_gen6) /
                                 sizeof(render_kernels_gen6[0])));

    if (IS_GEN7(i965->intel.device_id))
        memcpy(render_state->render_kernels,
               (IS_HASWELL(i965->intel.device_id) ? render_kernels_gen7_haswell : render_kernels_gen7),
               sizeof(render_state->render_kernels));
    else if (IS_GEN6(i965->intel.device_id))
        memcpy(render_state->render_kernels, render_kernels_gen6, sizeof(render_state->render_kernels));
    else if (IS_IRONLAKE(i965->intel.device_id))
        memcpy(render_state->render_kernels, render_kernels_gen5, sizeof(render_state->render_kernels));
    else
        memcpy(render_state->render_kernels, render_kernels_gen4, sizeof(render_state->render_kernels));

    for (i = 0; i < NUM_RENDER_KERNEL; i++) {
        struct i965_kernel *kernel = &render_state->render_kernels[i];

        if (!kernel->size)
            continue;

        kernel->bo = dri_bo_alloc(i965->intel.bufmgr,
                                  kernel->name,
                                  kernel->size, 0x1000);
        assert(kernel->bo);
        dri_bo_subdata(kernel->bo, 0, kernel->size, kernel->bin);
    }

    /* constant buffer */
    render_state->curbe.bo = dri_bo_alloc(i965->intel.bufmgr,
                                          "constant buffer",
                                          4096, 64);
    assert(render_state->curbe.bo);

    /* maximum number of WM (pixel shader) threads, per generation and GT */
    if (IS_IVB_GT1(i965->intel.device_id) ||
        IS_HSW_GT1(i965->intel.device_id)) {
        render_state->max_wm_threads = 48;
    } else if (IS_IVB_GT2(i965->intel.device_id) ||
               IS_HSW_GT2(i965->intel.device_id)) {
        render_state->max_wm_threads = 172;
    } else if (IS_SNB_GT1(i965->intel.device_id)) {
        render_state->max_wm_threads = 40;
    } else if (IS_SNB_GT2(i965->intel.device_id)) {
        render_state->max_wm_threads = 80;
    } else if (IS_IRONLAKE(i965->intel.device_id)) {
        render_state->max_wm_threads = 72; /* 12 EUs * 6 threads */
    } else if (IS_G4X(i965->intel.device_id)) {
        render_state->max_wm_threads = 50; /* 10 EUs * 5 threads */
    } else {
        /* should never get here: unsupported generation */
        assert(0);
    }

    return true;
}

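/*
 * Tear-down counterpart of i965_render_init(): drop every buffer object
 * reference acquired during init and rendering, and free the cached
 * drawable region, leaving the render state ready for re-initialization.
 */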
void
i965_render_terminate(VADriverContextP ctx)
{
    int i;
    struct i965_driver_data *i965 = i965_driver_data(ctx);
    struct i965_render_state *render_state = &i965->render_state;

    dri_bo_unreference(render_state->curbe.bo);
    render_state->curbe.bo = NULL;

    for (i = 0; i < NUM_RENDER_KERNEL; i++) {
        struct i965_kernel *kernel = &render_state->render_kernels[i];

        dri_bo_unreference(kernel->bo);
        kernel->bo = NULL;
    }

    dri_bo_unreference(render_state->vb.vertex_buffer);
    render_state->vb.vertex_buffer = NULL;
    dri_bo_unreference(render_state->vs.state);
    render_state->vs.state = NULL;
    dri_bo_unreference(render_state->sf.state);
    render_state->sf.state = NULL;
    dri_bo_unreference(render_state->wm.sampler);
    render_state->wm.sampler = NULL;
    dri_bo_unreference(render_state->wm.state);
    render_state->wm.state = NULL;
    dri_bo_unreference(render_state->wm.surface_state_binding_table_bo);
    render_state->wm.surface_state_binding_table_bo = NULL;
    dri_bo_unreference(render_state->cc.viewport);
    render_state->cc.viewport = NULL;
    dri_bo_unreference(render_state->cc.state);
    render_state->cc.state = NULL;
    dri_bo_unreference(render_state->cc.blend);
    render_state->cc.blend = NULL;
    dri_bo_unreference(render_state->cc.depth_stencil);
    render_state->cc.depth_stencil = NULL;

    if (render_state->draw_region) {
        dri_bo_unreference(render_state->draw_region->bo);
        free(render_state->draw_region);
        render_state->draw_region = NULL;
    }
}