/*
 * Copyright © 2006 Intel Corporation
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice (including the next
 * paragraph) shall be included in all copies or substantial portions of the
 * Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
 * DEALINGS IN THE SOFTWARE.
 *
 * Authors:
 *    Eric Anholt <eric@anholt.net>
 *    Keith Packard <keithp@keithp.com>
 *    Xiang Haihao <haihao.xiang@intel.com>
 *
 */

/*
 * Most of the rendering code is ported from xf86-video-intel/src/i965_video.c
 */

#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <assert.h>
#include <math.h>

#include <va/va_drmcommon.h>

#include "intel_batchbuffer.h"
#include "intel_driver.h"
#include "i965_defines.h"
#include "i965_drv_video.h"
#include "i965_structs.h"

#include "i965_render.h"

#define SF_KERNEL_NUM_GRF       16
#define SF_MAX_THREADS          1

static const uint32_t sf_kernel_static[][4] =
{
#include "shaders/render/exa_sf.g4b"
};

#define PS_KERNEL_NUM_GRF       48
#define PS_MAX_THREADS          32

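/*
 * The thread0.grf_reg_count fields programmed below encode a kernel's GRF
 * usage in blocks of 16 registers, minus one; e.g. I965_GRF_BLOCKS(48) == 2.
 */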
#define I965_GRF_BLOCKS(nreg)   ((nreg + 15) / 16 - 1)

static const uint32_t ps_kernel_static[][4] =
{
#include "shaders/render/exa_wm_xy.g4b"
#include "shaders/render/exa_wm_src_affine.g4b"
#include "shaders/render/exa_wm_src_sample_planar.g4b"
#include "shaders/render/exa_wm_yuv_color_balance.g4b"
#include "shaders/render/exa_wm_yuv_rgb.g4b"
#include "shaders/render/exa_wm_write.g4b"
};
static const uint32_t ps_subpic_kernel_static[][4] =
{
#include "shaders/render/exa_wm_xy.g4b"
#include "shaders/render/exa_wm_src_affine.g4b"
#include "shaders/render/exa_wm_src_sample_argb.g4b"
#include "shaders/render/exa_wm_write.g4b"
};

/* On IRONLAKE */
static const uint32_t sf_kernel_static_gen5[][4] =
{
#include "shaders/render/exa_sf.g4b.gen5"
};

static const uint32_t ps_kernel_static_gen5[][4] =
{
#include "shaders/render/exa_wm_xy.g4b.gen5"
#include "shaders/render/exa_wm_src_affine.g4b.gen5"
#include "shaders/render/exa_wm_src_sample_planar.g4b.gen5"
#include "shaders/render/exa_wm_yuv_color_balance.g4b.gen5"
#include "shaders/render/exa_wm_yuv_rgb.g4b.gen5"
#include "shaders/render/exa_wm_write.g4b.gen5"
};
static const uint32_t ps_subpic_kernel_static_gen5[][4] =
{
#include "shaders/render/exa_wm_xy.g4b.gen5"
#include "shaders/render/exa_wm_src_affine.g4b.gen5"
#include "shaders/render/exa_wm_src_sample_argb.g4b.gen5"
#include "shaders/render/exa_wm_write.g4b.gen5"
};

/* programs for Sandybridge */
static const uint32_t sf_kernel_static_gen6[][4] =
{
};

static const uint32_t ps_kernel_static_gen6[][4] = {
#include "shaders/render/exa_wm_src_affine.g6b"
#include "shaders/render/exa_wm_src_sample_planar.g6b"
#include "shaders/render/exa_wm_yuv_color_balance.g6b"
#include "shaders/render/exa_wm_yuv_rgb.g6b"
#include "shaders/render/exa_wm_write.g6b"
};

static const uint32_t ps_subpic_kernel_static_gen6[][4] = {
#include "shaders/render/exa_wm_src_affine.g6b"
#include "shaders/render/exa_wm_src_sample_argb.g6b"
#include "shaders/render/exa_wm_write.g6b"
};

/* programs for Ivybridge */
static const uint32_t sf_kernel_static_gen7[][4] =
{
};

static const uint32_t ps_kernel_static_gen7[][4] = {
#include "shaders/render/exa_wm_src_affine.g7b"
#include "shaders/render/exa_wm_src_sample_planar.g7b"
#include "shaders/render/exa_wm_yuv_color_balance.g7b"
#include "shaders/render/exa_wm_yuv_rgb.g7b"
#include "shaders/render/exa_wm_write.g7b"
};

static const uint32_t ps_subpic_kernel_static_gen7[][4] = {
#include "shaders/render/exa_wm_src_affine.g7b"
#include "shaders/render/exa_wm_src_sample_argb.g7b"
#include "shaders/render/exa_wm_write.g7b"
};

/* Programs for Haswell */
static const uint32_t ps_kernel_static_gen7_haswell[][4] = {
#include "shaders/render/exa_wm_src_affine.g7b"
#include "shaders/render/exa_wm_src_sample_planar.g7b.haswell"
#include "shaders/render/exa_wm_yuv_color_balance.g7b.haswell"
#include "shaders/render/exa_wm_yuv_rgb.g7b"
#include "shaders/render/exa_wm_write.g7b"
};

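/*
 * Surface states and the binding table share a single bo: every surface
 * state is padded to a common size so that either the gen4 or the gen7
 * layout fits in a slot, and the binding table itself starts right after
 * the last surface-state slot.
 */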
#define SURFACE_STATE_PADDED_SIZE_I965  ALIGN(sizeof(struct i965_surface_state), 32)
#define SURFACE_STATE_PADDED_SIZE_GEN7  ALIGN(sizeof(struct gen7_surface_state), 32)
#define SURFACE_STATE_PADDED_SIZE       MAX(SURFACE_STATE_PADDED_SIZE_I965, SURFACE_STATE_PADDED_SIZE_GEN7)
#define SURFACE_STATE_OFFSET(index)     (SURFACE_STATE_PADDED_SIZE * index)
#define BINDING_TABLE_OFFSET            SURFACE_STATE_OFFSET(MAX_RENDER_SURFACES)

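/*
 * Type-pun a float to its IEEE-754 bit pattern through a union, so the
 * value can be emitted into the batch as a plain DWORD (used by
 * i965_render_constant_color() below).
 */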
static uint32_t float_to_uint (float f)
{
    union {
        uint32_t i;
        float f;
    } x;

    x.f = f;
    return x.i;
}

enum
{
    SF_KERNEL = 0,
    PS_KERNEL,
    PS_SUBPIC_KERNEL
};

static struct i965_kernel render_kernels_gen4[] = {
    {
        "SF",
        SF_KERNEL,
        sf_kernel_static,
        sizeof(sf_kernel_static),
        NULL
    },
    {
        "PS",
        PS_KERNEL,
        ps_kernel_static,
        sizeof(ps_kernel_static),
        NULL
    },

    {
        "PS_SUBPIC",
        PS_SUBPIC_KERNEL,
        ps_subpic_kernel_static,
        sizeof(ps_subpic_kernel_static),
        NULL
    }
};

static struct i965_kernel render_kernels_gen5[] = {
    {
        "SF",
        SF_KERNEL,
        sf_kernel_static_gen5,
        sizeof(sf_kernel_static_gen5),
        NULL
    },
    {
        "PS",
        PS_KERNEL,
        ps_kernel_static_gen5,
        sizeof(ps_kernel_static_gen5),
        NULL
    },

    {
        "PS_SUBPIC",
        PS_SUBPIC_KERNEL,
        ps_subpic_kernel_static_gen5,
        sizeof(ps_subpic_kernel_static_gen5),
        NULL
    }
};

static struct i965_kernel render_kernels_gen6[] = {
    {
        "SF",
        SF_KERNEL,
        sf_kernel_static_gen6,
        sizeof(sf_kernel_static_gen6),
        NULL
    },
    {
        "PS",
        PS_KERNEL,
        ps_kernel_static_gen6,
        sizeof(ps_kernel_static_gen6),
        NULL
    },

    {
        "PS_SUBPIC",
        PS_SUBPIC_KERNEL,
        ps_subpic_kernel_static_gen6,
        sizeof(ps_subpic_kernel_static_gen6),
        NULL
    }
};

static struct i965_kernel render_kernels_gen7[] = {
    {
        "SF",
        SF_KERNEL,
        sf_kernel_static_gen7,
        sizeof(sf_kernel_static_gen7),
        NULL
    },
    {
        "PS",
        PS_KERNEL,
        ps_kernel_static_gen7,
        sizeof(ps_kernel_static_gen7),
        NULL
    },

    {
        "PS_SUBPIC",
        PS_SUBPIC_KERNEL,
        ps_subpic_kernel_static_gen7,
        sizeof(ps_subpic_kernel_static_gen7),
        NULL
    }
};

static struct i965_kernel render_kernels_gen7_haswell[] = {
    {
        "SF",
        SF_KERNEL,
        sf_kernel_static_gen7,
        sizeof(sf_kernel_static_gen7),
        NULL
    },
    {
        "PS",
        PS_KERNEL,
        ps_kernel_static_gen7_haswell,
        sizeof(ps_kernel_static_gen7_haswell),
        NULL
    },

    {
        "PS_SUBPIC",
        PS_SUBPIC_KERNEL,
        ps_subpic_kernel_static_gen7,
        sizeof(ps_subpic_kernel_static_gen7),
        NULL
    }
};

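/*
 * Static URB partitioning: only the VS, SF and CS sections get entries.
 * GS and CLIP are disabled for this pipeline, so their allocations stay
 * zero (see i965_render_urb_layout() below).
 */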
#define URB_VS_ENTRIES        8
#define URB_VS_ENTRY_SIZE     1

#define URB_GS_ENTRIES        0
#define URB_GS_ENTRY_SIZE     0

#define URB_CLIP_ENTRIES      0
#define URB_CLIP_ENTRY_SIZE   0

#define URB_SF_ENTRIES        1
#define URB_SF_ENTRY_SIZE     2

#define URB_CS_ENTRIES        4
#define URB_CS_ENTRY_SIZE     4

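/*
 * Limited-range BT.601 YCbCr -> RGB coefficients, one row per output
 * channel (R, G, B).  The fourth column appears to hold the bias for the
 * matching input channel (-0.06275 = -16/255 for Y, -0.50196 = -128/255
 * for Cb/Cr), i.e. R = 1.164 * (Y - 16/255) + 1.596 * (Cr - 128/255).
 */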
static float yuv_to_rgb_bt601[3][4] = {
    {1.164,          0,      1.596,         -0.06275,},
    {1.164,         -0.392, -0.813,         -0.50196,},
    {1.164,          2.017,  0,             -0.50196,},
};

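/*
 * The VS unit is left disabled (vs_enable = 0) with the vertex cache off,
 * so vertices from CMD_3DPRIMITIVE pass through to the SF stage untouched.
 */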
static void
i965_render_vs_unit(VADriverContextP ctx)
{
    struct i965_driver_data *i965 = i965_driver_data(ctx);
    struct i965_render_state *render_state = &i965->render_state;
    struct i965_vs_unit_state *vs_state;

    dri_bo_map(render_state->vs.state, 1);
    assert(render_state->vs.state->virtual);
    vs_state = render_state->vs.state->virtual;
    memset(vs_state, 0, sizeof(*vs_state));

    if (IS_IRONLAKE(i965->intel.device_id))
        vs_state->thread4.nr_urb_entries = URB_VS_ENTRIES >> 2;
    else
        vs_state->thread4.nr_urb_entries = URB_VS_ENTRIES;

    vs_state->thread4.urb_entry_allocation_size = URB_VS_ENTRY_SIZE - 1;
    vs_state->vs6.vs_enable = 0;
    vs_state->vs6.vert_cache_disable = 1;

    dri_bo_unmap(render_state->vs.state);
}

static void
i965_render_sf_unit(VADriverContextP ctx)
{
    struct i965_driver_data *i965 = i965_driver_data(ctx);
    struct i965_render_state *render_state = &i965->render_state;
    struct i965_sf_unit_state *sf_state;

    dri_bo_map(render_state->sf.state, 1);
    assert(render_state->sf.state->virtual);
    sf_state = render_state->sf.state->virtual;
    memset(sf_state, 0, sizeof(*sf_state));

    sf_state->thread0.grf_reg_count = I965_GRF_BLOCKS(SF_KERNEL_NUM_GRF);
    sf_state->thread0.kernel_start_pointer = render_state->render_kernels[SF_KERNEL].bo->offset >> 6;

    sf_state->sf1.single_program_flow = 1; /* XXX */
    sf_state->sf1.binding_table_entry_count = 0;
    sf_state->sf1.thread_priority = 0;
    sf_state->sf1.floating_point_mode = 0; /* Mesa does this */
    sf_state->sf1.illegal_op_exception_enable = 1;
    sf_state->sf1.mask_stack_exception_enable = 1;
    sf_state->sf1.sw_exception_enable = 1;

    /* scratch space is not used in our kernel */
    sf_state->thread2.per_thread_scratch_space = 0;
    sf_state->thread2.scratch_space_base_pointer = 0;

    sf_state->thread3.const_urb_entry_read_length = 0; /* no const URBs */
    sf_state->thread3.const_urb_entry_read_offset = 0; /* no const URBs */
    sf_state->thread3.urb_entry_read_length = 1; /* 1 URB per vertex */
    sf_state->thread3.urb_entry_read_offset = 0;
    sf_state->thread3.dispatch_grf_start_reg = 3;

    sf_state->thread4.max_threads = SF_MAX_THREADS - 1;
    sf_state->thread4.urb_entry_allocation_size = URB_SF_ENTRY_SIZE - 1;
    sf_state->thread4.nr_urb_entries = URB_SF_ENTRIES;
    sf_state->thread4.stats_enable = 1;

    sf_state->sf5.viewport_transform = 0; /* skip viewport */

    sf_state->sf6.cull_mode = I965_CULLMODE_NONE;
    sf_state->sf6.scissor = 0;

    sf_state->sf7.trifan_pv = 2;

    sf_state->sf6.dest_org_vbias = 0x8;
    sf_state->sf6.dest_org_hbias = 0x8;

    dri_bo_emit_reloc(render_state->sf.state,
                      I915_GEM_DOMAIN_INSTRUCTION, 0,
                      sf_state->thread0.grf_reg_count << 1,
                      offsetof(struct i965_sf_unit_state, thread0),
                      render_state->render_kernels[SF_KERNEL].bo);

    dri_bo_unmap(render_state->sf.state);
}

static void
i965_render_sampler(VADriverContextP ctx)
{
    struct i965_driver_data *i965 = i965_driver_data(ctx);
    struct i965_render_state *render_state = &i965->render_state;
    struct i965_sampler_state *sampler_state;
    int i;

    assert(render_state->wm.sampler_count > 0);
    assert(render_state->wm.sampler_count <= MAX_SAMPLERS);

    dri_bo_map(render_state->wm.sampler, 1);
    assert(render_state->wm.sampler->virtual);
    sampler_state = render_state->wm.sampler->virtual;
    for (i = 0; i < render_state->wm.sampler_count; i++) {
        memset(sampler_state, 0, sizeof(*sampler_state));
        sampler_state->ss0.min_filter = I965_MAPFILTER_LINEAR;
        sampler_state->ss0.mag_filter = I965_MAPFILTER_LINEAR;
        sampler_state->ss1.r_wrap_mode = I965_TEXCOORDMODE_CLAMP;
        sampler_state->ss1.s_wrap_mode = I965_TEXCOORDMODE_CLAMP;
        sampler_state->ss1.t_wrap_mode = I965_TEXCOORDMODE_CLAMP;
        sampler_state++;
    }

    dri_bo_unmap(render_state->wm.sampler);
}

static void
i965_subpic_render_wm_unit(VADriverContextP ctx)
{
    struct i965_driver_data *i965 = i965_driver_data(ctx);
    struct i965_render_state *render_state = &i965->render_state;
    struct i965_wm_unit_state *wm_state;

    assert(render_state->wm.sampler);

    dri_bo_map(render_state->wm.state, 1);
    assert(render_state->wm.state->virtual);
    wm_state = render_state->wm.state->virtual;
    memset(wm_state, 0, sizeof(*wm_state));

    wm_state->thread0.grf_reg_count = I965_GRF_BLOCKS(PS_KERNEL_NUM_GRF);
    wm_state->thread0.kernel_start_pointer = render_state->render_kernels[PS_SUBPIC_KERNEL].bo->offset >> 6;

    wm_state->thread1.single_program_flow = 1; /* XXX */

    if (IS_IRONLAKE(i965->intel.device_id))
        wm_state->thread1.binding_table_entry_count = 0; /* hardware requirement */
    else
        wm_state->thread1.binding_table_entry_count = 7;

    wm_state->thread2.scratch_space_base_pointer = 0;
    wm_state->thread2.per_thread_scratch_space = 0; /* 1024 bytes */

    wm_state->thread3.dispatch_grf_start_reg = 2; /* XXX */
    wm_state->thread3.const_urb_entry_read_length = 4;
    wm_state->thread3.const_urb_entry_read_offset = 0;
    wm_state->thread3.urb_entry_read_length = 1; /* XXX */
    wm_state->thread3.urb_entry_read_offset = 0; /* XXX */

    wm_state->wm4.stats_enable = 0;
    wm_state->wm4.sampler_state_pointer = render_state->wm.sampler->offset >> 5;

    if (IS_IRONLAKE(i965->intel.device_id)) {
        wm_state->wm4.sampler_count = 0;        /* hardware requirement */
    } else {
        wm_state->wm4.sampler_count = (render_state->wm.sampler_count + 3) / 4;
    }

    wm_state->wm5.max_threads = render_state->max_wm_threads - 1;
    wm_state->wm5.thread_dispatch_enable = 1;
    wm_state->wm5.enable_16_pix = 1;
    wm_state->wm5.enable_8_pix = 0;
    wm_state->wm5.early_depth_test = 1;

    dri_bo_emit_reloc(render_state->wm.state,
                      I915_GEM_DOMAIN_INSTRUCTION, 0,
                      wm_state->thread0.grf_reg_count << 1,
                      offsetof(struct i965_wm_unit_state, thread0),
                      render_state->render_kernels[PS_SUBPIC_KERNEL].bo);

    dri_bo_emit_reloc(render_state->wm.state,
                      I915_GEM_DOMAIN_INSTRUCTION, 0,
                      wm_state->wm4.sampler_count << 2,
                      offsetof(struct i965_wm_unit_state, wm4),
                      render_state->wm.sampler);

    dri_bo_unmap(render_state->wm.state);
}

static void
i965_render_wm_unit(VADriverContextP ctx)
{
    struct i965_driver_data *i965 = i965_driver_data(ctx);
    struct i965_render_state *render_state = &i965->render_state;
    struct i965_wm_unit_state *wm_state;

    assert(render_state->wm.sampler);

    dri_bo_map(render_state->wm.state, 1);
    assert(render_state->wm.state->virtual);
    wm_state = render_state->wm.state->virtual;
    memset(wm_state, 0, sizeof(*wm_state));

    wm_state->thread0.grf_reg_count = I965_GRF_BLOCKS(PS_KERNEL_NUM_GRF);
    wm_state->thread0.kernel_start_pointer = render_state->render_kernels[PS_KERNEL].bo->offset >> 6;

    wm_state->thread1.single_program_flow = 1; /* XXX */

    if (IS_IRONLAKE(i965->intel.device_id))
        wm_state->thread1.binding_table_entry_count = 0;        /* hardware requirement */
    else
        wm_state->thread1.binding_table_entry_count = 7;

    wm_state->thread2.scratch_space_base_pointer = 0;
    wm_state->thread2.per_thread_scratch_space = 0; /* 1024 bytes */

    wm_state->thread3.dispatch_grf_start_reg = 2; /* XXX */
    wm_state->thread3.const_urb_entry_read_length = 4;
    wm_state->thread3.const_urb_entry_read_offset = 0;
    wm_state->thread3.urb_entry_read_length = 1; /* XXX */
    wm_state->thread3.urb_entry_read_offset = 0; /* XXX */

    wm_state->wm4.stats_enable = 0;
    wm_state->wm4.sampler_state_pointer = render_state->wm.sampler->offset >> 5;

    if (IS_IRONLAKE(i965->intel.device_id)) {
        wm_state->wm4.sampler_count = 0;        /* hardware requirement */
    } else {
        wm_state->wm4.sampler_count = (render_state->wm.sampler_count + 3) / 4;
    }

    wm_state->wm5.max_threads = render_state->max_wm_threads - 1;
    wm_state->wm5.thread_dispatch_enable = 1;
    wm_state->wm5.enable_16_pix = 1;
    wm_state->wm5.enable_8_pix = 0;
    wm_state->wm5.early_depth_test = 1;

    dri_bo_emit_reloc(render_state->wm.state,
                      I915_GEM_DOMAIN_INSTRUCTION, 0,
                      wm_state->thread0.grf_reg_count << 1,
                      offsetof(struct i965_wm_unit_state, thread0),
                      render_state->render_kernels[PS_KERNEL].bo);

    dri_bo_emit_reloc(render_state->wm.state,
                      I915_GEM_DOMAIN_INSTRUCTION, 0,
                      wm_state->wm4.sampler_count << 2,
                      offsetof(struct i965_wm_unit_state, wm4),
                      render_state->wm.sampler);

    dri_bo_unmap(render_state->wm.state);
}

static void
i965_render_cc_viewport(VADriverContextP ctx)
{
    struct i965_driver_data *i965 = i965_driver_data(ctx);
    struct i965_render_state *render_state = &i965->render_state;
    struct i965_cc_viewport *cc_viewport;

    dri_bo_map(render_state->cc.viewport, 1);
    assert(render_state->cc.viewport->virtual);
    cc_viewport = render_state->cc.viewport->virtual;
    memset(cc_viewport, 0, sizeof(*cc_viewport));

    cc_viewport->min_depth = -1.e35;
    cc_viewport->max_depth = 1.e35;

    dri_bo_unmap(render_state->cc.viewport);
}

static void
i965_subpic_render_cc_unit(VADriverContextP ctx)
{
    struct i965_driver_data *i965 = i965_driver_data(ctx);
    struct i965_render_state *render_state = &i965->render_state;
    struct i965_cc_unit_state *cc_state;

    assert(render_state->cc.viewport);

    dri_bo_map(render_state->cc.state, 1);
    assert(render_state->cc.state->virtual);
    cc_state = render_state->cc.state->virtual;
    memset(cc_state, 0, sizeof(*cc_state));

    cc_state->cc0.stencil_enable = 0;   /* disable stencil */
    cc_state->cc2.depth_test = 0;       /* disable depth test */
    cc_state->cc2.logicop_enable = 0;   /* disable logic op */
    cc_state->cc3.ia_blend_enable = 0;  /* blend alpha just like colors */
    cc_state->cc3.blend_enable = 1;     /* enable color blend */
    cc_state->cc3.alpha_test = 0;       /* disable alpha test */
    cc_state->cc3.alpha_test_format = 0; /* ALPHATEST_UNORM8: store alpha value with UNORM8 */
    cc_state->cc3.alpha_test_func = 5;   /* COMPAREFUNCTION_LESS: pass if less than the reference */
    cc_state->cc4.cc_viewport_state_offset = render_state->cc.viewport->offset >> 5;

    cc_state->cc5.dither_enable = 0;    /* disable dither */
    cc_state->cc5.logicop_func = 0xc;   /* COPY */
    cc_state->cc5.statistics_enable = 1;
    cc_state->cc5.ia_blend_function = I965_BLENDFUNCTION_ADD;
    cc_state->cc5.ia_src_blend_factor = I965_BLENDFACTOR_DST_ALPHA;
    cc_state->cc5.ia_dest_blend_factor = I965_BLENDFACTOR_DST_ALPHA;

    cc_state->cc6.clamp_post_alpha_blend = 0;
    cc_state->cc6.clamp_pre_alpha_blend = 0;

    /* final color = src_color * src_blend_factor +/- dst_color * dest_blend_factor */
    cc_state->cc6.blend_function = I965_BLENDFUNCTION_ADD;
    cc_state->cc6.src_blend_factor = I965_BLENDFACTOR_SRC_ALPHA;
    cc_state->cc6.dest_blend_factor = I965_BLENDFACTOR_INV_SRC_ALPHA;

    /* alpha test reference */
    cc_state->cc7.alpha_ref.f = 0.0;

    dri_bo_emit_reloc(render_state->cc.state,
                      I915_GEM_DOMAIN_INSTRUCTION, 0,
                      0,
                      offsetof(struct i965_cc_unit_state, cc4),
                      render_state->cc.viewport);

    dri_bo_unmap(render_state->cc.state);
}

static void
i965_render_cc_unit(VADriverContextP ctx)
{
    struct i965_driver_data *i965 = i965_driver_data(ctx);
    struct i965_render_state *render_state = &i965->render_state;
    struct i965_cc_unit_state *cc_state;

    assert(render_state->cc.viewport);

    dri_bo_map(render_state->cc.state, 1);
    assert(render_state->cc.state->virtual);
    cc_state = render_state->cc.state->virtual;
    memset(cc_state, 0, sizeof(*cc_state));

    cc_state->cc0.stencil_enable = 0;   /* disable stencil */
    cc_state->cc2.depth_test = 0;       /* disable depth test */
    cc_state->cc2.logicop_enable = 1;   /* enable logic op */
    cc_state->cc3.ia_blend_enable = 0;  /* blend alpha just like colors */
    cc_state->cc3.blend_enable = 0;     /* disable color blend */
    cc_state->cc3.alpha_test = 0;       /* disable alpha test */
    cc_state->cc4.cc_viewport_state_offset = render_state->cc.viewport->offset >> 5;

    cc_state->cc5.dither_enable = 0;    /* disable dither */
    cc_state->cc5.logicop_func = 0xc;   /* COPY */
    cc_state->cc5.statistics_enable = 1;
    cc_state->cc5.ia_blend_function = I965_BLENDFUNCTION_ADD;
    cc_state->cc5.ia_src_blend_factor = I965_BLENDFACTOR_ONE;
    cc_state->cc5.ia_dest_blend_factor = I965_BLENDFACTOR_ONE;

    dri_bo_emit_reloc(render_state->cc.state,
                      I915_GEM_DOMAIN_INSTRUCTION, 0,
                      0,
                      offsetof(struct i965_cc_unit_state, cc4),
                      render_state->cc.viewport);

    dri_bo_unmap(render_state->cc.state);
}

static void
i965_render_set_surface_tiling(struct i965_surface_state *ss, unsigned int tiling)
{
    switch (tiling) {
    case I915_TILING_NONE:
        ss->ss3.tiled_surface = 0;
        ss->ss3.tile_walk = 0;
        break;
    case I915_TILING_X:
        ss->ss3.tiled_surface = 1;
        ss->ss3.tile_walk = I965_TILEWALK_XMAJOR;
        break;
    case I915_TILING_Y:
        ss->ss3.tiled_surface = 1;
        ss->ss3.tile_walk = I965_TILEWALK_YMAJOR;
        break;
    }
}

static void
i965_render_set_surface_state(
    struct i965_surface_state *ss,
    dri_bo                    *bo,
    unsigned long              offset,
    unsigned int               width,
    unsigned int               height,
    unsigned int               pitch,
    unsigned int               format,
    unsigned int               flags
)
{
    unsigned int tiling;
    unsigned int swizzle;

    memset(ss, 0, sizeof(*ss));

    switch (flags & (I965_PP_FLAG_TOP_FIELD|I965_PP_FLAG_BOTTOM_FIELD)) {
    case I965_PP_FLAG_BOTTOM_FIELD:
        ss->ss0.vert_line_stride_ofs = 1;
        /* fall-through */
    case I965_PP_FLAG_TOP_FIELD:
        ss->ss0.vert_line_stride = 1;
        height /= 2;
        break;
    }

    ss->ss0.surface_type = I965_SURFACE_2D;
    ss->ss0.surface_format = format;
    ss->ss0.color_blend = 1;

    ss->ss1.base_addr = bo->offset + offset;

    ss->ss2.width = width - 1;
    ss->ss2.height = height - 1;

    ss->ss3.pitch = pitch - 1;

    dri_bo_get_tiling(bo, &tiling, &swizzle);
    i965_render_set_surface_tiling(ss, tiling);
}

static void
gen7_render_set_surface_tiling(struct gen7_surface_state *ss, uint32_t tiling)
{
    switch (tiling) {
    case I915_TILING_NONE:
        ss->ss0.tiled_surface = 0;
        ss->ss0.tile_walk = 0;
        break;
    case I915_TILING_X:
        ss->ss0.tiled_surface = 1;
        ss->ss0.tile_walk = I965_TILEWALK_XMAJOR;
        break;
    case I915_TILING_Y:
        ss->ss0.tiled_surface = 1;
        ss->ss0.tile_walk = I965_TILEWALK_YMAJOR;
        break;
    }
}

/* Set "Shader Channel Select" */
void
gen7_render_set_surface_scs(struct gen7_surface_state *ss)
{
    ss->ss7.shader_chanel_select_r = HSW_SCS_RED;
    ss->ss7.shader_chanel_select_g = HSW_SCS_GREEN;
    ss->ss7.shader_chanel_select_b = HSW_SCS_BLUE;
    ss->ss7.shader_chanel_select_a = HSW_SCS_ALPHA;
}

static void
gen7_render_set_surface_state(
    struct gen7_surface_state *ss,
    dri_bo                    *bo,
    unsigned long              offset,
    int                        width,
    int                        height,
    int                        pitch,
    int                        format,
    unsigned int               flags
)
{
    unsigned int tiling;
    unsigned int swizzle;

    memset(ss, 0, sizeof(*ss));

    switch (flags & (I965_PP_FLAG_TOP_FIELD|I965_PP_FLAG_BOTTOM_FIELD)) {
    case I965_PP_FLAG_BOTTOM_FIELD:
        ss->ss0.vert_line_stride_ofs = 1;
        /* fall-through */
    case I965_PP_FLAG_TOP_FIELD:
        ss->ss0.vert_line_stride = 1;
        height /= 2;
        break;
    }

    ss->ss0.surface_type = I965_SURFACE_2D;
    ss->ss0.surface_format = format;

    ss->ss1.base_addr = bo->offset + offset;

    ss->ss2.width = width - 1;
    ss->ss2.height = height - 1;

    ss->ss3.pitch = pitch - 1;

    dri_bo_get_tiling(bo, &tiling, &swizzle);
    gen7_render_set_surface_tiling(ss, tiling);
}

static void
i965_render_src_surface_state(
    VADriverContextP ctx,
    int              index,
    dri_bo          *region,
    unsigned long    offset,
    int              w,
    int              h,
    int              pitch,
    int              format,
    unsigned int     flags
)
{
    struct i965_driver_data *i965 = i965_driver_data(ctx);
    struct i965_render_state *render_state = &i965->render_state;
    void *ss;
    dri_bo *ss_bo = render_state->wm.surface_state_binding_table_bo;

    assert(index < MAX_RENDER_SURFACES);

    dri_bo_map(ss_bo, 1);
    assert(ss_bo->virtual);
    ss = (char *)ss_bo->virtual + SURFACE_STATE_OFFSET(index);

    if (IS_GEN7(i965->intel.device_id)) {
        gen7_render_set_surface_state(ss,
                                      region, offset,
                                      w, h,
                                      pitch, format, flags);
        if (IS_HASWELL(i965->intel.device_id))
            gen7_render_set_surface_scs(ss);
        dri_bo_emit_reloc(ss_bo,
                          I915_GEM_DOMAIN_SAMPLER, 0,
                          offset,
                          SURFACE_STATE_OFFSET(index) + offsetof(struct gen7_surface_state, ss1),
                          region);
    } else {
        i965_render_set_surface_state(ss,
                                      region, offset,
                                      w, h,
                                      pitch, format, flags);
        dri_bo_emit_reloc(ss_bo,
                          I915_GEM_DOMAIN_SAMPLER, 0,
                          offset,
                          SURFACE_STATE_OFFSET(index) + offsetof(struct i965_surface_state, ss1),
                          region);
    }

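    /* point the binding-table entry for this index at the surface state just written */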
    ((unsigned int *)((char *)ss_bo->virtual + BINDING_TABLE_OFFSET))[index] = SURFACE_STATE_OFFSET(index);
    dri_bo_unmap(ss_bo);
    render_state->wm.sampler_count++;
}

static void
i965_render_src_surfaces_state(
    VADriverContextP ctx,
    struct object_surface *obj_surface,
    unsigned int     flags
)
{
    int region_pitch;
    int rw, rh;
    dri_bo *region;

    region_pitch = obj_surface->width;
    rw = obj_surface->orig_width;
    rh = obj_surface->orig_height;
    region = obj_surface->bo;

    i965_render_src_surface_state(ctx, 1, region, 0, rw, rh, region_pitch, I965_SURFACEFORMAT_R8_UNORM, flags);     /* Y */
    i965_render_src_surface_state(ctx, 2, region, 0, rw, rh, region_pitch, I965_SURFACEFORMAT_R8_UNORM, flags);

    if (obj_surface->fourcc == VA_FOURCC('N', 'V', '1', '2')) {
        i965_render_src_surface_state(ctx, 3, region,
                                      region_pitch * obj_surface->y_cb_offset,
                                      obj_surface->cb_cr_width, obj_surface->cb_cr_height, obj_surface->cb_cr_pitch,
                                      I965_SURFACEFORMAT_R8G8_UNORM, flags); /* UV */
        i965_render_src_surface_state(ctx, 4, region,
                                      region_pitch * obj_surface->y_cb_offset,
                                      obj_surface->cb_cr_width, obj_surface->cb_cr_height, obj_surface->cb_cr_pitch,
                                      I965_SURFACEFORMAT_R8G8_UNORM, flags);
    } else {
        i965_render_src_surface_state(ctx, 3, region,
                                      region_pitch * obj_surface->y_cb_offset,
                                      obj_surface->cb_cr_width, obj_surface->cb_cr_height, obj_surface->cb_cr_pitch,
                                      I965_SURFACEFORMAT_R8_UNORM, flags); /* U */
        i965_render_src_surface_state(ctx, 4, region,
                                      region_pitch * obj_surface->y_cb_offset,
                                      obj_surface->cb_cr_width, obj_surface->cb_cr_height, obj_surface->cb_cr_pitch,
                                      I965_SURFACEFORMAT_R8_UNORM, flags);
        i965_render_src_surface_state(ctx, 5, region,
                                      region_pitch * obj_surface->y_cr_offset,
                                      obj_surface->cb_cr_width, obj_surface->cb_cr_height, obj_surface->cb_cr_pitch,
                                      I965_SURFACEFORMAT_R8_UNORM, flags); /* V */
        i965_render_src_surface_state(ctx, 6, region,
                                      region_pitch * obj_surface->y_cr_offset,
                                      obj_surface->cb_cr_width, obj_surface->cb_cr_height, obj_surface->cb_cr_pitch,
                                      I965_SURFACEFORMAT_R8_UNORM, flags);
    }
}

static void
i965_subpic_render_src_surfaces_state(VADriverContextP ctx,
                                      struct object_surface *obj_surface)
{
    dri_bo *subpic_region;
    unsigned int index = obj_surface->subpic_render_idx;
    struct object_subpic *obj_subpic = obj_surface->obj_subpic[index];
    struct object_image *obj_image = obj_subpic->obj_image;

    assert(obj_surface);
    assert(obj_surface->bo);
    subpic_region = obj_image->bo;
    /* subpicture surface */
    i965_render_src_surface_state(ctx, 1, subpic_region, 0, obj_subpic->width, obj_subpic->height, obj_subpic->pitch, obj_subpic->format, 0);
    i965_render_src_surface_state(ctx, 2, subpic_region, 0, obj_subpic->width, obj_subpic->height, obj_subpic->pitch, obj_subpic->format, 0);
}

static void
i965_render_dest_surface_state(VADriverContextP ctx, int index)
{
    struct i965_driver_data *i965 = i965_driver_data(ctx);
    struct i965_render_state *render_state = &i965->render_state;
    struct intel_region *dest_region = render_state->draw_region;
    void *ss;
    dri_bo *ss_bo = render_state->wm.surface_state_binding_table_bo;
    int format;

    assert(index < MAX_RENDER_SURFACES);

    if (dest_region->cpp == 2) {
        format = I965_SURFACEFORMAT_B5G6R5_UNORM;
    } else {
        format = I965_SURFACEFORMAT_B8G8R8A8_UNORM;
    }

    dri_bo_map(ss_bo, 1);
    assert(ss_bo->virtual);
    ss = (char *)ss_bo->virtual + SURFACE_STATE_OFFSET(index);

    if (IS_GEN7(i965->intel.device_id)) {
        gen7_render_set_surface_state(ss,
                                      dest_region->bo, 0,
                                      dest_region->width, dest_region->height,
                                      dest_region->pitch, format, 0);
        if (IS_HASWELL(i965->intel.device_id))
            gen7_render_set_surface_scs(ss);
        dri_bo_emit_reloc(ss_bo,
                          I915_GEM_DOMAIN_RENDER, I915_GEM_DOMAIN_RENDER,
                          0,
                          SURFACE_STATE_OFFSET(index) + offsetof(struct gen7_surface_state, ss1),
                          dest_region->bo);
    } else {
        i965_render_set_surface_state(ss,
                                      dest_region->bo, 0,
                                      dest_region->width, dest_region->height,
                                      dest_region->pitch, format, 0);
        dri_bo_emit_reloc(ss_bo,
                          I915_GEM_DOMAIN_RENDER, I915_GEM_DOMAIN_RENDER,
                          0,
                          SURFACE_STATE_OFFSET(index) + offsetof(struct i965_surface_state, ss1),
                          dest_region->bo);
    }

    ((unsigned int *)((char *)ss_bo->virtual + BINDING_TABLE_OFFSET))[index] = SURFACE_STATE_OFFSET(index);
    dri_bo_unmap(ss_bo);
}

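/*
 * The vertex buffer holds the three vertices of a RECTLIST, 16 bytes each:
 * two floats of texture coordinates followed by two floats of destination
 * coordinates, matching the 4 * 4 pitch programmed in CMD_VERTEX_BUFFERS.
 */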
static void
i965_fill_vertex_buffer(
    VADriverContextP ctx,
    float tex_coords[4], /* [(u1,v1);(u2,v2)] */
    float vid_coords[4]  /* [(x1,y1);(x2,y2)] */
)
{
    struct i965_driver_data * const i965 = i965_driver_data(ctx);
    float vb[12];

    enum { X1, Y1, X2, Y2 };

    static const unsigned int g_rotation_indices[][6] = {
        [VA_ROTATION_NONE] = { X2, Y2, X1, Y2, X1, Y1 },
        [VA_ROTATION_90]   = { X2, Y1, X2, Y2, X1, Y2 },
        [VA_ROTATION_180]  = { X1, Y1, X2, Y1, X2, Y2 },
        [VA_ROTATION_270]  = { X1, Y2, X1, Y1, X2, Y1 },
    };

    const unsigned int * const rotation_indices =
        g_rotation_indices[i965->rotation_attrib->value];

    vb[0]  = tex_coords[rotation_indices[0]]; /* bottom-right corner */
    vb[1]  = tex_coords[rotation_indices[1]];
    vb[2]  = vid_coords[X2];
    vb[3]  = vid_coords[Y2];

    vb[4]  = tex_coords[rotation_indices[2]]; /* bottom-left corner */
    vb[5]  = tex_coords[rotation_indices[3]];
    vb[6]  = vid_coords[X1];
    vb[7]  = vid_coords[Y2];

    vb[8]  = tex_coords[rotation_indices[4]]; /* top-left corner */
    vb[9]  = tex_coords[rotation_indices[5]];
    vb[10] = vid_coords[X1];
    vb[11] = vid_coords[Y1];

    dri_bo_subdata(i965->render_state.vb.vertex_buffer, 0, sizeof(vb), vb);
}

static void
i965_subpic_render_upload_vertex(VADriverContextP ctx,
                                 struct object_surface *obj_surface,
                                 const VARectangle *output_rect)
{
    unsigned int index = obj_surface->subpic_render_idx;
    struct object_subpic     *obj_subpic   = obj_surface->obj_subpic[index];
    float tex_coords[4], vid_coords[4];
    VARectangle dst_rect;

    if (obj_subpic->flags & VA_SUBPICTURE_DESTINATION_IS_SCREEN_COORD)
        dst_rect = obj_subpic->dst_rect;
    else {
        const float sx  = (float)output_rect->width  / obj_surface->orig_width;
        const float sy  = (float)output_rect->height / obj_surface->orig_height;
        dst_rect.x      = output_rect->x + sx * obj_subpic->dst_rect.x;
        dst_rect.y      = output_rect->y + sy * obj_subpic->dst_rect.y;
        dst_rect.width  = sx * obj_subpic->dst_rect.width;
        dst_rect.height = sy * obj_subpic->dst_rect.height;
    }

    tex_coords[0] = (float)obj_subpic->src_rect.x / obj_subpic->width;
    tex_coords[1] = (float)obj_subpic->src_rect.y / obj_subpic->height;
    tex_coords[2] = (float)(obj_subpic->src_rect.x + obj_subpic->src_rect.width) / obj_subpic->width;
    tex_coords[3] = (float)(obj_subpic->src_rect.y + obj_subpic->src_rect.height) / obj_subpic->height;

    vid_coords[0] = dst_rect.x;
    vid_coords[1] = dst_rect.y;
    vid_coords[2] = (float)(dst_rect.x + dst_rect.width);
    vid_coords[3] = (float)(dst_rect.y + dst_rect.height);

    i965_fill_vertex_buffer(ctx, tex_coords, vid_coords);
}

static void
i965_render_upload_vertex(
    VADriverContextP   ctx,
    struct object_surface *obj_surface,
    const VARectangle *src_rect,
    const VARectangle *dst_rect
)
{
    struct i965_driver_data *i965 = i965_driver_data(ctx);
    struct i965_render_state *render_state = &i965->render_state;
    struct intel_region *dest_region = render_state->draw_region;
    float tex_coords[4], vid_coords[4];
    int width, height;

    width  = obj_surface->orig_width;
    height = obj_surface->orig_height;

    tex_coords[0] = (float)src_rect->x / width;
    tex_coords[1] = (float)src_rect->y / height;
    tex_coords[2] = (float)(src_rect->x + src_rect->width) / width;
    tex_coords[3] = (float)(src_rect->y + src_rect->height) / height;

    vid_coords[0] = dest_region->x + dst_rect->x;
    vid_coords[1] = dest_region->y + dst_rect->y;
    vid_coords[2] = vid_coords[0] + dst_rect->width;
    vid_coords[3] = vid_coords[1] + dst_rect->height;

    i965_fill_vertex_buffer(ctx, tex_coords, vid_coords);
}

#define PI  3.1415926

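/*
 * CURBE layout for the video PS kernel: word 0 selects the sampling path
 * (0 = planar YUV, 1 = NV12, 2 = Y800), word 1 set to 1 skips the color
 * balance transform, floats 4-7 hold the color-balance factors, and the
 * BT.601 matrix is copied in starting at float offset 8.
 */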
static void
i965_render_upload_constants(VADriverContextP ctx,
                             struct object_surface *obj_surface)
{
    struct i965_driver_data *i965 = i965_driver_data(ctx);
    struct i965_render_state *render_state = &i965->render_state;
    unsigned short *constant_buffer;
    float *color_balance_base;
    float contrast = (float)i965->contrast_attrib->value / DEFAULT_CONTRAST;
    float brightness = (float)i965->brightness_attrib->value / 255; /* YUV is float in the shader */
    float hue = (float)i965->hue_attrib->value / 180 * PI;
    float saturation = (float)i965->saturation_attrib->value / DEFAULT_SATURATION;
    float *yuv_to_rgb;

    dri_bo_map(render_state->curbe.bo, 1);
    assert(render_state->curbe.bo->virtual);
    constant_buffer = render_state->curbe.bo->virtual;

    if (obj_surface->subsampling == SUBSAMPLE_YUV400) {
        assert(obj_surface->fourcc == VA_FOURCC('Y', '8', '0', '0'));

        constant_buffer[0] = 2;
    } else {
        if (obj_surface->fourcc == VA_FOURCC('N', 'V', '1', '2'))
            constant_buffer[0] = 1;
        else
            constant_buffer[0] = 0;
    }

    if (i965->contrast_attrib->value == DEFAULT_CONTRAST &&
        i965->brightness_attrib->value == DEFAULT_BRIGHTNESS &&
        i965->hue_attrib->value == DEFAULT_HUE &&
        i965->saturation_attrib->value == DEFAULT_SATURATION)
        constant_buffer[1] = 1; /* skip color balance transformation */
    else
        constant_buffer[1] = 0;

    color_balance_base = (float *)constant_buffer + 4;
    *color_balance_base++ = contrast;
    *color_balance_base++ = brightness;
    *color_balance_base++ = cos(hue) * contrast * saturation;
    *color_balance_base++ = sin(hue) * contrast * saturation;

    yuv_to_rgb = (float *)constant_buffer + 8;
    memcpy(yuv_to_rgb, yuv_to_rgb_bt601, sizeof(yuv_to_rgb_bt601));

    dri_bo_unmap(render_state->curbe.bo);
}

static void
i965_subpic_render_upload_constants(VADriverContextP ctx,
                                    struct object_surface *obj_surface)
{
    struct i965_driver_data *i965 = i965_driver_data(ctx);
    struct i965_render_state *render_state = &i965->render_state;
    float *constant_buffer;
    float global_alpha = 1.0;
    unsigned int index = obj_surface->subpic_render_idx;
    struct object_subpic *obj_subpic = obj_surface->obj_subpic[index];

    if (obj_subpic->flags & VA_SUBPICTURE_GLOBAL_ALPHA) {
        global_alpha = obj_subpic->global_alpha;
    }

    dri_bo_map(render_state->curbe.bo, 1);

    assert(render_state->curbe.bo->virtual);
    constant_buffer = render_state->curbe.bo->virtual;
    *constant_buffer = global_alpha;

    dri_bo_unmap(render_state->curbe.bo);
}

static void
i965_surface_render_state_setup(
    VADriverContextP   ctx,
    struct object_surface *obj_surface,
    const VARectangle *src_rect,
    const VARectangle *dst_rect,
    unsigned int       flags
)
{
    i965_render_vs_unit(ctx);
    i965_render_sf_unit(ctx);
    i965_render_dest_surface_state(ctx, 0);
    i965_render_src_surfaces_state(ctx, obj_surface, flags);
    i965_render_sampler(ctx);
    i965_render_wm_unit(ctx);
    i965_render_cc_viewport(ctx);
    i965_render_cc_unit(ctx);
    i965_render_upload_vertex(ctx, obj_surface, src_rect, dst_rect);
    i965_render_upload_constants(ctx, obj_surface);
}

static void
i965_subpic_render_state_setup(
    VADriverContextP   ctx,
    struct object_surface *obj_surface,
    const VARectangle *src_rect,
    const VARectangle *dst_rect
)
{
    i965_render_vs_unit(ctx);
    i965_render_sf_unit(ctx);
    i965_render_dest_surface_state(ctx, 0);
    i965_subpic_render_src_surfaces_state(ctx, obj_surface);
    i965_render_sampler(ctx);
    i965_subpic_render_wm_unit(ctx);
    i965_render_cc_viewport(ctx);
    i965_subpic_render_cc_unit(ctx);
    i965_subpic_render_upload_constants(ctx, obj_surface);
    i965_subpic_render_upload_vertex(ctx, obj_surface, dst_rect);
}


static void
i965_render_pipeline_select(VADriverContextP ctx)
{
    struct i965_driver_data *i965 = i965_driver_data(ctx);
    struct intel_batchbuffer *batch = i965->batch;

    BEGIN_BATCH(batch, 1);
    OUT_BATCH(batch, CMD_PIPELINE_SELECT | PIPELINE_SELECT_3D);
    ADVANCE_BATCH(batch);
}

static void
i965_render_state_sip(VADriverContextP ctx)
{
    struct i965_driver_data *i965 = i965_driver_data(ctx);
    struct intel_batchbuffer *batch = i965->batch;

    BEGIN_BATCH(batch, 2);
    OUT_BATCH(batch, CMD_STATE_SIP | 0);
    OUT_BATCH(batch, 0);
    ADVANCE_BATCH(batch);
}

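/*
 * Only the surface state base address is pointed at a real buffer; the
 * SURFACE_STATE_OFFSET()/BINDING_TABLE_OFFSET values emitted elsewhere are
 * therefore offsets into wm.surface_state_binding_table_bo.
 */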
static void
i965_render_state_base_address(VADriverContextP ctx)
{
    struct i965_driver_data *i965 = i965_driver_data(ctx);
    struct intel_batchbuffer *batch = i965->batch;
    struct i965_render_state *render_state = &i965->render_state;

    if (IS_IRONLAKE(i965->intel.device_id)) {
        BEGIN_BATCH(batch, 8);
        OUT_BATCH(batch, CMD_STATE_BASE_ADDRESS | 6);
        OUT_BATCH(batch, 0 | BASE_ADDRESS_MODIFY);
        OUT_RELOC(batch, render_state->wm.surface_state_binding_table_bo, I915_GEM_DOMAIN_INSTRUCTION, 0, BASE_ADDRESS_MODIFY);
        OUT_BATCH(batch, 0 | BASE_ADDRESS_MODIFY);
        OUT_BATCH(batch, 0 | BASE_ADDRESS_MODIFY);
        OUT_BATCH(batch, 0 | BASE_ADDRESS_MODIFY);
        OUT_BATCH(batch, 0 | BASE_ADDRESS_MODIFY);
        OUT_BATCH(batch, 0 | BASE_ADDRESS_MODIFY);
        ADVANCE_BATCH(batch);
    } else {
        BEGIN_BATCH(batch, 6);
        OUT_BATCH(batch, CMD_STATE_BASE_ADDRESS | 4);
        OUT_BATCH(batch, 0 | BASE_ADDRESS_MODIFY);
        OUT_RELOC(batch, render_state->wm.surface_state_binding_table_bo, I915_GEM_DOMAIN_INSTRUCTION, 0, BASE_ADDRESS_MODIFY);
        OUT_BATCH(batch, 0 | BASE_ADDRESS_MODIFY);
        OUT_BATCH(batch, 0 | BASE_ADDRESS_MODIFY);
        OUT_BATCH(batch, 0 | BASE_ADDRESS_MODIFY);
        ADVANCE_BATCH(batch);
    }
}

static void
i965_render_binding_table_pointers(VADriverContextP ctx)
{
    struct i965_driver_data *i965 = i965_driver_data(ctx);
    struct intel_batchbuffer *batch = i965->batch;

    BEGIN_BATCH(batch, 6);
    OUT_BATCH(batch, CMD_BINDING_TABLE_POINTERS | 4);
    OUT_BATCH(batch, 0); /* vs */
    OUT_BATCH(batch, 0); /* gs */
    OUT_BATCH(batch, 0); /* clip */
    OUT_BATCH(batch, 0); /* sf */
    OUT_BATCH(batch, BINDING_TABLE_OFFSET);
    ADVANCE_BATCH(batch);
}

static void
i965_render_constant_color(VADriverContextP ctx)
{
    struct i965_driver_data *i965 = i965_driver_data(ctx);
    struct intel_batchbuffer *batch = i965->batch;

    BEGIN_BATCH(batch, 5);
    OUT_BATCH(batch, CMD_CONSTANT_COLOR | 3);
    OUT_BATCH(batch, float_to_uint(1.0));
    OUT_BATCH(batch, float_to_uint(0.0));
    OUT_BATCH(batch, float_to_uint(1.0));
    OUT_BATCH(batch, float_to_uint(1.0));
    ADVANCE_BATCH(batch);
}

static void
i965_render_pipelined_pointers(VADriverContextP ctx)
{
    struct i965_driver_data *i965 = i965_driver_data(ctx);
    struct intel_batchbuffer *batch = i965->batch;
    struct i965_render_state *render_state = &i965->render_state;

    BEGIN_BATCH(batch, 7);
    OUT_BATCH(batch, CMD_PIPELINED_POINTERS | 5);
    OUT_RELOC(batch, render_state->vs.state, I915_GEM_DOMAIN_INSTRUCTION, 0, 0);
    OUT_BATCH(batch, 0);  /* disable GS */
    OUT_BATCH(batch, 0);  /* disable CLIP */
    OUT_RELOC(batch, render_state->sf.state, I915_GEM_DOMAIN_INSTRUCTION, 0, 0);
    OUT_RELOC(batch, render_state->wm.state, I915_GEM_DOMAIN_INSTRUCTION, 0, 0);
    OUT_RELOC(batch, render_state->cc.state, I915_GEM_DOMAIN_INSTRUCTION, 0, 0);
    ADVANCE_BATCH(batch);
}

static void
i965_render_urb_layout(VADriverContextP ctx)
{
    struct i965_driver_data *i965 = i965_driver_data(ctx);
    struct intel_batchbuffer *batch = i965->batch;
    int urb_vs_start, urb_vs_size;
    int urb_gs_start, urb_gs_size;
    int urb_clip_start, urb_clip_size;
    int urb_sf_start, urb_sf_size;
    int urb_cs_start, urb_cs_size;

    urb_vs_start = 0;
    urb_vs_size = URB_VS_ENTRIES * URB_VS_ENTRY_SIZE;
    urb_gs_start = urb_vs_start + urb_vs_size;
    urb_gs_size = URB_GS_ENTRIES * URB_GS_ENTRY_SIZE;
    urb_clip_start = urb_gs_start + urb_gs_size;
    urb_clip_size = URB_CLIP_ENTRIES * URB_CLIP_ENTRY_SIZE;
    urb_sf_start = urb_clip_start + urb_clip_size;
    urb_sf_size = URB_SF_ENTRIES * URB_SF_ENTRY_SIZE;
    urb_cs_start = urb_sf_start + urb_sf_size;
    urb_cs_size = URB_CS_ENTRIES * URB_CS_ENTRY_SIZE;

    BEGIN_BATCH(batch, 3);
    OUT_BATCH(batch,
              CMD_URB_FENCE |
              UF0_CS_REALLOC |
              UF0_SF_REALLOC |
              UF0_CLIP_REALLOC |
              UF0_GS_REALLOC |
              UF0_VS_REALLOC |
              1);
    OUT_BATCH(batch,
              ((urb_clip_start + urb_clip_size) << UF1_CLIP_FENCE_SHIFT) |
              ((urb_gs_start + urb_gs_size) << UF1_GS_FENCE_SHIFT) |
              ((urb_vs_start + urb_vs_size) << UF1_VS_FENCE_SHIFT));
    OUT_BATCH(batch,
              ((urb_cs_start + urb_cs_size) << UF2_CS_FENCE_SHIFT) |
              ((urb_sf_start + urb_sf_size) << UF2_SF_FENCE_SHIFT));
    ADVANCE_BATCH(batch);
}

static void
i965_render_cs_urb_layout(VADriverContextP ctx)
{
    struct i965_driver_data *i965 = i965_driver_data(ctx);
    struct intel_batchbuffer *batch = i965->batch;

    BEGIN_BATCH(batch, 2);
    OUT_BATCH(batch, CMD_CS_URB_STATE | 0);
    OUT_BATCH(batch,
              ((URB_CS_ENTRY_SIZE - 1) << 4) |  /* URB Entry Allocation Size */
              (URB_CS_ENTRIES << 0));           /* Number of URB Entries */
    ADVANCE_BATCH(batch);
}

static void
i965_render_constant_buffer(VADriverContextP ctx)
{
    struct i965_driver_data *i965 = i965_driver_data(ctx);
    struct intel_batchbuffer *batch = i965->batch;
    struct i965_render_state *render_state = &i965->render_state;

    BEGIN_BATCH(batch, 2);
    OUT_BATCH(batch, CMD_CONSTANT_BUFFER | (1 << 8) | (2 - 2));
    OUT_RELOC(batch, render_state->curbe.bo,
              I915_GEM_DOMAIN_INSTRUCTION, 0,
              URB_CS_ENTRY_SIZE - 1);
    ADVANCE_BATCH(batch);
}

static void
i965_render_drawing_rectangle(VADriverContextP ctx)
{
    struct i965_driver_data *i965 = i965_driver_data(ctx);
    struct intel_batchbuffer *batch = i965->batch;
    struct i965_render_state *render_state = &i965->render_state;
    struct intel_region *dest_region = render_state->draw_region;

    BEGIN_BATCH(batch, 4);
    OUT_BATCH(batch, CMD_DRAWING_RECTANGLE | 2);
    OUT_BATCH(batch, 0x00000000);
    OUT_BATCH(batch, (dest_region->width - 1) | (dest_region->height - 1) << 16);
    OUT_BATCH(batch, 0x00000000);
    ADVANCE_BATCH(batch);
}

static void
i965_render_vertex_elements(VADriverContextP ctx)
{
    struct i965_driver_data *i965 = i965_driver_data(ctx);
    struct intel_batchbuffer *batch = i965->batch;

    if (IS_IRONLAKE(i965->intel.device_id)) {
        BEGIN_BATCH(batch, 5);
        OUT_BATCH(batch, CMD_VERTEX_ELEMENTS | 3);
        /* offset 0: X,Y -> {X, Y, 1.0, 1.0} */
        OUT_BATCH(batch, (0 << VE0_VERTEX_BUFFER_INDEX_SHIFT) |
                  VE0_VALID |
                  (I965_SURFACEFORMAT_R32G32_FLOAT << VE0_FORMAT_SHIFT) |
                  (0 << VE0_OFFSET_SHIFT));
        OUT_BATCH(batch, (I965_VFCOMPONENT_STORE_SRC << VE1_VFCOMPONENT_0_SHIFT) |
                  (I965_VFCOMPONENT_STORE_SRC << VE1_VFCOMPONENT_1_SHIFT) |
                  (I965_VFCOMPONENT_STORE_1_FLT << VE1_VFCOMPONENT_2_SHIFT) |
                  (I965_VFCOMPONENT_STORE_1_FLT << VE1_VFCOMPONENT_3_SHIFT));
        /* offset 8: S0, T0 -> {S0, T0, 1.0, 1.0} */
        OUT_BATCH(batch, (0 << VE0_VERTEX_BUFFER_INDEX_SHIFT) |
                  VE0_VALID |
                  (I965_SURFACEFORMAT_R32G32_FLOAT << VE0_FORMAT_SHIFT) |
                  (8 << VE0_OFFSET_SHIFT));
        OUT_BATCH(batch, (I965_VFCOMPONENT_STORE_SRC << VE1_VFCOMPONENT_0_SHIFT) |
                  (I965_VFCOMPONENT_STORE_SRC << VE1_VFCOMPONENT_1_SHIFT) |
                  (I965_VFCOMPONENT_STORE_1_FLT << VE1_VFCOMPONENT_2_SHIFT) |
                  (I965_VFCOMPONENT_STORE_1_FLT << VE1_VFCOMPONENT_3_SHIFT));
        ADVANCE_BATCH(batch);
    } else {
        BEGIN_BATCH(batch, 5);
        OUT_BATCH(batch, CMD_VERTEX_ELEMENTS | 3);
        /* offset 0: X,Y -> {X, Y, 1.0, 1.0} */
        OUT_BATCH(batch, (0 << VE0_VERTEX_BUFFER_INDEX_SHIFT) |
                  VE0_VALID |
                  (I965_SURFACEFORMAT_R32G32_FLOAT << VE0_FORMAT_SHIFT) |
                  (0 << VE0_OFFSET_SHIFT));
        OUT_BATCH(batch, (I965_VFCOMPONENT_STORE_SRC << VE1_VFCOMPONENT_0_SHIFT) |
                  (I965_VFCOMPONENT_STORE_SRC << VE1_VFCOMPONENT_1_SHIFT) |
                  (I965_VFCOMPONENT_STORE_1_FLT << VE1_VFCOMPONENT_2_SHIFT) |
                  (I965_VFCOMPONENT_STORE_1_FLT << VE1_VFCOMPONENT_3_SHIFT) |
                  (0 << VE1_DESTINATION_ELEMENT_OFFSET_SHIFT));
        /* offset 8: S0, T0 -> {S0, T0, 1.0, 1.0} */
        OUT_BATCH(batch, (0 << VE0_VERTEX_BUFFER_INDEX_SHIFT) |
                  VE0_VALID |
                  (I965_SURFACEFORMAT_R32G32_FLOAT << VE0_FORMAT_SHIFT) |
                  (8 << VE0_OFFSET_SHIFT));
        OUT_BATCH(batch, (I965_VFCOMPONENT_STORE_SRC << VE1_VFCOMPONENT_0_SHIFT) |
                  (I965_VFCOMPONENT_STORE_SRC << VE1_VFCOMPONENT_1_SHIFT) |
                  (I965_VFCOMPONENT_STORE_1_FLT << VE1_VFCOMPONENT_2_SHIFT) |
                  (I965_VFCOMPONENT_STORE_1_FLT << VE1_VFCOMPONENT_3_SHIFT) |
                  (4 << VE1_DESTINATION_ELEMENT_OFFSET_SHIFT));
        ADVANCE_BATCH(batch);
    }
}

static void
i965_render_upload_image_palette(
    VADriverContextP ctx,
    struct object_image *obj_image,
    unsigned int     alpha
)
{
    struct i965_driver_data *i965 = i965_driver_data(ctx);
    struct intel_batchbuffer *batch = i965->batch;
    unsigned int i;

    assert(obj_image);

    if (!obj_image)
        return;

    if (obj_image->image.num_palette_entries == 0)
        return;

    BEGIN_BATCH(batch, 1 + obj_image->image.num_palette_entries);
    OUT_BATCH(batch, CMD_SAMPLER_PALETTE_LOAD | (obj_image->image.num_palette_entries - 1));
    /* fill the palette: bits 0-23 carry the color, bits 24-31 the alpha */
    for (i = 0; i < obj_image->image.num_palette_entries; i++)
        OUT_BATCH(batch, (alpha << 24) | obj_image->palette[i]);
    ADVANCE_BATCH(batch);
}

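/*
 * Emit the vertex buffer packet and kick off the draw: a single RECTLIST
 * primitive whose three vertices are read sequentially from
 * vb.vertex_buffer (filled by i965_fill_vertex_buffer() above).
 */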
static void
i965_render_startup(VADriverContextP ctx)
{
    struct i965_driver_data *i965 = i965_driver_data(ctx);
    struct intel_batchbuffer *batch = i965->batch;
    struct i965_render_state *render_state = &i965->render_state;

    BEGIN_BATCH(batch, 11);
    OUT_BATCH(batch, CMD_VERTEX_BUFFERS | 3);
    OUT_BATCH(batch,
              (0 << VB0_BUFFER_INDEX_SHIFT) |
              VB0_VERTEXDATA |
              ((4 * 4) << VB0_BUFFER_PITCH_SHIFT));
    OUT_RELOC(batch, render_state->vb.vertex_buffer, I915_GEM_DOMAIN_VERTEX, 0, 0);

    if (IS_IRONLAKE(i965->intel.device_id))
        OUT_RELOC(batch, render_state->vb.vertex_buffer, I915_GEM_DOMAIN_VERTEX, 0, 12 * 4);
    else
        OUT_BATCH(batch, 3);

    OUT_BATCH(batch, 0);

    OUT_BATCH(batch,
              CMD_3DPRIMITIVE |
              _3DPRIMITIVE_VERTEX_SEQUENTIAL |
              (_3DPRIM_RECTLIST << _3DPRIMITIVE_TOPOLOGY_SHIFT) |
              (0 << 9) |
              4);
    OUT_BATCH(batch, 3); /* vertex count per instance */
    OUT_BATCH(batch, 0); /* start vertex offset */
    OUT_BATCH(batch, 1); /* single instance */
    OUT_BATCH(batch, 0); /* start instance location */
    OUT_BATCH(batch, 0); /* index buffer offset, ignored */
    ADVANCE_BATCH(batch);
}

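/*
 * Clear the destination rectangle with a solid-color blit before drawing.
 * XY_COLOR_BLT fills the rectangle with the color in its final DWord
 * (0x0, i.e. black); the 0xf0 ROP in BR13 is the pattern-copy raster op,
 * and a tiled destination wants its pitch programmed in DWords rather
 * than bytes, hence the pitch /= 4 below.
 */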
static void
i965_clear_dest_region(VADriverContextP ctx)
{
    struct i965_driver_data *i965 = i965_driver_data(ctx);
    struct intel_batchbuffer *batch = i965->batch;
    struct i965_render_state *render_state = &i965->render_state;
    struct intel_region *dest_region = render_state->draw_region;
    unsigned int blt_cmd, br13;
    int pitch;

    blt_cmd = XY_COLOR_BLT_CMD;
    br13 = 0xf0 << 16;
    pitch = dest_region->pitch;

    if (dest_region->cpp == 4) {
        br13 |= BR13_8888;
        blt_cmd |= (XY_COLOR_BLT_WRITE_RGB | XY_COLOR_BLT_WRITE_ALPHA);
    } else {
        assert(dest_region->cpp == 2);
        br13 |= BR13_565;
    }

    if (dest_region->tiling != I915_TILING_NONE) {
        blt_cmd |= XY_COLOR_BLT_DST_TILED;
        pitch /= 4;
    }

    br13 |= pitch;

    if (IS_GEN6(i965->intel.device_id) ||
        IS_GEN7(i965->intel.device_id)) {
        intel_batchbuffer_start_atomic_blt(batch, 24);
        BEGIN_BLT_BATCH(batch, 6);
    } else {
        intel_batchbuffer_start_atomic(batch, 24);
        BEGIN_BATCH(batch, 6);
    }

    OUT_BATCH(batch, blt_cmd);
    OUT_BATCH(batch, br13);
    OUT_BATCH(batch, (dest_region->y << 16) | (dest_region->x));
    OUT_BATCH(batch, ((dest_region->y + dest_region->height) << 16) |
              (dest_region->x + dest_region->width));
    OUT_RELOC(batch, dest_region->bo,
              I915_GEM_DOMAIN_RENDER, I915_GEM_DOMAIN_RENDER,
              0);
    OUT_BATCH(batch, 0x0);
    ADVANCE_BATCH(batch);
    intel_batchbuffer_end_atomic(batch);
}

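/*
 * Full fixed-function pipeline walk for the pre-GEN6 path: clear the
 * destination, then emit pipeline select, SIP, base addresses, binding
 * table pointers, unit state pointers, URB/CS layout, constants, the
 * drawing rectangle and vertex elements before the final 3DPRIMITIVE in
 * i965_render_startup(). The subpicture variant below skips the clear
 * and the constant buffer, since it blends on top of the rendered frame.
 */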
static void
i965_surface_render_pipeline_setup(VADriverContextP ctx)
{
    struct i965_driver_data *i965 = i965_driver_data(ctx);
    struct intel_batchbuffer *batch = i965->batch;

    i965_clear_dest_region(ctx);
    intel_batchbuffer_start_atomic(batch, 0x1000);
    intel_batchbuffer_emit_mi_flush(batch);
    i965_render_pipeline_select(ctx);
    i965_render_state_sip(ctx);
    i965_render_state_base_address(ctx);
    i965_render_binding_table_pointers(ctx);
    i965_render_constant_color(ctx);
    i965_render_pipelined_pointers(ctx);
    i965_render_urb_layout(ctx);
    i965_render_cs_urb_layout(ctx);
    i965_render_constant_buffer(ctx);
    i965_render_drawing_rectangle(ctx);
    i965_render_vertex_elements(ctx);
    i965_render_startup(ctx);
    intel_batchbuffer_end_atomic(batch);
}

static void
i965_subpic_render_pipeline_setup(VADriverContextP ctx)
{
    struct i965_driver_data *i965 = i965_driver_data(ctx);
    struct intel_batchbuffer *batch = i965->batch;

    intel_batchbuffer_start_atomic(batch, 0x1000);
    intel_batchbuffer_emit_mi_flush(batch);
    i965_render_pipeline_select(ctx);
    i965_render_state_sip(ctx);
    i965_render_state_base_address(ctx);
    i965_render_binding_table_pointers(ctx);
    i965_render_constant_color(ctx);
    i965_render_pipelined_pointers(ctx);
    i965_render_urb_layout(ctx);
    i965_render_cs_urb_layout(ctx);
    i965_render_drawing_rectangle(ctx);
    i965_render_vertex_elements(ctx);
    i965_render_startup(ctx);
    intel_batchbuffer_end_atomic(batch);
}

static void
i965_render_initialize(VADriverContextP ctx)
{
    struct i965_driver_data *i965 = i965_driver_data(ctx);
    struct i965_render_state *render_state = &i965->render_state;
    dri_bo *bo;

    /* VERTEX BUFFER */
    dri_bo_unreference(render_state->vb.vertex_buffer);
    bo = dri_bo_alloc(i965->intel.bufmgr,
                      "vertex buffer",
                      4096,
                      4096);
    assert(bo);
    render_state->vb.vertex_buffer = bo;

    /* VS */
    dri_bo_unreference(render_state->vs.state);
    bo = dri_bo_alloc(i965->intel.bufmgr,
                      "vs state",
                      sizeof(struct i965_vs_unit_state),
                      64);
    assert(bo);
    render_state->vs.state = bo;

    /* GS */
    /* CLIP */
    /* SF */
    dri_bo_unreference(render_state->sf.state);
    bo = dri_bo_alloc(i965->intel.bufmgr,
                      "sf state",
                      sizeof(struct i965_sf_unit_state),
                      64);
    assert(bo);
    render_state->sf.state = bo;

    /* WM */
    dri_bo_unreference(render_state->wm.surface_state_binding_table_bo);
    bo = dri_bo_alloc(i965->intel.bufmgr,
                      "surface state & binding table",
                      (SURFACE_STATE_PADDED_SIZE + sizeof(unsigned int)) * MAX_RENDER_SURFACES,
                      4096);
    assert(bo);
    render_state->wm.surface_state_binding_table_bo = bo;

    dri_bo_unreference(render_state->wm.sampler);
    bo = dri_bo_alloc(i965->intel.bufmgr,
                      "sampler state",
                      MAX_SAMPLERS * sizeof(struct i965_sampler_state),
                      64);
    assert(bo);
    render_state->wm.sampler = bo;
    render_state->wm.sampler_count = 0;

    dri_bo_unreference(render_state->wm.state);
    bo = dri_bo_alloc(i965->intel.bufmgr,
                      "wm state",
                      sizeof(struct i965_wm_unit_state),
                      64);
    assert(bo);
    render_state->wm.state = bo;

    /* COLOR CALCULATOR */
    dri_bo_unreference(render_state->cc.state);
    bo = dri_bo_alloc(i965->intel.bufmgr,
                      "color calc state",
                      sizeof(struct i965_cc_unit_state),
                      64);
    assert(bo);
    render_state->cc.state = bo;

    dri_bo_unreference(render_state->cc.viewport);
    bo = dri_bo_alloc(i965->intel.bufmgr,
                      "cc viewport",
                      sizeof(struct i965_cc_viewport),
                      64);
    assert(bo);
    render_state->cc.viewport = bo;
}

static void
i965_render_put_surface(
    VADriverContextP   ctx,
    struct object_surface *obj_surface,
    const VARectangle *src_rect,
    const VARectangle *dst_rect,
    unsigned int       flags
)
{
    struct i965_driver_data *i965 = i965_driver_data(ctx);
    struct intel_batchbuffer *batch = i965->batch;

    i965_render_initialize(ctx);
    i965_surface_render_state_setup(ctx, obj_surface, src_rect, dst_rect, flags);
    i965_surface_render_pipeline_setup(ctx);
    intel_batchbuffer_flush(batch);
}

static void
i965_render_put_subpicture(
    VADriverContextP   ctx,
    struct object_surface *obj_surface,
    const VARectangle *src_rect,
    const VARectangle *dst_rect
)
{
    struct i965_driver_data *i965 = i965_driver_data(ctx);
    struct intel_batchbuffer *batch = i965->batch;
    unsigned int index = obj_surface->subpic_render_idx;
    struct object_subpic *obj_subpic = obj_surface->obj_subpic[index];

    assert(obj_subpic);

    i965_render_initialize(ctx);
    i965_subpic_render_state_setup(ctx, obj_surface, src_rect, dst_rect);
    i965_subpic_render_pipeline_setup(ctx);
    i965_render_upload_image_palette(ctx, obj_subpic->obj_image, 0xff);
    intel_batchbuffer_flush(batch);
}

/*
 * for GEN6+
 */
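/*
 * GEN6 no longer consumes the VS/SF/WM unit state blocks used above, so
 * this initializer only (re)allocates the buffers the SNB 3DSTATE
 * commands actually reference: vertex data, surface state plus binding
 * table, samplers, and the CC/blend/depth-stencil/viewport state.
 */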
static void
gen6_render_initialize(VADriverContextP ctx)
{
    struct i965_driver_data *i965 = i965_driver_data(ctx);
    struct i965_render_state *render_state = &i965->render_state;
    dri_bo *bo;

    /* VERTEX BUFFER */
    dri_bo_unreference(render_state->vb.vertex_buffer);
    bo = dri_bo_alloc(i965->intel.bufmgr,
                      "vertex buffer",
                      4096,
                      4096);
    assert(bo);
    render_state->vb.vertex_buffer = bo;

    /* WM */
    dri_bo_unreference(render_state->wm.surface_state_binding_table_bo);
    bo = dri_bo_alloc(i965->intel.bufmgr,
                      "surface state & binding table",
                      (SURFACE_STATE_PADDED_SIZE + sizeof(unsigned int)) * MAX_RENDER_SURFACES,
                      4096);
    assert(bo);
    render_state->wm.surface_state_binding_table_bo = bo;

    dri_bo_unreference(render_state->wm.sampler);
    bo = dri_bo_alloc(i965->intel.bufmgr,
                      "sampler state",
                      MAX_SAMPLERS * sizeof(struct i965_sampler_state),
                      4096);
    assert(bo);
    render_state->wm.sampler = bo;
    render_state->wm.sampler_count = 0;

    /* COLOR CALCULATOR */
    dri_bo_unreference(render_state->cc.state);
    bo = dri_bo_alloc(i965->intel.bufmgr,
                      "color calc state",
                      sizeof(struct gen6_color_calc_state),
                      4096);
    assert(bo);
    render_state->cc.state = bo;

    /* CC VIEWPORT */
    dri_bo_unreference(render_state->cc.viewport);
    bo = dri_bo_alloc(i965->intel.bufmgr,
                      "cc viewport",
                      sizeof(struct i965_cc_viewport),
                      4096);
    assert(bo);
    render_state->cc.viewport = bo;

    /* BLEND STATE */
    dri_bo_unreference(render_state->cc.blend);
    bo = dri_bo_alloc(i965->intel.bufmgr,
                      "blend state",
                      sizeof(struct gen6_blend_state),
                      4096);
    assert(bo);
    render_state->cc.blend = bo;

    /* DEPTH & STENCIL STATE */
    dri_bo_unreference(render_state->cc.depth_stencil);
    bo = dri_bo_alloc(i965->intel.bufmgr,
                      "depth & stencil state",
                      sizeof(struct gen6_depth_stencil_state),
                      4096);
    assert(bo);
    render_state->cc.depth_stencil = bo;
}

static void
gen6_render_color_calc_state(VADriverContextP ctx)
{
    struct i965_driver_data *i965 = i965_driver_data(ctx);
    struct i965_render_state *render_state = &i965->render_state;
    struct gen6_color_calc_state *color_calc_state;

    dri_bo_map(render_state->cc.state, 1);
    assert(render_state->cc.state->virtual);
    color_calc_state = render_state->cc.state->virtual;
    memset(color_calc_state, 0, sizeof(*color_calc_state));
    color_calc_state->constant_r = 1.0;
    color_calc_state->constant_g = 0.0;
    color_calc_state->constant_b = 1.0;
    color_calc_state->constant_a = 1.0;
    dri_bo_unmap(render_state->cc.state);
}

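/*
 * Blend state for the main video plane: blending proper stays off and a
 * logic op does the write instead; func 0xc is COPY, so the pixel shader
 * output lands in the render target unmodified.
 */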
static void
gen6_render_blend_state(VADriverContextP ctx)
{
    struct i965_driver_data *i965 = i965_driver_data(ctx);
    struct i965_render_state *render_state = &i965->render_state;
    struct gen6_blend_state *blend_state;

    dri_bo_map(render_state->cc.blend, 1);
    assert(render_state->cc.blend->virtual);
    blend_state = render_state->cc.blend->virtual;
    memset(blend_state, 0, sizeof(*blend_state));
    blend_state->blend1.logic_op_enable = 1;
    blend_state->blend1.logic_op_func = 0xc;
    dri_bo_unmap(render_state->cc.blend);
}

static void
gen6_render_depth_stencil_state(VADriverContextP ctx)
{
    struct i965_driver_data *i965 = i965_driver_data(ctx);
    struct i965_render_state *render_state = &i965->render_state;
    struct gen6_depth_stencil_state *depth_stencil_state;

    dri_bo_map(render_state->cc.depth_stencil, 1);
    assert(render_state->cc.depth_stencil->virtual);
    depth_stencil_state = render_state->cc.depth_stencil->virtual;
    memset(depth_stencil_state, 0, sizeof(*depth_stencil_state));
    dri_bo_unmap(render_state->cc.depth_stencil);
}

static void
gen6_render_setup_states(
    VADriverContextP   ctx,
    struct object_surface *obj_surface,
    const VARectangle *src_rect,
    const VARectangle *dst_rect,
    unsigned int       flags
)
{
    i965_render_dest_surface_state(ctx, 0);
    i965_render_src_surfaces_state(ctx, obj_surface, flags);
    i965_render_sampler(ctx);
    i965_render_cc_viewport(ctx);
    gen6_render_color_calc_state(ctx);
    gen6_render_blend_state(ctx);
    gen6_render_depth_stencil_state(ctx);
    i965_render_upload_constants(ctx, obj_surface);
    i965_render_upload_vertex(ctx, obj_surface, src_rect, dst_rect);
}

static void
gen6_emit_invarient_states(VADriverContextP ctx)
{
    struct i965_driver_data *i965 = i965_driver_data(ctx);
    struct intel_batchbuffer *batch = i965->batch;

    OUT_BATCH(batch, CMD_PIPELINE_SELECT | PIPELINE_SELECT_3D);

    OUT_BATCH(batch, GEN6_3DSTATE_MULTISAMPLE | (3 - 2));
    OUT_BATCH(batch, GEN6_3DSTATE_MULTISAMPLE_PIXEL_LOCATION_CENTER |
              GEN6_3DSTATE_MULTISAMPLE_NUMSAMPLES_1); /* 1 sample/pixel */
    OUT_BATCH(batch, 0);

    OUT_BATCH(batch, GEN6_3DSTATE_SAMPLE_MASK | (2 - 2));
    OUT_BATCH(batch, 1);

    /* Set system instruction pointer */
    OUT_BATCH(batch, CMD_STATE_SIP | 0);
    OUT_BATCH(batch, 0);
}

static void
gen6_emit_state_base_address(VADriverContextP ctx)
{
    struct i965_driver_data *i965 = i965_driver_data(ctx);
    struct intel_batchbuffer *batch = i965->batch;
    struct i965_render_state *render_state = &i965->render_state;

    OUT_BATCH(batch, CMD_STATE_BASE_ADDRESS | (10 - 2));
    OUT_BATCH(batch, BASE_ADDRESS_MODIFY); /* General state base address */
    OUT_RELOC(batch, render_state->wm.surface_state_binding_table_bo, I915_GEM_DOMAIN_INSTRUCTION, 0, BASE_ADDRESS_MODIFY); /* Surface state base address */
    OUT_BATCH(batch, BASE_ADDRESS_MODIFY); /* Dynamic state base address */
    OUT_BATCH(batch, BASE_ADDRESS_MODIFY); /* Indirect object base address */
    OUT_BATCH(batch, BASE_ADDRESS_MODIFY); /* Instruction base address */
    OUT_BATCH(batch, BASE_ADDRESS_MODIFY); /* General state upper bound */
    OUT_BATCH(batch, BASE_ADDRESS_MODIFY); /* Dynamic state upper bound */
    OUT_BATCH(batch, BASE_ADDRESS_MODIFY); /* Indirect object upper bound */
    OUT_BATCH(batch, BASE_ADDRESS_MODIFY); /* Instruction access upper bound */
}

static void
gen6_emit_viewport_state_pointers(VADriverContextP ctx)
{
    struct i965_driver_data *i965 = i965_driver_data(ctx);
    struct intel_batchbuffer *batch = i965->batch;
    struct i965_render_state *render_state = &i965->render_state;

    OUT_BATCH(batch, GEN6_3DSTATE_VIEWPORT_STATE_POINTERS |
              GEN6_3DSTATE_VIEWPORT_STATE_MODIFY_CC |
              (4 - 2));
    OUT_BATCH(batch, 0);
    OUT_BATCH(batch, 0);
    OUT_RELOC(batch, render_state->cc.viewport, I915_GEM_DOMAIN_INSTRUCTION, 0, 0);
}

static void
gen6_emit_urb(VADriverContextP ctx)
{
    struct i965_driver_data *i965 = i965_driver_data(ctx);
    struct intel_batchbuffer *batch = i965->batch;

    OUT_BATCH(batch, GEN6_3DSTATE_URB | (3 - 2));
    OUT_BATCH(batch, ((1 - 1) << GEN6_3DSTATE_URB_VS_SIZE_SHIFT) |
              (24 << GEN6_3DSTATE_URB_VS_ENTRIES_SHIFT)); /* at least 24 on GEN6 */
    OUT_BATCH(batch, (0 << GEN6_3DSTATE_URB_GS_SIZE_SHIFT) |
              (0 << GEN6_3DSTATE_URB_GS_ENTRIES_SHIFT)); /* no GS thread */
}

static void
gen6_emit_cc_state_pointers(VADriverContextP ctx)
{
    struct i965_driver_data *i965 = i965_driver_data(ctx);
    struct intel_batchbuffer *batch = i965->batch;
    struct i965_render_state *render_state = &i965->render_state;

    OUT_BATCH(batch, GEN6_3DSTATE_CC_STATE_POINTERS | (4 - 2));
    OUT_RELOC(batch, render_state->cc.blend, I915_GEM_DOMAIN_INSTRUCTION, 0, 1);
    OUT_RELOC(batch, render_state->cc.depth_stencil, I915_GEM_DOMAIN_INSTRUCTION, 0, 1);
    OUT_RELOC(batch, render_state->cc.state, I915_GEM_DOMAIN_INSTRUCTION, 0, 1);
}

static void
gen6_emit_sampler_state_pointers(VADriverContextP ctx)
{
    struct i965_driver_data *i965 = i965_driver_data(ctx);
    struct intel_batchbuffer *batch = i965->batch;
    struct i965_render_state *render_state = &i965->render_state;

    OUT_BATCH(batch, GEN6_3DSTATE_SAMPLER_STATE_POINTERS |
              GEN6_3DSTATE_SAMPLER_STATE_MODIFY_PS |
              (4 - 2));
    OUT_BATCH(batch, 0); /* VS */
    OUT_BATCH(batch, 0); /* GS */
    OUT_RELOC(batch, render_state->wm.sampler, I915_GEM_DOMAIN_INSTRUCTION, 0, 0);
}

static void
gen6_emit_binding_table(VADriverContextP ctx)
{
    struct i965_driver_data *i965 = i965_driver_data(ctx);
    struct intel_batchbuffer *batch = i965->batch;

    /* Binding table pointers */
    OUT_BATCH(batch, CMD_BINDING_TABLE_POINTERS |
              GEN6_BINDING_TABLE_MODIFY_PS |
              (4 - 2));
    OUT_BATCH(batch, 0);                /* vs */
    OUT_BATCH(batch, 0);                /* gs */
    /* Only the PS uses the binding table */
    OUT_BATCH(batch, BINDING_TABLE_OFFSET);
}

static void
gen6_emit_depth_buffer_state(VADriverContextP ctx)
{
    struct i965_driver_data *i965 = i965_driver_data(ctx);
    struct intel_batchbuffer *batch = i965->batch;

    OUT_BATCH(batch, CMD_DEPTH_BUFFER | (7 - 2));
    OUT_BATCH(batch, (I965_SURFACE_NULL << CMD_DEPTH_BUFFER_TYPE_SHIFT) |
              (I965_DEPTHFORMAT_D32_FLOAT << CMD_DEPTH_BUFFER_FORMAT_SHIFT));
    OUT_BATCH(batch, 0);
    OUT_BATCH(batch, 0);
    OUT_BATCH(batch, 0);
    OUT_BATCH(batch, 0);
    OUT_BATCH(batch, 0);

    OUT_BATCH(batch, CMD_CLEAR_PARAMS | (2 - 2));
    OUT_BATCH(batch, 0);
}

static void
gen6_emit_drawing_rectangle(VADriverContextP ctx)
{
    i965_render_drawing_rectangle(ctx);
}

static void
gen6_emit_vs_state(VADriverContextP ctx)
{
    struct i965_driver_data *i965 = i965_driver_data(ctx);
    struct intel_batchbuffer *batch = i965->batch;

    /* disable VS constant buffer */
    OUT_BATCH(batch, GEN6_3DSTATE_CONSTANT_VS | (5 - 2));
    OUT_BATCH(batch, 0);
    OUT_BATCH(batch, 0);
    OUT_BATCH(batch, 0);
    OUT_BATCH(batch, 0);

    OUT_BATCH(batch, GEN6_3DSTATE_VS | (6 - 2));
    OUT_BATCH(batch, 0); /* without VS kernel */
    OUT_BATCH(batch, 0);
    OUT_BATCH(batch, 0);
    OUT_BATCH(batch, 0);
    OUT_BATCH(batch, 0); /* pass-through */
}

static void
gen6_emit_gs_state(VADriverContextP ctx)
{
    struct i965_driver_data *i965 = i965_driver_data(ctx);
    struct intel_batchbuffer *batch = i965->batch;

    /* disable GS constant buffer */
    OUT_BATCH(batch, GEN6_3DSTATE_CONSTANT_GS | (5 - 2));
    OUT_BATCH(batch, 0);
    OUT_BATCH(batch, 0);
    OUT_BATCH(batch, 0);
    OUT_BATCH(batch, 0);

    OUT_BATCH(batch, GEN6_3DSTATE_GS | (7 - 2));
    OUT_BATCH(batch, 0); /* without GS kernel */
    OUT_BATCH(batch, 0);
    OUT_BATCH(batch, 0);
    OUT_BATCH(batch, 0);
    OUT_BATCH(batch, 0);
    OUT_BATCH(batch, 0); /* pass-through */
}

static void
gen6_emit_clip_state(VADriverContextP ctx)
{
    struct i965_driver_data *i965 = i965_driver_data(ctx);
    struct intel_batchbuffer *batch = i965->batch;

    OUT_BATCH(batch, GEN6_3DSTATE_CLIP | (4 - 2));
    OUT_BATCH(batch, 0);
    OUT_BATCH(batch, 0); /* pass-through */
    OUT_BATCH(batch, 0);
}

static void
gen6_emit_sf_state(VADriverContextP ctx)
{
    struct i965_driver_data *i965 = i965_driver_data(ctx);
    struct intel_batchbuffer *batch = i965->batch;

    OUT_BATCH(batch, GEN6_3DSTATE_SF | (20 - 2));
    OUT_BATCH(batch, (1 << GEN6_3DSTATE_SF_NUM_OUTPUTS_SHIFT) |
              (1 << GEN6_3DSTATE_SF_URB_ENTRY_READ_LENGTH_SHIFT) |
              (0 << GEN6_3DSTATE_SF_URB_ENTRY_READ_OFFSET_SHIFT));
    OUT_BATCH(batch, 0);
    OUT_BATCH(batch, GEN6_3DSTATE_SF_CULL_NONE);
    OUT_BATCH(batch, 2 << GEN6_3DSTATE_SF_TRIFAN_PROVOKE_SHIFT); /* DW4 */
    OUT_BATCH(batch, 0);
    OUT_BATCH(batch, 0);
    OUT_BATCH(batch, 0);
    OUT_BATCH(batch, 0);
    OUT_BATCH(batch, 0); /* DW9 */
    OUT_BATCH(batch, 0);
    OUT_BATCH(batch, 0);
    OUT_BATCH(batch, 0);
    OUT_BATCH(batch, 0);
    OUT_BATCH(batch, 0); /* DW14 */
    OUT_BATCH(batch, 0);
    OUT_BATCH(batch, 0);
    OUT_BATCH(batch, 0);
    OUT_BATCH(batch, 0);
    OUT_BATCH(batch, 0); /* DW19 */
}

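/*
 * WM/PS setup: constant buffer 0 points at the CURBE bo filled by
 * i965_render_upload_constants(), and the PS kernel is selected by index
 * (PS_KERNEL for the video surface, PS_SUBPIC_KERNEL for subpictures).
 * Dispatch is SIMD16, with the thread payload starting at GRF 6.
 */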
static void
gen6_emit_wm_state(VADriverContextP ctx, int kernel)
{
    struct i965_driver_data *i965 = i965_driver_data(ctx);
    struct intel_batchbuffer *batch = i965->batch;
    struct i965_render_state *render_state = &i965->render_state;

    OUT_BATCH(batch, GEN6_3DSTATE_CONSTANT_PS |
              GEN6_3DSTATE_CONSTANT_BUFFER_0_ENABLE |
              (5 - 2));
    OUT_RELOC(batch,
              render_state->curbe.bo,
              I915_GEM_DOMAIN_INSTRUCTION, 0,
              (URB_CS_ENTRY_SIZE - 1));
    OUT_BATCH(batch, 0);
    OUT_BATCH(batch, 0);
    OUT_BATCH(batch, 0);

    OUT_BATCH(batch, GEN6_3DSTATE_WM | (9 - 2));
    OUT_RELOC(batch, render_state->render_kernels[kernel].bo,
              I915_GEM_DOMAIN_INSTRUCTION, 0,
              0);
    OUT_BATCH(batch, (1 << GEN6_3DSTATE_WM_SAMPLER_COUNT_SHITF) |
              (5 << GEN6_3DSTATE_WM_BINDING_TABLE_ENTRY_COUNT_SHIFT));
    OUT_BATCH(batch, 0);
    OUT_BATCH(batch, (6 << GEN6_3DSTATE_WM_DISPATCH_START_GRF_0_SHIFT)); /* DW4 */
    OUT_BATCH(batch, ((render_state->max_wm_threads - 1) << GEN6_3DSTATE_WM_MAX_THREADS_SHIFT) |
              GEN6_3DSTATE_WM_DISPATCH_ENABLE |
              GEN6_3DSTATE_WM_16_DISPATCH_ENABLE);
    OUT_BATCH(batch, (1 << GEN6_3DSTATE_WM_NUM_SF_OUTPUTS_SHIFT) |
              GEN6_3DSTATE_WM_PERSPECTIVE_PIXEL_BARYCENTRIC);
    OUT_BATCH(batch, 0);
    OUT_BATCH(batch, 0);
}

static void
gen6_emit_vertex_element_state(VADriverContextP ctx)
{
    struct i965_driver_data *i965 = i965_driver_data(ctx);
    struct intel_batchbuffer *batch = i965->batch;

    /* Set up our vertex elements, sourced from the single vertex buffer. */
    OUT_BATCH(batch, CMD_VERTEX_ELEMENTS | (5 - 2));
    /* offset 0: X,Y -> {X, Y, 1.0, 1.0} */
    OUT_BATCH(batch, (0 << GEN6_VE0_VERTEX_BUFFER_INDEX_SHIFT) |
              GEN6_VE0_VALID |
              (I965_SURFACEFORMAT_R32G32_FLOAT << VE0_FORMAT_SHIFT) |
              (0 << VE0_OFFSET_SHIFT));
    OUT_BATCH(batch, (I965_VFCOMPONENT_STORE_SRC << VE1_VFCOMPONENT_0_SHIFT) |
              (I965_VFCOMPONENT_STORE_SRC << VE1_VFCOMPONENT_1_SHIFT) |
              (I965_VFCOMPONENT_STORE_1_FLT << VE1_VFCOMPONENT_2_SHIFT) |
              (I965_VFCOMPONENT_STORE_1_FLT << VE1_VFCOMPONENT_3_SHIFT));
    /* offset 8: S0, T0 -> {S0, T0, 1.0, 1.0} */
    OUT_BATCH(batch, (0 << GEN6_VE0_VERTEX_BUFFER_INDEX_SHIFT) |
              GEN6_VE0_VALID |
              (I965_SURFACEFORMAT_R32G32_FLOAT << VE0_FORMAT_SHIFT) |
              (8 << VE0_OFFSET_SHIFT));
    OUT_BATCH(batch, (I965_VFCOMPONENT_STORE_SRC << VE1_VFCOMPONENT_0_SHIFT) |
              (I965_VFCOMPONENT_STORE_SRC << VE1_VFCOMPONENT_1_SHIFT) |
              (I965_VFCOMPONENT_STORE_1_FLT << VE1_VFCOMPONENT_2_SHIFT) |
              (I965_VFCOMPONENT_STORE_1_FLT << VE1_VFCOMPONENT_3_SHIFT));
}

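/*
 * Each vertex in render_state->vb.vertex_buffer occupies 16 bytes,
 * matching the (4 * 4) pitch programmed below and the element offsets
 * above. A sketch of the layout (hypothetical struct, not used by the
 * driver itself):
 *
 *     struct { float x, y; float s, t; };  // x,y at offset 0; s,t at offset 8
 *
 * The second relocation (12 * 4) marks the end of the buffer after three
 * such vertices (3 * 16 = 48 bytes).
 */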
static void
gen6_emit_vertices(VADriverContextP ctx)
{
    struct i965_driver_data *i965 = i965_driver_data(ctx);
    struct intel_batchbuffer *batch = i965->batch;
    struct i965_render_state *render_state = &i965->render_state;

    BEGIN_BATCH(batch, 11);
    OUT_BATCH(batch, CMD_VERTEX_BUFFERS | 3);
    OUT_BATCH(batch,
              (0 << GEN6_VB0_BUFFER_INDEX_SHIFT) |
              GEN6_VB0_VERTEXDATA |
              ((4 * 4) << VB0_BUFFER_PITCH_SHIFT));
    OUT_RELOC(batch, render_state->vb.vertex_buffer, I915_GEM_DOMAIN_VERTEX, 0, 0);
    OUT_RELOC(batch, render_state->vb.vertex_buffer, I915_GEM_DOMAIN_VERTEX, 0, 12 * 4);
    OUT_BATCH(batch, 0);

    OUT_BATCH(batch,
              CMD_3DPRIMITIVE |
              _3DPRIMITIVE_VERTEX_SEQUENTIAL |
              (_3DPRIM_RECTLIST << _3DPRIMITIVE_TOPOLOGY_SHIFT) |
              (0 << 9) |
              4);
    OUT_BATCH(batch, 3); /* vertex count per instance */
    OUT_BATCH(batch, 0); /* start vertex offset */
    OUT_BATCH(batch, 1); /* single instance */
    OUT_BATCH(batch, 0); /* start instance location */
    OUT_BATCH(batch, 0); /* index buffer offset, ignored */
    ADVANCE_BATCH(batch);
}

static void
gen6_render_emit_states(VADriverContextP ctx, int kernel)
{
    struct i965_driver_data *i965 = i965_driver_data(ctx);
    struct intel_batchbuffer *batch = i965->batch;

    intel_batchbuffer_start_atomic(batch, 0x1000);
    intel_batchbuffer_emit_mi_flush(batch);
    gen6_emit_invarient_states(ctx);
    gen6_emit_state_base_address(ctx);
    gen6_emit_viewport_state_pointers(ctx);
    gen6_emit_urb(ctx);
    gen6_emit_cc_state_pointers(ctx);
    gen6_emit_sampler_state_pointers(ctx);
    gen6_emit_vs_state(ctx);
    gen6_emit_gs_state(ctx);
    gen6_emit_clip_state(ctx);
    gen6_emit_sf_state(ctx);
    gen6_emit_wm_state(ctx, kernel);
    gen6_emit_binding_table(ctx);
    gen6_emit_depth_buffer_state(ctx);
    gen6_emit_drawing_rectangle(ctx);
    gen6_emit_vertex_element_state(ctx);
    gen6_emit_vertices(ctx);
    intel_batchbuffer_end_atomic(batch);
}

static void
gen6_render_put_surface(
    VADriverContextP   ctx,
    struct object_surface *obj_surface,
    const VARectangle *src_rect,
    const VARectangle *dst_rect,
    unsigned int       flags
)
{
    struct i965_driver_data *i965 = i965_driver_data(ctx);
    struct intel_batchbuffer *batch = i965->batch;

    gen6_render_initialize(ctx);
    gen6_render_setup_states(ctx, obj_surface, src_rect, dst_rect, flags);
    i965_clear_dest_region(ctx);
    gen6_render_emit_states(ctx, PS_KERNEL);
    intel_batchbuffer_flush(batch);
}

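/*
 * Subpictures are composited over the frame with classic "source over"
 * alpha blending (src * alpha + dst * (1 - alpha)) instead of the
 * logic-op copy used for the video plane itself.
 */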
static void
gen6_subpicture_render_blend_state(VADriverContextP ctx)
{
    struct i965_driver_data *i965 = i965_driver_data(ctx);
    struct i965_render_state *render_state = &i965->render_state;
    struct gen6_blend_state *blend_state;

    dri_bo_unmap(render_state->cc.state);
    dri_bo_map(render_state->cc.blend, 1);
    assert(render_state->cc.blend->virtual);
    blend_state = render_state->cc.blend->virtual;
    memset(blend_state, 0, sizeof(*blend_state));
    blend_state->blend0.dest_blend_factor = I965_BLENDFACTOR_INV_SRC_ALPHA;
    blend_state->blend0.source_blend_factor = I965_BLENDFACTOR_SRC_ALPHA;
    blend_state->blend0.blend_func = I965_BLENDFUNCTION_ADD;
    blend_state->blend0.blend_enable = 1;
    blend_state->blend1.post_blend_clamp_enable = 1;
    blend_state->blend1.pre_blend_clamp_enable = 1;
    blend_state->blend1.clamp_range = 0; /* clamp range [0, 1] */
    dri_bo_unmap(render_state->cc.blend);
}

static void
gen6_subpicture_render_setup_states(
    VADriverContextP   ctx,
    struct object_surface *obj_surface,
    const VARectangle *src_rect,
    const VARectangle *dst_rect
)
{
    i965_render_dest_surface_state(ctx, 0);
    i965_subpic_render_src_surfaces_state(ctx, obj_surface);
    i965_render_sampler(ctx);
    i965_render_cc_viewport(ctx);
    gen6_render_color_calc_state(ctx);
    gen6_subpicture_render_blend_state(ctx);
    gen6_render_depth_stencil_state(ctx);
    i965_subpic_render_upload_constants(ctx, obj_surface);
    i965_subpic_render_upload_vertex(ctx, obj_surface, dst_rect);
}

static void
gen6_render_put_subpicture(
    VADriverContextP   ctx,
    struct object_surface *obj_surface,
    const VARectangle *src_rect,
    const VARectangle *dst_rect
)
{
    struct i965_driver_data *i965 = i965_driver_data(ctx);
    struct intel_batchbuffer *batch = i965->batch;
    unsigned int index = obj_surface->subpic_render_idx;
    struct object_subpic *obj_subpic = obj_surface->obj_subpic[index];

    assert(obj_subpic);
    gen6_render_initialize(ctx);
    gen6_subpicture_render_setup_states(ctx, obj_surface, src_rect, dst_rect);
    gen6_render_emit_states(ctx, PS_SUBPIC_KERNEL);
    i965_render_upload_image_palette(ctx, obj_subpic->obj_image, 0xff);
    intel_batchbuffer_flush(batch);
}

/*
 * for GEN7
 */
static void
gen7_render_initialize(VADriverContextP ctx)
{
    struct i965_driver_data *i965 = i965_driver_data(ctx);
    struct i965_render_state *render_state = &i965->render_state;
    dri_bo *bo;

    /* VERTEX BUFFER */
    dri_bo_unreference(render_state->vb.vertex_buffer);
    bo = dri_bo_alloc(i965->intel.bufmgr,
                      "vertex buffer",
                      4096,
                      4096);
    assert(bo);
    render_state->vb.vertex_buffer = bo;

    /* WM */
    dri_bo_unreference(render_state->wm.surface_state_binding_table_bo);
    bo = dri_bo_alloc(i965->intel.bufmgr,
                      "surface state & binding table",
                      (SURFACE_STATE_PADDED_SIZE + sizeof(unsigned int)) * MAX_RENDER_SURFACES,
                      4096);
    assert(bo);
    render_state->wm.surface_state_binding_table_bo = bo;

    dri_bo_unreference(render_state->wm.sampler);
    bo = dri_bo_alloc(i965->intel.bufmgr,
                      "sampler state",
                      MAX_SAMPLERS * sizeof(struct gen7_sampler_state),
                      4096);
    assert(bo);
    render_state->wm.sampler = bo;
    render_state->wm.sampler_count = 0;

    /* COLOR CALCULATOR */
    dri_bo_unreference(render_state->cc.state);
    bo = dri_bo_alloc(i965->intel.bufmgr,
                      "color calc state",
                      sizeof(struct gen6_color_calc_state),
                      4096);
    assert(bo);
    render_state->cc.state = bo;

    /* CC VIEWPORT */
    dri_bo_unreference(render_state->cc.viewport);
    bo = dri_bo_alloc(i965->intel.bufmgr,
                      "cc viewport",
                      sizeof(struct i965_cc_viewport),
                      4096);
    assert(bo);
    render_state->cc.viewport = bo;

    /* BLEND STATE */
    dri_bo_unreference(render_state->cc.blend);
    bo = dri_bo_alloc(i965->intel.bufmgr,
                      "blend state",
                      sizeof(struct gen6_blend_state),
                      4096);
    assert(bo);
    render_state->cc.blend = bo;

    /* DEPTH & STENCIL STATE */
    dri_bo_unreference(render_state->cc.depth_stencil);
    bo = dri_bo_alloc(i965->intel.bufmgr,
                      "depth & stencil state",
                      sizeof(struct gen6_depth_stencil_state),
                      4096);
    assert(bo);
    render_state->cc.depth_stencil = bo;
}

static void
gen7_render_color_calc_state(VADriverContextP ctx)
{
    struct i965_driver_data *i965 = i965_driver_data(ctx);
    struct i965_render_state *render_state = &i965->render_state;
    struct gen6_color_calc_state *color_calc_state;

    dri_bo_map(render_state->cc.state, 1);
    assert(render_state->cc.state->virtual);
    color_calc_state = render_state->cc.state->virtual;
    memset(color_calc_state, 0, sizeof(*color_calc_state));
    color_calc_state->constant_r = 1.0;
    color_calc_state->constant_g = 0.0;
    color_calc_state->constant_b = 1.0;
    color_calc_state->constant_a = 1.0;
    dri_bo_unmap(render_state->cc.state);
}

static void
gen7_render_blend_state(VADriverContextP ctx)
{
    struct i965_driver_data *i965 = i965_driver_data(ctx);
    struct i965_render_state *render_state = &i965->render_state;
    struct gen6_blend_state *blend_state;

    dri_bo_map(render_state->cc.blend, 1);
    assert(render_state->cc.blend->virtual);
    blend_state = render_state->cc.blend->virtual;
    memset(blend_state, 0, sizeof(*blend_state));
    blend_state->blend1.logic_op_enable = 1;
    blend_state->blend1.logic_op_func = 0xc;
    blend_state->blend1.pre_blend_clamp_enable = 1;
    dri_bo_unmap(render_state->cc.blend);
}

static void
gen7_render_depth_stencil_state(VADriverContextP ctx)
{
    struct i965_driver_data *i965 = i965_driver_data(ctx);
    struct i965_render_state *render_state = &i965->render_state;
    struct gen6_depth_stencil_state *depth_stencil_state;

    dri_bo_map(render_state->cc.depth_stencil, 1);
    assert(render_state->cc.depth_stencil->virtual);
    depth_stencil_state = render_state->cc.depth_stencil->virtual;
    memset(depth_stencil_state, 0, sizeof(*depth_stencil_state));
    dri_bo_unmap(render_state->cc.depth_stencil);
}

static void
gen7_render_sampler(VADriverContextP ctx)
{
    struct i965_driver_data *i965 = i965_driver_data(ctx);
    struct i965_render_state *render_state = &i965->render_state;
    struct gen7_sampler_state *sampler_state;
    int i;

    assert(render_state->wm.sampler_count > 0);
    assert(render_state->wm.sampler_count <= MAX_SAMPLERS);

    dri_bo_map(render_state->wm.sampler, 1);
    assert(render_state->wm.sampler->virtual);
    sampler_state = render_state->wm.sampler->virtual;
    for (i = 0; i < render_state->wm.sampler_count; i++) {
        memset(sampler_state, 0, sizeof(*sampler_state));
        sampler_state->ss0.min_filter = I965_MAPFILTER_LINEAR;
        sampler_state->ss0.mag_filter = I965_MAPFILTER_LINEAR;
        sampler_state->ss3.r_wrap_mode = I965_TEXCOORDMODE_CLAMP;
        sampler_state->ss3.s_wrap_mode = I965_TEXCOORDMODE_CLAMP;
        sampler_state->ss3.t_wrap_mode = I965_TEXCOORDMODE_CLAMP;
        sampler_state++;
    }

    dri_bo_unmap(render_state->wm.sampler);
}

static void
gen7_render_setup_states(
    VADriverContextP   ctx,
    struct object_surface *obj_surface,
    const VARectangle *src_rect,
    const VARectangle *dst_rect,
    unsigned int       flags
)
{
    i965_render_dest_surface_state(ctx, 0);
    i965_render_src_surfaces_state(ctx, obj_surface, flags);
    gen7_render_sampler(ctx);
    i965_render_cc_viewport(ctx);
    gen7_render_color_calc_state(ctx);
    gen7_render_blend_state(ctx);
    gen7_render_depth_stencil_state(ctx);
    i965_render_upload_constants(ctx, obj_surface);
    i965_render_upload_vertex(ctx, obj_surface, src_rect, dst_rect);
}

static void
gen7_emit_invarient_states(VADriverContextP ctx)
{
    struct i965_driver_data *i965 = i965_driver_data(ctx);
    struct intel_batchbuffer *batch = i965->batch;

    BEGIN_BATCH(batch, 1);
    OUT_BATCH(batch, CMD_PIPELINE_SELECT | PIPELINE_SELECT_3D);
    ADVANCE_BATCH(batch);

    BEGIN_BATCH(batch, 4);
    OUT_BATCH(batch, GEN6_3DSTATE_MULTISAMPLE | (4 - 2));
    OUT_BATCH(batch, GEN6_3DSTATE_MULTISAMPLE_PIXEL_LOCATION_CENTER |
              GEN6_3DSTATE_MULTISAMPLE_NUMSAMPLES_1); /* 1 sample/pixel */
    OUT_BATCH(batch, 0);
    OUT_BATCH(batch, 0);
    ADVANCE_BATCH(batch);

    BEGIN_BATCH(batch, 2);
    OUT_BATCH(batch, GEN6_3DSTATE_SAMPLE_MASK | (2 - 2));
    OUT_BATCH(batch, 1);
    ADVANCE_BATCH(batch);

    /* Set system instruction pointer */
    BEGIN_BATCH(batch, 2);
    OUT_BATCH(batch, CMD_STATE_SIP | 0);
    OUT_BATCH(batch, 0);
    ADVANCE_BATCH(batch);
}

static void
gen7_emit_state_base_address(VADriverContextP ctx)
{
    struct i965_driver_data *i965 = i965_driver_data(ctx);
    struct intel_batchbuffer *batch = i965->batch;
    struct i965_render_state *render_state = &i965->render_state;

    OUT_BATCH(batch, CMD_STATE_BASE_ADDRESS | (10 - 2));
    OUT_BATCH(batch, BASE_ADDRESS_MODIFY); /* General state base address */
    OUT_RELOC(batch, render_state->wm.surface_state_binding_table_bo, I915_GEM_DOMAIN_INSTRUCTION, 0, BASE_ADDRESS_MODIFY); /* Surface state base address */
    OUT_BATCH(batch, BASE_ADDRESS_MODIFY); /* Dynamic state base address */
    OUT_BATCH(batch, BASE_ADDRESS_MODIFY); /* Indirect object base address */
    OUT_BATCH(batch, BASE_ADDRESS_MODIFY); /* Instruction base address */
    OUT_BATCH(batch, BASE_ADDRESS_MODIFY); /* General state upper bound */
    OUT_BATCH(batch, BASE_ADDRESS_MODIFY); /* Dynamic state upper bound */
    OUT_BATCH(batch, BASE_ADDRESS_MODIFY); /* Indirect object upper bound */
    OUT_BATCH(batch, BASE_ADDRESS_MODIFY); /* Instruction access upper bound */
}

static void
gen7_emit_viewport_state_pointers(VADriverContextP ctx)
{
    struct i965_driver_data *i965 = i965_driver_data(ctx);
    struct intel_batchbuffer *batch = i965->batch;
    struct i965_render_state *render_state = &i965->render_state;

    BEGIN_BATCH(batch, 2);
    OUT_BATCH(batch, GEN7_3DSTATE_VIEWPORT_STATE_POINTERS_CC | (2 - 2));
    OUT_RELOC(batch,
              render_state->cc.viewport,
              I915_GEM_DOMAIN_INSTRUCTION, 0,
              0);
    ADVANCE_BATCH(batch);

    BEGIN_BATCH(batch, 2);
    OUT_BATCH(batch, GEN7_3DSTATE_VIEWPORT_STATE_POINTERS_SF_CL | (2 - 2));
    OUT_BATCH(batch, 0);
    ADVANCE_BATCH(batch);
}

/*
 * URB layout on GEN7
 * ----------------------------------------
 * | PS Push Constants (8KB) | VS entries |
 * ----------------------------------------
 */
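/*
 * The first command below reserves 8KB (the value is in 1KB units) at the
 * start of the URB for PS push constants; the VS entries are then placed
 * behind that block (starting address 1), matching the diagram above.
 * GS/HS/DS get zero-size allocations since those stages are bypassed.
 */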
static void
gen7_emit_urb(VADriverContextP ctx)
{
    struct i965_driver_data *i965 = i965_driver_data(ctx);
    struct intel_batchbuffer *batch = i965->batch;
    unsigned int num_urb_entries = 32;

    if (IS_HASWELL(i965->intel.device_id))
        num_urb_entries = 64;

    BEGIN_BATCH(batch, 2);
    OUT_BATCH(batch, GEN7_3DSTATE_PUSH_CONSTANT_ALLOC_PS | (2 - 2));
    OUT_BATCH(batch, 8); /* in 1KBs */
    ADVANCE_BATCH(batch);

    BEGIN_BATCH(batch, 2);
    OUT_BATCH(batch, GEN7_3DSTATE_URB_VS | (2 - 2));
    OUT_BATCH(batch,
              (num_urb_entries << GEN7_URB_ENTRY_NUMBER_SHIFT) |
              (2 - 1) << GEN7_URB_ENTRY_SIZE_SHIFT |
              (1 << GEN7_URB_STARTING_ADDRESS_SHIFT));
    ADVANCE_BATCH(batch);

    BEGIN_BATCH(batch, 2);
    OUT_BATCH(batch, GEN7_3DSTATE_URB_GS | (2 - 2));
    OUT_BATCH(batch,
              (0 << GEN7_URB_ENTRY_SIZE_SHIFT) |
              (1 << GEN7_URB_STARTING_ADDRESS_SHIFT));
    ADVANCE_BATCH(batch);

    BEGIN_BATCH(batch, 2);
    OUT_BATCH(batch, GEN7_3DSTATE_URB_HS | (2 - 2));
    OUT_BATCH(batch,
              (0 << GEN7_URB_ENTRY_SIZE_SHIFT) |
              (2 << GEN7_URB_STARTING_ADDRESS_SHIFT));
    ADVANCE_BATCH(batch);

    BEGIN_BATCH(batch, 2);
    OUT_BATCH(batch, GEN7_3DSTATE_URB_DS | (2 - 2));
    OUT_BATCH(batch,
              (0 << GEN7_URB_ENTRY_SIZE_SHIFT) |
              (2 << GEN7_URB_STARTING_ADDRESS_SHIFT));
    ADVANCE_BATCH(batch);
}

static void
gen7_emit_cc_state_pointers(VADriverContextP ctx)
{
    struct i965_driver_data *i965 = i965_driver_data(ctx);
    struct intel_batchbuffer *batch = i965->batch;
    struct i965_render_state *render_state = &i965->render_state;

    BEGIN_BATCH(batch, 2);
    OUT_BATCH(batch, GEN6_3DSTATE_CC_STATE_POINTERS | (2 - 2));
    OUT_RELOC(batch,
              render_state->cc.state,
              I915_GEM_DOMAIN_INSTRUCTION, 0,
              1);
    ADVANCE_BATCH(batch);

    BEGIN_BATCH(batch, 2);
    OUT_BATCH(batch, GEN7_3DSTATE_BLEND_STATE_POINTERS | (2 - 2));
    OUT_RELOC(batch,
              render_state->cc.blend,
              I915_GEM_DOMAIN_INSTRUCTION, 0,
              1);
    ADVANCE_BATCH(batch);

    BEGIN_BATCH(batch, 2);
    OUT_BATCH(batch, GEN7_3DSTATE_DEPTH_STENCIL_STATE_POINTERS | (2 - 2));
    OUT_RELOC(batch,
              render_state->cc.depth_stencil,
              I915_GEM_DOMAIN_INSTRUCTION, 0,
              1);
    ADVANCE_BATCH(batch);
}

static void
gen7_emit_sampler_state_pointers(VADriverContextP ctx)
{
    struct i965_driver_data *i965 = i965_driver_data(ctx);
    struct intel_batchbuffer *batch = i965->batch;
    struct i965_render_state *render_state = &i965->render_state;

    BEGIN_BATCH(batch, 2);
    OUT_BATCH(batch, GEN7_3DSTATE_SAMPLER_STATE_POINTERS_PS | (2 - 2));
    OUT_RELOC(batch,
              render_state->wm.sampler,
              I915_GEM_DOMAIN_INSTRUCTION, 0,
              0);
    ADVANCE_BATCH(batch);
}

static void
gen7_emit_binding_table(VADriverContextP ctx)
{
    struct i965_driver_data *i965 = i965_driver_data(ctx);
    struct intel_batchbuffer *batch = i965->batch;

    BEGIN_BATCH(batch, 2);
    OUT_BATCH(batch, GEN7_3DSTATE_BINDING_TABLE_POINTERS_PS | (2 - 2));
    OUT_BATCH(batch, BINDING_TABLE_OFFSET);
    ADVANCE_BATCH(batch);
}

static void
gen7_emit_depth_buffer_state(VADriverContextP ctx)
{
    struct i965_driver_data *i965 = i965_driver_data(ctx);
    struct intel_batchbuffer *batch = i965->batch;

    BEGIN_BATCH(batch, 7);
    OUT_BATCH(batch, GEN7_3DSTATE_DEPTH_BUFFER | (7 - 2));
    OUT_BATCH(batch,
              (I965_DEPTHFORMAT_D32_FLOAT << 18) |
              (I965_SURFACE_NULL << 29));
    OUT_BATCH(batch, 0);
    OUT_BATCH(batch, 0);
    OUT_BATCH(batch, 0);
    OUT_BATCH(batch, 0);
    OUT_BATCH(batch, 0);
    ADVANCE_BATCH(batch);

    BEGIN_BATCH(batch, 3);
    OUT_BATCH(batch, GEN7_3DSTATE_CLEAR_PARAMS | (3 - 2));
    OUT_BATCH(batch, 0);
    OUT_BATCH(batch, 0);
    ADVANCE_BATCH(batch);
}

static void
gen7_emit_drawing_rectangle(VADriverContextP ctx)
{
    i965_render_drawing_rectangle(ctx);
}

static void
gen7_emit_vs_state(VADriverContextP ctx)
{
    struct i965_driver_data *i965 = i965_driver_data(ctx);
    struct intel_batchbuffer *batch = i965->batch;

    /* disable VS constant buffer */
    OUT_BATCH(batch, GEN6_3DSTATE_CONSTANT_VS | (7 - 2));
    OUT_BATCH(batch, 0);
    OUT_BATCH(batch, 0);
    OUT_BATCH(batch, 0);
    OUT_BATCH(batch, 0);
    OUT_BATCH(batch, 0);
    OUT_BATCH(batch, 0);

    OUT_BATCH(batch, GEN6_3DSTATE_VS | (6 - 2));
    OUT_BATCH(batch, 0); /* without VS kernel */
    OUT_BATCH(batch, 0);
    OUT_BATCH(batch, 0);
    OUT_BATCH(batch, 0);
    OUT_BATCH(batch, 0); /* pass-through */
}

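/*
 * GEN7 requires every unused stage to be shut off explicitly: zero-length
 * constants and a null kernel for GS, HS and DS, tessellation (TE) off,
 * null binding tables for each of those stages, and stream output
 * disabled.
 */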
static void
gen7_emit_bypass_state(VADriverContextP ctx)
{
    struct i965_driver_data *i965 = i965_driver_data(ctx);
    struct intel_batchbuffer *batch = i965->batch;

    /* bypass GS */
    BEGIN_BATCH(batch, 7);
    OUT_BATCH(batch, GEN6_3DSTATE_CONSTANT_GS | (7 - 2));
    OUT_BATCH(batch, 0);
    OUT_BATCH(batch, 0);
    OUT_BATCH(batch, 0);
    OUT_BATCH(batch, 0);
    OUT_BATCH(batch, 0);
    OUT_BATCH(batch, 0);
    ADVANCE_BATCH(batch);

    BEGIN_BATCH(batch, 7);
    OUT_BATCH(batch, GEN6_3DSTATE_GS | (7 - 2));
    OUT_BATCH(batch, 0); /* without GS kernel */
    OUT_BATCH(batch, 0);
    OUT_BATCH(batch, 0);
    OUT_BATCH(batch, 0);
    OUT_BATCH(batch, 0);
    OUT_BATCH(batch, 0); /* pass-through */
    ADVANCE_BATCH(batch);

    BEGIN_BATCH(batch, 2);
    OUT_BATCH(batch, GEN7_3DSTATE_BINDING_TABLE_POINTERS_GS | (2 - 2));
    OUT_BATCH(batch, 0);
    ADVANCE_BATCH(batch);

    /* disable HS */
    BEGIN_BATCH(batch, 7);
    OUT_BATCH(batch, GEN7_3DSTATE_CONSTANT_HS | (7 - 2));
    OUT_BATCH(batch, 0);
    OUT_BATCH(batch, 0);
    OUT_BATCH(batch, 0);
    OUT_BATCH(batch, 0);
    OUT_BATCH(batch, 0);
    OUT_BATCH(batch, 0);
    ADVANCE_BATCH(batch);

    BEGIN_BATCH(batch, 7);
    OUT_BATCH(batch, GEN7_3DSTATE_HS | (7 - 2));
    OUT_BATCH(batch, 0);
    OUT_BATCH(batch, 0);
    OUT_BATCH(batch, 0);
    OUT_BATCH(batch, 0);
    OUT_BATCH(batch, 0);
    OUT_BATCH(batch, 0);
    ADVANCE_BATCH(batch);

    BEGIN_BATCH(batch, 2);
    OUT_BATCH(batch, GEN7_3DSTATE_BINDING_TABLE_POINTERS_HS | (2 - 2));
    OUT_BATCH(batch, 0);
    ADVANCE_BATCH(batch);

    /* Disable TE */
    BEGIN_BATCH(batch, 4);
    OUT_BATCH(batch, GEN7_3DSTATE_TE | (4 - 2));
    OUT_BATCH(batch, 0);
    OUT_BATCH(batch, 0);
    OUT_BATCH(batch, 0);
    ADVANCE_BATCH(batch);

    /* Disable DS */
    BEGIN_BATCH(batch, 7);
    OUT_BATCH(batch, GEN7_3DSTATE_CONSTANT_DS | (7 - 2));
    OUT_BATCH(batch, 0);
    OUT_BATCH(batch, 0);
    OUT_BATCH(batch, 0);
    OUT_BATCH(batch, 0);
    OUT_BATCH(batch, 0);
    OUT_BATCH(batch, 0);
    ADVANCE_BATCH(batch);

    BEGIN_BATCH(batch, 6);
    OUT_BATCH(batch, GEN7_3DSTATE_DS | (6 - 2));
    OUT_BATCH(batch, 0);
    OUT_BATCH(batch, 0);
    OUT_BATCH(batch, 0);
    OUT_BATCH(batch, 0);
    OUT_BATCH(batch, 0);
    ADVANCE_BATCH(batch);

    BEGIN_BATCH(batch, 2);
    OUT_BATCH(batch, GEN7_3DSTATE_BINDING_TABLE_POINTERS_DS | (2 - 2));
    OUT_BATCH(batch, 0);
    ADVANCE_BATCH(batch);

    /* Disable STREAMOUT */
    BEGIN_BATCH(batch, 3);
    OUT_BATCH(batch, GEN7_3DSTATE_STREAMOUT | (3 - 2));
    OUT_BATCH(batch, 0);
    OUT_BATCH(batch, 0);
    ADVANCE_BATCH(batch);
}

static void
gen7_emit_clip_state(VADriverContextP ctx)
{
    struct i965_driver_data *i965 = i965_driver_data(ctx);
    struct intel_batchbuffer *batch = i965->batch;

    OUT_BATCH(batch, GEN6_3DSTATE_CLIP | (4 - 2));
    OUT_BATCH(batch, 0);
    OUT_BATCH(batch, 0); /* pass-through */
    OUT_BATCH(batch, 0);
}

static void
gen7_emit_sf_state(VADriverContextP ctx)
{
    struct i965_driver_data *i965 = i965_driver_data(ctx);
    struct intel_batchbuffer *batch = i965->batch;

    BEGIN_BATCH(batch, 14);
    OUT_BATCH(batch, GEN7_3DSTATE_SBE | (14 - 2));
    OUT_BATCH(batch,
              (1 << GEN7_SBE_NUM_OUTPUTS_SHIFT) |
              (1 << GEN7_SBE_URB_ENTRY_READ_LENGTH_SHIFT) |
              (0 << GEN7_SBE_URB_ENTRY_READ_OFFSET_SHIFT));
    OUT_BATCH(batch, 0);
    OUT_BATCH(batch, 0);
    OUT_BATCH(batch, 0); /* DW4 */
    OUT_BATCH(batch, 0);
    OUT_BATCH(batch, 0);
    OUT_BATCH(batch, 0);
    OUT_BATCH(batch, 0);
    OUT_BATCH(batch, 0); /* DW9 */
    OUT_BATCH(batch, 0);
    OUT_BATCH(batch, 0);
    OUT_BATCH(batch, 0);
    OUT_BATCH(batch, 0);
    ADVANCE_BATCH(batch);

    BEGIN_BATCH(batch, 7);
    OUT_BATCH(batch, GEN6_3DSTATE_SF | (7 - 2));
    OUT_BATCH(batch, 0);
    OUT_BATCH(batch, GEN6_3DSTATE_SF_CULL_NONE);
    OUT_BATCH(batch, 2 << GEN6_3DSTATE_SF_TRIFAN_PROVOKE_SHIFT);
    OUT_BATCH(batch, 0);
    OUT_BATCH(batch, 0);
    OUT_BATCH(batch, 0);
    ADVANCE_BATCH(batch);
}

static void
gen7_emit_wm_state(VADriverContextP ctx, int kernel)
{
    struct i965_driver_data *i965 = i965_driver_data(ctx);
    struct intel_batchbuffer *batch = i965->batch;
    struct i965_render_state *render_state = &i965->render_state;
    unsigned int max_threads_shift = GEN7_PS_MAX_THREADS_SHIFT_IVB;
    unsigned int num_samples = 0;

    if (IS_HASWELL(i965->intel.device_id)) {
        max_threads_shift = GEN7_PS_MAX_THREADS_SHIFT_HSW;
        num_samples = 1 << GEN7_PS_SAMPLE_MASK_SHIFT_HSW;
    }

    BEGIN_BATCH(batch, 3);
    OUT_BATCH(batch, GEN6_3DSTATE_WM | (3 - 2));
    OUT_BATCH(batch,
              GEN7_WM_DISPATCH_ENABLE |
              GEN7_WM_PERSPECTIVE_PIXEL_BARYCENTRIC);
    OUT_BATCH(batch, 0);
    ADVANCE_BATCH(batch);

    BEGIN_BATCH(batch, 7);
    OUT_BATCH(batch, GEN6_3DSTATE_CONSTANT_PS | (7 - 2));
    OUT_BATCH(batch, URB_CS_ENTRY_SIZE);
    OUT_BATCH(batch, 0);
    OUT_RELOC(batch,
              render_state->curbe.bo,
              I915_GEM_DOMAIN_INSTRUCTION, 0,
              0);
    OUT_BATCH(batch, 0);
    OUT_BATCH(batch, 0);
    OUT_BATCH(batch, 0);
    ADVANCE_BATCH(batch);

    BEGIN_BATCH(batch, 8);
    OUT_BATCH(batch, GEN7_3DSTATE_PS | (8 - 2));
    OUT_RELOC(batch,
              render_state->render_kernels[kernel].bo,
              I915_GEM_DOMAIN_INSTRUCTION, 0,
              0);
    OUT_BATCH(batch,
              (1 << GEN7_PS_SAMPLER_COUNT_SHIFT) |
              (5 << GEN7_PS_BINDING_TABLE_ENTRY_COUNT_SHIFT));
    OUT_BATCH(batch, 0); /* scratch space base offset */
    OUT_BATCH(batch,
              ((render_state->max_wm_threads - 1) << max_threads_shift) | num_samples |
              GEN7_PS_PUSH_CONSTANT_ENABLE |
              GEN7_PS_ATTRIBUTE_ENABLE |
              GEN7_PS_16_DISPATCH_ENABLE);
    OUT_BATCH(batch,
              (6 << GEN7_PS_DISPATCH_START_GRF_SHIFT_0));
    OUT_BATCH(batch, 0); /* kernel 1 pointer */
    OUT_BATCH(batch, 0); /* kernel 2 pointer */
    ADVANCE_BATCH(batch);
}

static void
gen7_emit_vertex_element_state(VADriverContextP ctx)
{
    struct i965_driver_data *i965 = i965_driver_data(ctx);
    struct intel_batchbuffer *batch = i965->batch;

    /* Set up our vertex elements, sourced from the single vertex buffer. */
    OUT_BATCH(batch, CMD_VERTEX_ELEMENTS | (5 - 2));
    /* offset 0: X,Y -> {X, Y, 1.0, 1.0} */
    OUT_BATCH(batch, (0 << GEN6_VE0_VERTEX_BUFFER_INDEX_SHIFT) |
              GEN6_VE0_VALID |
              (I965_SURFACEFORMAT_R32G32_FLOAT << VE0_FORMAT_SHIFT) |
              (0 << VE0_OFFSET_SHIFT));
    OUT_BATCH(batch, (I965_VFCOMPONENT_STORE_SRC << VE1_VFCOMPONENT_0_SHIFT) |
              (I965_VFCOMPONENT_STORE_SRC << VE1_VFCOMPONENT_1_SHIFT) |
              (I965_VFCOMPONENT_STORE_1_FLT << VE1_VFCOMPONENT_2_SHIFT) |
              (I965_VFCOMPONENT_STORE_1_FLT << VE1_VFCOMPONENT_3_SHIFT));
    /* offset 8: S0, T0 -> {S0, T0, 1.0, 1.0} */
    OUT_BATCH(batch, (0 << GEN6_VE0_VERTEX_BUFFER_INDEX_SHIFT) |
              GEN6_VE0_VALID |
              (I965_SURFACEFORMAT_R32G32_FLOAT << VE0_FORMAT_SHIFT) |
              (8 << VE0_OFFSET_SHIFT));
    OUT_BATCH(batch, (I965_VFCOMPONENT_STORE_SRC << VE1_VFCOMPONENT_0_SHIFT) |
              (I965_VFCOMPONENT_STORE_SRC << VE1_VFCOMPONENT_1_SHIFT) |
              (I965_VFCOMPONENT_STORE_1_FLT << VE1_VFCOMPONENT_2_SHIFT) |
              (I965_VFCOMPONENT_STORE_1_FLT << VE1_VFCOMPONENT_3_SHIFT));
}

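/*
 * A RECTLIST primitive takes just three corners of the rectangle and the
 * hardware infers the fourth, so the 3-vertex buffer above covers the
 * whole destination quad.
 */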
static void
gen7_emit_vertices(VADriverContextP ctx)
{
    struct i965_driver_data *i965 = i965_driver_data(ctx);
    struct intel_batchbuffer *batch = i965->batch;
    struct i965_render_state *render_state = &i965->render_state;

    BEGIN_BATCH(batch, 5);
    OUT_BATCH(batch, CMD_VERTEX_BUFFERS | (5 - 2));
    OUT_BATCH(batch,
              (0 << GEN6_VB0_BUFFER_INDEX_SHIFT) |
              GEN6_VB0_VERTEXDATA |
              GEN7_VB0_ADDRESS_MODIFYENABLE |
              ((4 * 4) << VB0_BUFFER_PITCH_SHIFT));
    OUT_RELOC(batch, render_state->vb.vertex_buffer, I915_GEM_DOMAIN_VERTEX, 0, 0);      /* buffer start address */
    OUT_RELOC(batch, render_state->vb.vertex_buffer, I915_GEM_DOMAIN_VERTEX, 0, 12 * 4); /* buffer end address */
    OUT_BATCH(batch, 0);
    ADVANCE_BATCH(batch);

    BEGIN_BATCH(batch, 7);
    OUT_BATCH(batch, CMD_3DPRIMITIVE | (7 - 2));
    OUT_BATCH(batch,
              _3DPRIM_RECTLIST |
              GEN7_3DPRIM_VERTEXBUFFER_ACCESS_SEQUENTIAL);
    OUT_BATCH(batch, 3); /* vertex count per instance */
    OUT_BATCH(batch, 0); /* start vertex offset */
    OUT_BATCH(batch, 1); /* single instance */
    OUT_BATCH(batch, 0); /* start instance location */
    OUT_BATCH(batch, 0); /* base vertex location */
    ADVANCE_BATCH(batch);
}

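/*
 * Emit the full Gen7 3D pipeline setup for a single draw.  The whole
 * sequence is wrapped in an atomic batch section so it can never be
 * split across a batchbuffer flush; the unused fixed-function stages
 * are turned off via gen7_emit_bypass_state() and only the WM stage
 * runs the selected pixel shader kernel.
 */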
static void
gen7_render_emit_states(VADriverContextP ctx, int kernel)
{
    struct i965_driver_data *i965 = i965_driver_data(ctx);
    struct intel_batchbuffer *batch = i965->batch;

    intel_batchbuffer_start_atomic(batch, 0x1000);
    intel_batchbuffer_emit_mi_flush(batch);
    gen7_emit_invarient_states(ctx);
    gen7_emit_state_base_address(ctx);
    gen7_emit_viewport_state_pointers(ctx);
    gen7_emit_urb(ctx);
    gen7_emit_cc_state_pointers(ctx);
    gen7_emit_sampler_state_pointers(ctx);
    gen7_emit_bypass_state(ctx);
    gen7_emit_vs_state(ctx);
    gen7_emit_clip_state(ctx);
    gen7_emit_sf_state(ctx);
    gen7_emit_wm_state(ctx, kernel);
    gen7_emit_binding_table(ctx);
    gen7_emit_depth_buffer_state(ctx);
    gen7_emit_drawing_rectangle(ctx);
    gen7_emit_vertex_element_state(ctx);
    gen7_emit_vertices(ctx);
    intel_batchbuffer_end_atomic(batch);
}

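/*
 * Gen7 PutSurface entry point: build all render state for the given
 * video surface, clear the destination region, then emit the pipeline
 * state and vertices using the planar-YUV pixel shader (PS_KERNEL).
 */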
static void
gen7_render_put_surface(
    VADriverContextP   ctx,
    struct object_surface *obj_surface,
    const VARectangle *src_rect,
    const VARectangle *dst_rect,
    unsigned int       flags
)
{
    struct i965_driver_data *i965 = i965_driver_data(ctx);
    struct intel_batchbuffer *batch = i965->batch;

    gen7_render_initialize(ctx);
    gen7_render_setup_states(ctx, obj_surface, src_rect, dst_rect, flags);
    i965_clear_dest_region(ctx);
    gen7_render_emit_states(ctx, PS_KERNEL);
    intel_batchbuffer_flush(batch);
}

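/*
 * Blend state for subpictures: classic "source over" compositing,
 * dst = src * src_alpha + dst * (1 - src_alpha), with pre- and
 * post-blend clamping to the [0, 1] range.
 */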
static void
gen7_subpicture_render_blend_state(VADriverContextP ctx)
{
    struct i965_driver_data *i965 = i965_driver_data(ctx);
    struct i965_render_state *render_state = &i965->render_state;
    struct gen6_blend_state *blend_state;

    dri_bo_map(render_state->cc.blend, 1);
    assert(render_state->cc.blend->virtual);
    blend_state = render_state->cc.blend->virtual;
    memset(blend_state, 0, sizeof(*blend_state));
    blend_state->blend0.dest_blend_factor = I965_BLENDFACTOR_INV_SRC_ALPHA;
    blend_state->blend0.source_blend_factor = I965_BLENDFACTOR_SRC_ALPHA;
    blend_state->blend0.blend_func = I965_BLENDFUNCTION_ADD;
    blend_state->blend0.blend_enable = 1;
    blend_state->blend1.post_blend_clamp_enable = 1;
    blend_state->blend1.pre_blend_clamp_enable = 1;
    blend_state->blend1.clamp_range = 0; /* clamp range [0, 1] */
    dri_bo_unmap(render_state->cc.blend);
}

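/*
 * State setup for the subpicture path.  It mirrors the regular
 * gen7_render_setup_states() flow but sources from the subpicture
 * image (sampled as ARGB) and uses the alpha-blend state above so
 * the subpicture is composited over the destination.
 */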
static void
gen7_subpicture_render_setup_states(
    VADriverContextP   ctx,
    struct object_surface *obj_surface,
    const VARectangle *src_rect,
    const VARectangle *dst_rect
)
{
    i965_render_dest_surface_state(ctx, 0);
    i965_subpic_render_src_surfaces_state(ctx, obj_surface);
    i965_render_sampler(ctx);
    i965_render_cc_viewport(ctx);
    gen7_render_color_calc_state(ctx);
    gen7_subpicture_render_blend_state(ctx);
    gen7_render_depth_stencil_state(ctx);
    i965_subpic_render_upload_constants(ctx, obj_surface);
    i965_subpic_render_upload_vertex(ctx, obj_surface, dst_rect);
}

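/*
 * Gen7 subpicture entry point: composite the subpicture currently
 * selected on the surface (subpic_render_idx) with the ARGB pixel
 * shader (PS_SUBPIC_KERNEL); for paletted formats the image palette
 * is uploaded as well, with full (0xff) alpha.
 */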
static void
gen7_render_put_subpicture(
    VADriverContextP   ctx,
    struct object_surface *obj_surface,
    const VARectangle *src_rect,
    const VARectangle *dst_rect
)
{
    struct i965_driver_data *i965 = i965_driver_data(ctx);
    struct intel_batchbuffer *batch = i965->batch;
    unsigned int index = obj_surface->subpic_render_idx;
    struct object_subpic *obj_subpic = obj_surface->obj_subpic[index];

    assert(obj_subpic);
    gen7_render_initialize(ctx);
    gen7_subpicture_render_setup_states(ctx, obj_surface, src_rect, dst_rect);
    gen7_render_emit_states(ctx, PS_SUBPIC_KERNEL);
    i965_render_upload_image_palette(ctx, obj_subpic->obj_image, 0xff);
    intel_batchbuffer_flush(batch);
}


/*
 * global functions
 */
VAStatus
i965_DestroySurfaces(VADriverContextP ctx,
                     VASurfaceID *surface_list,
                     int num_surfaces);
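
/*
 * Generation-independent PutSurface.  The surface is first offered to
 * i965_post_processing(); if that returns a new surface, rendering
 * continues from it, and if scaling was already performed there the
 * source rectangle is replaced by the destination rectangle so the
 * render path copies 1:1.  The temporary surface is destroyed once
 * the blit has been queued.
 */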
void
intel_render_put_surface(
    VADriverContextP   ctx,
    struct object_surface *obj_surface,
    const VARectangle *src_rect,
    const VARectangle *dst_rect,
    unsigned int       flags
)
{
    struct i965_driver_data *i965 = i965_driver_data(ctx);
    int has_done_scaling = 0;
    VASurfaceID out_surface_id = i965_post_processing(ctx,
                                                      obj_surface,
                                                      src_rect,
                                                      dst_rect,
                                                      flags,
                                                      &has_done_scaling);

    assert((!has_done_scaling) || (out_surface_id != VA_INVALID_ID));

    if (out_surface_id != VA_INVALID_ID) {
        struct object_surface *new_obj_surface = SURFACE(out_surface_id);

        if (new_obj_surface && new_obj_surface->bo)
            obj_surface = new_obj_surface;

        if (has_done_scaling)
            src_rect = dst_rect;
    }

    if (IS_GEN7(i965->intel.device_id))
        gen7_render_put_surface(ctx, obj_surface, src_rect, dst_rect, flags);
    else if (IS_GEN6(i965->intel.device_id))
        gen6_render_put_surface(ctx, obj_surface, src_rect, dst_rect, flags);
    else
        i965_render_put_surface(ctx, obj_surface, src_rect, dst_rect, flags);

    if (out_surface_id != VA_INVALID_ID)
        i965_DestroySurfaces(ctx, &out_surface_id, 1);
}

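/*
 * Generation-independent subpicture dispatch: select the Gen7, Gen6
 * or legacy (Gen4/Gen5) implementation based on the device id.
 */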
void
intel_render_put_subpicture(
    VADriverContextP   ctx,
    struct object_surface *obj_surface,
    const VARectangle *src_rect,
    const VARectangle *dst_rect
)
{
    struct i965_driver_data *i965 = i965_driver_data(ctx);

    if (IS_GEN7(i965->intel.device_id))
        gen7_render_put_subpicture(ctx, obj_surface, src_rect, dst_rect);
    else if (IS_GEN6(i965->intel.device_id))
        gen6_render_put_subpicture(ctx, obj_surface, src_rect, dst_rect);
    else
        i965_render_put_subpicture(ctx, obj_surface, src_rect, dst_rect);
}

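/*
 * One-time render setup: choose the kernel set matching the hardware
 * generation, upload each kernel binary into its own buffer object,
 * allocate the 4 KiB constant (CURBE) buffer and record the maximum
 * number of WM threads for this GPU variant (consumed later when the
 * PS/WM state is emitted).
 */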
bool
i965_render_init(VADriverContextP ctx)
{
    struct i965_driver_data *i965 = i965_driver_data(ctx);
    struct i965_render_state *render_state = &i965->render_state;
    int i;

    /* kernel */
    assert(NUM_RENDER_KERNEL == (sizeof(render_kernels_gen5) /
                                 sizeof(render_kernels_gen5[0])));
    assert(NUM_RENDER_KERNEL == (sizeof(render_kernels_gen6) /
                                 sizeof(render_kernels_gen6[0])));

    if (IS_GEN7(i965->intel.device_id))
        memcpy(render_state->render_kernels,
               (IS_HASWELL(i965->intel.device_id) ? render_kernels_gen7_haswell : render_kernels_gen7),
               sizeof(render_state->render_kernels));
    else if (IS_GEN6(i965->intel.device_id))
        memcpy(render_state->render_kernels, render_kernels_gen6, sizeof(render_state->render_kernels));
    else if (IS_IRONLAKE(i965->intel.device_id))
        memcpy(render_state->render_kernels, render_kernels_gen5, sizeof(render_state->render_kernels));
    else
        memcpy(render_state->render_kernels, render_kernels_gen4, sizeof(render_state->render_kernels));

    for (i = 0; i < NUM_RENDER_KERNEL; i++) {
        struct i965_kernel *kernel = &render_state->render_kernels[i];

        if (!kernel->size)
            continue;

        kernel->bo = dri_bo_alloc(i965->intel.bufmgr,
                                  kernel->name,
                                  kernel->size, 0x1000);
        assert(kernel->bo);
        dri_bo_subdata(kernel->bo, 0, kernel->size, kernel->bin);
    }

    /* constant buffer */
    render_state->curbe.bo = dri_bo_alloc(i965->intel.bufmgr,
                                          "constant buffer",
                                          4096, 64);
    assert(render_state->curbe.bo);

    if (IS_HSW_GT1(i965->intel.device_id)) {
        render_state->max_wm_threads = 102;
    } else if (IS_HSW_GT2(i965->intel.device_id)) {
        render_state->max_wm_threads = 204;
    } else if (IS_HSW_GT3(i965->intel.device_id)) {
        render_state->max_wm_threads = 408;
    } else if (IS_IVB_GT1(i965->intel.device_id) || IS_BAYTRAIL(i965->intel.device_id)) {
        render_state->max_wm_threads = 48;
    } else if (IS_IVB_GT2(i965->intel.device_id)) {
        render_state->max_wm_threads = 172;
    } else if (IS_SNB_GT1(i965->intel.device_id)) {
        render_state->max_wm_threads = 40;
    } else if (IS_SNB_GT2(i965->intel.device_id)) {
        render_state->max_wm_threads = 80;
    } else if (IS_IRONLAKE(i965->intel.device_id)) {
        render_state->max_wm_threads = 72; /* 12 EUs * 6 threads */
    } else if (IS_G4X(i965->intel.device_id)) {
        render_state->max_wm_threads = 50; /* 10 EUs * 5 threads */
    } else {
        /* should never get here !!! */
        assert(0);
    }

    return true;
}

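/*
 * Release every buffer object owned by the render state and clear the
 * pointers so a subsequent re-initialization starts from a clean slate.
 */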
void
i965_render_terminate(VADriverContextP ctx)
{
    int i;
    struct i965_driver_data *i965 = i965_driver_data(ctx);
    struct i965_render_state *render_state = &i965->render_state;

    dri_bo_unreference(render_state->curbe.bo);
    render_state->curbe.bo = NULL;

    for (i = 0; i < NUM_RENDER_KERNEL; i++) {
        struct i965_kernel *kernel = &render_state->render_kernels[i];

        dri_bo_unreference(kernel->bo);
        kernel->bo = NULL;
    }

    dri_bo_unreference(render_state->vb.vertex_buffer);
    render_state->vb.vertex_buffer = NULL;
    dri_bo_unreference(render_state->vs.state);
    render_state->vs.state = NULL;
    dri_bo_unreference(render_state->sf.state);
    render_state->sf.state = NULL;
    dri_bo_unreference(render_state->wm.sampler);
    render_state->wm.sampler = NULL;
    dri_bo_unreference(render_state->wm.state);
    render_state->wm.state = NULL;
    dri_bo_unreference(render_state->wm.surface_state_binding_table_bo);
    render_state->wm.surface_state_binding_table_bo = NULL;
    dri_bo_unreference(render_state->cc.viewport);
    render_state->cc.viewport = NULL;
    dri_bo_unreference(render_state->cc.state);
    render_state->cc.state = NULL;
    dri_bo_unreference(render_state->cc.blend);
    render_state->cc.blend = NULL;
    dri_bo_unreference(render_state->cc.depth_stencil);
    render_state->cc.depth_stencil = NULL;

    if (render_state->draw_region) {
        dri_bo_unreference(render_state->draw_region->bo);
        free(render_state->draw_region);
        render_state->draw_region = NULL;
    }
}