/*
 * Copyright © 2006 Intel Corporation
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice (including the next
 * paragraph) shall be included in all copies or substantial portions of the
 * Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
 * DEALINGS IN THE SOFTWARE.
 *
 * Authors:
 *    Eric Anholt <eric@anholt.net>
 *    Keith Packard <keithp@keithp.com>
 *    Xiang Haihao <haihao.xiang@intel.com>
 *
 */

/*
 * Most of the rendering code is ported from xf86-video-intel/src/i965_video.c
 */

#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <assert.h>
#include <math.h>

#include <va/va_drmcommon.h>

#include "intel_batchbuffer.h"
#include "intel_driver.h"
#include "i965_defines.h"
#include "i965_drv_video.h"
#include "i965_structs.h"

#include "i965_render.h"

#define SF_KERNEL_NUM_GRF       16
#define SF_MAX_THREADS          1

static const uint32_t sf_kernel_static[][4] =
{
#include "shaders/render/exa_sf.g4b"
};

#define PS_KERNEL_NUM_GRF       48
#define PS_MAX_THREADS          32

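/*
 * Pack a GRF register count into the "GRF Register Count" field of a
 * fixed-function unit state: the hardware expects the number of 16-register
 * blocks, minus one.
 */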
#define I965_GRF_BLOCKS(nreg)   ((nreg + 15) / 16 - 1)

static const uint32_t ps_kernel_static[][4] =
{
#include "shaders/render/exa_wm_xy.g4b"
#include "shaders/render/exa_wm_src_affine.g4b"
#include "shaders/render/exa_wm_src_sample_planar.g4b"
#include "shaders/render/exa_wm_yuv_color_balance.g4b"
#include "shaders/render/exa_wm_yuv_rgb.g4b"
#include "shaders/render/exa_wm_write.g4b"
};
static const uint32_t ps_subpic_kernel_static[][4] =
{
#include "shaders/render/exa_wm_xy.g4b"
#include "shaders/render/exa_wm_src_affine.g4b"
#include "shaders/render/exa_wm_src_sample_argb.g4b"
#include "shaders/render/exa_wm_write.g4b"
};

/* On IRONLAKE */
static const uint32_t sf_kernel_static_gen5[][4] =
{
#include "shaders/render/exa_sf.g4b.gen5"
};

static const uint32_t ps_kernel_static_gen5[][4] =
{
#include "shaders/render/exa_wm_xy.g4b.gen5"
#include "shaders/render/exa_wm_src_affine.g4b.gen5"
#include "shaders/render/exa_wm_src_sample_planar.g4b.gen5"
#include "shaders/render/exa_wm_yuv_color_balance.g4b.gen5"
#include "shaders/render/exa_wm_yuv_rgb.g4b.gen5"
#include "shaders/render/exa_wm_write.g4b.gen5"
};
static const uint32_t ps_subpic_kernel_static_gen5[][4] =
{
#include "shaders/render/exa_wm_xy.g4b.gen5"
#include "shaders/render/exa_wm_src_affine.g4b.gen5"
#include "shaders/render/exa_wm_src_sample_argb.g4b.gen5"
#include "shaders/render/exa_wm_write.g4b.gen5"
};

/* programs for Sandybridge */
static const uint32_t sf_kernel_static_gen6[][4] =
{
};

static const uint32_t ps_kernel_static_gen6[][4] = {
#include "shaders/render/exa_wm_src_affine.g6b"
#include "shaders/render/exa_wm_src_sample_planar.g6b"
#include "shaders/render/exa_wm_yuv_color_balance.g6b"
#include "shaders/render/exa_wm_yuv_rgb.g6b"
#include "shaders/render/exa_wm_write.g6b"
};

static const uint32_t ps_subpic_kernel_static_gen6[][4] = {
#include "shaders/render/exa_wm_src_affine.g6b"
#include "shaders/render/exa_wm_src_sample_argb.g6b"
#include "shaders/render/exa_wm_write.g6b"
};

/* programs for Ivybridge */
static const uint32_t sf_kernel_static_gen7[][4] =
{
};

static const uint32_t ps_kernel_static_gen7[][4] = {
#include "shaders/render/exa_wm_src_affine.g7b"
#include "shaders/render/exa_wm_src_sample_planar.g7b"
#include "shaders/render/exa_wm_yuv_color_balance.g7b"
#include "shaders/render/exa_wm_yuv_rgb.g7b"
#include "shaders/render/exa_wm_write.g7b"
};

static const uint32_t ps_subpic_kernel_static_gen7[][4] = {
#include "shaders/render/exa_wm_src_affine.g7b"
#include "shaders/render/exa_wm_src_sample_argb.g7b"
#include "shaders/render/exa_wm_write.g7b"
};

/* Programs for Haswell */
static const uint32_t ps_kernel_static_gen7_haswell[][4] = {
#include "shaders/render/exa_wm_src_affine.g7b"
#include "shaders/render/exa_wm_src_sample_planar.g7b.haswell"
#include "shaders/render/exa_wm_yuv_color_balance.g7b.haswell"
#include "shaders/render/exa_wm_yuv_rgb.g7b"
#include "shaders/render/exa_wm_write.g7b"
};

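/*
 * Surface state entries are padded to the larger of the Gen6 and Gen7
 * sizes so that SURFACE_STATE_OFFSET() yields the same layout on every
 * generation; the binding table is placed immediately after the last
 * surface state slot in the same buffer object.
 */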
#define SURFACE_STATE_PADDED_SIZE       MAX(SURFACE_STATE_PADDED_SIZE_GEN6, SURFACE_STATE_PADDED_SIZE_GEN7)

#define SURFACE_STATE_OFFSET(index)     (SURFACE_STATE_PADDED_SIZE * index)
#define BINDING_TABLE_OFFSET            SURFACE_STATE_OFFSET(MAX_RENDER_SURFACES)

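/*
 * Reinterpret the bits of a float as a uint32_t (type punning through a
 * union) so that float constants can be emitted into the batch buffer,
 * which only takes dwords.
 */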
static uint32_t float_to_uint (float f)
{
    union {
        uint32_t i;
        float f;
    } x;

    x.f = f;
    return x.i;
}

enum
{
    SF_KERNEL = 0,
    PS_KERNEL,
    PS_SUBPIC_KERNEL
};

static struct i965_kernel render_kernels_gen4[] = {
    {
        "SF",
        SF_KERNEL,
        sf_kernel_static,
        sizeof(sf_kernel_static),
        NULL
    },
    {
        "PS",
        PS_KERNEL,
        ps_kernel_static,
        sizeof(ps_kernel_static),
        NULL
    },

    {
        "PS_SUBPIC",
        PS_SUBPIC_KERNEL,
        ps_subpic_kernel_static,
        sizeof(ps_subpic_kernel_static),
        NULL
    }
};

static struct i965_kernel render_kernels_gen5[] = {
    {
        "SF",
        SF_KERNEL,
        sf_kernel_static_gen5,
        sizeof(sf_kernel_static_gen5),
        NULL
    },
    {
        "PS",
        PS_KERNEL,
        ps_kernel_static_gen5,
        sizeof(ps_kernel_static_gen5),
        NULL
    },

    {
        "PS_SUBPIC",
        PS_SUBPIC_KERNEL,
        ps_subpic_kernel_static_gen5,
        sizeof(ps_subpic_kernel_static_gen5),
        NULL
    }
};

static struct i965_kernel render_kernels_gen6[] = {
    {
        "SF",
        SF_KERNEL,
        sf_kernel_static_gen6,
        sizeof(sf_kernel_static_gen6),
        NULL
    },
    {
        "PS",
        PS_KERNEL,
        ps_kernel_static_gen6,
        sizeof(ps_kernel_static_gen6),
        NULL
    },

    {
        "PS_SUBPIC",
        PS_SUBPIC_KERNEL,
        ps_subpic_kernel_static_gen6,
        sizeof(ps_subpic_kernel_static_gen6),
        NULL
    }
};

static struct i965_kernel render_kernels_gen7[] = {
    {
        "SF",
        SF_KERNEL,
        sf_kernel_static_gen7,
        sizeof(sf_kernel_static_gen7),
        NULL
    },
    {
        "PS",
        PS_KERNEL,
        ps_kernel_static_gen7,
        sizeof(ps_kernel_static_gen7),
        NULL
    },

    {
        "PS_SUBPIC",
        PS_SUBPIC_KERNEL,
        ps_subpic_kernel_static_gen7,
        sizeof(ps_subpic_kernel_static_gen7),
        NULL
    }
};

static struct i965_kernel render_kernels_gen7_haswell[] = {
    {
        "SF",
        SF_KERNEL,
        sf_kernel_static_gen7,
        sizeof(sf_kernel_static_gen7),
        NULL
    },
    {
        "PS",
        PS_KERNEL,
        ps_kernel_static_gen7_haswell,
        sizeof(ps_kernel_static_gen7_haswell),
        NULL
    },

    {
        "PS_SUBPIC",
        PS_SUBPIC_KERNEL,
        ps_subpic_kernel_static_gen7,
        sizeof(ps_subpic_kernel_static_gen7),
        NULL
    }
};

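/*
 * Static URB partitioning between the fixed-function stages; the stages
 * are laid out back to back and the resulting cumulative fence values are
 * programmed by i965_render_urb_layout() below.
 */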
#define URB_VS_ENTRIES        8
#define URB_VS_ENTRY_SIZE     1

#define URB_GS_ENTRIES        0
#define URB_GS_ENTRY_SIZE     0

#define URB_CLIP_ENTRIES      0
#define URB_CLIP_ENTRY_SIZE   0

#define URB_SF_ENTRIES        1
#define URB_SF_ENTRY_SIZE     2

#define URB_CS_ENTRIES        4
#define URB_CS_ENTRY_SIZE     4

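/*
 * YUV -> RGB conversion coefficients for video-range input, one matrix per
 * colorimetry standard.  Each row holds the {Y, Cb, Cr} coefficients for an
 * output channel plus a fourth constant; exactly how these values are
 * consumed is defined by the exa_wm_yuv_rgb shader, which reads them from
 * the constant buffer (see i965_render_upload_constants()).
 */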
static float yuv_to_rgb_bt601[3][4] = {
{1.164,         0,      1.596,          -0.06275,},
{1.164,         -0.392, -0.813,         -0.50196,},
{1.164,         2.017,  0,              -0.50196,},
};

static float yuv_to_rgb_bt709[3][4] = {
{1.164,         0,      1.793,          -0.06275,},
{1.164,         -0.213, -0.533,         -0.50196,},
{1.164,         2.112,  0,              -0.50196,},
};

static float yuv_to_rgb_smpte_240[3][4] = {
{1.164,         0,      1.794,          -0.06275,},
{1.164,         -0.258, -0.5425,        -0.50196,},
{1.164,         2.078,  0,              -0.50196,},
};

static void
i965_render_vs_unit(VADriverContextP ctx)
{
    struct i965_driver_data *i965 = i965_driver_data(ctx);
    struct i965_render_state *render_state = &i965->render_state;
    struct i965_vs_unit_state *vs_state;

    dri_bo_map(render_state->vs.state, 1);
    assert(render_state->vs.state->virtual);
    vs_state = render_state->vs.state->virtual;
    memset(vs_state, 0, sizeof(*vs_state));

    if (IS_IRONLAKE(i965->intel.device_info))
        vs_state->thread4.nr_urb_entries = URB_VS_ENTRIES >> 2;
    else
        vs_state->thread4.nr_urb_entries = URB_VS_ENTRIES;

    vs_state->thread4.urb_entry_allocation_size = URB_VS_ENTRY_SIZE - 1;
    vs_state->vs6.vs_enable = 0;
    vs_state->vs6.vert_cache_disable = 1;

    dri_bo_unmap(render_state->vs.state);
}

static void
i965_render_sf_unit(VADriverContextP ctx)
{
    struct i965_driver_data *i965 = i965_driver_data(ctx);
    struct i965_render_state *render_state = &i965->render_state;
    struct i965_sf_unit_state *sf_state;

    dri_bo_map(render_state->sf.state, 1);
    assert(render_state->sf.state->virtual);
    sf_state = render_state->sf.state->virtual;
    memset(sf_state, 0, sizeof(*sf_state));

    sf_state->thread0.grf_reg_count = I965_GRF_BLOCKS(SF_KERNEL_NUM_GRF);
    sf_state->thread0.kernel_start_pointer = render_state->render_kernels[SF_KERNEL].bo->offset >> 6;

    sf_state->sf1.single_program_flow = 1; /* XXX */
    sf_state->sf1.binding_table_entry_count = 0;
    sf_state->sf1.thread_priority = 0;
    sf_state->sf1.floating_point_mode = 0; /* Mesa does this */
    sf_state->sf1.illegal_op_exception_enable = 1;
    sf_state->sf1.mask_stack_exception_enable = 1;
    sf_state->sf1.sw_exception_enable = 1;

    /* scratch space is not used in our kernel */
    sf_state->thread2.per_thread_scratch_space = 0;
    sf_state->thread2.scratch_space_base_pointer = 0;

    sf_state->thread3.const_urb_entry_read_length = 0; /* no const URBs */
    sf_state->thread3.const_urb_entry_read_offset = 0; /* no const URBs */
    sf_state->thread3.urb_entry_read_length = 1; /* 1 URB per vertex */
    sf_state->thread3.urb_entry_read_offset = 0;
    sf_state->thread3.dispatch_grf_start_reg = 3;

    sf_state->thread4.max_threads = SF_MAX_THREADS - 1;
    sf_state->thread4.urb_entry_allocation_size = URB_SF_ENTRY_SIZE - 1;
    sf_state->thread4.nr_urb_entries = URB_SF_ENTRIES;
    sf_state->thread4.stats_enable = 1;

    sf_state->sf5.viewport_transform = 0; /* skip viewport */

    sf_state->sf6.cull_mode = I965_CULLMODE_NONE;
    sf_state->sf6.scissor = 0;

    sf_state->sf7.trifan_pv = 2;

    sf_state->sf6.dest_org_vbias = 0x8;
    sf_state->sf6.dest_org_hbias = 0x8;

    dri_bo_emit_reloc(render_state->sf.state,
                      I915_GEM_DOMAIN_INSTRUCTION, 0,
                      sf_state->thread0.grf_reg_count << 1,
                      offsetof(struct i965_sf_unit_state, thread0),
                      render_state->render_kernels[SF_KERNEL].bo);

    dri_bo_unmap(render_state->sf.state);
}

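/*
 * One sampler state, using bilinear filtering and clamped texture
 * coordinates, is written for each source surface bound by
 * i965_render_src_surface_state().
 */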
static void
i965_render_sampler(VADriverContextP ctx)
{
    struct i965_driver_data *i965 = i965_driver_data(ctx);
    struct i965_render_state *render_state = &i965->render_state;
    struct i965_sampler_state *sampler_state;
    int i;

    assert(render_state->wm.sampler_count > 0);
    assert(render_state->wm.sampler_count <= MAX_SAMPLERS);

    dri_bo_map(render_state->wm.sampler, 1);
    assert(render_state->wm.sampler->virtual);
    sampler_state = render_state->wm.sampler->virtual;
    for (i = 0; i < render_state->wm.sampler_count; i++) {
        memset(sampler_state, 0, sizeof(*sampler_state));
        sampler_state->ss0.min_filter = I965_MAPFILTER_LINEAR;
        sampler_state->ss0.mag_filter = I965_MAPFILTER_LINEAR;
        sampler_state->ss1.r_wrap_mode = I965_TEXCOORDMODE_CLAMP;
        sampler_state->ss1.s_wrap_mode = I965_TEXCOORDMODE_CLAMP;
        sampler_state->ss1.t_wrap_mode = I965_TEXCOORDMODE_CLAMP;
        sampler_state++;
    }

    dri_bo_unmap(render_state->wm.sampler);
}

static void
i965_subpic_render_wm_unit(VADriverContextP ctx)
{
    struct i965_driver_data *i965 = i965_driver_data(ctx);
    struct i965_render_state *render_state = &i965->render_state;
    struct i965_wm_unit_state *wm_state;

    assert(render_state->wm.sampler);

    dri_bo_map(render_state->wm.state, 1);
    assert(render_state->wm.state->virtual);
    wm_state = render_state->wm.state->virtual;
    memset(wm_state, 0, sizeof(*wm_state));

    wm_state->thread0.grf_reg_count = I965_GRF_BLOCKS(PS_KERNEL_NUM_GRF);
    wm_state->thread0.kernel_start_pointer = render_state->render_kernels[PS_SUBPIC_KERNEL].bo->offset >> 6;

    wm_state->thread1.single_program_flow = 1; /* XXX */

    if (IS_IRONLAKE(i965->intel.device_info))
        wm_state->thread1.binding_table_entry_count = 0; /* hardware requirement */
    else
        wm_state->thread1.binding_table_entry_count = 7;

    wm_state->thread2.scratch_space_base_pointer = 0;
    wm_state->thread2.per_thread_scratch_space = 0; /* 1024 bytes */

    wm_state->thread3.dispatch_grf_start_reg = 2; /* XXX */
    wm_state->thread3.const_urb_entry_read_length = 4;
    wm_state->thread3.const_urb_entry_read_offset = 0;
    wm_state->thread3.urb_entry_read_length = 1; /* XXX */
    wm_state->thread3.urb_entry_read_offset = 0; /* XXX */

    wm_state->wm4.stats_enable = 0;
    wm_state->wm4.sampler_state_pointer = render_state->wm.sampler->offset >> 5;

    if (IS_IRONLAKE(i965->intel.device_info)) {
        wm_state->wm4.sampler_count = 0;        /* hardware requirement */
    } else {
        wm_state->wm4.sampler_count = (render_state->wm.sampler_count + 3) / 4;
    }

    wm_state->wm5.max_threads = i965->intel.device_info->max_wm_threads - 1;
    wm_state->wm5.thread_dispatch_enable = 1;
    wm_state->wm5.enable_16_pix = 1;
    wm_state->wm5.enable_8_pix = 0;
    wm_state->wm5.early_depth_test = 1;

    dri_bo_emit_reloc(render_state->wm.state,
                      I915_GEM_DOMAIN_INSTRUCTION, 0,
                      wm_state->thread0.grf_reg_count << 1,
                      offsetof(struct i965_wm_unit_state, thread0),
                      render_state->render_kernels[PS_SUBPIC_KERNEL].bo);

    dri_bo_emit_reloc(render_state->wm.state,
                      I915_GEM_DOMAIN_INSTRUCTION, 0,
                      wm_state->wm4.sampler_count << 2,
                      offsetof(struct i965_wm_unit_state, wm4),
                      render_state->wm.sampler);

    dri_bo_unmap(render_state->wm.state);
}

static void
i965_render_wm_unit(VADriverContextP ctx)
{
    struct i965_driver_data *i965 = i965_driver_data(ctx);
    struct i965_render_state *render_state = &i965->render_state;
    struct i965_wm_unit_state *wm_state;

    assert(render_state->wm.sampler);

    dri_bo_map(render_state->wm.state, 1);
    assert(render_state->wm.state->virtual);
    wm_state = render_state->wm.state->virtual;
    memset(wm_state, 0, sizeof(*wm_state));

    wm_state->thread0.grf_reg_count = I965_GRF_BLOCKS(PS_KERNEL_NUM_GRF);
    wm_state->thread0.kernel_start_pointer = render_state->render_kernels[PS_KERNEL].bo->offset >> 6;

    wm_state->thread1.single_program_flow = 1; /* XXX */

    if (IS_IRONLAKE(i965->intel.device_info))
        wm_state->thread1.binding_table_entry_count = 0;        /* hardware requirement */
    else
        wm_state->thread1.binding_table_entry_count = 7;

    wm_state->thread2.scratch_space_base_pointer = 0;
    wm_state->thread2.per_thread_scratch_space = 0; /* 1024 bytes */

    wm_state->thread3.dispatch_grf_start_reg = 2; /* XXX */
    wm_state->thread3.const_urb_entry_read_length = 4;
    wm_state->thread3.const_urb_entry_read_offset = 0;
    wm_state->thread3.urb_entry_read_length = 1; /* XXX */
    wm_state->thread3.urb_entry_read_offset = 0; /* XXX */

    wm_state->wm4.stats_enable = 0;
    wm_state->wm4.sampler_state_pointer = render_state->wm.sampler->offset >> 5;

    if (IS_IRONLAKE(i965->intel.device_info)) {
        wm_state->wm4.sampler_count = 0;        /* hardware requirement */
    } else {
        wm_state->wm4.sampler_count = (render_state->wm.sampler_count + 3) / 4;
    }

    wm_state->wm5.max_threads = i965->intel.device_info->max_wm_threads - 1;
    wm_state->wm5.thread_dispatch_enable = 1;
    wm_state->wm5.enable_16_pix = 1;
    wm_state->wm5.enable_8_pix = 0;
    wm_state->wm5.early_depth_test = 1;

    dri_bo_emit_reloc(render_state->wm.state,
                      I915_GEM_DOMAIN_INSTRUCTION, 0,
                      wm_state->thread0.grf_reg_count << 1,
                      offsetof(struct i965_wm_unit_state, thread0),
                      render_state->render_kernels[PS_KERNEL].bo);

    dri_bo_emit_reloc(render_state->wm.state,
                      I915_GEM_DOMAIN_INSTRUCTION, 0,
                      wm_state->wm4.sampler_count << 2,
                      offsetof(struct i965_wm_unit_state, wm4),
                      render_state->wm.sampler);

    dri_bo_unmap(render_state->wm.state);
}

static void
i965_render_cc_viewport(VADriverContextP ctx)
{
    struct i965_driver_data *i965 = i965_driver_data(ctx);
    struct i965_render_state *render_state = &i965->render_state;
    struct i965_cc_viewport *cc_viewport;

    dri_bo_map(render_state->cc.viewport, 1);
    assert(render_state->cc.viewport->virtual);
    cc_viewport = render_state->cc.viewport->virtual;
    memset(cc_viewport, 0, sizeof(*cc_viewport));

    cc_viewport->min_depth = -1.e35;
    cc_viewport->max_depth = 1.e35;

    dri_bo_unmap(render_state->cc.viewport);
}

static void
i965_subpic_render_cc_unit(VADriverContextP ctx)
{
    struct i965_driver_data *i965 = i965_driver_data(ctx);
    struct i965_render_state *render_state = &i965->render_state;
    struct i965_cc_unit_state *cc_state;

    assert(render_state->cc.viewport);

    dri_bo_map(render_state->cc.state, 1);
    assert(render_state->cc.state->virtual);
    cc_state = render_state->cc.state->virtual;
    memset(cc_state, 0, sizeof(*cc_state));

    cc_state->cc0.stencil_enable = 0;   /* disable stencil */
    cc_state->cc2.depth_test = 0;       /* disable depth test */
    cc_state->cc2.logicop_enable = 0;   /* disable logic op */
    cc_state->cc3.ia_blend_enable = 0;  /* blend alpha just like colors */
    cc_state->cc3.blend_enable = 1;     /* enable color blend */
    cc_state->cc3.alpha_test = 0;       /* disable alpha test */
    cc_state->cc3.alpha_test_format = 0; /* 0: ALPHATEST_UNORM8, store the alpha value as UNORM8 */
    cc_state->cc3.alpha_test_func = 5;   /* 5: COMPAREFUNCTION_LESS, pass if less than the reference */
    cc_state->cc4.cc_viewport_state_offset = render_state->cc.viewport->offset >> 5;

    cc_state->cc5.dither_enable = 0;    /* disable dither */
    cc_state->cc5.logicop_func = 0xc;   /* WHITE */
    cc_state->cc5.statistics_enable = 1;
    cc_state->cc5.ia_blend_function = I965_BLENDFUNCTION_ADD;
    cc_state->cc5.ia_src_blend_factor = I965_BLENDFACTOR_DST_ALPHA;
    cc_state->cc5.ia_dest_blend_factor = I965_BLENDFACTOR_DST_ALPHA;

    cc_state->cc6.clamp_post_alpha_blend = 0;
    cc_state->cc6.clamp_pre_alpha_blend = 0;

    /* final color = src_color * src_blend_factor +/- dst_color * dest_blend_factor */
    cc_state->cc6.blend_function = I965_BLENDFUNCTION_ADD;
    cc_state->cc6.src_blend_factor = I965_BLENDFACTOR_SRC_ALPHA;
    cc_state->cc6.dest_blend_factor = I965_BLENDFACTOR_INV_SRC_ALPHA;

    /* alpha test reference */
    cc_state->cc7.alpha_ref.f = 0.0;

    dri_bo_emit_reloc(render_state->cc.state,
                      I915_GEM_DOMAIN_INSTRUCTION, 0,
                      0,
                      offsetof(struct i965_cc_unit_state, cc4),
                      render_state->cc.viewport);

    dri_bo_unmap(render_state->cc.state);
}

static void
i965_render_cc_unit(VADriverContextP ctx)
{
    struct i965_driver_data *i965 = i965_driver_data(ctx);
    struct i965_render_state *render_state = &i965->render_state;
    struct i965_cc_unit_state *cc_state;

    assert(render_state->cc.viewport);

    dri_bo_map(render_state->cc.state, 1);
    assert(render_state->cc.state->virtual);
    cc_state = render_state->cc.state->virtual;
    memset(cc_state, 0, sizeof(*cc_state));

    cc_state->cc0.stencil_enable = 0;   /* disable stencil */
    cc_state->cc2.depth_test = 0;       /* disable depth test */
    cc_state->cc2.logicop_enable = 1;   /* enable logic op */
    cc_state->cc3.ia_blend_enable = 0;  /* blend alpha just like colors */
    cc_state->cc3.blend_enable = 0;     /* disable color blend */
    cc_state->cc3.alpha_test = 0;       /* disable alpha test */
    cc_state->cc4.cc_viewport_state_offset = render_state->cc.viewport->offset >> 5;

    cc_state->cc5.dither_enable = 0;    /* disable dither */
    cc_state->cc5.logicop_func = 0xc;   /* WHITE */
    cc_state->cc5.statistics_enable = 1;
    cc_state->cc5.ia_blend_function = I965_BLENDFUNCTION_ADD;
    cc_state->cc5.ia_src_blend_factor = I965_BLENDFACTOR_ONE;
    cc_state->cc5.ia_dest_blend_factor = I965_BLENDFACTOR_ONE;

    dri_bo_emit_reloc(render_state->cc.state,
                      I915_GEM_DOMAIN_INSTRUCTION, 0,
                      0,
                      offsetof(struct i965_cc_unit_state, cc4),
                      render_state->cc.viewport);

    dri_bo_unmap(render_state->cc.state);
}

static void
i965_render_set_surface_tiling(struct i965_surface_state *ss, unsigned int tiling)
{
    switch (tiling) {
    case I915_TILING_NONE:
        ss->ss3.tiled_surface = 0;
        ss->ss3.tile_walk = 0;
        break;
    case I915_TILING_X:
        ss->ss3.tiled_surface = 1;
        ss->ss3.tile_walk = I965_TILEWALK_XMAJOR;
        break;
    case I915_TILING_Y:
        ss->ss3.tiled_surface = 1;
        ss->ss3.tile_walk = I965_TILEWALK_YMAJOR;
        break;
    }
}

static void
i965_render_set_surface_state(
    struct i965_surface_state *ss,
    dri_bo                    *bo,
    unsigned long              offset,
    unsigned int               width,
    unsigned int               height,
    unsigned int               pitch,
    unsigned int               format,
    unsigned int               flags
)
{
    unsigned int tiling;
    unsigned int swizzle;

    memset(ss, 0, sizeof(*ss));

    switch (flags & (I965_PP_FLAG_TOP_FIELD|I965_PP_FLAG_BOTTOM_FIELD)) {
    case I965_PP_FLAG_BOTTOM_FIELD:
        ss->ss0.vert_line_stride_ofs = 1;
        /* fall-through */
    case I965_PP_FLAG_TOP_FIELD:
        ss->ss0.vert_line_stride = 1;
        height /= 2;
        break;
    }

    ss->ss0.surface_type = I965_SURFACE_2D;
    ss->ss0.surface_format = format;
    ss->ss0.color_blend = 1;

    ss->ss1.base_addr = bo->offset + offset;

    ss->ss2.width = width - 1;
    ss->ss2.height = height - 1;

    ss->ss3.pitch = pitch - 1;

    dri_bo_get_tiling(bo, &tiling, &swizzle);
    i965_render_set_surface_tiling(ss, tiling);
}

static void
gen7_render_set_surface_tiling(struct gen7_surface_state *ss, uint32_t tiling)
{
    switch (tiling) {
    case I915_TILING_NONE:
        ss->ss0.tiled_surface = 0;
        ss->ss0.tile_walk = 0;
        break;
    case I915_TILING_X:
        ss->ss0.tiled_surface = 1;
        ss->ss0.tile_walk = I965_TILEWALK_XMAJOR;
        break;
    case I915_TILING_Y:
        ss->ss0.tiled_surface = 1;
        ss->ss0.tile_walk = I965_TILEWALK_YMAJOR;
        break;
    }
}

/* Set "Shader Channel Select" to the identity mapping; only called for
 * Haswell (see the IS_HASWELL() checks at the call sites). */
void
gen7_render_set_surface_scs(struct gen7_surface_state *ss)
{
    ss->ss7.shader_chanel_select_r = HSW_SCS_RED;
    ss->ss7.shader_chanel_select_g = HSW_SCS_GREEN;
    ss->ss7.shader_chanel_select_b = HSW_SCS_BLUE;
    ss->ss7.shader_chanel_select_a = HSW_SCS_ALPHA;
}

static void
gen7_render_set_surface_state(
    struct gen7_surface_state *ss,
    dri_bo                    *bo,
    unsigned long              offset,
    int                        width,
    int                        height,
    int                        pitch,
    int                        format,
    unsigned int               flags
)
{
    unsigned int tiling;
    unsigned int swizzle;

    memset(ss, 0, sizeof(*ss));

    switch (flags & (I965_PP_FLAG_TOP_FIELD|I965_PP_FLAG_BOTTOM_FIELD)) {
    case I965_PP_FLAG_BOTTOM_FIELD:
        ss->ss0.vert_line_stride_ofs = 1;
        /* fall-through */
    case I965_PP_FLAG_TOP_FIELD:
        ss->ss0.vert_line_stride = 1;
        height /= 2;
        break;
    }

    ss->ss0.surface_type = I965_SURFACE_2D;
    ss->ss0.surface_format = format;

    ss->ss1.base_addr = bo->offset + offset;

    ss->ss2.width = width - 1;
    ss->ss2.height = height - 1;

    ss->ss3.pitch = pitch - 1;

    dri_bo_get_tiling(bo, &tiling, &swizzle);
    gen7_render_set_surface_tiling(ss, tiling);
}

static void
i965_render_src_surface_state(
    VADriverContextP ctx,
    int              index,
    dri_bo          *region,
    unsigned long    offset,
    int              w,
    int              h,
    int              pitch,
    int              format,
    unsigned int     flags
)
{
    struct i965_driver_data *i965 = i965_driver_data(ctx);
    struct i965_render_state *render_state = &i965->render_state;
    void *ss;
    dri_bo *ss_bo = render_state->wm.surface_state_binding_table_bo;

    assert(index < MAX_RENDER_SURFACES);

    dri_bo_map(ss_bo, 1);
    assert(ss_bo->virtual);
    ss = (char *)ss_bo->virtual + SURFACE_STATE_OFFSET(index);

    if (IS_GEN7(i965->intel.device_info)) {
        gen7_render_set_surface_state(ss,
                                      region, offset,
                                      w, h,
                                      pitch, format, flags);
        if (IS_HASWELL(i965->intel.device_info))
            gen7_render_set_surface_scs(ss);
        dri_bo_emit_reloc(ss_bo,
                          I915_GEM_DOMAIN_SAMPLER, 0,
                          offset,
                          SURFACE_STATE_OFFSET(index) + offsetof(struct gen7_surface_state, ss1),
                          region);
    } else {
        i965_render_set_surface_state(ss,
                                      region, offset,
                                      w, h,
                                      pitch, format, flags);
        dri_bo_emit_reloc(ss_bo,
                          I915_GEM_DOMAIN_SAMPLER, 0,
                          offset,
                          SURFACE_STATE_OFFSET(index) + offsetof(struct i965_surface_state, ss1),
                          region);
    }

    ((unsigned int *)((char *)ss_bo->virtual + BINDING_TABLE_OFFSET))[index] = SURFACE_STATE_OFFSET(index);
    dri_bo_unmap(ss_bo);
    render_state->wm.sampler_count++;
}

static void
i965_render_src_surfaces_state(
    VADriverContextP ctx,
    struct object_surface *obj_surface,
    unsigned int     flags
)
{
    int region_pitch;
    int rw, rh;
    dri_bo *region;

    region_pitch = obj_surface->width;
    rw = obj_surface->orig_width;
    rh = obj_surface->orig_height;
    region = obj_surface->bo;

    i965_render_src_surface_state(ctx, 1, region, 0, rw, rh, region_pitch, I965_SURFACEFORMAT_R8_UNORM, flags);     /* Y */
    i965_render_src_surface_state(ctx, 2, region, 0, rw, rh, region_pitch, I965_SURFACEFORMAT_R8_UNORM, flags);

    if (obj_surface->fourcc == VA_FOURCC_Y800) /* single plane for grayscale */
        return;

    if (obj_surface->fourcc == VA_FOURCC_NV12) {
        i965_render_src_surface_state(ctx, 3, region,
                                      region_pitch * obj_surface->y_cb_offset,
                                      obj_surface->cb_cr_width, obj_surface->cb_cr_height, obj_surface->cb_cr_pitch,
                                      I965_SURFACEFORMAT_R8G8_UNORM, flags); /* UV */
        i965_render_src_surface_state(ctx, 4, region,
                                      region_pitch * obj_surface->y_cb_offset,
                                      obj_surface->cb_cr_width, obj_surface->cb_cr_height, obj_surface->cb_cr_pitch,
                                      I965_SURFACEFORMAT_R8G8_UNORM, flags);
    } else {
        i965_render_src_surface_state(ctx, 3, region,
                                      region_pitch * obj_surface->y_cb_offset,
                                      obj_surface->cb_cr_width, obj_surface->cb_cr_height, obj_surface->cb_cr_pitch,
                                      I965_SURFACEFORMAT_R8_UNORM, flags); /* U */
        i965_render_src_surface_state(ctx, 4, region,
                                      region_pitch * obj_surface->y_cb_offset,
                                      obj_surface->cb_cr_width, obj_surface->cb_cr_height, obj_surface->cb_cr_pitch,
                                      I965_SURFACEFORMAT_R8_UNORM, flags);
        i965_render_src_surface_state(ctx, 5, region,
                                      region_pitch * obj_surface->y_cr_offset,
                                      obj_surface->cb_cr_width, obj_surface->cb_cr_height, obj_surface->cb_cr_pitch,
                                      I965_SURFACEFORMAT_R8_UNORM, flags); /* V */
        i965_render_src_surface_state(ctx, 6, region,
                                      region_pitch * obj_surface->y_cr_offset,
                                      obj_surface->cb_cr_width, obj_surface->cb_cr_height, obj_surface->cb_cr_pitch,
                                      I965_SURFACEFORMAT_R8_UNORM, flags);
    }
}

static void
i965_subpic_render_src_surfaces_state(VADriverContextP ctx,
                                      struct object_surface *obj_surface)
{
    dri_bo *subpic_region;
    unsigned int index;
    struct object_subpic *obj_subpic;
    struct object_image *obj_image;

    /* check the surface before dereferencing it */
    assert(obj_surface);
    assert(obj_surface->bo);

    index = obj_surface->subpic_render_idx;
    obj_subpic = obj_surface->obj_subpic[index];
    obj_image = obj_subpic->obj_image;
    subpic_region = obj_image->bo;

    /* subpicture surface */
    i965_render_src_surface_state(ctx, 1, subpic_region, 0, obj_subpic->width, obj_subpic->height, obj_subpic->pitch, obj_subpic->format, 0);
    i965_render_src_surface_state(ctx, 2, subpic_region, 0, obj_subpic->width, obj_subpic->height, obj_subpic->pitch, obj_subpic->format, 0);
}

static void
i965_render_dest_surface_state(VADriverContextP ctx, int index)
{
    struct i965_driver_data *i965 = i965_driver_data(ctx);
    struct i965_render_state *render_state = &i965->render_state;
    struct intel_region *dest_region = render_state->draw_region;
    void *ss;
    dri_bo *ss_bo = render_state->wm.surface_state_binding_table_bo;
    int format;

    assert(index < MAX_RENDER_SURFACES);

    if (dest_region->cpp == 2) {
        format = I965_SURFACEFORMAT_B5G6R5_UNORM;
    } else {
        format = I965_SURFACEFORMAT_B8G8R8A8_UNORM;
    }

    dri_bo_map(ss_bo, 1);
    assert(ss_bo->virtual);
    ss = (char *)ss_bo->virtual + SURFACE_STATE_OFFSET(index);

    if (IS_GEN7(i965->intel.device_info)) {
        gen7_render_set_surface_state(ss,
                                      dest_region->bo, 0,
                                      dest_region->width, dest_region->height,
                                      dest_region->pitch, format, 0);
        if (IS_HASWELL(i965->intel.device_info))
            gen7_render_set_surface_scs(ss);
        dri_bo_emit_reloc(ss_bo,
                          I915_GEM_DOMAIN_RENDER, I915_GEM_DOMAIN_RENDER,
                          0,
                          SURFACE_STATE_OFFSET(index) + offsetof(struct gen7_surface_state, ss1),
                          dest_region->bo);
    } else {
        i965_render_set_surface_state(ss,
                                      dest_region->bo, 0,
                                      dest_region->width, dest_region->height,
                                      dest_region->pitch, format, 0);
        dri_bo_emit_reloc(ss_bo,
                          I915_GEM_DOMAIN_RENDER, I915_GEM_DOMAIN_RENDER,
                          0,
                          SURFACE_STATE_OFFSET(index) + offsetof(struct i965_surface_state, ss1),
                          dest_region->bo);
    }

    ((unsigned int *)((char *)ss_bo->virtual + BINDING_TABLE_OFFSET))[index] = SURFACE_STATE_OFFSET(index);
    dri_bo_unmap(ss_bo);
}

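/*
 * Emit the three vertices of a RECTLIST primitive.  Each vertex is four
 * floats: the (s, t) texture coordinates followed by the (x, y) screen
 * coordinates.  The rotation attribute selects which corners of the
 * texture map to the bottom-right, bottom-left and top-left corners of
 * the destination rectangle.
 */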
static void
i965_fill_vertex_buffer(
    VADriverContextP ctx,
    float tex_coords[4], /* [(u1,v1);(u2,v2)] */
    float vid_coords[4]  /* [(x1,y1);(x2,y2)] */
)
{
    struct i965_driver_data * const i965 = i965_driver_data(ctx);
    float vb[12];

    enum { X1, Y1, X2, Y2 };

    static const unsigned int g_rotation_indices[][6] = {
        [VA_ROTATION_NONE] = { X2, Y2, X1, Y2, X1, Y1 },
        [VA_ROTATION_90]   = { X2, Y1, X2, Y2, X1, Y2 },
        [VA_ROTATION_180]  = { X1, Y1, X2, Y1, X2, Y2 },
        [VA_ROTATION_270]  = { X1, Y2, X1, Y1, X2, Y1 },
    };

    const unsigned int * const rotation_indices =
        g_rotation_indices[i965->rotation_attrib->value];

    vb[0]  = tex_coords[rotation_indices[0]]; /* bottom-right corner */
    vb[1]  = tex_coords[rotation_indices[1]];
    vb[2]  = vid_coords[X2];
    vb[3]  = vid_coords[Y2];

    vb[4]  = tex_coords[rotation_indices[2]]; /* bottom-left corner */
    vb[5]  = tex_coords[rotation_indices[3]];
    vb[6]  = vid_coords[X1];
    vb[7]  = vid_coords[Y2];

    vb[8]  = tex_coords[rotation_indices[4]]; /* top-left corner */
    vb[9]  = tex_coords[rotation_indices[5]];
    vb[10] = vid_coords[X1];
    vb[11] = vid_coords[Y1];

    dri_bo_subdata(i965->render_state.vb.vertex_buffer, 0, sizeof(vb), vb);
}

static void
i965_subpic_render_upload_vertex(VADriverContextP ctx,
                                 struct object_surface *obj_surface,
                                 const VARectangle *output_rect)
{
    unsigned int index = obj_surface->subpic_render_idx;
    struct object_subpic     *obj_subpic   = obj_surface->obj_subpic[index];
    float tex_coords[4], vid_coords[4];
    VARectangle dst_rect;

    if (obj_subpic->flags & VA_SUBPICTURE_DESTINATION_IS_SCREEN_COORD)
        dst_rect = obj_subpic->dst_rect;
    else {
        const float sx  = (float)output_rect->width  / obj_surface->orig_width;
        const float sy  = (float)output_rect->height / obj_surface->orig_height;
        dst_rect.x      = output_rect->x + sx * obj_subpic->dst_rect.x;
        dst_rect.y      = output_rect->y + sy * obj_subpic->dst_rect.y;
        dst_rect.width  = sx * obj_subpic->dst_rect.width;
        dst_rect.height = sy * obj_subpic->dst_rect.height;
    }

    tex_coords[0] = (float)obj_subpic->src_rect.x / obj_subpic->width;
    tex_coords[1] = (float)obj_subpic->src_rect.y / obj_subpic->height;
    tex_coords[2] = (float)(obj_subpic->src_rect.x + obj_subpic->src_rect.width) / obj_subpic->width;
    tex_coords[3] = (float)(obj_subpic->src_rect.y + obj_subpic->src_rect.height) / obj_subpic->height;

    vid_coords[0] = dst_rect.x;
    vid_coords[1] = dst_rect.y;
    vid_coords[2] = (float)(dst_rect.x + dst_rect.width);
    vid_coords[3] = (float)(dst_rect.y + dst_rect.height);

    i965_fill_vertex_buffer(ctx, tex_coords, vid_coords);
}

static void
i965_render_upload_vertex(
    VADriverContextP   ctx,
    struct object_surface *obj_surface,
    const VARectangle *src_rect,
    const VARectangle *dst_rect
)
{
    struct i965_driver_data *i965 = i965_driver_data(ctx);
    struct i965_render_state *render_state = &i965->render_state;
    struct intel_region *dest_region = render_state->draw_region;
    float tex_coords[4], vid_coords[4];
    int width, height;

    width  = obj_surface->orig_width;
    height = obj_surface->orig_height;

    tex_coords[0] = (float)src_rect->x / width;
    tex_coords[1] = (float)src_rect->y / height;
    tex_coords[2] = (float)(src_rect->x + src_rect->width) / width;
    tex_coords[3] = (float)(src_rect->y + src_rect->height) / height;

    vid_coords[0] = dest_region->x + dst_rect->x;
    vid_coords[1] = dest_region->y + dst_rect->y;
    vid_coords[2] = vid_coords[0] + dst_rect->width;
    vid_coords[3] = vid_coords[1] + dst_rect->height;

    i965_fill_vertex_buffer(ctx, tex_coords, vid_coords);
}

#define PI  3.1415926

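/*
 * Upload the pixel shader constant buffer (CURBE).  Word 0 selects the
 * source layout (2 = grayscale Y800, 1 = interleaved-UV NV12, 0 = planar
 * YUV) and word 1 tells the shader to skip the color balance pass when
 * every attribute holds its default; the four floats at float offset 4
 * are the color balance coefficients, and the 3x4 YUV -> RGB matrix
 * starts at float offset 8.
 */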
static void
i965_render_upload_constants(VADriverContextP ctx,
                             struct object_surface *obj_surface,
                             unsigned int flags)
{
    struct i965_driver_data *i965 = i965_driver_data(ctx);
    struct i965_render_state *render_state = &i965->render_state;
    unsigned short *constant_buffer;
    float *color_balance_base;
    float contrast = (float)i965->contrast_attrib->value / DEFAULT_CONTRAST;
    float brightness = (float)i965->brightness_attrib->value / 255; /* YUV is float in the shader */
    float hue = (float)i965->hue_attrib->value / 180 * PI;
    float saturation = (float)i965->saturation_attrib->value / DEFAULT_SATURATION;
    float *yuv_to_rgb;
    unsigned int color_flag;

    dri_bo_map(render_state->curbe.bo, 1);
    assert(render_state->curbe.bo->virtual);
    constant_buffer = render_state->curbe.bo->virtual;

    if (obj_surface->subsampling == SUBSAMPLE_YUV400) {
        assert(obj_surface->fourcc == VA_FOURCC_Y800);

        constant_buffer[0] = 2;
    } else {
        if (obj_surface->fourcc == VA_FOURCC_NV12)
            constant_buffer[0] = 1;
        else
            constant_buffer[0] = 0;
    }

    if (i965->contrast_attrib->value == DEFAULT_CONTRAST &&
        i965->brightness_attrib->value == DEFAULT_BRIGHTNESS &&
        i965->hue_attrib->value == DEFAULT_HUE &&
        i965->saturation_attrib->value == DEFAULT_SATURATION)
        constant_buffer[1] = 1; /* skip color balance transformation */
    else
        constant_buffer[1] = 0;

    color_balance_base = (float *)constant_buffer + 4;
    *color_balance_base++ = contrast;
    *color_balance_base++ = brightness;
    *color_balance_base++ = cos(hue) * contrast * saturation;
    *color_balance_base++ = sin(hue) * contrast * saturation;

    color_flag = flags & VA_SRC_COLOR_MASK;
    yuv_to_rgb = (float *)constant_buffer + 8;
    if (color_flag == VA_SRC_BT709)
        memcpy(yuv_to_rgb, yuv_to_rgb_bt709, sizeof(yuv_to_rgb_bt709));
    else if (color_flag == VA_SRC_SMPTE_240)
        memcpy(yuv_to_rgb, yuv_to_rgb_smpte_240, sizeof(yuv_to_rgb_smpte_240));
    else
        memcpy(yuv_to_rgb, yuv_to_rgb_bt601, sizeof(yuv_to_rgb_bt601));

    dri_bo_unmap(render_state->curbe.bo);
}

static void
i965_subpic_render_upload_constants(VADriverContextP ctx,
                                    struct object_surface *obj_surface)
{
    struct i965_driver_data *i965 = i965_driver_data(ctx);
    struct i965_render_state *render_state = &i965->render_state;
    float *constant_buffer;
    float global_alpha = 1.0;
    unsigned int index = obj_surface->subpic_render_idx;
    struct object_subpic *obj_subpic = obj_surface->obj_subpic[index];

    if (obj_subpic->flags & VA_SUBPICTURE_GLOBAL_ALPHA) {
        global_alpha = obj_subpic->global_alpha;
    }

    dri_bo_map(render_state->curbe.bo, 1);

    assert(render_state->curbe.bo->virtual);
    constant_buffer = render_state->curbe.bo->virtual;
    *constant_buffer = global_alpha;

    dri_bo_unmap(render_state->curbe.bo);
}

static void
i965_surface_render_state_setup(
    VADriverContextP   ctx,
    struct object_surface *obj_surface,
    const VARectangle *src_rect,
    const VARectangle *dst_rect,
    unsigned int       flags
)
{
    i965_render_vs_unit(ctx);
    i965_render_sf_unit(ctx);
    i965_render_dest_surface_state(ctx, 0);
    i965_render_src_surfaces_state(ctx, obj_surface, flags);
    i965_render_sampler(ctx);
    i965_render_wm_unit(ctx);
    i965_render_cc_viewport(ctx);
    i965_render_cc_unit(ctx);
    i965_render_upload_vertex(ctx, obj_surface, src_rect, dst_rect);
    i965_render_upload_constants(ctx, obj_surface, flags);
}

static void
i965_subpic_render_state_setup(
    VADriverContextP   ctx,
    struct object_surface *obj_surface,
    const VARectangle *src_rect,
    const VARectangle *dst_rect
)
{
    i965_render_vs_unit(ctx);
    i965_render_sf_unit(ctx);
    i965_render_dest_surface_state(ctx, 0);
    i965_subpic_render_src_surfaces_state(ctx, obj_surface);
    i965_render_sampler(ctx);
    i965_subpic_render_wm_unit(ctx);
    i965_render_cc_viewport(ctx);
    i965_subpic_render_cc_unit(ctx);
    i965_subpic_render_upload_constants(ctx, obj_surface);
    i965_subpic_render_upload_vertex(ctx, obj_surface, dst_rect);
}

static void
i965_render_pipeline_select(VADriverContextP ctx)
{
    struct i965_driver_data *i965 = i965_driver_data(ctx);
    struct intel_batchbuffer *batch = i965->batch;

    BEGIN_BATCH(batch, 1);
    OUT_BATCH(batch, CMD_PIPELINE_SELECT | PIPELINE_SELECT_3D);
    ADVANCE_BATCH(batch);
}

static void
i965_render_state_sip(VADriverContextP ctx)
{
    struct i965_driver_data *i965 = i965_driver_data(ctx);
    struct intel_batchbuffer *batch = i965->batch;

    BEGIN_BATCH(batch, 2);
    OUT_BATCH(batch, CMD_STATE_SIP | 0);
    OUT_BATCH(batch, 0);
    ADVANCE_BATCH(batch);
}

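/*
 * Program STATE_BASE_ADDRESS so that the surface state base points at the
 * combined surface-state/binding-table buffer; SURFACE_STATE_OFFSET() and
 * BINDING_TABLE_OFFSET are relative to this base.
 */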
static void
i965_render_state_base_address(VADriverContextP ctx)
{
    struct i965_driver_data *i965 = i965_driver_data(ctx);
    struct intel_batchbuffer *batch = i965->batch;
    struct i965_render_state *render_state = &i965->render_state;

    if (IS_IRONLAKE(i965->intel.device_info)) {
        BEGIN_BATCH(batch, 8);
        OUT_BATCH(batch, CMD_STATE_BASE_ADDRESS | 6);
        OUT_BATCH(batch, 0 | BASE_ADDRESS_MODIFY);
        OUT_RELOC(batch, render_state->wm.surface_state_binding_table_bo, I915_GEM_DOMAIN_INSTRUCTION, 0, BASE_ADDRESS_MODIFY);
        OUT_BATCH(batch, 0 | BASE_ADDRESS_MODIFY);
        OUT_BATCH(batch, 0 | BASE_ADDRESS_MODIFY);
        OUT_BATCH(batch, 0 | BASE_ADDRESS_MODIFY);
        OUT_BATCH(batch, 0 | BASE_ADDRESS_MODIFY);
        OUT_BATCH(batch, 0 | BASE_ADDRESS_MODIFY);
        ADVANCE_BATCH(batch);
    } else {
        BEGIN_BATCH(batch, 6);
        OUT_BATCH(batch, CMD_STATE_BASE_ADDRESS | 4);
        OUT_BATCH(batch, 0 | BASE_ADDRESS_MODIFY);
        OUT_RELOC(batch, render_state->wm.surface_state_binding_table_bo, I915_GEM_DOMAIN_INSTRUCTION, 0, BASE_ADDRESS_MODIFY);
        OUT_BATCH(batch, 0 | BASE_ADDRESS_MODIFY);
        OUT_BATCH(batch, 0 | BASE_ADDRESS_MODIFY);
        OUT_BATCH(batch, 0 | BASE_ADDRESS_MODIFY);
        ADVANCE_BATCH(batch);
    }
}

static void
i965_render_binding_table_pointers(VADriverContextP ctx)
{
    struct i965_driver_data *i965 = i965_driver_data(ctx);
    struct intel_batchbuffer *batch = i965->batch;

    BEGIN_BATCH(batch, 6);
    OUT_BATCH(batch, CMD_BINDING_TABLE_POINTERS | 4);
    OUT_BATCH(batch, 0); /* vs */
    OUT_BATCH(batch, 0); /* gs */
    OUT_BATCH(batch, 0); /* clip */
    OUT_BATCH(batch, 0); /* sf */
    OUT_BATCH(batch, BINDING_TABLE_OFFSET);
    ADVANCE_BATCH(batch);
}

static void
i965_render_constant_color(VADriverContextP ctx)
{
    struct i965_driver_data *i965 = i965_driver_data(ctx);
    struct intel_batchbuffer *batch = i965->batch;

    BEGIN_BATCH(batch, 5);
    OUT_BATCH(batch, CMD_CONSTANT_COLOR | 3);
    OUT_BATCH(batch, float_to_uint(1.0));
    OUT_BATCH(batch, float_to_uint(0.0));
    OUT_BATCH(batch, float_to_uint(1.0));
    OUT_BATCH(batch, float_to_uint(1.0));
    ADVANCE_BATCH(batch);
}

static void
i965_render_pipelined_pointers(VADriverContextP ctx)
{
    struct i965_driver_data *i965 = i965_driver_data(ctx);
    struct intel_batchbuffer *batch = i965->batch;
    struct i965_render_state *render_state = &i965->render_state;

    BEGIN_BATCH(batch, 7);
    OUT_BATCH(batch, CMD_PIPELINED_POINTERS | 5);
    OUT_RELOC(batch, render_state->vs.state, I915_GEM_DOMAIN_INSTRUCTION, 0, 0);
    OUT_BATCH(batch, 0);  /* disable GS */
    OUT_BATCH(batch, 0);  /* disable CLIP */
    OUT_RELOC(batch, render_state->sf.state, I915_GEM_DOMAIN_INSTRUCTION, 0, 0);
    OUT_RELOC(batch, render_state->wm.state, I915_GEM_DOMAIN_INSTRUCTION, 0, 0);
    OUT_RELOC(batch, render_state->cc.state, I915_GEM_DOMAIN_INSTRUCTION, 0, 0);
    ADVANCE_BATCH(batch);
}

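/*
 * Program the URB fences.  Each fence value is the cumulative end offset
 * of a stage's allocation, with the stages laid out back to back in the
 * order VS -> GS -> CLIP -> SF -> CS.
 */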
static void
i965_render_urb_layout(VADriverContextP ctx)
{
    struct i965_driver_data *i965 = i965_driver_data(ctx);
    struct intel_batchbuffer *batch = i965->batch;
    int urb_vs_start, urb_vs_size;
    int urb_gs_start, urb_gs_size;
    int urb_clip_start, urb_clip_size;
    int urb_sf_start, urb_sf_size;
    int urb_cs_start, urb_cs_size;

    urb_vs_start = 0;
    urb_vs_size = URB_VS_ENTRIES * URB_VS_ENTRY_SIZE;
    urb_gs_start = urb_vs_start + urb_vs_size;
    urb_gs_size = URB_GS_ENTRIES * URB_GS_ENTRY_SIZE;
    urb_clip_start = urb_gs_start + urb_gs_size;
    urb_clip_size = URB_CLIP_ENTRIES * URB_CLIP_ENTRY_SIZE;
    urb_sf_start = urb_clip_start + urb_clip_size;
    urb_sf_size = URB_SF_ENTRIES * URB_SF_ENTRY_SIZE;
    urb_cs_start = urb_sf_start + urb_sf_size;
    urb_cs_size = URB_CS_ENTRIES * URB_CS_ENTRY_SIZE;

    BEGIN_BATCH(batch, 3);
    OUT_BATCH(batch,
              CMD_URB_FENCE |
              UF0_CS_REALLOC |
              UF0_SF_REALLOC |
              UF0_CLIP_REALLOC |
              UF0_GS_REALLOC |
              UF0_VS_REALLOC |
              1);
    OUT_BATCH(batch,
              ((urb_clip_start + urb_clip_size) << UF1_CLIP_FENCE_SHIFT) |
              ((urb_gs_start + urb_gs_size) << UF1_GS_FENCE_SHIFT) |
              ((urb_vs_start + urb_vs_size) << UF1_VS_FENCE_SHIFT));
    OUT_BATCH(batch,
              ((urb_cs_start + urb_cs_size) << UF2_CS_FENCE_SHIFT) |
              ((urb_sf_start + urb_sf_size) << UF2_SF_FENCE_SHIFT));
    ADVANCE_BATCH(batch);
}

static void
i965_render_cs_urb_layout(VADriverContextP ctx)
{
    struct i965_driver_data *i965 = i965_driver_data(ctx);
    struct intel_batchbuffer *batch = i965->batch;

    BEGIN_BATCH(batch, 2);
    OUT_BATCH(batch, CMD_CS_URB_STATE | 0);
    OUT_BATCH(batch,
              ((URB_CS_ENTRY_SIZE - 1) << 4) |          /* URB Entry Allocation Size */
              (URB_CS_ENTRIES << 0));                /* Number of URB Entries */
    ADVANCE_BATCH(batch);
}

static void
i965_render_constant_buffer(VADriverContextP ctx)
{
    struct i965_driver_data *i965 = i965_driver_data(ctx);
    struct intel_batchbuffer *batch = i965->batch;
    struct i965_render_state *render_state = &i965->render_state;

    BEGIN_BATCH(batch, 2);
    OUT_BATCH(batch, CMD_CONSTANT_BUFFER | (1 << 8) | (2 - 2));
    OUT_RELOC(batch, render_state->curbe.bo,
              I915_GEM_DOMAIN_INSTRUCTION, 0,
              URB_CS_ENTRY_SIZE - 1);
    ADVANCE_BATCH(batch);
}

static void
i965_render_drawing_rectangle(VADriverContextP ctx)
{
    struct i965_driver_data *i965 = i965_driver_data(ctx);
    struct intel_batchbuffer *batch = i965->batch;
    struct i965_render_state *render_state = &i965->render_state;
    struct intel_region *dest_region = render_state->draw_region;

    BEGIN_BATCH(batch, 4);
    OUT_BATCH(batch, CMD_DRAWING_RECTANGLE | 2);
    OUT_BATCH(batch, 0x00000000);
    OUT_BATCH(batch, (dest_region->width - 1) | (dest_region->height - 1) << 16);
    OUT_BATCH(batch, 0x00000000);
    ADVANCE_BATCH(batch);
}

static void
i965_render_vertex_elements(VADriverContextP ctx)
{
    struct i965_driver_data *i965 = i965_driver_data(ctx);
    struct intel_batchbuffer *batch = i965->batch;

    if (IS_IRONLAKE(i965->intel.device_info)) {
        BEGIN_BATCH(batch, 5);
        OUT_BATCH(batch, CMD_VERTEX_ELEMENTS | 3);
        /* offset 0: X,Y -> {X, Y, 1.0, 1.0} */
        OUT_BATCH(batch, (0 << VE0_VERTEX_BUFFER_INDEX_SHIFT) |
                  VE0_VALID |
                  (I965_SURFACEFORMAT_R32G32_FLOAT << VE0_FORMAT_SHIFT) |
                  (0 << VE0_OFFSET_SHIFT));
        OUT_BATCH(batch, (I965_VFCOMPONENT_STORE_SRC << VE1_VFCOMPONENT_0_SHIFT) |
                  (I965_VFCOMPONENT_STORE_SRC << VE1_VFCOMPONENT_1_SHIFT) |
                  (I965_VFCOMPONENT_STORE_1_FLT << VE1_VFCOMPONENT_2_SHIFT) |
                  (I965_VFCOMPONENT_STORE_1_FLT << VE1_VFCOMPONENT_3_SHIFT));
        /* offset 8: S0, T0 -> {S0, T0, 1.0, 1.0} */
        OUT_BATCH(batch, (0 << VE0_VERTEX_BUFFER_INDEX_SHIFT) |
                  VE0_VALID |
                  (I965_SURFACEFORMAT_R32G32_FLOAT << VE0_FORMAT_SHIFT) |
                  (8 << VE0_OFFSET_SHIFT));
        OUT_BATCH(batch, (I965_VFCOMPONENT_STORE_SRC << VE1_VFCOMPONENT_0_SHIFT) |
                  (I965_VFCOMPONENT_STORE_SRC << VE1_VFCOMPONENT_1_SHIFT) |
                  (I965_VFCOMPONENT_STORE_1_FLT << VE1_VFCOMPONENT_2_SHIFT) |
                  (I965_VFCOMPONENT_STORE_1_FLT << VE1_VFCOMPONENT_3_SHIFT));
        ADVANCE_BATCH(batch);
    } else {
        BEGIN_BATCH(batch, 5);
        OUT_BATCH(batch, CMD_VERTEX_ELEMENTS | 3);
        /* offset 0: X,Y -> {X, Y, 1.0, 1.0} */
        OUT_BATCH(batch, (0 << VE0_VERTEX_BUFFER_INDEX_SHIFT) |
                  VE0_VALID |
                  (I965_SURFACEFORMAT_R32G32_FLOAT << VE0_FORMAT_SHIFT) |
                  (0 << VE0_OFFSET_SHIFT));
        OUT_BATCH(batch, (I965_VFCOMPONENT_STORE_SRC << VE1_VFCOMPONENT_0_SHIFT) |
                  (I965_VFCOMPONENT_STORE_SRC << VE1_VFCOMPONENT_1_SHIFT) |
                  (I965_VFCOMPONENT_STORE_1_FLT << VE1_VFCOMPONENT_2_SHIFT) |
                  (I965_VFCOMPONENT_STORE_1_FLT << VE1_VFCOMPONENT_3_SHIFT) |
                  (0 << VE1_DESTINATION_ELEMENT_OFFSET_SHIFT));
        /* offset 8: S0, T0 -> {S0, T0, 1.0, 1.0} */
        OUT_BATCH(batch, (0 << VE0_VERTEX_BUFFER_INDEX_SHIFT) |
                  VE0_VALID |
                  (I965_SURFACEFORMAT_R32G32_FLOAT << VE0_FORMAT_SHIFT) |
                  (8 << VE0_OFFSET_SHIFT));
        OUT_BATCH(batch, (I965_VFCOMPONENT_STORE_SRC << VE1_VFCOMPONENT_0_SHIFT) |
                  (I965_VFCOMPONENT_STORE_SRC << VE1_VFCOMPONENT_1_SHIFT) |
                  (I965_VFCOMPONENT_STORE_1_FLT << VE1_VFCOMPONENT_2_SHIFT) |
                  (I965_VFCOMPONENT_STORE_1_FLT << VE1_VFCOMPONENT_3_SHIFT) |
                  (4 << VE1_DESTINATION_ELEMENT_OFFSET_SHIFT));
        ADVANCE_BATCH(batch);
    }
}

1449 static void
1450 i965_render_upload_image_palette(
1451     VADriverContextP ctx,
1452     struct object_image *obj_image,
1453     unsigned int     alpha
1454 )
1455 {
1456     struct i965_driver_data *i965 = i965_driver_data(ctx);
1457     struct intel_batchbuffer *batch = i965->batch;
1458     unsigned int i;
1459
1460     assert(obj_image);
1461
1462     if (!obj_image) /* keep the runtime check: assert() compiles out of release builds */
1463         return;
1464
1465     if (obj_image->image.num_palette_entries == 0)
1466         return;
1467
1468     BEGIN_BATCH(batch, 1 + obj_image->image.num_palette_entries);
1469     OUT_BATCH(batch, CMD_SAMPLER_PALETTE_LOAD | (obj_image->image.num_palette_entries - 1));
1470     /* Fill the palette: each DWORD carries the color in bits 0-23
1471      * and the caller-supplied alpha in bits 24-31. */
1472     for (i = 0; i < obj_image->image.num_palette_entries; i++)
1473         OUT_BATCH(batch, (alpha << 24) | obj_image->palette[i]);
1474     ADVANCE_BATCH(batch);
1475 }
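
/*
 * Worked example for the palette load above: with alpha = 0xff and a
 * palette entry of 0x0000ff (blue in the low 24 bits), the DWORD emitted
 * is (0xff << 24) | 0x0000ff = 0xff0000ff.
 */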
1476
1477 static void
1478 i965_render_startup(VADriverContextP ctx)
1479 {
1480     struct i965_driver_data *i965 = i965_driver_data(ctx);
1481     struct intel_batchbuffer *batch = i965->batch;
1482     struct i965_render_state *render_state = &i965->render_state;
1483
1484     BEGIN_BATCH(batch, 11);
1485     OUT_BATCH(batch, CMD_VERTEX_BUFFERS | 3);
1486     OUT_BATCH(batch, 
1487               (0 << VB0_BUFFER_INDEX_SHIFT) |
1488               VB0_VERTEXDATA |
1489               ((4 * 4) << VB0_BUFFER_PITCH_SHIFT));
1490     OUT_RELOC(batch, render_state->vb.vertex_buffer, I915_GEM_DOMAIN_VERTEX, 0, 0);
1491
1492     if (IS_IRONLAKE(i965->intel.device_info))
1493         OUT_RELOC(batch, render_state->vb.vertex_buffer, I915_GEM_DOMAIN_VERTEX, 0, 12 * 4);
1494     else
1495         OUT_BATCH(batch, 3);
1496
1497     OUT_BATCH(batch, 0);
1498
1499     OUT_BATCH(batch, 
1500               CMD_3DPRIMITIVE |
1501               _3DPRIMITIVE_VERTEX_SEQUENTIAL |
1502               (_3DPRIM_RECTLIST << _3DPRIMITIVE_TOPOLOGY_SHIFT) |
1503               (0 << 9) |
1504               4);
1505     OUT_BATCH(batch, 3); /* vertex count per instance */
1506     OUT_BATCH(batch, 0); /* start vertex offset */
1507     OUT_BATCH(batch, 1); /* single instance */
1508     OUT_BATCH(batch, 0); /* start instance location */
1509     OUT_BATCH(batch, 0); /* index buffer offset, ignored */
1510     ADVANCE_BATCH(batch);
1511 }
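
/*
 * i965_render_startup() assumes the vertex buffer was already filled by
 * i965_render_upload_vertex().  A minimal sketch of the three 16-byte
 * {x, y, s, t} vertices the RECTLIST consumes, assuming a destination
 * rectangle (x, y, w, h) textured with the full source (the ordering shown
 * is the conventional RECTLIST layout, not a verbatim copy of the upload
 * routine):
 *
 *     float vb[12] = {
 *         (float)(x + w), (float)(y + h), 1.0f, 1.0f,   // lower right
 *         (float)x,       (float)(y + h), 0.0f, 1.0f,   // lower left
 *         (float)x,       (float)y,       0.0f, 0.0f,   // upper left
 *     };
 *
 * The Ironlake-only second relocation points at byte 12 * 4 = 48, i.e.
 * the end of those three vertices.
 */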
1512
1513 static void 
1514 i965_clear_dest_region(VADriverContextP ctx)
1515 {
1516     struct i965_driver_data *i965 = i965_driver_data(ctx);
1517     struct intel_batchbuffer *batch = i965->batch;
1518     struct i965_render_state *render_state = &i965->render_state;
1519     struct intel_region *dest_region = render_state->draw_region;
1520     unsigned int blt_cmd, br13;
1521     int pitch;
1522
1523     blt_cmd = XY_COLOR_BLT_CMD;
1524     br13 = 0xf0 << 16; /* raster operation: PATCOPY (solid fill) */
1525     pitch = dest_region->pitch;
1526
1527     if (dest_region->cpp == 4) {
1528         br13 |= BR13_8888;
1529         blt_cmd |= (XY_COLOR_BLT_WRITE_RGB | XY_COLOR_BLT_WRITE_ALPHA);
1530     } else {
1531         assert(dest_region->cpp == 2);
1532         br13 |= BR13_565;
1533     }
1534
1535     if (dest_region->tiling != I915_TILING_NONE) {
1536         blt_cmd |= XY_COLOR_BLT_DST_TILED;
1537         pitch /= 4; /* tiled destinations take the pitch in DWORDs */
1538     }
1539
1540     br13 |= pitch;
1541
1542     if (IS_GEN6(i965->intel.device_info) ||
1543         IS_GEN7(i965->intel.device_info) ||
1544         IS_GEN8(i965->intel.device_info)) {
1545         intel_batchbuffer_start_atomic_blt(batch, 24);
1546         BEGIN_BLT_BATCH(batch, 6);
1547     } else {
1548         intel_batchbuffer_start_atomic(batch, 24);
1549         BEGIN_BATCH(batch, 6);
1550     }
1551
1552     OUT_BATCH(batch, blt_cmd);
1553     OUT_BATCH(batch, br13);
1554     OUT_BATCH(batch, (dest_region->y << 16) | (dest_region->x));
1555     OUT_BATCH(batch, ((dest_region->y + dest_region->height) << 16) |
1556               (dest_region->x + dest_region->width));
1557     OUT_RELOC(batch, dest_region->bo, 
1558               I915_GEM_DOMAIN_RENDER, I915_GEM_DOMAIN_RENDER,
1559               0);
1560     OUT_BATCH(batch, 0x0); /* fill color: all zero (black) */
1561     ADVANCE_BATCH(batch);
1562     intel_batchbuffer_end_atomic(batch);
1563 }
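
/*
 * BR13 packs the raster operation into bits 16-23, the color format bits,
 * and the pitch into the low 16 bits.  Illustrative example: a linear
 * 4-bytes-per-pixel surface with a 5120-byte pitch gives
 * br13 = (0xf0 << 16) | BR13_8888 | 5120.
 */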
1564
1565 static void
1566 i965_surface_render_pipeline_setup(VADriverContextP ctx)
1567 {
1568     struct i965_driver_data *i965 = i965_driver_data(ctx);
1569     struct intel_batchbuffer *batch = i965->batch;
1570
1571     i965_clear_dest_region(ctx);
1572     intel_batchbuffer_start_atomic(batch, 0x1000);
1573     intel_batchbuffer_emit_mi_flush(batch);
1574     i965_render_pipeline_select(ctx);
1575     i965_render_state_sip(ctx);
1576     i965_render_state_base_address(ctx);
1577     i965_render_binding_table_pointers(ctx);
1578     i965_render_constant_color(ctx);
1579     i965_render_pipelined_pointers(ctx);
1580     i965_render_urb_layout(ctx);
1581     i965_render_cs_urb_layout(ctx);
1582     i965_render_constant_buffer(ctx);
1583     i965_render_drawing_rectangle(ctx);
1584     i965_render_vertex_elements(ctx);
1585     i965_render_startup(ctx);
1586     intel_batchbuffer_end_atomic(batch);
1587 }
1588
1589 static void
1590 i965_subpic_render_pipeline_setup(VADriverContextP ctx)
1591 {
1592     struct i965_driver_data *i965 = i965_driver_data(ctx);
1593     struct intel_batchbuffer *batch = i965->batch;
1594
1595     intel_batchbuffer_start_atomic(batch, 0x1000);
1596     intel_batchbuffer_emit_mi_flush(batch);
1597     i965_render_pipeline_select(ctx);
1598     i965_render_state_sip(ctx);
1599     i965_render_state_base_address(ctx);
1600     i965_render_binding_table_pointers(ctx);
1601     i965_render_constant_color(ctx);
1602     i965_render_pipelined_pointers(ctx);
1603     i965_render_urb_layout(ctx);
1604     i965_render_cs_urb_layout(ctx);
1605     i965_render_constant_buffer(ctx);
1606     i965_render_drawing_rectangle(ctx);
1607     i965_render_vertex_elements(ctx);
1608     i965_render_startup(ctx);
1609     intel_batchbuffer_end_atomic(batch);
1610 }
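
/*
 * The subpicture pipeline above is identical to
 * i965_surface_render_pipeline_setup() except that it skips
 * i965_clear_dest_region(): a subpicture must composite over the frame
 * already rendered into the drawable rather than start from a cleared
 * destination.
 */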
1611
1612
1613 static void 
1614 i965_render_initialize(VADriverContextP ctx)
1615 {
1616     struct i965_driver_data *i965 = i965_driver_data(ctx);
1617     struct i965_render_state *render_state = &i965->render_state;
1618     dri_bo *bo;
1619
1620     /* VERTEX BUFFER */
1621     dri_bo_unreference(render_state->vb.vertex_buffer);
1622     bo = dri_bo_alloc(i965->intel.bufmgr,
1623                       "vertex buffer",
1624                       4096,
1625                       4096);
1626     assert(bo);
1627     render_state->vb.vertex_buffer = bo;
1628
1629     /* VS */
1630     dri_bo_unreference(render_state->vs.state);
1631     bo = dri_bo_alloc(i965->intel.bufmgr,
1632                       "vs state",
1633                       sizeof(struct i965_vs_unit_state),
1634                       64);
1635     assert(bo);
1636     render_state->vs.state = bo;
1637
1638     /* GS */
1639     /* CLIP */
1640     /* SF */
1641     dri_bo_unreference(render_state->sf.state);
1642     bo = dri_bo_alloc(i965->intel.bufmgr,
1643                       "sf state",
1644                       sizeof(struct i965_sf_unit_state),
1645                       64);
1646     assert(bo);
1647     render_state->sf.state = bo;
1648
1649     /* WM */
1650     dri_bo_unreference(render_state->wm.surface_state_binding_table_bo);
1651     bo = dri_bo_alloc(i965->intel.bufmgr,
1652                       "surface state & binding table",
1653                       (SURFACE_STATE_PADDED_SIZE + sizeof(unsigned int)) * MAX_RENDER_SURFACES,
1654                       4096);
1655     assert(bo);
1656     render_state->wm.surface_state_binding_table_bo = bo;
1657
1658     dri_bo_unreference(render_state->wm.sampler);
1659     bo = dri_bo_alloc(i965->intel.bufmgr,
1660                       "sampler state",
1661                       MAX_SAMPLERS * sizeof(struct i965_sampler_state),
1662                       64);
1663     assert(bo);
1664     render_state->wm.sampler = bo;
1665     render_state->wm.sampler_count = 0;
1666
1667     dri_bo_unreference(render_state->wm.state);
1668     bo = dri_bo_alloc(i965->intel.bufmgr,
1669                       "wm state",
1670                       sizeof(struct i965_wm_unit_state),
1671                       64);
1672     assert(bo);
1673     render_state->wm.state = bo;
1674
1675     /* COLOR CALCULATOR */
1676     dri_bo_unreference(render_state->cc.state);
1677     bo = dri_bo_alloc(i965->intel.bufmgr,
1678                       "color calc state",
1679                       sizeof(struct i965_cc_unit_state),
1680                       64);
1681     assert(bo);
1682     render_state->cc.state = bo;
1683
1684     dri_bo_unreference(render_state->cc.viewport);
1685     bo = dri_bo_alloc(i965->intel.bufmgr,
1686                       "cc viewport",
1687                       sizeof(struct i965_cc_viewport),
1688                       64);
1689     assert(bo);
1690     render_state->cc.viewport = bo;
1691 }
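
/*
 * Layout sketch of wm.surface_state_binding_table_bo as sized above,
 * assuming the driver's usual convention (cf. BINDING_TABLE_OFFSET used
 * when the binding-table pointers are emitted): the padded surface states
 * come first, followed by one 32-bit binding-table entry per surface.
 *
 *     surface state i at : SURFACE_STATE_PADDED_SIZE * i
 *     binding table at   : SURFACE_STATE_PADDED_SIZE * MAX_RENDER_SURFACES
 */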
1692
1693 static void
1694 i965_render_put_surface(
1695     VADriverContextP   ctx,
1696     struct object_surface *obj_surface,
1697     const VARectangle *src_rect,
1698     const VARectangle *dst_rect,
1699     unsigned int       flags
1700 )
1701 {
1702     struct i965_driver_data *i965 = i965_driver_data(ctx);
1703     struct intel_batchbuffer *batch = i965->batch;
1704
1705     i965_render_initialize(ctx);
1706     i965_surface_render_state_setup(ctx, obj_surface, src_rect, dst_rect, flags);
1707     i965_surface_render_pipeline_setup(ctx);
1708     intel_batchbuffer_flush(batch);
1709 }
1710
1711 static void
1712 i965_render_put_subpicture(
1713     VADriverContextP   ctx,
1714     struct object_surface *obj_surface,
1715     const VARectangle *src_rect,
1716     const VARectangle *dst_rect
1717 )
1718 {
1719     struct i965_driver_data *i965 = i965_driver_data(ctx);
1720     struct intel_batchbuffer *batch = i965->batch;
1721     unsigned int index = obj_surface->subpic_render_idx;
1722     struct object_subpic *obj_subpic = obj_surface->obj_subpic[index];
1723
1724     assert(obj_subpic);
1725
1726     i965_render_initialize(ctx);
1727     i965_subpic_render_state_setup(ctx, obj_surface, src_rect, dst_rect);
1728     i965_subpic_render_pipeline_setup(ctx);
1729     i965_render_upload_image_palette(ctx, obj_subpic->obj_image, 0xff);
1730     intel_batchbuffer_flush(batch);
1731 }
1732
1733 /*
1734  * for GEN6+
1735  */
1736 static void 
1737 gen6_render_initialize(VADriverContextP ctx)
1738 {
1739     struct i965_driver_data *i965 = i965_driver_data(ctx);
1740     struct i965_render_state *render_state = &i965->render_state;
1741     dri_bo *bo;
1742
1743     /* VERTEX BUFFER */
1744     dri_bo_unreference(render_state->vb.vertex_buffer);
1745     bo = dri_bo_alloc(i965->intel.bufmgr,
1746                       "vertex buffer",
1747                       4096,
1748                       4096);
1749     assert(bo);
1750     render_state->vb.vertex_buffer = bo;
1751
1752     /* WM */
1753     dri_bo_unreference(render_state->wm.surface_state_binding_table_bo);
1754     bo = dri_bo_alloc(i965->intel.bufmgr,
1755                       "surface state & binding table",
1756                       (SURFACE_STATE_PADDED_SIZE + sizeof(unsigned int)) * MAX_RENDER_SURFACES,
1757                       4096);
1758     assert(bo);
1759     render_state->wm.surface_state_binding_table_bo = bo;
1760
1761     dri_bo_unreference(render_state->wm.sampler);
1762     bo = dri_bo_alloc(i965->intel.bufmgr,
1763                       "sampler state",
1764                       MAX_SAMPLERS * sizeof(struct i965_sampler_state),
1765                       4096);
1766     assert(bo);
1767     render_state->wm.sampler = bo;
1768     render_state->wm.sampler_count = 0;
1769
1770     /* COLOR CALCULATOR */
1771     dri_bo_unreference(render_state->cc.state);
1772     bo = dri_bo_alloc(i965->intel.bufmgr,
1773                       "color calc state",
1774                       sizeof(struct gen6_color_calc_state),
1775                       4096);
1776     assert(bo);
1777     render_state->cc.state = bo;
1778
1779     /* CC VIEWPORT */
1780     dri_bo_unreference(render_state->cc.viewport);
1781     bo = dri_bo_alloc(i965->intel.bufmgr,
1782                       "cc viewport",
1783                       sizeof(struct i965_cc_viewport),
1784                       4096);
1785     assert(bo);
1786     render_state->cc.viewport = bo;
1787
1788     /* BLEND STATE */
1789     dri_bo_unreference(render_state->cc.blend);
1790     bo = dri_bo_alloc(i965->intel.bufmgr,
1791                       "blend state",
1792                       sizeof(struct gen6_blend_state),
1793                       4096);
1794     assert(bo);
1795     render_state->cc.blend = bo;
1796
1797     /* DEPTH & STENCIL STATE */
1798     dri_bo_unreference(render_state->cc.depth_stencil);
1799     bo = dri_bo_alloc(i965->intel.bufmgr,
1800                       "depth & stencil state",
1801                       sizeof(struct gen6_depth_stencil_state),
1802                       4096);
1803     assert(bo);
1804     render_state->cc.depth_stencil = bo;
1805 }
1806
1807 static void
1808 gen6_render_color_calc_state(VADriverContextP ctx)
1809 {
1810     struct i965_driver_data *i965 = i965_driver_data(ctx);
1811     struct i965_render_state *render_state = &i965->render_state;
1812     struct gen6_color_calc_state *color_calc_state;
1813     
1814     dri_bo_map(render_state->cc.state, 1);
1815     assert(render_state->cc.state->virtual);
1816     color_calc_state = render_state->cc.state->virtual;
1817     memset(color_calc_state, 0, sizeof(*color_calc_state));
1818     color_calc_state->constant_r = 1.0;
1819     color_calc_state->constant_g = 0.0;
1820     color_calc_state->constant_b = 1.0;
1821     color_calc_state->constant_a = 1.0;
1822     dri_bo_unmap(render_state->cc.state);
1823 }
1824
1825 static void
1826 gen6_render_blend_state(VADriverContextP ctx)
1827 {
1828     struct i965_driver_data *i965 = i965_driver_data(ctx);
1829     struct i965_render_state *render_state = &i965->render_state;
1830     struct gen6_blend_state *blend_state;
1831     
1832     dri_bo_map(render_state->cc.blend, 1);
1833     assert(render_state->cc.blend->virtual);
1834     blend_state = render_state->cc.blend->virtual;
1835     memset(blend_state, 0, sizeof(*blend_state));
1836     blend_state->blend1.logic_op_enable = 1;
1837     blend_state->blend1.logic_op_func = 0xc; /* LOGICOP_COPY: destination = source */
1838     dri_bo_unmap(render_state->cc.blend);
1839 }
1840
1841 static void
1842 gen6_render_depth_stencil_state(VADriverContextP ctx)
1843 {
1844     struct i965_driver_data *i965 = i965_driver_data(ctx);
1845     struct i965_render_state *render_state = &i965->render_state;
1846     struct gen6_depth_stencil_state *depth_stencil_state;
1847     
1848     dri_bo_map(render_state->cc.depth_stencil, 1);
1849     assert(render_state->cc.depth_stencil->virtual);
1850     depth_stencil_state = render_state->cc.depth_stencil->virtual;
1851     memset(depth_stencil_state, 0, sizeof(*depth_stencil_state)); /* all zero: depth and stencil tests disabled */
1852     dri_bo_unmap(render_state->cc.depth_stencil);
1853 }
1854
1855 static void
1856 gen6_render_setup_states(
1857     VADriverContextP   ctx,
1858     struct object_surface *obj_surface,
1859     const VARectangle *src_rect,
1860     const VARectangle *dst_rect,
1861     unsigned int       flags
1862 )
1863 {
1864     i965_render_dest_surface_state(ctx, 0);
1865     i965_render_src_surfaces_state(ctx, obj_surface, flags);
1866     i965_render_sampler(ctx);
1867     i965_render_cc_viewport(ctx);
1868     gen6_render_color_calc_state(ctx);
1869     gen6_render_blend_state(ctx);
1870     gen6_render_depth_stencil_state(ctx);
1871     i965_render_upload_constants(ctx, obj_surface, flags);
1872     i965_render_upload_vertex(ctx, obj_surface, src_rect, dst_rect);
1873 }
1874
1875 static void
1876 gen6_emit_invarient_states(VADriverContextP ctx)
1877 {
1878     struct i965_driver_data *i965 = i965_driver_data(ctx);
1879     struct intel_batchbuffer *batch = i965->batch;
1880
1881     OUT_BATCH(batch, CMD_PIPELINE_SELECT | PIPELINE_SELECT_3D);
1882
1883     OUT_BATCH(batch, GEN6_3DSTATE_MULTISAMPLE | (3 - 2));
1884     OUT_BATCH(batch, GEN6_3DSTATE_MULTISAMPLE_PIXEL_LOCATION_CENTER |
1885               GEN6_3DSTATE_MULTISAMPLE_NUMSAMPLES_1); /* 1 sample/pixel */
1886     OUT_BATCH(batch, 0);
1887
1888     OUT_BATCH(batch, GEN6_3DSTATE_SAMPLE_MASK | (2 - 2));
1889     OUT_BATCH(batch, 1);
1890
1891     /* Set system instruction pointer */
1892     OUT_BATCH(batch, CMD_STATE_SIP | 0);
1893     OUT_BATCH(batch, 0);
1894 }
1895
1896 static void
1897 gen6_emit_state_base_address(VADriverContextP ctx)
1898 {
1899     struct i965_driver_data *i965 = i965_driver_data(ctx);
1900     struct intel_batchbuffer *batch = i965->batch;
1901     struct i965_render_state *render_state = &i965->render_state;
1902
1903     OUT_BATCH(batch, CMD_STATE_BASE_ADDRESS | (10 - 2));
1904     OUT_BATCH(batch, BASE_ADDRESS_MODIFY); /* General state base address */
1905     OUT_RELOC(batch, render_state->wm.surface_state_binding_table_bo, I915_GEM_DOMAIN_INSTRUCTION, 0, BASE_ADDRESS_MODIFY); /* Surface state base address */
1906     OUT_BATCH(batch, BASE_ADDRESS_MODIFY); /* Dynamic state base address */
1907     OUT_BATCH(batch, BASE_ADDRESS_MODIFY); /* Indirect object base address */
1908     OUT_BATCH(batch, BASE_ADDRESS_MODIFY); /* Instruction base address */
1909     OUT_BATCH(batch, BASE_ADDRESS_MODIFY); /* General state upper bound */
1910     OUT_BATCH(batch, BASE_ADDRESS_MODIFY); /* Dynamic state upper bound */
1911     OUT_BATCH(batch, BASE_ADDRESS_MODIFY); /* Indirect object upper bound */
1912     OUT_BATCH(batch, BASE_ADDRESS_MODIFY); /* Instruction access upper bound */
1913 }
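
/*
 * Only the surface state base address points at a real buffer; every other
 * base address stays zero with its modify bit set.  This is what allows
 * BINDING_TABLE_OFFSET and the per-surface state offsets to be expressed
 * relative to wm.surface_state_binding_table_bo.
 */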
1914
1915 static void
1916 gen6_emit_viewport_state_pointers(VADriverContextP ctx)
1917 {
1918     struct i965_driver_data *i965 = i965_driver_data(ctx);
1919     struct intel_batchbuffer *batch = i965->batch;
1920     struct i965_render_state *render_state = &i965->render_state;
1921
1922     OUT_BATCH(batch, GEN6_3DSTATE_VIEWPORT_STATE_POINTERS |
1923               GEN6_3DSTATE_VIEWPORT_STATE_MODIFY_CC |
1924               (4 - 2));
1925     OUT_BATCH(batch, 0);
1926     OUT_BATCH(batch, 0);
1927     OUT_RELOC(batch, render_state->cc.viewport, I915_GEM_DOMAIN_INSTRUCTION, 0, 0);
1928 }
1929
1930 static void
1931 gen6_emit_urb(VADriverContextP ctx)
1932 {
1933     struct i965_driver_data *i965 = i965_driver_data(ctx);
1934     struct intel_batchbuffer *batch = i965->batch;
1935
1936     OUT_BATCH(batch, GEN6_3DSTATE_URB | (3 - 2));
1937     OUT_BATCH(batch, ((1 - 1) << GEN6_3DSTATE_URB_VS_SIZE_SHIFT) |
1938               (24 << GEN6_3DSTATE_URB_VS_ENTRIES_SHIFT)); /* at least 24 on GEN6 */
1939     OUT_BATCH(batch, (0 << GEN6_3DSTATE_URB_GS_SIZE_SHIFT) |
1940               (0 << GEN6_3DSTATE_URB_GS_ENTRIES_SHIFT)); /* no GS thread */
1941 }
1942
1943 static void
1944 gen6_emit_cc_state_pointers(VADriverContextP ctx)
1945 {
1946     struct i965_driver_data *i965 = i965_driver_data(ctx);
1947     struct intel_batchbuffer *batch = i965->batch;
1948     struct i965_render_state *render_state = &i965->render_state;
1949
1950     OUT_BATCH(batch, GEN6_3DSTATE_CC_STATE_POINTERS | (4 - 2));
1951     OUT_RELOC(batch, render_state->cc.blend, I915_GEM_DOMAIN_INSTRUCTION, 0, 1);
1952     OUT_RELOC(batch, render_state->cc.depth_stencil, I915_GEM_DOMAIN_INSTRUCTION, 0, 1);
1953     OUT_RELOC(batch, render_state->cc.state, I915_GEM_DOMAIN_INSTRUCTION, 0, 1);
1954 }
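
/*
 * The "1" passed as the relocation delta above lands in bit 0 of each
 * pointer DWORD, the per-pointer modify-enable bit of
 * 3DSTATE_CC_STATE_POINTERS on GEN6; the kernel's relocation fixup then
 * fills in the upper bits with the actual state offset.
 */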
1955
1956 static void
1957 gen6_emit_sampler_state_pointers(VADriverContextP ctx)
1958 {
1959     struct i965_driver_data *i965 = i965_driver_data(ctx);
1960     struct intel_batchbuffer *batch = i965->batch;
1961     struct i965_render_state *render_state = &i965->render_state;
1962
1963     OUT_BATCH(batch, GEN6_3DSTATE_SAMPLER_STATE_POINTERS |
1964               GEN6_3DSTATE_SAMPLER_STATE_MODIFY_PS |
1965               (4 - 2));
1966     OUT_BATCH(batch, 0); /* VS */
1967     OUT_BATCH(batch, 0); /* GS */
1968     OUT_RELOC(batch, render_state->wm.sampler, I915_GEM_DOMAIN_INSTRUCTION, 0, 0);
1969 }
1970
1971 static void
1972 gen6_emit_binding_table(VADriverContextP ctx)
1973 {
1974     struct i965_driver_data *i965 = i965_driver_data(ctx);
1975     struct intel_batchbuffer *batch = i965->batch;
1976
1977     /* Binding table pointers */
1978     OUT_BATCH(batch, CMD_BINDING_TABLE_POINTERS |
1979               GEN6_BINDING_TABLE_MODIFY_PS |
1980               (4 - 2));
1981     OUT_BATCH(batch, 0);                /* vs */
1982     OUT_BATCH(batch, 0);                /* gs */
1983     /* Only the PS uses the binding table */
1984     OUT_BATCH(batch, BINDING_TABLE_OFFSET);
1985 }
1986
1987 static void
1988 gen6_emit_depth_buffer_state(VADriverContextP ctx)
1989 {
1990     struct i965_driver_data *i965 = i965_driver_data(ctx);
1991     struct intel_batchbuffer *batch = i965->batch;
1992
1993     OUT_BATCH(batch, CMD_DEPTH_BUFFER | (7 - 2));
1994     OUT_BATCH(batch, (I965_SURFACE_NULL << CMD_DEPTH_BUFFER_TYPE_SHIFT) |
1995               (I965_DEPTHFORMAT_D32_FLOAT << CMD_DEPTH_BUFFER_FORMAT_SHIFT));
1996     OUT_BATCH(batch, 0);
1997     OUT_BATCH(batch, 0);
1998     OUT_BATCH(batch, 0);
1999     OUT_BATCH(batch, 0);
2000     OUT_BATCH(batch, 0);
2001
2002     OUT_BATCH(batch, CMD_CLEAR_PARAMS | (2 - 2));
2003     OUT_BATCH(batch, 0);
2004 }
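
/*
 * The video blit renders without depth or stencil, but the hardware still
 * expects 3DSTATE_DEPTH_BUFFER and CLEAR_PARAMS to be programmed, so a
 * null surface (I965_SURFACE_NULL) with a valid depth format is emitted.
 */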
2005
2006 static void
2007 gen6_emit_drawing_rectangle(VADriverContextP ctx)
2008 {
2009     i965_render_drawing_rectangle(ctx);
2010 }
2011
2012 static void 
2013 gen6_emit_vs_state(VADriverContextP ctx)
2014 {
2015     struct i965_driver_data *i965 = i965_driver_data(ctx);
2016     struct intel_batchbuffer *batch = i965->batch;
2017
2018     /* disable VS constant buffer */
2019     OUT_BATCH(batch, GEN6_3DSTATE_CONSTANT_VS | (5 - 2));
2020     OUT_BATCH(batch, 0);
2021     OUT_BATCH(batch, 0);
2022     OUT_BATCH(batch, 0);
2023     OUT_BATCH(batch, 0);
2024         
2025     OUT_BATCH(batch, GEN6_3DSTATE_VS | (6 - 2));
2026     OUT_BATCH(batch, 0); /* without VS kernel */
2027     OUT_BATCH(batch, 0);
2028     OUT_BATCH(batch, 0);
2029     OUT_BATCH(batch, 0);
2030     OUT_BATCH(batch, 0); /* pass-through */
2031 }
2032
2033 static void 
2034 gen6_emit_gs_state(VADriverContextP ctx)
2035 {
2036     struct i965_driver_data *i965 = i965_driver_data(ctx);
2037     struct intel_batchbuffer *batch = i965->batch;
2038
2039     /* disable GS constant buffer */
2040     OUT_BATCH(batch, GEN6_3DSTATE_CONSTANT_GS | (5 - 2));
2041     OUT_BATCH(batch, 0);
2042     OUT_BATCH(batch, 0);
2043     OUT_BATCH(batch, 0);
2044     OUT_BATCH(batch, 0);
2045         
2046     OUT_BATCH(batch, GEN6_3DSTATE_GS | (7 - 2));
2047     OUT_BATCH(batch, 0); /* without GS kernel */
2048     OUT_BATCH(batch, 0);
2049     OUT_BATCH(batch, 0);
2050     OUT_BATCH(batch, 0);
2051     OUT_BATCH(batch, 0);
2052     OUT_BATCH(batch, 0); /* pass-through */
2053 }
2054
2055 static void 
2056 gen6_emit_clip_state(VADriverContextP ctx)
2057 {
2058     struct i965_driver_data *i965 = i965_driver_data(ctx);
2059     struct intel_batchbuffer *batch = i965->batch;
2060
2061     OUT_BATCH(batch, GEN6_3DSTATE_CLIP | (4 - 2));
2062     OUT_BATCH(batch, 0);
2063     OUT_BATCH(batch, 0); /* pass-through */
2064     OUT_BATCH(batch, 0);
2065 }
2066
2067 static void 
2068 gen6_emit_sf_state(VADriverContextP ctx)
2069 {
2070     struct i965_driver_data *i965 = i965_driver_data(ctx);
2071     struct intel_batchbuffer *batch = i965->batch;
2072
2073     OUT_BATCH(batch, GEN6_3DSTATE_SF | (20 - 2));
2074     OUT_BATCH(batch, (1 << GEN6_3DSTATE_SF_NUM_OUTPUTS_SHIFT) |
2075               (1 << GEN6_3DSTATE_SF_URB_ENTRY_READ_LENGTH_SHIFT) |
2076               (0 << GEN6_3DSTATE_SF_URB_ENTRY_READ_OFFSET_SHIFT));
2077     OUT_BATCH(batch, 0);
2078     OUT_BATCH(batch, GEN6_3DSTATE_SF_CULL_NONE);
2079     OUT_BATCH(batch, 2 << GEN6_3DSTATE_SF_TRIFAN_PROVOKE_SHIFT); /* DW4 */
2080     OUT_BATCH(batch, 0);
2081     OUT_BATCH(batch, 0);
2082     OUT_BATCH(batch, 0);
2083     OUT_BATCH(batch, 0);
2084     OUT_BATCH(batch, 0); /* DW9 */
2085     OUT_BATCH(batch, 0);
2086     OUT_BATCH(batch, 0);
2087     OUT_BATCH(batch, 0);
2088     OUT_BATCH(batch, 0);
2089     OUT_BATCH(batch, 0); /* DW14 */
2090     OUT_BATCH(batch, 0);
2091     OUT_BATCH(batch, 0);
2092     OUT_BATCH(batch, 0);
2093     OUT_BATCH(batch, 0);
2094     OUT_BATCH(batch, 0); /* DW19 */
2095 }
2096
2097 static void 
2098 gen6_emit_wm_state(VADriverContextP ctx, int kernel)
2099 {
2100     struct i965_driver_data *i965 = i965_driver_data(ctx);
2101     struct intel_batchbuffer *batch = i965->batch;
2102     struct i965_render_state *render_state = &i965->render_state;
2103
2104     OUT_BATCH(batch, GEN6_3DSTATE_CONSTANT_PS |
2105               GEN6_3DSTATE_CONSTANT_BUFFER_0_ENABLE |
2106               (5 - 2));
2107     OUT_RELOC(batch, 
2108               render_state->curbe.bo,
2109               I915_GEM_DOMAIN_INSTRUCTION, 0,
2110               (URB_CS_ENTRY_SIZE - 1));
2111     OUT_BATCH(batch, 0);
2112     OUT_BATCH(batch, 0);
2113     OUT_BATCH(batch, 0);
2114
2115     OUT_BATCH(batch, GEN6_3DSTATE_WM | (9 - 2));
2116     OUT_RELOC(batch, render_state->render_kernels[kernel].bo,
2117               I915_GEM_DOMAIN_INSTRUCTION, 0,
2118               0);
2119     OUT_BATCH(batch, (1 << GEN6_3DSTATE_WM_SAMPLER_COUNT_SHITF) |
2120               (5 << GEN6_3DSTATE_WM_BINDING_TABLE_ENTRY_COUNT_SHIFT));
2121     OUT_BATCH(batch, 0);
2122     OUT_BATCH(batch, (6 << GEN6_3DSTATE_WM_DISPATCH_START_GRF_0_SHIFT)); /* DW4 */
2123     OUT_BATCH(batch, ((i965->intel.device_info->max_wm_threads - 1) << GEN6_3DSTATE_WM_MAX_THREADS_SHIFT) |
2124               GEN6_3DSTATE_WM_DISPATCH_ENABLE |
2125               GEN6_3DSTATE_WM_16_DISPATCH_ENABLE);
2126     OUT_BATCH(batch, (1 << GEN6_3DSTATE_WM_NUM_SF_OUTPUTS_SHIFT) |
2127               GEN6_3DSTATE_WM_PERSPECTIVE_PIXEL_BARYCENTRIC);
2128     OUT_BATCH(batch, 0);
2129     OUT_BATCH(batch, 0);
2130 }
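
/*
 * Presumably, as elsewhere in this driver, the CONSTANT_PS relocation
 * delta (URB_CS_ENTRY_SIZE - 1) encodes the constant-buffer length, minus
 * one, in the low bits of the pointer DWORD.  Dispatch starts at GRF 6,
 * leaving the lower registers for the payload the fixed function pushes
 * ahead of the pixel shader, and only SIMD16 dispatch is enabled.
 */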
2131
2132 static void
2133 gen6_emit_vertex_element_state(VADriverContextP ctx)
2134 {
2135     struct i965_driver_data *i965 = i965_driver_data(ctx);
2136     struct intel_batchbuffer *batch = i965->batch;
2137
2138     /* Set up our vertex elements, sourced from the single vertex buffer. */
2139     OUT_BATCH(batch, CMD_VERTEX_ELEMENTS | (5 - 2));
2140     /* offset 0: X,Y -> {X, Y, 1.0, 1.0} */
2141     OUT_BATCH(batch, (0 << GEN6_VE0_VERTEX_BUFFER_INDEX_SHIFT) |
2142               GEN6_VE0_VALID |
2143               (I965_SURFACEFORMAT_R32G32_FLOAT << VE0_FORMAT_SHIFT) |
2144               (0 << VE0_OFFSET_SHIFT));
2145     OUT_BATCH(batch, (I965_VFCOMPONENT_STORE_SRC << VE1_VFCOMPONENT_0_SHIFT) |
2146               (I965_VFCOMPONENT_STORE_SRC << VE1_VFCOMPONENT_1_SHIFT) |
2147               (I965_VFCOMPONENT_STORE_1_FLT << VE1_VFCOMPONENT_2_SHIFT) |
2148               (I965_VFCOMPONENT_STORE_1_FLT << VE1_VFCOMPONENT_3_SHIFT));
2149     /* offset 8: S0, T0 -> {S0, T0, 1.0, 1.0} */
2150     OUT_BATCH(batch, (0 << GEN6_VE0_VERTEX_BUFFER_INDEX_SHIFT) |
2151               GEN6_VE0_VALID |
2152               (I965_SURFACEFORMAT_R32G32_FLOAT << VE0_FORMAT_SHIFT) |
2153               (8 << VE0_OFFSET_SHIFT));
2154     OUT_BATCH(batch, (I965_VFCOMPONENT_STORE_SRC << VE1_VFCOMPONENT_0_SHIFT) | 
2155               (I965_VFCOMPONENT_STORE_SRC << VE1_VFCOMPONENT_1_SHIFT) |
2156               (I965_VFCOMPONENT_STORE_1_FLT << VE1_VFCOMPONENT_2_SHIFT) |
2157               (I965_VFCOMPONENT_STORE_1_FLT << VE1_VFCOMPONENT_3_SHIFT));
2158 }
2159
2160 static void
2161 gen6_emit_vertices(VADriverContextP ctx)
2162 {
2163     struct i965_driver_data *i965 = i965_driver_data(ctx);
2164     struct intel_batchbuffer *batch = i965->batch;
2165     struct i965_render_state *render_state = &i965->render_state;
2166
2167     BEGIN_BATCH(batch, 11);
2168     OUT_BATCH(batch, CMD_VERTEX_BUFFERS | 3);
2169     OUT_BATCH(batch, 
2170               (0 << GEN6_VB0_BUFFER_INDEX_SHIFT) |
2171               GEN6_VB0_VERTEXDATA |
2172               ((4 * 4) << VB0_BUFFER_PITCH_SHIFT));
2173     OUT_RELOC(batch, render_state->vb.vertex_buffer, I915_GEM_DOMAIN_VERTEX, 0, 0);
2174     OUT_RELOC(batch, render_state->vb.vertex_buffer, I915_GEM_DOMAIN_VERTEX, 0, 12 * 4);
2175     OUT_BATCH(batch, 0);
2176
2177     OUT_BATCH(batch, 
2178               CMD_3DPRIMITIVE |
2179               _3DPRIMITIVE_VERTEX_SEQUENTIAL |
2180               (_3DPRIM_RECTLIST << _3DPRIMITIVE_TOPOLOGY_SHIFT) |
2181               (0 << 9) |
2182               4);
2183     OUT_BATCH(batch, 3); /* vertex count per instance */
2184     OUT_BATCH(batch, 0); /* start vertex offset */
2185     OUT_BATCH(batch, 1); /* single instance */
2186     OUT_BATCH(batch, 0); /* start instance location */
2187     OUT_BATCH(batch, 0); /* index buffer offset, ignored */
2188     ADVANCE_BATCH(batch);
2189 }
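
/*
 * Both relocations target the same vertex buffer: the first is the start
 * address, the second the end address at byte offset 12 * 4 = 48, i.e.
 * three 16-byte {x, y, s, t} vertices, matching the three-vertex RECTLIST
 * draw that follows.
 */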
2190
2191 static void
2192 gen6_render_emit_states(VADriverContextP ctx, int kernel)
2193 {
2194     struct i965_driver_data *i965 = i965_driver_data(ctx);
2195     struct intel_batchbuffer *batch = i965->batch;
2196
2197     intel_batchbuffer_start_atomic(batch, 0x1000);
2198     intel_batchbuffer_emit_mi_flush(batch);
2199     gen6_emit_invarient_states(ctx);
2200     gen6_emit_state_base_address(ctx);
2201     gen6_emit_viewport_state_pointers(ctx);
2202     gen6_emit_urb(ctx);
2203     gen6_emit_cc_state_pointers(ctx);
2204     gen6_emit_sampler_state_pointers(ctx);
2205     gen6_emit_vs_state(ctx);
2206     gen6_emit_gs_state(ctx);
2207     gen6_emit_clip_state(ctx);
2208     gen6_emit_sf_state(ctx);
2209     gen6_emit_wm_state(ctx, kernel);
2210     gen6_emit_binding_table(ctx);
2211     gen6_emit_depth_buffer_state(ctx);
2212     gen6_emit_drawing_rectangle(ctx);
2213     gen6_emit_vertex_element_state(ctx);
2214     gen6_emit_vertices(ctx);
2215     intel_batchbuffer_end_atomic(batch);
2216 }
2217
2218 static void
2219 gen6_render_put_surface(
2220     VADriverContextP   ctx,
2221     struct object_surface *obj_surface,
2222     const VARectangle *src_rect,
2223     const VARectangle *dst_rect,
2224     unsigned int       flags
2225 )
2226 {
2227     struct i965_driver_data *i965 = i965_driver_data(ctx);
2228     struct intel_batchbuffer *batch = i965->batch;
2229
2230     gen6_render_initialize(ctx);
2231     gen6_render_setup_states(ctx, obj_surface, src_rect, dst_rect, flags);
2232     i965_clear_dest_region(ctx);
2233     gen6_render_emit_states(ctx, PS_KERNEL);
2234     intel_batchbuffer_flush(batch);
2235 }
2236
2237 static void
2238 gen6_subpicture_render_blend_state(VADriverContextP ctx)
2239 {
2240     struct i965_driver_data *i965 = i965_driver_data(ctx);
2241     struct i965_render_state *render_state = &i965->render_state;
2242     struct gen6_blend_state *blend_state;
2243
2244     dri_bo_unmap(render_state->cc.state);    
2245     dri_bo_map(render_state->cc.blend, 1);
2246     assert(render_state->cc.blend->virtual);
2247     blend_state = render_state->cc.blend->virtual;
2248     memset(blend_state, 0, sizeof(*blend_state));
2249     blend_state->blend0.dest_blend_factor = I965_BLENDFACTOR_INV_SRC_ALPHA;
2250     blend_state->blend0.source_blend_factor = I965_BLENDFACTOR_SRC_ALPHA;
2251     blend_state->blend0.blend_func = I965_BLENDFUNCTION_ADD;
2252     blend_state->blend0.blend_enable = 1;
2253     blend_state->blend1.post_blend_clamp_enable = 1;
2254     blend_state->blend1.pre_blend_clamp_enable = 1;
2255     blend_state->blend1.clamp_range = 0; /* clamp range [0, 1] */
2256     dri_bo_unmap(render_state->cc.blend);
2257 }
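
/*
 * The factors above implement standard "source over" compositing:
 *
 *     result = src * src.alpha + dst * (1 - src.alpha)
 *
 * with pre- and post-blend clamping to [0, 1], which is what lets a
 * translucent subpicture overlay the decoded frame.
 */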
2258
2259 static void
2260 gen6_subpicture_render_setup_states(
2261     VADriverContextP   ctx,
2262     struct object_surface *obj_surface,
2263     const VARectangle *src_rect,
2264     const VARectangle *dst_rect
2265 )
2266 {
2267     i965_render_dest_surface_state(ctx, 0);
2268     i965_subpic_render_src_surfaces_state(ctx, obj_surface);
2269     i965_render_sampler(ctx);
2270     i965_render_cc_viewport(ctx);
2271     gen6_render_color_calc_state(ctx);
2272     gen6_subpicture_render_blend_state(ctx);
2273     gen6_render_depth_stencil_state(ctx);
2274     i965_subpic_render_upload_constants(ctx, obj_surface);
2275     i965_subpic_render_upload_vertex(ctx, obj_surface, dst_rect);
2276 }
2277
2278 static void
2279 gen6_render_put_subpicture(
2280     VADriverContextP   ctx,
2281     struct object_surface *obj_surface,
2282     const VARectangle *src_rect,
2283     const VARectangle *dst_rect
2284 )
2285 {
2286     struct i965_driver_data *i965 = i965_driver_data(ctx);
2287     struct intel_batchbuffer *batch = i965->batch;
2288     unsigned int index = obj_surface->subpic_render_idx;
2289     struct object_subpic *obj_subpic = obj_surface->obj_subpic[index];
2290
2291     assert(obj_subpic);
2292     gen6_render_initialize(ctx);
2293     gen6_subpicture_render_setup_states(ctx, obj_surface, src_rect, dst_rect);
2294     gen6_render_emit_states(ctx, PS_SUBPIC_KERNEL);
2295     i965_render_upload_image_palette(ctx, obj_subpic->obj_image, 0xff);
2296     intel_batchbuffer_flush(batch);
2297 }
2298
2299 /*
2300  * for GEN7
2301  */
2302 static void 
2303 gen7_render_initialize(VADriverContextP ctx)
2304 {
2305     struct i965_driver_data *i965 = i965_driver_data(ctx);
2306     struct i965_render_state *render_state = &i965->render_state;
2307     dri_bo *bo;
2308
2309     /* VERTEX BUFFER */
2310     dri_bo_unreference(render_state->vb.vertex_buffer);
2311     bo = dri_bo_alloc(i965->intel.bufmgr,
2312                       "vertex buffer",
2313                       4096,
2314                       4096);
2315     assert(bo);
2316     render_state->vb.vertex_buffer = bo;
2317
2318     /* WM */
2319     dri_bo_unreference(render_state->wm.surface_state_binding_table_bo);
2320     bo = dri_bo_alloc(i965->intel.bufmgr,
2321                       "surface state & binding table",
2322                       (SURFACE_STATE_PADDED_SIZE + sizeof(unsigned int)) * MAX_RENDER_SURFACES,
2323                       4096);
2324     assert(bo);
2325     render_state->wm.surface_state_binding_table_bo = bo;
2326
2327     dri_bo_unreference(render_state->wm.sampler);
2328     bo = dri_bo_alloc(i965->intel.bufmgr,
2329                       "sampler state",
2330                       MAX_SAMPLERS * sizeof(struct gen7_sampler_state),
2331                       4096);
2332     assert(bo);
2333     render_state->wm.sampler = bo;
2334     render_state->wm.sampler_count = 0;
2335
2336     /* COLOR CALCULATOR */
2337     dri_bo_unreference(render_state->cc.state);
2338     bo = dri_bo_alloc(i965->intel.bufmgr,
2339                       "color calc state",
2340                       sizeof(struct gen6_color_calc_state),
2341                       4096);
2342     assert(bo);
2343     render_state->cc.state = bo;
2344
2345     /* CC VIEWPORT */
2346     dri_bo_unreference(render_state->cc.viewport);
2347     bo = dri_bo_alloc(i965->intel.bufmgr,
2348                       "cc viewport",
2349                       sizeof(struct i965_cc_viewport),
2350                       4096);
2351     assert(bo);
2352     render_state->cc.viewport = bo;
2353
2354     /* BLEND STATE */
2355     dri_bo_unreference(render_state->cc.blend);
2356     bo = dri_bo_alloc(i965->intel.bufmgr,
2357                       "blend state",
2358                       sizeof(struct gen6_blend_state),
2359                       4096);
2360     assert(bo);
2361     render_state->cc.blend = bo;
2362
2363     /* DEPTH & STENCIL STATE */
2364     dri_bo_unreference(render_state->cc.depth_stencil);
2365     bo = dri_bo_alloc(i965->intel.bufmgr,
2366                       "depth & stencil state",
2367                       sizeof(struct gen6_depth_stencil_state),
2368                       4096);
2369     assert(bo);
2370     render_state->cc.depth_stencil = bo;
2371 }
2372
2373 /*
2374  * for GEN8
2375  */
2376 #define ALIGNMENT       64
2377
2378 static void
2379 gen7_render_color_calc_state(VADriverContextP ctx)
2380 {
2381     struct i965_driver_data *i965 = i965_driver_data(ctx);
2382     struct i965_render_state *render_state = &i965->render_state;
2383     struct gen6_color_calc_state *color_calc_state;
2384     
2385     dri_bo_map(render_state->cc.state, 1);
2386     assert(render_state->cc.state->virtual);
2387     color_calc_state = render_state->cc.state->virtual;
2388     memset(color_calc_state, 0, sizeof(*color_calc_state));
2389     color_calc_state->constant_r = 1.0;
2390     color_calc_state->constant_g = 0.0;
2391     color_calc_state->constant_b = 1.0;
2392     color_calc_state->constant_a = 1.0;
2393     dri_bo_unmap(render_state->cc.state);
2394 }
2395
2396 static void
2397 gen7_render_blend_state(VADriverContextP ctx)
2398 {
2399     struct i965_driver_data *i965 = i965_driver_data(ctx);
2400     struct i965_render_state *render_state = &i965->render_state;
2401     struct gen6_blend_state *blend_state;
2402     
2403     dri_bo_map(render_state->cc.blend, 1);
2404     assert(render_state->cc.blend->virtual);
2405     blend_state = render_state->cc.blend->virtual;
2406     memset(blend_state, 0, sizeof(*blend_state));
2407     blend_state->blend1.logic_op_enable = 1;
2408     blend_state->blend1.logic_op_func = 0xc; /* LOGICOP_COPY: destination = source */
2409     blend_state->blend1.pre_blend_clamp_enable = 1;
2410     dri_bo_unmap(render_state->cc.blend);
2411 }
2412
2413 static void
2414 gen7_render_depth_stencil_state(VADriverContextP ctx)
2415 {
2416     struct i965_driver_data *i965 = i965_driver_data(ctx);
2417     struct i965_render_state *render_state = &i965->render_state;
2418     struct gen6_depth_stencil_state *depth_stencil_state;
2419     
2420     dri_bo_map(render_state->cc.depth_stencil, 1);
2421     assert(render_state->cc.depth_stencil->virtual);
2422     depth_stencil_state = render_state->cc.depth_stencil->virtual;
2423     memset(depth_stencil_state, 0, sizeof(*depth_stencil_state)); /* all zero: depth and stencil tests disabled */
2424     dri_bo_unmap(render_state->cc.depth_stencil);
2425 }
2426
2427 static void 
2428 gen7_render_sampler(VADriverContextP ctx)
2429 {
2430     struct i965_driver_data *i965 = i965_driver_data(ctx);
2431     struct i965_render_state *render_state = &i965->render_state;
2432     struct gen7_sampler_state *sampler_state;
2433     int i;
2434     
2435     assert(render_state->wm.sampler_count > 0);
2436     assert(render_state->wm.sampler_count <= MAX_SAMPLERS);
2437
2438     dri_bo_map(render_state->wm.sampler, 1);
2439     assert(render_state->wm.sampler->virtual);
2440     sampler_state = render_state->wm.sampler->virtual;
2441     for (i = 0; i < render_state->wm.sampler_count; i++) {
2442         memset(sampler_state, 0, sizeof(*sampler_state));
2443         sampler_state->ss0.min_filter = I965_MAPFILTER_LINEAR;
2444         sampler_state->ss0.mag_filter = I965_MAPFILTER_LINEAR;
2445         sampler_state->ss3.r_wrap_mode = I965_TEXCOORDMODE_CLAMP;
2446         sampler_state->ss3.s_wrap_mode = I965_TEXCOORDMODE_CLAMP;
2447         sampler_state->ss3.t_wrap_mode = I965_TEXCOORDMODE_CLAMP;
2448         sampler_state++;
2449     }
2450
2451     dri_bo_unmap(render_state->wm.sampler);
2452 }
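
/*
 * Every sampler is configured identically: bilinear min/mag filtering with
 * all three coordinates clamped, the natural choice for scaling video
 * planes, where wrapping would bleed opposite edges into each other.
 */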
2453
2454
2455 static void
2456 gen7_render_setup_states(
2457     VADriverContextP   ctx,
2458     struct object_surface *obj_surface,
2459     const VARectangle *src_rect,
2460     const VARectangle *dst_rect,
2461     unsigned int       flags
2462 )
2463 {
2464     i965_render_dest_surface_state(ctx, 0);
2465     i965_render_src_surfaces_state(ctx, obj_surface, flags);
2466     gen7_render_sampler(ctx);
2467     i965_render_cc_viewport(ctx);
2468     gen7_render_color_calc_state(ctx);
2469     gen7_render_blend_state(ctx);
2470     gen7_render_depth_stencil_state(ctx);
2471     i965_render_upload_constants(ctx, obj_surface, flags);
2472     i965_render_upload_vertex(ctx, obj_surface, src_rect, dst_rect);
2473 }
2474
2475
2476 static void
2477 gen7_emit_invarient_states(VADriverContextP ctx)
2478 {
2479     struct i965_driver_data *i965 = i965_driver_data(ctx);
2480     struct intel_batchbuffer *batch = i965->batch;
2481
2482     BEGIN_BATCH(batch, 1);
2483     OUT_BATCH(batch, CMD_PIPELINE_SELECT | PIPELINE_SELECT_3D);
2484     ADVANCE_BATCH(batch);
2485
2486     BEGIN_BATCH(batch, 4);
2487     OUT_BATCH(batch, GEN6_3DSTATE_MULTISAMPLE | (4 - 2));
2488     OUT_BATCH(batch, GEN6_3DSTATE_MULTISAMPLE_PIXEL_LOCATION_CENTER |
2489               GEN6_3DSTATE_MULTISAMPLE_NUMSAMPLES_1); /* 1 sample/pixel */
2490     OUT_BATCH(batch, 0);
2491     OUT_BATCH(batch, 0);
2492     ADVANCE_BATCH(batch);
2493
2494     BEGIN_BATCH(batch, 2);
2495     OUT_BATCH(batch, GEN6_3DSTATE_SAMPLE_MASK | (2 - 2));
2496     OUT_BATCH(batch, 1);
2497     ADVANCE_BATCH(batch);
2498
2499     /* Set system instruction pointer */
2500     BEGIN_BATCH(batch, 2);
2501     OUT_BATCH(batch, CMD_STATE_SIP | 0);
2502     OUT_BATCH(batch, 0);
2503     ADVANCE_BATCH(batch);
2504 }
2505
2506 static void
2507 gen7_emit_state_base_address(VADriverContextP ctx)
2508 {
2509     struct i965_driver_data *i965 = i965_driver_data(ctx);
2510     struct intel_batchbuffer *batch = i965->batch;
2511     struct i965_render_state *render_state = &i965->render_state;
2512
2513     OUT_BATCH(batch, CMD_STATE_BASE_ADDRESS | (10 - 2));
2514     OUT_BATCH(batch, BASE_ADDRESS_MODIFY); /* General state base address */
2515     OUT_RELOC(batch, render_state->wm.surface_state_binding_table_bo, I915_GEM_DOMAIN_INSTRUCTION, 0, BASE_ADDRESS_MODIFY); /* Surface state base address */
2516     OUT_BATCH(batch, BASE_ADDRESS_MODIFY); /* Dynamic state base address */
2517     OUT_BATCH(batch, BASE_ADDRESS_MODIFY); /* Indirect object base address */
2518     OUT_BATCH(batch, BASE_ADDRESS_MODIFY); /* Instruction base address */
2519     OUT_BATCH(batch, BASE_ADDRESS_MODIFY); /* General state upper bound */
2520     OUT_BATCH(batch, BASE_ADDRESS_MODIFY); /* Dynamic state upper bound */
2521     OUT_BATCH(batch, BASE_ADDRESS_MODIFY); /* Indirect object upper bound */
2522     OUT_BATCH(batch, BASE_ADDRESS_MODIFY); /* Instruction access upper bound */
2523 }
2524
2525 static void
2526 gen7_emit_viewport_state_pointers(VADriverContextP ctx)
2527 {
2528     struct i965_driver_data *i965 = i965_driver_data(ctx);
2529     struct intel_batchbuffer *batch = i965->batch;
2530     struct i965_render_state *render_state = &i965->render_state;
2531
2532     BEGIN_BATCH(batch, 2);
2533     OUT_BATCH(batch, GEN7_3DSTATE_VIEWPORT_STATE_POINTERS_CC | (2 - 2));
2534     OUT_RELOC(batch,
2535               render_state->cc.viewport,
2536               I915_GEM_DOMAIN_INSTRUCTION, 0,
2537               0);
2538     ADVANCE_BATCH(batch);
2539
2540     BEGIN_BATCH(batch, 2);
2541     OUT_BATCH(batch, GEN7_3DSTATE_VIEWPORT_STATE_POINTERS_SF_CL | (2 - 2));
2542     OUT_BATCH(batch, 0);
2543     ADVANCE_BATCH(batch);
2544 }
2545
2546 /*
2547  * URB layout on GEN7 
2548  * ----------------------------------------
2549  * | PS Push Constants (8KB) | VS entries |
2550  * ----------------------------------------
2551  */
2552 static void
2553 gen7_emit_urb(VADriverContextP ctx)
2554 {
2555     struct i965_driver_data *i965 = i965_driver_data(ctx);
2556     struct intel_batchbuffer *batch = i965->batch;
2557     unsigned int num_urb_entries = 32;
2558
2559     if (IS_HASWELL(i965->intel.device_info))
2560         num_urb_entries = 64;
2561
2562     BEGIN_BATCH(batch, 2);
2563     OUT_BATCH(batch, GEN7_3DSTATE_PUSH_CONSTANT_ALLOC_PS | (2 - 2));
2564     OUT_BATCH(batch, 8); /* in 1KBs */
2565     ADVANCE_BATCH(batch);
2566
2567     BEGIN_BATCH(batch, 2);
2568     OUT_BATCH(batch, GEN7_3DSTATE_URB_VS | (2 - 2));
2569     OUT_BATCH(batch, 
2570               (num_urb_entries << GEN7_URB_ENTRY_NUMBER_SHIFT) |
2571               ((2 - 1) << GEN7_URB_ENTRY_SIZE_SHIFT) |
2572               (1 << GEN7_URB_STARTING_ADDRESS_SHIFT));
2573     ADVANCE_BATCH(batch);
2574
2575     BEGIN_BATCH(batch, 2);
2576     OUT_BATCH(batch, GEN7_3DSTATE_URB_GS | (2 - 2));
2577     OUT_BATCH(batch,
2578               (0 << GEN7_URB_ENTRY_SIZE_SHIFT) |
2579               (1 << GEN7_URB_STARTING_ADDRESS_SHIFT));
2580     ADVANCE_BATCH(batch);
2581
2582     BEGIN_BATCH(batch, 2);
2583     OUT_BATCH(batch, GEN7_3DSTATE_URB_HS | (2 - 2));
2584     OUT_BATCH(batch,
2585               (0 << GEN7_URB_ENTRY_SIZE_SHIFT) |
2586               (2 << GEN7_URB_STARTING_ADDRESS_SHIFT));
2587     ADVANCE_BATCH(batch);
2588
2589     BEGIN_BATCH(batch, 2);
2590     OUT_BATCH(batch, GEN7_3DSTATE_URB_DS | (2 - 2));
2591     OUT_BATCH(batch,
2592               (0 << GEN7_URB_ENTRY_SIZE_SHIFT) |
2593               (2 << GEN7_URB_STARTING_ADDRESS_SHIFT));
2594     ADVANCE_BATCH(batch);
2595 }
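
/*
 * Matching the layout comment above: the first 8KB of URB space goes to PS
 * push constants, the VS entries (32, or 64 on Haswell) start right after
 * it, and GS/HS/DS receive no entries at all since those stages are
 * bypassed for this pipeline.
 */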
2596
2597 static void
2598 gen7_emit_cc_state_pointers(VADriverContextP ctx)
2599 {
2600     struct i965_driver_data *i965 = i965_driver_data(ctx);
2601     struct intel_batchbuffer *batch = i965->batch;
2602     struct i965_render_state *render_state = &i965->render_state;
2603
2604     BEGIN_BATCH(batch, 2);
2605     OUT_BATCH(batch, GEN6_3DSTATE_CC_STATE_POINTERS | (2 - 2));
2606     OUT_RELOC(batch,
2607               render_state->cc.state,
2608               I915_GEM_DOMAIN_INSTRUCTION, 0,
2609               1);
2610     ADVANCE_BATCH(batch);
2611
2612     BEGIN_BATCH(batch, 2);
2613     OUT_BATCH(batch, GEN7_3DSTATE_BLEND_STATE_POINTERS | (2 - 2));
2614     OUT_RELOC(batch,
2615               render_state->cc.blend,
2616               I915_GEM_DOMAIN_INSTRUCTION, 0,
2617               1);
2618     ADVANCE_BATCH(batch);
2619
2620     BEGIN_BATCH(batch, 2);
2621     OUT_BATCH(batch, GEN7_3DSTATE_DEPTH_STENCIL_STATE_POINTERS | (2 - 2));
2622     OUT_RELOC(batch,
2623               render_state->cc.depth_stencil,
2624               I915_GEM_DOMAIN_INSTRUCTION, 0, 
2625               1);
2626     ADVANCE_BATCH(batch);
2627 }
2628
2629 static void
2630 gen7_emit_sampler_state_pointers(VADriverContextP ctx)
2631 {
2632     struct i965_driver_data *i965 = i965_driver_data(ctx);
2633     struct intel_batchbuffer *batch = i965->batch;
2634     struct i965_render_state *render_state = &i965->render_state;
2635
2636     BEGIN_BATCH(batch, 2);
2637     OUT_BATCH(batch, GEN7_3DSTATE_SAMPLER_STATE_POINTERS_PS | (2 - 2));
2638     OUT_RELOC(batch,
2639               render_state->wm.sampler,
2640               I915_GEM_DOMAIN_INSTRUCTION, 0,
2641               0);
2642     ADVANCE_BATCH(batch);
2643 }
2644
2645 static void
2646 gen7_emit_binding_table(VADriverContextP ctx)
2647 {
2648     struct i965_driver_data *i965 = i965_driver_data(ctx);
2649     struct intel_batchbuffer *batch = i965->batch;
2650
2651     BEGIN_BATCH(batch, 2);
2652     OUT_BATCH(batch, GEN7_3DSTATE_BINDING_TABLE_POINTERS_PS | (2 - 2));
2653     OUT_BATCH(batch, BINDING_TABLE_OFFSET);
2654     ADVANCE_BATCH(batch);
2655 }
2656
2657 static void
2658 gen7_emit_depth_buffer_state(VADriverContextP ctx)
2659 {
2660     struct i965_driver_data *i965 = i965_driver_data(ctx);
2661     struct intel_batchbuffer *batch = i965->batch;
2662
2663     BEGIN_BATCH(batch, 7);
2664     OUT_BATCH(batch, GEN7_3DSTATE_DEPTH_BUFFER | (7 - 2));
2665     OUT_BATCH(batch,
2666               (I965_DEPTHFORMAT_D32_FLOAT << 18) |
2667               (I965_SURFACE_NULL << 29));
2668     OUT_BATCH(batch, 0);
2669     OUT_BATCH(batch, 0);
2670     OUT_BATCH(batch, 0);
2671     OUT_BATCH(batch, 0);
2672     OUT_BATCH(batch, 0);
2673     ADVANCE_BATCH(batch);
2674
2675     BEGIN_BATCH(batch, 3);
2676     OUT_BATCH(batch, GEN7_3DSTATE_CLEAR_PARAMS | (3 - 2));
2677     OUT_BATCH(batch, 0);
2678     OUT_BATCH(batch, 0);
2679     ADVANCE_BATCH(batch);
2680 }
2681
2682 static void
2683 gen7_emit_drawing_rectangle(VADriverContextP ctx)
2684 {
2685     i965_render_drawing_rectangle(ctx);
2686 }
2687
2688 static void 
2689 gen7_emit_vs_state(VADriverContextP ctx)
2690 {
2691     struct i965_driver_data *i965 = i965_driver_data(ctx);
2692     struct intel_batchbuffer *batch = i965->batch;
2693
2694     /* disable VS constant buffer */
2695     OUT_BATCH(batch, GEN6_3DSTATE_CONSTANT_VS | (7 - 2));
2696     OUT_BATCH(batch, 0);
2697     OUT_BATCH(batch, 0);
2698     OUT_BATCH(batch, 0);
2699     OUT_BATCH(batch, 0);
2700     OUT_BATCH(batch, 0);
2701     OUT_BATCH(batch, 0);
2702         
2703     OUT_BATCH(batch, GEN6_3DSTATE_VS | (6 - 2));
2704     OUT_BATCH(batch, 0); /* without VS kernel */
2705     OUT_BATCH(batch, 0);
2706     OUT_BATCH(batch, 0);
2707     OUT_BATCH(batch, 0);
2708     OUT_BATCH(batch, 0); /* pass-through */
2709 }
2710
2711 static void 
2712 gen7_emit_bypass_state(VADriverContextP ctx)
2713 {
2714     struct i965_driver_data *i965 = i965_driver_data(ctx);
2715     struct intel_batchbuffer *batch = i965->batch;
2716
2717     /* bypass GS */
2718     BEGIN_BATCH(batch, 7);
2719     OUT_BATCH(batch, GEN6_3DSTATE_CONSTANT_GS | (7 - 2));
2720     OUT_BATCH(batch, 0);
2721     OUT_BATCH(batch, 0);
2722     OUT_BATCH(batch, 0);
2723     OUT_BATCH(batch, 0);
2724     OUT_BATCH(batch, 0);
2725     OUT_BATCH(batch, 0);
2726     ADVANCE_BATCH(batch);
2727
2728     BEGIN_BATCH(batch, 7);      
2729     OUT_BATCH(batch, GEN6_3DSTATE_GS | (7 - 2));
2730     OUT_BATCH(batch, 0); /* without GS kernel */
2731     OUT_BATCH(batch, 0);
2732     OUT_BATCH(batch, 0);
2733     OUT_BATCH(batch, 0);
2734     OUT_BATCH(batch, 0);
2735     OUT_BATCH(batch, 0); /* pass-through */
2736     ADVANCE_BATCH(batch);
2737
2738     BEGIN_BATCH(batch, 2);
2739     OUT_BATCH(batch, GEN7_3DSTATE_BINDING_TABLE_POINTERS_GS | (2 - 2));
2740     OUT_BATCH(batch, 0);
2741     ADVANCE_BATCH(batch);
2742
2743     /* disable HS */
2744     BEGIN_BATCH(batch, 7);
2745     OUT_BATCH(batch, GEN7_3DSTATE_CONSTANT_HS | (7 - 2));
2746     OUT_BATCH(batch, 0);
2747     OUT_BATCH(batch, 0);
2748     OUT_BATCH(batch, 0);
2749     OUT_BATCH(batch, 0);
2750     OUT_BATCH(batch, 0);
2751     OUT_BATCH(batch, 0);
2752     ADVANCE_BATCH(batch);
2753
2754     BEGIN_BATCH(batch, 7);
2755     OUT_BATCH(batch, GEN7_3DSTATE_HS | (7 - 2));
2756     OUT_BATCH(batch, 0);
2757     OUT_BATCH(batch, 0);
2758     OUT_BATCH(batch, 0);
2759     OUT_BATCH(batch, 0);
2760     OUT_BATCH(batch, 0);
2761     OUT_BATCH(batch, 0);
2762     ADVANCE_BATCH(batch);
2763
2764     BEGIN_BATCH(batch, 2);
2765     OUT_BATCH(batch, GEN7_3DSTATE_BINDING_TABLE_POINTERS_HS | (2 - 2));
2766     OUT_BATCH(batch, 0);
2767     ADVANCE_BATCH(batch);
2768
2769     /* Disable TE */
2770     BEGIN_BATCH(batch, 4);
2771     OUT_BATCH(batch, GEN7_3DSTATE_TE | (4 - 2));
2772     OUT_BATCH(batch, 0);
2773     OUT_BATCH(batch, 0);
2774     OUT_BATCH(batch, 0);
2775     ADVANCE_BATCH(batch);
2776
2777     /* Disable DS */
2778     BEGIN_BATCH(batch, 7);
2779     OUT_BATCH(batch, GEN7_3DSTATE_CONSTANT_DS | (7 - 2));
2780     OUT_BATCH(batch, 0);
2781     OUT_BATCH(batch, 0);
2782     OUT_BATCH(batch, 0);
2783     OUT_BATCH(batch, 0);
2784     OUT_BATCH(batch, 0);
2785     OUT_BATCH(batch, 0);
2786     ADVANCE_BATCH(batch);
2787
2788     BEGIN_BATCH(batch, 6);
2789     OUT_BATCH(batch, GEN7_3DSTATE_DS | (6 - 2));
2790     OUT_BATCH(batch, 0);
2791     OUT_BATCH(batch, 0);
2792     OUT_BATCH(batch, 0);
2793     OUT_BATCH(batch, 0);
2794     OUT_BATCH(batch, 0);
2795     ADVANCE_BATCH(batch);
2796
2797     BEGIN_BATCH(batch, 2);
2798     OUT_BATCH(batch, GEN7_3DSTATE_BINDING_TABLE_POINTERS_DS | (2 - 2));
2799     OUT_BATCH(batch, 0);
2800     ADVANCE_BATCH(batch);
2801
2802     /* Disable STREAMOUT */
2803     BEGIN_BATCH(batch, 3);
2804     OUT_BATCH(batch, GEN7_3DSTATE_STREAMOUT | (3 - 2));
2805     OUT_BATCH(batch, 0);
2806     OUT_BATCH(batch, 0);
2807     ADVANCE_BATCH(batch);
2808 }
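
/*
 * Bypassing a stage on GEN7 takes three pieces of state: its constant
 * buffer disabled, its unit state zeroed (no kernel, not enabled), and its
 * binding-table pointer cleared.  The block above does that for GS, HS and
 * DS, disables the tessellation engine, and finally turns off stream
 * output.
 */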
2809
2810 static void 
2811 gen7_emit_clip_state(VADriverContextP ctx)
2812 {
2813     struct i965_driver_data *i965 = i965_driver_data(ctx);
2814     struct intel_batchbuffer *batch = i965->batch;
2815
2816     OUT_BATCH(batch, GEN6_3DSTATE_CLIP | (4 - 2));
2817     OUT_BATCH(batch, 0);
2818     OUT_BATCH(batch, 0); /* pass-through */
2819     OUT_BATCH(batch, 0);
2820 }
2821
2822 static void 
2823 gen7_emit_sf_state(VADriverContextP ctx)
2824 {
2825     struct i965_driver_data *i965 = i965_driver_data(ctx);
2826     struct intel_batchbuffer *batch = i965->batch;
2827
2828     BEGIN_BATCH(batch, 14);
2829     OUT_BATCH(batch, GEN7_3DSTATE_SBE | (14 - 2));
2830     OUT_BATCH(batch,
2831               (1 << GEN7_SBE_NUM_OUTPUTS_SHIFT) |
2832               (1 << GEN7_SBE_URB_ENTRY_READ_LENGTH_SHIFT) |
2833               (0 << GEN7_SBE_URB_ENTRY_READ_OFFSET_SHIFT));
2834     OUT_BATCH(batch, 0);
2835     OUT_BATCH(batch, 0);
2836     OUT_BATCH(batch, 0); /* DW4 */
2837     OUT_BATCH(batch, 0);
2838     OUT_BATCH(batch, 0);
2839     OUT_BATCH(batch, 0);
2840     OUT_BATCH(batch, 0);
2841     OUT_BATCH(batch, 0); /* DW9 */
2842     OUT_BATCH(batch, 0);
2843     OUT_BATCH(batch, 0);
2844     OUT_BATCH(batch, 0);
2845     OUT_BATCH(batch, 0);
2846     ADVANCE_BATCH(batch);
2847
2848     BEGIN_BATCH(batch, 7);
2849     OUT_BATCH(batch, GEN6_3DSTATE_SF | (7 - 2));
2850     OUT_BATCH(batch, 0);
2851     OUT_BATCH(batch, GEN6_3DSTATE_SF_CULL_NONE);
2852     OUT_BATCH(batch, 2 << GEN6_3DSTATE_SF_TRIFAN_PROVOKE_SHIFT);
2853     OUT_BATCH(batch, 0);
2854     OUT_BATCH(batch, 0);
2855     OUT_BATCH(batch, 0);
2856     ADVANCE_BATCH(batch);
2857 }
2858
2859 static void 
2860 gen7_emit_wm_state(VADriverContextP ctx, int kernel)
2861 {
2862     struct i965_driver_data *i965 = i965_driver_data(ctx);
2863     struct intel_batchbuffer *batch = i965->batch;
2864     struct i965_render_state *render_state = &i965->render_state;
2865     unsigned int max_threads_shift = GEN7_PS_MAX_THREADS_SHIFT_IVB;
2866     unsigned int num_samples = 0;
2867
2868     if (IS_HASWELL(i965->intel.device_info)) {
2869         max_threads_shift = GEN7_PS_MAX_THREADS_SHIFT_HSW;
2870         num_samples = 1 << GEN7_PS_SAMPLE_MASK_SHIFT_HSW;
2871     }
2872
2873     BEGIN_BATCH(batch, 3);
2874     OUT_BATCH(batch, GEN6_3DSTATE_WM | (3 - 2));
2875     OUT_BATCH(batch,
2876               GEN7_WM_DISPATCH_ENABLE |
2877               GEN7_WM_PERSPECTIVE_PIXEL_BARYCENTRIC);
2878     OUT_BATCH(batch, 0);
2879     ADVANCE_BATCH(batch);
2880
2881     BEGIN_BATCH(batch, 7);
2882     OUT_BATCH(batch, GEN6_3DSTATE_CONSTANT_PS | (7 - 2));
2883     OUT_BATCH(batch, URB_CS_ENTRY_SIZE);
2884     OUT_BATCH(batch, 0);
2885     OUT_RELOC(batch, 
2886               render_state->curbe.bo,
2887               I915_GEM_DOMAIN_INSTRUCTION, 0,
2888               0);
2889     OUT_BATCH(batch, 0);
2890     OUT_BATCH(batch, 0);
2891     OUT_BATCH(batch, 0);
2892     ADVANCE_BATCH(batch);
2893
2894     BEGIN_BATCH(batch, 8);
2895     OUT_BATCH(batch, GEN7_3DSTATE_PS | (8 - 2));
2896     OUT_RELOC(batch, 
2897               render_state->render_kernels[kernel].bo,
2898               I915_GEM_DOMAIN_INSTRUCTION, 0,
2899               0);
2900     OUT_BATCH(batch, 
2901               (1 << GEN7_PS_SAMPLER_COUNT_SHIFT) |
2902               (5 << GEN7_PS_BINDING_TABLE_ENTRY_COUNT_SHIFT));
2903     OUT_BATCH(batch, 0); /* scratch space base offset */
2904     OUT_BATCH(batch, 
2905               ((i965->intel.device_info->max_wm_threads - 1) << max_threads_shift) | num_samples |
2906               GEN7_PS_PUSH_CONSTANT_ENABLE |
2907               GEN7_PS_ATTRIBUTE_ENABLE |
2908               GEN7_PS_16_DISPATCH_ENABLE);
2909     OUT_BATCH(batch, 
2910               (6 << GEN7_PS_DISPATCH_START_GRF_SHIFT_0));
2911     OUT_BATCH(batch, 0); /* kernel 1 pointer */
2912     OUT_BATCH(batch, 0); /* kernel 2 pointer */
2913     ADVANCE_BATCH(batch);
2914 }
2915
2916 static void
2917 gen7_emit_vertex_element_state(VADriverContextP ctx)
2918 {
2919     struct i965_driver_data *i965 = i965_driver_data(ctx);
2920     struct intel_batchbuffer *batch = i965->batch;
2921
2922     /* Set up our vertex elements, sourced from the single vertex buffer. */
2923     OUT_BATCH(batch, CMD_VERTEX_ELEMENTS | (5 - 2));
2924     /* offset 0: X,Y -> {X, Y, 1.0, 1.0} */
2925     OUT_BATCH(batch, (0 << GEN6_VE0_VERTEX_BUFFER_INDEX_SHIFT) |
2926               GEN6_VE0_VALID |
2927               (I965_SURFACEFORMAT_R32G32_FLOAT << VE0_FORMAT_SHIFT) |
2928               (0 << VE0_OFFSET_SHIFT));
2929     OUT_BATCH(batch, (I965_VFCOMPONENT_STORE_SRC << VE1_VFCOMPONENT_0_SHIFT) |
2930               (I965_VFCOMPONENT_STORE_SRC << VE1_VFCOMPONENT_1_SHIFT) |
2931               (I965_VFCOMPONENT_STORE_1_FLT << VE1_VFCOMPONENT_2_SHIFT) |
2932               (I965_VFCOMPONENT_STORE_1_FLT << VE1_VFCOMPONENT_3_SHIFT));
2933     /* offset 8: S0, T0 -> {S0, T0, 1.0, 1.0} */
2934     OUT_BATCH(batch, (0 << GEN6_VE0_VERTEX_BUFFER_INDEX_SHIFT) |
2935               GEN6_VE0_VALID |
2936               (I965_SURFACEFORMAT_R32G32_FLOAT << VE0_FORMAT_SHIFT) |
2937               (8 << VE0_OFFSET_SHIFT));
2938     OUT_BATCH(batch, (I965_VFCOMPONENT_STORE_SRC << VE1_VFCOMPONENT_0_SHIFT) | 
2939               (I965_VFCOMPONENT_STORE_SRC << VE1_VFCOMPONENT_1_SHIFT) |
2940               (I965_VFCOMPONENT_STORE_1_FLT << VE1_VFCOMPONENT_2_SHIFT) |
2941               (I965_VFCOMPONENT_STORE_1_FLT << VE1_VFCOMPONENT_3_SHIFT));
2942 }
2943
static void
gen7_emit_vertices(VADriverContextP ctx)
{
    struct i965_driver_data *i965 = i965_driver_data(ctx);
    struct intel_batchbuffer *batch = i965->batch;
    struct i965_render_state *render_state = &i965->render_state;

    BEGIN_BATCH(batch, 5);
    OUT_BATCH(batch, CMD_VERTEX_BUFFERS | (5 - 2));
    OUT_BATCH(batch,
              (0 << GEN6_VB0_BUFFER_INDEX_SHIFT) |
              GEN6_VB0_VERTEXDATA |
              GEN7_VB0_ADDRESS_MODIFYENABLE |
              ((4 * 4) << VB0_BUFFER_PITCH_SHIFT));
    OUT_RELOC(batch, render_state->vb.vertex_buffer, I915_GEM_DOMAIN_VERTEX, 0, 0);      /* start address */
    OUT_RELOC(batch, render_state->vb.vertex_buffer, I915_GEM_DOMAIN_VERTEX, 0, 12 * 4); /* end address: 3 vertices */
    OUT_BATCH(batch, 0);
    ADVANCE_BATCH(batch);

    BEGIN_BATCH(batch, 7);
    OUT_BATCH(batch, CMD_3DPRIMITIVE | (7 - 2));
    OUT_BATCH(batch,
              _3DPRIM_RECTLIST |
              GEN7_3DPRIM_VERTEXBUFFER_ACCESS_SEQUENTIAL);
    OUT_BATCH(batch, 3); /* vertex count per instance */
    OUT_BATCH(batch, 0); /* start vertex offset */
    OUT_BATCH(batch, 1); /* single instance */
    OUT_BATCH(batch, 0); /* start instance location */
    OUT_BATCH(batch, 0);
    ADVANCE_BATCH(batch);
}

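/*
 * Emit the full Gen7 3D pipeline state for one draw: invariant state
 * and base addresses first, per-stage state next (the geometry stages
 * are disabled or pass-through), then vertex layout, vertex data and
 * the draw itself.
 */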
static void
gen7_render_emit_states(VADriverContextP ctx, int kernel)
{
    struct i965_driver_data *i965 = i965_driver_data(ctx);
    struct intel_batchbuffer *batch = i965->batch;

    intel_batchbuffer_start_atomic(batch, 0x1000);
    intel_batchbuffer_emit_mi_flush(batch);
    gen7_emit_invarient_states(ctx);
    gen7_emit_state_base_address(ctx);
    gen7_emit_viewport_state_pointers(ctx);
    gen7_emit_urb(ctx);
    gen7_emit_cc_state_pointers(ctx);
    gen7_emit_sampler_state_pointers(ctx);
    gen7_emit_bypass_state(ctx);
    gen7_emit_vs_state(ctx);
    gen7_emit_clip_state(ctx);
    gen7_emit_sf_state(ctx);
    gen7_emit_wm_state(ctx, kernel);
    gen7_emit_binding_table(ctx);
    gen7_emit_depth_buffer_state(ctx);
    gen7_emit_drawing_rectangle(ctx);
    gen7_emit_vertex_element_state(ctx);
    gen7_emit_vertices(ctx);
    intel_batchbuffer_end_atomic(batch);
}

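/*
 * Top-level Gen7 path for rendering a video surface to the drawable:
 * allocate/reset state, program it for this surface, clear the
 * destination region, then emit the pipeline setup and submit.
 */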
static void
gen7_render_put_surface(
    VADriverContextP   ctx,
    struct object_surface *obj_surface,
    const VARectangle *src_rect,
    const VARectangle *dst_rect,
    unsigned int       flags
)
{
    struct i965_driver_data *i965 = i965_driver_data(ctx);
    struct intel_batchbuffer *batch = i965->batch;

    gen7_render_initialize(ctx);
    gen7_render_setup_states(ctx, obj_surface, src_rect, dst_rect, flags);
    i965_clear_dest_region(ctx);
    gen7_render_emit_states(ctx, PS_KERNEL);
    intel_batchbuffer_flush(batch);
}

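/*
 * Subpictures are composited with source-over alpha blending:
 * dst = src * src_alpha + dst * (1 - src_alpha), clamped to [0, 1].
 */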
static void
gen7_subpicture_render_blend_state(VADriverContextP ctx)
{
    struct i965_driver_data *i965 = i965_driver_data(ctx);
    struct i965_render_state *render_state = &i965->render_state;
    struct gen6_blend_state *blend_state;

    dri_bo_unmap(render_state->cc.state);
    dri_bo_map(render_state->cc.blend, 1);
    assert(render_state->cc.blend->virtual);
    blend_state = render_state->cc.blend->virtual;
    memset(blend_state, 0, sizeof(*blend_state));
    blend_state->blend0.dest_blend_factor = I965_BLENDFACTOR_INV_SRC_ALPHA;
    blend_state->blend0.source_blend_factor = I965_BLENDFACTOR_SRC_ALPHA;
    blend_state->blend0.blend_func = I965_BLENDFUNCTION_ADD;
    blend_state->blend0.blend_enable = 1;
    blend_state->blend1.post_blend_clamp_enable = 1;
    blend_state->blend1.pre_blend_clamp_enable = 1;
    blend_state->blend1.clamp_range = 0; /* clamp range [0, 1] */
    dri_bo_unmap(render_state->cc.blend);
}

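/*
 * Subpicture variant of the state setup: the ARGB subpicture image is
 * the texture source and alpha blending is enabled instead of the
 * opaque YUV->RGB path.
 */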
static void
gen7_subpicture_render_setup_states(
    VADriverContextP   ctx,
    struct object_surface *obj_surface,
    const VARectangle *src_rect,
    const VARectangle *dst_rect
)
{
    i965_render_dest_surface_state(ctx, 0);
    i965_subpic_render_src_surfaces_state(ctx, obj_surface);
    i965_render_sampler(ctx);
    i965_render_cc_viewport(ctx);
    gen7_render_color_calc_state(ctx);
    gen7_subpicture_render_blend_state(ctx);
    gen7_render_depth_stencil_state(ctx);
    i965_subpic_render_upload_constants(ctx, obj_surface);
    i965_subpic_render_upload_vertex(ctx, obj_surface, dst_rect);
}

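/*
 * Composite the subpicture associated with obj_surface on top of the
 * drawable, and upload its palette for indexed image formats.
 */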
static void
gen7_render_put_subpicture(
    VADriverContextP   ctx,
    struct object_surface *obj_surface,
    const VARectangle *src_rect,
    const VARectangle *dst_rect
)
{
    struct i965_driver_data *i965 = i965_driver_data(ctx);
    struct intel_batchbuffer *batch = i965->batch;
    unsigned int index = obj_surface->subpic_render_idx;
    struct object_subpic *obj_subpic = obj_surface->obj_subpic[index];

    assert(obj_subpic);
    gen7_render_initialize(ctx);
    gen7_subpicture_render_setup_states(ctx, obj_surface, src_rect, dst_rect);
    gen7_render_emit_states(ctx, PS_SUBPIC_KERNEL);
    i965_render_upload_image_palette(ctx, obj_subpic->obj_image, 0xff);
    intel_batchbuffer_flush(batch);
}

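/*
 * Generic entry point for putting a surface on screen.  The
 * post-processing pipeline may scale and/or convert the surface first;
 * if it did, render from the intermediate surface instead and skip the
 * source-rectangle mapping that scaling has already applied.
 */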
void
intel_render_put_surface(
    VADriverContextP   ctx,
    struct object_surface *obj_surface,
    const VARectangle *src_rect,
    const VARectangle *dst_rect,
    unsigned int       flags
)
{
    struct i965_driver_data *i965 = i965_driver_data(ctx);
    struct i965_render_state *render_state = &i965->render_state;
    int has_done_scaling = 0;
    VASurfaceID out_surface_id = i965_post_processing(ctx,
                                                      obj_surface,
                                                      src_rect,
                                                      dst_rect,
                                                      flags,
                                                      &has_done_scaling);

    assert((!has_done_scaling) || (out_surface_id != VA_INVALID_ID));

    if (out_surface_id != VA_INVALID_ID) {
        struct object_surface *new_obj_surface = SURFACE(out_surface_id);

        if (new_obj_surface && new_obj_surface->bo)
            obj_surface = new_obj_surface;

        if (has_done_scaling)
            src_rect = dst_rect;
    }

    render_state->render_put_surface(ctx, obj_surface, src_rect, dst_rect, flags);

    if (out_surface_id != VA_INVALID_ID)
        i965_DestroySurfaces(ctx, &out_surface_id, 1);
}

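/* Dispatch to the per-generation subpicture renderer chosen at init time. */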
void
intel_render_put_subpicture(
    VADriverContextP   ctx,
    struct object_surface *obj_surface,
    const VARectangle *src_rect,
    const VARectangle *dst_rect
)
{
    struct i965_driver_data *i965 = i965_driver_data(ctx);
    struct i965_render_state *render_state = &i965->render_state;

    render_state->render_put_subpicture(ctx, obj_surface, src_rect, dst_rect);
}

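/*
 * Free every buffer object owned by the render state: the constant
 * buffer, the kernel binaries, and all pipeline state buffers.
 */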
static void
genx_render_terminate(VADriverContextP ctx)
{
    int i;
    struct i965_driver_data *i965 = i965_driver_data(ctx);
    struct i965_render_state *render_state = &i965->render_state;

    dri_bo_unreference(render_state->curbe.bo);
    render_state->curbe.bo = NULL;

    for (i = 0; i < NUM_RENDER_KERNEL; i++) {
        struct i965_kernel *kernel = &render_state->render_kernels[i];

        dri_bo_unreference(kernel->bo);
        kernel->bo = NULL;
    }

    dri_bo_unreference(render_state->vb.vertex_buffer);
    render_state->vb.vertex_buffer = NULL;
    dri_bo_unreference(render_state->vs.state);
    render_state->vs.state = NULL;
    dri_bo_unreference(render_state->sf.state);
    render_state->sf.state = NULL;
    dri_bo_unreference(render_state->wm.sampler);
    render_state->wm.sampler = NULL;
    dri_bo_unreference(render_state->wm.state);
    render_state->wm.state = NULL;
    dri_bo_unreference(render_state->wm.surface_state_binding_table_bo);
    render_state->wm.surface_state_binding_table_bo = NULL;
    dri_bo_unreference(render_state->cc.viewport);
    render_state->cc.viewport = NULL;
    dri_bo_unreference(render_state->cc.state);
    render_state->cc.state = NULL;
    dri_bo_unreference(render_state->cc.blend);
    render_state->cc.blend = NULL;
    dri_bo_unreference(render_state->cc.depth_stencil);
    render_state->cc.depth_stencil = NULL;

    if (render_state->draw_region) {
        dri_bo_unreference(render_state->draw_region->bo);
        free(render_state->draw_region);
        render_state->draw_region = NULL;
    }
}

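/*
 * Pick the shader kernels and the put_surface/put_subpicture hooks for
 * the detected GPU generation, upload the kernel binaries into buffer
 * objects, and allocate the constant (CURBE) buffer.
 */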
bool
genx_render_init(VADriverContextP ctx)
{
    struct i965_driver_data *i965 = i965_driver_data(ctx);
    struct i965_render_state *render_state = &i965->render_state;
    int i;

    /* kernel */
    assert(NUM_RENDER_KERNEL == (sizeof(render_kernels_gen5) /
                                 sizeof(render_kernels_gen5[0])));
    assert(NUM_RENDER_KERNEL == (sizeof(render_kernels_gen6) /
                                 sizeof(render_kernels_gen6[0])));

    if (IS_GEN7(i965->intel.device_info)) {
        memcpy(render_state->render_kernels,
               (IS_HASWELL(i965->intel.device_info) ? render_kernels_gen7_haswell : render_kernels_gen7),
               sizeof(render_state->render_kernels));
        render_state->render_put_surface = gen7_render_put_surface;
        render_state->render_put_subpicture = gen7_render_put_subpicture;
    } else if (IS_GEN6(i965->intel.device_info)) {
        memcpy(render_state->render_kernels, render_kernels_gen6, sizeof(render_state->render_kernels));
        render_state->render_put_surface = gen6_render_put_surface;
        render_state->render_put_subpicture = gen6_render_put_subpicture;
    } else if (IS_IRONLAKE(i965->intel.device_info)) {
        memcpy(render_state->render_kernels, render_kernels_gen5, sizeof(render_state->render_kernels));
        render_state->render_put_surface = i965_render_put_surface;
        render_state->render_put_subpicture = i965_render_put_subpicture;
    } else {
        memcpy(render_state->render_kernels, render_kernels_gen4, sizeof(render_state->render_kernels));
        render_state->render_put_surface = i965_render_put_surface;
        render_state->render_put_subpicture = i965_render_put_subpicture;
    }

    render_state->render_terminate = genx_render_terminate;

    for (i = 0; i < NUM_RENDER_KERNEL; i++) {
        struct i965_kernel *kernel = &render_state->render_kernels[i];

        if (!kernel->size)
            continue;

        kernel->bo = dri_bo_alloc(i965->intel.bufmgr,
                                  kernel->name,
                                  kernel->size, 0x1000);
        assert(kernel->bo);
        dri_bo_subdata(kernel->bo, 0, kernel->size, kernel->bin);
    }

    /* constant buffer */
    render_state->curbe.bo = dri_bo_alloc(i965->intel.bufmgr,
                                          "constant buffer",
                                          4096, 64);
    assert(render_state->curbe.bo);

    return true;
}

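/*
 * Render initialization goes through the hook in codec_info so that a
 * generation-specific backend can install its own implementation.
 */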
bool
i965_render_init(VADriverContextP ctx)
{
    struct i965_driver_data *i965 = i965_driver_data(ctx);

    return i965->codec_info->render_init(ctx);
}

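/* Tear down whichever render backend render_init() installed. */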
void
i965_render_terminate(VADriverContextP ctx)
{
    struct i965_driver_data *i965 = i965_driver_data(ctx);
    struct i965_render_state *render_state = &i965->render_state;

    render_state->render_terminate(ctx);
}