Update the pixel shader for BDW rendering function
[platform/upstream/libva-intel-driver.git] / src / i965_render.c
/*
 * Copyright © 2006 Intel Corporation
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice (including the next
 * paragraph) shall be included in all copies or substantial portions of the
 * Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
 * DEALINGS IN THE SOFTWARE.
 *
 * Authors:
 *    Eric Anholt <eric@anholt.net>
 *    Keith Packard <keithp@keithp.com>
 *    Xiang Haihao <haihao.xiang@intel.com>
 *
 */

/*
 * Most of the rendering code is ported from xf86-video-intel/src/i965_video.c
 */

#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <assert.h>
#include <math.h>

#include <va/va_drmcommon.h>

#include "intel_batchbuffer.h"
#include "intel_driver.h"
#include "i965_defines.h"
#include "i965_drv_video.h"
#include "i965_structs.h"

#include "i965_render.h"

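/*
 * The *_kernel_static arrays below hold precompiled GEN shader binaries:
 * each .g4b/.g6b/.g7b/.g8b file under shaders/render/ is generated from the
 * corresponding assembly source and expands to an initializer list of
 * 128-bit (4 x uint32_t) instructions.
 */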
#define SF_KERNEL_NUM_GRF       16
#define SF_MAX_THREADS          1

static const uint32_t sf_kernel_static[][4] =
{
#include "shaders/render/exa_sf.g4b"
};

#define PS_KERNEL_NUM_GRF       48
#define PS_MAX_THREADS          32

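/* Number of 16-register GRF blocks a kernel's register count occupies, minus one (hardware encoding) */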
#define I965_GRF_BLOCKS(nreg)   (((nreg) + 15) / 16 - 1)

static const uint32_t ps_kernel_static[][4] =
{
#include "shaders/render/exa_wm_xy.g4b"
#include "shaders/render/exa_wm_src_affine.g4b"
#include "shaders/render/exa_wm_src_sample_planar.g4b"
#include "shaders/render/exa_wm_yuv_color_balance.g4b"
#include "shaders/render/exa_wm_yuv_rgb.g4b"
#include "shaders/render/exa_wm_write.g4b"
};
static const uint32_t ps_subpic_kernel_static[][4] =
{
#include "shaders/render/exa_wm_xy.g4b"
#include "shaders/render/exa_wm_src_affine.g4b"
#include "shaders/render/exa_wm_src_sample_argb.g4b"
#include "shaders/render/exa_wm_write.g4b"
};

/* Programs for Ironlake */
static const uint32_t sf_kernel_static_gen5[][4] =
{
#include "shaders/render/exa_sf.g4b.gen5"
};

static const uint32_t ps_kernel_static_gen5[][4] =
{
#include "shaders/render/exa_wm_xy.g4b.gen5"
#include "shaders/render/exa_wm_src_affine.g4b.gen5"
#include "shaders/render/exa_wm_src_sample_planar.g4b.gen5"
#include "shaders/render/exa_wm_yuv_color_balance.g4b.gen5"
#include "shaders/render/exa_wm_yuv_rgb.g4b.gen5"
#include "shaders/render/exa_wm_write.g4b.gen5"
};
static const uint32_t ps_subpic_kernel_static_gen5[][4] =
{
#include "shaders/render/exa_wm_xy.g4b.gen5"
#include "shaders/render/exa_wm_src_affine.g4b.gen5"
#include "shaders/render/exa_wm_src_sample_argb.g4b.gen5"
#include "shaders/render/exa_wm_write.g4b.gen5"
};

/* Programs for Sandybridge */
static const uint32_t sf_kernel_static_gen6[][4] =
{
};

static const uint32_t ps_kernel_static_gen6[][4] = {
#include "shaders/render/exa_wm_src_affine.g6b"
#include "shaders/render/exa_wm_src_sample_planar.g6b"
#include "shaders/render/exa_wm_yuv_color_balance.g6b"
#include "shaders/render/exa_wm_yuv_rgb.g6b"
#include "shaders/render/exa_wm_write.g6b"
};

static const uint32_t ps_subpic_kernel_static_gen6[][4] = {
#include "shaders/render/exa_wm_src_affine.g6b"
#include "shaders/render/exa_wm_src_sample_argb.g6b"
#include "shaders/render/exa_wm_write.g6b"
};

/* Programs for Ivybridge */
static const uint32_t sf_kernel_static_gen7[][4] =
{
};

static const uint32_t ps_kernel_static_gen7[][4] = {
#include "shaders/render/exa_wm_src_affine.g7b"
#include "shaders/render/exa_wm_src_sample_planar.g7b"
#include "shaders/render/exa_wm_yuv_color_balance.g7b"
#include "shaders/render/exa_wm_yuv_rgb.g7b"
#include "shaders/render/exa_wm_write.g7b"
};

static const uint32_t ps_subpic_kernel_static_gen7[][4] = {
#include "shaders/render/exa_wm_src_affine.g7b"
#include "shaders/render/exa_wm_src_sample_argb.g7b"
#include "shaders/render/exa_wm_write.g7b"
};

/* Programs for Haswell */
static const uint32_t ps_kernel_static_gen7_haswell[][4] = {
#include "shaders/render/exa_wm_src_affine.g7b"
#include "shaders/render/exa_wm_src_sample_planar.g7b.haswell"
#include "shaders/render/exa_wm_yuv_color_balance.g7b.haswell"
#include "shaders/render/exa_wm_yuv_rgb.g7b"
#include "shaders/render/exa_wm_write.g7b"
};

/*
 * TODO: Write GEN8-specific shaders; the subpicture program below still
 * reuses the gen7 binaries.
 */
/* Programs for Gen8 */
static const uint32_t sf_kernel_static_gen8[][4] =
{
};
static const uint32_t ps_kernel_static_gen8[][4] = {
#include "shaders/render/exa_wm_src_affine.g8b"
#include "shaders/render/exa_wm_src_sample_planar.g8b"
#include "shaders/render/exa_wm_yuv_rgb.g8b"
#include "shaders/render/exa_wm_write.g8b"
};

static const uint32_t ps_subpic_kernel_static_gen8[][4] = {
#include "shaders/render/exa_wm_src_affine.g7b"
#include "shaders/render/exa_wm_src_sample_argb.g7b"
#include "shaders/render/exa_wm_write.g7b"
};


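/*
 * Surface state and binding table share one buffer object: MAX_RENDER_SURFACES
 * padded surface-state blocks come first, followed by the binding table whose
 * entries hold the SURFACE_STATE_OFFSET() of each surface.
 */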
#define SURFACE_STATE_PADDED_SIZE       MAX(SURFACE_STATE_PADDED_SIZE_GEN8, \
                                MAX(SURFACE_STATE_PADDED_SIZE_GEN6, SURFACE_STATE_PADDED_SIZE_GEN7))

#define SURFACE_STATE_OFFSET(index)     (SURFACE_STATE_PADDED_SIZE * (index))
#define BINDING_TABLE_OFFSET            SURFACE_STATE_OFFSET(MAX_RENDER_SURFACES)

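/* Reinterpret the bits of a float as a uint32_t, for emitting into the batch buffer */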
static uint32_t float_to_uint(float f)
{
    union {
        uint32_t i;
        float f;
    } x;

    x.f = f;
    return x.i;
}

enum
{
    SF_KERNEL = 0,
    PS_KERNEL,
    PS_SUBPIC_KERNEL
};

static struct i965_kernel render_kernels_gen4[] = {
    {
        "SF",
        SF_KERNEL,
        sf_kernel_static,
        sizeof(sf_kernel_static),
        NULL
    },
    {
        "PS",
        PS_KERNEL,
        ps_kernel_static,
        sizeof(ps_kernel_static),
        NULL
    },

    {
        "PS_SUBPIC",
        PS_SUBPIC_KERNEL,
        ps_subpic_kernel_static,
        sizeof(ps_subpic_kernel_static),
        NULL
    }
};

static struct i965_kernel render_kernels_gen5[] = {
    {
        "SF",
        SF_KERNEL,
        sf_kernel_static_gen5,
        sizeof(sf_kernel_static_gen5),
        NULL
    },
    {
        "PS",
        PS_KERNEL,
        ps_kernel_static_gen5,
        sizeof(ps_kernel_static_gen5),
        NULL
    },

    {
        "PS_SUBPIC",
        PS_SUBPIC_KERNEL,
        ps_subpic_kernel_static_gen5,
        sizeof(ps_subpic_kernel_static_gen5),
        NULL
    }
};

static struct i965_kernel render_kernels_gen6[] = {
    {
        "SF",
        SF_KERNEL,
        sf_kernel_static_gen6,
        sizeof(sf_kernel_static_gen6),
        NULL
    },
    {
        "PS",
        PS_KERNEL,
        ps_kernel_static_gen6,
        sizeof(ps_kernel_static_gen6),
        NULL
    },

    {
        "PS_SUBPIC",
        PS_SUBPIC_KERNEL,
        ps_subpic_kernel_static_gen6,
        sizeof(ps_subpic_kernel_static_gen6),
        NULL
    }
};

static struct i965_kernel render_kernels_gen7[] = {
    {
        "SF",
        SF_KERNEL,
        sf_kernel_static_gen7,
        sizeof(sf_kernel_static_gen7),
        NULL
    },
    {
        "PS",
        PS_KERNEL,
        ps_kernel_static_gen7,
        sizeof(ps_kernel_static_gen7),
        NULL
    },

    {
        "PS_SUBPIC",
        PS_SUBPIC_KERNEL,
        ps_subpic_kernel_static_gen7,
        sizeof(ps_subpic_kernel_static_gen7),
        NULL
    }
};

static struct i965_kernel render_kernels_gen7_haswell[] = {
    {
        "SF",
        SF_KERNEL,
        sf_kernel_static_gen7,
        sizeof(sf_kernel_static_gen7),
        NULL
    },
    {
        "PS",
        PS_KERNEL,
        ps_kernel_static_gen7_haswell,
        sizeof(ps_kernel_static_gen7_haswell),
        NULL
    },

    {
        "PS_SUBPIC",
        PS_SUBPIC_KERNEL,
        ps_subpic_kernel_static_gen7,
        sizeof(ps_subpic_kernel_static_gen7),
        NULL
    }
};

static struct i965_kernel render_kernels_gen8[] = {
    {
        "SF",
        SF_KERNEL,
        sf_kernel_static_gen8,
        sizeof(sf_kernel_static_gen8),
        NULL
    },
    {
        "PS",
        PS_KERNEL,
        ps_kernel_static_gen8,
        sizeof(ps_kernel_static_gen8),
        NULL
    },

    {
        "PS_SUBPIC",
        PS_SUBPIC_KERNEL,
        ps_subpic_kernel_static_gen8,
        sizeof(ps_subpic_kernel_static_gen8),
        NULL
    }
};

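/*
 * Static URB partitioning for the fixed-function pipeline; the totals here
 * feed the fences programmed by i965_render_urb_layout() and
 * i965_render_cs_urb_layout() below.
 */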
#define URB_VS_ENTRIES        8
#define URB_VS_ENTRY_SIZE     1

#define URB_GS_ENTRIES        0
#define URB_GS_ENTRY_SIZE     0

#define URB_CLIP_ENTRIES      0
#define URB_CLIP_ENTRY_SIZE   0

#define URB_SF_ENTRIES        1
#define URB_SF_ENTRY_SIZE     2

#define URB_CS_ENTRIES        4
#define URB_CS_ENTRY_SIZE     4

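/*
 * Per-standard YUV -> RGB coefficient tables consumed by the exa_wm_yuv_rgb
 * kernels.  Rows correspond to R, G and B; the first three columns scale the
 * Y, U and V components and the fourth column carries the bias term in
 * normalized [0,1] space (-0.06275 = -16/255 for the luma offset,
 * -0.50196 = -128/255 for the chroma offset).
 */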
static float yuv_to_rgb_bt601[3][4] = {
{1.164,         0,      1.596,          -0.06275,},
{1.164,         -0.392, -0.813,         -0.50196,},
{1.164,         2.017,  0,              -0.50196,},
};

static float yuv_to_rgb_bt709[3][4] = {
{1.164,         0,      1.793,          -0.06275,},
{1.164,         -0.213, -0.533,         -0.50196,},
{1.164,         2.112,  0,              -0.50196,},
};

static float yuv_to_rgb_smpte_240[3][4] = {
{1.164,         0,      1.794,          -0.06275,},
{1.164,         -0.258, -0.5425,        -0.50196,},
{1.164,         2.078,  0,              -0.50196,},
};

static void
i965_render_vs_unit(VADriverContextP ctx)
{
    struct i965_driver_data *i965 = i965_driver_data(ctx);
    struct i965_render_state *render_state = &i965->render_state;
    struct i965_vs_unit_state *vs_state;

    dri_bo_map(render_state->vs.state, 1);
    assert(render_state->vs.state->virtual);
    vs_state = render_state->vs.state->virtual;
    memset(vs_state, 0, sizeof(*vs_state));

    if (IS_IRONLAKE(i965->intel.device_id))
        vs_state->thread4.nr_urb_entries = URB_VS_ENTRIES >> 2;
    else
        vs_state->thread4.nr_urb_entries = URB_VS_ENTRIES;

    vs_state->thread4.urb_entry_allocation_size = URB_VS_ENTRY_SIZE - 1;
    vs_state->vs6.vs_enable = 0;
    vs_state->vs6.vert_cache_disable = 1;

    dri_bo_unmap(render_state->vs.state);
}

static void
i965_render_sf_unit(VADriverContextP ctx)
{
    struct i965_driver_data *i965 = i965_driver_data(ctx);
    struct i965_render_state *render_state = &i965->render_state;
    struct i965_sf_unit_state *sf_state;

    dri_bo_map(render_state->sf.state, 1);
    assert(render_state->sf.state->virtual);
    sf_state = render_state->sf.state->virtual;
    memset(sf_state, 0, sizeof(*sf_state));

    sf_state->thread0.grf_reg_count = I965_GRF_BLOCKS(SF_KERNEL_NUM_GRF);
    sf_state->thread0.kernel_start_pointer = render_state->render_kernels[SF_KERNEL].bo->offset >> 6;

    sf_state->sf1.single_program_flow = 1; /* XXX */
    sf_state->sf1.binding_table_entry_count = 0;
    sf_state->sf1.thread_priority = 0;
    sf_state->sf1.floating_point_mode = 0; /* Mesa does this */
    sf_state->sf1.illegal_op_exception_enable = 1;
    sf_state->sf1.mask_stack_exception_enable = 1;
    sf_state->sf1.sw_exception_enable = 1;

    /* scratch space is not used in our kernel */
    sf_state->thread2.per_thread_scratch_space = 0;
    sf_state->thread2.scratch_space_base_pointer = 0;

    sf_state->thread3.const_urb_entry_read_length = 0; /* no const URBs */
    sf_state->thread3.const_urb_entry_read_offset = 0; /* no const URBs */
    sf_state->thread3.urb_entry_read_length = 1; /* 1 URB per vertex */
    sf_state->thread3.urb_entry_read_offset = 0;
    sf_state->thread3.dispatch_grf_start_reg = 3;

    sf_state->thread4.max_threads = SF_MAX_THREADS - 1;
    sf_state->thread4.urb_entry_allocation_size = URB_SF_ENTRY_SIZE - 1;
    sf_state->thread4.nr_urb_entries = URB_SF_ENTRIES;
    sf_state->thread4.stats_enable = 1;

    sf_state->sf5.viewport_transform = 0; /* skip viewport */

    sf_state->sf6.cull_mode = I965_CULLMODE_NONE;
    sf_state->sf6.scissor = 0;

    sf_state->sf7.trifan_pv = 2;

    sf_state->sf6.dest_org_vbias = 0x8;
    sf_state->sf6.dest_org_hbias = 0x8;

    dri_bo_emit_reloc(render_state->sf.state,
                      I915_GEM_DOMAIN_INSTRUCTION, 0,
                      sf_state->thread0.grf_reg_count << 1,
                      offsetof(struct i965_sf_unit_state, thread0),
                      render_state->render_kernels[SF_KERNEL].bo);

    dri_bo_unmap(render_state->sf.state);
}

static void
i965_render_sampler(VADriverContextP ctx)
{
    struct i965_driver_data *i965 = i965_driver_data(ctx);
    struct i965_render_state *render_state = &i965->render_state;
    struct i965_sampler_state *sampler_state;
    int i;

    assert(render_state->wm.sampler_count > 0);
    assert(render_state->wm.sampler_count <= MAX_SAMPLERS);

    dri_bo_map(render_state->wm.sampler, 1);
    assert(render_state->wm.sampler->virtual);
    sampler_state = render_state->wm.sampler->virtual;
    for (i = 0; i < render_state->wm.sampler_count; i++) {
        memset(sampler_state, 0, sizeof(*sampler_state));
        sampler_state->ss0.min_filter = I965_MAPFILTER_LINEAR;
        sampler_state->ss0.mag_filter = I965_MAPFILTER_LINEAR;
        sampler_state->ss1.r_wrap_mode = I965_TEXCOORDMODE_CLAMP;
        sampler_state->ss1.s_wrap_mode = I965_TEXCOORDMODE_CLAMP;
        sampler_state->ss1.t_wrap_mode = I965_TEXCOORDMODE_CLAMP;
        sampler_state++;
    }

    dri_bo_unmap(render_state->wm.sampler);
}

static void
i965_subpic_render_wm_unit(VADriverContextP ctx)
{
    struct i965_driver_data *i965 = i965_driver_data(ctx);
    struct i965_render_state *render_state = &i965->render_state;
    struct i965_wm_unit_state *wm_state;

    assert(render_state->wm.sampler);

    dri_bo_map(render_state->wm.state, 1);
    assert(render_state->wm.state->virtual);
    wm_state = render_state->wm.state->virtual;
    memset(wm_state, 0, sizeof(*wm_state));

    wm_state->thread0.grf_reg_count = I965_GRF_BLOCKS(PS_KERNEL_NUM_GRF);
    wm_state->thread0.kernel_start_pointer = render_state->render_kernels[PS_SUBPIC_KERNEL].bo->offset >> 6;

    wm_state->thread1.single_program_flow = 1; /* XXX */

    if (IS_IRONLAKE(i965->intel.device_id))
        wm_state->thread1.binding_table_entry_count = 0; /* hardware requirement */
    else
        wm_state->thread1.binding_table_entry_count = 7;

    wm_state->thread2.scratch_space_base_pointer = 0;
    wm_state->thread2.per_thread_scratch_space = 0; /* 1024 bytes */

    wm_state->thread3.dispatch_grf_start_reg = 2; /* XXX */
    wm_state->thread3.const_urb_entry_read_length = 4;
    wm_state->thread3.const_urb_entry_read_offset = 0;
    wm_state->thread3.urb_entry_read_length = 1; /* XXX */
    wm_state->thread3.urb_entry_read_offset = 0; /* XXX */

    wm_state->wm4.stats_enable = 0;
    wm_state->wm4.sampler_state_pointer = render_state->wm.sampler->offset >> 5;

    if (IS_IRONLAKE(i965->intel.device_id)) {
        wm_state->wm4.sampler_count = 0;        /* hardware requirement */
    } else {
        wm_state->wm4.sampler_count = (render_state->wm.sampler_count + 3) / 4;
    }

    wm_state->wm5.max_threads = render_state->max_wm_threads - 1;
    wm_state->wm5.thread_dispatch_enable = 1;
    wm_state->wm5.enable_16_pix = 1;
    wm_state->wm5.enable_8_pix = 0;
    wm_state->wm5.early_depth_test = 1;

    dri_bo_emit_reloc(render_state->wm.state,
                      I915_GEM_DOMAIN_INSTRUCTION, 0,
                      wm_state->thread0.grf_reg_count << 1,
                      offsetof(struct i965_wm_unit_state, thread0),
                      render_state->render_kernels[PS_SUBPIC_KERNEL].bo);

    dri_bo_emit_reloc(render_state->wm.state,
                      I915_GEM_DOMAIN_INSTRUCTION, 0,
                      wm_state->wm4.sampler_count << 2,
                      offsetof(struct i965_wm_unit_state, wm4),
                      render_state->wm.sampler);

    dri_bo_unmap(render_state->wm.state);
}


static void
i965_render_wm_unit(VADriverContextP ctx)
{
    struct i965_driver_data *i965 = i965_driver_data(ctx);
    struct i965_render_state *render_state = &i965->render_state;
    struct i965_wm_unit_state *wm_state;

    assert(render_state->wm.sampler);

    dri_bo_map(render_state->wm.state, 1);
    assert(render_state->wm.state->virtual);
    wm_state = render_state->wm.state->virtual;
    memset(wm_state, 0, sizeof(*wm_state));

    wm_state->thread0.grf_reg_count = I965_GRF_BLOCKS(PS_KERNEL_NUM_GRF);
    wm_state->thread0.kernel_start_pointer = render_state->render_kernels[PS_KERNEL].bo->offset >> 6;

    wm_state->thread1.single_program_flow = 1; /* XXX */

    if (IS_IRONLAKE(i965->intel.device_id))
        wm_state->thread1.binding_table_entry_count = 0;        /* hardware requirement */
    else
        wm_state->thread1.binding_table_entry_count = 7;

    wm_state->thread2.scratch_space_base_pointer = 0;
    wm_state->thread2.per_thread_scratch_space = 0; /* 1024 bytes */

    wm_state->thread3.dispatch_grf_start_reg = 2; /* XXX */
    wm_state->thread3.const_urb_entry_read_length = 4;
    wm_state->thread3.const_urb_entry_read_offset = 0;
    wm_state->thread3.urb_entry_read_length = 1; /* XXX */
    wm_state->thread3.urb_entry_read_offset = 0; /* XXX */

    wm_state->wm4.stats_enable = 0;
    wm_state->wm4.sampler_state_pointer = render_state->wm.sampler->offset >> 5;

    if (IS_IRONLAKE(i965->intel.device_id)) {
        wm_state->wm4.sampler_count = 0;        /* hardware requirement */
    } else {
        wm_state->wm4.sampler_count = (render_state->wm.sampler_count + 3) / 4;
    }

    wm_state->wm5.max_threads = render_state->max_wm_threads - 1;
    wm_state->wm5.thread_dispatch_enable = 1;
    wm_state->wm5.enable_16_pix = 1;
    wm_state->wm5.enable_8_pix = 0;
    wm_state->wm5.early_depth_test = 1;

    dri_bo_emit_reloc(render_state->wm.state,
                      I915_GEM_DOMAIN_INSTRUCTION, 0,
                      wm_state->thread0.grf_reg_count << 1,
                      offsetof(struct i965_wm_unit_state, thread0),
                      render_state->render_kernels[PS_KERNEL].bo);

    dri_bo_emit_reloc(render_state->wm.state,
                      I915_GEM_DOMAIN_INSTRUCTION, 0,
                      wm_state->wm4.sampler_count << 2,
                      offsetof(struct i965_wm_unit_state, wm4),
                      render_state->wm.sampler);

    dri_bo_unmap(render_state->wm.state);
}

static void
i965_render_cc_viewport(VADriverContextP ctx)
{
    struct i965_driver_data *i965 = i965_driver_data(ctx);
    struct i965_render_state *render_state = &i965->render_state;
    struct i965_cc_viewport *cc_viewport;

    dri_bo_map(render_state->cc.viewport, 1);
    assert(render_state->cc.viewport->virtual);
    cc_viewport = render_state->cc.viewport->virtual;
    memset(cc_viewport, 0, sizeof(*cc_viewport));

    cc_viewport->min_depth = -1.e35;
    cc_viewport->max_depth = 1.e35;

    dri_bo_unmap(render_state->cc.viewport);
}

static void
i965_subpic_render_cc_unit(VADriverContextP ctx)
{
    struct i965_driver_data *i965 = i965_driver_data(ctx);
    struct i965_render_state *render_state = &i965->render_state;
    struct i965_cc_unit_state *cc_state;

    assert(render_state->cc.viewport);

    dri_bo_map(render_state->cc.state, 1);
    assert(render_state->cc.state->virtual);
    cc_state = render_state->cc.state->virtual;
    memset(cc_state, 0, sizeof(*cc_state));

    cc_state->cc0.stencil_enable = 0;   /* disable stencil */
    cc_state->cc2.depth_test = 0;       /* disable depth test */
    cc_state->cc2.logicop_enable = 0;   /* disable logic op */
    cc_state->cc3.ia_blend_enable = 0;  /* blend alpha just like colors */
    cc_state->cc3.blend_enable = 1;     /* enable color blend */
    cc_state->cc3.alpha_test = 0;       /* disable alpha test */
    cc_state->cc3.alpha_test_format = 0; /* ALPHATEST_UNORM8: store alpha value as UNORM8 */
    cc_state->cc3.alpha_test_func = 5;  /* COMPAREFUNCTION_LESS: pass if less than the reference */
    cc_state->cc4.cc_viewport_state_offset = render_state->cc.viewport->offset >> 5;

    cc_state->cc5.dither_enable = 0;    /* disable dither */
    cc_state->cc5.logicop_func = 0xc;   /* COPY */
    cc_state->cc5.statistics_enable = 1;
    cc_state->cc5.ia_blend_function = I965_BLENDFUNCTION_ADD;
    cc_state->cc5.ia_src_blend_factor = I965_BLENDFACTOR_DST_ALPHA;
    cc_state->cc5.ia_dest_blend_factor = I965_BLENDFACTOR_DST_ALPHA;

    cc_state->cc6.clamp_post_alpha_blend = 0;
    cc_state->cc6.clamp_pre_alpha_blend = 0;

    /* final color = src_color * src_blend_factor +/- dst_color * dest_blend_factor */
    cc_state->cc6.blend_function = I965_BLENDFUNCTION_ADD;
    cc_state->cc6.src_blend_factor = I965_BLENDFACTOR_SRC_ALPHA;
    cc_state->cc6.dest_blend_factor = I965_BLENDFACTOR_INV_SRC_ALPHA;

    /* alpha test reference value */
    cc_state->cc7.alpha_ref.f = 0.0;

    dri_bo_emit_reloc(render_state->cc.state,
                      I915_GEM_DOMAIN_INSTRUCTION, 0,
                      0,
                      offsetof(struct i965_cc_unit_state, cc4),
                      render_state->cc.viewport);

    dri_bo_unmap(render_state->cc.state);
}


static void
i965_render_cc_unit(VADriverContextP ctx)
{
    struct i965_driver_data *i965 = i965_driver_data(ctx);
    struct i965_render_state *render_state = &i965->render_state;
    struct i965_cc_unit_state *cc_state;

    assert(render_state->cc.viewport);

    dri_bo_map(render_state->cc.state, 1);
    assert(render_state->cc.state->virtual);
    cc_state = render_state->cc.state->virtual;
    memset(cc_state, 0, sizeof(*cc_state));

    cc_state->cc0.stencil_enable = 0;   /* disable stencil */
    cc_state->cc2.depth_test = 0;       /* disable depth test */
    cc_state->cc2.logicop_enable = 1;   /* enable logic op */
    cc_state->cc3.ia_blend_enable = 0;  /* blend alpha just like colors */
    cc_state->cc3.blend_enable = 0;     /* disable color blend */
    cc_state->cc3.alpha_test = 0;       /* disable alpha test */
    cc_state->cc4.cc_viewport_state_offset = render_state->cc.viewport->offset >> 5;

    cc_state->cc5.dither_enable = 0;    /* disable dither */
    cc_state->cc5.logicop_func = 0xc;   /* COPY */
    cc_state->cc5.statistics_enable = 1;
    cc_state->cc5.ia_blend_function = I965_BLENDFUNCTION_ADD;
    cc_state->cc5.ia_src_blend_factor = I965_BLENDFACTOR_ONE;
    cc_state->cc5.ia_dest_blend_factor = I965_BLENDFACTOR_ONE;

    dri_bo_emit_reloc(render_state->cc.state,
                      I915_GEM_DOMAIN_INSTRUCTION, 0,
                      0,
                      offsetof(struct i965_cc_unit_state, cc4),
                      render_state->cc.viewport);

    dri_bo_unmap(render_state->cc.state);
}

static void
i965_render_set_surface_tiling(struct i965_surface_state *ss, unsigned int tiling)
{
    switch (tiling) {
    case I915_TILING_NONE:
        ss->ss3.tiled_surface = 0;
        ss->ss3.tile_walk = 0;
        break;
    case I915_TILING_X:
        ss->ss3.tiled_surface = 1;
        ss->ss3.tile_walk = I965_TILEWALK_XMAJOR;
        break;
    case I915_TILING_Y:
        ss->ss3.tiled_surface = 1;
        ss->ss3.tile_walk = I965_TILEWALK_YMAJOR;
        break;
    }
}

static void
i965_render_set_surface_state(
    struct i965_surface_state *ss,
    dri_bo                    *bo,
    unsigned long              offset,
    unsigned int               width,
    unsigned int               height,
    unsigned int               pitch,
    unsigned int               format,
    unsigned int               flags
)
{
    unsigned int tiling;
    unsigned int swizzle;

    memset(ss, 0, sizeof(*ss));

    switch (flags & (I965_PP_FLAG_TOP_FIELD|I965_PP_FLAG_BOTTOM_FIELD)) {
    case I965_PP_FLAG_BOTTOM_FIELD:
        ss->ss0.vert_line_stride_ofs = 1;
        /* fall-through */
    case I965_PP_FLAG_TOP_FIELD:
        ss->ss0.vert_line_stride = 1;
        height /= 2;
        break;
    }

    ss->ss0.surface_type = I965_SURFACE_2D;
    ss->ss0.surface_format = format;
    ss->ss0.color_blend = 1;

    ss->ss1.base_addr = bo->offset + offset;

    ss->ss2.width = width - 1;
    ss->ss2.height = height - 1;

    ss->ss3.pitch = pitch - 1;

    dri_bo_get_tiling(bo, &tiling, &swizzle);
    i965_render_set_surface_tiling(ss, tiling);
}

static void
gen7_render_set_surface_tiling(struct gen7_surface_state *ss, uint32_t tiling)
{
    switch (tiling) {
    case I915_TILING_NONE:
        ss->ss0.tiled_surface = 0;
        ss->ss0.tile_walk = 0;
        break;
    case I915_TILING_X:
        ss->ss0.tiled_surface = 1;
        ss->ss0.tile_walk = I965_TILEWALK_XMAJOR;
        break;
    case I915_TILING_Y:
        ss->ss0.tiled_surface = 1;
        ss->ss0.tile_walk = I965_TILEWALK_YMAJOR;
        break;
    }
}

static void
gen8_render_set_surface_tiling(struct gen8_surface_state *ss, uint32_t tiling)
{
    switch (tiling) {
    case I915_TILING_NONE:
        ss->ss0.tiled_surface = 0;
        ss->ss0.tile_walk = 0;
        break;
    case I915_TILING_X:
        ss->ss0.tiled_surface = 1;
        ss->ss0.tile_walk = I965_TILEWALK_XMAJOR;
        break;
    case I915_TILING_Y:
        ss->ss0.tiled_surface = 1;
        ss->ss0.tile_walk = I965_TILEWALK_YMAJOR;
        break;
    }
}

/* Set "Shader Channel Select" */
void
gen7_render_set_surface_scs(struct gen7_surface_state *ss)
{
    ss->ss7.shader_chanel_select_r = HSW_SCS_RED;
    ss->ss7.shader_chanel_select_g = HSW_SCS_GREEN;
    ss->ss7.shader_chanel_select_b = HSW_SCS_BLUE;
    ss->ss7.shader_chanel_select_a = HSW_SCS_ALPHA;
}

/* Set "Shader Channel Select" for GEN8+ */
void
gen8_render_set_surface_scs(struct gen8_surface_state *ss)
{
    ss->ss7.shader_chanel_select_r = HSW_SCS_RED;
    ss->ss7.shader_chanel_select_g = HSW_SCS_GREEN;
    ss->ss7.shader_chanel_select_b = HSW_SCS_BLUE;
    ss->ss7.shader_chanel_select_a = HSW_SCS_ALPHA;
}

static void
gen7_render_set_surface_state(
    struct gen7_surface_state *ss,
    dri_bo                    *bo,
    unsigned long              offset,
    int                        width,
    int                        height,
    int                        pitch,
    int                        format,
    unsigned int               flags
)
{
    unsigned int tiling;
    unsigned int swizzle;

    memset(ss, 0, sizeof(*ss));

    switch (flags & (I965_PP_FLAG_TOP_FIELD|I965_PP_FLAG_BOTTOM_FIELD)) {
    case I965_PP_FLAG_BOTTOM_FIELD:
        ss->ss0.vert_line_stride_ofs = 1;
        /* fall-through */
    case I965_PP_FLAG_TOP_FIELD:
        ss->ss0.vert_line_stride = 1;
        height /= 2;
        break;
    }

    ss->ss0.surface_type = I965_SURFACE_2D;
    ss->ss0.surface_format = format;

    ss->ss1.base_addr = bo->offset + offset;

    ss->ss2.width = width - 1;
    ss->ss2.height = height - 1;

    ss->ss3.pitch = pitch - 1;

    dri_bo_get_tiling(bo, &tiling, &swizzle);
    gen7_render_set_surface_tiling(ss, tiling);
}


static void
gen8_render_set_surface_state(
    struct gen8_surface_state *ss,
    dri_bo                    *bo,
    unsigned long              offset,
    int                        width,
    int                        height,
    int                        pitch,
    int                        format,
    unsigned int               flags
)
{
    unsigned int tiling;
    unsigned int swizzle;

    memset(ss, 0, sizeof(*ss));

    switch (flags & (I965_PP_FLAG_TOP_FIELD|I965_PP_FLAG_BOTTOM_FIELD)) {
    case I965_PP_FLAG_BOTTOM_FIELD:
        ss->ss0.vert_line_stride_ofs = 1;
        /* fall-through */
    case I965_PP_FLAG_TOP_FIELD:
        ss->ss0.vert_line_stride = 1;
        height /= 2;
        break;
    }

    ss->ss0.surface_type = I965_SURFACE_2D;
    ss->ss0.surface_format = format;

    ss->ss8.base_addr = bo->offset + offset;

    ss->ss2.width = width - 1;
    ss->ss2.height = height - 1;

    ss->ss3.pitch = pitch - 1;

    dri_bo_get_tiling(bo, &tiling, &swizzle);
    gen8_render_set_surface_tiling(ss, tiling);
}

static void
i965_render_src_surface_state(
    VADriverContextP ctx,
    int              index,
    dri_bo          *region,
    unsigned long    offset,
    int              w,
    int              h,
    int              pitch,
    int              format,
    unsigned int     flags
)
{
    struct i965_driver_data *i965 = i965_driver_data(ctx);
    struct i965_render_state *render_state = &i965->render_state;
    void *ss;
    dri_bo *ss_bo = render_state->wm.surface_state_binding_table_bo;

    assert(index < MAX_RENDER_SURFACES);

    dri_bo_map(ss_bo, 1);
    assert(ss_bo->virtual);
    ss = (char *)ss_bo->virtual + SURFACE_STATE_OFFSET(index);

    if (IS_GEN8(i965->intel.device_id)) {
        gen8_render_set_surface_state(ss,
                                      region, offset,
                                      w, h,
                                      pitch, format, flags);
        gen8_render_set_surface_scs(ss);
        dri_bo_emit_reloc(ss_bo,
                          I915_GEM_DOMAIN_SAMPLER, 0,
                          offset,
                          SURFACE_STATE_OFFSET(index) + offsetof(struct gen8_surface_state, ss8),
                          region);
    } else if (IS_GEN7(i965->intel.device_id)) {
        gen7_render_set_surface_state(ss,
                                      region, offset,
                                      w, h,
                                      pitch, format, flags);
        if (IS_HASWELL(i965->intel.device_id))
            gen7_render_set_surface_scs(ss);
        dri_bo_emit_reloc(ss_bo,
                          I915_GEM_DOMAIN_SAMPLER, 0,
                          offset,
                          SURFACE_STATE_OFFSET(index) + offsetof(struct gen7_surface_state, ss1),
                          region);
    } else {
        i965_render_set_surface_state(ss,
                                      region, offset,
                                      w, h,
                                      pitch, format, flags);
        dri_bo_emit_reloc(ss_bo,
                          I915_GEM_DOMAIN_SAMPLER, 0,
                          offset,
                          SURFACE_STATE_OFFSET(index) + offsetof(struct i965_surface_state, ss1),
                          region);
    }

    ((unsigned int *)((char *)ss_bo->virtual + BINDING_TABLE_OFFSET))[index] = SURFACE_STATE_OFFSET(index);
    dri_bo_unmap(ss_bo);
    render_state->wm.sampler_count++;
}

static void
i965_render_src_surfaces_state(
    VADriverContextP ctx,
    struct object_surface *obj_surface,
    unsigned int     flags
)
{
    int region_pitch;
    int rw, rh;
    dri_bo *region;

    region_pitch = obj_surface->width;
    rw = obj_surface->orig_width;
    rh = obj_surface->orig_height;
    region = obj_surface->bo;

    i965_render_src_surface_state(ctx, 1, region, 0, rw, rh, region_pitch, I965_SURFACEFORMAT_R8_UNORM, flags);     /* Y */
    i965_render_src_surface_state(ctx, 2, region, 0, rw, rh, region_pitch, I965_SURFACEFORMAT_R8_UNORM, flags);

    if (obj_surface->fourcc == VA_FOURCC('N', 'V', '1', '2')) {
        i965_render_src_surface_state(ctx, 3, region,
                                      region_pitch * obj_surface->y_cb_offset,
                                      obj_surface->cb_cr_width, obj_surface->cb_cr_height, obj_surface->cb_cr_pitch,
                                      I965_SURFACEFORMAT_R8G8_UNORM, flags); /* UV */
        i965_render_src_surface_state(ctx, 4, region,
                                      region_pitch * obj_surface->y_cb_offset,
                                      obj_surface->cb_cr_width, obj_surface->cb_cr_height, obj_surface->cb_cr_pitch,
                                      I965_SURFACEFORMAT_R8G8_UNORM, flags);
    } else {
        i965_render_src_surface_state(ctx, 3, region,
                                      region_pitch * obj_surface->y_cb_offset,
                                      obj_surface->cb_cr_width, obj_surface->cb_cr_height, obj_surface->cb_cr_pitch,
                                      I965_SURFACEFORMAT_R8_UNORM, flags); /* U */
        i965_render_src_surface_state(ctx, 4, region,
                                      region_pitch * obj_surface->y_cb_offset,
                                      obj_surface->cb_cr_width, obj_surface->cb_cr_height, obj_surface->cb_cr_pitch,
                                      I965_SURFACEFORMAT_R8_UNORM, flags);
        i965_render_src_surface_state(ctx, 5, region,
                                      region_pitch * obj_surface->y_cr_offset,
                                      obj_surface->cb_cr_width, obj_surface->cb_cr_height, obj_surface->cb_cr_pitch,
                                      I965_SURFACEFORMAT_R8_UNORM, flags); /* V */
        i965_render_src_surface_state(ctx, 6, region,
                                      region_pitch * obj_surface->y_cr_offset,
                                      obj_surface->cb_cr_width, obj_surface->cb_cr_height, obj_surface->cb_cr_pitch,
                                      I965_SURFACEFORMAT_R8_UNORM, flags);
    }
}

static void
i965_subpic_render_src_surfaces_state(VADriverContextP ctx,
                                      struct object_surface *obj_surface)
{
    dri_bo *subpic_region;
    unsigned int index = obj_surface->subpic_render_idx;
    struct object_subpic *obj_subpic = obj_surface->obj_subpic[index];
    struct object_image *obj_image = obj_subpic->obj_image;

    assert(obj_surface);
    assert(obj_surface->bo);
    subpic_region = obj_image->bo;
    /* subpicture surface */
    i965_render_src_surface_state(ctx, 1, subpic_region, 0, obj_subpic->width, obj_subpic->height, obj_subpic->pitch, obj_subpic->format, 0);
    i965_render_src_surface_state(ctx, 2, subpic_region, 0, obj_subpic->width, obj_subpic->height, obj_subpic->pitch, obj_subpic->format, 0);
}

static void
i965_render_dest_surface_state(VADriverContextP ctx, int index)
{
    struct i965_driver_data *i965 = i965_driver_data(ctx);
    struct i965_render_state *render_state = &i965->render_state;
    struct intel_region *dest_region = render_state->draw_region;
    void *ss;
    dri_bo *ss_bo = render_state->wm.surface_state_binding_table_bo;
    int format;

    assert(index < MAX_RENDER_SURFACES);

    if (dest_region->cpp == 2) {
        format = I965_SURFACEFORMAT_B5G6R5_UNORM;
    } else {
        format = I965_SURFACEFORMAT_B8G8R8A8_UNORM;
    }

    dri_bo_map(ss_bo, 1);
    assert(ss_bo->virtual);
    ss = (char *)ss_bo->virtual + SURFACE_STATE_OFFSET(index);

    if (IS_GEN8(i965->intel.device_id)) {
        gen8_render_set_surface_state(ss,
                                      dest_region->bo, 0,
                                      dest_region->width, dest_region->height,
                                      dest_region->pitch, format, 0);
        gen8_render_set_surface_scs(ss);
        dri_bo_emit_reloc(ss_bo,
                          I915_GEM_DOMAIN_RENDER, I915_GEM_DOMAIN_RENDER,
                          0,
                          SURFACE_STATE_OFFSET(index) + offsetof(struct gen8_surface_state, ss8),
                          dest_region->bo);
    } else if (IS_GEN7(i965->intel.device_id)) {
        gen7_render_set_surface_state(ss,
                                      dest_region->bo, 0,
                                      dest_region->width, dest_region->height,
                                      dest_region->pitch, format, 0);
        if (IS_HASWELL(i965->intel.device_id))
            gen7_render_set_surface_scs(ss);
        dri_bo_emit_reloc(ss_bo,
                          I915_GEM_DOMAIN_RENDER, I915_GEM_DOMAIN_RENDER,
                          0,
                          SURFACE_STATE_OFFSET(index) + offsetof(struct gen7_surface_state, ss1),
                          dest_region->bo);
    } else {
        i965_render_set_surface_state(ss,
                                      dest_region->bo, 0,
                                      dest_region->width, dest_region->height,
                                      dest_region->pitch, format, 0);
        dri_bo_emit_reloc(ss_bo,
                          I915_GEM_DOMAIN_RENDER, I915_GEM_DOMAIN_RENDER,
                          0,
                          SURFACE_STATE_OFFSET(index) + offsetof(struct i965_surface_state, ss1),
                          dest_region->bo);
    }

    ((unsigned int *)((char *)ss_bo->virtual + BINDING_TABLE_OFFSET))[index] = SURFACE_STATE_OFFSET(index);
    dri_bo_unmap(ss_bo);
}

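/*
 * Fill the vertex buffer with three vertices (bottom-right, bottom-left,
 * top-left); each vertex is a texture coordinate pair followed by a screen
 * coordinate pair, and the rectangle-list primitive used for rendering
 * infers the fourth corner.  Texture coordinates are permuted according to
 * the current rotation attribute.
 */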
static void
i965_fill_vertex_buffer(
    VADriverContextP ctx,
    float tex_coords[4], /* [(u1,v1);(u2,v2)] */
    float vid_coords[4]  /* [(x1,y1);(x2,y2)] */
)
{
    struct i965_driver_data * const i965 = i965_driver_data(ctx);
    float vb[12];

    enum { X1, Y1, X2, Y2 };

    static const unsigned int g_rotation_indices[][6] = {
        [VA_ROTATION_NONE] = { X2, Y2, X1, Y2, X1, Y1 },
        [VA_ROTATION_90]   = { X2, Y1, X2, Y2, X1, Y2 },
        [VA_ROTATION_180]  = { X1, Y1, X2, Y1, X2, Y2 },
        [VA_ROTATION_270]  = { X1, Y2, X1, Y1, X2, Y1 },
    };

    const unsigned int * const rotation_indices =
        g_rotation_indices[i965->rotation_attrib->value];

    vb[0]  = tex_coords[rotation_indices[0]]; /* bottom-right corner */
    vb[1]  = tex_coords[rotation_indices[1]];
    vb[2]  = vid_coords[X2];
    vb[3]  = vid_coords[Y2];

    vb[4]  = tex_coords[rotation_indices[2]]; /* bottom-left corner */
    vb[5]  = tex_coords[rotation_indices[3]];
    vb[6]  = vid_coords[X1];
    vb[7]  = vid_coords[Y2];

    vb[8]  = tex_coords[rotation_indices[4]]; /* top-left corner */
    vb[9]  = tex_coords[rotation_indices[5]];
    vb[10] = vid_coords[X1];
    vb[11] = vid_coords[Y1];

    dri_bo_subdata(i965->render_state.vb.vertex_buffer, 0, sizeof(vb), vb);
}

static void
i965_subpic_render_upload_vertex(VADriverContextP ctx,
                                 struct object_surface *obj_surface,
                                 const VARectangle *output_rect)
{
    unsigned int index = obj_surface->subpic_render_idx;
    struct object_subpic     *obj_subpic   = obj_surface->obj_subpic[index];
    float tex_coords[4], vid_coords[4];
    VARectangle dst_rect;

    if (obj_subpic->flags & VA_SUBPICTURE_DESTINATION_IS_SCREEN_COORD)
        dst_rect = obj_subpic->dst_rect;
    else {
        const float sx  = (float)output_rect->width  / obj_surface->orig_width;
        const float sy  = (float)output_rect->height / obj_surface->orig_height;
        dst_rect.x      = output_rect->x + sx * obj_subpic->dst_rect.x;
        dst_rect.y      = output_rect->y + sy * obj_subpic->dst_rect.y;
        dst_rect.width  = sx * obj_subpic->dst_rect.width;
        dst_rect.height = sy * obj_subpic->dst_rect.height;
    }

    tex_coords[0] = (float)obj_subpic->src_rect.x / obj_subpic->width;
    tex_coords[1] = (float)obj_subpic->src_rect.y / obj_subpic->height;
    tex_coords[2] = (float)(obj_subpic->src_rect.x + obj_subpic->src_rect.width) / obj_subpic->width;
    tex_coords[3] = (float)(obj_subpic->src_rect.y + obj_subpic->src_rect.height) / obj_subpic->height;

    vid_coords[0] = dst_rect.x;
    vid_coords[1] = dst_rect.y;
    vid_coords[2] = (float)(dst_rect.x + dst_rect.width);
    vid_coords[3] = (float)(dst_rect.y + dst_rect.height);

    i965_fill_vertex_buffer(ctx, tex_coords, vid_coords);
}

static void
i965_render_upload_vertex(
    VADriverContextP   ctx,
    struct object_surface *obj_surface,
    const VARectangle *src_rect,
    const VARectangle *dst_rect
)
{
    struct i965_driver_data *i965 = i965_driver_data(ctx);
    struct i965_render_state *render_state = &i965->render_state;
    struct intel_region *dest_region = render_state->draw_region;
    float tex_coords[4], vid_coords[4];
    int width, height;

    width  = obj_surface->orig_width;
    height = obj_surface->orig_height;

    tex_coords[0] = (float)src_rect->x / width;
    tex_coords[1] = (float)src_rect->y / height;
    tex_coords[2] = (float)(src_rect->x + src_rect->width) / width;
    tex_coords[3] = (float)(src_rect->y + src_rect->height) / height;

    vid_coords[0] = dest_region->x + dst_rect->x;
    vid_coords[1] = dest_region->y + dst_rect->y;
    vid_coords[2] = vid_coords[0] + dst_rect->width;
    vid_coords[3] = vid_coords[1] + dst_rect->height;

    i965_fill_vertex_buffer(ctx, tex_coords, vid_coords);
}

#define PI  3.1415926

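/*
 * CURBE layout consumed by the planar pixel shader: ushort[0] selects the
 * surface layout (0 = planar YUV, 1 = NV12 interleaved chroma, 2 = Y800),
 * ushort[1] set to 1 skips the color balance transform, floats 4..7 hold
 * the contrast/brightness/hue/saturation terms and floats 8..19 the
 * 3x4 YUV->RGB conversion matrix.
 */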
static void
i965_render_upload_constants(VADriverContextP ctx,
                             struct object_surface *obj_surface,
                             unsigned int flags)
{
    struct i965_driver_data *i965 = i965_driver_data(ctx);
    struct i965_render_state *render_state = &i965->render_state;
    unsigned short *constant_buffer;
    float *color_balance_base;
    float contrast = (float)i965->contrast_attrib->value / DEFAULT_CONTRAST;
    float brightness = (float)i965->brightness_attrib->value / 255; /* YUV is float in the shader */
    float hue = (float)i965->hue_attrib->value / 180 * PI;
    float saturation = (float)i965->saturation_attrib->value / DEFAULT_SATURATION;
    float *yuv_to_rgb;
    unsigned int color_flag;

    dri_bo_map(render_state->curbe.bo, 1);
    assert(render_state->curbe.bo->virtual);
    constant_buffer = render_state->curbe.bo->virtual;

    if (obj_surface->subsampling == SUBSAMPLE_YUV400) {
        assert(obj_surface->fourcc == VA_FOURCC('Y', '8', '0', '0'));

        constant_buffer[0] = 2;
    } else {
        if (obj_surface->fourcc == VA_FOURCC('N', 'V', '1', '2'))
            constant_buffer[0] = 1;
        else
            constant_buffer[0] = 0;
    }

    if (i965->contrast_attrib->value == DEFAULT_CONTRAST &&
        i965->brightness_attrib->value == DEFAULT_BRIGHTNESS &&
        i965->hue_attrib->value == DEFAULT_HUE &&
        i965->saturation_attrib->value == DEFAULT_SATURATION)
        constant_buffer[1] = 1; /* skip color balance transformation */
    else
        constant_buffer[1] = 0;

    color_balance_base = (float *)constant_buffer + 4;
    *color_balance_base++ = contrast;
    *color_balance_base++ = brightness;
    *color_balance_base++ = cos(hue) * contrast * saturation;
    *color_balance_base++ = sin(hue) * contrast * saturation;

    color_flag = flags & VA_SRC_COLOR_MASK;
    yuv_to_rgb = (float *)constant_buffer + 8;
    if (color_flag == VA_SRC_BT709)
        memcpy(yuv_to_rgb, yuv_to_rgb_bt709, sizeof(yuv_to_rgb_bt709));
    else if (color_flag == VA_SRC_SMPTE_240)
        memcpy(yuv_to_rgb, yuv_to_rgb_smpte_240, sizeof(yuv_to_rgb_smpte_240));
    else
        memcpy(yuv_to_rgb, yuv_to_rgb_bt601, sizeof(yuv_to_rgb_bt601));

    dri_bo_unmap(render_state->curbe.bo);
}

static void
i965_subpic_render_upload_constants(VADriverContextP ctx,
                                    struct object_surface *obj_surface)
{
    struct i965_driver_data *i965 = i965_driver_data(ctx);
    struct i965_render_state *render_state = &i965->render_state;
    float *constant_buffer;
    float global_alpha = 1.0;
    unsigned int index = obj_surface->subpic_render_idx;
    struct object_subpic *obj_subpic = obj_surface->obj_subpic[index];

    if (obj_subpic->flags & VA_SUBPICTURE_GLOBAL_ALPHA) {
        global_alpha = obj_subpic->global_alpha;
    }

    dri_bo_map(render_state->curbe.bo, 1);

    assert(render_state->curbe.bo->virtual);
    constant_buffer = render_state->curbe.bo->virtual;
    *constant_buffer = global_alpha;

    dri_bo_unmap(render_state->curbe.bo);
}

static void
i965_surface_render_state_setup(
    VADriverContextP   ctx,
    struct object_surface *obj_surface,
    const VARectangle *src_rect,
    const VARectangle *dst_rect,
    unsigned int       flags
)
{
    i965_render_vs_unit(ctx);
    i965_render_sf_unit(ctx);
    i965_render_dest_surface_state(ctx, 0);
    i965_render_src_surfaces_state(ctx, obj_surface, flags);
    i965_render_sampler(ctx);
    i965_render_wm_unit(ctx);
    i965_render_cc_viewport(ctx);
    i965_render_cc_unit(ctx);
    i965_render_upload_vertex(ctx, obj_surface, src_rect, dst_rect);
    i965_render_upload_constants(ctx, obj_surface, flags);
}

static void
i965_subpic_render_state_setup(
    VADriverContextP   ctx,
    struct object_surface *obj_surface,
    const VARectangle *src_rect,
    const VARectangle *dst_rect
)
{
    i965_render_vs_unit(ctx);
    i965_render_sf_unit(ctx);
    i965_render_dest_surface_state(ctx, 0);
    i965_subpic_render_src_surfaces_state(ctx, obj_surface);
    i965_render_sampler(ctx);
    i965_subpic_render_wm_unit(ctx);
    i965_render_cc_viewport(ctx);
    i965_subpic_render_cc_unit(ctx);
    i965_subpic_render_upload_constants(ctx, obj_surface);
    i965_subpic_render_upload_vertex(ctx, obj_surface, dst_rect);
}


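/*
 * The helpers below emit the 3D pipeline setup into the batch buffer:
 * pipeline select, SIP, state base addresses, binding table and unit-state
 * pointers, URB fences, CURBE, the drawing rectangle and vertex elements.
 */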
static void
i965_render_pipeline_select(VADriverContextP ctx)
{
    struct i965_driver_data *i965 = i965_driver_data(ctx);
    struct intel_batchbuffer *batch = i965->batch;

    BEGIN_BATCH(batch, 1);
    OUT_BATCH(batch, CMD_PIPELINE_SELECT | PIPELINE_SELECT_3D);
    ADVANCE_BATCH(batch);
}

static void
i965_render_state_sip(VADriverContextP ctx)
{
    struct i965_driver_data *i965 = i965_driver_data(ctx);
    struct intel_batchbuffer *batch = i965->batch;

    BEGIN_BATCH(batch, 2);
    OUT_BATCH(batch, CMD_STATE_SIP | 0);
    OUT_BATCH(batch, 0);
    ADVANCE_BATCH(batch);
}

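/*
 * STATE_BASE_ADDRESS: only the surface state base points at a real buffer
 * (the combined surface state + binding table bo); every other base is
 * left at zero with the modify bit set.  Ironlake's form of the command is
 * two dwords longer than the gen4 one.
 */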
static void
i965_render_state_base_address(VADriverContextP ctx)
{
    struct i965_driver_data *i965 = i965_driver_data(ctx);
    struct intel_batchbuffer *batch = i965->batch;
    struct i965_render_state *render_state = &i965->render_state;

    if (IS_IRONLAKE(i965->intel.device_id)) {
        BEGIN_BATCH(batch, 8);
        OUT_BATCH(batch, CMD_STATE_BASE_ADDRESS | 6);
        OUT_BATCH(batch, 0 | BASE_ADDRESS_MODIFY);
        OUT_RELOC(batch, render_state->wm.surface_state_binding_table_bo, I915_GEM_DOMAIN_INSTRUCTION, 0, BASE_ADDRESS_MODIFY);
        OUT_BATCH(batch, 0 | BASE_ADDRESS_MODIFY);
        OUT_BATCH(batch, 0 | BASE_ADDRESS_MODIFY);
        OUT_BATCH(batch, 0 | BASE_ADDRESS_MODIFY);
        OUT_BATCH(batch, 0 | BASE_ADDRESS_MODIFY);
        OUT_BATCH(batch, 0 | BASE_ADDRESS_MODIFY);
        ADVANCE_BATCH(batch);
    } else {
        BEGIN_BATCH(batch, 6);
        OUT_BATCH(batch, CMD_STATE_BASE_ADDRESS | 4);
        OUT_BATCH(batch, 0 | BASE_ADDRESS_MODIFY);
        OUT_RELOC(batch, render_state->wm.surface_state_binding_table_bo, I915_GEM_DOMAIN_INSTRUCTION, 0, BASE_ADDRESS_MODIFY);
        OUT_BATCH(batch, 0 | BASE_ADDRESS_MODIFY);
        OUT_BATCH(batch, 0 | BASE_ADDRESS_MODIFY);
        OUT_BATCH(batch, 0 | BASE_ADDRESS_MODIFY);
        ADVANCE_BATCH(batch);
    }
}

1394 static void
1395 i965_render_binding_table_pointers(VADriverContextP ctx)
1396 {
1397     struct i965_driver_data *i965 = i965_driver_data(ctx);
1398     struct intel_batchbuffer *batch = i965->batch;
1399
1400     BEGIN_BATCH(batch, 6);
1401     OUT_BATCH(batch, CMD_BINDING_TABLE_POINTERS | 4);
1402     OUT_BATCH(batch, 0); /* vs */
1403     OUT_BATCH(batch, 0); /* gs */
1404     OUT_BATCH(batch, 0); /* clip */
1405     OUT_BATCH(batch, 0); /* sf */
1406     OUT_BATCH(batch, BINDING_TABLE_OFFSET);
1407     ADVANCE_BATCH(batch);
1408 }
1409
1410 static void 
1411 i965_render_constant_color(VADriverContextP ctx)
1412 {
1413     struct i965_driver_data *i965 = i965_driver_data(ctx);
1414     struct intel_batchbuffer *batch = i965->batch;
1415
1416     BEGIN_BATCH(batch, 5);
1417     OUT_BATCH(batch, CMD_CONSTANT_COLOR | 3);
1418     OUT_BATCH(batch, float_to_uint(1.0));
1419     OUT_BATCH(batch, float_to_uint(0.0));
1420     OUT_BATCH(batch, float_to_uint(1.0));
1421     OUT_BATCH(batch, float_to_uint(1.0));
1422     ADVANCE_BATCH(batch);
1423 }
1424
1425 static void
1426 i965_render_pipelined_pointers(VADriverContextP ctx)
1427 {
1428     struct i965_driver_data *i965 = i965_driver_data(ctx);
1429     struct intel_batchbuffer *batch = i965->batch;
1430     struct i965_render_state *render_state = &i965->render_state;
1431
1432     BEGIN_BATCH(batch, 7);
1433     OUT_BATCH(batch, CMD_PIPELINED_POINTERS | 5);
1434     OUT_RELOC(batch, render_state->vs.state, I915_GEM_DOMAIN_INSTRUCTION, 0, 0);
1435     OUT_BATCH(batch, 0);  /* disable GS */
1436     OUT_BATCH(batch, 0);  /* disable CLIP */
1437     OUT_RELOC(batch, render_state->sf.state, I915_GEM_DOMAIN_INSTRUCTION, 0, 0);
1438     OUT_RELOC(batch, render_state->wm.state, I915_GEM_DOMAIN_INSTRUCTION, 0, 0);
1439     OUT_RELOC(batch, render_state->cc.state, I915_GEM_DOMAIN_INSTRUCTION, 0, 0);
1440     ADVANCE_BATCH(batch);
1441 }
1442
1443 static void
1444 i965_render_urb_layout(VADriverContextP ctx)
1445 {
1446     struct i965_driver_data *i965 = i965_driver_data(ctx);
1447     struct intel_batchbuffer *batch = i965->batch;
1448     int urb_vs_start, urb_vs_size;
1449     int urb_gs_start, urb_gs_size;
1450     int urb_clip_start, urb_clip_size;
1451     int urb_sf_start, urb_sf_size;
1452     int urb_cs_start, urb_cs_size;
1453
1454     urb_vs_start = 0;
1455     urb_vs_size = URB_VS_ENTRIES * URB_VS_ENTRY_SIZE;
1456     urb_gs_start = urb_vs_start + urb_vs_size;
1457     urb_gs_size = URB_GS_ENTRIES * URB_GS_ENTRY_SIZE;
1458     urb_clip_start = urb_gs_start + urb_gs_size;
1459     urb_clip_size = URB_CLIP_ENTRIES * URB_CLIP_ENTRY_SIZE;
1460     urb_sf_start = urb_clip_start + urb_clip_size;
1461     urb_sf_size = URB_SF_ENTRIES * URB_SF_ENTRY_SIZE;
1462     urb_cs_start = urb_sf_start + urb_sf_size;
1463     urb_cs_size = URB_CS_ENTRIES * URB_CS_ENTRY_SIZE;
1464
1465     BEGIN_BATCH(batch, 3);
1466     OUT_BATCH(batch, 
1467               CMD_URB_FENCE |
1468               UF0_CS_REALLOC |
1469               UF0_SF_REALLOC |
1470               UF0_CLIP_REALLOC |
1471               UF0_GS_REALLOC |
1472               UF0_VS_REALLOC |
1473               1);
1474     OUT_BATCH(batch, 
1475               ((urb_clip_start + urb_clip_size) << UF1_CLIP_FENCE_SHIFT) |
1476               ((urb_gs_start + urb_gs_size) << UF1_GS_FENCE_SHIFT) |
1477               ((urb_vs_start + urb_vs_size) << UF1_VS_FENCE_SHIFT));
1478     OUT_BATCH(batch,
1479               ((urb_cs_start + urb_cs_size) << UF2_CS_FENCE_SHIFT) |
1480               ((urb_sf_start + urb_sf_size) << UF2_SF_FENCE_SHIFT));
1481     ADVANCE_BATCH(batch);
1482 }
1483
1484 static void 
1485 i965_render_cs_urb_layout(VADriverContextP ctx)
1486 {
1487     struct i965_driver_data *i965 = i965_driver_data(ctx);
1488     struct intel_batchbuffer *batch = i965->batch;
1489
1490     BEGIN_BATCH(batch, 2);
1491     OUT_BATCH(batch, CMD_CS_URB_STATE | 0);
1492     OUT_BATCH(batch,
1493               ((URB_CS_ENTRY_SIZE - 1) << 4) |          /* URB Entry Allocation Size */
1494               (URB_CS_ENTRIES << 0));                /* Number of URB Entries */
1495     ADVANCE_BATCH(batch);
1496 }
1497
1498 static void
1499 i965_render_constant_buffer(VADriverContextP ctx)
1500 {
1501     struct i965_driver_data *i965 = i965_driver_data(ctx);
1502     struct intel_batchbuffer *batch = i965->batch;
1503     struct i965_render_state *render_state = &i965->render_state;
1504
1505     BEGIN_BATCH(batch, 2);
1506     OUT_BATCH(batch, CMD_CONSTANT_BUFFER | (1 << 8) | (2 - 2));
1507     OUT_RELOC(batch, render_state->curbe.bo,
1508               I915_GEM_DOMAIN_INSTRUCTION, 0,
1509               URB_CS_ENTRY_SIZE - 1);
1510     ADVANCE_BATCH(batch);    
1511 }
1512
1513 static void
1514 i965_render_drawing_rectangle(VADriverContextP ctx)
1515 {
1516     struct i965_driver_data *i965 = i965_driver_data(ctx);
1517     struct intel_batchbuffer *batch = i965->batch;
1518     struct i965_render_state *render_state = &i965->render_state;
1519     struct intel_region *dest_region = render_state->draw_region;
1520
1521     BEGIN_BATCH(batch, 4);
1522     OUT_BATCH(batch, CMD_DRAWING_RECTANGLE | 2);
1523     OUT_BATCH(batch, 0x00000000);
1524     OUT_BATCH(batch, (dest_region->width - 1) | (dest_region->height - 1) << 16);
1525     OUT_BATCH(batch, 0x00000000);         
1526     ADVANCE_BATCH(batch);
1527 }
1528
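/*
 * Two vertex elements, both sourced from vertex buffer 0: (x, y) at
 * byte offset 0 and (s, t) at byte offset 8, each padded out to a
 * 4-component vector with 1.0.  The GEN4 path additionally programs
 * explicit destination element offsets (0 and 4 dwords); IRONLAKE
 * drops that field.
 */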
1529 static void
1530 i965_render_vertex_elements(VADriverContextP ctx)
1531 {
1532     struct i965_driver_data *i965 = i965_driver_data(ctx);
1533     struct intel_batchbuffer *batch = i965->batch;
1534
1535     if (IS_IRONLAKE(i965->intel.device_id)) {
1536         BEGIN_BATCH(batch, 5);
1537         OUT_BATCH(batch, CMD_VERTEX_ELEMENTS | 3);
1538         /* offset 0: X,Y -> {X, Y, 1.0, 1.0} */
1539         OUT_BATCH(batch, (0 << VE0_VERTEX_BUFFER_INDEX_SHIFT) |
1540                   VE0_VALID |
1541                   (I965_SURFACEFORMAT_R32G32_FLOAT << VE0_FORMAT_SHIFT) |
1542                   (0 << VE0_OFFSET_SHIFT));
1543         OUT_BATCH(batch, (I965_VFCOMPONENT_STORE_SRC << VE1_VFCOMPONENT_0_SHIFT) |
1544                   (I965_VFCOMPONENT_STORE_SRC << VE1_VFCOMPONENT_1_SHIFT) |
1545                   (I965_VFCOMPONENT_STORE_1_FLT << VE1_VFCOMPONENT_2_SHIFT) |
1546                   (I965_VFCOMPONENT_STORE_1_FLT << VE1_VFCOMPONENT_3_SHIFT));
1547         /* offset 8: S0, T0 -> {S0, T0, 1.0, 1.0} */
1548         OUT_BATCH(batch, (0 << VE0_VERTEX_BUFFER_INDEX_SHIFT) |
1549                   VE0_VALID |
1550                   (I965_SURFACEFORMAT_R32G32_FLOAT << VE0_FORMAT_SHIFT) |
1551                   (8 << VE0_OFFSET_SHIFT));
1552         OUT_BATCH(batch, (I965_VFCOMPONENT_STORE_SRC << VE1_VFCOMPONENT_0_SHIFT) |
1553                   (I965_VFCOMPONENT_STORE_SRC << VE1_VFCOMPONENT_1_SHIFT) |
1554                   (I965_VFCOMPONENT_STORE_1_FLT << VE1_VFCOMPONENT_2_SHIFT) |
1555                   (I965_VFCOMPONENT_STORE_1_FLT << VE1_VFCOMPONENT_3_SHIFT));
1556         ADVANCE_BATCH(batch);
1557     } else {
1558         BEGIN_BATCH(batch, 5);
1559         OUT_BATCH(batch, CMD_VERTEX_ELEMENTS | 3);
1560         /* offset 0: X,Y -> {X, Y, 1.0, 1.0} */
1561         OUT_BATCH(batch, (0 << VE0_VERTEX_BUFFER_INDEX_SHIFT) |
1562                   VE0_VALID |
1563                   (I965_SURFACEFORMAT_R32G32_FLOAT << VE0_FORMAT_SHIFT) |
1564                   (0 << VE0_OFFSET_SHIFT));
1565         OUT_BATCH(batch, (I965_VFCOMPONENT_STORE_SRC << VE1_VFCOMPONENT_0_SHIFT) |
1566                   (I965_VFCOMPONENT_STORE_SRC << VE1_VFCOMPONENT_1_SHIFT) |
1567                   (I965_VFCOMPONENT_STORE_1_FLT << VE1_VFCOMPONENT_2_SHIFT) |
1568                   (I965_VFCOMPONENT_STORE_1_FLT << VE1_VFCOMPONENT_3_SHIFT) |
1569                   (0 << VE1_DESTINATION_ELEMENT_OFFSET_SHIFT));
1570         /* offset 8: S0, T0 -> {S0, T0, 1.0, 1.0} */
1571         OUT_BATCH(batch, (0 << VE0_VERTEX_BUFFER_INDEX_SHIFT) |
1572                   VE0_VALID |
1573                   (I965_SURFACEFORMAT_R32G32_FLOAT << VE0_FORMAT_SHIFT) |
1574                   (8 << VE0_OFFSET_SHIFT));
1575         OUT_BATCH(batch, (I965_VFCOMPONENT_STORE_SRC << VE1_VFCOMPONENT_0_SHIFT) |
1576                   (I965_VFCOMPONENT_STORE_SRC << VE1_VFCOMPONENT_1_SHIFT) |
1577                   (I965_VFCOMPONENT_STORE_1_FLT << VE1_VFCOMPONENT_2_SHIFT) |
1578                   (I965_VFCOMPONENT_STORE_1_FLT << VE1_VFCOMPONENT_3_SHIFT) |
1579                   (4 << VE1_DESTINATION_ELEMENT_OFFSET_SHIFT));
1580         ADVANCE_BATCH(batch);
1581     }
1582 }
1583
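/*
 * Load the subpicture palette into the sampler palette.  Each entry is
 * packed as (alpha << 24) | color: bits 0-23 carry the color from
 * obj_image->palette[], bits 24-31 the caller-supplied alpha.
 */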
1584 static void
1585 i965_render_upload_image_palette(
1586     VADriverContextP ctx,
1587     struct object_image *obj_image,
1588     unsigned int     alpha
1589 )
1590 {
1591     struct i965_driver_data *i965 = i965_driver_data(ctx);
1592     struct intel_batchbuffer *batch = i965->batch;
1593     unsigned int i;
1594
1595     assert(obj_image);
1596
1597     if (!obj_image)
1598         return;
1599
1600     if (obj_image->image.num_palette_entries == 0)
1601         return;
1602
1603     BEGIN_BATCH(batch, 1 + obj_image->image.num_palette_entries);
1604     OUT_BATCH(batch, CMD_SAMPLER_PALETTE_LOAD | (obj_image->image.num_palette_entries - 1));
1605     /* fill palette: bits 0-23 carry the color, bits 24-31 the alpha */
1607     for (i = 0; i < obj_image->image.num_palette_entries; i++)
1608         OUT_BATCH(batch, (alpha << 24) | obj_image->palette[i]);
1609     ADVANCE_BATCH(batch);
1610 }
1611
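/*
 * Bind the vertex buffer and kick the pipeline with a RECTLIST of
 * three vertices.  IRONLAKE takes an end-address relocation in the
 * vertex-buffer packet (12 * 4 bytes = three 16-byte vertices) where
 * GEN4 takes a max vertex index of 3.
 */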
1612 static void
1613 i965_render_startup(VADriverContextP ctx)
1614 {
1615     struct i965_driver_data *i965 = i965_driver_data(ctx);
1616     struct intel_batchbuffer *batch = i965->batch;
1617     struct i965_render_state *render_state = &i965->render_state;
1618
1619     BEGIN_BATCH(batch, 11);
1620     OUT_BATCH(batch, CMD_VERTEX_BUFFERS | 3);
1621     OUT_BATCH(batch, 
1622               (0 << VB0_BUFFER_INDEX_SHIFT) |
1623               VB0_VERTEXDATA |
1624               ((4 * 4) << VB0_BUFFER_PITCH_SHIFT));
1625     OUT_RELOC(batch, render_state->vb.vertex_buffer, I915_GEM_DOMAIN_VERTEX, 0, 0);
1626
1627     if (IS_IRONLAKE(i965->intel.device_id))
1628         OUT_RELOC(batch, render_state->vb.vertex_buffer, I915_GEM_DOMAIN_VERTEX, 0, 12 * 4);
1629     else
1630         OUT_BATCH(batch, 3);
1631
1632     OUT_BATCH(batch, 0);
1633
1634     OUT_BATCH(batch, 
1635               CMD_3DPRIMITIVE |
1636               _3DPRIMITIVE_VERTEX_SEQUENTIAL |
1637               (_3DPRIM_RECTLIST << _3DPRIMITIVE_TOPOLOGY_SHIFT) |
1638               (0 << 9) |
1639               4);
1640     OUT_BATCH(batch, 3); /* vertex count per instance */
1641     OUT_BATCH(batch, 0); /* start vertex offset */
1642     OUT_BATCH(batch, 1); /* single instance */
1643     OUT_BATCH(batch, 0); /* start instance location */
1644     OUT_BATCH(batch, 0); /* index buffer offset, ignored */
1645     ADVANCE_BATCH(batch);
1646 }
1647
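/*
 * Clear the destination with the blitter before compositing: an
 * XY_COLOR_BLT over the whole draw region with color 0x0.  For tiled
 * destinations the BLT pitch field is in dwords, hence pitch /= 4.
 */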
1648 static void 
1649 i965_clear_dest_region(VADriverContextP ctx)
1650 {
1651     struct i965_driver_data *i965 = i965_driver_data(ctx);
1652     struct intel_batchbuffer *batch = i965->batch;
1653     struct i965_render_state *render_state = &i965->render_state;
1654     struct intel_region *dest_region = render_state->draw_region;
1655     unsigned int blt_cmd, br13;
1656     int pitch;
1657
1658     blt_cmd = XY_COLOR_BLT_CMD;
1659     br13 = 0xf0 << 16;
1660     pitch = dest_region->pitch;
1661
1662     if (dest_region->cpp == 4) {
1663         br13 |= BR13_8888;
1664         blt_cmd |= (XY_COLOR_BLT_WRITE_RGB | XY_COLOR_BLT_WRITE_ALPHA);
1665     } else {
1666         assert(dest_region->cpp == 2);
1667         br13 |= BR13_565;
1668     }
1669
1670     if (dest_region->tiling != I915_TILING_NONE) {
1671         blt_cmd |= XY_COLOR_BLT_DST_TILED;
1672         pitch /= 4;
1673     }
1674
1675     br13 |= pitch;
1676
1677     if (IS_GEN6(i965->intel.device_id) ||
1678         IS_GEN7(i965->intel.device_id) ||
1679         IS_GEN8(i965->intel.device_id)) {
1680         intel_batchbuffer_start_atomic_blt(batch, 24);
1681         BEGIN_BLT_BATCH(batch, 6);
1682     } else {
1683         intel_batchbuffer_start_atomic(batch, 24);
1684         BEGIN_BATCH(batch, 6);
1685     }
1686
1687     OUT_BATCH(batch, blt_cmd);
1688     OUT_BATCH(batch, br13);
1689     OUT_BATCH(batch, (dest_region->y << 16) | (dest_region->x));
1690     OUT_BATCH(batch, ((dest_region->y + dest_region->height) << 16) |
1691               (dest_region->x + dest_region->width));
1692     OUT_RELOC(batch, dest_region->bo, 
1693               I915_GEM_DOMAIN_RENDER, I915_GEM_DOMAIN_RENDER,
1694               0);
1695     OUT_BATCH(batch, 0x0);
1696     ADVANCE_BATCH(batch);
1697     intel_batchbuffer_end_atomic(batch);
1698 }
1699
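/*
 * GEN8 variant of the clear: GEN8_XY_COLOR_BLT_CMD is one dword longer
 * than its predecessors to accommodate 48-bit addressing, so the batch
 * grows to 7 dwords and the extra OUT_BATCH(0x0) supplies the upper
 * address dword.
 */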
1700 static void 
1701 gen8_clear_dest_region(VADriverContextP ctx)
1702 {
1703     struct i965_driver_data *i965 = i965_driver_data(ctx);
1704     struct intel_batchbuffer *batch = i965->batch;
1705     struct i965_render_state *render_state = &i965->render_state;
1706     struct intel_region *dest_region = render_state->draw_region;
1707     unsigned int blt_cmd, br13;
1708     int pitch;
1709
1710     blt_cmd = GEN8_XY_COLOR_BLT_CMD;
1711     br13 = 0xf0 << 16;
1712     pitch = dest_region->pitch;
1713
1714     if (dest_region->cpp == 4) {
1715         br13 |= BR13_8888;
1716         blt_cmd |= (XY_COLOR_BLT_WRITE_RGB | XY_COLOR_BLT_WRITE_ALPHA);
1717     } else {
1718         assert(dest_region->cpp == 2);
1719         br13 |= BR13_565;
1720     }
1721
1722     if (dest_region->tiling != I915_TILING_NONE) {
1723         blt_cmd |= XY_COLOR_BLT_DST_TILED;
1724         pitch /= 4;
1725     }
1726
1727     br13 |= pitch;
1728
1729     intel_batchbuffer_start_atomic_blt(batch, 24);
1730     BEGIN_BLT_BATCH(batch, 7);
1731
1732     OUT_BATCH(batch, blt_cmd);
1733     OUT_BATCH(batch, br13);
1734     OUT_BATCH(batch, (dest_region->y << 16) | (dest_region->x));
1735     OUT_BATCH(batch, ((dest_region->y + dest_region->height) << 16) |
1736               (dest_region->x + dest_region->width));
1737     OUT_RELOC(batch, dest_region->bo, 
1738               I915_GEM_DOMAIN_RENDER, I915_GEM_DOMAIN_RENDER,
1739               0);
1740     OUT_BATCH(batch, 0x0);
1741     OUT_BATCH(batch, 0x0);
1742     ADVANCE_BATCH(batch);
1743     intel_batchbuffer_end_atomic(batch);
1744 }
1745
1746 static void
1747 i965_surface_render_pipeline_setup(VADriverContextP ctx)
1748 {
1749     struct i965_driver_data *i965 = i965_driver_data(ctx);
1750     struct intel_batchbuffer *batch = i965->batch;
1751
1752     i965_clear_dest_region(ctx);
1753     intel_batchbuffer_start_atomic(batch, 0x1000);
1754     intel_batchbuffer_emit_mi_flush(batch);
1755     i965_render_pipeline_select(ctx);
1756     i965_render_state_sip(ctx);
1757     i965_render_state_base_address(ctx);
1758     i965_render_binding_table_pointers(ctx);
1759     i965_render_constant_color(ctx);
1760     i965_render_pipelined_pointers(ctx);
1761     i965_render_urb_layout(ctx);
1762     i965_render_cs_urb_layout(ctx);
1763     i965_render_constant_buffer(ctx);
1764     i965_render_drawing_rectangle(ctx);
1765     i965_render_vertex_elements(ctx);
1766     i965_render_startup(ctx);
1767     intel_batchbuffer_end_atomic(batch);
1768 }
1769
1770 static void
1771 i965_subpic_render_pipeline_setup(VADriverContextP ctx)
1772 {
1773     struct i965_driver_data *i965 = i965_driver_data(ctx);
1774     struct intel_batchbuffer *batch = i965->batch;
1775
1776     intel_batchbuffer_start_atomic(batch, 0x1000);
1777     intel_batchbuffer_emit_mi_flush(batch);
1778     i965_render_pipeline_select(ctx);
1779     i965_render_state_sip(ctx);
1780     i965_render_state_base_address(ctx);
1781     i965_render_binding_table_pointers(ctx);
1782     i965_render_constant_color(ctx);
1783     i965_render_pipelined_pointers(ctx);
1784     i965_render_urb_layout(ctx);
1785     i965_render_cs_urb_layout(ctx);
1786     i965_render_constant_buffer(ctx);
1787     i965_render_drawing_rectangle(ctx);
1788     i965_render_vertex_elements(ctx);
1789     i965_render_startup(ctx);
1790     intel_batchbuffer_end_atomic(batch);
1791 }
1792
1793
1794 static void 
1795 i965_render_initialize(VADriverContextP ctx)
1796 {
1797     struct i965_driver_data *i965 = i965_driver_data(ctx);
1798     struct i965_render_state *render_state = &i965->render_state;
1799     dri_bo *bo;
1800
1801     /* VERTEX BUFFER */
1802     dri_bo_unreference(render_state->vb.vertex_buffer);
1803     bo = dri_bo_alloc(i965->intel.bufmgr,
1804                       "vertex buffer",
1805                       4096,
1806                       4096);
1807     assert(bo);
1808     render_state->vb.vertex_buffer = bo;
1809
1810     /* VS */
1811     dri_bo_unreference(render_state->vs.state);
1812     bo = dri_bo_alloc(i965->intel.bufmgr,
1813                       "vs state",
1814                       sizeof(struct i965_vs_unit_state),
1815                       64);
1816     assert(bo);
1817     render_state->vs.state = bo;
1818
1819     /* GS */
1820     /* CLIP */
1821     /* SF */
1822     dri_bo_unreference(render_state->sf.state);
1823     bo = dri_bo_alloc(i965->intel.bufmgr,
1824                       "sf state",
1825                       sizeof(struct i965_sf_unit_state),
1826                       64);
1827     assert(bo);
1828     render_state->sf.state = bo;
1829
1830     /* WM */
1831     dri_bo_unreference(render_state->wm.surface_state_binding_table_bo);
1832     bo = dri_bo_alloc(i965->intel.bufmgr,
1833                       "surface state & binding table",
1834                       (SURFACE_STATE_PADDED_SIZE + sizeof(unsigned int)) * MAX_RENDER_SURFACES,
1835                       4096);
1836     assert(bo);
1837     render_state->wm.surface_state_binding_table_bo = bo;
1838
1839     dri_bo_unreference(render_state->wm.sampler);
1840     bo = dri_bo_alloc(i965->intel.bufmgr,
1841                       "sampler state",
1842                       MAX_SAMPLERS * sizeof(struct i965_sampler_state),
1843                       64);
1844     assert(bo);
1845     render_state->wm.sampler = bo;
1846     render_state->wm.sampler_count = 0;
1847
1848     dri_bo_unreference(render_state->wm.state);
1849     bo = dri_bo_alloc(i965->intel.bufmgr,
1850                       "wm state",
1851                       sizeof(struct i965_wm_unit_state),
1852                       64);
1853     assert(bo);
1854     render_state->wm.state = bo;
1855
1856     /* COLOR CALCULATOR */
1857     dri_bo_unreference(render_state->cc.state);
1858     bo = dri_bo_alloc(i965->intel.bufmgr,
1859                       "color calc state",
1860                       sizeof(struct i965_cc_unit_state),
1861                       64);
1862     assert(bo);
1863     render_state->cc.state = bo;
1864
1865     dri_bo_unreference(render_state->cc.viewport);
1866     bo = dri_bo_alloc(i965->intel.bufmgr,
1867                       "cc viewport",
1868                       sizeof(struct i965_cc_viewport),
1869                       64);
1870     assert(bo);
1871     render_state->cc.viewport = bo;
1872 }
1873
1874 static void
1875 i965_render_put_surface(
1876     VADriverContextP   ctx,
1877     struct object_surface *obj_surface,
1878     const VARectangle *src_rect,
1879     const VARectangle *dst_rect,
1880     unsigned int       flags
1881 )
1882 {
1883     struct i965_driver_data *i965 = i965_driver_data(ctx);
1884     struct intel_batchbuffer *batch = i965->batch;
1885
1886     i965_render_initialize(ctx);
1887     i965_surface_render_state_setup(ctx, obj_surface, src_rect, dst_rect, flags);
1888     i965_surface_render_pipeline_setup(ctx);
1889     intel_batchbuffer_flush(batch);
1890 }
1891
1892 static void
1893 i965_render_put_subpicture(
1894     VADriverContextP   ctx,
1895     struct object_surface *obj_surface,
1896     const VARectangle *src_rect,
1897     const VARectangle *dst_rect
1898 )
1899 {
1900     struct i965_driver_data *i965 = i965_driver_data(ctx);
1901     struct intel_batchbuffer *batch = i965->batch;
1902     unsigned int index = obj_surface->subpic_render_idx;
1903     struct object_subpic *obj_subpic = obj_surface->obj_subpic[index];
1904
1905     assert(obj_subpic);
1906
1907     i965_render_initialize(ctx);
1908     i965_subpic_render_state_setup(ctx, obj_surface, src_rect, dst_rect);
1909     i965_subpic_render_pipeline_setup(ctx);
1910     i965_render_upload_image_palette(ctx, obj_subpic->obj_image, 0xff);
1911     intel_batchbuffer_flush(batch);
1912 }
1913
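/*
 * Illustrative call flow (hypothetical caller, for reference only):
 * the display path invokes these hooks per surface, roughly as
 *
 *     i965_render_put_surface(ctx, obj_surface, &src_rect, &dst_rect, flags);
 *     if (obj_surface->obj_subpic[obj_surface->subpic_render_idx])
 *         i965_render_put_subpicture(ctx, obj_surface, &src_rect, &dst_rect);
 */
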
1914 /*
1915  * for GEN6+
1916  */
1917 static void 
1918 gen6_render_initialize(VADriverContextP ctx)
1919 {
1920     struct i965_driver_data *i965 = i965_driver_data(ctx);
1921     struct i965_render_state *render_state = &i965->render_state;
1922     dri_bo *bo;
1923
1924     /* VERTEX BUFFER */
1925     dri_bo_unreference(render_state->vb.vertex_buffer);
1926     bo = dri_bo_alloc(i965->intel.bufmgr,
1927                       "vertex buffer",
1928                       4096,
1929                       4096);
1930     assert(bo);
1931     render_state->vb.vertex_buffer = bo;
1932
1933     /* WM */
1934     dri_bo_unreference(render_state->wm.surface_state_binding_table_bo);
1935     bo = dri_bo_alloc(i965->intel.bufmgr,
1936                       "surface state & binding table",
1937                       (SURFACE_STATE_PADDED_SIZE + sizeof(unsigned int)) * MAX_RENDER_SURFACES,
1938                       4096);
1939     assert(bo);
1940     render_state->wm.surface_state_binding_table_bo = bo;
1941
1942     dri_bo_unreference(render_state->wm.sampler);
1943     bo = dri_bo_alloc(i965->intel.bufmgr,
1944                       "sampler state",
1945                       MAX_SAMPLERS * sizeof(struct i965_sampler_state),
1946                       4096);
1947     assert(bo);
1948     render_state->wm.sampler = bo;
1949     render_state->wm.sampler_count = 0;
1950
1951     /* COLOR CALCULATOR */
1952     dri_bo_unreference(render_state->cc.state);
1953     bo = dri_bo_alloc(i965->intel.bufmgr,
1954                       "color calc state",
1955                       sizeof(struct gen6_color_calc_state),
1956                       4096);
1957     assert(bo);
1958     render_state->cc.state = bo;
1959
1960     /* CC VIEWPORT */
1961     dri_bo_unreference(render_state->cc.viewport);
1962     bo = dri_bo_alloc(i965->intel.bufmgr,
1963                       "cc viewport",
1964                       sizeof(struct i965_cc_viewport),
1965                       4096);
1966     assert(bo);
1967     render_state->cc.viewport = bo;
1968
1969     /* BLEND STATE */
1970     dri_bo_unreference(render_state->cc.blend);
1971     bo = dri_bo_alloc(i965->intel.bufmgr,
1972                       "blend state",
1973                       sizeof(struct gen6_blend_state),
1974                       4096);
1975     assert(bo);
1976     render_state->cc.blend = bo;
1977
1978     /* DEPTH & STENCIL STATE */
1979     dri_bo_unreference(render_state->cc.depth_stencil);
1980     bo = dri_bo_alloc(i965->intel.bufmgr,
1981                       "depth & stencil state",
1982                       sizeof(struct gen6_depth_stencil_state),
1983                       4096);
1984     assert(bo);
1985     render_state->cc.depth_stencil = bo;
1986 }
1987
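/*
 * GEN6+ color-calc state: the same constant color (R=1, G=0, B=1, A=1)
 * that the legacy path programs with CMD_CONSTANT_COLOR above.
 */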
1988 static void
1989 gen6_render_color_calc_state(VADriverContextP ctx)
1990 {
1991     struct i965_driver_data *i965 = i965_driver_data(ctx);
1992     struct i965_render_state *render_state = &i965->render_state;
1993     struct gen6_color_calc_state *color_calc_state;
1994     
1995     dri_bo_map(render_state->cc.state, 1);
1996     assert(render_state->cc.state->virtual);
1997     color_calc_state = render_state->cc.state->virtual;
1998     memset(color_calc_state, 0, sizeof(*color_calc_state));
1999     color_calc_state->constant_r = 1.0;
2000     color_calc_state->constant_g = 0.0;
2001     color_calc_state->constant_b = 1.0;
2002     color_calc_state->constant_a = 1.0;
2003     dri_bo_unmap(render_state->cc.state);
2004 }
2005
2006 static void
2007 gen6_render_blend_state(VADriverContextP ctx)
2008 {
2009     struct i965_driver_data *i965 = i965_driver_data(ctx);
2010     struct i965_render_state *render_state = &i965->render_state;
2011     struct gen6_blend_state *blend_state;
2012     
2013     dri_bo_map(render_state->cc.blend, 1);
2014     assert(render_state->cc.blend->virtual);
2015     blend_state = render_state->cc.blend->virtual;
2016     memset(blend_state, 0, sizeof(*blend_state));
2017     blend_state->blend1.logic_op_enable = 1;
2018     blend_state->blend1.logic_op_func = 0xc;
2019     dri_bo_unmap(render_state->cc.blend);
2020 }
2021
2022 static void
2023 gen6_render_depth_stencil_state(VADriverContextP ctx)
2024 {
2025     struct i965_driver_data *i965 = i965_driver_data(ctx);
2026     struct i965_render_state *render_state = &i965->render_state;
2027     struct gen6_depth_stencil_state *depth_stencil_state;
2028     
2029     dri_bo_map(render_state->cc.depth_stencil, 1);
2030     assert(render_state->cc.depth_stencil->virtual);
2031     depth_stencil_state = render_state->cc.depth_stencil->virtual;
2032     memset(depth_stencil_state, 0, sizeof(*depth_stencil_state));
2033     dri_bo_unmap(render_state->cc.depth_stencil);
2034 }
2035
2036 static void
2037 gen6_render_setup_states(
2038     VADriverContextP   ctx,
2039     struct object_surface *obj_surface,
2040     const VARectangle *src_rect,
2041     const VARectangle *dst_rect,
2042     unsigned int       flags
2043 )
2044 {
2045     i965_render_dest_surface_state(ctx, 0);
2046     i965_render_src_surfaces_state(ctx, obj_surface, flags);
2047     i965_render_sampler(ctx);
2048     i965_render_cc_viewport(ctx);
2049     gen6_render_color_calc_state(ctx);
2050     gen6_render_blend_state(ctx);
2051     gen6_render_depth_stencil_state(ctx);
2052     i965_render_upload_constants(ctx, obj_surface, flags);
2053     i965_render_upload_vertex(ctx, obj_surface, src_rect, dst_rect);
2054 }
2055
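/*
 * Pipeline-invariant state: select the 3D pipeline, program
 * single-sample rendering with a sample mask of 1, and zero the
 * system instruction pointer.
 */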
2056 static void
2057 gen6_emit_invarient_states(VADriverContextP ctx)
2058 {
2059     struct i965_driver_data *i965 = i965_driver_data(ctx);
2060     struct intel_batchbuffer *batch = i965->batch;
2061
2062     OUT_BATCH(batch, CMD_PIPELINE_SELECT | PIPELINE_SELECT_3D);
2063
2064     OUT_BATCH(batch, GEN6_3DSTATE_MULTISAMPLE | (3 - 2));
2065     OUT_BATCH(batch, GEN6_3DSTATE_MULTISAMPLE_PIXEL_LOCATION_CENTER |
2066               GEN6_3DSTATE_MULTISAMPLE_NUMSAMPLES_1); /* 1 sample/pixel */
2067     OUT_BATCH(batch, 0);
2068
2069     OUT_BATCH(batch, GEN6_3DSTATE_SAMPLE_MASK | (2 - 2));
2070     OUT_BATCH(batch, 1);
2071
2072     /* Set system instruction pointer */
2073     OUT_BATCH(batch, CMD_STATE_SIP | 0);
2074     OUT_BATCH(batch, 0);
2075 }
2076
2077 static void
2078 gen6_emit_state_base_address(VADriverContextP ctx)
2079 {
2080     struct i965_driver_data *i965 = i965_driver_data(ctx);
2081     struct intel_batchbuffer *batch = i965->batch;
2082     struct i965_render_state *render_state = &i965->render_state;
2083
2084     OUT_BATCH(batch, CMD_STATE_BASE_ADDRESS | (10 - 2));
2085     OUT_BATCH(batch, BASE_ADDRESS_MODIFY); /* General state base address */
2086     OUT_RELOC(batch, render_state->wm.surface_state_binding_table_bo, I915_GEM_DOMAIN_INSTRUCTION, 0, BASE_ADDRESS_MODIFY); /* Surface state base address */
2087     OUT_BATCH(batch, BASE_ADDRESS_MODIFY); /* Dynamic state base address */
2088     OUT_BATCH(batch, BASE_ADDRESS_MODIFY); /* Indirect object base address */
2089     OUT_BATCH(batch, BASE_ADDRESS_MODIFY); /* Instruction base address */
2090     OUT_BATCH(batch, BASE_ADDRESS_MODIFY); /* General state upper bound */
2091     OUT_BATCH(batch, BASE_ADDRESS_MODIFY); /* Dynamic state upper bound */
2092     OUT_BATCH(batch, BASE_ADDRESS_MODIFY); /* Indirect object upper bound */
2093     OUT_BATCH(batch, BASE_ADDRESS_MODIFY); /* Instruction access upper bound */
2094 }
2095
2096 static void
2097 gen6_emit_viewport_state_pointers(VADriverContextP ctx)
2098 {
2099     struct i965_driver_data *i965 = i965_driver_data(ctx);
2100     struct intel_batchbuffer *batch = i965->batch;
2101     struct i965_render_state *render_state = &i965->render_state;
2102
2103     OUT_BATCH(batch, GEN6_3DSTATE_VIEWPORT_STATE_POINTERS |
2104               GEN6_3DSTATE_VIEWPORT_STATE_MODIFY_CC |
2105               (4 - 2));
2106     OUT_BATCH(batch, 0);
2107     OUT_BATCH(batch, 0);
2108     OUT_RELOC(batch, render_state->cc.viewport, I915_GEM_DOMAIN_INSTRUCTION, 0, 0);
2109 }
2110
2111 static void
2112 gen6_emit_urb(VADriverContextP ctx)
2113 {
2114     struct i965_driver_data *i965 = i965_driver_data(ctx);
2115     struct intel_batchbuffer *batch = i965->batch;
2116
2117     OUT_BATCH(batch, GEN6_3DSTATE_URB | (3 - 2));
2118     OUT_BATCH(batch, ((1 - 1) << GEN6_3DSTATE_URB_VS_SIZE_SHIFT) |
2119               (24 << GEN6_3DSTATE_URB_VS_ENTRIES_SHIFT)); /* at least 24 on GEN6 */
2120     OUT_BATCH(batch, (0 << GEN6_3DSTATE_URB_GS_SIZE_SHIFT) |
2121               (0 << GEN6_3DSTATE_URB_GS_ENTRIES_SHIFT)); /* no GS thread */
2122 }
2123
2124 static void
2125 gen6_emit_cc_state_pointers(VADriverContextP ctx)
2126 {
2127     struct i965_driver_data *i965 = i965_driver_data(ctx);
2128     struct intel_batchbuffer *batch = i965->batch;
2129     struct i965_render_state *render_state = &i965->render_state;
2130
2131     OUT_BATCH(batch, GEN6_3DSTATE_CC_STATE_POINTERS | (4 - 2));
2132     OUT_RELOC(batch, render_state->cc.blend, I915_GEM_DOMAIN_INSTRUCTION, 0, 1);
2133     OUT_RELOC(batch, render_state->cc.depth_stencil, I915_GEM_DOMAIN_INSTRUCTION, 0, 1);
2134     OUT_RELOC(batch, render_state->cc.state, I915_GEM_DOMAIN_INSTRUCTION, 0, 1);
2135 }
2136
2137 static void
2138 gen6_emit_sampler_state_pointers(VADriverContextP ctx)
2139 {
2140     struct i965_driver_data *i965 = i965_driver_data(ctx);
2141     struct intel_batchbuffer *batch = i965->batch;
2142     struct i965_render_state *render_state = &i965->render_state;
2143
2144     OUT_BATCH(batch, GEN6_3DSTATE_SAMPLER_STATE_POINTERS |
2145               GEN6_3DSTATE_SAMPLER_STATE_MODIFY_PS |
2146               (4 - 2));
2147     OUT_BATCH(batch, 0); /* VS */
2148     OUT_BATCH(batch, 0); /* GS */
2149     OUT_RELOC(batch, render_state->wm.sampler, I915_GEM_DOMAIN_INSTRUCTION, 0, 0);
2150 }
2151
2152 static void
2153 gen6_emit_binding_table(VADriverContextP ctx)
2154 {
2155     struct i965_driver_data *i965 = i965_driver_data(ctx);
2156     struct intel_batchbuffer *batch = i965->batch;
2157
2158     /* Binding table pointers */
2159     OUT_BATCH(batch, CMD_BINDING_TABLE_POINTERS |
2160               GEN6_BINDING_TABLE_MODIFY_PS |
2161               (4 - 2));
2162     OUT_BATCH(batch, 0);                /* vs */
2163     OUT_BATCH(batch, 0);                /* gs */
2164     /* Only the PS uses the binding table */
2165     OUT_BATCH(batch, BINDING_TABLE_OFFSET);
2166 }
2167
2168 static void
2169 gen6_emit_depth_buffer_state(VADriverContextP ctx)
2170 {
2171     struct i965_driver_data *i965 = i965_driver_data(ctx);
2172     struct intel_batchbuffer *batch = i965->batch;
2173
2174     OUT_BATCH(batch, CMD_DEPTH_BUFFER | (7 - 2));
2175     OUT_BATCH(batch, (I965_SURFACE_NULL << CMD_DEPTH_BUFFER_TYPE_SHIFT) |
2176               (I965_DEPTHFORMAT_D32_FLOAT << CMD_DEPTH_BUFFER_FORMAT_SHIFT));
2177     OUT_BATCH(batch, 0);
2178     OUT_BATCH(batch, 0);
2179     OUT_BATCH(batch, 0);
2180     OUT_BATCH(batch, 0);
2181     OUT_BATCH(batch, 0);
2182
2183     OUT_BATCH(batch, CMD_CLEAR_PARAMS | (2 - 2));
2184     OUT_BATCH(batch, 0);
2185 }
2186
2187 static void
2188 gen6_emit_drawing_rectangle(VADriverContextP ctx)
2189 {
2190     i965_render_drawing_rectangle(ctx);
2191 }
2192
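/*
 * VS and GS are programmed as pass-through with no kernels and no
 * constant buffers: the rectlist vertices are emitted in screen
 * coordinates, so no vertex transformation is needed.
 */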
2193 static void 
2194 gen6_emit_vs_state(VADriverContextP ctx)
2195 {
2196     struct i965_driver_data *i965 = i965_driver_data(ctx);
2197     struct intel_batchbuffer *batch = i965->batch;
2198
2199     /* disable VS constant buffer */
2200     OUT_BATCH(batch, GEN6_3DSTATE_CONSTANT_VS | (5 - 2));
2201     OUT_BATCH(batch, 0);
2202     OUT_BATCH(batch, 0);
2203     OUT_BATCH(batch, 0);
2204     OUT_BATCH(batch, 0);
2205         
2206     OUT_BATCH(batch, GEN6_3DSTATE_VS | (6 - 2));
2207     OUT_BATCH(batch, 0); /* without VS kernel */
2208     OUT_BATCH(batch, 0);
2209     OUT_BATCH(batch, 0);
2210     OUT_BATCH(batch, 0);
2211     OUT_BATCH(batch, 0); /* pass-through */
2212 }
2213
2214 static void 
2215 gen6_emit_gs_state(VADriverContextP ctx)
2216 {
2217     struct i965_driver_data *i965 = i965_driver_data(ctx);
2218     struct intel_batchbuffer *batch = i965->batch;
2219
2220     /* disable GS constant buffer */
2221     OUT_BATCH(batch, GEN6_3DSTATE_CONSTANT_GS | (5 - 2));
2222     OUT_BATCH(batch, 0);
2223     OUT_BATCH(batch, 0);
2224     OUT_BATCH(batch, 0);
2225     OUT_BATCH(batch, 0);
2226         
2227     OUT_BATCH(batch, GEN6_3DSTATE_GS | (7 - 2));
2228     OUT_BATCH(batch, 0); /* without GS kernel */
2229     OUT_BATCH(batch, 0);
2230     OUT_BATCH(batch, 0);
2231     OUT_BATCH(batch, 0);
2232     OUT_BATCH(batch, 0);
2233     OUT_BATCH(batch, 0); /* pass-through */
2234 }
2235
2236 static void 
2237 gen6_emit_clip_state(VADriverContextP ctx)
2238 {
2239     struct i965_driver_data *i965 = i965_driver_data(ctx);
2240     struct intel_batchbuffer *batch = i965->batch;
2241
2242     OUT_BATCH(batch, GEN6_3DSTATE_CLIP | (4 - 2));
2243     OUT_BATCH(batch, 0);
2244     OUT_BATCH(batch, 0); /* pass-through */
2245     OUT_BATCH(batch, 0);
2246 }
2247
2248 static void 
2249 gen6_emit_sf_state(VADriverContextP ctx)
2250 {
2251     struct i965_driver_data *i965 = i965_driver_data(ctx);
2252     struct intel_batchbuffer *batch = i965->batch;
2253
2254     OUT_BATCH(batch, GEN6_3DSTATE_SF | (20 - 2));
2255     OUT_BATCH(batch, (1 << GEN6_3DSTATE_SF_NUM_OUTPUTS_SHIFT) |
2256               (1 << GEN6_3DSTATE_SF_URB_ENTRY_READ_LENGTH_SHIFT) |
2257               (0 << GEN6_3DSTATE_SF_URB_ENTRY_READ_OFFSET_SHIFT));
2258     OUT_BATCH(batch, 0);
2259     OUT_BATCH(batch, GEN6_3DSTATE_SF_CULL_NONE);
2260     OUT_BATCH(batch, 2 << GEN6_3DSTATE_SF_TRIFAN_PROVOKE_SHIFT); /* DW4 */
2261     OUT_BATCH(batch, 0);
2262     OUT_BATCH(batch, 0);
2263     OUT_BATCH(batch, 0);
2264     OUT_BATCH(batch, 0);
2265     OUT_BATCH(batch, 0); /* DW9 */
2266     OUT_BATCH(batch, 0);
2267     OUT_BATCH(batch, 0);
2268     OUT_BATCH(batch, 0);
2269     OUT_BATCH(batch, 0);
2270     OUT_BATCH(batch, 0); /* DW14 */
2271     OUT_BATCH(batch, 0);
2272     OUT_BATCH(batch, 0);
2273     OUT_BATCH(batch, 0);
2274     OUT_BATCH(batch, 0);
2275     OUT_BATCH(batch, 0); /* DW19 */
2276 }
2277
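/*
 * WM/PS setup: bind the CURBE bo as PS push constants, point the WM at
 * the selected render kernel, and enable SIMD16 dispatch with dispatch
 * GRF start 6, max threads from render_state, a single SF output and
 * perspective-pixel barycentrics.
 */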
2278 static void 
2279 gen6_emit_wm_state(VADriverContextP ctx, int kernel)
2280 {
2281     struct i965_driver_data *i965 = i965_driver_data(ctx);
2282     struct intel_batchbuffer *batch = i965->batch;
2283     struct i965_render_state *render_state = &i965->render_state;
2284
2285     OUT_BATCH(batch, GEN6_3DSTATE_CONSTANT_PS |
2286               GEN6_3DSTATE_CONSTANT_BUFFER_0_ENABLE |
2287               (5 - 2));
2288     OUT_RELOC(batch, 
2289               render_state->curbe.bo,
2290               I915_GEM_DOMAIN_INSTRUCTION, 0,
2291               (URB_CS_ENTRY_SIZE-1));
2292     OUT_BATCH(batch, 0);
2293     OUT_BATCH(batch, 0);
2294     OUT_BATCH(batch, 0);
2295
2296     OUT_BATCH(batch, GEN6_3DSTATE_WM | (9 - 2));
2297     OUT_RELOC(batch, render_state->render_kernels[kernel].bo,
2298               I915_GEM_DOMAIN_INSTRUCTION, 0,
2299               0);
2300     OUT_BATCH(batch, (1 << GEN6_3DSTATE_WM_SAMPLER_COUNT_SHITF) |
2301               (5 << GEN6_3DSTATE_WM_BINDING_TABLE_ENTRY_COUNT_SHIFT));
2302     OUT_BATCH(batch, 0);
2303     OUT_BATCH(batch, (6 << GEN6_3DSTATE_WM_DISPATCH_START_GRF_0_SHIFT)); /* DW4 */
2304     OUT_BATCH(batch, ((render_state->max_wm_threads - 1) << GEN6_3DSTATE_WM_MAX_THREADS_SHIFT) |
2305               GEN6_3DSTATE_WM_DISPATCH_ENABLE |
2306               GEN6_3DSTATE_WM_16_DISPATCH_ENABLE);
2307     OUT_BATCH(batch, (1 << GEN6_3DSTATE_WM_NUM_SF_OUTPUTS_SHIFT) |
2308               GEN6_3DSTATE_WM_PERSPECTIVE_PIXEL_BARYCENTRIC);
2309     OUT_BATCH(batch, 0);
2310     OUT_BATCH(batch, 0);
2311 }
2312
2313 static void
2314 gen6_emit_vertex_element_state(VADriverContextP ctx)
2315 {
2316     struct i965_driver_data *i965 = i965_driver_data(ctx);
2317     struct intel_batchbuffer *batch = i965->batch;
2318
2319     /* Set up our vertex elements, sourced from the single vertex buffer. */
2320     OUT_BATCH(batch, CMD_VERTEX_ELEMENTS | (5 - 2));
2321     /* offset 0: X,Y -> {X, Y, 1.0, 1.0} */
2322     OUT_BATCH(batch, (0 << GEN6_VE0_VERTEX_BUFFER_INDEX_SHIFT) |
2323               GEN6_VE0_VALID |
2324               (I965_SURFACEFORMAT_R32G32_FLOAT << VE0_FORMAT_SHIFT) |
2325               (0 << VE0_OFFSET_SHIFT));
2326     OUT_BATCH(batch, (I965_VFCOMPONENT_STORE_SRC << VE1_VFCOMPONENT_0_SHIFT) |
2327               (I965_VFCOMPONENT_STORE_SRC << VE1_VFCOMPONENT_1_SHIFT) |
2328               (I965_VFCOMPONENT_STORE_1_FLT << VE1_VFCOMPONENT_2_SHIFT) |
2329               (I965_VFCOMPONENT_STORE_1_FLT << VE1_VFCOMPONENT_3_SHIFT));
2330     /* offset 8: S0, T0 -> {S0, T0, 1.0, 1.0} */
2331     OUT_BATCH(batch, (0 << GEN6_VE0_VERTEX_BUFFER_INDEX_SHIFT) |
2332               GEN6_VE0_VALID |
2333               (I965_SURFACEFORMAT_R32G32_FLOAT << VE0_FORMAT_SHIFT) |
2334               (8 << VE0_OFFSET_SHIFT));
2335     OUT_BATCH(batch, (I965_VFCOMPONENT_STORE_SRC << VE1_VFCOMPONENT_0_SHIFT) | 
2336               (I965_VFCOMPONENT_STORE_SRC << VE1_VFCOMPONENT_1_SHIFT) |
2337               (I965_VFCOMPONENT_STORE_1_FLT << VE1_VFCOMPONENT_2_SHIFT) |
2338               (I965_VFCOMPONENT_STORE_1_FLT << VE1_VFCOMPONENT_3_SHIFT));
2339 }
2340
2341 static void
2342 gen6_emit_vertices(VADriverContextP ctx)
2343 {
2344     struct i965_driver_data *i965 = i965_driver_data(ctx);
2345     struct intel_batchbuffer *batch = i965->batch;
2346     struct i965_render_state *render_state = &i965->render_state;
2347
2348     BEGIN_BATCH(batch, 11);
2349     OUT_BATCH(batch, CMD_VERTEX_BUFFERS | 3);
2350     OUT_BATCH(batch, 
2351               (0 << GEN6_VB0_BUFFER_INDEX_SHIFT) |
2352               GEN6_VB0_VERTEXDATA |
2353               ((4 * 4) << VB0_BUFFER_PITCH_SHIFT));
2354     OUT_RELOC(batch, render_state->vb.vertex_buffer, I915_GEM_DOMAIN_VERTEX, 0, 0);
2355     OUT_RELOC(batch, render_state->vb.vertex_buffer, I915_GEM_DOMAIN_VERTEX, 0, 12 * 4);
2356     OUT_BATCH(batch, 0);
2357
2358     OUT_BATCH(batch, 
2359               CMD_3DPRIMITIVE |
2360               _3DPRIMITIVE_VERTEX_SEQUENTIAL |
2361               (_3DPRIM_RECTLIST << _3DPRIMITIVE_TOPOLOGY_SHIFT) |
2362               (0 << 9) |
2363               4);
2364     OUT_BATCH(batch, 3); /* vertex count per instance */
2365     OUT_BATCH(batch, 0); /* start vertex offset */
2366     OUT_BATCH(batch, 1); /* single instance */
2367     OUT_BATCH(batch, 0); /* start instance location */
2368     OUT_BATCH(batch, 0); /* index buffer offset, ignored */
2369     ADVANCE_BATCH(batch);
2370 }
2371
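/*
 * Emit the full GEN6 state + draw sequence inside one atomic batch
 * section so the buffer cannot be flushed mid-pipeline-setup.
 */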
2372 static void
2373 gen6_render_emit_states(VADriverContextP ctx, int kernel)
2374 {
2375     struct i965_driver_data *i965 = i965_driver_data(ctx);
2376     struct intel_batchbuffer *batch = i965->batch;
2377
2378     intel_batchbuffer_start_atomic(batch, 0x1000);
2379     intel_batchbuffer_emit_mi_flush(batch);
2380     gen6_emit_invarient_states(ctx);
2381     gen6_emit_state_base_address(ctx);
2382     gen6_emit_viewport_state_pointers(ctx);
2383     gen6_emit_urb(ctx);
2384     gen6_emit_cc_state_pointers(ctx);
2385     gen6_emit_sampler_state_pointers(ctx);
2386     gen6_emit_vs_state(ctx);
2387     gen6_emit_gs_state(ctx);
2388     gen6_emit_clip_state(ctx);
2389     gen6_emit_sf_state(ctx);
2390     gen6_emit_wm_state(ctx, kernel);
2391     gen6_emit_binding_table(ctx);
2392     gen6_emit_depth_buffer_state(ctx);
2393     gen6_emit_drawing_rectangle(ctx);
2394     gen6_emit_vertex_element_state(ctx);
2395     gen6_emit_vertices(ctx);
2396     intel_batchbuffer_end_atomic(batch);
2397 }
2398
2399 static void
2400 gen6_render_put_surface(
2401     VADriverContextP   ctx,
2402     struct object_surface *obj_surface,
2403     const VARectangle *src_rect,
2404     const VARectangle *dst_rect,
2405     unsigned int       flags
2406 )
2407 {
2408     struct i965_driver_data *i965 = i965_driver_data(ctx);
2409     struct intel_batchbuffer *batch = i965->batch;
2410
2411     gen6_render_initialize(ctx);
2412     gen6_render_setup_states(ctx, obj_surface, src_rect, dst_rect, flags);
2413     i965_clear_dest_region(ctx);
2414     gen6_render_emit_states(ctx, PS_KERNEL);
2415     intel_batchbuffer_flush(batch);
2416 }
2417
2418 static void
2419 gen6_subpicture_render_blend_state(VADriverContextP ctx)
2420 {
2421     struct i965_driver_data *i965 = i965_driver_data(ctx);
2422     struct i965_render_state *render_state = &i965->render_state;
2423     struct gen6_blend_state *blend_state;
2424
2426     dri_bo_map(render_state->cc.blend, 1);
2427     assert(render_state->cc.blend->virtual);
2428     blend_state = render_state->cc.blend->virtual;
2429     memset(blend_state, 0, sizeof(*blend_state));
2430     blend_state->blend0.dest_blend_factor = I965_BLENDFACTOR_INV_SRC_ALPHA;
2431     blend_state->blend0.source_blend_factor = I965_BLENDFACTOR_SRC_ALPHA;
2432     blend_state->blend0.blend_func = I965_BLENDFUNCTION_ADD;
2433     blend_state->blend0.blend_enable = 1;
2434     blend_state->blend1.post_blend_clamp_enable = 1;
2435     blend_state->blend1.pre_blend_clamp_enable = 1;
2436     blend_state->blend1.clamp_range = 0; /* clamp range [0, 1] */
2437     dri_bo_unmap(render_state->cc.blend);
2438 }
2439
2440 static void
2441 gen6_subpicture_render_setup_states(
2442     VADriverContextP   ctx,
2443     struct object_surface *obj_surface,
2444     const VARectangle *src_rect,
2445     const VARectangle *dst_rect
2446 )
2447 {
2448     i965_render_dest_surface_state(ctx, 0);
2449     i965_subpic_render_src_surfaces_state(ctx, obj_surface);
2450     i965_render_sampler(ctx);
2451     i965_render_cc_viewport(ctx);
2452     gen6_render_color_calc_state(ctx);
2453     gen6_subpicture_render_blend_state(ctx);
2454     gen6_render_depth_stencil_state(ctx);
2455     i965_subpic_render_upload_constants(ctx, obj_surface);
2456     i965_subpic_render_upload_vertex(ctx, obj_surface, dst_rect);
2457 }
2458
2459 static void
2460 gen6_render_put_subpicture(
2461     VADriverContextP   ctx,
2462     struct object_surface *obj_surface,
2463     const VARectangle *src_rect,
2464     const VARectangle *dst_rect
2465 )
2466 {
2467     struct i965_driver_data *i965 = i965_driver_data(ctx);
2468     struct intel_batchbuffer *batch = i965->batch;
2469     unsigned int index = obj_surface->subpic_render_idx;
2470     struct object_subpic *obj_subpic = obj_surface->obj_subpic[index];
2471
2472     assert(obj_subpic);
2473     gen6_render_initialize(ctx);
2474     gen6_subpicture_render_setup_states(ctx, obj_surface, src_rect, dst_rect);
2475     gen6_render_emit_states(ctx, PS_SUBPIC_KERNEL);
2476     i965_render_upload_image_palette(ctx, obj_subpic->obj_image, 0xff);
2477     intel_batchbuffer_flush(batch);
2478 }
2479
2480 /*
2481  * for GEN7
2482  */
2483 static void 
2484 gen7_render_initialize(VADriverContextP ctx)
2485 {
2486     struct i965_driver_data *i965 = i965_driver_data(ctx);
2487     struct i965_render_state *render_state = &i965->render_state;
2488     dri_bo *bo;
2489     int size;
2490
2491     /* VERTEX BUFFER */
2492     dri_bo_unreference(render_state->vb.vertex_buffer);
2493     bo = dri_bo_alloc(i965->intel.bufmgr,
2494                       "vertex buffer",
2495                       4096,
2496                       4096);
2497     assert(bo);
2498     render_state->vb.vertex_buffer = bo;
2499
2500     /* WM */
2501     dri_bo_unreference(render_state->wm.surface_state_binding_table_bo);
2502     bo = dri_bo_alloc(i965->intel.bufmgr,
2503                       "surface state & binding table",
2504                       (SURFACE_STATE_PADDED_SIZE + sizeof(unsigned int)) * MAX_RENDER_SURFACES,
2505                       4096);
2506     assert(bo);
2507     render_state->wm.surface_state_binding_table_bo = bo;
2508
2509     dri_bo_unreference(render_state->wm.sampler);
2510     bo = dri_bo_alloc(i965->intel.bufmgr,
2511                       "sampler state",
2512                       MAX_SAMPLERS * sizeof(struct gen7_sampler_state),
2513                       4096);
2514     assert(bo);
2515     render_state->wm.sampler = bo;
2516     render_state->wm.sampler_count = 0;
2517
2518     /* COLOR CALCULATOR */
2519     dri_bo_unreference(render_state->cc.state);
2520     bo = dri_bo_alloc(i965->intel.bufmgr,
2521                       "color calc state",
2522                       sizeof(struct gen6_color_calc_state),
2523                       4096);
2524     assert(bo);
2525     render_state->cc.state = bo;
2526
2527     /* CC VIEWPORT */
2528     dri_bo_unreference(render_state->cc.viewport);
2529     bo = dri_bo_alloc(i965->intel.bufmgr,
2530                       "cc viewport",
2531                       sizeof(struct i965_cc_viewport),
2532                       4096);
2533     assert(bo);
2534     render_state->cc.viewport = bo;
2535
2536     /* BLEND STATE */
2537     dri_bo_unreference(render_state->cc.blend);
2538     size = sizeof(struct gen8_global_blend_state) + 2 * sizeof(struct gen8_blend_state_rt);
2539     bo = dri_bo_alloc(i965->intel.bufmgr,
2540                       "blend state",
2541                       size,
2542                       4096);
2543     assert(bo);
2544     render_state->cc.blend = bo;
2545
2546     /* DEPTH & STENCIL STATE */
2547     dri_bo_unreference(render_state->cc.depth_stencil);
2548     bo = dri_bo_alloc(i965->intel.bufmgr,
2549                       "depth & stencil state",
2550                       sizeof(struct gen6_depth_stencil_state),
2551                       4096);
2552     assert(bo);
2553     render_state->cc.depth_stencil = bo;
2554 }
2555
2556 /*
2557  * for GEN8
2558  */
2559 static void 
2560 gen8_render_initialize(VADriverContextP ctx)
2561 {
2562     struct i965_driver_data *i965 = i965_driver_data(ctx);
2563     struct i965_render_state *render_state = &i965->render_state;
2564     dri_bo *bo;
2565
2566     /* VERTEX BUFFER */
2567     dri_bo_unreference(render_state->vb.vertex_buffer);
2568     bo = dri_bo_alloc(i965->intel.bufmgr,
2569                       "vertex buffer",
2570                       4096,
2571                       4096);
2572     assert(bo);
2573     render_state->vb.vertex_buffer = bo;
2574
2575     /* WM */
2576     dri_bo_unreference(render_state->wm.surface_state_binding_table_bo);
2577     bo = dri_bo_alloc(i965->intel.bufmgr,
2578                       "surface state & binding table",
2579                       (SURFACE_STATE_PADDED_SIZE + sizeof(unsigned int)) * MAX_RENDER_SURFACES,
2580                       4096);
2581     assert(bo);
2582     render_state->wm.surface_state_binding_table_bo = bo;
2583
2584     dri_bo_unreference(render_state->wm.sampler);
2585     bo = dri_bo_alloc(i965->intel.bufmgr,
2586                       "sampler state",
2587                       MAX_SAMPLERS * sizeof(struct gen8_sampler_state),
2588                       4096);
2589     assert(bo);
2590     render_state->wm.sampler = bo;
2591     render_state->wm.sampler_count = 0;
2592
2593     /* COLOR CALCULATOR */
2594     dri_bo_unreference(render_state->cc.state);
2595     bo = dri_bo_alloc(i965->intel.bufmgr,
2596                       "color calc state",
2597                       sizeof(struct gen6_color_calc_state),
2598                       4096);
2599     assert(bo);
2600     render_state->cc.state = bo;
2601
2602     /* CC VIEWPORT */
2603     dri_bo_unreference(render_state->cc.viewport);
2604     bo = dri_bo_alloc(i965->intel.bufmgr,
2605                       "cc viewport",
2606                       sizeof(struct i965_cc_viewport),
2607                       4096);
2608     assert(bo);
2609     render_state->cc.viewport = bo;
2610
2611     /* BLEND STATE */
2612     dri_bo_unreference(render_state->cc.blend);
2613     bo = dri_bo_alloc(i965->intel.bufmgr,
2614                       "blend state",
2615                       sizeof(struct gen6_blend_state),
2616                       4096);
2617     assert(bo);
2618     render_state->cc.blend = bo;
2619
2620     /* DEPTH & STENCIL STATE */
2621     dri_bo_unreference(render_state->cc.depth_stencil);
2622     bo = dri_bo_alloc(i965->intel.bufmgr,
2623                       "depth & stencil state",
2624                       sizeof(struct gen6_depth_stencil_state),
2625                       4096);
2626     assert(bo);
2627     render_state->cc.depth_stencil = bo;
2628 }
2629
2630 static void
2631 gen7_render_color_calc_state(VADriverContextP ctx)
2632 {
2633     struct i965_driver_data *i965 = i965_driver_data(ctx);
2634     struct i965_render_state *render_state = &i965->render_state;
2635     struct gen6_color_calc_state *color_calc_state;
2636     
2637     dri_bo_map(render_state->cc.state, 1);
2638     assert(render_state->cc.state->virtual);
2639     color_calc_state = render_state->cc.state->virtual;
2640     memset(color_calc_state, 0, sizeof(*color_calc_state));
2641     color_calc_state->constant_r = 1.0;
2642     color_calc_state->constant_g = 0.0;
2643     color_calc_state->constant_b = 1.0;
2644     color_calc_state->constant_a = 1.0;
2645     dri_bo_unmap(render_state->cc.state);
2646 }
2647
2648 static void
2649 gen7_render_blend_state(VADriverContextP ctx)
2650 {
2651     struct i965_driver_data *i965 = i965_driver_data(ctx);
2652     struct i965_render_state *render_state = &i965->render_state;
2653     struct gen6_blend_state *blend_state;
2654     
2655     dri_bo_map(render_state->cc.blend, 1);
2656     assert(render_state->cc.blend->virtual);
2657     blend_state = render_state->cc.blend->virtual;
2658     memset(blend_state, 0, sizeof(*blend_state));
2659     blend_state->blend1.logic_op_enable = 1;
2660     blend_state->blend1.logic_op_func = 0xc;
2661     blend_state->blend1.pre_blend_clamp_enable = 1;
2662     dri_bo_unmap(render_state->cc.blend);
2663 }
2664
2665 static void
2666 gen7_render_depth_stencil_state(VADriverContextP ctx)
2667 {
2668     struct i965_driver_data *i965 = i965_driver_data(ctx);
2669     struct i965_render_state *render_state = &i965->render_state;
2670     struct gen6_depth_stencil_state *depth_stencil_state;
2671     
2672     dri_bo_map(render_state->cc.depth_stencil, 1);
2673     assert(render_state->cc.depth_stencil->virtual);
2674     depth_stencil_state = render_state->cc.depth_stencil->virtual;
2675     memset(depth_stencil_state, 0, sizeof(*depth_stencil_state));
2676     dri_bo_unmap(render_state->cc.depth_stencil);
2677 }
2678
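/*
 * GEN7 and GEN8 sampler setup is identical apart from the underlying
 * state struct: bilinear min/mag filtering with clamp-to-edge wrapping
 * on all three coordinates.
 */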
2679 static void 
2680 gen7_render_sampler(VADriverContextP ctx)
2681 {
2682     struct i965_driver_data *i965 = i965_driver_data(ctx);
2683     struct i965_render_state *render_state = &i965->render_state;
2684     struct gen7_sampler_state *sampler_state;
2685     int i;
2686     
2687     assert(render_state->wm.sampler_count > 0);
2688     assert(render_state->wm.sampler_count <= MAX_SAMPLERS);
2689
2690     dri_bo_map(render_state->wm.sampler, 1);
2691     assert(render_state->wm.sampler->virtual);
2692     sampler_state = render_state->wm.sampler->virtual;
2693     for (i = 0; i < render_state->wm.sampler_count; i++) {
2694         memset(sampler_state, 0, sizeof(*sampler_state));
2695         sampler_state->ss0.min_filter = I965_MAPFILTER_LINEAR;
2696         sampler_state->ss0.mag_filter = I965_MAPFILTER_LINEAR;
2697         sampler_state->ss3.r_wrap_mode = I965_TEXCOORDMODE_CLAMP;
2698         sampler_state->ss3.s_wrap_mode = I965_TEXCOORDMODE_CLAMP;
2699         sampler_state->ss3.t_wrap_mode = I965_TEXCOORDMODE_CLAMP;
2700         sampler_state++;
2701     }
2702
2703     dri_bo_unmap(render_state->wm.sampler);
2704 }
2705
2706 static void 
2707 gen8_render_sampler(VADriverContextP ctx)
2708 {
2709     struct i965_driver_data *i965 = i965_driver_data(ctx);
2710     struct i965_render_state *render_state = &i965->render_state;
2711     struct gen8_sampler_state *sampler_state;
2712     int i;
2713     
2714     assert(render_state->wm.sampler_count > 0);
2715     assert(render_state->wm.sampler_count <= MAX_SAMPLERS);
2716
2717     dri_bo_map(render_state->wm.sampler, 1);
2718     assert(render_state->wm.sampler->virtual);
2719     sampler_state = render_state->wm.sampler->virtual;
2720     for (i = 0; i < render_state->wm.sampler_count; i++) {
2721         memset(sampler_state, 0, sizeof(*sampler_state));
2722         sampler_state->ss0.min_filter = I965_MAPFILTER_LINEAR;
2723         sampler_state->ss0.mag_filter = I965_MAPFILTER_LINEAR;
2724         sampler_state->ss3.r_wrap_mode = I965_TEXCOORDMODE_CLAMP;
2725         sampler_state->ss3.s_wrap_mode = I965_TEXCOORDMODE_CLAMP;
2726         sampler_state->ss3.t_wrap_mode = I965_TEXCOORDMODE_CLAMP;
2727         sampler_state++;
2728     }
2729
2730     dri_bo_unmap(render_state->wm.sampler);
2731 }
2732
2733
2734 static void
2735 gen7_render_setup_states(
2736     VADriverContextP   ctx,
2737     struct object_surface *obj_surface,
2738     const VARectangle *src_rect,
2739     const VARectangle *dst_rect,
2740     unsigned int       flags
2741 )
2742 {
2743     i965_render_dest_surface_state(ctx, 0);
2744     i965_render_src_surfaces_state(ctx, obj_surface, flags);
2745     gen7_render_sampler(ctx);
2746     i965_render_cc_viewport(ctx);
2747     gen7_render_color_calc_state(ctx);
2748     gen7_render_blend_state(ctx);
2749     gen7_render_depth_stencil_state(ctx);
2750     i965_render_upload_constants(ctx, obj_surface, flags);
2751     i965_render_upload_vertex(ctx, obj_surface, src_rect, dst_rect);
2752 }
2753
2754 static void
2755 gen8_render_blend_state(VADriverContextP ctx)
2756 {
2757     struct i965_driver_data *i965 = i965_driver_data(ctx);
2758     struct i965_render_state *render_state = &i965->render_state;
2759     struct gen8_global_blend_state *global_blend_state;
2760     struct gen8_blend_state_rt *blend_state;
2761     
2762     dri_bo_map(render_state->cc.blend, 1);
2763     assert(render_state->cc.blend->virtual);
2764     global_blend_state = render_state->cc.blend->virtual;
2765     memset(global_blend_state, 0, sizeof(*global_blend_state));
2766     /* Global blend state + blend_state for Render Target */
2767     blend_state = (struct gen8_blend_state_rt *)(global_blend_state + 1);
2768     blend_state->blend1.logic_op_enable = 1;
2769     blend_state->blend1.logic_op_func = 0xc;
2770     blend_state->blend1.pre_blend_clamp_enable = 1;
2771     dri_bo_unmap(render_state->cc.blend);
2772 }
2773
2774 static void
2775 gen8_render_setup_states(
2776     VADriverContextP   ctx,
2777     struct object_surface *obj_surface,
2778     const VARectangle *src_rect,
2779     const VARectangle *dst_rect,
2780     unsigned int       flags
2781 )
2782 {
2783     i965_render_dest_surface_state(ctx, 0);
2784     i965_render_src_surfaces_state(ctx, obj_surface, flags);
2785     gen8_render_sampler(ctx);
2786     i965_render_cc_viewport(ctx);
2787     gen7_render_color_calc_state(ctx);
2788     gen8_render_blend_state(ctx);
2789     i965_render_upload_constants(ctx, obj_surface, flags);
2790     i965_render_upload_vertex(ctx, obj_surface, src_rect, dst_rect);
2791 }
2792
2793 static void
2794 gen7_emit_invarient_states(VADriverContextP ctx)
2795 {
2796     struct i965_driver_data *i965 = i965_driver_data(ctx);
2797     struct intel_batchbuffer *batch = i965->batch;
2798
2799     BEGIN_BATCH(batch, 1);
2800     OUT_BATCH(batch, CMD_PIPELINE_SELECT | PIPELINE_SELECT_3D);
2801     ADVANCE_BATCH(batch);
2802
2803     BEGIN_BATCH(batch, 4);
2804     OUT_BATCH(batch, GEN6_3DSTATE_MULTISAMPLE | (4 - 2));
2805     OUT_BATCH(batch, GEN6_3DSTATE_MULTISAMPLE_PIXEL_LOCATION_CENTER |
2806               GEN6_3DSTATE_MULTISAMPLE_NUMSAMPLES_1); /* 1 sample/pixel */
2807     OUT_BATCH(batch, 0);
2808     OUT_BATCH(batch, 0);
2809     ADVANCE_BATCH(batch);
2810
2811     BEGIN_BATCH(batch, 2);
2812     OUT_BATCH(batch, GEN6_3DSTATE_SAMPLE_MASK | (2 - 2));
2813     OUT_BATCH(batch, 1);
2814     ADVANCE_BATCH(batch);
2815
2816     /* Set system instruction pointer */
2817     BEGIN_BATCH(batch, 2);
2818     OUT_BATCH(batch, CMD_STATE_SIP | 0);
2819     OUT_BATCH(batch, 0);
2820     ADVANCE_BATCH(batch);
2821 }
2822
2823 static void
2824 gen7_emit_state_base_address(VADriverContextP ctx)
2825 {
2826     struct i965_driver_data *i965 = i965_driver_data(ctx);
2827     struct intel_batchbuffer *batch = i965->batch;
2828     struct i965_render_state *render_state = &i965->render_state;
2829
2830     OUT_BATCH(batch, CMD_STATE_BASE_ADDRESS | (10 - 2));
2831     OUT_BATCH(batch, BASE_ADDRESS_MODIFY); /* General state base address */
2832     OUT_RELOC(batch, render_state->wm.surface_state_binding_table_bo, I915_GEM_DOMAIN_INSTRUCTION, 0, BASE_ADDRESS_MODIFY); /* Surface state base address */
2833     OUT_BATCH(batch, BASE_ADDRESS_MODIFY); /* Dynamic state base address */
2834     OUT_BATCH(batch, BASE_ADDRESS_MODIFY); /* Indirect object base address */
2835     OUT_BATCH(batch, BASE_ADDRESS_MODIFY); /* Instruction base address */
2836     OUT_BATCH(batch, BASE_ADDRESS_MODIFY); /* General state upper bound */
2837     OUT_BATCH(batch, BASE_ADDRESS_MODIFY); /* Dynamic state upper bound */
2838     OUT_BATCH(batch, BASE_ADDRESS_MODIFY); /* Indirect object upper bound */
2839     OUT_BATCH(batch, BASE_ADDRESS_MODIFY); /* Instruction access upper bound */
2840 }
2841
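/*
 * On GEN8, STATE_BASE_ADDRESS grows to 16 dwords: each base address is
 * 64 bits wide (two dwords), and the upper-bound fields move to the
 * end of the packet.
 */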
2842 static void
2843 gen8_emit_state_base_address(VADriverContextP ctx)
2844 {
2845     struct i965_driver_data *i965 = i965_driver_data(ctx);
2846     struct intel_batchbuffer *batch = i965->batch;
2847     struct i965_render_state *render_state = &i965->render_state;
2848
2849     BEGIN_BATCH(batch, 16);
2850     OUT_BATCH(batch, CMD_STATE_BASE_ADDRESS | (16 - 2));
2851     OUT_BATCH(batch, BASE_ADDRESS_MODIFY); /* General state base address */
2852     OUT_BATCH(batch, 0);
2853     OUT_BATCH(batch, 0);
2854     /* DW4 */
2855     OUT_RELOC(batch, render_state->wm.surface_state_binding_table_bo, I915_GEM_DOMAIN_INSTRUCTION, 0, BASE_ADDRESS_MODIFY); /* Surface state base address */
2856     OUT_BATCH(batch, 0);
2857
2858     /* DW6 */
2859     OUT_BATCH(batch, BASE_ADDRESS_MODIFY); /* Dynamic state base address */
2860     OUT_BATCH(batch, 0);
2861
2862     /* DW8 */
2863     OUT_BATCH(batch, BASE_ADDRESS_MODIFY); /* Indirect object base address */
2864     OUT_BATCH(batch, 0);
2865
2866     /* DW10 */
2867     OUT_BATCH(batch, BASE_ADDRESS_MODIFY); /* Instruction base address */
2868     OUT_BATCH(batch, 0);
2869
2870     /* DW12 */
2871     OUT_BATCH(batch, 0xFFFF0000 | BASE_ADDRESS_MODIFY); /* General state upper bound */
2872     OUT_BATCH(batch, 0xFFFF0000 | BASE_ADDRESS_MODIFY); /* Dynamic state upper bound */
2873     OUT_BATCH(batch, 0xFFFF0000 | BASE_ADDRESS_MODIFY); /* Indirect object upper bound */
2874     OUT_BATCH(batch, 0xFFFF0000 | BASE_ADDRESS_MODIFY); /* Instruction access upper bound */
2875     ADVANCE_BATCH(batch);
2876 }
2877
2878 static void
2879 gen7_emit_viewport_state_pointers(VADriverContextP ctx)
2880 {
2881     struct i965_driver_data *i965 = i965_driver_data(ctx);
2882     struct intel_batchbuffer *batch = i965->batch;
2883     struct i965_render_state *render_state = &i965->render_state;
2884
2885     BEGIN_BATCH(batch, 2);
2886     OUT_BATCH(batch, GEN7_3DSTATE_VIEWPORT_STATE_POINTERS_CC | (2 - 2));
2887     OUT_RELOC(batch,
2888               render_state->cc.viewport,
2889               I915_GEM_DOMAIN_INSTRUCTION, 0,
2890               0);
2891     ADVANCE_BATCH(batch);
2892
2893     BEGIN_BATCH(batch, 2);
2894     OUT_BATCH(batch, GEN7_3DSTATE_VIEWPORT_STATE_POINTERS_SF_CL | (2 - 2));
2895     OUT_BATCH(batch, 0);
2896     ADVANCE_BATCH(batch);
2897 }
2898
2899 /*
2900  * URB layout on GEN7 
2901  * ----------------------------------------
2902  * | PS Push Constants (8KB) | VS entries |
2903  * ----------------------------------------
2904  */
2905 static void
2906 gen7_emit_urb(VADriverContextP ctx)
2907 {
2908     struct i965_driver_data *i965 = i965_driver_data(ctx);
2909     struct intel_batchbuffer *batch = i965->batch;
2910     unsigned int num_urb_entries = 32;
2911
2912     if (IS_HASWELL(i965->intel.device_id))
2913         num_urb_entries = 64;
2914
2915     BEGIN_BATCH(batch, 2);
2916     OUT_BATCH(batch, GEN7_3DSTATE_PUSH_CONSTANT_ALLOC_PS | (2 - 2));
2917     OUT_BATCH(batch, 8); /* in 1KBs */
2918     ADVANCE_BATCH(batch);
2919
2920     BEGIN_BATCH(batch, 2);
2921     OUT_BATCH(batch, GEN7_3DSTATE_URB_VS | (2 - 2));
2922     OUT_BATCH(batch, 
2923               (num_urb_entries << GEN7_URB_ENTRY_NUMBER_SHIFT) |
2924               (2 - 1) << GEN7_URB_ENTRY_SIZE_SHIFT |
2925               (1 << GEN7_URB_STARTING_ADDRESS_SHIFT));
2926     ADVANCE_BATCH(batch);
2927
2928     BEGIN_BATCH(batch, 2);
2929     OUT_BATCH(batch, GEN7_3DSTATE_URB_GS | (2 - 2));
2930     OUT_BATCH(batch,
2931               (0 << GEN7_URB_ENTRY_SIZE_SHIFT) |
2932               (1 << GEN7_URB_STARTING_ADDRESS_SHIFT));
2933     ADVANCE_BATCH(batch);
2934
2935     BEGIN_BATCH(batch, 2);
2936     OUT_BATCH(batch, GEN7_3DSTATE_URB_HS | (2 - 2));
2937     OUT_BATCH(batch,
2938               (0 << GEN7_URB_ENTRY_SIZE_SHIFT) |
2939               (2 << GEN7_URB_STARTING_ADDRESS_SHIFT));
2940     ADVANCE_BATCH(batch);
2941
2942     BEGIN_BATCH(batch, 2);
2943     OUT_BATCH(batch, GEN7_3DSTATE_URB_DS | (2 - 2));
2944     OUT_BATCH(batch,
2945               (0 << GEN7_URB_ENTRY_SIZE_SHIFT) |
2946               (2 << GEN7_URB_STARTING_ADDRESS_SHIFT));
2947     ADVANCE_BATCH(batch);
2948 }
2949
2950 static void
2951 gen7_emit_cc_state_pointers(VADriverContextP ctx)
2952 {
2953     struct i965_driver_data *i965 = i965_driver_data(ctx);
2954     struct intel_batchbuffer *batch = i965->batch;
2955     struct i965_render_state *render_state = &i965->render_state;
2956
2957     BEGIN_BATCH(batch, 2);
2958     OUT_BATCH(batch, GEN6_3DSTATE_CC_STATE_POINTERS | (2 - 2));
2959     OUT_RELOC(batch,
2960               render_state->cc.state,
2961               I915_GEM_DOMAIN_INSTRUCTION, 0,
2962               1);
2963     ADVANCE_BATCH(batch);
2964
2965     BEGIN_BATCH(batch, 2);
2966     OUT_BATCH(batch, GEN7_3DSTATE_BLEND_STATE_POINTERS | (2 - 2));
2967     OUT_RELOC(batch,
2968               render_state->cc.blend,
2969               I915_GEM_DOMAIN_INSTRUCTION, 0,
2970               1);
2971     ADVANCE_BATCH(batch);
2972
2973     BEGIN_BATCH(batch, 2);
2974     OUT_BATCH(batch, GEN7_3DSTATE_DEPTH_STENCIL_STATE_POINTERS | (2 - 2));
2975     OUT_RELOC(batch,
2976               render_state->cc.depth_stencil,
2977               I915_GEM_DOMAIN_INSTRUCTION, 0, 
2978               1);
2979     ADVANCE_BATCH(batch);
2980 }
2981
2982 static void
2983 gen8_emit_cc_state_pointers(VADriverContextP ctx)
2984 {
2985     struct i965_driver_data *i965 = i965_driver_data(ctx);
2986     struct intel_batchbuffer *batch = i965->batch;
2987     struct i965_render_state *render_state = &i965->render_state;
2988
2989     BEGIN_BATCH(batch, 2);
2990     OUT_BATCH(batch, GEN6_3DSTATE_CC_STATE_POINTERS | (2 - 2));
2991     OUT_RELOC(batch,
2992               render_state->cc.state,
2993               I915_GEM_DOMAIN_INSTRUCTION, 0,
2994               1);
2995     ADVANCE_BATCH(batch);
2996
2997     BEGIN_BATCH(batch, 2);
2998     OUT_BATCH(batch, GEN7_3DSTATE_BLEND_STATE_POINTERS | (2 - 2));
2999     OUT_RELOC(batch,
3000               render_state->cc.blend,
3001               I915_GEM_DOMAIN_INSTRUCTION, 0,
3002               1);
3003     ADVANCE_BATCH(batch);
3004
3005 }
3006
3007 static void
3008 gen7_emit_sampler_state_pointers(VADriverContextP ctx)
3009 {
3010     struct i965_driver_data *i965 = i965_driver_data(ctx);
3011     struct intel_batchbuffer *batch = i965->batch;
3012     struct i965_render_state *render_state = &i965->render_state;
3013
3014     BEGIN_BATCH(batch, 2);
3015     OUT_BATCH(batch, GEN7_3DSTATE_SAMPLER_STATE_POINTERS_PS | (2 - 2));
3016     OUT_RELOC(batch,
3017               render_state->wm.sampler,
3018               I915_GEM_DOMAIN_INSTRUCTION, 0,
3019               0);
3020     ADVANCE_BATCH(batch);
3021 }
3022
3023 static void
3024 gen7_emit_binding_table(VADriverContextP ctx)
3025 {
3026     struct i965_driver_data *i965 = i965_driver_data(ctx);
3027     struct intel_batchbuffer *batch = i965->batch;
3028
3029     BEGIN_BATCH(batch, 2);
3030     OUT_BATCH(batch, GEN7_3DSTATE_BINDING_TABLE_POINTERS_PS | (2 - 2));
3031     OUT_BATCH(batch, BINDING_TABLE_OFFSET);
3032     ADVANCE_BATCH(batch);
3033 }
3034
3035 static void
3036 gen7_emit_depth_buffer_state(VADriverContextP ctx)
3037 {
3038     struct i965_driver_data *i965 = i965_driver_data(ctx);
3039     struct intel_batchbuffer *batch = i965->batch;
3040
3041     BEGIN_BATCH(batch, 7);
3042     OUT_BATCH(batch, GEN7_3DSTATE_DEPTH_BUFFER | (7 - 2));
3043     OUT_BATCH(batch,
3044               (I965_DEPTHFORMAT_D32_FLOAT << 18) |
3045               (I965_SURFACE_NULL << 29));
3046     OUT_BATCH(batch, 0);
3047     OUT_BATCH(batch, 0);
3048     OUT_BATCH(batch, 0);
3049     OUT_BATCH(batch, 0);
3050     OUT_BATCH(batch, 0);
3051     ADVANCE_BATCH(batch);
3052
3053     BEGIN_BATCH(batch, 3);
3054     OUT_BATCH(batch, GEN7_3DSTATE_CLEAR_PARAMS | (3 - 2));
3055     OUT_BATCH(batch, 0);
3056     OUT_BATCH(batch, 0);
3057     ADVANCE_BATCH(batch);
3058 }
3059
3060 static void
3061 gen7_emit_drawing_rectangle(VADriverContextP ctx)
3062 {
3063     i965_render_drawing_rectangle(ctx);
3064 }
3065
3066 static void 
3067 gen7_emit_vs_state(VADriverContextP ctx)
3068 {
3069     struct i965_driver_data *i965 = i965_driver_data(ctx);
3070     struct intel_batchbuffer *batch = i965->batch;
3071
3072     /* disable VS constant buffer */
3073     OUT_BATCH(batch, GEN6_3DSTATE_CONSTANT_VS | (7 - 2));
3074     OUT_BATCH(batch, 0);
3075     OUT_BATCH(batch, 0);
3076     OUT_BATCH(batch, 0);
3077     OUT_BATCH(batch, 0);
3078     OUT_BATCH(batch, 0);
3079     OUT_BATCH(batch, 0);
3080         
3081     OUT_BATCH(batch, GEN6_3DSTATE_VS | (6 - 2));
3082     OUT_BATCH(batch, 0); /* without VS kernel */
3083     OUT_BATCH(batch, 0);
3084     OUT_BATCH(batch, 0);
3085     OUT_BATCH(batch, 0);
3086     OUT_BATCH(batch, 0); /* pass-through */
3087 }
3088
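     /*
      * The video blit only needs VF->SF->WM, so GS, HS, TE, DS and
      * stream-out are explicitly programmed to their null/disabled
      * states rather than left stale from a previous batch.
      */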
3089 static void 
3090 gen7_emit_bypass_state(VADriverContextP ctx)
3091 {
3092     struct i965_driver_data *i965 = i965_driver_data(ctx);
3093     struct intel_batchbuffer *batch = i965->batch;
3094
3095     /* bypass GS */
3096     BEGIN_BATCH(batch, 7);
3097     OUT_BATCH(batch, GEN6_3DSTATE_CONSTANT_GS | (7 - 2));
3098     OUT_BATCH(batch, 0);
3099     OUT_BATCH(batch, 0);
3100     OUT_BATCH(batch, 0);
3101     OUT_BATCH(batch, 0);
3102     OUT_BATCH(batch, 0);
3103     OUT_BATCH(batch, 0);
3104     ADVANCE_BATCH(batch);
3105
3106     BEGIN_BATCH(batch, 7);      
3107     OUT_BATCH(batch, GEN6_3DSTATE_GS | (7 - 2));
3108     OUT_BATCH(batch, 0); /* without GS kernel */
3109     OUT_BATCH(batch, 0);
3110     OUT_BATCH(batch, 0);
3111     OUT_BATCH(batch, 0);
3112     OUT_BATCH(batch, 0);
3113     OUT_BATCH(batch, 0); /* pass-through */
3114     ADVANCE_BATCH(batch);
3115
3116     BEGIN_BATCH(batch, 2);
3117     OUT_BATCH(batch, GEN7_3DSTATE_BINDING_TABLE_POINTERS_GS | (2 - 2));
3118     OUT_BATCH(batch, 0);
3119     ADVANCE_BATCH(batch);
3120
3121     /* disable HS */
3122     BEGIN_BATCH(batch, 7);
3123     OUT_BATCH(batch, GEN7_3DSTATE_CONSTANT_HS | (7 - 2));
3124     OUT_BATCH(batch, 0);
3125     OUT_BATCH(batch, 0);
3126     OUT_BATCH(batch, 0);
3127     OUT_BATCH(batch, 0);
3128     OUT_BATCH(batch, 0);
3129     OUT_BATCH(batch, 0);
3130     ADVANCE_BATCH(batch);
3131
3132     BEGIN_BATCH(batch, 7);
3133     OUT_BATCH(batch, GEN7_3DSTATE_HS | (7 - 2));
3134     OUT_BATCH(batch, 0);
3135     OUT_BATCH(batch, 0);
3136     OUT_BATCH(batch, 0);
3137     OUT_BATCH(batch, 0);
3138     OUT_BATCH(batch, 0);
3139     OUT_BATCH(batch, 0);
3140     ADVANCE_BATCH(batch);
3141
3142     BEGIN_BATCH(batch, 2);
3143     OUT_BATCH(batch, GEN7_3DSTATE_BINDING_TABLE_POINTERS_HS | (2 - 2));
3144     OUT_BATCH(batch, 0);
3145     ADVANCE_BATCH(batch);
3146
3147     /* Disable TE */
3148     BEGIN_BATCH(batch, 4);
3149     OUT_BATCH(batch, GEN7_3DSTATE_TE | (4 - 2));
3150     OUT_BATCH(batch, 0);
3151     OUT_BATCH(batch, 0);
3152     OUT_BATCH(batch, 0);
3153     ADVANCE_BATCH(batch);
3154
3155     /* Disable DS */
3156     BEGIN_BATCH(batch, 7);
3157     OUT_BATCH(batch, GEN7_3DSTATE_CONSTANT_DS | (7 - 2));
3158     OUT_BATCH(batch, 0);
3159     OUT_BATCH(batch, 0);
3160     OUT_BATCH(batch, 0);
3161     OUT_BATCH(batch, 0);
3162     OUT_BATCH(batch, 0);
3163     OUT_BATCH(batch, 0);
3164     ADVANCE_BATCH(batch);
3165
3166     BEGIN_BATCH(batch, 6);
3167     OUT_BATCH(batch, GEN7_3DSTATE_DS | (6 - 2));
3168     OUT_BATCH(batch, 0);
3169     OUT_BATCH(batch, 0);
3170     OUT_BATCH(batch, 0);
3171     OUT_BATCH(batch, 0);
3172     OUT_BATCH(batch, 0);
3173     ADVANCE_BATCH(batch);
3174
3175     BEGIN_BATCH(batch, 2);
3176     OUT_BATCH(batch, GEN7_3DSTATE_BINDING_TABLE_POINTERS_DS | (2 - 2));
3177     OUT_BATCH(batch, 0);
3178     ADVANCE_BATCH(batch);
3179
3180     /* Disable STREAMOUT */
3181     BEGIN_BATCH(batch, 3);
3182     OUT_BATCH(batch, GEN7_3DSTATE_STREAMOUT | (3 - 2));
3183     OUT_BATCH(batch, 0);
3184     OUT_BATCH(batch, 0);
3185     ADVANCE_BATCH(batch);
3186 }
3187
3188 static void 
3189 gen7_emit_clip_state(VADriverContextP ctx)
3190 {
3191     struct i965_driver_data *i965 = i965_driver_data(ctx);
3192     struct intel_batchbuffer *batch = i965->batch;
3193
3194     OUT_BATCH(batch, GEN6_3DSTATE_CLIP | (4 - 2));
3195     OUT_BATCH(batch, 0);
3196     OUT_BATCH(batch, 0); /* pass-through */
3197     OUT_BATCH(batch, 0);
3198 }
3199
3200 static void 
3201 gen7_emit_sf_state(VADriverContextP ctx)
3202 {
3203     struct i965_driver_data *i965 = i965_driver_data(ctx);
3204     struct intel_batchbuffer *batch = i965->batch;
3205
3206     BEGIN_BATCH(batch, 14);
3207     OUT_BATCH(batch, GEN7_3DSTATE_SBE | (14 - 2));
3208     OUT_BATCH(batch,
3209               (1 << GEN7_SBE_NUM_OUTPUTS_SHIFT) |
3210               (1 << GEN7_SBE_URB_ENTRY_READ_LENGTH_SHIFT) |
3211               (0 << GEN7_SBE_URB_ENTRY_READ_OFFSET_SHIFT));
3212     OUT_BATCH(batch, 0);
3213     OUT_BATCH(batch, 0);
3214     OUT_BATCH(batch, 0); /* DW4 */
3215     OUT_BATCH(batch, 0);
3216     OUT_BATCH(batch, 0);
3217     OUT_BATCH(batch, 0);
3218     OUT_BATCH(batch, 0);
3219     OUT_BATCH(batch, 0); /* DW9 */
3220     OUT_BATCH(batch, 0);
3221     OUT_BATCH(batch, 0);
3222     OUT_BATCH(batch, 0);
3223     OUT_BATCH(batch, 0);
3224     ADVANCE_BATCH(batch);
3225
3226     BEGIN_BATCH(batch, 7);
3227     OUT_BATCH(batch, GEN6_3DSTATE_SF | (7 - 2));
3228     OUT_BATCH(batch, 0);
3229     OUT_BATCH(batch, GEN6_3DSTATE_SF_CULL_NONE);
3230     OUT_BATCH(batch, 2 << GEN6_3DSTATE_SF_TRIFAN_PROVOKE_SHIFT);
3231     OUT_BATCH(batch, 0);
3232     OUT_BATCH(batch, 0);
3233     OUT_BATCH(batch, 0);
3234     ADVANCE_BATCH(batch);
3235 }
3236
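     /*
      * Binds the PS kernel selected by 'kernel' (color conversion or
      * subpicture blend), points the push constants at the curbe BO and
      * programs the per-SKU thread count; Haswell moved the max-thread
      * shift and added a sample mask, hence the runtime selection at the
      * top of the function.
      */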
3237 static void 
3238 gen7_emit_wm_state(VADriverContextP ctx, int kernel)
3239 {
3240     struct i965_driver_data *i965 = i965_driver_data(ctx);
3241     struct intel_batchbuffer *batch = i965->batch;
3242     struct i965_render_state *render_state = &i965->render_state;
3243     unsigned int max_threads_shift = GEN7_PS_MAX_THREADS_SHIFT_IVB;
3244     unsigned int num_samples = 0;
3245
3246     if (IS_HASWELL(i965->intel.device_id)) {
3247         max_threads_shift = GEN7_PS_MAX_THREADS_SHIFT_HSW;
3248         num_samples = 1 << GEN7_PS_SAMPLE_MASK_SHIFT_HSW;
3249     }
3250
3251     BEGIN_BATCH(batch, 3);
3252     OUT_BATCH(batch, GEN6_3DSTATE_WM | (3 - 2));
3253     OUT_BATCH(batch,
3254               GEN7_WM_DISPATCH_ENABLE |
3255               GEN7_WM_PERSPECTIVE_PIXEL_BARYCENTRIC);
3256     OUT_BATCH(batch, 0);
3257     ADVANCE_BATCH(batch);
3258
3259     BEGIN_BATCH(batch, 7);
3260     OUT_BATCH(batch, GEN6_3DSTATE_CONSTANT_PS | (7 - 2));
3261     OUT_BATCH(batch, URB_CS_ENTRY_SIZE);
3262     OUT_BATCH(batch, 0);
3263     OUT_RELOC(batch, 
3264               render_state->curbe.bo,
3265               I915_GEM_DOMAIN_INSTRUCTION, 0,
3266               0);
3267     OUT_BATCH(batch, 0);
3268     OUT_BATCH(batch, 0);
3269     OUT_BATCH(batch, 0);
3270     ADVANCE_BATCH(batch);
3271
3272     BEGIN_BATCH(batch, 8);
3273     OUT_BATCH(batch, GEN7_3DSTATE_PS | (8 - 2));
3274     OUT_RELOC(batch, 
3275               render_state->render_kernels[kernel].bo,
3276               I915_GEM_DOMAIN_INSTRUCTION, 0,
3277               0);
3278     OUT_BATCH(batch, 
3279               (1 << GEN7_PS_SAMPLER_COUNT_SHIFT) |
3280               (5 << GEN7_PS_BINDING_TABLE_ENTRY_COUNT_SHIFT));
3281     OUT_BATCH(batch, 0); /* scratch space base offset */
3282     OUT_BATCH(batch, 
3283               ((render_state->max_wm_threads - 1) << max_threads_shift) | num_samples |
3284               GEN7_PS_PUSH_CONSTANT_ENABLE |
3285               GEN7_PS_ATTRIBUTE_ENABLE |
3286               GEN7_PS_16_DISPATCH_ENABLE);
3287     OUT_BATCH(batch, 
3288               (6 << GEN7_PS_DISPATCH_START_GRF_SHIFT_0));
3289     OUT_BATCH(batch, 0); /* kernel 1 pointer */
3290     OUT_BATCH(batch, 0); /* kernel 2 pointer */
3291     ADVANCE_BATCH(batch);
3292 }
3293
3294 static void
3295 gen7_emit_vertex_element_state(VADriverContextP ctx)
3296 {
3297     struct i965_driver_data *i965 = i965_driver_data(ctx);
3298     struct intel_batchbuffer *batch = i965->batch;
3299
3300     /* Set up our vertex elements, sourced from the single vertex buffer. */
3301     OUT_BATCH(batch, CMD_VERTEX_ELEMENTS | (5 - 2));
3302     /* offset 0: X,Y -> {X, Y, 1.0, 1.0} */
3303     OUT_BATCH(batch, (0 << GEN6_VE0_VERTEX_BUFFER_INDEX_SHIFT) |
3304               GEN6_VE0_VALID |
3305               (I965_SURFACEFORMAT_R32G32_FLOAT << VE0_FORMAT_SHIFT) |
3306               (0 << VE0_OFFSET_SHIFT));
3307     OUT_BATCH(batch, (I965_VFCOMPONENT_STORE_SRC << VE1_VFCOMPONENT_0_SHIFT) |
3308               (I965_VFCOMPONENT_STORE_SRC << VE1_VFCOMPONENT_1_SHIFT) |
3309               (I965_VFCOMPONENT_STORE_1_FLT << VE1_VFCOMPONENT_2_SHIFT) |
3310               (I965_VFCOMPONENT_STORE_1_FLT << VE1_VFCOMPONENT_3_SHIFT));
3311     /* offset 8: S0, T0 -> {S0, T0, 1.0, 1.0} */
3312     OUT_BATCH(batch, (0 << GEN6_VE0_VERTEX_BUFFER_INDEX_SHIFT) |
3313               GEN6_VE0_VALID |
3314               (I965_SURFACEFORMAT_R32G32_FLOAT << VE0_FORMAT_SHIFT) |
3315               (8 << VE0_OFFSET_SHIFT));
3316     OUT_BATCH(batch, (I965_VFCOMPONENT_STORE_SRC << VE1_VFCOMPONENT_0_SHIFT) | 
3317               (I965_VFCOMPONENT_STORE_SRC << VE1_VFCOMPONENT_1_SHIFT) |
3318               (I965_VFCOMPONENT_STORE_1_FLT << VE1_VFCOMPONENT_2_SHIFT) |
3319               (I965_VFCOMPONENT_STORE_1_FLT << VE1_VFCOMPONENT_3_SHIFT));
3320 }
3321
3322 static void
3323 gen7_emit_vertices(VADriverContextP ctx)
3324 {
3325     struct i965_driver_data *i965 = i965_driver_data(ctx);
3326     struct intel_batchbuffer *batch = i965->batch;
3327     struct i965_render_state *render_state = &i965->render_state;
3328
3329     BEGIN_BATCH(batch, 5);
3330     OUT_BATCH(batch, CMD_VERTEX_BUFFERS | (5 - 2));
3331     OUT_BATCH(batch, 
3332               (0 << GEN6_VB0_BUFFER_INDEX_SHIFT) |
3333               GEN6_VB0_VERTEXDATA |
3334               GEN7_VB0_ADDRESS_MODIFYENABLE |
3335               ((4 * 4) << VB0_BUFFER_PITCH_SHIFT));
3336     OUT_RELOC(batch, render_state->vb.vertex_buffer, I915_GEM_DOMAIN_VERTEX, 0, 0);
3337     OUT_RELOC(batch, render_state->vb.vertex_buffer, I915_GEM_DOMAIN_VERTEX, 0, 12 * 4);
3338     OUT_BATCH(batch, 0);
3339     ADVANCE_BATCH(batch);
3340
3341     BEGIN_BATCH(batch, 7);
3342     OUT_BATCH(batch, CMD_3DPRIMITIVE | (7 - 2));
3343     OUT_BATCH(batch,
3344               _3DPRIM_RECTLIST |
3345               GEN7_3DPRIM_VERTEXBUFFER_ACCESS_SEQUENTIAL);
3346     OUT_BATCH(batch, 3); /* vertex count per instance */
3347     OUT_BATCH(batch, 0); /* start vertex offset */
3348     OUT_BATCH(batch, 1); /* single instance */
3349     OUT_BATCH(batch, 0); /* start instance location */
3350     OUT_BATCH(batch, 0);
3351     ADVANCE_BATCH(batch);
3352 }
3353
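     /*
      * Emits the complete 3D pipeline state followed by a single
      * RECTLIST draw. The section is built as one atomic batch region so
      * an implicit flush cannot separate the state from the primitive
      * that consumes it.
      */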
3354 static void
3355 gen7_render_emit_states(VADriverContextP ctx, int kernel)
3356 {
3357     struct i965_driver_data *i965 = i965_driver_data(ctx);
3358     struct intel_batchbuffer *batch = i965->batch;
3359
3360     intel_batchbuffer_start_atomic(batch, 0x1000);
3361     intel_batchbuffer_emit_mi_flush(batch);
3362     gen7_emit_invarient_states(ctx);
3363     gen7_emit_state_base_address(ctx);
3364     gen7_emit_viewport_state_pointers(ctx);
3365     gen7_emit_urb(ctx);
3366     gen7_emit_cc_state_pointers(ctx);
3367     gen7_emit_sampler_state_pointers(ctx);
3368     gen7_emit_bypass_state(ctx);
3369     gen7_emit_vs_state(ctx);
3370     gen7_emit_clip_state(ctx);
3371     gen7_emit_sf_state(ctx);
3372     gen7_emit_wm_state(ctx, kernel);
3373     gen7_emit_binding_table(ctx);
3374     gen7_emit_depth_buffer_state(ctx);
3375     gen7_emit_drawing_rectangle(ctx);
3376     gen7_emit_vertex_element_state(ctx);
3377     gen7_emit_vertices(ctx);
3378     intel_batchbuffer_end_atomic(batch);
3379 }
3380
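     /*
      * Gen8 vertex buffer setup differs from gen7: the buffer address is
      * 64-bit (two dwords), the end-address dword becomes a buffer size
      * in bytes, and the topology moves out of 3DPRIMITIVE into the
      * separate 3DSTATE_VF_TOPOLOGY packet.
      */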
3381 static void
3382 gen8_emit_vertices(VADriverContextP ctx)
3383 {
3384     struct i965_driver_data *i965 = i965_driver_data(ctx);
3385     struct intel_batchbuffer *batch = i965->batch;
3386     struct i965_render_state *render_state = &i965->render_state;
3387
3388     BEGIN_BATCH(batch, 5);
3389     OUT_BATCH(batch, CMD_VERTEX_BUFFERS | (5 - 2));
3390     OUT_BATCH(batch, 
3391               (0 << GEN8_VB0_BUFFER_INDEX_SHIFT) |
3392               (0 << GEN8_VB0_MOCS_SHIFT) |
3393               GEN7_VB0_ADDRESS_MODIFYENABLE |
3394               ((4 * 4) << VB0_BUFFER_PITCH_SHIFT));
3395     OUT_RELOC(batch, render_state->vb.vertex_buffer, I915_GEM_DOMAIN_VERTEX, 0, 0);
3396     OUT_BATCH(batch, 0);
3397     OUT_BATCH(batch, 12 * 4);
3398     ADVANCE_BATCH(batch);
3399
3400     /* The topology in the 3D primitive is overridden by the VF_TOPOLOGY command */
3401     BEGIN_BATCH(batch, 2);
3402     OUT_BATCH(batch, GEN8_3DSTATE_VF_TOPOLOGY | (2 - 2));
3403     OUT_BATCH(batch,
3404               _3DPRIM_RECTLIST);
3405     ADVANCE_BATCH(batch);
3406
3407     
3408     BEGIN_BATCH(batch, 7);
3409     OUT_BATCH(batch, CMD_3DPRIMITIVE | (7 - 2));
3410     OUT_BATCH(batch,
3411               GEN7_3DPRIM_VERTEXBUFFER_ACCESS_SEQUENTIAL);
3412     OUT_BATCH(batch, 3); /* vertex count per instance */
3413     OUT_BATCH(batch, 0); /* start vertex offset */
3414     OUT_BATCH(batch, 1); /* single instance */
3415     OUT_BATCH(batch, 0); /* start instance location */
3416     OUT_BATCH(batch, 0);
3417     ADVANCE_BATCH(batch);
3418 }
3419
3420 static void
3421 gen8_emit_vertex_element_state(VADriverContextP ctx)
3422 {
3423     struct i965_driver_data *i965 = i965_driver_data(ctx);
3424     struct intel_batchbuffer *batch = i965->batch;
3425
3426     /* Set up our vertex elements, sourced from the single vertex buffer. */
3427     OUT_BATCH(batch, CMD_VERTEX_ELEMENTS | (5 - 2));
3428     /* offset 0: X,Y -> {X, Y, 1.0, 1.0} */
3429     OUT_BATCH(batch, (0 << GEN8_VE0_VERTEX_BUFFER_INDEX_SHIFT) |
3430               GEN8_VE0_VALID |
3431               (I965_SURFACEFORMAT_R32G32_FLOAT << VE0_FORMAT_SHIFT) |
3432               (0 << VE0_OFFSET_SHIFT));
3433     OUT_BATCH(batch, (I965_VFCOMPONENT_STORE_SRC << VE1_VFCOMPONENT_0_SHIFT) |
3434               (I965_VFCOMPONENT_STORE_SRC << VE1_VFCOMPONENT_1_SHIFT) |
3435               (I965_VFCOMPONENT_STORE_1_FLT << VE1_VFCOMPONENT_2_SHIFT) |
3436               (I965_VFCOMPONENT_STORE_1_FLT << VE1_VFCOMPONENT_3_SHIFT));
3437     /* offset 8: S0, T0 -> {S0, T0, 1.0, 1.0} */
3438     OUT_BATCH(batch, (0 << GEN8_VE0_VERTEX_BUFFER_INDEX_SHIFT) |
3439               GEN8_VE0_VALID |
3440               (I965_SURFACEFORMAT_R32G32_FLOAT << VE0_FORMAT_SHIFT) |
3441               (8 << VE0_OFFSET_SHIFT));
3442     OUT_BATCH(batch, (I965_VFCOMPONENT_STORE_SRC << VE1_VFCOMPONENT_0_SHIFT) | 
3443               (I965_VFCOMPONENT_STORE_SRC << VE1_VFCOMPONENT_1_SHIFT) |
3444               (I965_VFCOMPONENT_STORE_1_FLT << VE1_VFCOMPONENT_2_SHIFT) |
3445               (I965_VFCOMPONENT_STORE_1_FLT << VE1_VFCOMPONENT_3_SHIFT));
3446 }
3447
3448 static void 
3449 gen8_emit_vs_state(VADriverContextP ctx)
3450 {
3451     struct i965_driver_data *i965 = i965_driver_data(ctx);
3452     struct intel_batchbuffer *batch = i965->batch;
3453
3454     /* disable VS constant buffer */
3455     BEGIN_BATCH(batch, 11);
3456     OUT_BATCH(batch, GEN6_3DSTATE_CONSTANT_VS | (11 - 2));
3457     OUT_BATCH(batch, 0);
3458     OUT_BATCH(batch, 0);
3459     /* CS Buffer 0 */
3460     OUT_BATCH(batch, 0);
3461     OUT_BATCH(batch, 0);
3462     /* CS Buffer 1 */
3463     OUT_BATCH(batch, 0);
3464     OUT_BATCH(batch, 0);
3465     /* CS Buffer 2 */
3466     OUT_BATCH(batch, 0);
3467     OUT_BATCH(batch, 0);
3468     /* CS Buffer 3 */
3469     OUT_BATCH(batch, 0);
3470     OUT_BATCH(batch, 0);
3471     ADVANCE_BATCH(batch);
3472         
3473     BEGIN_BATCH(batch, 9);
3474     OUT_BATCH(batch, GEN6_3DSTATE_VS | (9 - 2));
3475     OUT_BATCH(batch, 0); /* without VS kernel */
3476     OUT_BATCH(batch, 0);
3477     /* VS shader dispatch flag */
3478     OUT_BATCH(batch, 0);
3479     OUT_BATCH(batch, 0);
3480     OUT_BATCH(batch, 0);
3481     /* DW6. VS shader GRF and URB buffer definition */
3482     OUT_BATCH(batch, 0);
3483     OUT_BATCH(batch, 0); /* pass-through */
3484     OUT_BATCH(batch, 0);
3485     ADVANCE_BATCH(batch);
3486 }
3487
3488 /*
3489  * URB layout on GEN8 
3490  * ----------------------------------------
3491  * | PS Push Constants (8KB) | VS entries |
3492  * ----------------------------------------
3493  */
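     /*
      * As on gen7, but with BDW encodings (an interpretation of the
      * values, not a PRM quote): the push constant size field appears to
      * count 2KB units, so the value 4 below still reserves 8KB, and the
      * VS/GS/HS/DS starting addresses are bumped so the entries sit
      * above that block.
      */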
3494 static void
3495 gen8_emit_urb(VADriverContextP ctx)
3496 {
3497     struct i965_driver_data *i965 = i965_driver_data(ctx);
3498     struct intel_batchbuffer *batch = i965->batch;
3499     unsigned int num_urb_entries = 64;
3500
3501     /* The minimum number of URB entries is 64 */
3502
3503     BEGIN_BATCH(batch, 2);
3504     OUT_BATCH(batch, GEN7_3DSTATE_PUSH_CONSTANT_ALLOC_PS | (2 - 2));
3505     /* Size is 8KB and the base address is 0KB */
3506     OUT_BATCH(batch,
3507                 (0 << GEN8_PUSH_CONSTANT_BUFFER_OFFSET_SHIFT) |
3508                 (4 << GEN8_PUSH_CONSTANT_BUFFER_SIZE_SHIFT));
3509     ADVANCE_BATCH(batch);
3510
3511     BEGIN_BATCH(batch, 2);
3512     OUT_BATCH(batch, GEN7_3DSTATE_URB_VS | (2 - 2));
3513     OUT_BATCH(batch, 
3514               (num_urb_entries << GEN7_URB_ENTRY_NUMBER_SHIFT) |
3515               (4 - 1) << GEN7_URB_ENTRY_SIZE_SHIFT |
3516               (1 << GEN7_URB_STARTING_ADDRESS_SHIFT));
3517     ADVANCE_BATCH(batch);
3518
3519     BEGIN_BATCH(batch, 2);
3520     OUT_BATCH(batch, GEN7_3DSTATE_URB_GS | (2 - 2));
3521     OUT_BATCH(batch,
3522               (0 << GEN7_URB_ENTRY_SIZE_SHIFT) |
3523               (5 << GEN7_URB_STARTING_ADDRESS_SHIFT));
3524     ADVANCE_BATCH(batch);
3525
3526     BEGIN_BATCH(batch, 2);
3527     OUT_BATCH(batch, GEN7_3DSTATE_URB_HS | (2 - 2));
3528     OUT_BATCH(batch,
3529               (0 << GEN7_URB_ENTRY_SIZE_SHIFT) |
3530               (6 << GEN7_URB_STARTING_ADDRESS_SHIFT));
3531     ADVANCE_BATCH(batch);
3532
3533     BEGIN_BATCH(batch, 2);
3534     OUT_BATCH(batch, GEN7_3DSTATE_URB_DS | (2 - 2));
3535     OUT_BATCH(batch,
3536               (0 << GEN7_URB_ENTRY_SIZE_SHIFT) |
3537               (7 << GEN7_URB_STARTING_ADDRESS_SHIFT));
3538     ADVANCE_BATCH(batch);
3539 }
3540
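     /*
      * Same stage bypass as gen7_emit_bypass_state, but the packets are
      * longer here (CONSTANT_* at 11 dwords, GS/HS/DS at 10/9/9) because
      * the gen8 versions carry 64-bit pointers.
      */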
3541 static void 
3542 gen8_emit_bypass_state(VADriverContextP ctx)
3543 {
3544     struct i965_driver_data *i965 = i965_driver_data(ctx);
3545     struct intel_batchbuffer *batch = i965->batch;
3546
3547     /* bypass GS */
3548     BEGIN_BATCH(batch, 11);
3549     OUT_BATCH(batch, GEN6_3DSTATE_CONSTANT_GS | (11 - 2));
3550     OUT_BATCH(batch, 0);
3551     OUT_BATCH(batch, 0);
3552     OUT_BATCH(batch, 0);
3553     OUT_BATCH(batch, 0);
3554     OUT_BATCH(batch, 0);
3555     OUT_BATCH(batch, 0);
3556     OUT_BATCH(batch, 0);
3557     OUT_BATCH(batch, 0);
3558     OUT_BATCH(batch, 0);
3559     OUT_BATCH(batch, 0);
3560     ADVANCE_BATCH(batch);
3561
3562     BEGIN_BATCH(batch, 10);     
3563     OUT_BATCH(batch, GEN6_3DSTATE_GS | (10 - 2));
3564     /* GS shader address */
3565     OUT_BATCH(batch, 0); /* without GS kernel */
3566     OUT_BATCH(batch, 0);
3567     /* DW3. GS shader dispatch flag */
3568     OUT_BATCH(batch, 0);
3569     OUT_BATCH(batch, 0);
3570     OUT_BATCH(batch, 0);
3571     /* DW6. GS shader GRF and URB offset/length */
3572     OUT_BATCH(batch, 0);
3573     OUT_BATCH(batch, 0); /* pass-through */
3574     OUT_BATCH(batch, 0);
3575     OUT_BATCH(batch, 0);
3576     ADVANCE_BATCH(batch);
3577
3578     BEGIN_BATCH(batch, 2);
3579     OUT_BATCH(batch, GEN7_3DSTATE_BINDING_TABLE_POINTERS_GS | (2 - 2));
3580     OUT_BATCH(batch, 0);
3581     ADVANCE_BATCH(batch);
3582
3583     /* disable HS */
3584     BEGIN_BATCH(batch, 11);
3585     OUT_BATCH(batch, GEN7_3DSTATE_CONSTANT_HS | (11 - 2));
3586     OUT_BATCH(batch, 0);
3587     OUT_BATCH(batch, 0);
3588     OUT_BATCH(batch, 0);
3589     OUT_BATCH(batch, 0);
3590     OUT_BATCH(batch, 0);
3591     OUT_BATCH(batch, 0);
3592     OUT_BATCH(batch, 0);
3593     OUT_BATCH(batch, 0);
3594     OUT_BATCH(batch, 0);
3595     OUT_BATCH(batch, 0);
3596     ADVANCE_BATCH(batch);
3597
3598     BEGIN_BATCH(batch, 9);
3599     OUT_BATCH(batch, GEN7_3DSTATE_HS | (9 - 2));
3600     OUT_BATCH(batch, 0);
3601     /* DW2. HS pass-through */
3602     OUT_BATCH(batch, 0);
3603     /* DW3. HS shader address */
3604     OUT_BATCH(batch, 0);
3605     OUT_BATCH(batch, 0);
3606     /* DW5. HS shader flags, URB offset/length and so on */
3607     OUT_BATCH(batch, 0);
3608     OUT_BATCH(batch, 0);
3609     OUT_BATCH(batch, 0);
3610     OUT_BATCH(batch, 0);
3611     ADVANCE_BATCH(batch);
3612
3613     BEGIN_BATCH(batch, 2);
3614     OUT_BATCH(batch, GEN7_3DSTATE_BINDING_TABLE_POINTERS_HS | (2 - 2));
3615     OUT_BATCH(batch, 0);
3616     ADVANCE_BATCH(batch);
3617
3618     /* Disable TE */
3619     BEGIN_BATCH(batch, 4);
3620     OUT_BATCH(batch, GEN7_3DSTATE_TE | (4 - 2));
3621     OUT_BATCH(batch, 0);
3622     OUT_BATCH(batch, 0);
3623     OUT_BATCH(batch, 0);
3624     ADVANCE_BATCH(batch);
3625
3626     /* Disable DS */
3627     BEGIN_BATCH(batch, 11);
3628     OUT_BATCH(batch, GEN7_3DSTATE_CONSTANT_DS | (11 - 2));
3629     OUT_BATCH(batch, 0);
3630     OUT_BATCH(batch, 0);
3631     OUT_BATCH(batch, 0);
3632     OUT_BATCH(batch, 0);
3633     OUT_BATCH(batch, 0);
3634     OUT_BATCH(batch, 0);
3635     OUT_BATCH(batch, 0);
3636     OUT_BATCH(batch, 0);
3637     OUT_BATCH(batch, 0);
3638     OUT_BATCH(batch, 0);
3639     ADVANCE_BATCH(batch);
3640
3641     BEGIN_BATCH(batch, 9);
3642     OUT_BATCH(batch, GEN7_3DSTATE_DS | (9 - 2));
3643     /* DW1. DS shader pointer */
3644     OUT_BATCH(batch, 0);
3645     OUT_BATCH(batch, 0);
3646     /* DW3-5. DS shader dispatch flags */
3647     OUT_BATCH(batch, 0);
3648     OUT_BATCH(batch, 0);
3649     OUT_BATCH(batch, 0);
3650     /* DW6-7. DS shader pass-through, GRF, URB offset/length, thread number */
3651     OUT_BATCH(batch, 0);
3652     OUT_BATCH(batch, 0);
3653     /* DW8. DS shader output URB */
3654     OUT_BATCH(batch, 0);
3655     ADVANCE_BATCH(batch);
3656
3657     BEGIN_BATCH(batch, 2);
3658     OUT_BATCH(batch, GEN7_3DSTATE_BINDING_TABLE_POINTERS_DS | (2 - 2));
3659     OUT_BATCH(batch, 0);
3660     ADVANCE_BATCH(batch);
3661
3662     /* Disable STREAMOUT */
3663     BEGIN_BATCH(batch, 5);
3664     OUT_BATCH(batch, GEN7_3DSTATE_STREAMOUT | (5 - 2));
3665     OUT_BATCH(batch, 0);
3666     OUT_BATCH(batch, 0);
3667     OUT_BATCH(batch, 0);
3668     OUT_BATCH(batch, 0);
3669     ADVANCE_BATCH(batch);
3670 }
3671
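     /*
      * Batch-invariant state: select the 3D pipeline, force one sample
      * per pixel with a zeroed sample pattern, and clear the system
      * instruction pointer.
      */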
3672 static void
3673 gen8_emit_invarient_states(VADriverContextP ctx)
3674 {
3675     struct i965_driver_data *i965 = i965_driver_data(ctx);
3676     struct intel_batchbuffer *batch = i965->batch;
3677
3678     BEGIN_BATCH(batch, 1);
3679     OUT_BATCH(batch, CMD_PIPELINE_SELECT | PIPELINE_SELECT_3D);
3680     ADVANCE_BATCH(batch);
3681
3682     BEGIN_BATCH(batch, 2);
3683     OUT_BATCH(batch, GEN8_3DSTATE_MULTISAMPLE | (2 - 2));
3684     OUT_BATCH(batch, GEN6_3DSTATE_MULTISAMPLE_PIXEL_LOCATION_CENTER |
3685               GEN6_3DSTATE_MULTISAMPLE_NUMSAMPLES_1); /* 1 sample/pixel */
3686     ADVANCE_BATCH(batch);
3687
3688     /* Update 3D Multisample pattern */
3689     BEGIN_BATCH(batch, 9);
3690     OUT_BATCH(batch, GEN8_3DSTATE_SAMPLE_PATTERN | (9 - 2));
3691     OUT_BATCH(batch, 0);
3692     OUT_BATCH(batch, 0);
3693     OUT_BATCH(batch, 0);
3694     OUT_BATCH(batch, 0);
3695     OUT_BATCH(batch, 0);
3696     OUT_BATCH(batch, 0);
3697     OUT_BATCH(batch, 0);
3698     OUT_BATCH(batch, 0);
3699     ADVANCE_BATCH(batch);
3700
3701
3702     BEGIN_BATCH(batch, 2);
3703     OUT_BATCH(batch, GEN6_3DSTATE_SAMPLE_MASK | (2 - 2));
3704     OUT_BATCH(batch, 1);
3705     ADVANCE_BATCH(batch);
3706
3707     /* Set system instruction pointer */
3708     BEGIN_BATCH(batch, 3);
3709     OUT_BATCH(batch, CMD_STATE_SIP | 0);
3710     OUT_BATCH(batch, 0);
3711     OUT_BATCH(batch, 0);
3712     ADVANCE_BATCH(batch);
3713 }
3714
3715 static void 
3716 gen8_emit_clip_state(VADriverContextP ctx)
3717 {
3718     struct i965_driver_data *i965 = i965_driver_data(ctx);
3719     struct intel_batchbuffer *batch = i965->batch;
3720
3721     OUT_BATCH(batch, GEN6_3DSTATE_CLIP | (4 - 2));
3722     OUT_BATCH(batch, 0);
3723     OUT_BATCH(batch, 0); /* pass-through */
3724     OUT_BATCH(batch, 0);
3725 }
3726
3727 static void 
3728 gen8_emit_sf_state(VADriverContextP ctx)
3729 {
3730     struct i965_driver_data *i965 = i965_driver_data(ctx);
3731     struct intel_batchbuffer *batch = i965->batch;
3732
3733     BEGIN_BATCH(batch, 5);
3734     OUT_BATCH(batch, GEN8_3DSTATE_RASTER | (5 - 2));
3735     OUT_BATCH(batch, GEN8_3DSTATE_RASTER_CULL_NONE);
3736     OUT_BATCH(batch, 0);
3737     OUT_BATCH(batch, 0);
3738     OUT_BATCH(batch, 0);
3739     ADVANCE_BATCH(batch);
3740
3741
3742     BEGIN_BATCH(batch, 4);
3743     OUT_BATCH(batch, GEN7_3DSTATE_SBE | (4 - 2));
3744     OUT_BATCH(batch,
3745               (1 << GEN7_SBE_NUM_OUTPUTS_SHIFT) |
3746               (1 << GEN7_SBE_URB_ENTRY_READ_LENGTH_SHIFT) |
3747               (0 << GEN8_SBE_URB_ENTRY_READ_OFFSET_SHIFT));
3748     OUT_BATCH(batch, 0);
3749     OUT_BATCH(batch, 0);
3750     ADVANCE_BATCH(batch);
3751
3752     /* SBE for backend setup */
3753     BEGIN_BATCH(batch, 11);
3754     OUT_BATCH(batch, GEN8_3DSTATE_SBE_SWIZ | (11 - 2));
3755     OUT_BATCH(batch, 0);
3756     OUT_BATCH(batch, 0);
3757     OUT_BATCH(batch, 0);
3758     OUT_BATCH(batch, 0);
3759     OUT_BATCH(batch, 0);
3760     OUT_BATCH(batch, 0);
3761     OUT_BATCH(batch, 0);
3762     OUT_BATCH(batch, 0);
3763     OUT_BATCH(batch, 0);
3764     OUT_BATCH(batch, 0);
3765     ADVANCE_BATCH(batch);
3766
3767     BEGIN_BATCH(batch, 4);
3768     OUT_BATCH(batch, GEN6_3DSTATE_SF | (4 - 2));
3769     OUT_BATCH(batch, 0);
3770     OUT_BATCH(batch, 0);
3771     OUT_BATCH(batch, 2 << GEN6_3DSTATE_SF_TRIFAN_PROVOKE_SHIFT);
3772     ADVANCE_BATCH(batch);
3773 }
3774
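     /*
      * Gen8 splits the gen7 WM/PS programming: PSEXTRA carries the
      * shader-valid and attribute-enable bits, PSBLEND replaces the
      * per-RT blend enables (source-over blending for the subpicture
      * kernel), and 3DSTATE_PS keeps the kernel pointer and thread
      * setup.
      */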
3775 static void 
3776 gen8_emit_wm_state(VADriverContextP ctx, int kernel)
3777 {
3778     struct i965_driver_data *i965 = i965_driver_data(ctx);
3779     struct intel_batchbuffer *batch = i965->batch;
3780     struct i965_render_state *render_state = &i965->render_state;
3781     unsigned int num_samples = 0;
3782     unsigned int max_threads;
3783
3784     max_threads = render_state->max_wm_threads - 2;
3785
3786     BEGIN_BATCH(batch, 2);
3787     OUT_BATCH(batch, GEN8_3DSTATE_PSEXTRA | (2 - 2));
3788     OUT_BATCH(batch,
3789               (GEN8_PSX_PIXEL_SHADER_VALID | GEN8_PSX_ATTRIBUTE_ENABLE));
3790     ADVANCE_BATCH(batch);
3791
3792     
3793     if (kernel == PS_KERNEL) {
3794         BEGIN_BATCH(batch, 2);
3795         OUT_BATCH(batch, GEN8_3DSTATE_PSBLEND | (2 - 2));
3796         OUT_BATCH(batch,
3797                 GEN8_PS_BLEND_HAS_WRITEABLE_RT);
3798         ADVANCE_BATCH(batch);
3799     } else if (kernel == PS_SUBPIC_KERNEL) {
3800         BEGIN_BATCH(batch, 2);
3801         OUT_BATCH(batch, GEN8_3DSTATE_PSBLEND | (2 - 2));
3802         OUT_BATCH(batch,
3803                 (GEN8_PS_BLEND_HAS_WRITEABLE_RT |
3804                  GEN8_PS_BLEND_COLOR_BUFFER_BLEND_ENABLE |
3805                  (I965_BLENDFACTOR_SRC_ALPHA << GEN8_PS_BLEND_SRC_ALPHA_BLEND_FACTOR_SHIFT) |
3806                  (I965_BLENDFACTOR_INV_SRC_ALPHA << GEN8_PS_BLEND_DST_ALPHA_BLEND_FACTOR_SHIFT) |
3807                  (I965_BLENDFACTOR_SRC_ALPHA << GEN8_PS_BLEND_SRC_BLEND_FACTOR_SHIFT) |
3808                  (I965_BLENDFACTOR_INV_SRC_ALPHA << GEN8_PS_BLEND_DST_BLEND_FACTOR_SHIFT)));
3809         ADVANCE_BATCH(batch);
3810     }
3811
3812     BEGIN_BATCH(batch, 2);
3813     OUT_BATCH(batch, GEN6_3DSTATE_WM | (2 - 2));
3814     OUT_BATCH(batch,
3815               GEN7_WM_PERSPECTIVE_PIXEL_BARYCENTRIC);
3816     ADVANCE_BATCH(batch);
3817
3818     BEGIN_BATCH(batch, 11);
3819     OUT_BATCH(batch, GEN6_3DSTATE_CONSTANT_PS | (11 - 2));
3820     OUT_BATCH(batch, 1);
3821     OUT_BATCH(batch, 0);
3822     /*DW3-4. Constant buffer 0 */
3823     OUT_RELOC(batch, 
3824               render_state->curbe.bo,
3825               I915_GEM_DOMAIN_INSTRUCTION, 0,
3826               0);
3827     OUT_BATCH(batch, 0);
3828
3829     /*DW5-10. Constant buffer 1-3 */
3830     OUT_BATCH(batch, 0);
3831     OUT_BATCH(batch, 0);
3832     OUT_BATCH(batch, 0);
3833     OUT_BATCH(batch, 0);
3834     OUT_BATCH(batch, 0);
3835     OUT_BATCH(batch, 0);
3836     ADVANCE_BATCH(batch);
3837
3838     BEGIN_BATCH(batch, 12);
3839     OUT_BATCH(batch, GEN7_3DSTATE_PS | (12 - 2));
3840     /* PS shader address */
3841     OUT_RELOC(batch, 
3842               render_state->render_kernels[kernel].bo,
3843               I915_GEM_DOMAIN_INSTRUCTION, 0,
3844               0);
3845     OUT_BATCH(batch, 0);
3846     /* DW3. PS shader flags: sampler count and binding table entry count */
3847     OUT_BATCH(batch,
3848               (1 << GEN7_PS_SAMPLER_COUNT_SHIFT) |
3849               (5 << GEN7_PS_BINDING_TABLE_ENTRY_COUNT_SHIFT));
3850     /* DW4-5. Scratch space */
3851     OUT_BATCH(batch, 0); /* scratch space base offset */
3852     OUT_BATCH(batch, 0);
3853     /* DW6. PS shader threads. */
3854     OUT_BATCH(batch, 
3855               ((max_threads - 1) << GEN8_PS_MAX_THREADS_SHIFT) | num_samples |
3856               GEN7_PS_PUSH_CONSTANT_ENABLE |
3857               GEN7_PS_16_DISPATCH_ENABLE);
3858     /* DW7. PS shader GRF */
3859     OUT_BATCH(batch, 
3860               (6 << GEN7_PS_DISPATCH_START_GRF_SHIFT_0));
3861     OUT_BATCH(batch, 0); /* kernel 1 pointer */
3862     OUT_BATCH(batch, 0);
3863     OUT_BATCH(batch, 0); /* kernel 2 pointer */
3864     OUT_BATCH(batch, 0);
3865     ADVANCE_BATCH(batch);
3866
3867     BEGIN_BATCH(batch, 2);
3868     OUT_BATCH(batch, GEN7_3DSTATE_BINDING_TABLE_POINTERS_PS | (2 - 2));
3869     OUT_BATCH(batch, BINDING_TABLE_OFFSET);
3870     ADVANCE_BATCH(batch);
3871 }
3872
3873 static void
3874 gen8_emit_depth_buffer_state(VADriverContextP ctx)
3875 {
3876     struct i965_driver_data *i965 = i965_driver_data(ctx);
3877     struct intel_batchbuffer *batch = i965->batch;
3878
3879     BEGIN_BATCH(batch, 8);
3880     OUT_BATCH(batch, GEN7_3DSTATE_DEPTH_BUFFER | (8 - 2));
3881     OUT_BATCH(batch,
3882               (I965_DEPTHFORMAT_D32_FLOAT << 18) |
3883               (I965_SURFACE_NULL << 29));
3884     /* DW2-3. Depth Buffer Address */
3885     OUT_BATCH(batch, 0);
3886     OUT_BATCH(batch, 0);
3887     /* DW4-7. Surface structure */
3888     OUT_BATCH(batch, 0);
3889     OUT_BATCH(batch, 0);
3890     OUT_BATCH(batch, 0);
3891     OUT_BATCH(batch, 0);
3892     ADVANCE_BATCH(batch);
3893
3894     /* Update the Hier Depth buffer */
3895     BEGIN_BATCH(batch, 5);
3896     OUT_BATCH(batch, GEN7_3DSTATE_HIER_DEPTH_BUFFER | (5 - 2));
3897     OUT_BATCH(batch, 0);
3898     OUT_BATCH(batch, 0);
3899     OUT_BATCH(batch, 0);
3900     OUT_BATCH(batch, 0);
3901     ADVANCE_BATCH(batch);
3902     
3903     /* Update the stencil buffer */
3904     BEGIN_BATCH(batch, 5);
3905     OUT_BATCH(batch, GEN7_3DSTATE_STENCIL_BUFFER | (5 - 2));
3906     OUT_BATCH(batch, 0);
3907     OUT_BATCH(batch, 0);
3908     OUT_BATCH(batch, 0);
3909     OUT_BATCH(batch, 0);
3910     ADVANCE_BATCH(batch);
3911     
3912     BEGIN_BATCH(batch, 3);
3913     OUT_BATCH(batch, GEN7_3DSTATE_CLEAR_PARAMS | (3 - 2));
3914     OUT_BATCH(batch, 0);
3915     OUT_BATCH(batch, 0);
3916     ADVANCE_BATCH(batch);
3917 }
3918
3919 static void
3920 gen8_emit_depth_stencil_state(VADriverContextP ctx)
3921 {
3922     struct i965_driver_data *i965 = i965_driver_data(ctx);
3923     struct intel_batchbuffer *batch = i965->batch;
3924
3925     BEGIN_BATCH(batch, 3);
3926     OUT_BATCH(batch, GEN8_3DSTATE_WM_DEPTH_STENCIL | (3 - 2));
3927     OUT_BATCH(batch, 0);
3928     OUT_BATCH(batch, 0);
3929     ADVANCE_BATCH(batch);
3930 }
3931
3932 static void
3933 gen8_render_emit_states(VADriverContextP ctx, int kernel)
3934 {
3935     struct i965_driver_data *i965 = i965_driver_data(ctx);
3936     struct intel_batchbuffer *batch = i965->batch;
3937
3938     intel_batchbuffer_start_atomic(batch, 0x1000);
3939     intel_batchbuffer_emit_mi_flush(batch);
3940     gen8_emit_invarient_states(ctx);
3941     gen8_emit_state_base_address(ctx);
3942     gen7_emit_viewport_state_pointers(ctx);
3943     gen8_emit_urb(ctx);
3944     gen8_emit_cc_state_pointers(ctx);
3945     gen7_emit_sampler_state_pointers(ctx);
3946     gen8_emit_bypass_state(ctx);
3947     gen8_emit_vs_state(ctx);
3948     gen8_emit_clip_state(ctx);
3949     gen8_emit_sf_state(ctx);
3950     gen8_emit_depth_stencil_state(ctx);
3951     gen8_emit_wm_state(ctx, kernel);
3952     gen8_emit_depth_buffer_state(ctx);
3953     gen7_emit_drawing_rectangle(ctx);
3954     gen8_emit_vertex_element_state(ctx);
3955     gen8_emit_vertices(ctx);
3956     intel_batchbuffer_end_atomic(batch);
3957 }
3958
3959 static void
3960 gen7_render_put_surface(
3961     VADriverContextP   ctx,
3962     struct object_surface *obj_surface,    
3963     const VARectangle *src_rect,
3964     const VARectangle *dst_rect,
3965     unsigned int       flags
3966 )
3967 {
3968     struct i965_driver_data *i965 = i965_driver_data(ctx);
3969     struct intel_batchbuffer *batch = i965->batch;
3970
3971     gen7_render_initialize(ctx);
3972     gen7_render_setup_states(ctx, obj_surface, src_rect, dst_rect, flags);
3973     i965_clear_dest_region(ctx);
3974     gen7_render_emit_states(ctx, PS_KERNEL);
3975     intel_batchbuffer_flush(batch);
3976 }
3977
3978 static void
3979 gen8_render_put_surface(
3980     VADriverContextP   ctx,
3981     struct object_surface *obj_surface,    
3982     const VARectangle *src_rect,
3983     const VARectangle *dst_rect,
3984     unsigned int       flags
3985 )
3986 {
3987     struct i965_driver_data *i965 = i965_driver_data(ctx);
3988     struct intel_batchbuffer *batch = i965->batch;
3989
3990     gen8_render_initialize(ctx);
3991     gen8_render_setup_states(ctx, obj_surface, src_rect, dst_rect, flags);
3992     gen8_clear_dest_region(ctx);
3993     gen8_render_emit_states(ctx, PS_KERNEL);
3994     intel_batchbuffer_flush(batch);
3995 }
3996
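     /*
      * Subpictures are composited over the video with source-over alpha
      * blending, dst = src * alpha + dst * (1 - alpha), expressed below
      * through the SRC_ALPHA / INV_SRC_ALPHA blend factors.
      */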
3997 static void
3998 gen7_subpicture_render_blend_state(VADriverContextP ctx)
3999 {
4000     struct i965_driver_data *i965 = i965_driver_data(ctx);
4001     struct i965_render_state *render_state = &i965->render_state;
4002     struct gen6_blend_state *blend_state;
4003
4004     dri_bo_unmap(render_state->cc.state);    
4005     dri_bo_map(render_state->cc.blend, 1);
4006     assert(render_state->cc.blend->virtual);
4007     blend_state = render_state->cc.blend->virtual;
4008     memset(blend_state, 0, sizeof(*blend_state));
4009     blend_state->blend0.dest_blend_factor = I965_BLENDFACTOR_INV_SRC_ALPHA;
4010     blend_state->blend0.source_blend_factor = I965_BLENDFACTOR_SRC_ALPHA;
4011     blend_state->blend0.blend_func = I965_BLENDFUNCTION_ADD;
4012     blend_state->blend0.blend_enable = 1;
4013     blend_state->blend1.post_blend_clamp_enable = 1;
4014     blend_state->blend1.pre_blend_clamp_enable = 1;
4015     blend_state->blend1.clamp_range = 0; /* clamp range [0, 1] */
4016     dri_bo_unmap(render_state->cc.blend);
4017 }
4018
4019 static void
4020 gen8_subpicture_render_blend_state(VADriverContextP ctx)
4021 {
4022     struct i965_driver_data *i965 = i965_driver_data(ctx);
4023     struct i965_render_state *render_state = &i965->render_state;
4024     struct gen8_global_blend_state *global_blend_state;
4025     struct gen8_blend_state_rt *blend_state;
4026
4027     dri_bo_map(render_state->cc.blend, 1);
4028     assert(render_state->cc.blend->virtual);
4029     global_blend_state = render_state->cc.blend->virtual;
4030     memset(global_blend_state, 0, sizeof(*global_blend_state));
4031     /* Global blend state + blend_state for Render Target */
4032     blend_state = (struct gen8_blend_state_rt *)(global_blend_state + 1);
4033     blend_state->blend0.dest_blend_factor = I965_BLENDFACTOR_INV_SRC_ALPHA;
4034     blend_state->blend0.src_blend_factor = I965_BLENDFACTOR_SRC_ALPHA;
4035     blend_state->blend0.alpha_blend_func = I965_BLENDFUNCTION_ADD;
4036     blend_state->blend0.ia_dest_blend_factor = I965_BLENDFACTOR_INV_SRC_ALPHA;
4037     blend_state->blend0.ia_src_blend_factor = I965_BLENDFACTOR_SRC_ALPHA;
4038     blend_state->blend0.colorbuf_blend = 1;
4039     blend_state->blend1.post_blend_clamp_enable = 1;
4040     blend_state->blend1.pre_blend_clamp_enable = 1;
4041     blend_state->blend1.clamp_range = 0; /* clamp range [0, 1] */
4042     dri_bo_unmap(render_state->cc.blend);
4043 }
4044
4045 static void
4046 gen7_subpicture_render_setup_states(
4047     VADriverContextP   ctx,
4048     struct object_surface *obj_surface,
4049     const VARectangle *src_rect,
4050     const VARectangle *dst_rect
4051 )
4052 {
4053     i965_render_dest_surface_state(ctx, 0);
4054     i965_subpic_render_src_surfaces_state(ctx, obj_surface);
4055     i965_render_sampler(ctx);
4056     i965_render_cc_viewport(ctx);
4057     gen7_render_color_calc_state(ctx);
4058     gen7_subpicture_render_blend_state(ctx);
4059     gen7_render_depth_stencil_state(ctx);
4060     i965_subpic_render_upload_constants(ctx, obj_surface);
4061     i965_subpic_render_upload_vertex(ctx, obj_surface, dst_rect);
4062 }
4063
4064 static void
4065 gen8_subpicture_render_setup_states(
4066     VADriverContextP   ctx,
4067     struct object_surface *obj_surface,
4068     const VARectangle *src_rect,
4069     const VARectangle *dst_rect
4070 )
4071 {
4072     i965_render_dest_surface_state(ctx, 0);
4073     i965_subpic_render_src_surfaces_state(ctx, obj_surface);
4074     gen8_render_sampler(ctx);
4075     i965_render_cc_viewport(ctx);
4076     gen7_render_color_calc_state(ctx);
4077     gen8_subpicture_render_blend_state(ctx);
4078     i965_subpic_render_upload_constants(ctx, obj_surface);
4079     i965_subpic_render_upload_vertex(ctx, obj_surface, dst_rect);
4080 }
4081
4082 static void
4083 gen7_render_put_subpicture(
4084     VADriverContextP   ctx,
4085     struct object_surface *obj_surface,
4086     const VARectangle *src_rect,
4087     const VARectangle *dst_rect
4088 )
4089 {
4090     struct i965_driver_data *i965 = i965_driver_data(ctx);
4091     struct intel_batchbuffer *batch = i965->batch;
4092     unsigned int index = obj_surface->subpic_render_idx;
4093     struct object_subpic *obj_subpic = obj_surface->obj_subpic[index];
4094
4095     assert(obj_subpic);
4096     gen7_render_initialize(ctx);
4097     gen7_subpicture_render_setup_states(ctx, obj_surface, src_rect, dst_rect);
4098     gen7_render_emit_states(ctx, PS_SUBPIC_KERNEL);
4099     i965_render_upload_image_palette(ctx, obj_subpic->obj_image, 0xff);
4100     intel_batchbuffer_flush(batch);
4101 }
4102
4103 static void
4104 gen8_render_put_subpicture(
4105     VADriverContextP   ctx,
4106     struct object_surface *obj_surface,
4107     const VARectangle *src_rect,
4108     const VARectangle *dst_rect
4109 )
4110 {
4111     struct i965_driver_data *i965 = i965_driver_data(ctx);
4112     struct intel_batchbuffer *batch = i965->batch;
4113     unsigned int index = obj_surface->subpic_render_idx;
4114     struct object_subpic *obj_subpic = obj_surface->obj_subpic[index];
4115
4116     assert(obj_subpic);
4117     gen8_render_initialize(ctx);
4118     gen8_subpicture_render_setup_states(ctx, obj_surface, src_rect, dst_rect);
4119     gen8_render_emit_states(ctx, PS_SUBPIC_KERNEL);
4120     i965_render_upload_image_palette(ctx, obj_subpic->obj_image, 0xff);
4121     intel_batchbuffer_flush(batch);
4122 }
4123
4124 /*
4125  * global functions
4126  */
4127 VAStatus 
4128 i965_DestroySurfaces(VADriverContextP ctx,
4129                      VASurfaceID *surface_list,
4130                      int num_surfaces);
4131 void
4132 intel_render_put_surface(
4133     VADriverContextP   ctx,
4134     struct object_surface *obj_surface,
4135     const VARectangle *src_rect,
4136     const VARectangle *dst_rect,
4137     unsigned int       flags
4138 )
4139 {
4140     struct i965_driver_data *i965 = i965_driver_data(ctx);
4141     int has_done_scaling = 0;
4142     VASurfaceID out_surface_id = i965_post_processing(ctx,
4143                                                       obj_surface,
4144                                                       src_rect,
4145                                                       dst_rect,
4146                                                       flags,
4147                                                       &has_done_scaling);
4148
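         /*
          * If post-processing produced a new (possibly scaled) surface,
          * render from it instead of the original; once scaling has
          * happened the destination rectangle doubles as the source
          * rectangle for the final blit.
          */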
4149     assert((!has_done_scaling) || (out_surface_id != VA_INVALID_ID));
4150
4151     if (out_surface_id != VA_INVALID_ID) {
4152         struct object_surface *new_obj_surface = SURFACE(out_surface_id);
4153         
4154         if (new_obj_surface && new_obj_surface->bo)
4155             obj_surface = new_obj_surface;
4156
4157         if (has_done_scaling)
4158             src_rect = dst_rect;
4159     }
4160
4161     if (IS_GEN8(i965->intel.device_id))
4162         gen8_render_put_surface(ctx, obj_surface, src_rect, dst_rect, flags);
4163     else if (IS_GEN7(i965->intel.device_id))
4164         gen7_render_put_surface(ctx, obj_surface, src_rect, dst_rect, flags);
4165     else if (IS_GEN6(i965->intel.device_id))
4166         gen6_render_put_surface(ctx, obj_surface, src_rect, dst_rect, flags);
4167     else
4168         i965_render_put_surface(ctx, obj_surface, src_rect, dst_rect, flags);
4169
4170     if (out_surface_id != VA_INVALID_ID)
4171         i965_DestroySurfaces(ctx, &out_surface_id, 1);
4172 }
4173
4174 void
4175 intel_render_put_subpicture(
4176     VADriverContextP   ctx,
4177     struct object_surface *obj_surface,
4178     const VARectangle *src_rect,
4179     const VARectangle *dst_rect
4180 )
4181 {
4182     struct i965_driver_data *i965 = i965_driver_data(ctx);
4183
4184     if (IS_GEN8(i965->intel.device_id))
4185         gen8_render_put_subpicture(ctx, obj_surface, src_rect, dst_rect);
4186     else if (IS_GEN7(i965->intel.device_id))
4187         gen7_render_put_subpicture(ctx, obj_surface, src_rect, dst_rect);
4188     else if (IS_GEN6(i965->intel.device_id))
4189         gen6_render_put_subpicture(ctx, obj_surface, src_rect, dst_rect);
4190     else
4191         i965_render_put_subpicture(ctx, obj_surface, src_rect, dst_rect);
4192 }
4193
4194 bool 
4195 i965_render_init(VADriverContextP ctx)
4196 {
4197     struct i965_driver_data *i965 = i965_driver_data(ctx);
4198     struct i965_render_state *render_state = &i965->render_state;
4199     int i;
4200
4201     /* kernel */
4202     assert(NUM_RENDER_KERNEL == (sizeof(render_kernels_gen5) / 
4203                                  sizeof(render_kernels_gen5[0])));
4204     assert(NUM_RENDER_KERNEL == (sizeof(render_kernels_gen6) / 
4205                                  sizeof(render_kernels_gen6[0])));
4206
4207     if (IS_GEN8(i965->intel.device_id)) {
4208         memcpy(render_state->render_kernels, render_kernels_gen8,
4209                         sizeof(render_state->render_kernels));
4210     } else if (IS_GEN7(i965->intel.device_id))
4211         memcpy(render_state->render_kernels,
4212                (IS_HASWELL(i965->intel.device_id) ? render_kernels_gen7_haswell : render_kernels_gen7),
4213                sizeof(render_state->render_kernels));
4214     else if (IS_GEN6(i965->intel.device_id))
4215         memcpy(render_state->render_kernels, render_kernels_gen6, sizeof(render_state->render_kernels));
4216     else if (IS_IRONLAKE(i965->intel.device_id))
4217         memcpy(render_state->render_kernels, render_kernels_gen5, sizeof(render_state->render_kernels));
4218     else
4219         memcpy(render_state->render_kernels, render_kernels_gen4, sizeof(render_state->render_kernels));
4220
4221     for (i = 0; i < NUM_RENDER_KERNEL; i++) {
4222         struct i965_kernel *kernel = &render_state->render_kernels[i];
4223
4224         if (!kernel->size)
4225             continue;
4226
4227         kernel->bo = dri_bo_alloc(i965->intel.bufmgr, 
4228                                   kernel->name, 
4229                                   kernel->size, 0x1000);
4230         assert(kernel->bo);
4231         dri_bo_subdata(kernel->bo, 0, kernel->size, kernel->bin);
4232     }
4233
4234     /* constant buffer */
4235     render_state->curbe.bo = dri_bo_alloc(i965->intel.bufmgr,
4236                       "constant buffer",
4237                       4096, 64);
4238     assert(render_state->curbe.bo);
4239
4240     if (IS_GEN8(i965->intel.device_id)) {
4241         render_state->max_wm_threads = 48;
4242         if (IS_BDW_GT1(i965->intel.device_id))
4243             render_state->max_wm_threads = 120;
4244         else if (IS_BDW_GT2(i965->intel.device_id))
4245             render_state->max_wm_threads = 180;
4246         else if (IS_BDW_GT2PLUS(i965->intel.device_id))
4247             render_state->max_wm_threads = 360;
4248     } else if (IS_HSW_GT1(i965->intel.device_id)) {
4249         render_state->max_wm_threads = 102;
4250     } else if (IS_HSW_GT2(i965->intel.device_id)) {
4251         render_state->max_wm_threads = 204;
4252     } else if (IS_HSW_GT3(i965->intel.device_id)) {
4253         render_state->max_wm_threads = 408;
4254     } else if (IS_IVB_GT1(i965->intel.device_id) || IS_BAYTRAIL(i965->intel.device_id)) {
4255         render_state->max_wm_threads = 48;
4256     } else if (IS_IVB_GT2(i965->intel.device_id)) {
4257         render_state->max_wm_threads = 172;
4258     } else if (IS_SNB_GT1(i965->intel.device_id)) {
4259         render_state->max_wm_threads = 40;
4260     } else if (IS_SNB_GT2(i965->intel.device_id)) {
4261         render_state->max_wm_threads = 80;
4262     } else if (IS_IRONLAKE(i965->intel.device_id)) {
4263         render_state->max_wm_threads = 72; /* 12 * 6 */
4264     } else if (IS_G4X(i965->intel.device_id)) {
4265         render_state->max_wm_threads = 50; /* 10 * 5 */
4266     } else {
4267         /* should never get here !!! */
4268         assert(0);
4269     }
4270
4271     return true;
4272 }
4273
4274 void 
4275 i965_render_terminate(VADriverContextP ctx)
4276 {
4277     int i;
4278     struct i965_driver_data *i965 = i965_driver_data(ctx);
4279     struct i965_render_state *render_state = &i965->render_state;
4280
4281     dri_bo_unreference(render_state->curbe.bo);
4282     render_state->curbe.bo = NULL;
4283
4284     for (i = 0; i < NUM_RENDER_KERNEL; i++) {
4285         struct i965_kernel *kernel = &render_state->render_kernels[i];
4286         
4287         dri_bo_unreference(kernel->bo);
4288         kernel->bo = NULL;
4289     }
4290
4291     dri_bo_unreference(render_state->vb.vertex_buffer);
4292     render_state->vb.vertex_buffer = NULL;
4293     dri_bo_unreference(render_state->vs.state);
4294     render_state->vs.state = NULL;
4295     dri_bo_unreference(render_state->sf.state);
4296     render_state->sf.state = NULL;
4297     dri_bo_unreference(render_state->wm.sampler);
4298     render_state->wm.sampler = NULL;
4299     dri_bo_unreference(render_state->wm.state);
4300     render_state->wm.state = NULL;
4301     dri_bo_unreference(render_state->wm.surface_state_binding_table_bo);
4302     dri_bo_unreference(render_state->cc.viewport);
4303     render_state->cc.viewport = NULL;
4304     dri_bo_unreference(render_state->cc.state);
4305     render_state->cc.state = NULL;
4306     dri_bo_unreference(render_state->cc.blend);
4307     render_state->cc.blend = NULL;
4308     dri_bo_unreference(render_state->cc.depth_stencil);
4309     render_state->cc.depth_stencil = NULL;
4310
4311     if (render_state->draw_region) {
4312         dri_bo_unreference(render_state->draw_region->bo);
4313         free(render_state->draw_region);
4314         render_state->draw_region = NULL;
4315     }
4316 }
4317