Fix drivers build.
[profile/ivi/libva.git] / i965_drv_video / i965_render.c
/*
 * Copyright © 2006 Intel Corporation
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice (including the next
 * paragraph) shall be included in all copies or substantial portions of the
 * Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
 * DEALINGS IN THE SOFTWARE.
 *
 * Authors:
 *    Eric Anholt <eric@anholt.net>
 *    Keith Packard <keithp@keithp.com>
 *    Xiang Haihao <haihao.xiang@intel.com>
 *
 */

/*
 * Most of the rendering code here is ported from
 * xf86-video-intel/src/i965_video.c
 */

#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <assert.h>

#include <va/va_backend.h>
#include "va/x11/va_dricommon.h"

#include "intel_batchbuffer.h"
#include "intel_driver.h"

#include "i965_defines.h"
#include "i965_render.h"
#include "i965_drv_video.h"

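/*
 * The .g4b files included below are precompiled GEN shader kernels
 * (one 4-dword instruction per row, hence the [][4] arrays), assembled
 * from the sources under shaders/render/.
 */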
#define SF_KERNEL_NUM_GRF       16
#define SF_MAX_THREADS          1

static const unsigned int sf_kernel_static[][4] =
{
#include "shaders/render/exa_sf.g4b"
};

#define PS_KERNEL_NUM_GRF       32
#define PS_MAX_THREADS          32

#define I965_GRF_BLOCKS(nreg)   ((nreg + 15) / 16 - 1)

static const unsigned int ps_kernel_static[][4] =
{
#include "shaders/render/exa_wm_xy.g4b"
#include "shaders/render/exa_wm_src_affine.g4b"
#include "shaders/render/exa_wm_src_sample_planar.g4b"
#include "shaders/render/exa_wm_yuv_rgb.g4b"
#include "shaders/render/exa_wm_write.g4b"
};

static const unsigned int ps_subpic_kernel_static[][4] =
{
#include "shaders/render/exa_wm_xy.g4b"
#include "shaders/render/exa_wm_src_affine.g4b"
#include "shaders/render/exa_wm_src_sample_argb.g4b"
#include "shaders/render/exa_wm_write.g4b"
};

/* On IGDNG (Ironlake) */
static const unsigned int sf_kernel_static_gen5[][4] =
{
#include "shaders/render/exa_sf.g4b.gen5"
};

static const unsigned int ps_kernel_static_gen5[][4] =
{
#include "shaders/render/exa_wm_xy.g4b.gen5"
#include "shaders/render/exa_wm_src_affine.g4b.gen5"
#include "shaders/render/exa_wm_src_sample_planar.g4b.gen5"
#include "shaders/render/exa_wm_yuv_rgb.g4b.gen5"
#include "shaders/render/exa_wm_write.g4b.gen5"
};

static const unsigned int ps_subpic_kernel_static_gen5[][4] =
{
#include "shaders/render/exa_wm_xy.g4b.gen5"
#include "shaders/render/exa_wm_src_affine.g4b.gen5"
#include "shaders/render/exa_wm_src_sample_argb.g4b.gen5"
#include "shaders/render/exa_wm_write.g4b.gen5"
};

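/*
 * Reinterpret a float's bit pattern as a uint32_t (type punning through
 * a union) so float immediates can be emitted into the batch buffer.
 */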
static uint32_t
float_to_uint(float f)
{
    union {
        uint32_t i;
        float f;
    } x;

    x.f = f;
    return x.i;
}

enum
{
    SF_KERNEL = 0,
    PS_KERNEL,
    PS_SUBPIC_KERNEL
};

struct render_kernel
{
    char *name;
    const unsigned int (*bin)[4];
    int size;
    dri_bo *bo;
};

static struct render_kernel render_kernels_gen4[] = {
    {
        "SF",
        sf_kernel_static,
        sizeof(sf_kernel_static),
        NULL
    },
    {
        "PS",
        ps_kernel_static,
        sizeof(ps_kernel_static),
        NULL
    },
    {
        "PS_SUBPIC",
        ps_subpic_kernel_static,
        sizeof(ps_subpic_kernel_static),
        NULL
    }
};

static struct render_kernel render_kernels_gen5[] = {
    {
        "SF",
        sf_kernel_static_gen5,
        sizeof(sf_kernel_static_gen5),
        NULL
    },
    {
        "PS",
        ps_kernel_static_gen5,
        sizeof(ps_kernel_static_gen5),
        NULL
    },
    {
        "PS_SUBPIC",
        ps_subpic_kernel_static_gen5,
        sizeof(ps_subpic_kernel_static_gen5),
        NULL
    }
};

static struct render_kernel *render_kernels = NULL;

#define NUM_RENDER_KERNEL (sizeof(render_kernels_gen4)/sizeof(render_kernels_gen4[0]))

#define URB_VS_ENTRIES        8
#define URB_VS_ENTRY_SIZE     1

#define URB_GS_ENTRIES        0
#define URB_GS_ENTRY_SIZE     0

#define URB_CLIP_ENTRIES      0
#define URB_CLIP_ENTRY_SIZE   0

#define URB_SF_ENTRIES        1
#define URB_SF_ENTRY_SIZE     2

#define URB_CS_ENTRIES        0
#define URB_CS_ENTRY_SIZE     0

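/*
 * The VS is programmed as a pass-through (vs_enable = 0): vertices go
 * straight to the SF kernel, so only a minimal URB allocation is needed.
 */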
static void
i965_render_vs_unit(VADriverContextP ctx)
{
    struct i965_driver_data *i965 = i965_driver_data(ctx);
    struct i965_render_state *render_state = &i965->render_state;
    struct i965_vs_unit_state *vs_state;

    dri_bo_map(render_state->vs.state, 1);
    assert(render_state->vs.state->virtual);
    vs_state = render_state->vs.state->virtual;
    memset(vs_state, 0, sizeof(*vs_state));

    if (IS_IGDNG(i965->intel.device_id))
        vs_state->thread4.nr_urb_entries = URB_VS_ENTRIES >> 2;
    else
        vs_state->thread4.nr_urb_entries = URB_VS_ENTRIES;

    vs_state->thread4.urb_entry_allocation_size = URB_VS_ENTRY_SIZE - 1;
    vs_state->vs6.vs_enable = 0;
    vs_state->vs6.vert_cache_disable = 1;

    dri_bo_unmap(render_state->vs.state);
}

static void
i965_render_sf_unit(VADriverContextP ctx)
{
    struct i965_driver_data *i965 = i965_driver_data(ctx);
    struct i965_render_state *render_state = &i965->render_state;
    struct i965_sf_unit_state *sf_state;

    dri_bo_map(render_state->sf.state, 1);
    assert(render_state->sf.state->virtual);
    sf_state = render_state->sf.state->virtual;
    memset(sf_state, 0, sizeof(*sf_state));

    sf_state->thread0.grf_reg_count = I965_GRF_BLOCKS(SF_KERNEL_NUM_GRF);
    sf_state->thread0.kernel_start_pointer = render_kernels[SF_KERNEL].bo->offset >> 6;

    sf_state->sf1.single_program_flow = 1; /* XXX */
    sf_state->sf1.binding_table_entry_count = 0;
    sf_state->sf1.thread_priority = 0;
    sf_state->sf1.floating_point_mode = 0; /* Mesa does this */
    sf_state->sf1.illegal_op_exception_enable = 1;
    sf_state->sf1.mask_stack_exception_enable = 1;
    sf_state->sf1.sw_exception_enable = 1;

    /* scratch space is not used in our kernel */
    sf_state->thread2.per_thread_scratch_space = 0;
    sf_state->thread2.scratch_space_base_pointer = 0;

    sf_state->thread3.const_urb_entry_read_length = 0; /* no const URBs */
    sf_state->thread3.const_urb_entry_read_offset = 0; /* no const URBs */
    sf_state->thread3.urb_entry_read_length = 1; /* 1 URB per vertex */
    sf_state->thread3.urb_entry_read_offset = 0;
    sf_state->thread3.dispatch_grf_start_reg = 3;

    sf_state->thread4.max_threads = SF_MAX_THREADS - 1;
    sf_state->thread4.urb_entry_allocation_size = URB_SF_ENTRY_SIZE - 1;
    sf_state->thread4.nr_urb_entries = URB_SF_ENTRIES;
    sf_state->thread4.stats_enable = 1;

    sf_state->sf5.viewport_transform = 0; /* skip viewport */

    sf_state->sf6.cull_mode = I965_CULLMODE_NONE;
    sf_state->sf6.scissor = 0;

    sf_state->sf7.trifan_pv = 2;

    sf_state->sf6.dest_org_vbias = 0x8;
    sf_state->sf6.dest_org_hbias = 0x8;

    dri_bo_emit_reloc(render_state->sf.state,
                      I915_GEM_DOMAIN_INSTRUCTION, 0,
                      sf_state->thread0.grf_reg_count << 1,
                      offsetof(struct i965_sf_unit_state, thread0),
                      render_kernels[SF_KERNEL].bo);

    dri_bo_unmap(render_state->sf.state);
}

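/* One bilinear, clamp-to-edge sampler is programmed per source plane. */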
static void
i965_render_sampler(VADriverContextP ctx)
{
    struct i965_driver_data *i965 = i965_driver_data(ctx);
    struct i965_render_state *render_state = &i965->render_state;
    struct i965_sampler_state *sampler_state;
    int i;

    assert(render_state->wm.sampler_count > 0);
    assert(render_state->wm.sampler_count <= MAX_SAMPLERS);

    dri_bo_map(render_state->wm.sampler, 1);
    assert(render_state->wm.sampler->virtual);
    sampler_state = render_state->wm.sampler->virtual;
    for (i = 0; i < render_state->wm.sampler_count; i++) {
        memset(sampler_state, 0, sizeof(*sampler_state));
        sampler_state->ss0.min_filter = I965_MAPFILTER_LINEAR;
        sampler_state->ss0.mag_filter = I965_MAPFILTER_LINEAR;
        sampler_state->ss1.r_wrap_mode = I965_TEXCOORDMODE_CLAMP;
        sampler_state->ss1.s_wrap_mode = I965_TEXCOORDMODE_CLAMP;
        sampler_state->ss1.t_wrap_mode = I965_TEXCOORDMODE_CLAMP;
        sampler_state++;
    }

    dri_bo_unmap(render_state->wm.sampler);
}

static void
i965_subpic_render_wm_unit(VADriverContextP ctx)
{
    struct i965_driver_data *i965 = i965_driver_data(ctx);
    struct i965_render_state *render_state = &i965->render_state;
    struct i965_wm_unit_state *wm_state;

    assert(render_state->wm.sampler);

    dri_bo_map(render_state->wm.state, 1);
    assert(render_state->wm.state->virtual);
    wm_state = render_state->wm.state->virtual;
    memset(wm_state, 0, sizeof(*wm_state));

    wm_state->thread0.grf_reg_count = I965_GRF_BLOCKS(PS_KERNEL_NUM_GRF);
    wm_state->thread0.kernel_start_pointer = render_kernels[PS_SUBPIC_KERNEL].bo->offset >> 6;

    wm_state->thread1.single_program_flow = 1; /* XXX */

    if (IS_IGDNG(i965->intel.device_id))
        wm_state->thread1.binding_table_entry_count = 0; /* hardware requirement */
    else
        wm_state->thread1.binding_table_entry_count = 7;

    wm_state->thread2.scratch_space_base_pointer = 0;
    wm_state->thread2.per_thread_scratch_space = 0; /* 1024 bytes */

    wm_state->thread3.dispatch_grf_start_reg = 3; /* XXX */
    wm_state->thread3.const_urb_entry_read_length = 0;
    wm_state->thread3.const_urb_entry_read_offset = 0;
    wm_state->thread3.urb_entry_read_length = 1; /* XXX */
    wm_state->thread3.urb_entry_read_offset = 0; /* XXX */

    wm_state->wm4.stats_enable = 0;
    wm_state->wm4.sampler_state_pointer = render_state->wm.sampler->offset >> 5;

    if (IS_IGDNG(i965->intel.device_id))
        wm_state->wm4.sampler_count = 0;        /* hardware requirement */
    else
        wm_state->wm4.sampler_count = (render_state->wm.sampler_count + 3) / 4;

    wm_state->wm5.max_threads = PS_MAX_THREADS - 1;
    wm_state->wm5.thread_dispatch_enable = 1;
    wm_state->wm5.enable_16_pix = 1;
    wm_state->wm5.enable_8_pix = 0;
    wm_state->wm5.early_depth_test = 1;

    dri_bo_emit_reloc(render_state->wm.state,
                      I915_GEM_DOMAIN_INSTRUCTION, 0,
                      wm_state->thread0.grf_reg_count << 1,
                      offsetof(struct i965_wm_unit_state, thread0),
                      render_kernels[PS_SUBPIC_KERNEL].bo);

    dri_bo_emit_reloc(render_state->wm.state,
                      I915_GEM_DOMAIN_INSTRUCTION, 0,
                      wm_state->wm4.sampler_count << 2,
                      offsetof(struct i965_wm_unit_state, wm4),
                      render_state->wm.sampler);

    dri_bo_unmap(render_state->wm.state);
}

static void
i965_render_wm_unit(VADriverContextP ctx)
{
    struct i965_driver_data *i965 = i965_driver_data(ctx);
    struct i965_render_state *render_state = &i965->render_state;
    struct i965_wm_unit_state *wm_state;

    assert(render_state->wm.sampler);

    dri_bo_map(render_state->wm.state, 1);
    assert(render_state->wm.state->virtual);
    wm_state = render_state->wm.state->virtual;
    memset(wm_state, 0, sizeof(*wm_state));

    wm_state->thread0.grf_reg_count = I965_GRF_BLOCKS(PS_KERNEL_NUM_GRF);
    wm_state->thread0.kernel_start_pointer = render_kernels[PS_KERNEL].bo->offset >> 6;

    wm_state->thread1.single_program_flow = 1; /* XXX */

    if (IS_IGDNG(i965->intel.device_id))
        wm_state->thread1.binding_table_entry_count = 0;        /* hardware requirement */
    else
        wm_state->thread1.binding_table_entry_count = 7;

    wm_state->thread2.scratch_space_base_pointer = 0;
    wm_state->thread2.per_thread_scratch_space = 0; /* 1024 bytes */

    wm_state->thread3.dispatch_grf_start_reg = 3; /* XXX */
    wm_state->thread3.const_urb_entry_read_length = 0;
    wm_state->thread3.const_urb_entry_read_offset = 0;
    wm_state->thread3.urb_entry_read_length = 1; /* XXX */
    wm_state->thread3.urb_entry_read_offset = 0; /* XXX */

    wm_state->wm4.stats_enable = 0;
    wm_state->wm4.sampler_state_pointer = render_state->wm.sampler->offset >> 5;

    if (IS_IGDNG(i965->intel.device_id))
        wm_state->wm4.sampler_count = 0;        /* hardware requirement */
    else
        wm_state->wm4.sampler_count = (render_state->wm.sampler_count + 3) / 4;

    wm_state->wm5.max_threads = PS_MAX_THREADS - 1;
    wm_state->wm5.thread_dispatch_enable = 1;
    wm_state->wm5.enable_16_pix = 1;
    wm_state->wm5.enable_8_pix = 0;
    wm_state->wm5.early_depth_test = 1;

    dri_bo_emit_reloc(render_state->wm.state,
                      I915_GEM_DOMAIN_INSTRUCTION, 0,
                      wm_state->thread0.grf_reg_count << 1,
                      offsetof(struct i965_wm_unit_state, thread0),
                      render_kernels[PS_KERNEL].bo);

    dri_bo_emit_reloc(render_state->wm.state,
                      I915_GEM_DOMAIN_INSTRUCTION, 0,
                      wm_state->wm4.sampler_count << 2,
                      offsetof(struct i965_wm_unit_state, wm4),
                      render_state->wm.sampler);

    dri_bo_unmap(render_state->wm.state);
}

static void
i965_render_cc_viewport(VADriverContextP ctx)
{
    struct i965_driver_data *i965 = i965_driver_data(ctx);
    struct i965_render_state *render_state = &i965->render_state;
    struct i965_cc_viewport *cc_viewport;

    dri_bo_map(render_state->cc.viewport, 1);
    assert(render_state->cc.viewport->virtual);
    cc_viewport = render_state->cc.viewport->virtual;
    memset(cc_viewport, 0, sizeof(*cc_viewport));

    cc_viewport->min_depth = -1.e35;
    cc_viewport->max_depth = 1.e35;

    dri_bo_unmap(render_state->cc.viewport);
}

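/*
 * CC unit for subpicture rendering: color blending is enabled with
 * src_alpha / inv_src_alpha factors so the subpicture is composited
 * over the video frame.
 */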
static void
i965_subpic_render_cc_unit(VADriverContextP ctx)
{
    struct i965_driver_data *i965 = i965_driver_data(ctx);
    struct i965_render_state *render_state = &i965->render_state;
    struct i965_cc_unit_state *cc_state;

    assert(render_state->cc.viewport);

    dri_bo_map(render_state->cc.state, 1);
    assert(render_state->cc.state->virtual);
    cc_state = render_state->cc.state->virtual;
    memset(cc_state, 0, sizeof(*cc_state));

    cc_state->cc0.stencil_enable = 0;   /* disable stencil */
    cc_state->cc2.depth_test = 0;       /* disable depth test */
    cc_state->cc2.logicop_enable = 0;   /* disable logic op */
    cc_state->cc3.ia_blend_enable = 0;  /* blend alpha just like colors */
    cc_state->cc3.blend_enable = 1;     /* enable color blend */
    cc_state->cc3.alpha_test = 0;       /* disable alpha test */
    cc_state->cc3.alpha_test_format = 0; /* ALPHATEST_UNORM8: store alpha value as UNORM8 */
    cc_state->cc3.alpha_test_func = 5;  /* COMPAREFUNCTION_LESS: pass if less than the reference */
    cc_state->cc4.cc_viewport_state_offset = render_state->cc.viewport->offset >> 5;

    cc_state->cc5.dither_enable = 0;    /* disable dither */
    cc_state->cc5.logicop_func = 0xc;   /* COPY */
    cc_state->cc5.statistics_enable = 1;
    cc_state->cc5.ia_blend_function = I965_BLENDFUNCTION_ADD;
    cc_state->cc5.ia_src_blend_factor = I965_BLENDFACTOR_DST_ALPHA;
    cc_state->cc5.ia_dest_blend_factor = I965_BLENDFACTOR_DST_ALPHA;

    cc_state->cc6.clamp_post_alpha_blend = 0;
    cc_state->cc6.clamp_pre_alpha_blend = 0;

    /* final color = src_color * src_blend_factor +/- dst_color * dest_blend_factor */
    cc_state->cc6.blend_function = I965_BLENDFUNCTION_ADD;
    cc_state->cc6.src_blend_factor = I965_BLENDFACTOR_SRC_ALPHA;
    cc_state->cc6.dest_blend_factor = I965_BLENDFACTOR_INV_SRC_ALPHA;

    /* alpha test reference */
    cc_state->cc7.alpha_ref.f = 0.0;

    dri_bo_emit_reloc(render_state->cc.state,
                      I915_GEM_DOMAIN_INSTRUCTION, 0,
                      0,
                      offsetof(struct i965_cc_unit_state, cc4),
                      render_state->cc.viewport);

    dri_bo_unmap(render_state->cc.state);
}

static void
i965_render_cc_unit(VADriverContextP ctx)
{
    struct i965_driver_data *i965 = i965_driver_data(ctx);
    struct i965_render_state *render_state = &i965->render_state;
    struct i965_cc_unit_state *cc_state;

    assert(render_state->cc.viewport);

    dri_bo_map(render_state->cc.state, 1);
    assert(render_state->cc.state->virtual);
    cc_state = render_state->cc.state->virtual;
    memset(cc_state, 0, sizeof(*cc_state));

    cc_state->cc0.stencil_enable = 0;   /* disable stencil */
    cc_state->cc2.depth_test = 0;       /* disable depth test */
    cc_state->cc2.logicop_enable = 1;   /* enable logic op */
    cc_state->cc3.ia_blend_enable = 0;  /* blend alpha just like colors */
    cc_state->cc3.blend_enable = 0;     /* disable color blend */
    cc_state->cc3.alpha_test = 0;       /* disable alpha test */
    cc_state->cc4.cc_viewport_state_offset = render_state->cc.viewport->offset >> 5;

    cc_state->cc5.dither_enable = 0;    /* disable dither */
    cc_state->cc5.logicop_func = 0xc;   /* COPY */
    cc_state->cc5.statistics_enable = 1;
    cc_state->cc5.ia_blend_function = I965_BLENDFUNCTION_ADD;
    cc_state->cc5.ia_src_blend_factor = I965_BLENDFACTOR_ONE;
    cc_state->cc5.ia_dest_blend_factor = I965_BLENDFACTOR_ONE;

    dri_bo_emit_reloc(render_state->cc.state,
                      I915_GEM_DOMAIN_INSTRUCTION, 0,
                      0,
                      offsetof(struct i965_cc_unit_state, cc4),
                      render_state->cc.viewport);

    dri_bo_unmap(render_state->cc.state);
}

static void
i965_render_src_surface_state(VADriverContextP ctx,
                              int index,
                              dri_bo *region,
                              unsigned long offset,
                              int w, int h)
{
    struct i965_driver_data *i965 = i965_driver_data(ctx);
    struct i965_render_state *render_state = &i965->render_state;
    struct i965_surface_state *ss;
    dri_bo *ss_bo;

    ss_bo = dri_bo_alloc(i965->intel.bufmgr,
                         "surface state",
                         sizeof(struct i965_surface_state), 32);
    assert(ss_bo);
    dri_bo_map(ss_bo, 1);
    assert(ss_bo->virtual);
    ss = ss_bo->virtual;
    memset(ss, 0, sizeof(*ss));
    ss->ss0.surface_type = I965_SURFACE_2D;
    ss->ss0.surface_format = I965_SURFACEFORMAT_R8_UNORM;
    ss->ss0.writedisable_alpha = 0;
    ss->ss0.writedisable_red = 0;
    ss->ss0.writedisable_green = 0;
    ss->ss0.writedisable_blue = 0;
    ss->ss0.color_blend = 1;
    ss->ss0.vert_line_stride = 0;
    ss->ss0.vert_line_stride_ofs = 0;
    ss->ss0.mipmap_layout_mode = 0;
    ss->ss0.render_cache_read_mode = 0;

    ss->ss1.base_addr = region->offset + offset;

    ss->ss2.width = w - 1;
    ss->ss2.height = h - 1;
    ss->ss2.mip_count = 0;
    ss->ss2.render_target_rotation = 0;

    ss->ss3.pitch = w - 1;

    dri_bo_emit_reloc(ss_bo,
                      I915_GEM_DOMAIN_SAMPLER, 0,
                      offset,
                      offsetof(struct i965_surface_state, ss1),
                      region);

    dri_bo_unmap(ss_bo);

    assert(index < MAX_RENDER_SURFACES);
    assert(render_state->wm.surface[index] == NULL);
    render_state->wm.surface[index] = ss_bo;
    render_state->wm.sampler_count++;
}

static void
i965_subpic_render_src_surface_state(VADriverContextP ctx,
                                     int index,
                                     dri_bo *region,
                                     unsigned long offset,
                                     int w, int h, int format)
{
    struct i965_driver_data *i965 = i965_driver_data(ctx);
    struct i965_render_state *render_state = &i965->render_state;
    struct i965_surface_state *ss;
    dri_bo *ss_bo;

    ss_bo = dri_bo_alloc(i965->intel.bufmgr,
                         "surface state",
                         sizeof(struct i965_surface_state), 32);
    assert(ss_bo);
    dri_bo_map(ss_bo, 1);
    assert(ss_bo->virtual);
    ss = ss_bo->virtual;
    memset(ss, 0, sizeof(*ss));
    ss->ss0.surface_type = I965_SURFACE_2D;
    ss->ss0.surface_format = format;
    ss->ss0.writedisable_alpha = 0;
    ss->ss0.writedisable_red = 0;
    ss->ss0.writedisable_green = 0;
    ss->ss0.writedisable_blue = 0;
    ss->ss0.color_blend = 1;
    ss->ss0.vert_line_stride = 0;
    ss->ss0.vert_line_stride_ofs = 0;
    ss->ss0.mipmap_layout_mode = 0;
    ss->ss0.render_cache_read_mode = 0;

    ss->ss1.base_addr = region->offset + offset;

    ss->ss2.width = w - 1;
    ss->ss2.height = h - 1;
    ss->ss2.mip_count = 0;
    ss->ss2.render_target_rotation = 0;

    ss->ss3.pitch = w - 1;

    dri_bo_emit_reloc(ss_bo,
                      I915_GEM_DOMAIN_SAMPLER, 0,
                      offset,
                      offsetof(struct i965_surface_state, ss1),
                      region);

    dri_bo_unmap(ss_bo);

    assert(index < MAX_RENDER_SURFACES);
    assert(render_state->wm.surface[index] == NULL);
    render_state->wm.surface[index] = ss_bo;
    render_state->wm.sampler_count++;
}

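/*
 * Bind the source picture planes. The offsets assume a planar YUV layout:
 * full-resolution Y at the start of the buffer, followed by the two
 * half-resolution chroma planes. Each plane is bound twice, and a sampler
 * is allocated per binding entry (see sampler_count above).
 */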
static void
i965_render_src_surfaces_state(VADriverContextP ctx,
                               VASurfaceID surface)
{
    struct i965_driver_data *i965 = i965_driver_data(ctx);
    struct object_surface *obj_surface;
    int w, h;
    dri_bo *region;

    obj_surface = SURFACE(surface);
    assert(obj_surface);
    assert(obj_surface->bo);
    w = obj_surface->width;
    h = obj_surface->height;
    region = obj_surface->bo;

    i965_render_src_surface_state(ctx, 1, region, 0, w, h);     /* Y */
    i965_render_src_surface_state(ctx, 2, region, 0, w, h);
    i965_render_src_surface_state(ctx, 3, region, w * h + w * h / 4, w / 2, h / 2);     /* V */
    i965_render_src_surface_state(ctx, 4, region, w * h + w * h / 4, w / 2, h / 2);
    i965_render_src_surface_state(ctx, 5, region, w * h, w / 2, h / 2); /* U */
    i965_render_src_surface_state(ctx, 6, region, w * h, w / 2, h / 2);
}

static void
i965_subpic_render_src_surfaces_state(VADriverContextP ctx,
                                      VASurfaceID surface)
{
    struct i965_driver_data *i965 = i965_driver_data(ctx);
    struct object_surface *obj_surface = SURFACE(surface);
    struct object_subpic *obj_subpic;
    struct object_image *obj_image;
    dri_bo *subpic_region;

    assert(obj_surface);
    assert(obj_surface->bo);

    obj_subpic = SUBPIC(obj_surface->subpic);
    obj_image = IMAGE(obj_subpic->image);
    subpic_region = obj_image->bo;

    /* subpicture surface */
    i965_subpic_render_src_surface_state(ctx, 1, subpic_region, 0, obj_subpic->width, obj_subpic->height, obj_subpic->format);
    i965_subpic_render_src_surface_state(ctx, 2, subpic_region, 0, obj_subpic->width, obj_subpic->height, obj_subpic->format);
}

static void
i965_render_set_surface_tiling(struct i965_surface_state *ss, unsigned int tiling)
{
    switch (tiling) {
    case I915_TILING_NONE:
        ss->ss3.tiled_surface = 0;
        ss->ss3.tile_walk = 0;
        break;
    case I915_TILING_X:
        ss->ss3.tiled_surface = 1;
        ss->ss3.tile_walk = I965_TILEWALK_XMAJOR;
        break;
    case I915_TILING_Y:
        ss->ss3.tiled_surface = 1;
        ss->ss3.tile_walk = I965_TILEWALK_YMAJOR;
        break;
    }
}

static void
i965_render_dest_surface_state(VADriverContextP ctx, int index)
{
    struct i965_driver_data *i965 = i965_driver_data(ctx);
    struct i965_render_state *render_state = &i965->render_state;
    struct intel_region *dest_region = render_state->draw_region;
    struct i965_surface_state *ss;
    dri_bo *ss_bo;

    ss_bo = dri_bo_alloc(i965->intel.bufmgr,
                         "surface state",
                         sizeof(struct i965_surface_state), 32);
    assert(ss_bo);
    dri_bo_map(ss_bo, 1);
    assert(ss_bo->virtual);
    ss = ss_bo->virtual;
    memset(ss, 0, sizeof(*ss));

    ss->ss0.surface_type = I965_SURFACE_2D;
    ss->ss0.data_return_format = I965_SURFACERETURNFORMAT_FLOAT32;

    if (dest_region->cpp == 2) {
        ss->ss0.surface_format = I965_SURFACEFORMAT_B5G6R5_UNORM;
    } else {
        ss->ss0.surface_format = I965_SURFACEFORMAT_B8G8R8A8_UNORM;
    }

    ss->ss0.writedisable_alpha = 0;
    ss->ss0.writedisable_red = 0;
    ss->ss0.writedisable_green = 0;
    ss->ss0.writedisable_blue = 0;
    ss->ss0.color_blend = 1;
    ss->ss0.vert_line_stride = 0;
    ss->ss0.vert_line_stride_ofs = 0;
    ss->ss0.mipmap_layout_mode = 0;
    ss->ss0.render_cache_read_mode = 0;

    ss->ss1.base_addr = dest_region->bo->offset;

    ss->ss2.width = dest_region->width - 1;
    ss->ss2.height = dest_region->height - 1;
    ss->ss2.mip_count = 0;
    ss->ss2.render_target_rotation = 0;
    ss->ss3.pitch = dest_region->pitch - 1;
    i965_render_set_surface_tiling(ss, dest_region->tiling);

    dri_bo_emit_reloc(ss_bo,
                      I915_GEM_DOMAIN_RENDER, I915_GEM_DOMAIN_RENDER,
                      0,
                      offsetof(struct i965_surface_state, ss1),
                      dest_region->bo);

    dri_bo_unmap(ss_bo);

    assert(index < MAX_RENDER_SURFACES);
    assert(render_state->wm.surface[index] == NULL);
    render_state->wm.surface[index] = ss_bo;
}

static void
i965_render_binding_table(VADriverContextP ctx)
{
    struct i965_driver_data *i965 = i965_driver_data(ctx);
    struct i965_render_state *render_state = &i965->render_state;
    int i;
    unsigned int *binding_table;

    dri_bo_map(render_state->wm.binding_table, 1);
    assert(render_state->wm.binding_table->virtual);
    binding_table = render_state->wm.binding_table->virtual;
    memset(binding_table, 0, render_state->wm.binding_table->size);

    for (i = 0; i < MAX_RENDER_SURFACES; i++) {
        if (render_state->wm.surface[i]) {
            binding_table[i] = render_state->wm.surface[i]->offset;
            dri_bo_emit_reloc(render_state->wm.binding_table,
                              I915_GEM_DOMAIN_INSTRUCTION, 0,
                              0,
                              i * sizeof(*binding_table),
                              render_state->wm.surface[i]);
        }
    }

    dri_bo_unmap(render_state->wm.binding_table);
}

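/*
 * Each vertex below is four floats: texture coordinates (tx, ty) followed
 * by screen position (x, y). Only three corners of the quad are emitted;
 * a RECTLIST primitive derives the fourth vertex from the other three.
 */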
static void
i965_subpic_render_upload_vertex(VADriverContextP ctx,
                                 VASurfaceID surface,
                                 const VARectangle *output_rect)
{
    struct i965_driver_data  *i965         = i965_driver_data(ctx);
    struct i965_render_state *render_state = &i965->render_state;
    struct object_surface    *obj_surface  = SURFACE(surface);
    struct object_subpic     *obj_subpic   = SUBPIC(obj_surface->subpic);

    const float psx = (float)obj_surface->width  / (float)obj_subpic->width;
    const float psy = (float)obj_surface->height / (float)obj_subpic->height;
    const float ssx = (float)output_rect->width  / (float)obj_surface->width;
    const float ssy = (float)output_rect->height / (float)obj_surface->height;
    const float sx  = psx * ssx;
    const float sy  = psy * ssy;
    float *vb, tx1, tx2, ty1, ty2, x1, x2, y1, y2;
    int i = 0;

    VARectangle dst_rect;
    dst_rect.x      = output_rect->x + sx * (float)obj_subpic->dst_rect.x;
    dst_rect.y      = output_rect->y + sy * (float)obj_subpic->dst_rect.y;
    dst_rect.width  = sx * (float)obj_subpic->dst_rect.width;
    dst_rect.height = sy * (float)obj_subpic->dst_rect.height;

    dri_bo_map(render_state->vb.vertex_buffer, 1);
    assert(render_state->vb.vertex_buffer->virtual);
    vb = render_state->vb.vertex_buffer->virtual;

    tx1 = (float)obj_subpic->src_rect.x / (float)obj_subpic->width;
    ty1 = (float)obj_subpic->src_rect.y / (float)obj_subpic->height;
    tx2 = (float)(obj_subpic->src_rect.x + obj_subpic->src_rect.width) / (float)obj_subpic->width;
    ty2 = (float)(obj_subpic->src_rect.y + obj_subpic->src_rect.height) / (float)obj_subpic->height;

    x1 = (float)dst_rect.x;
    y1 = (float)dst_rect.y;
    x2 = (float)(dst_rect.x + dst_rect.width);
    y2 = (float)(dst_rect.y + dst_rect.height);

    vb[i++] = tx2;
    vb[i++] = ty2;
    vb[i++] = x2;
    vb[i++] = y2;

    vb[i++] = tx1;
    vb[i++] = ty2;
    vb[i++] = x1;
    vb[i++] = y2;

    vb[i++] = tx1;
    vb[i++] = ty1;
    vb[i++] = x1;
    vb[i++] = y1;

    dri_bo_unmap(render_state->vb.vertex_buffer);
}

static void
i965_render_upload_vertex(VADriverContextP ctx,
                          VASurfaceID surface,
                          short srcx,
                          short srcy,
                          unsigned short srcw,
                          unsigned short srch,
                          short destx,
                          short desty,
                          unsigned short destw,
                          unsigned short desth)
{
    struct i965_driver_data *i965 = i965_driver_data(ctx);
    struct i965_render_state *render_state = &i965->render_state;
    struct intel_region *dest_region = render_state->draw_region;
    struct object_surface *obj_surface;
    float *vb;

    float u1, v1, u2, v2;
    int i, width, height;
    int box_x1 = dest_region->x + destx;
    int box_y1 = dest_region->y + desty;
    int box_x2 = box_x1 + destw;
    int box_y2 = box_y1 + desth;

    obj_surface = SURFACE(surface);
    assert(obj_surface);
    width = obj_surface->width;
    height = obj_surface->height;

    u1 = (float)srcx / width;
    v1 = (float)srcy / height;
    u2 = (float)(srcx + srcw) / width;
    v2 = (float)(srcy + srch) / height;

    dri_bo_map(render_state->vb.vertex_buffer, 1);
    assert(render_state->vb.vertex_buffer->virtual);
    vb = render_state->vb.vertex_buffer->virtual;

    i = 0;
    vb[i++] = u2;
    vb[i++] = v2;
    vb[i++] = (float)box_x2;
    vb[i++] = (float)box_y2;

    vb[i++] = u1;
    vb[i++] = v2;
    vb[i++] = (float)box_x1;
    vb[i++] = (float)box_y2;

    vb[i++] = u1;
    vb[i++] = v1;
    vb[i++] = (float)box_x1;
    vb[i++] = (float)box_y1;

    dri_bo_unmap(render_state->vb.vertex_buffer);
}

static void
i965_surface_render_state_setup(VADriverContextP ctx,
                                VASurfaceID surface,
                                short srcx,
                                short srcy,
                                unsigned short srcw,
                                unsigned short srch,
                                short destx,
                                short desty,
                                unsigned short destw,
                                unsigned short desth)
{
    i965_render_vs_unit(ctx);
    i965_render_sf_unit(ctx);
    i965_render_dest_surface_state(ctx, 0);
    i965_render_src_surfaces_state(ctx, surface);
    i965_render_sampler(ctx);
    i965_render_wm_unit(ctx);
    i965_render_cc_viewport(ctx);
    i965_render_cc_unit(ctx);
    i965_render_binding_table(ctx);
    i965_render_upload_vertex(ctx, surface,
                              srcx, srcy, srcw, srch,
                              destx, desty, destw, desth);
}

static void
i965_subpic_render_state_setup(VADriverContextP ctx,
                               VASurfaceID surface,
                               short srcx,
                               short srcy,
                               unsigned short srcw,
                               unsigned short srch,
                               short destx,
                               short desty,
                               unsigned short destw,
                               unsigned short desth)
{
    VARectangle output_rect;

    i965_render_vs_unit(ctx);
    i965_render_sf_unit(ctx);
    i965_render_dest_surface_state(ctx, 0);
    i965_subpic_render_src_surfaces_state(ctx, surface);
    i965_render_sampler(ctx);
    i965_subpic_render_wm_unit(ctx);
    i965_render_cc_viewport(ctx);
    i965_subpic_render_cc_unit(ctx);
    i965_render_binding_table(ctx);

    output_rect.x      = destx;
    output_rect.y      = desty;
    output_rect.width  = destw;
    output_rect.height = desth;
    i965_subpic_render_upload_vertex(ctx, surface, &output_rect);
}

static void
i965_render_pipeline_select(VADriverContextP ctx)
{
    BEGIN_BATCH(ctx, 1);
    OUT_BATCH(ctx, CMD_PIPELINE_SELECT | PIPELINE_SELECT_3D);
    ADVANCE_BATCH(ctx);
}

static void
i965_render_state_sip(VADriverContextP ctx)
{
    BEGIN_BATCH(ctx, 2);
    OUT_BATCH(ctx, CMD_STATE_SIP | 0);
    OUT_BATCH(ctx, 0);
    ADVANCE_BATCH(ctx);
}

static void
i965_render_state_base_address(VADriverContextP ctx)
{
    struct i965_driver_data *i965 = i965_driver_data(ctx);

    if (IS_IGDNG(i965->intel.device_id)) {
        BEGIN_BATCH(ctx, 8);
        OUT_BATCH(ctx, CMD_STATE_BASE_ADDRESS | 6);
        OUT_BATCH(ctx, 0 | BASE_ADDRESS_MODIFY);
        OUT_BATCH(ctx, 0 | BASE_ADDRESS_MODIFY);
        OUT_BATCH(ctx, 0 | BASE_ADDRESS_MODIFY);
        OUT_BATCH(ctx, 0 | BASE_ADDRESS_MODIFY);
        OUT_BATCH(ctx, 0 | BASE_ADDRESS_MODIFY);
        OUT_BATCH(ctx, 0 | BASE_ADDRESS_MODIFY);
        OUT_BATCH(ctx, 0 | BASE_ADDRESS_MODIFY);
        ADVANCE_BATCH(ctx);
    } else {
        BEGIN_BATCH(ctx, 6);
        OUT_BATCH(ctx, CMD_STATE_BASE_ADDRESS | 4);
        OUT_BATCH(ctx, 0 | BASE_ADDRESS_MODIFY);
        OUT_BATCH(ctx, 0 | BASE_ADDRESS_MODIFY);
        OUT_BATCH(ctx, 0 | BASE_ADDRESS_MODIFY);
        OUT_BATCH(ctx, 0 | BASE_ADDRESS_MODIFY);
        OUT_BATCH(ctx, 0 | BASE_ADDRESS_MODIFY);
        ADVANCE_BATCH(ctx);
    }
}

static void
i965_render_binding_table_pointers(VADriverContextP ctx)
{
    struct i965_driver_data *i965 = i965_driver_data(ctx);
    struct i965_render_state *render_state = &i965->render_state;

    BEGIN_BATCH(ctx, 6);
    OUT_BATCH(ctx, CMD_BINDING_TABLE_POINTERS | 4);
    OUT_BATCH(ctx, 0); /* vs */
    OUT_BATCH(ctx, 0); /* gs */
    OUT_BATCH(ctx, 0); /* clip */
    OUT_BATCH(ctx, 0); /* sf */
    OUT_RELOC(ctx, render_state->wm.binding_table, I915_GEM_DOMAIN_INSTRUCTION, 0, 0); /* wm */
    ADVANCE_BATCH(ctx);
}

static void
i965_render_constant_color(VADriverContextP ctx)
{
    BEGIN_BATCH(ctx, 5);
    OUT_BATCH(ctx, CMD_CONSTANT_COLOR | 3);
    OUT_BATCH(ctx, float_to_uint(1.0));
    OUT_BATCH(ctx, float_to_uint(0.0));
    OUT_BATCH(ctx, float_to_uint(1.0));
    OUT_BATCH(ctx, float_to_uint(1.0));
    ADVANCE_BATCH(ctx);
}

static void
i965_render_pipelined_pointers(VADriverContextP ctx)
{
    struct i965_driver_data *i965 = i965_driver_data(ctx);
    struct i965_render_state *render_state = &i965->render_state;

    BEGIN_BATCH(ctx, 7);
    OUT_BATCH(ctx, CMD_PIPELINED_POINTERS | 5);
    OUT_RELOC(ctx, render_state->vs.state, I915_GEM_DOMAIN_INSTRUCTION, 0, 0);
    OUT_BATCH(ctx, 0);  /* disable GS */
    OUT_BATCH(ctx, 0);  /* disable CLIP */
    OUT_RELOC(ctx, render_state->sf.state, I915_GEM_DOMAIN_INSTRUCTION, 0, 0);
    OUT_RELOC(ctx, render_state->wm.state, I915_GEM_DOMAIN_INSTRUCTION, 0, 0);
    OUT_RELOC(ctx, render_state->cc.state, I915_GEM_DOMAIN_INSTRUCTION, 0, 0);
    ADVANCE_BATCH(ctx);
}

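/*
 * Program the URB fences. Each stage's fence is the end offset of its
 * allocation, so consecutive stages partition the URB back to back.
 */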
static void
i965_render_urb_layout(VADriverContextP ctx)
{
    int urb_vs_start, urb_vs_size;
    int urb_gs_start, urb_gs_size;
    int urb_clip_start, urb_clip_size;
    int urb_sf_start, urb_sf_size;
    int urb_cs_start, urb_cs_size;

    urb_vs_start = 0;
    urb_vs_size = URB_VS_ENTRIES * URB_VS_ENTRY_SIZE;
    urb_gs_start = urb_vs_start + urb_vs_size;
    urb_gs_size = URB_GS_ENTRIES * URB_GS_ENTRY_SIZE;
    urb_clip_start = urb_gs_start + urb_gs_size;
    urb_clip_size = URB_CLIP_ENTRIES * URB_CLIP_ENTRY_SIZE;
    urb_sf_start = urb_clip_start + urb_clip_size;
    urb_sf_size = URB_SF_ENTRIES * URB_SF_ENTRY_SIZE;
    urb_cs_start = urb_sf_start + urb_sf_size;
    urb_cs_size = URB_CS_ENTRIES * URB_CS_ENTRY_SIZE;

    BEGIN_BATCH(ctx, 3);
    OUT_BATCH(ctx,
              CMD_URB_FENCE |
              UF0_CS_REALLOC |
              UF0_SF_REALLOC |
              UF0_CLIP_REALLOC |
              UF0_GS_REALLOC |
              UF0_VS_REALLOC |
              1);
    OUT_BATCH(ctx,
              ((urb_clip_start + urb_clip_size) << UF1_CLIP_FENCE_SHIFT) |
              ((urb_gs_start + urb_gs_size) << UF1_GS_FENCE_SHIFT) |
              ((urb_vs_start + urb_vs_size) << UF1_VS_FENCE_SHIFT));
    OUT_BATCH(ctx,
              ((urb_cs_start + urb_cs_size) << UF2_CS_FENCE_SHIFT) |
              ((urb_sf_start + urb_sf_size) << UF2_SF_FENCE_SHIFT));
    ADVANCE_BATCH(ctx);
}

static void
i965_render_cs_urb_layout(VADriverContextP ctx)
{
    BEGIN_BATCH(ctx, 2);
    OUT_BATCH(ctx, CMD_CS_URB_STATE | 0);
    OUT_BATCH(ctx,
              ((URB_CS_ENTRY_SIZE - 1) << 4) |  /* URB Entry Allocation Size */
              (URB_CS_ENTRIES << 0));           /* Number of URB Entries */
    ADVANCE_BATCH(ctx);
}

static void
i965_render_drawing_rectangle(VADriverContextP ctx)
{
    struct i965_driver_data *i965 = i965_driver_data(ctx);
    struct i965_render_state *render_state = &i965->render_state;
    struct intel_region *dest_region = render_state->draw_region;

    BEGIN_BATCH(ctx, 4);
    OUT_BATCH(ctx, CMD_DRAWING_RECTANGLE | 2);
    OUT_BATCH(ctx, 0x00000000);
    OUT_BATCH(ctx, (dest_region->width - 1) | (dest_region->height - 1) << 16);
    OUT_BATCH(ctx, 0x00000000);
    ADVANCE_BATCH(ctx);
}

static void
i965_render_vertex_elements(VADriverContextP ctx)
{
    struct i965_driver_data *i965 = i965_driver_data(ctx);

    if (IS_IGDNG(i965->intel.device_id)) {
        BEGIN_BATCH(ctx, 5);
        OUT_BATCH(ctx, CMD_VERTEX_ELEMENTS | 3);
        /* offset 0: X,Y -> {X, Y, 1.0, 1.0} */
        OUT_BATCH(ctx, (0 << VE0_VERTEX_BUFFER_INDEX_SHIFT) |
                  VE0_VALID |
                  (I965_SURFACEFORMAT_R32G32_FLOAT << VE0_FORMAT_SHIFT) |
                  (0 << VE0_OFFSET_SHIFT));
        OUT_BATCH(ctx, (I965_VFCOMPONENT_STORE_SRC << VE1_VFCOMPONENT_0_SHIFT) |
                  (I965_VFCOMPONENT_STORE_SRC << VE1_VFCOMPONENT_1_SHIFT) |
                  (I965_VFCOMPONENT_STORE_1_FLT << VE1_VFCOMPONENT_2_SHIFT) |
                  (I965_VFCOMPONENT_STORE_1_FLT << VE1_VFCOMPONENT_3_SHIFT));
        /* offset 8: S0, T0 -> {S0, T0, 1.0, 1.0} */
        OUT_BATCH(ctx, (0 << VE0_VERTEX_BUFFER_INDEX_SHIFT) |
                  VE0_VALID |
                  (I965_SURFACEFORMAT_R32G32_FLOAT << VE0_FORMAT_SHIFT) |
                  (8 << VE0_OFFSET_SHIFT));
        OUT_BATCH(ctx, (I965_VFCOMPONENT_STORE_SRC << VE1_VFCOMPONENT_0_SHIFT) |
                  (I965_VFCOMPONENT_STORE_SRC << VE1_VFCOMPONENT_1_SHIFT) |
                  (I965_VFCOMPONENT_STORE_1_FLT << VE1_VFCOMPONENT_2_SHIFT) |
                  (I965_VFCOMPONENT_STORE_1_FLT << VE1_VFCOMPONENT_3_SHIFT));
        ADVANCE_BATCH(ctx);
    } else {
        BEGIN_BATCH(ctx, 5);
        OUT_BATCH(ctx, CMD_VERTEX_ELEMENTS | 3);
        /* offset 0: X,Y -> {X, Y, 1.0, 1.0} */
        OUT_BATCH(ctx, (0 << VE0_VERTEX_BUFFER_INDEX_SHIFT) |
                  VE0_VALID |
                  (I965_SURFACEFORMAT_R32G32_FLOAT << VE0_FORMAT_SHIFT) |
                  (0 << VE0_OFFSET_SHIFT));
        OUT_BATCH(ctx, (I965_VFCOMPONENT_STORE_SRC << VE1_VFCOMPONENT_0_SHIFT) |
                  (I965_VFCOMPONENT_STORE_SRC << VE1_VFCOMPONENT_1_SHIFT) |
                  (I965_VFCOMPONENT_STORE_1_FLT << VE1_VFCOMPONENT_2_SHIFT) |
                  (I965_VFCOMPONENT_STORE_1_FLT << VE1_VFCOMPONENT_3_SHIFT) |
                  (0 << VE1_DESTINATION_ELEMENT_OFFSET_SHIFT));
        /* offset 8: S0, T0 -> {S0, T0, 1.0, 1.0} */
        OUT_BATCH(ctx, (0 << VE0_VERTEX_BUFFER_INDEX_SHIFT) |
                  VE0_VALID |
                  (I965_SURFACEFORMAT_R32G32_FLOAT << VE0_FORMAT_SHIFT) |
                  (8 << VE0_OFFSET_SHIFT));
        OUT_BATCH(ctx, (I965_VFCOMPONENT_STORE_SRC << VE1_VFCOMPONENT_0_SHIFT) |
                  (I965_VFCOMPONENT_STORE_SRC << VE1_VFCOMPONENT_1_SHIFT) |
                  (I965_VFCOMPONENT_STORE_1_FLT << VE1_VFCOMPONENT_2_SHIFT) |
                  (I965_VFCOMPONENT_STORE_1_FLT << VE1_VFCOMPONENT_3_SHIFT) |
                  (4 << VE1_DESTINATION_ELEMENT_OFFSET_SHIFT));
        ADVANCE_BATCH(ctx);
    }
}

static void
i965_render_upload_image_palette(
    VADriverContextP ctx,
    VAImageID        image_id,
    unsigned int     alpha
)
{
    struct i965_driver_data *i965 = i965_driver_data(ctx);
    unsigned int i;

    struct object_image *obj_image = IMAGE(image_id);
    assert(obj_image);

    if (obj_image->image.num_palette_entries == 0)
        return;

    BEGIN_BATCH(ctx, 1 + obj_image->image.num_palette_entries);
    OUT_BATCH(ctx, CMD_SAMPLER_PALETTE_LOAD | (obj_image->image.num_palette_entries - 1));
    /* fill the palette: bits 31:24 hold the alpha, bits 23:0 the color */
    for (i = 0; i < obj_image->image.num_palette_entries; i++)
        OUT_BATCH(ctx, (alpha << 24) | obj_image->palette[i]);
    ADVANCE_BATCH(ctx);
}

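/*
 * Emit the vertex buffer state and kick off a single RECTLIST primitive.
 * On IGDNG the second VB dword is the buffer end address (hence the
 * relocation); earlier generations take the maximum vertex index there.
 */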
static void
i965_render_startup(VADriverContextP ctx)
{
    struct i965_driver_data *i965 = i965_driver_data(ctx);
    struct i965_render_state *render_state = &i965->render_state;

    BEGIN_BATCH(ctx, 11);
    OUT_BATCH(ctx, CMD_VERTEX_BUFFERS | 3);
    OUT_BATCH(ctx,
              (0 << VB0_BUFFER_INDEX_SHIFT) |
              VB0_VERTEXDATA |
              ((4 * 4) << VB0_BUFFER_PITCH_SHIFT));
    OUT_RELOC(ctx, render_state->vb.vertex_buffer, I915_GEM_DOMAIN_VERTEX, 0, 0);

    if (IS_IGDNG(i965->intel.device_id))
        OUT_RELOC(ctx, render_state->vb.vertex_buffer, I915_GEM_DOMAIN_VERTEX, 0, 12 * 4);
    else
        OUT_BATCH(ctx, 3);

    OUT_BATCH(ctx, 0);

    OUT_BATCH(ctx,
              CMD_3DPRIMITIVE |
              _3DPRIMITIVE_VERTEX_SEQUENTIAL |
              (_3DPRIM_RECTLIST << _3DPRIMITIVE_TOPOLOGY_SHIFT) |
              (0 << 9) |
              4);
    OUT_BATCH(ctx, 3); /* vertex count per instance */
    OUT_BATCH(ctx, 0); /* start vertex offset */
    OUT_BATCH(ctx, 1); /* single instance */
    OUT_BATCH(ctx, 0); /* start instance location */
    OUT_BATCH(ctx, 0); /* index buffer offset, ignored */
    ADVANCE_BATCH(ctx);
}

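/* Clear the destination region to black with a solid-fill BLT. */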
static void
i965_clear_dest_region(VADriverContextP ctx)
{
    struct i965_driver_data *i965 = i965_driver_data(ctx);
    struct i965_render_state *render_state = &i965->render_state;
    struct intel_region *dest_region = render_state->draw_region;
    unsigned int blt_cmd, br13;
    int pitch;

    blt_cmd = XY_COLOR_BLT_CMD;
    br13 = 0xf0 << 16;
    pitch = dest_region->pitch;

    if (dest_region->cpp == 4) {
        br13 |= BR13_8888;
        blt_cmd |= (XY_COLOR_BLT_WRITE_RGB | XY_COLOR_BLT_WRITE_ALPHA);
    } else {
        assert(dest_region->cpp == 2);
        br13 |= BR13_565;
    }

    if (dest_region->tiling != I915_TILING_NONE) {
        blt_cmd |= XY_COLOR_BLT_DST_TILED;
        pitch /= 4;
    }

    br13 |= pitch;

    BEGIN_BATCH(ctx, 6);
    OUT_BATCH(ctx, blt_cmd);
    OUT_BATCH(ctx, br13);
    OUT_BATCH(ctx, (dest_region->y << 16) | (dest_region->x));
    OUT_BATCH(ctx, ((dest_region->y + dest_region->height) << 16) |
              (dest_region->x + dest_region->width));
    OUT_RELOC(ctx, dest_region->bo,
              I915_GEM_DOMAIN_RENDER, I915_GEM_DOMAIN_RENDER,
              0);
    OUT_BATCH(ctx, 0x0);
    ADVANCE_BATCH(ctx);
}

static void
i965_surface_render_pipeline_setup(VADriverContextP ctx)
{
    intel_batchbuffer_start_atomic(ctx, 0x1000);
    intel_batchbuffer_emit_mi_flush(ctx);
    i965_clear_dest_region(ctx);
    i965_render_pipeline_select(ctx);
    i965_render_state_sip(ctx);
    i965_render_state_base_address(ctx);
    i965_render_binding_table_pointers(ctx);
    i965_render_constant_color(ctx);
    i965_render_pipelined_pointers(ctx);
    i965_render_urb_layout(ctx);
    i965_render_cs_urb_layout(ctx);
    i965_render_drawing_rectangle(ctx);
    i965_render_vertex_elements(ctx);
    i965_render_startup(ctx);
    intel_batchbuffer_end_atomic(ctx);
}

static void
i965_subpic_render_pipeline_setup(VADriverContextP ctx)
{
    intel_batchbuffer_start_atomic(ctx, 0x1000);
    intel_batchbuffer_emit_mi_flush(ctx);
    i965_render_pipeline_select(ctx);
    i965_render_state_sip(ctx);
    i965_render_state_base_address(ctx);
    i965_render_binding_table_pointers(ctx);
    i965_render_constant_color(ctx);
    i965_render_pipelined_pointers(ctx);
    i965_render_urb_layout(ctx);
    i965_render_cs_urb_layout(ctx);
    i965_render_drawing_rectangle(ctx);
    i965_render_vertex_elements(ctx);
    i965_render_startup(ctx);
    intel_batchbuffer_end_atomic(ctx);
}

static void
i965_render_initialize(VADriverContextP ctx)
{
    struct i965_driver_data *i965 = i965_driver_data(ctx);
    struct i965_render_state *render_state = &i965->render_state;
    int i;
    dri_bo *bo;

    /* VERTEX BUFFER */
    dri_bo_unreference(render_state->vb.vertex_buffer);
    bo = dri_bo_alloc(i965->intel.bufmgr,
                      "vertex buffer",
                      4096,
                      4096);
    assert(bo);
    render_state->vb.vertex_buffer = bo;

    /* VS */
    dri_bo_unreference(render_state->vs.state);
    bo = dri_bo_alloc(i965->intel.bufmgr,
                      "vs state",
                      sizeof(struct i965_vs_unit_state),
                      64);
    assert(bo);
    render_state->vs.state = bo;

    /* GS */
    /* CLIP */
    /* SF */
    dri_bo_unreference(render_state->sf.state);
    bo = dri_bo_alloc(i965->intel.bufmgr,
                      "sf state",
                      sizeof(struct i965_sf_unit_state),
                      64);
    assert(bo);
    render_state->sf.state = bo;

    /* WM */
    for (i = 0; i < MAX_RENDER_SURFACES; i++) {
        dri_bo_unreference(render_state->wm.surface[i]);
        render_state->wm.surface[i] = NULL;
    }

    dri_bo_unreference(render_state->wm.binding_table);
    bo = dri_bo_alloc(i965->intel.bufmgr,
                      "binding table",
                      MAX_RENDER_SURFACES * sizeof(unsigned int),
                      64);
    assert(bo);
    render_state->wm.binding_table = bo;

    dri_bo_unreference(render_state->wm.sampler);
    bo = dri_bo_alloc(i965->intel.bufmgr,
                      "sampler state",
                      MAX_SAMPLERS * sizeof(struct i965_sampler_state),
                      64);
    assert(bo);
    render_state->wm.sampler = bo;
    render_state->wm.sampler_count = 0;

    dri_bo_unreference(render_state->wm.state);
    bo = dri_bo_alloc(i965->intel.bufmgr,
                      "wm state",
                      sizeof(struct i965_wm_unit_state),
                      64);
    assert(bo);
    render_state->wm.state = bo;

    /* COLOR CALCULATOR */
    dri_bo_unreference(render_state->cc.state);
    bo = dri_bo_alloc(i965->intel.bufmgr,
                      "color calc state",
                      sizeof(struct i965_cc_unit_state),
                      64);
    assert(bo);
    render_state->cc.state = bo;

    dri_bo_unreference(render_state->cc.viewport);
    bo = dri_bo_alloc(i965->intel.bufmgr,
                      "cc viewport",
                      sizeof(struct i965_cc_viewport),
                      64);
    assert(bo);
    render_state->cc.viewport = bo;
}

void
i965_render_put_surface(VADriverContextP ctx,
                        VASurfaceID surface,
                        short srcx,
                        short srcy,
                        unsigned short srcw,
                        unsigned short srch,
                        short destx,
                        short desty,
                        unsigned short destw,
                        unsigned short desth)
{
    i965_render_initialize(ctx);
    i965_surface_render_state_setup(ctx, surface,
                                    srcx, srcy, srcw, srch,
                                    destx, desty, destw, desth);
    i965_surface_render_pipeline_setup(ctx);
    intel_batchbuffer_flush(ctx);
}

void
i965_render_put_subpic(VADriverContextP ctx,
                       VASurfaceID surface,
                       short srcx,
                       short srcy,
                       unsigned short srcw,
                       unsigned short srch,
                       short destx,
                       short desty,
                       unsigned short destw,
                       unsigned short desth)
{
    struct i965_driver_data *i965 = i965_driver_data(ctx);
    struct object_surface *obj_surface = SURFACE(surface);
    struct object_subpic *obj_subpic;

    assert(obj_surface);
    obj_subpic = SUBPIC(obj_surface->subpic);
    assert(obj_subpic);

    i965_render_initialize(ctx);
    i965_subpic_render_state_setup(ctx, surface,
                                   srcx, srcy, srcw, srch,
                                   destx, desty, destw, desth);
    i965_subpic_render_pipeline_setup(ctx);
    i965_render_upload_image_palette(ctx, obj_subpic->image, 0xff);
    intel_batchbuffer_flush(ctx);
}

Bool
i965_render_init(VADriverContextP ctx)
{
    struct i965_driver_data *i965 = i965_driver_data(ctx);
    int i;

    /* kernel */
    assert(NUM_RENDER_KERNEL == (sizeof(render_kernels_gen5) /
                                 sizeof(render_kernels_gen5[0])));

    if (IS_IGDNG(i965->intel.device_id))
        render_kernels = render_kernels_gen5;
    else
        render_kernels = render_kernels_gen4;

    for (i = 0; i < NUM_RENDER_KERNEL; i++) {
        struct render_kernel *kernel = &render_kernels[i];
        kernel->bo = dri_bo_alloc(i965->intel.bufmgr,
                                  kernel->name,
                                  kernel->size, 64);
        assert(kernel->bo);
        dri_bo_subdata(kernel->bo, 0, kernel->size, kernel->bin);
    }

    return True;
}

Bool
i965_render_terminate(VADriverContextP ctx)
{
    int i;
    struct i965_driver_data *i965 = i965_driver_data(ctx);
    struct i965_render_state *render_state = &i965->render_state;

    for (i = 0; i < NUM_RENDER_KERNEL; i++) {
        struct render_kernel *kernel = &render_kernels[i];

        dri_bo_unreference(kernel->bo);
        kernel->bo = NULL;
    }

    dri_bo_unreference(render_state->vb.vertex_buffer);
    render_state->vb.vertex_buffer = NULL;
    dri_bo_unreference(render_state->vs.state);
    render_state->vs.state = NULL;
    dri_bo_unreference(render_state->sf.state);
    render_state->sf.state = NULL;
    dri_bo_unreference(render_state->wm.binding_table);
    render_state->wm.binding_table = NULL;
    dri_bo_unreference(render_state->wm.sampler);
    render_state->wm.sampler = NULL;
    dri_bo_unreference(render_state->wm.state);
    render_state->wm.state = NULL;

    for (i = 0; i < MAX_RENDER_SURFACES; i++) {
        dri_bo_unreference(render_state->wm.surface[i]);
        render_state->wm.surface[i] = NULL;
    }

    dri_bo_unreference(render_state->cc.viewport);
    render_state->cc.viewport = NULL;
    dri_bo_unreference(render_state->cc.state);
    render_state->cc.state = NULL;

    if (render_state->draw_region) {
        dri_bo_unreference(render_state->draw_region->bo);
        free(render_state->draw_region);
        render_state->draw_region = NULL;
    }

    return True;
}
1516