/*
 * Copyright © 2006 Intel Corporation
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice (including the next
 * paragraph) shall be included in all copies or substantial portions of the
 * Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
 * DEALINGS IN THE SOFTWARE.
 *
 * Authors:
 *    Eric Anholt <eric@anholt.net>
 *    Keith Packard <keithp@keithp.com>
 *    Xiang Haihao <haihao.xiang@intel.com>
 *
 */

/*
 * Most of the rendering code is ported from xf86-video-intel/src/i965_video.c
 */

#include <stdio.h>
#include <string.h>
#include <assert.h>

#include <va/va_backend.h>
#include "va/x11/va_dricommon.h"

#include "intel_batchbuffer.h"
#include "intel_driver.h"

#include "i965_defines.h"
#include "i965_render.h"
#include "i965_drv_video.h"

#define SF_KERNEL_NUM_GRF       16
#define SF_MAX_THREADS          1

static const unsigned int sf_kernel_static[][4] =
{
#include "shaders/render/exa_sf.g4b"
};

#define PS_KERNEL_NUM_GRF       32
#define PS_MAX_THREADS          32

#define I965_GRF_BLOCKS(nreg)   ((nreg + 15) / 16 - 1)

static const unsigned int ps_kernel_static[][4] =
{
#include "shaders/render/exa_wm_xy.g4b"
#include "shaders/render/exa_wm_src_affine.g4b"
#include "shaders/render/exa_wm_src_sample_planar.g4b"
#include "shaders/render/exa_wm_yuv_rgb.g4b"
#include "shaders/render/exa_wm_write.g4b"
};

static const unsigned int ps_subpic_kernel_static[][4] =
{
#include "shaders/render/exa_wm_xy.g4b"
#include "shaders/render/exa_wm_src_affine.g4b"
#include "shaders/render/exa_wm_src_sample_argb.g4b"
#include "shaders/render/exa_wm_write.g4b"
};

/* On IGDNG */
static const unsigned int sf_kernel_static_gen5[][4] =
{
#include "shaders/render/exa_sf.g4b.gen5"
};

static const unsigned int ps_kernel_static_gen5[][4] =
{
#include "shaders/render/exa_wm_xy.g4b.gen5"
#include "shaders/render/exa_wm_src_affine.g4b.gen5"
#include "shaders/render/exa_wm_src_sample_planar.g4b.gen5"
#include "shaders/render/exa_wm_yuv_rgb.g4b.gen5"
#include "shaders/render/exa_wm_write.g4b.gen5"
};

static const unsigned int ps_subpic_kernel_static_gen5[][4] =
{
#include "shaders/render/exa_wm_xy.g4b.gen5"
#include "shaders/render/exa_wm_src_affine.g4b.gen5"
#include "shaders/render/exa_wm_src_sample_argb.g4b.gen5"
#include "shaders/render/exa_wm_write.g4b.gen5"
};

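/*
 * Bit-cast a float to a 32-bit integer via a union: the usual
 * type-punning idiom for emitting float immediates into the command
 * stream (see i965_render_constant_color() below).
 */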
static uint32_t float_to_uint(float f)
{
    union {
        uint32_t i;
        float f;
    } x;

    x.f = f;
    return x.i;
}

enum
{
    SF_KERNEL = 0,
    PS_KERNEL,
    PS_SUBPIC_KERNEL
};

struct render_kernel
{
    char *name;
    const unsigned int (*bin)[4];
    int size;
    dri_bo *bo;
};

static struct render_kernel render_kernels_gen4[] = {
    {
        "SF",
        sf_kernel_static,
        sizeof(sf_kernel_static),
        NULL
    },
    {
        "PS",
        ps_kernel_static,
        sizeof(ps_kernel_static),
        NULL
    },
    {
        "PS_SUBPIC",
        ps_subpic_kernel_static,
        sizeof(ps_subpic_kernel_static),
        NULL
    }
};

static struct render_kernel render_kernels_gen5[] = {
    {
        "SF",
        sf_kernel_static_gen5,
        sizeof(sf_kernel_static_gen5),
        NULL
    },
    {
        "PS",
        ps_kernel_static_gen5,
        sizeof(ps_kernel_static_gen5),
        NULL
    },
    {
        "PS_SUBPIC",
        ps_subpic_kernel_static_gen5,
        sizeof(ps_subpic_kernel_static_gen5),
        NULL
    }
};

static struct render_kernel *render_kernels = NULL;

#define NUM_RENDER_KERNEL (sizeof(render_kernels_gen4)/sizeof(render_kernels_gen4[0]))

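/*
 * Static URB partitioning. Only the VS and SF stages get entries: GS,
 * CLIP and CS are unused since the pipeline draws pre-transformed
 * RECTLIST primitives.
 */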
#define URB_VS_ENTRIES        8
#define URB_VS_ENTRY_SIZE     1

#define URB_GS_ENTRIES        0
#define URB_GS_ENTRY_SIZE     0

#define URB_CLIP_ENTRIES      0
#define URB_CLIP_ENTRY_SIZE   0

#define URB_SF_ENTRIES        1
#define URB_SF_ENTRY_SIZE     2

#define URB_CS_ENTRIES        0
#define URB_CS_ENTRY_SIZE     0

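/*
 * The VS unit is programmed but left disabled (vs6.vs_enable = 0):
 * vertices pass through untransformed, so only the URB allocation
 * fields matter here.
 */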
static void
i965_render_vs_unit(VADriverContextP ctx)
{
    struct i965_driver_data *i965 = i965_driver_data(ctx);
    struct i965_render_state *render_state = &i965->render_state;
    struct i965_vs_unit_state *vs_state;

    dri_bo_map(render_state->vs.state, 1);
    assert(render_state->vs.state->virtual);
    vs_state = render_state->vs.state->virtual;
    memset(vs_state, 0, sizeof(*vs_state));

    if (IS_IGDNG(i965->intel.device_id))
        vs_state->thread4.nr_urb_entries = URB_VS_ENTRIES >> 2;
    else
        vs_state->thread4.nr_urb_entries = URB_VS_ENTRIES;

    vs_state->thread4.urb_entry_allocation_size = URB_VS_ENTRY_SIZE - 1;
    vs_state->vs6.vs_enable = 0;
    vs_state->vs6.vert_cache_disable = 1;

    dri_bo_unmap(render_state->vs.state);
}

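/*
 * The SF (strip/fan) unit runs the small exa_sf kernel to set up the
 * rectangle's attributes; viewport transform and culling are skipped
 * since the vertices are already in screen space.
 */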
static void
i965_render_sf_unit(VADriverContextP ctx)
{
    struct i965_driver_data *i965 = i965_driver_data(ctx);
    struct i965_render_state *render_state = &i965->render_state;
    struct i965_sf_unit_state *sf_state;

    dri_bo_map(render_state->sf.state, 1);
    assert(render_state->sf.state->virtual);
    sf_state = render_state->sf.state->virtual;
    memset(sf_state, 0, sizeof(*sf_state));

    sf_state->thread0.grf_reg_count = I965_GRF_BLOCKS(SF_KERNEL_NUM_GRF);
    sf_state->thread0.kernel_start_pointer = render_kernels[SF_KERNEL].bo->offset >> 6;

    sf_state->sf1.single_program_flow = 1; /* XXX */
    sf_state->sf1.binding_table_entry_count = 0;
    sf_state->sf1.thread_priority = 0;
    sf_state->sf1.floating_point_mode = 0; /* Mesa does this */
    sf_state->sf1.illegal_op_exception_enable = 1;
    sf_state->sf1.mask_stack_exception_enable = 1;
    sf_state->sf1.sw_exception_enable = 1;

    /* scratch space is not used in our kernel */
    sf_state->thread2.per_thread_scratch_space = 0;
    sf_state->thread2.scratch_space_base_pointer = 0;

    sf_state->thread3.const_urb_entry_read_length = 0; /* no const URBs */
    sf_state->thread3.const_urb_entry_read_offset = 0; /* no const URBs */
    sf_state->thread3.urb_entry_read_length = 1; /* 1 URB per vertex */
    sf_state->thread3.urb_entry_read_offset = 0;
    sf_state->thread3.dispatch_grf_start_reg = 3;

    sf_state->thread4.max_threads = SF_MAX_THREADS - 1;
    sf_state->thread4.urb_entry_allocation_size = URB_SF_ENTRY_SIZE - 1;
    sf_state->thread4.nr_urb_entries = URB_SF_ENTRIES;
    sf_state->thread4.stats_enable = 1;

    sf_state->sf5.viewport_transform = 0; /* skip viewport */

    sf_state->sf6.cull_mode = I965_CULLMODE_NONE;
    sf_state->sf6.scissor = 0;

    sf_state->sf7.trifan_pv = 2;

    sf_state->sf6.dest_org_vbias = 0x8;
    sf_state->sf6.dest_org_hbias = 0x8;

    dri_bo_emit_reloc(render_state->sf.state,
                      I915_GEM_DOMAIN_INSTRUCTION, 0,
                      sf_state->thread0.grf_reg_count << 1,
                      offsetof(struct i965_sf_unit_state, thread0),
                      render_kernels[SF_KERNEL].bo);

    dri_bo_unmap(render_state->sf.state);
}

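/*
 * One bilinear sampler with clamp-to-edge wrapping per bound source
 * surface; sampler_count is bumped by the surface-state setup below.
 */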
static void
i965_render_sampler(VADriverContextP ctx)
{
    struct i965_driver_data *i965 = i965_driver_data(ctx);
    struct i965_render_state *render_state = &i965->render_state;
    struct i965_sampler_state *sampler_state;
    int i;

    assert(render_state->wm.sampler_count > 0);
    assert(render_state->wm.sampler_count <= MAX_SAMPLERS);

    dri_bo_map(render_state->wm.sampler, 1);
    assert(render_state->wm.sampler->virtual);
    sampler_state = render_state->wm.sampler->virtual;

    for (i = 0; i < render_state->wm.sampler_count; i++) {
        memset(sampler_state, 0, sizeof(*sampler_state));
        sampler_state->ss0.min_filter = I965_MAPFILTER_LINEAR;
        sampler_state->ss0.mag_filter = I965_MAPFILTER_LINEAR;
        sampler_state->ss1.r_wrap_mode = I965_TEXCOORDMODE_CLAMP;
        sampler_state->ss1.s_wrap_mode = I965_TEXCOORDMODE_CLAMP;
        sampler_state->ss1.t_wrap_mode = I965_TEXCOORDMODE_CLAMP;
        sampler_state++;
    }

    dri_bo_unmap(render_state->wm.sampler);
}

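/*
 * WM (pixel shader) unit for subpicture blending: dispatches the ARGB
 * sampling kernel in 16-pixel mode. The IS_IGDNG special cases below
 * are hardware requirements on that generation.
 */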
static void
i965_subpic_render_wm_unit(VADriverContextP ctx)
{
    struct i965_driver_data *i965 = i965_driver_data(ctx);
    struct i965_render_state *render_state = &i965->render_state;
    struct i965_wm_unit_state *wm_state;

    assert(render_state->wm.sampler);

    dri_bo_map(render_state->wm.state, 1);
    assert(render_state->wm.state->virtual);
    wm_state = render_state->wm.state->virtual;
    memset(wm_state, 0, sizeof(*wm_state));

    wm_state->thread0.grf_reg_count = I965_GRF_BLOCKS(PS_KERNEL_NUM_GRF);
    wm_state->thread0.kernel_start_pointer = render_kernels[PS_SUBPIC_KERNEL].bo->offset >> 6;

    wm_state->thread1.single_program_flow = 1; /* XXX */

    if (IS_IGDNG(i965->intel.device_id))
        wm_state->thread1.binding_table_entry_count = 0; /* hardware requirement */
    else
        wm_state->thread1.binding_table_entry_count = 7;

    wm_state->thread2.scratch_space_base_pointer = 0;
    wm_state->thread2.per_thread_scratch_space = 0; /* 1024 bytes */

    wm_state->thread3.dispatch_grf_start_reg = 3; /* XXX */
    wm_state->thread3.const_urb_entry_read_length = 0;
    wm_state->thread3.const_urb_entry_read_offset = 0;
    wm_state->thread3.urb_entry_read_length = 1; /* XXX */
    wm_state->thread3.urb_entry_read_offset = 0; /* XXX */

    wm_state->wm4.stats_enable = 0;
    wm_state->wm4.sampler_state_pointer = render_state->wm.sampler->offset >> 5;

    if (IS_IGDNG(i965->intel.device_id))
        wm_state->wm4.sampler_count = 0;        /* hardware requirement */
    else
        wm_state->wm4.sampler_count = (render_state->wm.sampler_count + 3) / 4;

    wm_state->wm5.max_threads = PS_MAX_THREADS - 1;
    wm_state->wm5.thread_dispatch_enable = 1;
    wm_state->wm5.enable_16_pix = 1;
    wm_state->wm5.enable_8_pix = 0;
    wm_state->wm5.early_depth_test = 1;

    dri_bo_emit_reloc(render_state->wm.state,
                      I915_GEM_DOMAIN_INSTRUCTION, 0,
                      wm_state->thread0.grf_reg_count << 1,
                      offsetof(struct i965_wm_unit_state, thread0),
                      render_kernels[PS_SUBPIC_KERNEL].bo);

    dri_bo_emit_reloc(render_state->wm.state,
                      I915_GEM_DOMAIN_INSTRUCTION, 0,
                      wm_state->wm4.sampler_count << 2,
                      offsetof(struct i965_wm_unit_state, wm4),
                      render_state->wm.sampler);

    dri_bo_unmap(render_state->wm.state);
}

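/*
 * WM unit for plain video rendering: the same setup, but dispatching
 * the planar-YUV-to-RGB pixel shader instead of the ARGB subpicture
 * kernel.
 */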
static void
i965_render_wm_unit(VADriverContextP ctx)
{
    struct i965_driver_data *i965 = i965_driver_data(ctx);
    struct i965_render_state *render_state = &i965->render_state;
    struct i965_wm_unit_state *wm_state;

    assert(render_state->wm.sampler);

    dri_bo_map(render_state->wm.state, 1);
    assert(render_state->wm.state->virtual);
    wm_state = render_state->wm.state->virtual;
    memset(wm_state, 0, sizeof(*wm_state));

    wm_state->thread0.grf_reg_count = I965_GRF_BLOCKS(PS_KERNEL_NUM_GRF);
    wm_state->thread0.kernel_start_pointer = render_kernels[PS_KERNEL].bo->offset >> 6;

    wm_state->thread1.single_program_flow = 1; /* XXX */

    if (IS_IGDNG(i965->intel.device_id))
        wm_state->thread1.binding_table_entry_count = 0;        /* hardware requirement */
    else
        wm_state->thread1.binding_table_entry_count = 7;

    wm_state->thread2.scratch_space_base_pointer = 0;
    wm_state->thread2.per_thread_scratch_space = 0; /* 1024 bytes */

    wm_state->thread3.dispatch_grf_start_reg = 3; /* XXX */
    wm_state->thread3.const_urb_entry_read_length = 0;
    wm_state->thread3.const_urb_entry_read_offset = 0;
    wm_state->thread3.urb_entry_read_length = 1; /* XXX */
    wm_state->thread3.urb_entry_read_offset = 0; /* XXX */

    wm_state->wm4.stats_enable = 0;
    wm_state->wm4.sampler_state_pointer = render_state->wm.sampler->offset >> 5;

    if (IS_IGDNG(i965->intel.device_id))
        wm_state->wm4.sampler_count = 0;        /* hardware requirement */
    else
        wm_state->wm4.sampler_count = (render_state->wm.sampler_count + 3) / 4;

    wm_state->wm5.max_threads = PS_MAX_THREADS - 1;
    wm_state->wm5.thread_dispatch_enable = 1;
    wm_state->wm5.enable_16_pix = 1;
    wm_state->wm5.enable_8_pix = 0;
    wm_state->wm5.early_depth_test = 1;

    dri_bo_emit_reloc(render_state->wm.state,
                      I915_GEM_DOMAIN_INSTRUCTION, 0,
                      wm_state->thread0.grf_reg_count << 1,
                      offsetof(struct i965_wm_unit_state, thread0),
                      render_kernels[PS_KERNEL].bo);

    dri_bo_emit_reloc(render_state->wm.state,
                      I915_GEM_DOMAIN_INSTRUCTION, 0,
                      wm_state->wm4.sampler_count << 2,
                      offsetof(struct i965_wm_unit_state, wm4),
                      render_state->wm.sampler);

    dri_bo_unmap(render_state->wm.state);
}

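/*
 * The depth range is left wide open; the depth test is disabled in the
 * CC unit anyway.
 */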
static void
i965_render_cc_viewport(VADriverContextP ctx)
{
    struct i965_driver_data *i965 = i965_driver_data(ctx);
    struct i965_render_state *render_state = &i965->render_state;
    struct i965_cc_viewport *cc_viewport;

    dri_bo_map(render_state->cc.viewport, 1);
    assert(render_state->cc.viewport->virtual);
    cc_viewport = render_state->cc.viewport->virtual;
    memset(cc_viewport, 0, sizeof(*cc_viewport));

    cc_viewport->min_depth = -1.e35;
    cc_viewport->max_depth = 1.e35;

    dri_bo_unmap(render_state->cc.viewport);
}

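/*
 * CC unit for subpicture blending: classic "over" compositing,
 * dst = src * src_alpha + dst * (1 - src_alpha).
 */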
static void
i965_subpic_render_cc_unit(VADriverContextP ctx)
{
    struct i965_driver_data *i965 = i965_driver_data(ctx);
    struct i965_render_state *render_state = &i965->render_state;
    struct i965_cc_unit_state *cc_state;

    assert(render_state->cc.viewport);

    dri_bo_map(render_state->cc.state, 1);
    assert(render_state->cc.state->virtual);
    cc_state = render_state->cc.state->virtual;
    memset(cc_state, 0, sizeof(*cc_state));

    cc_state->cc0.stencil_enable = 0;   /* disable stencil */
    cc_state->cc2.depth_test = 0;       /* disable depth test */
    cc_state->cc2.logicop_enable = 0;   /* disable logic op */
    cc_state->cc3.ia_blend_enable = 0;  /* blend alpha just like colors */
    cc_state->cc3.blend_enable = 1;     /* enable color blend */
    cc_state->cc3.alpha_test = 0;       /* disable alpha test */
    cc_state->cc3.alpha_test_format = 0; /* ALPHATEST_UNORM8: store alpha value as UNORM8 */
    cc_state->cc3.alpha_test_func = 5;  /* COMPAREFUNCTION_LESS: pass if less than the reference */
    cc_state->cc4.cc_viewport_state_offset = render_state->cc.viewport->offset >> 5;

    cc_state->cc5.dither_enable = 0;    /* disable dither */
    cc_state->cc5.logicop_func = 0xc;   /* COPY */
    cc_state->cc5.statistics_enable = 1;
    cc_state->cc5.ia_blend_function = I965_BLENDFUNCTION_ADD;
    cc_state->cc5.ia_src_blend_factor = I965_BLENDFACTOR_DST_ALPHA;
    cc_state->cc5.ia_dest_blend_factor = I965_BLENDFACTOR_DST_ALPHA;

    cc_state->cc6.clamp_post_alpha_blend = 0;
    cc_state->cc6.clamp_pre_alpha_blend = 0;

    /* final color = src_color * src_blend_factor +/- dst_color * dest_blend_factor */
    cc_state->cc6.blend_function = I965_BLENDFUNCTION_ADD;
    cc_state->cc6.src_blend_factor = I965_BLENDFACTOR_SRC_ALPHA;
    cc_state->cc6.dest_blend_factor = I965_BLENDFACTOR_INV_SRC_ALPHA;

    /* alpha test reference */
    cc_state->cc7.alpha_ref.f = 0.0;

    dri_bo_emit_reloc(render_state->cc.state,
                      I915_GEM_DOMAIN_INSTRUCTION, 0,
                      0,
                      offsetof(struct i965_cc_unit_state, cc4),
                      render_state->cc.viewport);

    dri_bo_unmap(render_state->cc.state);
}

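/*
 * CC unit for plain rendering: blending and all tests disabled, logic
 * op set to plain source copy.
 */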
static void
i965_render_cc_unit(VADriverContextP ctx)
{
    struct i965_driver_data *i965 = i965_driver_data(ctx);
    struct i965_render_state *render_state = &i965->render_state;
    struct i965_cc_unit_state *cc_state;

    assert(render_state->cc.viewport);

    dri_bo_map(render_state->cc.state, 1);
    assert(render_state->cc.state->virtual);
    cc_state = render_state->cc.state->virtual;
    memset(cc_state, 0, sizeof(*cc_state));

    cc_state->cc0.stencil_enable = 0;   /* disable stencil */
    cc_state->cc2.depth_test = 0;       /* disable depth test */
    cc_state->cc2.logicop_enable = 1;   /* enable logic op */
    cc_state->cc3.ia_blend_enable = 0;  /* blend alpha just like colors */
    cc_state->cc3.blend_enable = 0;     /* disable color blend */
    cc_state->cc3.alpha_test = 0;       /* disable alpha test */
    cc_state->cc4.cc_viewport_state_offset = render_state->cc.viewport->offset >> 5;

    cc_state->cc5.dither_enable = 0;    /* disable dither */
    cc_state->cc5.logicop_func = 0xc;   /* COPY */
    cc_state->cc5.statistics_enable = 1;
    cc_state->cc5.ia_blend_function = I965_BLENDFUNCTION_ADD;
    cc_state->cc5.ia_src_blend_factor = I965_BLENDFACTOR_ONE;
    cc_state->cc5.ia_dest_blend_factor = I965_BLENDFACTOR_ONE;

    dri_bo_emit_reloc(render_state->cc.state,
                      I915_GEM_DOMAIN_INSTRUCTION, 0,
                      0,
                      offsetof(struct i965_cc_unit_state, cc4),
                      render_state->cc.viewport);

    dri_bo_unmap(render_state->cc.state);
}

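/*
 * Build a SURFACE_STATE for one plane of the source picture, sampled as
 * R8_UNORM (one byte per texel), so the pitch programmed below is w
 * bytes.
 */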
static void
i965_render_src_surface_state(VADriverContextP ctx,
                              int index,
                              dri_bo *region,
                              unsigned long offset,
                              int w, int h)
{
    struct i965_driver_data *i965 = i965_driver_data(ctx);
    struct i965_render_state *render_state = &i965->render_state;
    struct i965_surface_state *ss;
    dri_bo *ss_bo;

    ss_bo = dri_bo_alloc(i965->intel.bufmgr,
                         "surface state",
                         sizeof(struct i965_surface_state), 32);
    assert(ss_bo);
    dri_bo_map(ss_bo, 1);
    assert(ss_bo->virtual);
    ss = ss_bo->virtual;
    memset(ss, 0, sizeof(*ss));
    ss->ss0.surface_type = I965_SURFACE_2D;
    ss->ss0.surface_format = I965_SURFACEFORMAT_R8_UNORM;
    ss->ss0.writedisable_alpha = 0;
    ss->ss0.writedisable_red = 0;
    ss->ss0.writedisable_green = 0;
    ss->ss0.writedisable_blue = 0;
    ss->ss0.color_blend = 1;
    ss->ss0.vert_line_stride = 0;
    ss->ss0.vert_line_stride_ofs = 0;
    ss->ss0.mipmap_layout_mode = 0;
    ss->ss0.render_cache_read_mode = 0;

    ss->ss1.base_addr = region->offset + offset;

    ss->ss2.width = w - 1;
    ss->ss2.height = h - 1;
    ss->ss2.mip_count = 0;
    ss->ss2.render_target_rotation = 0;

    ss->ss3.pitch = w - 1;

    dri_bo_emit_reloc(ss_bo,
                      I915_GEM_DOMAIN_SAMPLER, 0,
                      offset,
                      offsetof(struct i965_surface_state, ss1),
                      region);

    dri_bo_unmap(ss_bo);

    assert(index < MAX_RENDER_SURFACES);
    assert(render_state->wm.surface[index] == NULL);
    render_state->wm.surface[index] = ss_bo;
    render_state->wm.sampler_count++;
}

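/*
 * Same as above, but for the subpicture image, whose surface format is
 * taken from the subpicture object instead of being fixed to R8_UNORM.
 */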
static void
i965_subpic_render_src_surface_state(VADriverContextP ctx,
                                     int index,
                                     dri_bo *region,
                                     unsigned long offset,
                                     int w, int h, int format)
{
    struct i965_driver_data *i965 = i965_driver_data(ctx);
    struct i965_render_state *render_state = &i965->render_state;
    struct i965_surface_state *ss;
    dri_bo *ss_bo;

    ss_bo = dri_bo_alloc(i965->intel.bufmgr,
                         "surface state",
                         sizeof(struct i965_surface_state), 32);
    assert(ss_bo);
    dri_bo_map(ss_bo, 1);
    assert(ss_bo->virtual);
    ss = ss_bo->virtual;
    memset(ss, 0, sizeof(*ss));
    ss->ss0.surface_type = I965_SURFACE_2D;
    ss->ss0.surface_format = format;
    ss->ss0.writedisable_alpha = 0;
    ss->ss0.writedisable_red = 0;
    ss->ss0.writedisable_green = 0;
    ss->ss0.writedisable_blue = 0;
    ss->ss0.color_blend = 1;
    ss->ss0.vert_line_stride = 0;
    ss->ss0.vert_line_stride_ofs = 0;
    ss->ss0.mipmap_layout_mode = 0;
    ss->ss0.render_cache_read_mode = 0;

    ss->ss1.base_addr = region->offset + offset;

    ss->ss2.width = w - 1;
    ss->ss2.height = h - 1;
    ss->ss2.mip_count = 0;
    ss->ss2.render_target_rotation = 0;

    ss->ss3.pitch = w - 1;

    dri_bo_emit_reloc(ss_bo,
                      I915_GEM_DOMAIN_SAMPLER, 0,
                      offset,
                      offsetof(struct i965_surface_state, ss1),
                      region);

    dri_bo_unmap(ss_bo);

    assert(index < MAX_RENDER_SURFACES);
    assert(render_state->wm.surface[index] == NULL);
    render_state->wm.surface[index] = ss_bo;
    render_state->wm.sampler_count++;
}

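/*
 * Bind the planes of the source surface. The offsets assume a planar
 * YUV 4:2:0 (I420-style) layout: Y at 0, U at w * h, V at
 * w * h + w * h / 4, with half-resolution chroma. Each plane is bound
 * twice so the binding-table indices line up with what the pixel
 * shader kernel expects.
 */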
static void
i965_render_src_surfaces_state(VADriverContextP ctx,
                               VASurfaceID surface)
{
    struct i965_driver_data *i965 = i965_driver_data(ctx);
    struct object_surface *obj_surface;
    int w, h;
    dri_bo *region;

    obj_surface = SURFACE(surface);
    assert(obj_surface);
    assert(obj_surface->bo);
    w = obj_surface->width;
    h = obj_surface->height;
    region = obj_surface->bo;

    i965_render_src_surface_state(ctx, 1, region, 0, w, h);     /* Y */
    i965_render_src_surface_state(ctx, 2, region, 0, w, h);
    i965_render_src_surface_state(ctx, 3, region, w * h + w * h / 4, w / 2, h / 2);     /* V */
    i965_render_src_surface_state(ctx, 4, region, w * h + w * h / 4, w / 2, h / 2);
    i965_render_src_surface_state(ctx, 5, region, w * h, w / 2, h / 2); /* U */
    i965_render_src_surface_state(ctx, 6, region, w * h, w / 2, h / 2);
}

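/*
 * For subpictures only the image BO itself is bound (again twice, to
 * match the kernel's binding-table indices); it is blended on top of
 * the already-rendered surface.
 */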
static void
i965_subpic_render_src_surfaces_state(VADriverContextP ctx,
                                      VASurfaceID surface)
{
    struct i965_driver_data *i965 = i965_driver_data(ctx);
    struct object_surface *obj_surface = SURFACE(surface);
    struct object_subpic *obj_subpic;
    struct object_image *obj_image;
    dri_bo *subpic_region;

    assert(obj_surface);
    assert(obj_surface->bo);

    obj_subpic = SUBPIC(obj_surface->subpic);
    obj_image = IMAGE(obj_subpic->image);
    subpic_region = obj_image->bo;

    /* subpicture surface */
    i965_subpic_render_src_surface_state(ctx, 1, subpic_region, 0, obj_subpic->width, obj_subpic->height, obj_subpic->format);
    i965_subpic_render_src_surface_state(ctx, 2, subpic_region, 0, obj_subpic->width, obj_subpic->height, obj_subpic->format);
}

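/*
 * Translate a libdrm tiling mode into the SURFACE_STATE tiled-surface
 * and tile-walk fields.
 */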
static void
i965_render_set_surface_tiling(struct i965_surface_state *ss, unsigned int tiling)
{
    switch (tiling) {
    case I915_TILING_NONE:
        ss->ss3.tiled_surface = 0;
        ss->ss3.tile_walk = 0;
        break;
    case I915_TILING_X:
        ss->ss3.tiled_surface = 1;
        ss->ss3.tile_walk = I965_TILEWALK_XMAJOR;
        break;
    case I915_TILING_Y:
        ss->ss3.tiled_surface = 1;
        ss->ss3.tile_walk = I965_TILEWALK_YMAJOR;
        break;
    }
}

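/*
 * SURFACE_STATE for the render target (the drawable): 16bpp
 * destinations map to B5G6R5, 32bpp to B8G8R8A8.
 */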
static void
i965_render_dest_surface_state(VADriverContextP ctx, int index)
{
    struct i965_driver_data *i965 = i965_driver_data(ctx);
    struct i965_render_state *render_state = &i965->render_state;
    struct intel_region *dest_region = render_state->draw_region;
    struct i965_surface_state *ss;
    dri_bo *ss_bo;

    ss_bo = dri_bo_alloc(i965->intel.bufmgr,
                         "surface state",
                         sizeof(struct i965_surface_state), 32);
    assert(ss_bo);
    dri_bo_map(ss_bo, 1);
    assert(ss_bo->virtual);
    ss = ss_bo->virtual;
    memset(ss, 0, sizeof(*ss));

    ss->ss0.surface_type = I965_SURFACE_2D;
    ss->ss0.data_return_format = I965_SURFACERETURNFORMAT_FLOAT32;

    if (dest_region->cpp == 2) {
        ss->ss0.surface_format = I965_SURFACEFORMAT_B5G6R5_UNORM;
    } else {
        ss->ss0.surface_format = I965_SURFACEFORMAT_B8G8R8A8_UNORM;
    }

    ss->ss0.writedisable_alpha = 0;
    ss->ss0.writedisable_red = 0;
    ss->ss0.writedisable_green = 0;
    ss->ss0.writedisable_blue = 0;
    ss->ss0.color_blend = 1;
    ss->ss0.vert_line_stride = 0;
    ss->ss0.vert_line_stride_ofs = 0;
    ss->ss0.mipmap_layout_mode = 0;
    ss->ss0.render_cache_read_mode = 0;

    ss->ss1.base_addr = dest_region->bo->offset;

    ss->ss2.width = dest_region->width - 1;
    ss->ss2.height = dest_region->height - 1;
    ss->ss2.mip_count = 0;
    ss->ss2.render_target_rotation = 0;
    ss->ss3.pitch = dest_region->pitch - 1;
    i965_render_set_surface_tiling(ss, dest_region->tiling);

    dri_bo_emit_reloc(ss_bo,
                      I915_GEM_DOMAIN_RENDER, I915_GEM_DOMAIN_RENDER,
                      0,
                      offsetof(struct i965_surface_state, ss1),
                      dest_region->bo);

    dri_bo_unmap(ss_bo);

    assert(index < MAX_RENDER_SURFACES);
    assert(render_state->wm.surface[index] == NULL);
    render_state->wm.surface[index] = ss_bo;
}

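/*
 * Fill the binding table with the offsets of all surface-state BOs
 * created above, emitting a relocation for each live entry.
 */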
static void
i965_render_binding_table(VADriverContextP ctx)
{
    struct i965_driver_data *i965 = i965_driver_data(ctx);
    struct i965_render_state *render_state = &i965->render_state;
    int i;
    unsigned int *binding_table;

    dri_bo_map(render_state->wm.binding_table, 1);
    assert(render_state->wm.binding_table->virtual);
    binding_table = render_state->wm.binding_table->virtual;
    memset(binding_table, 0, render_state->wm.binding_table->size);

    for (i = 0; i < MAX_RENDER_SURFACES; i++) {
        if (render_state->wm.surface[i]) {
            binding_table[i] = render_state->wm.surface[i]->offset;
            dri_bo_emit_reloc(render_state->wm.binding_table,
                              I915_GEM_DOMAIN_INSTRUCTION, 0,
                              0,
                              i * sizeof(*binding_table),
                              render_state->wm.surface[i]);
        }
    }

    dri_bo_unmap(render_state->wm.binding_table);
}

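/*
 * Upload three vertices for the subpicture RECTLIST, each as (s, t)
 * texture coordinates followed by (x, y) screen coordinates:
 * bottom-right, bottom-left, top-left. The hardware derives the fourth
 * corner of the rectangle itself.
 */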
static void
i965_subpic_render_upload_vertex(VADriverContextP ctx,
                                 VASurfaceID surface,
                                 const VARectangle *output_rect)
{
    struct i965_driver_data  *i965         = i965_driver_data(ctx);
    struct i965_render_state *render_state = &i965->render_state;
    struct object_surface    *obj_surface  = SURFACE(surface);
    struct object_subpic     *obj_subpic   = SUBPIC(obj_surface->subpic);

    const float psx = (float)obj_surface->width  / (float)obj_subpic->width;
    const float psy = (float)obj_surface->height / (float)obj_subpic->height;
    const float ssx = (float)output_rect->width  / (float)obj_surface->width;
    const float ssy = (float)output_rect->height / (float)obj_surface->height;
    const float sx  = psx * ssx;
    const float sy  = psy * ssy;
    float *vb, tx1, tx2, ty1, ty2, x1, x2, y1, y2;
    int i = 0;

    VARectangle dst_rect;
    dst_rect.x      = output_rect->x + sx * (float)obj_subpic->dst_rect.x;
    dst_rect.y      = output_rect->y + sy * (float)obj_subpic->dst_rect.y;
    dst_rect.width  = sx * (float)obj_subpic->dst_rect.width;
    dst_rect.height = sy * (float)obj_subpic->dst_rect.height;

    dri_bo_map(render_state->vb.vertex_buffer, 1);
    assert(render_state->vb.vertex_buffer->virtual);
    vb = render_state->vb.vertex_buffer->virtual;

    tx1 = (float)obj_subpic->src_rect.x / (float)obj_subpic->width;
    ty1 = (float)obj_subpic->src_rect.y / (float)obj_subpic->height;
    tx2 = (float)(obj_subpic->src_rect.x + obj_subpic->src_rect.width) / (float)obj_subpic->width;
    ty2 = (float)(obj_subpic->src_rect.y + obj_subpic->src_rect.height) / (float)obj_subpic->height;

    x1 = (float)dst_rect.x;
    y1 = (float)dst_rect.y;
    x2 = (float)(dst_rect.x + dst_rect.width);
    y2 = (float)(dst_rect.y + dst_rect.height);

    vb[i++] = tx2;
    vb[i++] = ty2;
    vb[i++] = x2;
    vb[i++] = y2;

    vb[i++] = tx1;
    vb[i++] = ty2;
    vb[i++] = x1;
    vb[i++] = y2;

    vb[i++] = tx1;
    vb[i++] = ty1;
    vb[i++] = x1;
    vb[i++] = y1;

    dri_bo_unmap(render_state->vb.vertex_buffer);
}

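/*
 * Same three-vertex RECTLIST upload for the main surface, with texture
 * coordinates normalized to the source crop rectangle.
 */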
static void
i965_render_upload_vertex(VADriverContextP ctx,
                          VASurfaceID surface,
                          short srcx,
                          short srcy,
                          unsigned short srcw,
                          unsigned short srch,
                          short destx,
                          short desty,
                          unsigned short destw,
                          unsigned short desth)
{
    struct i965_driver_data *i965 = i965_driver_data(ctx);
    struct i965_render_state *render_state = &i965->render_state;
    struct intel_region *dest_region = render_state->draw_region;
    struct object_surface *obj_surface;
    float *vb;

    float u1, v1, u2, v2;
    int i, width, height;
    int box_x1 = dest_region->x + destx;
    int box_y1 = dest_region->y + desty;
    int box_x2 = box_x1 + destw;
    int box_y2 = box_y1 + desth;

    obj_surface = SURFACE(surface);
    assert(obj_surface);
    width = obj_surface->width;
    height = obj_surface->height;

    u1 = (float)srcx / width;
    v1 = (float)srcy / height;
    u2 = (float)(srcx + srcw) / width;
    v2 = (float)(srcy + srch) / height;

    dri_bo_map(render_state->vb.vertex_buffer, 1);
    assert(render_state->vb.vertex_buffer->virtual);
    vb = render_state->vb.vertex_buffer->virtual;

    i = 0;
    vb[i++] = u2;
    vb[i++] = v2;
    vb[i++] = (float)box_x2;
    vb[i++] = (float)box_y2;

    vb[i++] = u1;
    vb[i++] = v2;
    vb[i++] = (float)box_x1;
    vb[i++] = (float)box_y2;

    vb[i++] = u1;
    vb[i++] = v1;
    vb[i++] = (float)box_x1;
    vb[i++] = (float)box_y1;

    dri_bo_unmap(render_state->vb.vertex_buffer);
}

static void
i965_surface_render_state_setup(VADriverContextP ctx,
                                VASurfaceID surface,
                                short srcx,
                                short srcy,
                                unsigned short srcw,
                                unsigned short srch,
                                short destx,
                                short desty,
                                unsigned short destw,
                                unsigned short desth)
{
    i965_render_vs_unit(ctx);
    i965_render_sf_unit(ctx);
    i965_render_dest_surface_state(ctx, 0);
    i965_render_src_surfaces_state(ctx, surface);
    i965_render_sampler(ctx);
    i965_render_wm_unit(ctx);
    i965_render_cc_viewport(ctx);
    i965_render_cc_unit(ctx);
    i965_render_binding_table(ctx);
    i965_render_upload_vertex(ctx, surface,
                              srcx, srcy, srcw, srch,
                              destx, desty, destw, desth);
}

static void
i965_subpic_render_state_setup(VADriverContextP ctx,
                               VASurfaceID surface,
                               short srcx,
                               short srcy,
                               unsigned short srcw,
                               unsigned short srch,
                               short destx,
                               short desty,
                               unsigned short destw,
                               unsigned short desth)
{
    VARectangle output_rect;

    i965_render_vs_unit(ctx);
    i965_render_sf_unit(ctx);
    i965_render_dest_surface_state(ctx, 0);
    i965_subpic_render_src_surfaces_state(ctx, surface);
    i965_render_sampler(ctx);
    i965_subpic_render_wm_unit(ctx);
    i965_render_cc_viewport(ctx);
    i965_subpic_render_cc_unit(ctx);
    i965_render_binding_table(ctx);

    output_rect.x      = destx;
    output_rect.y      = desty;
    output_rect.width  = destw;
    output_rect.height = desth;
    i965_subpic_render_upload_vertex(ctx, surface, &output_rect);
}

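/*
 * Everything below emits the actual command stream: pipeline selection,
 * state pointers, URB fences, and finally the RECTLIST primitive.
 */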
static void
i965_render_pipeline_select(VADriverContextP ctx)
{
    BEGIN_BATCH(ctx, 1);
    OUT_BATCH(ctx, CMD_PIPELINE_SELECT | PIPELINE_SELECT_3D);
    ADVANCE_BATCH(ctx);
}

static void
i965_render_state_sip(VADriverContextP ctx)
{
    BEGIN_BATCH(ctx, 2);
    OUT_BATCH(ctx, CMD_STATE_SIP | 0);
    OUT_BATCH(ctx, 0);
    ADVANCE_BATCH(ctx);
}

static void
i965_render_state_base_address(VADriverContextP ctx)
{
    struct i965_driver_data *i965 = i965_driver_data(ctx);

    if (IS_IGDNG(i965->intel.device_id)) {
        BEGIN_BATCH(ctx, 8);
        OUT_BATCH(ctx, CMD_STATE_BASE_ADDRESS | 6);
        OUT_BATCH(ctx, 0 | BASE_ADDRESS_MODIFY);
        OUT_BATCH(ctx, 0 | BASE_ADDRESS_MODIFY);
        OUT_BATCH(ctx, 0 | BASE_ADDRESS_MODIFY);
        OUT_BATCH(ctx, 0 | BASE_ADDRESS_MODIFY);
        OUT_BATCH(ctx, 0 | BASE_ADDRESS_MODIFY);
        OUT_BATCH(ctx, 0 | BASE_ADDRESS_MODIFY);
        OUT_BATCH(ctx, 0 | BASE_ADDRESS_MODIFY);
        ADVANCE_BATCH(ctx);
    } else {
        BEGIN_BATCH(ctx, 6);
        OUT_BATCH(ctx, CMD_STATE_BASE_ADDRESS | 4);
        OUT_BATCH(ctx, 0 | BASE_ADDRESS_MODIFY);
        OUT_BATCH(ctx, 0 | BASE_ADDRESS_MODIFY);
        OUT_BATCH(ctx, 0 | BASE_ADDRESS_MODIFY);
        OUT_BATCH(ctx, 0 | BASE_ADDRESS_MODIFY);
        OUT_BATCH(ctx, 0 | BASE_ADDRESS_MODIFY);
        ADVANCE_BATCH(ctx);
    }
}

static void
i965_render_binding_table_pointers(VADriverContextP ctx)
{
    struct i965_driver_data *i965 = i965_driver_data(ctx);
    struct i965_render_state *render_state = &i965->render_state;

    BEGIN_BATCH(ctx, 6);
    OUT_BATCH(ctx, CMD_BINDING_TABLE_POINTERS | 4);
    OUT_BATCH(ctx, 0); /* vs */
    OUT_BATCH(ctx, 0); /* gs */
    OUT_BATCH(ctx, 0); /* clip */
    OUT_BATCH(ctx, 0); /* sf */
    OUT_RELOC(ctx, render_state->wm.binding_table, I915_GEM_DOMAIN_INSTRUCTION, 0, 0); /* wm */
    ADVANCE_BATCH(ctx);
}

static void
i965_render_constant_color(VADriverContextP ctx)
{
    BEGIN_BATCH(ctx, 5);
    OUT_BATCH(ctx, CMD_CONSTANT_COLOR | 3);
    OUT_BATCH(ctx, float_to_uint(1.0));
    OUT_BATCH(ctx, float_to_uint(0.0));
    OUT_BATCH(ctx, float_to_uint(1.0));
    OUT_BATCH(ctx, float_to_uint(1.0));
    ADVANCE_BATCH(ctx);
}

static void
i965_render_pipelined_pointers(VADriverContextP ctx)
{
    struct i965_driver_data *i965 = i965_driver_data(ctx);
    struct i965_render_state *render_state = &i965->render_state;

    BEGIN_BATCH(ctx, 7);
    OUT_BATCH(ctx, CMD_PIPELINED_POINTERS | 5);
    OUT_RELOC(ctx, render_state->vs.state, I915_GEM_DOMAIN_INSTRUCTION, 0, 0);
    OUT_BATCH(ctx, 0);  /* disable GS */
    OUT_BATCH(ctx, 0);  /* disable CLIP */
    OUT_RELOC(ctx, render_state->sf.state, I915_GEM_DOMAIN_INSTRUCTION, 0, 0);
    OUT_RELOC(ctx, render_state->wm.state, I915_GEM_DOMAIN_INSTRUCTION, 0, 0);
    OUT_RELOC(ctx, render_state->cc.state, I915_GEM_DOMAIN_INSTRUCTION, 0, 0);
    ADVANCE_BATCH(ctx);
}

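/*
 * Program the URB fences so that each fixed-function stage gets the
 * static allocation defined by the URB_* macros at the top of the file.
 */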
static void
i965_render_urb_layout(VADriverContextP ctx)
{
    int urb_vs_start, urb_vs_size;
    int urb_gs_start, urb_gs_size;
    int urb_clip_start, urb_clip_size;
    int urb_sf_start, urb_sf_size;
    int urb_cs_start, urb_cs_size;

    urb_vs_start = 0;
    urb_vs_size = URB_VS_ENTRIES * URB_VS_ENTRY_SIZE;
    urb_gs_start = urb_vs_start + urb_vs_size;
    urb_gs_size = URB_GS_ENTRIES * URB_GS_ENTRY_SIZE;
    urb_clip_start = urb_gs_start + urb_gs_size;
    urb_clip_size = URB_CLIP_ENTRIES * URB_CLIP_ENTRY_SIZE;
    urb_sf_start = urb_clip_start + urb_clip_size;
    urb_sf_size = URB_SF_ENTRIES * URB_SF_ENTRY_SIZE;
    urb_cs_start = urb_sf_start + urb_sf_size;
    urb_cs_size = URB_CS_ENTRIES * URB_CS_ENTRY_SIZE;

    BEGIN_BATCH(ctx, 3);
    OUT_BATCH(ctx,
              CMD_URB_FENCE |
              UF0_CS_REALLOC |
              UF0_SF_REALLOC |
              UF0_CLIP_REALLOC |
              UF0_GS_REALLOC |
              UF0_VS_REALLOC |
              1);
    OUT_BATCH(ctx,
              ((urb_clip_start + urb_clip_size) << UF1_CLIP_FENCE_SHIFT) |
              ((urb_gs_start + urb_gs_size) << UF1_GS_FENCE_SHIFT) |
              ((urb_vs_start + urb_vs_size) << UF1_VS_FENCE_SHIFT));
    OUT_BATCH(ctx,
              ((urb_cs_start + urb_cs_size) << UF2_CS_FENCE_SHIFT) |
              ((urb_sf_start + urb_sf_size) << UF2_SF_FENCE_SHIFT));
    ADVANCE_BATCH(ctx);
}

static void
i965_render_cs_urb_layout(VADriverContextP ctx)
{
    BEGIN_BATCH(ctx, 2);
    OUT_BATCH(ctx, CMD_CS_URB_STATE | 0);
    OUT_BATCH(ctx,
              ((URB_CS_ENTRY_SIZE - 1) << 4) |  /* URB Entry Allocation Size */
              (URB_CS_ENTRIES << 0));           /* Number of URB Entries */
    ADVANCE_BATCH(ctx);
}

static void
i965_render_drawing_rectangle(VADriverContextP ctx)
{
    struct i965_driver_data *i965 = i965_driver_data(ctx);
    struct i965_render_state *render_state = &i965->render_state;
    struct intel_region *dest_region = render_state->draw_region;

    BEGIN_BATCH(ctx, 4);
    OUT_BATCH(ctx, CMD_DRAWING_RECTANGLE | 2);
    OUT_BATCH(ctx, 0x00000000);
    OUT_BATCH(ctx, (dest_region->width - 1) | ((dest_region->height - 1) << 16));
    OUT_BATCH(ctx, 0x00000000);
    ADVANCE_BATCH(ctx);
}

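/*
 * Two R32G32_FLOAT vertex elements per vertex, matching the interleaved
 * layout written by the vertex upload helpers: (s, t) texture
 * coordinates at byte offset 0 and (x, y) screen position at byte
 * offset 8, each padded to a vec4 with 1.0.
 */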
static void
i965_render_vertex_elements(VADriverContextP ctx)
{
    struct i965_driver_data *i965 = i965_driver_data(ctx);

    if (IS_IGDNG(i965->intel.device_id)) {
        BEGIN_BATCH(ctx, 5);
        OUT_BATCH(ctx, CMD_VERTEX_ELEMENTS | 3);
        /* offset 0: S0, T0 -> {S0, T0, 1.0, 1.0} */
        OUT_BATCH(ctx, (0 << VE0_VERTEX_BUFFER_INDEX_SHIFT) |
                  VE0_VALID |
                  (I965_SURFACEFORMAT_R32G32_FLOAT << VE0_FORMAT_SHIFT) |
                  (0 << VE0_OFFSET_SHIFT));
        OUT_BATCH(ctx, (I965_VFCOMPONENT_STORE_SRC << VE1_VFCOMPONENT_0_SHIFT) |
                  (I965_VFCOMPONENT_STORE_SRC << VE1_VFCOMPONENT_1_SHIFT) |
                  (I965_VFCOMPONENT_STORE_1_FLT << VE1_VFCOMPONENT_2_SHIFT) |
                  (I965_VFCOMPONENT_STORE_1_FLT << VE1_VFCOMPONENT_3_SHIFT));
        /* offset 8: X, Y -> {X, Y, 1.0, 1.0} */
        OUT_BATCH(ctx, (0 << VE0_VERTEX_BUFFER_INDEX_SHIFT) |
                  VE0_VALID |
                  (I965_SURFACEFORMAT_R32G32_FLOAT << VE0_FORMAT_SHIFT) |
                  (8 << VE0_OFFSET_SHIFT));
        OUT_BATCH(ctx, (I965_VFCOMPONENT_STORE_SRC << VE1_VFCOMPONENT_0_SHIFT) |
                  (I965_VFCOMPONENT_STORE_SRC << VE1_VFCOMPONENT_1_SHIFT) |
                  (I965_VFCOMPONENT_STORE_1_FLT << VE1_VFCOMPONENT_2_SHIFT) |
                  (I965_VFCOMPONENT_STORE_1_FLT << VE1_VFCOMPONENT_3_SHIFT));
        ADVANCE_BATCH(ctx);
    } else {
        BEGIN_BATCH(ctx, 5);
        OUT_BATCH(ctx, CMD_VERTEX_ELEMENTS | 3);
        /* offset 0: S0, T0 -> {S0, T0, 1.0, 1.0} */
        OUT_BATCH(ctx, (0 << VE0_VERTEX_BUFFER_INDEX_SHIFT) |
                  VE0_VALID |
                  (I965_SURFACEFORMAT_R32G32_FLOAT << VE0_FORMAT_SHIFT) |
                  (0 << VE0_OFFSET_SHIFT));
        OUT_BATCH(ctx, (I965_VFCOMPONENT_STORE_SRC << VE1_VFCOMPONENT_0_SHIFT) |
                  (I965_VFCOMPONENT_STORE_SRC << VE1_VFCOMPONENT_1_SHIFT) |
                  (I965_VFCOMPONENT_STORE_1_FLT << VE1_VFCOMPONENT_2_SHIFT) |
                  (I965_VFCOMPONENT_STORE_1_FLT << VE1_VFCOMPONENT_3_SHIFT) |
                  (0 << VE1_DESTINATION_ELEMENT_OFFSET_SHIFT));
        /* offset 8: X, Y -> {X, Y, 1.0, 1.0} */
        OUT_BATCH(ctx, (0 << VE0_VERTEX_BUFFER_INDEX_SHIFT) |
                  VE0_VALID |
                  (I965_SURFACEFORMAT_R32G32_FLOAT << VE0_FORMAT_SHIFT) |
                  (8 << VE0_OFFSET_SHIFT));
        OUT_BATCH(ctx, (I965_VFCOMPONENT_STORE_SRC << VE1_VFCOMPONENT_0_SHIFT) |
                  (I965_VFCOMPONENT_STORE_SRC << VE1_VFCOMPONENT_1_SHIFT) |
                  (I965_VFCOMPONENT_STORE_1_FLT << VE1_VFCOMPONENT_2_SHIFT) |
                  (I965_VFCOMPONENT_STORE_1_FLT << VE1_VFCOMPONENT_3_SHIFT) |
                  (4 << VE1_DESTINATION_ELEMENT_OFFSET_SHIFT));
        ADVANCE_BATCH(ctx);
    }
}

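/*
 * Load the subpicture palette into the sampler palette, merging each
 * RGB entry with the caller-supplied global alpha in the top byte.
 */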
static void
i965_render_upload_image_palette(
    VADriverContextP ctx,
    VAImageID        image_id,
    unsigned int     alpha
)
{
    struct i965_driver_data *i965 = i965_driver_data(ctx);
    unsigned int i;

    struct object_image *obj_image = IMAGE(image_id);
    assert(obj_image);

    if (obj_image->image.num_palette_entries == 0)
        return;

    BEGIN_BATCH(ctx, 1 + obj_image->image.num_palette_entries);
    OUT_BATCH(ctx, CMD_SAMPLER_PALETTE_LOAD | (obj_image->image.num_palette_entries - 1));
    /* fill palette: bits 0-23 carry the color, bits 24-31 the alpha */
    for (i = 0; i < obj_image->image.num_palette_entries; i++)
        OUT_BATCH(ctx, (alpha << 24) | obj_image->palette[i]);
    ADVANCE_BATCH(ctx);
}

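/*
 * Point the hardware at the vertex buffer and fire a single
 * three-vertex RECTLIST primitive. On IGDNG the vertex-buffer state
 * takes an explicit end address (3 vertices * 4 floats each) where
 * earlier generations take a max index.
 */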
static void
i965_render_startup(VADriverContextP ctx)
{
    struct i965_driver_data *i965 = i965_driver_data(ctx);
    struct i965_render_state *render_state = &i965->render_state;

    BEGIN_BATCH(ctx, 11);
    OUT_BATCH(ctx, CMD_VERTEX_BUFFERS | 3);
    OUT_BATCH(ctx,
              (0 << VB0_BUFFER_INDEX_SHIFT) |
              VB0_VERTEXDATA |
              ((4 * 4) << VB0_BUFFER_PITCH_SHIFT));
    OUT_RELOC(ctx, render_state->vb.vertex_buffer, I915_GEM_DOMAIN_VERTEX, 0, 0);

    if (IS_IGDNG(i965->intel.device_id))
        OUT_RELOC(ctx, render_state->vb.vertex_buffer, I915_GEM_DOMAIN_VERTEX, 0, 12 * 4);
    else
        OUT_BATCH(ctx, 3);

    OUT_BATCH(ctx, 0);

    OUT_BATCH(ctx,
              CMD_3DPRIMITIVE |
              _3DPRIMITIVE_VERTEX_SEQUENTIAL |
              (_3DPRIM_RECTLIST << _3DPRIMITIVE_TOPOLOGY_SHIFT) |
              (0 << 9) |
              4);
    OUT_BATCH(ctx, 3); /* vertex count per instance */
    OUT_BATCH(ctx, 0); /* start vertex offset */
    OUT_BATCH(ctx, 1); /* single instance */
    OUT_BATCH(ctx, 0); /* start instance location */
    OUT_BATCH(ctx, 0); /* index buffer offset, ignored */
    ADVANCE_BATCH(ctx);
}

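/*
 * Clear the destination with the 2D blitter (XY_COLOR_BLT) before
 * drawing; tiled destinations take their pitch in dwords.
 */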
static void
i965_clear_dest_region(VADriverContextP ctx)
{
    struct i965_driver_data *i965 = i965_driver_data(ctx);
    struct i965_render_state *render_state = &i965->render_state;
    struct intel_region *dest_region = render_state->draw_region;
    unsigned int blt_cmd, br13;
    int pitch;

    blt_cmd = XY_COLOR_BLT_CMD;
    br13 = 0xf0 << 16;
    pitch = dest_region->pitch;

    if (dest_region->cpp == 4) {
        br13 |= BR13_8888;
        blt_cmd |= (XY_COLOR_BLT_WRITE_RGB | XY_COLOR_BLT_WRITE_ALPHA);
    } else {
        assert(dest_region->cpp == 2);
        br13 |= BR13_565;
    }

    if (dest_region->tiling != I915_TILING_NONE) {
        blt_cmd |= XY_COLOR_BLT_DST_TILED;
        pitch /= 4;
    }

    br13 |= pitch;

    BEGIN_BATCH(ctx, 6);
    OUT_BATCH(ctx, blt_cmd);
    OUT_BATCH(ctx, br13);
    OUT_BATCH(ctx, (dest_region->y << 16) | (dest_region->x));
    OUT_BATCH(ctx, ((dest_region->y + dest_region->height) << 16) |
              (dest_region->x + dest_region->width));
    OUT_RELOC(ctx, dest_region->bo,
              I915_GEM_DOMAIN_RENDER, I915_GEM_DOMAIN_RENDER,
              0);
    OUT_BATCH(ctx, 0x0);
    ADVANCE_BATCH(ctx);
}

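/*
 * Assemble the full command stream for one blit of the video surface.
 * The subpicture variant below is identical except that it skips the
 * destination clear.
 */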
static void
i965_surface_render_pipeline_setup(VADriverContextP ctx)
{
    intel_batchbuffer_start_atomic(ctx, 0x1000);
    intel_batchbuffer_emit_mi_flush(ctx);
    i965_clear_dest_region(ctx);
    i965_render_pipeline_select(ctx);
    i965_render_state_sip(ctx);
    i965_render_state_base_address(ctx);
    i965_render_binding_table_pointers(ctx);
    i965_render_constant_color(ctx);
    i965_render_pipelined_pointers(ctx);
    i965_render_urb_layout(ctx);
    i965_render_cs_urb_layout(ctx);
    i965_render_drawing_rectangle(ctx);
    i965_render_vertex_elements(ctx);
    i965_render_startup(ctx);
    intel_batchbuffer_end_atomic(ctx);
}

static void
i965_subpic_render_pipeline_setup(VADriverContextP ctx)
{
    intel_batchbuffer_start_atomic(ctx, 0x1000);
    intel_batchbuffer_emit_mi_flush(ctx);
    i965_render_pipeline_select(ctx);
    i965_render_state_sip(ctx);
    i965_render_state_base_address(ctx);
    i965_render_binding_table_pointers(ctx);
    i965_render_constant_color(ctx);
    i965_render_pipelined_pointers(ctx);
    i965_render_urb_layout(ctx);
    i965_render_cs_urb_layout(ctx);
    i965_render_drawing_rectangle(ctx);
    i965_render_vertex_elements(ctx);
    i965_render_startup(ctx);
    intel_batchbuffer_end_atomic(ctx);
}

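/*
 * (Re)allocate every state BO used by a single render pass. Old buffers
 * are unreferenced first, so this is safe to call before each
 * PutSurface.
 */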
static void
i965_render_initialize(VADriverContextP ctx)
{
    struct i965_driver_data *i965 = i965_driver_data(ctx);
    struct i965_render_state *render_state = &i965->render_state;
    int i;
    dri_bo *bo;

    /* VERTEX BUFFER */
    dri_bo_unreference(render_state->vb.vertex_buffer);
    bo = dri_bo_alloc(i965->intel.bufmgr,
                      "vertex buffer",
                      4096,
                      4096);
    assert(bo);
    render_state->vb.vertex_buffer = bo;

    /* VS */
    dri_bo_unreference(render_state->vs.state);
    bo = dri_bo_alloc(i965->intel.bufmgr,
                      "vs state",
                      sizeof(struct i965_vs_unit_state),
                      64);
    assert(bo);
    render_state->vs.state = bo;

    /* GS */
    /* CLIP */
    /* SF */
    dri_bo_unreference(render_state->sf.state);
    bo = dri_bo_alloc(i965->intel.bufmgr,
                      "sf state",
                      sizeof(struct i965_sf_unit_state),
                      64);
    assert(bo);
    render_state->sf.state = bo;

    /* WM */
    for (i = 0; i < MAX_RENDER_SURFACES; i++) {
        dri_bo_unreference(render_state->wm.surface[i]);
        render_state->wm.surface[i] = NULL;
    }

    dri_bo_unreference(render_state->wm.binding_table);
    bo = dri_bo_alloc(i965->intel.bufmgr,
                      "binding table",
                      MAX_RENDER_SURFACES * sizeof(unsigned int),
                      64);
    assert(bo);
    render_state->wm.binding_table = bo;

    dri_bo_unreference(render_state->wm.sampler);
    bo = dri_bo_alloc(i965->intel.bufmgr,
                      "sampler state",
                      MAX_SAMPLERS * sizeof(struct i965_sampler_state),
                      64);
    assert(bo);
    render_state->wm.sampler = bo;
    render_state->wm.sampler_count = 0;

    dri_bo_unreference(render_state->wm.state);
    bo = dri_bo_alloc(i965->intel.bufmgr,
                      "wm state",
                      sizeof(struct i965_wm_unit_state),
                      64);
    assert(bo);
    render_state->wm.state = bo;

    /* COLOR CALCULATOR */
    dri_bo_unreference(render_state->cc.state);
    bo = dri_bo_alloc(i965->intel.bufmgr,
                      "color calc state",
                      sizeof(struct i965_cc_unit_state),
                      64);
    assert(bo);
    render_state->cc.state = bo;

    dri_bo_unreference(render_state->cc.viewport);
    bo = dri_bo_alloc(i965->intel.bufmgr,
                      "cc viewport",
                      sizeof(struct i965_cc_viewport),
                      64);
    assert(bo);
    render_state->cc.viewport = bo;
}

void
i965_render_put_surface(VADriverContextP ctx,
                        VASurfaceID surface,
                        short srcx,
                        short srcy,
                        unsigned short srcw,
                        unsigned short srch,
                        short destx,
                        short desty,
                        unsigned short destw,
                        unsigned short desth)
{
    i965_render_initialize(ctx);
    i965_surface_render_state_setup(ctx, surface,
                                    srcx, srcy, srcw, srch,
                                    destx, desty, destw, desth);
    i965_surface_render_pipeline_setup(ctx);
    intel_batchbuffer_flush(ctx);
}

void
i965_render_put_subpic(VADriverContextP ctx,
                       VASurfaceID surface,
                       short srcx,
                       short srcy,
                       unsigned short srcw,
                       unsigned short srch,
                       short destx,
                       short desty,
                       unsigned short destw,
                       unsigned short desth)
{
    struct i965_driver_data *i965 = i965_driver_data(ctx);
    struct object_surface *obj_surface = SURFACE(surface);
    struct object_subpic *obj_subpic;

    assert(obj_surface);
    obj_subpic = SUBPIC(obj_surface->subpic);
    assert(obj_subpic);

    i965_render_initialize(ctx);
    i965_subpic_render_state_setup(ctx, surface,
                                   srcx, srcy, srcw, srch,
                                   destx, desty, destw, desth);
    i965_subpic_render_pipeline_setup(ctx);
    i965_render_upload_image_palette(ctx, obj_subpic->image, 0xff);
    intel_batchbuffer_flush(ctx);
}

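/*
 * Upload the static kernel binaries for the detected GPU generation
 * into their own BOs.
 */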
Bool
i965_render_init(VADriverContextP ctx)
{
    struct i965_driver_data *i965 = i965_driver_data(ctx);
    int i;

    /* kernel */
    assert(NUM_RENDER_KERNEL == (sizeof(render_kernels_gen5) /
                                 sizeof(render_kernels_gen5[0])));

    if (IS_IGDNG(i965->intel.device_id))
        render_kernels = render_kernels_gen5;
    else
        render_kernels = render_kernels_gen4;

    for (i = 0; i < NUM_RENDER_KERNEL; i++) {
        struct render_kernel *kernel = &render_kernels[i];

        kernel->bo = dri_bo_alloc(i965->intel.bufmgr,
                                  kernel->name,
                                  kernel->size, 64);
        assert(kernel->bo);
        dri_bo_subdata(kernel->bo, 0, kernel->size, kernel->bin);
    }

    return True;
}

Bool
i965_render_terminate(VADriverContextP ctx)
{
    int i;
    struct i965_driver_data *i965 = i965_driver_data(ctx);
    struct i965_render_state *render_state = &i965->render_state;

    for (i = 0; i < NUM_RENDER_KERNEL; i++) {
        struct render_kernel *kernel = &render_kernels[i];

        dri_bo_unreference(kernel->bo);
        kernel->bo = NULL;
    }

    dri_bo_unreference(render_state->vb.vertex_buffer);
    render_state->vb.vertex_buffer = NULL;
    dri_bo_unreference(render_state->vs.state);
    render_state->vs.state = NULL;
    dri_bo_unreference(render_state->sf.state);
    render_state->sf.state = NULL;
    dri_bo_unreference(render_state->wm.binding_table);
    render_state->wm.binding_table = NULL;
    dri_bo_unreference(render_state->wm.sampler);
    render_state->wm.sampler = NULL;
    dri_bo_unreference(render_state->wm.state);
    render_state->wm.state = NULL;

    for (i = 0; i < MAX_RENDER_SURFACES; i++) {
        dri_bo_unreference(render_state->wm.surface[i]);
        render_state->wm.surface[i] = NULL;
    }

    dri_bo_unreference(render_state->cc.viewport);
    render_state->cc.viewport = NULL;
    dri_bo_unreference(render_state->cc.state);
    render_state->cc.state = NULL;

    if (render_state->draw_region) {
        dri_bo_unreference(render_state->draw_region->bo);
        free(render_state->draw_region);
        render_state->draw_region = NULL;
    }

    return True;
}