/*
 * Copyright © 2009 Intel Corporation
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the
 * "Software"), to deal in the Software without restriction, including
 * without limitation the rights to use, copy, modify, merge, publish,
 * distribute, sub license, and/or sell copies of the Software, and to
 * permit persons to whom the Software is furnished to do so, subject to
 * the following conditions:
 *
 * The above copyright notice and this permission notice (including the
 * next paragraph) shall be included in all copies or substantial portions
 * of the Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
 * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
 * IN NO EVENT SHALL PRECISION INSIGHT AND/OR ITS SUPPLIERS BE LIABLE FOR
 * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
 * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
 * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
 *
 * Authors:
 *    Zhou Chang <chang.zhou@intel.com>
 *
 */

#include <stdio.h>
#include <string.h>
#include <assert.h>

#include <va/va_backend.h>

#include "intel_batchbuffer.h"
#include "intel_driver.h"

#include "i965_defines.h"
#include "i965_drv_video.h"
#include "gen6_vme.h"
#include "i965_encoder.h"

#define VME_INTRA_SHADER        0
#define VME_INTER_SHADER        1

#define CURBE_ALLOCATION_SIZE   37              /* in 256-bit units */
#define CURBE_TOTAL_DATA_LENGTH (4 * 32)        /* in bytes, it should be less than or equal to CURBE_ALLOCATION_SIZE * 32 */
#define CURBE_URB_ENTRY_LENGTH  4               /* in 256-bit units, it should be less than or equal to CURBE_TOTAL_DATA_LENGTH / 32 */

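/*
 * Pre-assembled Gen6 EU kernel binaries for the VME shaders. Each row is
 * four dwords (presumably one 128-bit EU instruction each); the all-zero
 * row at the end is a terminator/padding entry.
 */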
static const uint32_t gen6_vme_intra_frame[][4] = {
#include "shaders/vme/intra_frame.g6b"
    {0, 0, 0, 0}
};

static const uint32_t gen6_vme_inter_frame[][4] = {
#include "shaders/vme/inter_frame.g6b"
    {0, 0, 0, 0}
};

static struct i965_kernel gen6_vme_kernels[] = {
    {
        "VME Intra Frame",
        VME_INTRA_SHADER,               /* index */
        gen6_vme_intra_frame,
        sizeof(gen6_vme_intra_frame),
        NULL
    },
    {
        "VME Inter Frame",
        VME_INTER_SHADER,
        gen6_vme_inter_frame,
        sizeof(gen6_vme_inter_frame),
        NULL
    }
};

#define GEN6_VME_KERNEL_NUMBER ARRAY_ELEMS(gen6_vme_kernels)

static void
gen6_vme_set_common_surface_tiling(struct i965_surface_state *ss, unsigned int tiling)
{
    switch (tiling) {
    case I915_TILING_NONE:
        ss->ss3.tiled_surface = 0;
        ss->ss3.tile_walk = 0;
        break;
    case I915_TILING_X:
        ss->ss3.tiled_surface = 1;
        ss->ss3.tile_walk = I965_TILEWALK_XMAJOR;
        break;
    case I915_TILING_Y:
        ss->ss3.tiled_surface = 1;
        ss->ss3.tile_walk = I965_TILEWALK_YMAJOR;
        break;
    }
}

static void
gen6_vme_set_source_surface_tiling(struct i965_surface_state2 *ss, unsigned int tiling)
{
    switch (tiling) {
    case I915_TILING_NONE:
        ss->ss2.tiled_surface = 0;
        ss->ss2.tile_walk = 0;
        break;
    case I915_TILING_X:
        ss->ss2.tiled_surface = 1;
        ss->ss2.tile_walk = I965_TILEWALK_XMAJOR;
        break;
    case I915_TILING_Y:
        ss->ss2.tiled_surface = 1;
        ss->ss2.tile_walk = I965_TILEWALK_YMAJOR;
        break;
    }
}

/* only used for VME source surface state */
static void gen6_vme_source_surface_state(VADriverContextP ctx,
                                          int index,
                                          struct object_surface *obj_surface,
                                          struct gen6_encoder_context *gen6_encoder_context)
{
    struct i965_driver_data *i965 = i965_driver_data(ctx);
    struct gen6_vme_context *vme_context = &gen6_encoder_context->vme_context;
    struct i965_surface_state2 *ss;
    dri_bo *bo;
    int w, h, w_pitch, h_pitch;
    unsigned int tiling, swizzle;

    assert(obj_surface->bo);
    dri_bo_get_tiling(obj_surface->bo, &tiling, &swizzle);

    w = obj_surface->orig_width;
    h = obj_surface->orig_height;
    w_pitch = obj_surface->width;
    h_pitch = obj_surface->height;

    bo = dri_bo_alloc(i965->intel.bufmgr,
                      "VME surface state",
                      sizeof(struct i965_surface_state2),
                      0x1000);
    assert(bo);
    dri_bo_map(bo, 1);
    assert(bo->virtual);
    ss = bo->virtual;
    memset(ss, 0, sizeof(*ss));

    ss->ss0.surface_base_address = obj_surface->bo->offset;

    ss->ss1.cbcr_pixel_offset_v_direction = 2;
    ss->ss1.width = w - 1;
    ss->ss1.height = h - 1;

    ss->ss2.surface_format = MFX_SURFACE_PLANAR_420_8;
    ss->ss2.interleave_chroma = 1;
    ss->ss2.pitch = w_pitch - 1;
    ss->ss2.half_pitch_for_chroma = 0;

    gen6_vme_set_source_surface_tiling(ss, tiling);

    /* UV offset for interleave mode */
    ss->ss3.x_offset_for_cb = 0;
    ss->ss3.y_offset_for_cb = h_pitch;

    dri_bo_unmap(bo);

    dri_bo_emit_reloc(bo,
                      I915_GEM_DOMAIN_RENDER, 0,
                      0,
                      offsetof(struct i965_surface_state2, ss0),
                      obj_surface->bo);

    assert(index < MAX_MEDIA_SURFACES_GEN6);
    vme_context->surface_state[index].bo = bo;
}

static void
gen6_vme_media_source_surface_state(VADriverContextP ctx,
                                    int index,
                                    struct object_surface *obj_surface,
                                    struct gen6_encoder_context *gen6_encoder_context)
{
    struct i965_driver_data *i965 = i965_driver_data(ctx);
    struct gen6_vme_context *vme_context = &gen6_encoder_context->vme_context;
    struct i965_surface_state *ss;
    dri_bo *bo;
    int w, h, w_pitch;
    unsigned int tiling, swizzle;

    w = obj_surface->orig_width;
    h = obj_surface->orig_height;
    w_pitch = obj_surface->width;

    /* Y plane */
    dri_bo_get_tiling(obj_surface->bo, &tiling, &swizzle);
    bo = dri_bo_alloc(i965->intel.bufmgr,
                      "surface state",
                      sizeof(struct i965_surface_state),
                      0x1000);
    assert(bo);

    dri_bo_map(bo, 1);
    assert(bo->virtual);
    ss = bo->virtual;
    memset(ss, 0, sizeof(*ss));
    ss->ss0.surface_type = I965_SURFACE_2D;
    ss->ss0.surface_format = I965_SURFACEFORMAT_R8_UNORM;
    ss->ss1.base_addr = obj_surface->bo->offset;
    ss->ss2.width = w / 4 - 1;
    ss->ss2.height = h - 1;
    ss->ss3.pitch = w_pitch - 1;
    gen6_vme_set_common_surface_tiling(ss, tiling);
    dri_bo_emit_reloc(bo,
                      I915_GEM_DOMAIN_RENDER,
                      0,
                      0,
                      offsetof(struct i965_surface_state, ss1),
                      obj_surface->bo);
    dri_bo_unmap(bo);

    assert(index < MAX_MEDIA_SURFACES_GEN6);
    vme_context->surface_state[index].bo = bo;
}

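/*
 * Allocate the VME output buffer and wrap it in a BUFFER-type surface
 * state: one 16-byte (OWORD) record per macroblock for intra frames, four
 * per macroblock for inter frames. The entry count is split across the
 * width/height/depth fields of the surface state.
 */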
static VAStatus
gen6_vme_output_buffer_setup(VADriverContextP ctx,
                             struct encode_state *encode_state,
                             int index,
                             struct gen6_encoder_context *gen6_encoder_context)
{
    struct i965_driver_data *i965 = i965_driver_data(ctx);
    struct gen6_vme_context *vme_context = &gen6_encoder_context->vme_context;
    struct i965_surface_state *ss;
    dri_bo *bo;
    VAEncSequenceParameterBufferH264 *pSequenceParameter = (VAEncSequenceParameterBufferH264 *)encode_state->seq_param->buffer;
    VAEncSliceParameterBuffer *pSliceParameter = (VAEncSliceParameterBuffer *)encode_state->slice_params[0]->buffer;
    int is_intra = pSliceParameter->slice_flags.bits.is_intra;
    int width_in_mbs = pSequenceParameter->picture_width_in_mbs;
    int height_in_mbs = pSequenceParameter->picture_height_in_mbs;
    int num_entries;

    if (is_intra) {
        vme_context->vme_output.num_blocks = width_in_mbs * height_in_mbs;
    } else {
        vme_context->vme_output.num_blocks = width_in_mbs * height_in_mbs * 4;
    }
    vme_context->vme_output.size_block = 16; /* an OWORD */
    vme_context->vme_output.pitch = ALIGN(vme_context->vme_output.size_block, 16);
    bo = dri_bo_alloc(i965->intel.bufmgr,
                      "VME output buffer",
                      vme_context->vme_output.num_blocks * vme_context->vme_output.pitch,
                      0x1000);
    assert(bo);
    vme_context->vme_output.bo = bo;

    bo = dri_bo_alloc(i965->intel.bufmgr,
                      "VME output buffer state",
                      sizeof(struct i965_surface_state),
                      0x1000);
    assert(bo);
    dri_bo_map(bo, 1);
    assert(bo->virtual);
    ss = bo->virtual;
    memset(ss, 0, sizeof(*ss));

    /* always use 16 bytes as pitch on Sandy Bridge */
    num_entries = vme_context->vme_output.num_blocks * vme_context->vme_output.pitch / 16;
    ss->ss0.render_cache_read_mode = 1;
    ss->ss0.surface_type = I965_SURFACE_BUFFER;
    ss->ss1.base_addr = vme_context->vme_output.bo->offset;
    ss->ss2.width = ((num_entries - 1) & 0x7f);
    ss->ss2.height = (((num_entries - 1) >> 7) & 0x1fff);
    ss->ss3.depth = (((num_entries - 1) >> 20) & 0x7f);
    ss->ss3.pitch = vme_context->vme_output.pitch - 1;
    dri_bo_emit_reloc(bo,
                      I915_GEM_DOMAIN_RENDER, I915_GEM_DOMAIN_RENDER,
                      0,
                      offsetof(struct i965_surface_state, ss1),
                      vme_context->vme_output.bo);

    dri_bo_unmap(bo);

    assert(index < MAX_MEDIA_SURFACES_GEN6);
    vme_context->surface_state[index].bo = bo;
    return VA_STATUS_SUCCESS;
}

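/*
 * Build all surface states and the binding table for one slice. Fixed
 * binding table slots are used: 0 = current frame (VME view),
 * 1 = reference 0, 3 = VME output buffer, 4 = current frame (media R8
 * view). Each binding table entry holds the GTT offset of the
 * corresponding surface state bo, patched via relocations.
 */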
static VAStatus gen6_vme_surface_setup(VADriverContextP ctx,
                                       struct encode_state *encode_state,
                                       int is_intra,
                                       struct gen6_encoder_context *gen6_encoder_context)
{
    struct i965_driver_data *i965 = i965_driver_data(ctx);
    struct gen6_vme_context *vme_context = &gen6_encoder_context->vme_context;
    struct object_surface *obj_surface;
    unsigned int *binding_table;
    dri_bo *bo = vme_context->binding_table.bo;
    int i;
    VAEncPictureParameterBufferH264 *pPicParameter = (VAEncPictureParameterBufferH264 *)encode_state->pic_param->buffer;

    /* Set up surface states */
    /* current picture for encoding */
    obj_surface = SURFACE(encode_state->current_render_target);
    assert(obj_surface);
    gen6_vme_source_surface_state(ctx, 0, obj_surface, gen6_encoder_context);
    gen6_vme_media_source_surface_state(ctx, 4, obj_surface, gen6_encoder_context);

    if (!is_intra) {
        /* reference 0 */
        obj_surface = SURFACE(pPicParameter->reference_picture);
        assert(obj_surface);
        gen6_vme_source_surface_state(ctx, 1, obj_surface, gen6_encoder_context);
        /* reference 1, FIXME: */
        // obj_surface = SURFACE(pPicParameter->reference_picture);
        // assert(obj_surface);
        // gen6_vme_source_surface_state(ctx, 2, obj_surface);
    }

    /* VME output */
    gen6_vme_output_buffer_setup(ctx, encode_state, 3, gen6_encoder_context);

    /* Build the binding table */
    dri_bo_map(bo, 1);
    assert(bo->virtual);
    binding_table = bo->virtual;
    memset(binding_table, 0, bo->size);

    for (i = 0; i < MAX_MEDIA_SURFACES_GEN6; i++) {
        if (vme_context->surface_state[i].bo) {
            binding_table[i] = vme_context->surface_state[i].bo->offset;
            dri_bo_emit_reloc(bo,
                              I915_GEM_DOMAIN_INSTRUCTION, 0,
                              0,
                              i * sizeof(*binding_table),
                              vme_context->surface_state[i].bo);
        }
    }

    dri_bo_unmap(vme_context->binding_table.bo);

    return VA_STATUS_SUCCESS;
}

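/*
 * Fill the interface descriptor remapping table (IDRT): one 32-byte
 * descriptor per VME kernel. Per the original "Sampler State (VME state
 * pointer)" note, the sampler state pointer field is repurposed to point
 * at the VME state buffer.
 */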
static VAStatus gen6_vme_interface_setup(VADriverContextP ctx,
                                         struct encode_state *encode_state,
                                         struct gen6_encoder_context *gen6_encoder_context)
{
    struct gen6_vme_context *vme_context = &gen6_encoder_context->vme_context;
    struct gen6_interface_descriptor_data *desc;
    int i;
    dri_bo *bo;

    bo = vme_context->idrt.bo;
    dri_bo_map(bo, 1);
    assert(bo->virtual);
    desc = bo->virtual;

    for (i = 0; i < GEN6_VME_KERNEL_NUMBER; i++) {
        struct i965_kernel *kernel;
        kernel = &gen6_vme_kernels[i];
        assert(sizeof(*desc) == 32);
        /* Set up the descriptor table */
        memset(desc, 0, sizeof(*desc));
        desc->desc0.kernel_start_pointer = (kernel->bo->offset >> 6);
        desc->desc2.sampler_count = 1; /* FIXME: */
        desc->desc2.sampler_state_pointer = (vme_context->vme_state.bo->offset >> 5);
        desc->desc3.binding_table_entry_count = 1; /* FIXME: */
        desc->desc3.binding_table_pointer = (vme_context->binding_table.bo->offset >> 5);
        desc->desc4.constant_urb_entry_read_offset = 0;
        desc->desc4.constant_urb_entry_read_length = CURBE_URB_ENTRY_LENGTH;

        /* kernel start */
        dri_bo_emit_reloc(bo,
                          I915_GEM_DOMAIN_INSTRUCTION, 0,
                          0,
                          i * sizeof(*desc) + offsetof(struct gen6_interface_descriptor_data, desc0),
                          kernel->bo);
        /* Sampler State (VME state pointer) */
        dri_bo_emit_reloc(bo,
                          I915_GEM_DOMAIN_INSTRUCTION, 0,
                          (1 << 2),             /* delta: presumably re-encodes sampler_count = 1 */
                          i * sizeof(*desc) + offsetof(struct gen6_interface_descriptor_data, desc2),
                          vme_context->vme_state.bo);
        /* binding table */
        dri_bo_emit_reloc(bo,
                          I915_GEM_DOMAIN_INSTRUCTION, 0,
                          4,                    /* one entry */
                          i * sizeof(*desc) + offsetof(struct gen6_interface_descriptor_data, desc3),
                          vme_context->binding_table.bo);
        desc++;
    }
    dri_bo_unmap(bo);

    return VA_STATUS_SUCCESS;
}

static VAStatus gen6_vme_constant_setup(VADriverContextP ctx,
                                        struct encode_state *encode_state,
                                        struct gen6_encoder_context *gen6_encoder_context)
{
    struct gen6_vme_context *vme_context = &gen6_encoder_context->vme_context;
    unsigned char *constant_buffer;

    dri_bo_map(vme_context->curbe.bo, 1);
    assert(vme_context->curbe.bo->virtual);
    constant_buffer = vme_context->curbe.bo->virtual;

    /* TODO: copy buffer into CURBE */

    dri_bo_unmap(vme_context->curbe.bo);

    return VA_STATUS_SUCCESS;
}

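/*
 * Build the VME state message that the kernels read through the sampler
 * state pointer. The first 32 dwords are filled with a default pattern
 * (0x11); dword 16 carries the intra cost-function LUT, per the original
 * inline note.
 */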
static VAStatus gen6_vme_vme_state_setup(VADriverContextP ctx,
                                         struct encode_state *encode_state,
                                         int is_intra,
                                         struct gen6_encoder_context *gen6_encoder_context)
{
    struct gen6_vme_context *vme_context = &gen6_encoder_context->vme_context;
    unsigned int *vme_state_message;
    int i;

    /* build the VME state message */
    dri_bo_map(vme_context->vme_state.bo, 1);
    assert(vme_context->vme_state.bo->virtual);
    vme_state_message = (unsigned int *)vme_context->vme_state.bo->virtual;

    for (i = 0; i < 32; i++) {
        vme_state_message[i] = 0x11;
    }
    vme_state_message[16] = 0x42424242;         /* cost function LUT set 0 for Intra */

    dri_bo_unmap(vme_context->vme_state.bo);
    return VA_STATUS_SUCCESS;
}

static void gen6_vme_pipeline_select(VADriverContextP ctx)
{
    BEGIN_BATCH(ctx, 1);
    OUT_BATCH(ctx, CMD_PIPELINE_SELECT | PIPELINE_SELECT_MEDIA);
    ADVANCE_BATCH(ctx);
}

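/*
 * Program STATE_BASE_ADDRESS (10 dwords on Gen6): five base addresses
 * followed by four access upper bounds. All bases are set to zero with the
 * modify bit, so the state pointers emitted elsewhere are treated as
 * absolute graphics addresses.
 */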
static void gen6_vme_state_base_address(VADriverContextP ctx)
{
    BEGIN_BATCH(ctx, 10);

    OUT_BATCH(ctx, CMD_STATE_BASE_ADDRESS | 8);

    OUT_BATCH(ctx, 0 | BASE_ADDRESS_MODIFY);            /* General State Base Address */
    OUT_BATCH(ctx, 0 | BASE_ADDRESS_MODIFY);            /* Surface State Base Address */
    OUT_BATCH(ctx, 0 | BASE_ADDRESS_MODIFY);            /* Dynamic State Base Address */
    OUT_BATCH(ctx, 0 | BASE_ADDRESS_MODIFY);            /* Indirect Object Base Address */
    OUT_BATCH(ctx, 0 | BASE_ADDRESS_MODIFY);            /* Instruction Base Address */

    OUT_BATCH(ctx, 0xFFFFF000 | BASE_ADDRESS_MODIFY);   /* General State Access Upper Bound */
    OUT_BATCH(ctx, 0xFFFFF000 | BASE_ADDRESS_MODIFY);   /* Dynamic State Access Upper Bound */
    OUT_BATCH(ctx, 0xFFFFF000 | BASE_ADDRESS_MODIFY);   /* Indirect Object Access Upper Bound */
    OUT_BATCH(ctx, 0xFFFFF000 | BASE_ADDRESS_MODIFY);   /* Instruction Access Upper Bound */

    /*
      OUT_BATCH(ctx, 0 | BASE_ADDRESS_MODIFY);          // LLC Coherent Base Address
      OUT_BATCH(ctx, 0xFFFFF000 | BASE_ADDRESS_MODIFY); // LLC Coherent Upper Bound
    */

    ADVANCE_BATCH(ctx);
}

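/*
 * Program MEDIA_VFE_STATE: maximum thread count, number of URB entries,
 * URB entry size and CURBE allocation size for the media pipeline. The
 * trailing scoreboard dwords are left zeroed (scoreboarding disabled).
 */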
static void gen6_vme_vfe_state(VADriverContextP ctx, struct gen6_encoder_context *gen6_encoder_context)
{
    struct gen6_vme_context *vme_context = &gen6_encoder_context->vme_context;

    BEGIN_BATCH(ctx, 8);

    OUT_BATCH(ctx, CMD_MEDIA_VFE_STATE | 6);    /* on Gen6, MEDIA_VFE_STATE replaces CMD_MEDIA_STATE_POINTERS */
    OUT_BATCH(ctx, 0);                          /* Scratch Space Base Pointer and Space */
    OUT_BATCH(ctx, (vme_context->vfe_state.max_num_threads << 16)
              | (vme_context->vfe_state.num_urb_entries << 8)
              | (vme_context->vfe_state.gpgpu_mode << 2));      /* Maximum Number of Threads, Number of URB Entries, MEDIA Mode */
    OUT_BATCH(ctx, 0);                          /* Debug: Object ID */
    OUT_BATCH(ctx, (vme_context->vfe_state.urb_entry_size << 16)
              | vme_context->vfe_state.curbe_allocation_size);  /* URB Entry Allocation Size, CURBE Allocation Size */
    OUT_BATCH(ctx, 0);                          /* Disable Scoreboard */
    OUT_BATCH(ctx, 0);                          /* Disable Scoreboard */
    OUT_BATCH(ctx, 0);                          /* Disable Scoreboard */

    ADVANCE_BATCH(ctx);
}

static void gen6_vme_curbe_load(VADriverContextP ctx, struct gen6_encoder_context *gen6_encoder_context)
{
    struct gen6_vme_context *vme_context = &gen6_encoder_context->vme_context;

    BEGIN_BATCH(ctx, 4);

    OUT_BATCH(ctx, CMD_MEDIA_CURBE_LOAD | 2);
    OUT_BATCH(ctx, 0);

    OUT_BATCH(ctx, CURBE_TOTAL_DATA_LENGTH);
    OUT_RELOC(ctx, vme_context->curbe.bo, I915_GEM_DOMAIN_INSTRUCTION, 0, 0);

    ADVANCE_BATCH(ctx);
}

static void gen6_vme_idrt(VADriverContextP ctx, struct gen6_encoder_context *gen6_encoder_context)
{
    struct gen6_vme_context *vme_context = &gen6_encoder_context->vme_context;

    BEGIN_BATCH(ctx, 4);

    OUT_BATCH(ctx, CMD_MEDIA_INTERFACE_LOAD | 2);
    OUT_BATCH(ctx, 0);
    OUT_BATCH(ctx, GEN6_VME_KERNEL_NUMBER * sizeof(struct gen6_interface_descriptor_data));
    OUT_RELOC(ctx, vme_context->idrt.bo, I915_GEM_DOMAIN_INSTRUCTION, 0, 0);

    ADVANCE_BATCH(ctx);
}

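/*
 * Emit one MEDIA_OBJECT command per macroblock. The single inline data
 * dword packs the frame width in MBs and the MB coordinates; the return
 * value is the command length in bytes so the caller can check remaining
 * batch space.
 */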
static int gen6_vme_media_object(VADriverContextP ctx,
                                 struct encode_state *encode_state,
                                 int mb_x, int mb_y,
                                 int kernel)
{
    struct i965_driver_data *i965 = i965_driver_data(ctx);
    struct object_surface *obj_surface = SURFACE(encode_state->current_render_target);
    int mb_width = ALIGN(obj_surface->orig_width, 16) / 16;
    int len_in_dwords = 6 + 1;

    BEGIN_BATCH(ctx, len_in_dwords);

    OUT_BATCH(ctx, CMD_MEDIA_OBJECT | (len_in_dwords - 2));
    OUT_BATCH(ctx, kernel);             /* Interface Descriptor Offset */
    OUT_BATCH(ctx, 0);
    OUT_BATCH(ctx, 0);
    OUT_BATCH(ctx, 0);
    OUT_BATCH(ctx, 0);

    /* inline data */
    OUT_BATCH(ctx, mb_width << 16 | mb_y << 8 | mb_x);  /* M0.0 Reference0 X,Y, not used in Intra */
    ADVANCE_BATCH(ctx);

    return len_in_dwords * 4;
}

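/*
 * (Re)allocate the per-encode GPU buffers (CURBE, binding table, IDRT and
 * VME state), drop stale surface states and output buffers, and reset the
 * VFE defaults. Called at the start of every gen6_vme_pipeline() run.
 */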
static void gen6_vme_media_init(VADriverContextP ctx, struct gen6_encoder_context *gen6_encoder_context)
{
    int i;
    struct i965_driver_data *i965 = i965_driver_data(ctx);
    struct gen6_vme_context *vme_context = &gen6_encoder_context->vme_context;
    dri_bo *bo;

    /* constant buffer */
    dri_bo_unreference(vme_context->curbe.bo);
    bo = dri_bo_alloc(i965->intel.bufmgr,
                      "Buffer",
                      CURBE_TOTAL_DATA_LENGTH, 64);
    assert(bo);
    vme_context->curbe.bo = bo;

    /* surface state */
    for (i = 0; i < MAX_MEDIA_SURFACES_GEN6; i++) {
        dri_bo_unreference(vme_context->surface_state[i].bo);
        vme_context->surface_state[i].bo = NULL;
    }

    /* binding table */
    dri_bo_unreference(vme_context->binding_table.bo);
    bo = dri_bo_alloc(i965->intel.bufmgr,
                      "Buffer",
                      MAX_MEDIA_SURFACES_GEN6 * sizeof(unsigned int), 32);
    assert(bo);
    vme_context->binding_table.bo = bo;

    /* interface descriptor remapping table */
    dri_bo_unreference(vme_context->idrt.bo);
    bo = dri_bo_alloc(i965->intel.bufmgr,
                      "Buffer",
                      MAX_INTERFACE_DESC_GEN6 * sizeof(struct gen6_interface_descriptor_data), 16);
    assert(bo);
    vme_context->idrt.bo = bo;

    /* VME output buffer */
    dri_bo_unreference(vme_context->vme_output.bo);
    vme_context->vme_output.bo = NULL;

    /* VME state */
    dri_bo_unreference(vme_context->vme_state.bo);
    bo = dri_bo_alloc(i965->intel.bufmgr,
                      "Buffer",
                      1024 * 16, 64);
    assert(bo);
    vme_context->vme_state.bo = bo;

    vme_context->vfe_state.max_num_threads = 60 - 1;
    vme_context->vfe_state.num_urb_entries = 16;
    vme_context->vfe_state.gpgpu_mode = 0;
    vme_context->vfe_state.urb_entry_size = 59 - 1;
    vme_context->vfe_state.curbe_allocation_size = CURBE_ALLOCATION_SIZE - 1;
}

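/*
 * Walk the frame macroblock by macroblock, emitting one MEDIA_OBJECT per
 * MB. The full pipeline state (PIPE_CONTROL, PIPELINE_SELECT, base
 * addresses, VFE, CURBE, IDRT) is emitted up front and re-emitted whenever
 * the batch buffer fills up and has to be flushed mid-frame.
 */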
static void gen6_vme_pipeline_programing(VADriverContextP ctx,
                                         struct encode_state *encode_state,
                                         struct gen6_encoder_context *gen6_encoder_context)
{
    VAEncSliceParameterBuffer *pSliceParameter = (VAEncSliceParameterBuffer *)encode_state->slice_params[0]->buffer;
    VAEncSequenceParameterBufferH264 *pSequenceParameter = (VAEncSequenceParameterBufferH264 *)encode_state->seq_param->buffer;
    int is_intra = pSliceParameter->slice_flags.bits.is_intra;
    int width_in_mbs = pSequenceParameter->picture_width_in_mbs;
    int height_in_mbs = pSequenceParameter->picture_height_in_mbs;
    int emit_new_state = 1, object_len_in_bytes;
    int x, y;

    intel_batchbuffer_start_atomic(ctx, 0x1000);

    for (y = 0; y < height_in_mbs; y++) {
        for (x = 0; x < width_in_mbs; x++) {

            if (emit_new_state) {
                /* Step 1: MI_FLUSH/PIPE_CONTROL */
                BEGIN_BATCH(ctx, 4);
                OUT_BATCH(ctx, CMD_PIPE_CONTROL | 0x02);
                OUT_BATCH(ctx, 0);
                OUT_BATCH(ctx, 0);
                OUT_BATCH(ctx, 0);
                ADVANCE_BATCH(ctx);

                /* Step 2: State command PIPELINE_SELECT */
                gen6_vme_pipeline_select(ctx);

                /* Step 3: State commands configuring pipeline states */
                gen6_vme_state_base_address(ctx);
                gen6_vme_vfe_state(ctx, gen6_encoder_context);
                gen6_vme_curbe_load(ctx, gen6_encoder_context);
                gen6_vme_idrt(ctx, gen6_encoder_context);

                emit_new_state = 0;
            }

            /* Step 4: Primitive commands */
            object_len_in_bytes = gen6_vme_media_object(ctx, encode_state, x, y, is_intra ? VME_INTRA_SHADER : VME_INTER_SHADER);

            if (intel_batchbuffer_check_free_space(ctx, object_len_in_bytes) == 0) {
                intel_batchbuffer_end_atomic(ctx);
                intel_batchbuffer_flush(ctx);
                emit_new_state = 1;
                intel_batchbuffer_start_atomic(ctx, 0x1000);
            }
        }
    }

    intel_batchbuffer_end_atomic(ctx);
}

static VAStatus gen6_vme_prepare(VADriverContextP ctx,
                                 struct encode_state *encode_state,
                                 struct gen6_encoder_context *gen6_encoder_context)
{
    VAStatus vaStatus = VA_STATUS_SUCCESS;
    VAEncSliceParameterBuffer *pSliceParameter = (VAEncSliceParameterBuffer *)encode_state->slice_params[0]->buffer;
    int is_intra = pSliceParameter->slice_flags.bits.is_intra;

    /* Set up all the memory objects */
    gen6_vme_surface_setup(ctx, encode_state, is_intra, gen6_encoder_context);
    gen6_vme_interface_setup(ctx, encode_state, gen6_encoder_context);
    gen6_vme_constant_setup(ctx, encode_state, gen6_encoder_context);
    gen6_vme_vme_state_setup(ctx, encode_state, is_intra, gen6_encoder_context);

    /* Program the media pipeline */
    gen6_vme_pipeline_programing(ctx, encode_state, gen6_encoder_context);

    return vaStatus;
}

static VAStatus gen6_vme_run(VADriverContextP ctx,
                             struct encode_state *encode_state,
                             struct gen6_encoder_context *gen6_encoder_context)
{
    intel_batchbuffer_flush(ctx);

    return VA_STATUS_SUCCESS;
}

static VAStatus gen6_vme_stop(VADriverContextP ctx,
                              struct encode_state *encode_state,
                              struct gen6_encoder_context *gen6_encoder_context)
{
    return VA_STATUS_SUCCESS;
}

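/*
 * Public entry point for the Gen6 VME stage: reinitialize the per-frame
 * buffers, build all state and commands, kick the batch off with a flush,
 * then stop (currently a no-op).
 */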
VAStatus gen6_vme_pipeline(VADriverContextP ctx,
                           VAProfile profile,
                           struct encode_state *encode_state,
                           struct gen6_encoder_context *gen6_encoder_context)
{
    gen6_vme_media_init(ctx, gen6_encoder_context);
    gen6_vme_prepare(ctx, encode_state, gen6_encoder_context);
    gen6_vme_run(ctx, encode_state, gen6_encoder_context);
    gen6_vme_stop(ctx, encode_state, gen6_encoder_context);

    return VA_STATUS_SUCCESS;
}

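/*
 * Upload every VME kernel binary into its own bo. The matching
 * gen6_vme_context_destroy() below releases these along with all
 * per-context buffers.
 */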
Bool gen6_vme_context_init(VADriverContextP ctx, struct gen6_vme_context *vme_context)
{
    struct i965_driver_data *i965 = i965_driver_data(ctx);
    int i;

    for (i = 0; i < GEN6_VME_KERNEL_NUMBER; i++) {
        /* Load kernel into GPU memory */
        struct i965_kernel *kernel = &gen6_vme_kernels[i];

        kernel->bo = dri_bo_alloc(i965->intel.bufmgr,
                                  kernel->name,
                                  kernel->size,
                                  0x1000);
        assert(kernel->bo);
        dri_bo_subdata(kernel->bo, 0, kernel->size, kernel->bin);
    }

    return True;
}

Bool gen6_vme_context_destroy(struct gen6_vme_context *vme_context)
{
    int i;

    for (i = 0; i < MAX_MEDIA_SURFACES_GEN6; i++) {
        dri_bo_unreference(vme_context->surface_state[i].bo);
        vme_context->surface_state[i].bo = NULL;
    }

    dri_bo_unreference(vme_context->idrt.bo);
    vme_context->idrt.bo = NULL;

    dri_bo_unreference(vme_context->binding_table.bo);
    vme_context->binding_table.bo = NULL;

    dri_bo_unreference(vme_context->curbe.bo);
    vme_context->curbe.bo = NULL;

    dri_bo_unreference(vme_context->vme_output.bo);
    vme_context->vme_output.bo = NULL;

    dri_bo_unreference(vme_context->vme_state.bo);
    vme_context->vme_state.bo = NULL;

    for (i = 0; i < GEN6_VME_KERNEL_NUMBER; i++) {
        /* Release the kernel bo loaded in gen6_vme_context_init() */
        struct i965_kernel *kernel = &gen6_vme_kernels[i];

        dri_bo_unreference(kernel->bo);
        kernel->bo = NULL;
    }

    return True;
}