/*
 * Copyright © 2010-2011 Intel Corporation
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the
 * "Software"), to deal in the Software without restriction, including
 * without limitation the rights to use, copy, modify, merge, publish,
 * distribute, sub license, and/or sell copies of the Software, and to
 * permit persons to whom the Software is furnished to do so, subject to
 * the following conditions:
 *
 * The above copyright notice and this permission notice (including the
 * next paragraph) shall be included in all copies or substantial portions
 * of the Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
 * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
 * IN NO EVENT SHALL PRECISION INSIGHT AND/OR ITS SUPPLIERS BE LIABLE FOR
 * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
 * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
 * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
 *
 * Authors:
 *    Zhao Yakui <yakui.zhao@intel.com>
 *
 */

#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <assert.h>

#include "intel_batchbuffer.h"
#include "intel_driver.h"

#include "i965_defines.h"
#include "i965_drv_video.h"
#include "i965_encoder.h"
#include "gen6_vme.h"
#include "gen6_mfc.h"
#ifdef SURFACE_STATE_PADDED_SIZE
#undef SURFACE_STATE_PADDED_SIZE
#endif

#define SURFACE_STATE_PADDED_SIZE_0_GEN7        ALIGN(sizeof(struct gen7_surface_state), 32)
#define SURFACE_STATE_PADDED_SIZE_1_GEN7        ALIGN(sizeof(struct gen7_surface_state2), 32)
#define SURFACE_STATE_PADDED_SIZE_GEN7          MAX(SURFACE_STATE_PADDED_SIZE_0_GEN7, SURFACE_STATE_PADDED_SIZE_1_GEN7)

#define SURFACE_STATE_PADDED_SIZE               SURFACE_STATE_PADDED_SIZE_GEN7
#define SURFACE_STATE_OFFSET(index)             (SURFACE_STATE_PADDED_SIZE * index)
#define BINDING_TABLE_OFFSET(index)             (SURFACE_STATE_OFFSET(MAX_MEDIA_SURFACES_GEN6) + sizeof(unsigned int) * index)

#define CURBE_ALLOCATION_SIZE   37              /* in 256-bit */
#define CURBE_TOTAL_DATA_LENGTH (4 * 32)        /* in byte, it should be less than or equal to CURBE_ALLOCATION_SIZE * 32 */
#define CURBE_URB_ENTRY_LENGTH  4               /* in 256-bit, it should be less than or equal to CURBE_TOTAL_DATA_LENGTH / 32 */

enum VIDEO_CODING_TYPE {
    VIDEO_CODING_AVC = 0,
    VIDEO_CODING_MPEG2,
    VIDEO_CODING_SUM
};

enum AVC_VME_KERNEL_TYPE {
    AVC_VME_INTRA_SHADER = 0,
    AVC_VME_INTER_SHADER,
    AVC_VME_BATCHBUFFER,
    AVC_VME_KERNEL_SUM
};

enum MPEG2_VME_KERNEL_TYPE {
    MPEG2_VME_INTER_SHADER = 0,
    MPEG2_VME_BATCHBUFFER,
    MPEG2_VME_KERNEL_SUM
};

static const uint32_t gen7_vme_intra_frame[][4] = {
#include "shaders/vme/intra_frame.g7b"
};

static const uint32_t gen7_vme_inter_frame[][4] = {
#include "shaders/vme/inter_frame.g7b"
};

static const uint32_t gen7_vme_batchbuffer[][4] = {
#include "shaders/vme/batchbuffer.g7b"
};

static struct i965_kernel gen7_vme_kernels[] = {
    {
        "AVC VME Intra Frame",
        AVC_VME_INTRA_SHADER,                   /*index*/
        gen7_vme_intra_frame,
        sizeof(gen7_vme_intra_frame),
        NULL
    },
    {
        "AVC VME inter Frame",
        AVC_VME_INTER_SHADER,
        gen7_vme_inter_frame,
        sizeof(gen7_vme_inter_frame),
        NULL
    },
    {
        "AVC VME BATCHBUFFER",
        AVC_VME_BATCHBUFFER,
        gen7_vme_batchbuffer,
        sizeof(gen7_vme_batchbuffer),
        NULL
    },
};

static const uint32_t gen7_vme_mpeg2_inter_frame[][4] = {
#include "shaders/vme/mpeg2_inter_frame.g7b"
};

static const uint32_t gen7_vme_mpeg2_batchbuffer[][4] = {
#include "shaders/vme/batchbuffer.g7b"
};

static struct i965_kernel gen7_vme_mpeg2_kernels[] = {
    {
        "MPEG2 VME inter Frame",
        MPEG2_VME_INTER_SHADER,
        gen7_vme_mpeg2_inter_frame,
        sizeof(gen7_vme_mpeg2_inter_frame),
        NULL
    },
    {
        "MPEG2 VME BATCHBUFFER",
        MPEG2_VME_BATCHBUFFER,
        gen7_vme_mpeg2_batchbuffer,
        sizeof(gen7_vme_mpeg2_batchbuffer),
        NULL
    },
};

/* only used for VME source surface state */
static void
gen7_vme_source_surface_state(VADriverContextP ctx,
                              int index,
                              struct object_surface *obj_surface,
                              struct intel_encoder_context *encoder_context)
{
    struct gen6_vme_context *vme_context = encoder_context->vme_context;

    vme_context->vme_surface2_setup(ctx,
                                    &vme_context->gpe_context,
                                    obj_surface,
                                    BINDING_TABLE_OFFSET(index),
                                    SURFACE_STATE_OFFSET(index));
}

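/* Bind the input picture as a media read/write surface (in addition to the
 * VME surface set up by gen7_vme_source_surface_state()). */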
static void
gen7_vme_media_source_surface_state(VADriverContextP ctx,
                                    int index,
                                    struct object_surface *obj_surface,
                                    struct intel_encoder_context *encoder_context)
{
    struct gen6_vme_context *vme_context = encoder_context->vme_context;

    vme_context->vme_media_rw_surface_setup(ctx,
                                            &vme_context->gpe_context,
                                            obj_surface,
                                            BINDING_TABLE_OFFSET(index),
                                            SURFACE_STATE_OFFSET(index));
}

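/* Allocate the per-macroblock VME output buffer (intra and inter records use
 * different block sizes) and bind it as a buffer surface. */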
static void
gen7_vme_output_buffer_setup(VADriverContextP ctx,
                             struct encode_state *encode_state,
                             int index,
                             struct intel_encoder_context *encoder_context)
{
    struct i965_driver_data *i965 = i965_driver_data(ctx);
    struct gen6_vme_context *vme_context = encoder_context->vme_context;
    VAEncSequenceParameterBufferH264 *pSequenceParameter = (VAEncSequenceParameterBufferH264 *)encode_state->seq_param_ext->buffer;
    VAEncSliceParameterBufferH264 *pSliceParameter = (VAEncSliceParameterBufferH264 *)encode_state->slice_params_ext[0]->buffer;
    int is_intra = pSliceParameter->slice_type == SLICE_TYPE_I;
    int width_in_mbs = pSequenceParameter->picture_width_in_mbs;
    int height_in_mbs = pSequenceParameter->picture_height_in_mbs;

    vme_context->vme_output.num_blocks = width_in_mbs * height_in_mbs;
    vme_context->vme_output.pitch = 16; /* in bytes, always 16 */

    if (is_intra)
        vme_context->vme_output.size_block = INTRA_VME_OUTPUT_IN_BYTES;
    else
        vme_context->vme_output.size_block = INTER_VME_OUTPUT_IN_BYTES;

    vme_context->vme_output.bo = dri_bo_alloc(i965->intel.bufmgr,
                                              "VME output buffer",
                                              vme_context->vme_output.num_blocks * vme_context->vme_output.size_block,
                                              0x1000);
    assert(vme_context->vme_output.bo);
    vme_context->vme_buffer_suface_setup(ctx,
                                         &vme_context->gpe_context,
                                         &vme_context->vme_output,
                                         BINDING_TABLE_OFFSET(index),
                                         SURFACE_STATE_OFFSET(index));
}

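/* Allocate the second-level batch buffer (one 32-byte block per macroblock
 * plus one spare for the end command) that gen7_vme_fill_vme_batchbuffer()
 * fills with MEDIA_OBJECT commands, and bind it as a buffer surface. */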
static void
gen7_vme_output_vme_batchbuffer_setup(VADriverContextP ctx,
                                      struct encode_state *encode_state,
                                      int index,
                                      struct intel_encoder_context *encoder_context)
{
    struct i965_driver_data *i965 = i965_driver_data(ctx);
    struct gen6_vme_context *vme_context = encoder_context->vme_context;
    VAEncSequenceParameterBufferH264 *pSequenceParameter = (VAEncSequenceParameterBufferH264 *)encode_state->seq_param_ext->buffer;
    int width_in_mbs = pSequenceParameter->picture_width_in_mbs;
    int height_in_mbs = pSequenceParameter->picture_height_in_mbs;

    vme_context->vme_batchbuffer.num_blocks = width_in_mbs * height_in_mbs + 1;
    vme_context->vme_batchbuffer.size_block = 32; /* 2 OWORDs */
    vme_context->vme_batchbuffer.pitch = 16;
    vme_context->vme_batchbuffer.bo = dri_bo_alloc(i965->intel.bufmgr,
                                                   "VME batchbuffer",
                                                   vme_context->vme_batchbuffer.num_blocks * vme_context->vme_batchbuffer.size_block,
                                                   0x1000);
    vme_context->vme_buffer_suface_setup(ctx,
                                         &vme_context->gpe_context,
                                         &vme_context->vme_batchbuffer,
                                         BINDING_TABLE_OFFSET(index),
                                         SURFACE_STATE_OFFSET(index));
}

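/* Bind all surfaces used by the AVC VME kernels: the input picture (as both a
 * VME surface and a media R/W surface), up to two reference pictures for
 * inter slices, the VME output buffer and the second-level batch buffer. */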
static VAStatus
gen7_vme_surface_setup(VADriverContextP ctx,
                       struct encode_state *encode_state,
                       int is_intra,
                       struct intel_encoder_context *encoder_context)
{
    struct i965_driver_data *i965 = i965_driver_data(ctx);
    struct object_surface *obj_surface;
    VAEncPictureParameterBufferH264 *pPicParameter = (VAEncPictureParameterBufferH264 *)encode_state->pic_param_ext->buffer;

    /* Set up surface states */
    /* current picture for encoding */
    obj_surface = SURFACE(encoder_context->input_yuv_surface);
    assert(obj_surface);
    gen7_vme_source_surface_state(ctx, 0, obj_surface, encoder_context);
    gen7_vme_media_source_surface_state(ctx, 4, obj_surface, encoder_context);

    if (!is_intra) {
        /* reference 0 */
        obj_surface = SURFACE(pPicParameter->ReferenceFrames[0].picture_id);
        assert(obj_surface);
        if (obj_surface->bo != NULL)
            gen7_vme_source_surface_state(ctx, 1, obj_surface, encoder_context);

        /* reference 1 */
        obj_surface = SURFACE(pPicParameter->ReferenceFrames[1].picture_id);
        assert(obj_surface);
        if (obj_surface->bo != NULL)
            gen7_vme_source_surface_state(ctx, 2, obj_surface, encoder_context);
    }

    /* VME output */
    gen7_vme_output_buffer_setup(ctx, encode_state, 3, encoder_context);
    gen7_vme_output_vme_batchbuffer_setup(ctx, encode_state, 5, encoder_context);

    return VA_STATUS_SUCCESS;
}

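/* Write one interface descriptor per loaded VME kernel, pointing at the
 * kernel binary, the VME state buffer (via the sampler state pointer) and the
 * shared binding table, and emit relocations for both buffer objects. */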
static VAStatus gen7_vme_interface_setup(VADriverContextP ctx,
                                         struct encode_state *encode_state,
                                         struct intel_encoder_context *encoder_context)
{
    struct gen6_vme_context *vme_context = encoder_context->vme_context;
    struct gen6_interface_descriptor_data *desc;
    int i;
    dri_bo *bo;

    bo = vme_context->gpe_context.idrt.bo;
    dri_bo_map(bo, 1);
    assert(bo->virtual);
    desc = bo->virtual;

    for (i = 0; i < vme_context->vme_kernel_sum; i++) {
        struct i965_kernel *kernel;
        kernel = &vme_context->gpe_context.kernels[i];
        assert(sizeof(*desc) == 32);
        /* Set up the descriptor table */
        memset(desc, 0, sizeof(*desc));
        desc->desc0.kernel_start_pointer = (kernel->bo->offset >> 6);
        desc->desc2.sampler_count = 1; /* FIXME: */
        desc->desc2.sampler_state_pointer = (vme_context->vme_state.bo->offset >> 5);
        desc->desc3.binding_table_entry_count = 1; /* FIXME: */
        desc->desc3.binding_table_pointer = (BINDING_TABLE_OFFSET(0) >> 5);
        desc->desc4.constant_urb_entry_read_offset = 0;
        desc->desc4.constant_urb_entry_read_length = CURBE_URB_ENTRY_LENGTH;

        /* kernel start */
        dri_bo_emit_reloc(bo,
                          I915_GEM_DOMAIN_INSTRUCTION, 0,
                          0,
                          i * sizeof(*desc) + offsetof(struct gen6_interface_descriptor_data, desc0),
                          kernel->bo);
        /* Sampler state (VME state pointer) */
        dri_bo_emit_reloc(bo,
                          I915_GEM_DOMAIN_INSTRUCTION, 0,
                          (1 << 2),
                          i * sizeof(*desc) + offsetof(struct gen6_interface_descriptor_data, desc2),
                          vme_context->vme_state.bo);
        desc++;
    }
    dri_bo_unmap(bo);

    return VA_STATUS_SUCCESS;
}

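/* Write the CURBE constant buffer. Currently only dword 31 is set: the
 * motion-vector count limit, which drops from 32 to 16 at H.264 level 3.0 and
 * to 8 at level 3.1 and above. */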
static VAStatus gen7_vme_constant_setup(VADriverContextP ctx,
                                        struct encode_state *encode_state,
                                        struct intel_encoder_context *encoder_context)
{
    struct gen6_vme_context *vme_context = encoder_context->vme_context;
    // unsigned char *constant_buffer;
    unsigned int *vme_state_message;
    int mv_num = 32;

    if (vme_context->h264_level >= 30) {
        mv_num = 16;

        if (vme_context->h264_level >= 31)
            mv_num = 8;
    }

    dri_bo_map(vme_context->gpe_context.curbe.bo, 1);
    assert(vme_context->gpe_context.curbe.bo->virtual);
    // constant_buffer = vme_context->curbe.bo->virtual;
    vme_state_message = (unsigned int *)vme_context->gpe_context.curbe.bo->virtual;
    vme_state_message[31] = mv_num;

    /* TODO: copy buffer into CURBE */

    dri_bo_unmap(vme_context->gpe_context.curbe.bo);

    return VA_STATUS_SUCCESS;
}

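/* Packed intra macroblock mode costs indexed by QP (0..51);
 * gen7_vme_state_setup_fixup() loads the entry for the slice QP into the VME
 * state message. */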
static const unsigned int intra_mb_mode_cost_table[] = {
    0x31110001, // for qp0
    0x09110001, // for qp1
    0x15030001, // for qp2
    0x0b030001, // for qp3
    0x0d030011, // for qp4
    0x17210011, // for qp5
    0x41210011, // for qp6
    0x19210011, // for qp7
    0x25050003, // for qp8
    0x1b130003, // for qp9
    0x1d130003, // for qp10
    0x27070021, // for qp11
    0x51310021, // for qp12
    0x29090021, // for qp13
    0x35150005, // for qp14
    0x2b0b0013, // for qp15
    0x2d0d0013, // for qp16
    0x37170007, // for qp17
    0x61410031, // for qp18
    0x39190009, // for qp19
    0x45250015, // for qp20
    0x3b1b000b, // for qp21
    0x3d1d000d, // for qp22
    0x47270017, // for qp23
    0x71510041, // for qp24 ! center for qp=0..30
    0x49290019, // for qp25
    0x55350025, // for qp26
    0x4b2b001b, // for qp27
    0x4d2d001d, // for qp28
    0x57370027, // for qp29
    0x81610051, // for qp30
    0x57270017, // for qp31
    0x81510041, // for qp32 ! center for qp=31..51
    0x59290019, // for qp33
    0x65350025, // for qp34
    0x5b2b001b, // for qp35
    0x5d2d001d, // for qp36
    0x67370027, // for qp37
    0x91610051, // for qp38
    0x69390029, // for qp39
    0x75450035, // for qp40
    0x6b3b002b, // for qp41
    0x6d3d002d, // for qp42
    0x77470037, // for qp43
    0xa1710061, // for qp44
    0x79490039, // for qp45
    0x85550045, // for qp46
    0x7b4b003b, // for qp47
    0x7d4d003d, // for qp48
    0x87570047, // for qp49
    0xb1810071, // for qp50
    0x89590049  // for qp51
};

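/* For I/SI slices, overwrite the intra mode cost entry (dword 16) of the VME
 * state message with the table value for the effective QP: pic_init_qp +
 * slice_qp_delta under CQP, otherwise the QP chosen by bit rate control. */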
static void gen7_vme_state_setup_fixup(VADriverContextP ctx,
                                       struct encode_state *encode_state,
                                       struct intel_encoder_context *encoder_context,
                                       unsigned int *vme_state_message)
{
    struct gen6_mfc_context *mfc_context = encoder_context->mfc_context;
    VAEncPictureParameterBufferH264 *pic_param = (VAEncPictureParameterBufferH264 *)encode_state->pic_param_ext->buffer;
    VAEncSliceParameterBufferH264 *slice_param = (VAEncSliceParameterBufferH264 *)encode_state->slice_params_ext[0]->buffer;

    if (slice_param->slice_type != SLICE_TYPE_I &&
        slice_param->slice_type != SLICE_TYPE_SI)
        return;

    if (encoder_context->rate_control_mode == VA_RC_CQP)
        vme_state_message[16] = intra_mb_mode_cost_table[pic_param->pic_init_qp + slice_param->slice_qp_delta];
    else
        vme_state_message[16] = intra_mb_mode_cost_table[mfc_context->bit_rate_control_context[slice_param->slice_type].QpPrimeY];
}

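/* Fill the VME state buffer with the default search path and cost values,
 * then apply the per-QP intra cost fixup. */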
static VAStatus gen7_vme_vme_state_setup(VADriverContextP ctx,
                                         struct encode_state *encode_state,
                                         int is_intra,
                                         struct intel_encoder_context *encoder_context)
{
    struct gen6_vme_context *vme_context = encoder_context->vme_context;
    unsigned int *vme_state_message;
    int i;

    //building VME state message
    dri_bo_map(vme_context->vme_state.bo, 1);
    assert(vme_context->vme_state.bo->virtual);
    vme_state_message = (unsigned int *)vme_context->vme_state.bo->virtual;

    vme_state_message[0] = 0x01010101;
    vme_state_message[1] = 0x10010101;
    vme_state_message[2] = 0x0F0F0F0F;
    vme_state_message[3] = 0x100F0F0F;
    vme_state_message[4] = 0x01010101;
    vme_state_message[5] = 0x10010101;
    vme_state_message[6] = 0x0F0F0F0F;
    vme_state_message[7] = 0x100F0F0F;
    vme_state_message[8] = 0x01010101;
    vme_state_message[9] = 0x10010101;
    vme_state_message[10] = 0x0F0F0F0F;
    vme_state_message[11] = 0x000F0F0F;
    vme_state_message[12] = 0x00;
    vme_state_message[13] = 0x00;

    vme_state_message[14] = 0x4a4a;
    vme_state_message[15] = 0x0;
    vme_state_message[16] = 0x4a4a4a4a;
    vme_state_message[17] = 0x4a4a4a4a;
    vme_state_message[18] = 0x21110100;
    vme_state_message[19] = 0x61514131;

    for(i = 20; i < 32; i++) {
        vme_state_message[i] = 0;
    }
    //vme_state_message[16] = 0x42424242;                       //cost function LUT set 0 for Intra

    gen7_vme_state_setup_fixup(ctx, encode_state, encoder_context, vme_state_message);

    dri_bo_unmap(vme_context->vme_state.bo);
    return VA_STATUS_SUCCESS;
}

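/* Build the second-level batch buffer on the CPU: for every slice, emit one
 * MEDIA_OBJECT per run of macroblocks (a full row for the first run to mark
 * the slice edge, then up to 128 MBs at a time), followed by a
 * MI_BATCH_BUFFER_END. */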
static void
gen7_vme_fill_vme_batchbuffer(VADriverContextP ctx,
                              struct encode_state *encode_state,
                              int mb_width, int mb_height,
                              int kernel,
                              int transform_8x8_mode_flag,
                              struct intel_encoder_context *encoder_context)
{
    struct gen6_vme_context *vme_context = encoder_context->vme_context;
    int number_mb_cmds;
    int mb_x = 0, mb_y = 0;
    int i, s;
    unsigned int *command_ptr;

    dri_bo_map(vme_context->vme_batchbuffer.bo, 1);
    command_ptr = vme_context->vme_batchbuffer.bo->virtual;

    for (s = 0; s < encode_state->num_slice_params_ext; s++) {
        VAEncSliceParameterBufferH264 *pSliceParameter = (VAEncSliceParameterBufferH264 *)encode_state->slice_params_ext[s]->buffer;
        int slice_mb_begin = pSliceParameter->macroblock_address;
        int slice_mb_number = pSliceParameter->num_macroblocks;

        for (i = 0; i < slice_mb_number;) {
            int mb_count = i + slice_mb_begin;

            mb_x = mb_count % mb_width;
            mb_y = mb_count / mb_width;

            if (i == 0) {
                number_mb_cmds = mb_width;          // we must mark the slice edge.
            } else if ((i + 128) <= slice_mb_number) {
                number_mb_cmds = 128;
            } else {
                number_mb_cmds = slice_mb_number - i;
            }

            *command_ptr++ = (CMD_MEDIA_OBJECT | (8 - 2));
            *command_ptr++ = kernel;
            *command_ptr++ = 0;
            *command_ptr++ = 0;
            *command_ptr++ = 0;
            *command_ptr++ = 0;

            /* inline data */
            *command_ptr++ = (mb_width << 16 | mb_y << 8 | mb_x);
            *command_ptr++ = (number_mb_cmds << 16 | transform_8x8_mode_flag | ((i == 0) << 1));

            i += number_mb_cmds;
        }
    }

    *command_ptr++ = 0;
    *command_ptr++ = MI_BATCH_BUFFER_END;

    dri_bo_unmap(vme_context->vme_batchbuffer.bo);
}

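/* (Re)initialize the GPE context and per-frame buffers: drop the previous
 * output and batch buffers and allocate a fresh 16KB VME state buffer. */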
static void gen7_vme_media_init(VADriverContextP ctx, struct intel_encoder_context *encoder_context)
{
    struct i965_driver_data *i965 = i965_driver_data(ctx);
    struct gen6_vme_context *vme_context = encoder_context->vme_context;
    dri_bo *bo;

    i965_gpe_context_init(ctx, &vme_context->gpe_context);

    /* VME output buffer */
    dri_bo_unreference(vme_context->vme_output.bo);
    vme_context->vme_output.bo = NULL;

    dri_bo_unreference(vme_context->vme_batchbuffer.bo);
    vme_context->vme_batchbuffer.bo = NULL;

    /* VME state */
    dri_bo_unreference(vme_context->vme_state.bo);
    bo = dri_bo_alloc(i965->intel.bufmgr,
                      "Buffer",
                      1024*16, 64);
    assert(bo);
    vme_context->vme_state.bo = bo;
}

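/* Fill the second-level batch buffer for the AVC kernels, program the media
 * pipeline state and chain to the second-level batch with
 * MI_BATCH_BUFFER_START. */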
static void gen7_vme_pipeline_programing(VADriverContextP ctx,
                                         struct encode_state *encode_state,
                                         struct intel_encoder_context *encoder_context)
{
    struct gen6_vme_context *vme_context = encoder_context->vme_context;
    struct intel_batchbuffer *batch = encoder_context->base.batch;
    VAEncPictureParameterBufferH264 *pPicParameter = (VAEncPictureParameterBufferH264 *)encode_state->pic_param_ext->buffer;
    VAEncSliceParameterBufferH264 *pSliceParameter = (VAEncSliceParameterBufferH264 *)encode_state->slice_params_ext[0]->buffer;
    VAEncSequenceParameterBufferH264 *pSequenceParameter = (VAEncSequenceParameterBufferH264 *)encode_state->seq_param_ext->buffer;
    int is_intra = pSliceParameter->slice_type == SLICE_TYPE_I;
    int width_in_mbs = pSequenceParameter->picture_width_in_mbs;
    int height_in_mbs = pSequenceParameter->picture_height_in_mbs;

    gen7_vme_fill_vme_batchbuffer(ctx,
                                  encode_state,
                                  width_in_mbs, height_in_mbs,
                                  is_intra ? AVC_VME_INTRA_SHADER : AVC_VME_INTER_SHADER,
                                  pPicParameter->pic_fields.bits.transform_8x8_mode_flag,
                                  encoder_context);

    intel_batchbuffer_start_atomic(batch, 0x1000);
    gen6_gpe_pipeline_setup(ctx, &vme_context->gpe_context, batch);
    BEGIN_BATCH(batch, 2);
    OUT_BATCH(batch, MI_BATCH_BUFFER_START | (2 << 6));
    OUT_RELOC(batch,
              vme_context->vme_batchbuffer.bo,
              I915_GEM_DOMAIN_COMMAND, 0,
              0);
    ADVANCE_BATCH(batch);

    intel_batchbuffer_end_atomic(batch);
}

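/* Per-frame preparation for AVC: cache the H.264 level, set up all memory
 * objects (surfaces, interface descriptors, CURBE, VME state) and program the
 * media pipeline. */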
static VAStatus gen7_vme_prepare(VADriverContextP ctx,
                                 struct encode_state *encode_state,
                                 struct intel_encoder_context *encoder_context)
{
    VAStatus vaStatus = VA_STATUS_SUCCESS;
    VAEncSliceParameterBufferH264 *pSliceParameter = (VAEncSliceParameterBufferH264 *)encode_state->slice_params_ext[0]->buffer;
    int is_intra = pSliceParameter->slice_type == SLICE_TYPE_I;
    VAEncSequenceParameterBufferH264 *pSequenceParameter = (VAEncSequenceParameterBufferH264 *)encode_state->seq_param_ext->buffer;
    struct gen6_vme_context *vme_context = encoder_context->vme_context;

    if (!vme_context->h264_level ||
        (vme_context->h264_level != pSequenceParameter->level_idc)) {
        vme_context->h264_level = pSequenceParameter->level_idc;
    }

    /* Set up all the memory objects */
    gen7_vme_surface_setup(ctx, encode_state, is_intra, encoder_context);
    gen7_vme_interface_setup(ctx, encode_state, encoder_context);
    gen7_vme_constant_setup(ctx, encode_state, encoder_context);
    gen7_vme_vme_state_setup(ctx, encode_state, is_intra, encoder_context);

    /* Program the media pipeline */
    gen7_vme_pipeline_programing(ctx, encode_state, encoder_context);

    return vaStatus;
}

static VAStatus gen7_vme_run(VADriverContextP ctx,
                             struct encode_state *encode_state,
                             struct intel_encoder_context *encoder_context)
{
    struct intel_batchbuffer *batch = encoder_context->base.batch;

    intel_batchbuffer_flush(batch);

    return VA_STATUS_SUCCESS;
}

static VAStatus gen7_vme_stop(VADriverContextP ctx,
                              struct encode_state *encode_state,
                              struct intel_encoder_context *encoder_context)
{
    return VA_STATUS_SUCCESS;
}

static VAStatus
gen7_vme_pipeline(VADriverContextP ctx,
                  VAProfile profile,
                  struct encode_state *encode_state,
                  struct intel_encoder_context *encoder_context)
{
    gen7_vme_media_init(ctx, encoder_context);
    gen7_vme_prepare(ctx, encode_state, encoder_context);
    gen7_vme_run(ctx, encode_state, encoder_context);
    gen7_vme_stop(ctx, encode_state, encoder_context);

    return VA_STATUS_SUCCESS;
}

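/* MPEG-2 variant of the VME output buffer setup; macroblock counts are
 * derived from the picture dimensions instead of the H.264 sequence
 * parameters. */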
static void
gen7_vme_mpeg2_output_buffer_setup(VADriverContextP ctx,
                                   struct encode_state *encode_state,
                                   int index,
                                   int is_intra,
                                   struct intel_encoder_context *encoder_context)
{
    struct i965_driver_data *i965 = i965_driver_data(ctx);
    struct gen6_vme_context *vme_context = encoder_context->vme_context;
    VAEncSequenceParameterBufferMPEG2 *seq_param = (VAEncSequenceParameterBufferMPEG2 *)encode_state->seq_param_ext->buffer;
    int width_in_mbs = ALIGN(seq_param->picture_width, 16) / 16;
    int height_in_mbs = ALIGN(seq_param->picture_height, 16) / 16;

    vme_context->vme_output.num_blocks = width_in_mbs * height_in_mbs;
    vme_context->vme_output.pitch = 16; /* in bytes, always 16 */

    if (is_intra)
        vme_context->vme_output.size_block = INTRA_VME_OUTPUT_IN_BYTES;
    else
        vme_context->vme_output.size_block = INTER_VME_OUTPUT_IN_BYTES;

    vme_context->vme_output.bo = dri_bo_alloc(i965->intel.bufmgr,
                                              "VME output buffer",
                                              vme_context->vme_output.num_blocks * vme_context->vme_output.size_block,
                                              0x1000);
    assert(vme_context->vme_output.bo);
    vme_context->vme_buffer_suface_setup(ctx,
                                         &vme_context->gpe_context,
                                         &vme_context->vme_output,
                                         BINDING_TABLE_OFFSET(index),
                                         SURFACE_STATE_OFFSET(index));
}

static void
gen7_vme_mpeg2_output_vme_batchbuffer_setup(VADriverContextP ctx,
                                            struct encode_state *encode_state,
                                            int index,
                                            struct intel_encoder_context *encoder_context)
{
    struct i965_driver_data *i965 = i965_driver_data(ctx);
    struct gen6_vme_context *vme_context = encoder_context->vme_context;
    VAEncSequenceParameterBufferMPEG2 *seq_param = (VAEncSequenceParameterBufferMPEG2 *)encode_state->seq_param_ext->buffer;
    int width_in_mbs = ALIGN(seq_param->picture_width, 16) / 16;
    int height_in_mbs = ALIGN(seq_param->picture_height, 16) / 16;

    vme_context->vme_batchbuffer.num_blocks = width_in_mbs * height_in_mbs + 1;
    vme_context->vme_batchbuffer.size_block = 32; /* 2 OWORDs */
    vme_context->vme_batchbuffer.pitch = 16;
    vme_context->vme_batchbuffer.bo = dri_bo_alloc(i965->intel.bufmgr,
                                                   "VME batchbuffer",
                                                   vme_context->vme_batchbuffer.num_blocks * vme_context->vme_batchbuffer.size_block,
                                                   0x1000);
    vme_context->vme_buffer_suface_setup(ctx,
                                         &vme_context->gpe_context,
                                         &vme_context->vme_batchbuffer,
                                         BINDING_TABLE_OFFSET(index),
                                         SURFACE_STATE_OFFSET(index));
}

static VAStatus
gen7_vme_mpeg2_surface_setup(VADriverContextP ctx,
                             struct encode_state *encode_state,
                             int is_intra,
                             struct intel_encoder_context *encoder_context)
{
    struct i965_driver_data *i965 = i965_driver_data(ctx);
    struct object_surface *obj_surface;
    VAEncPictureParameterBufferMPEG2 *pic_param = (VAEncPictureParameterBufferMPEG2 *)encode_state->pic_param_ext->buffer;

    /* Set up surface states */
    /* current picture for encoding */
    obj_surface = SURFACE(encoder_context->input_yuv_surface);
    assert(obj_surface);
    gen7_vme_source_surface_state(ctx, 0, obj_surface, encoder_context);
    gen7_vme_media_source_surface_state(ctx, 4, obj_surface, encoder_context);

    if (!is_intra) {
        /* forward reference */
        obj_surface = SURFACE(pic_param->forward_reference_picture);
        assert(obj_surface);
        if (obj_surface->bo != NULL)
            gen7_vme_source_surface_state(ctx, 1, obj_surface, encoder_context);

        /* backward reference */
        obj_surface = SURFACE(pic_param->backward_reference_picture);
        if (obj_surface && obj_surface->bo != NULL)
            gen7_vme_source_surface_state(ctx, 2, obj_surface, encoder_context);
    }

    /* VME output */
    gen7_vme_mpeg2_output_buffer_setup(ctx, encode_state, 3, is_intra, encoder_context);
    gen7_vme_mpeg2_output_vme_batchbuffer_setup(ctx, encode_state, 5, encoder_context);

    return VA_STATUS_SUCCESS;
}

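/* MPEG-2 variant of the batch buffer fill: a slice parameter buffer may carry
 * several slices (num_elements), so iterate over each element before emitting
 * the per-run MEDIA_OBJECT commands. */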
static void
gen7_vme_mpeg2_fill_vme_batchbuffer(VADriverContextP ctx,
                                    struct encode_state *encode_state,
                                    int mb_width, int mb_height,
                                    int kernel,
                                    int transform_8x8_mode_flag,
                                    struct intel_encoder_context *encoder_context)
{
    struct gen6_vme_context *vme_context = encoder_context->vme_context;
    int number_mb_cmds;
    int mb_x = 0, mb_y = 0;
    int i, s, j;
    unsigned int *command_ptr;

    dri_bo_map(vme_context->vme_batchbuffer.bo, 1);
    command_ptr = vme_context->vme_batchbuffer.bo->virtual;

    for (s = 0; s < encode_state->num_slice_params_ext; s++) {
        VAEncSliceParameterBufferMPEG2 *slice_param = (VAEncSliceParameterBufferMPEG2 *)encode_state->slice_params_ext[s]->buffer;

        for (j = 0; j < encode_state->slice_params_ext[s]->num_elements; j++) {
            int slice_mb_begin = slice_param->macroblock_address;
            int slice_mb_number = slice_param->num_macroblocks;

            for (i = 0; i < slice_mb_number;) {
                int mb_count = i + slice_mb_begin;

                mb_x = mb_count % mb_width;
                mb_y = mb_count / mb_width;

                if (i == 0) {
                    number_mb_cmds = mb_width;
                } else if ((i + 128) <= slice_mb_number) {
                    number_mb_cmds = 128;
                } else {
                    number_mb_cmds = slice_mb_number - i;
                }

                *command_ptr++ = (CMD_MEDIA_OBJECT | (8 - 2));
                *command_ptr++ = kernel;
                *command_ptr++ = 0;
                *command_ptr++ = 0;
                *command_ptr++ = 0;
                *command_ptr++ = 0;

                /* inline data */
                *command_ptr++ = (mb_width << 16 | mb_y << 8 | mb_x);
                *command_ptr++ = ((number_mb_cmds << 16) | transform_8x8_mode_flag | ((i == 0) << 1));

                i += number_mb_cmds;
            }

            slice_param++;
        }
    }

    *command_ptr++ = 0;
    *command_ptr++ = MI_BATCH_BUFFER_END;

    dri_bo_unmap(vme_context->vme_batchbuffer.bo);
}

static void
gen7_vme_mpeg2_pipeline_programing(VADriverContextP ctx,
                                   struct encode_state *encode_state,
                                   int is_intra,
                                   struct intel_encoder_context *encoder_context)
{
    struct gen6_vme_context *vme_context = encoder_context->vme_context;
    struct intel_batchbuffer *batch = encoder_context->base.batch;
    VAEncSequenceParameterBufferMPEG2 *seq_param = (VAEncSequenceParameterBufferMPEG2 *)encode_state->seq_param_ext->buffer;
    int width_in_mbs = ALIGN(seq_param->picture_width, 16) / 16;
    int height_in_mbs = ALIGN(seq_param->picture_height, 16) / 16;

    gen7_vme_mpeg2_fill_vme_batchbuffer(ctx,
                                        encode_state,
                                        width_in_mbs, height_in_mbs,
                                        MPEG2_VME_INTER_SHADER,
                                        0,
                                        encoder_context);

    intel_batchbuffer_start_atomic(batch, 0x1000);
    gen6_gpe_pipeline_setup(ctx, &vme_context->gpe_context, batch);
    BEGIN_BATCH(batch, 2);
    OUT_BATCH(batch, MI_BATCH_BUFFER_START | (2 << 6));
    OUT_RELOC(batch,
              vme_context->vme_batchbuffer.bo,
              I915_GEM_DOMAIN_COMMAND, 0,
              0);
    ADVANCE_BATCH(batch);

    intel_batchbuffer_end_atomic(batch);
}

static VAStatus
gen7_vme_mpeg2_prepare(VADriverContextP ctx,
                       struct encode_state *encode_state,
                       struct intel_encoder_context *encoder_context)
{
    VAStatus vaStatus = VA_STATUS_SUCCESS;

    /* Set up all the memory objects */
    gen7_vme_mpeg2_surface_setup(ctx, encode_state, 0, encoder_context);
    gen7_vme_interface_setup(ctx, encode_state, encoder_context);
    gen7_vme_vme_state_setup(ctx, encode_state, 0, encoder_context);
    gen7_vme_constant_setup(ctx, encode_state, encoder_context);

    /* Program the media pipeline */
    gen7_vme_mpeg2_pipeline_programing(ctx, encode_state, 0, encoder_context);

    return vaStatus;
}

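/* MPEG-2 VME entry point. Intra slices skip the VME run entirely; an output
 * buffer object is still allocated if one does not exist yet. Inter slices go
 * through the usual init/prepare/run/stop sequence. */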
static VAStatus
gen7_vme_mpeg2_pipeline(VADriverContextP ctx,
                        VAProfile profile,
                        struct encode_state *encode_state,
                        struct intel_encoder_context *encoder_context)
{
    struct i965_driver_data *i965 = i965_driver_data(ctx);
    struct gen6_vme_context *vme_context = encoder_context->vme_context;
    VAEncSliceParameterBufferMPEG2 *slice_param =
        (VAEncSliceParameterBufferMPEG2 *)encode_state->slice_params_ext[0]->buffer;
    VAEncSequenceParameterBufferMPEG2 *seq_param =
        (VAEncSequenceParameterBufferMPEG2 *)encode_state->seq_param_ext->buffer;

    /* No need to run VME for an intra slice */
    if (slice_param->is_intra_slice) {
        if (!vme_context->vme_output.bo) {
            int w_in_mbs = ALIGN(seq_param->picture_width, 16) / 16;
            int h_in_mbs = ALIGN(seq_param->picture_height, 16) / 16;

            vme_context->vme_output.num_blocks = w_in_mbs * h_in_mbs;
            vme_context->vme_output.pitch = 16; /* in bytes, always 16 */
            vme_context->vme_output.size_block = INTRA_VME_OUTPUT_IN_BYTES;
            vme_context->vme_output.bo = dri_bo_alloc(i965->intel.bufmgr,
                                                      "MPEG2 VME output buffer",
                                                      vme_context->vme_output.num_blocks
                                                          * vme_context->vme_output.size_block,
                                                      0x1000);
        }

        return VA_STATUS_SUCCESS;
    }

    gen7_vme_media_init(ctx, encoder_context);
    gen7_vme_mpeg2_prepare(ctx, encode_state, encoder_context);
    gen7_vme_run(ctx, encode_state, encoder_context);
    gen7_vme_stop(ctx, encode_state, encoder_context);

    return VA_STATUS_SUCCESS;
}

static void
gen7_vme_context_destroy(void *context)
{
    struct gen6_vme_context *vme_context = context;

    i965_gpe_context_destroy(&vme_context->gpe_context);

    dri_bo_unreference(vme_context->vme_output.bo);
    vme_context->vme_output.bo = NULL;

    dri_bo_unreference(vme_context->vme_state.bo);
    vme_context->vme_state.bo = NULL;

    dri_bo_unreference(vme_context->vme_batchbuffer.bo);
    vme_context->vme_batchbuffer.bo = NULL;

    free(vme_context);
}

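/* Allocate and initialize the gen7 VME context: size the GPE state, pick the
 * kernel set and pipeline entry point for the requested profile (AVC or
 * MPEG-2), and hook up the gen7 surface setup callbacks. */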
Bool gen7_vme_context_init(VADriverContextP ctx, struct intel_encoder_context *encoder_context)
{
    struct i965_driver_data *i965 = i965_driver_data(ctx);
    struct gen6_vme_context *vme_context = calloc(1, sizeof(struct gen6_vme_context));

    vme_context->gpe_context.surface_state_binding_table.length =
        (SURFACE_STATE_PADDED_SIZE + sizeof(unsigned int)) * MAX_MEDIA_SURFACES_GEN6;

    vme_context->gpe_context.idrt.max_entries = MAX_INTERFACE_DESC_GEN6;
    vme_context->gpe_context.idrt.entry_size = sizeof(struct gen6_interface_descriptor_data);
    vme_context->gpe_context.curbe.length = CURBE_TOTAL_DATA_LENGTH;

    vme_context->gpe_context.vfe_state.max_num_threads = 60 - 1;
    vme_context->gpe_context.vfe_state.num_urb_entries = 16;
    vme_context->gpe_context.vfe_state.gpgpu_mode = 0;
    vme_context->gpe_context.vfe_state.urb_entry_size = 59 - 1;
    vme_context->gpe_context.vfe_state.curbe_allocation_size = CURBE_ALLOCATION_SIZE - 1;

    if (encoder_context->profile == VAProfileH264Baseline ||
        encoder_context->profile == VAProfileH264Main     ||
        encoder_context->profile == VAProfileH264High) {
        vme_context->video_coding_type = VIDEO_CODING_AVC;
        vme_context->vme_kernel_sum = AVC_VME_KERNEL_SUM;
    } else if (encoder_context->profile == VAProfileMPEG2Simple ||
               encoder_context->profile == VAProfileMPEG2Main) {
        vme_context->video_coding_type = VIDEO_CODING_MPEG2;
        vme_context->vme_kernel_sum = MPEG2_VME_KERNEL_SUM;
    } else {
        /* Unsupported encoding profile */
        assert(0);
    }

    if (IS_GEN7(i965->intel.device_id)) {
        if (vme_context->video_coding_type == VIDEO_CODING_AVC) {
            i965_gpe_load_kernels(ctx,
                                  &vme_context->gpe_context,
                                  gen7_vme_kernels,
                                  vme_context->vme_kernel_sum);
            encoder_context->vme_pipeline = gen7_vme_pipeline;
        } else {
            i965_gpe_load_kernels(ctx,
                                  &vme_context->gpe_context,
                                  gen7_vme_mpeg2_kernels,
                                  vme_context->vme_kernel_sum);
            encoder_context->vme_pipeline = gen7_vme_mpeg2_pipeline;
        }

        vme_context->vme_surface2_setup = gen7_gpe_surface2_setup;
        vme_context->vme_media_rw_surface_setup = gen7_gpe_media_rw_surface_setup;
        vme_context->vme_buffer_suface_setup = gen7_gpe_buffer_suface_setup;
    }

    encoder_context->vme_context = vme_context;
    encoder_context->vme_context_destroy = gen7_vme_context_destroy;

    return True;
}