Add multi-quality-level encoding support for GEN7
src/gen7_vme.c
1 /*
2  * Copyright © 2010-2011 Intel Corporation
3  *
4  * Permission is hereby granted, free of charge, to any person obtaining a
5  * copy of this software and associated documentation files (the
6  * "Software"), to deal in the Software without restriction, including
7  * without limitation the rights to use, copy, modify, merge, publish,
8  * distribute, sub license, and/or sell copies of the Software, and to
9  * permit persons to whom the Software is furnished to do so, subject to
10  * the following conditions:
11  *
12  * The above copyright notice and this permission notice (including the
13  * next paragraph) shall be included in all copies or substantial portions
14  * of the Software.
15  *
16  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
17  * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
18  * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
19  * IN NO EVENT SHALL PRECISION INSIGHT AND/OR ITS SUPPLIERS BE LIABLE FOR
20  * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
21  * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
22  * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
23  *
24  * Authors:
25  *    Zhao Yakui <yakui.zhao@intel.com>
26  *
27  */
28
29 #include <stdio.h>
30 #include <stdlib.h>
31 #include <stdbool.h>
32 #include <string.h>
33 #include <assert.h>
34
35 #include "intel_batchbuffer.h"
36 #include "intel_driver.h"
37
38 #include "i965_defines.h"
39 #include "i965_drv_video.h"
40 #include "i965_encoder.h"
41 #include "gen6_vme.h"
42 #include "gen6_mfc.h"
43 #ifdef SURFACE_STATE_PADDED_SIZE
44 #undef SURFACE_STATE_PADDED_SIZE
45 #endif
46
47 #define VME_MSG_LENGTH          32
48
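/*
 * Surface states and the binding table share one buffer: the padded
 * surface states for all MAX_MEDIA_SURFACES_GEN6 slots come first and
 * the binding table (one dword per slot) follows them, which is what
 * SURFACE_STATE_OFFSET() and BINDING_TABLE_OFFSET() below compute.
 */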
49 #define SURFACE_STATE_PADDED_SIZE               SURFACE_STATE_PADDED_SIZE_GEN7
50 #define SURFACE_STATE_OFFSET(index)             (SURFACE_STATE_PADDED_SIZE * index)
51 #define BINDING_TABLE_OFFSET(index)             (SURFACE_STATE_OFFSET(MAX_MEDIA_SURFACES_GEN6) + sizeof(unsigned int) * index)
52
53 #define CURBE_ALLOCATION_SIZE   37              /* in 256-bit */
54 #define CURBE_TOTAL_DATA_LENGTH (4 * 32)        /* in byte, it should be less than or equal to CURBE_ALLOCATION_SIZE * 32 */
55 #define CURBE_URB_ENTRY_LENGTH  4               /* in 256-bit, it should be less than or equal to CURBE_TOTAL_DATA_LENGTH / 32 */
56
57 enum VIDEO_CODING_TYPE{
58     VIDEO_CODING_AVC = 0,
59     VIDEO_CODING_MPEG2,
60     VIDEO_CODING_SUM
61 };
62
63 enum AVC_VME_KERNEL_TYPE{ 
64     AVC_VME_INTRA_SHADER = 0,
65     AVC_VME_INTER_SHADER,
66     AVC_VME_BATCHBUFFER,
67     AVC_VME_BINTER_SHADER,
68     AVC_VME_KERNEL_SUM
69 };
70
71 enum MPEG2_VME_KERNEL_TYPE{
72     MPEG2_VME_INTER_SHADER = 0,
73     MPEG2_VME_BATCHBUFFER,
74     MPEG2_VME_KERNEL_SUM
75 };
76  
77
78 static const uint32_t gen7_vme_intra_frame[][4] = {
79 #include "shaders/vme/intra_frame_ivb.g7b"
80 };
81
82 static const uint32_t gen7_vme_inter_frame[][4] = {
83 #include "shaders/vme/inter_frame_ivb.g7b"
84 };
85
86 static const uint32_t gen7_vme_batchbuffer[][4] = {
87 #include "shaders/vme/batchbuffer.g7b"
88 };
89
90 static const uint32_t gen7_vme_binter_frame[][4] = {
91 #include "shaders/vme/inter_bframe_ivb.g7b"
92 };
93
94 static struct i965_kernel gen7_vme_kernels[] = {
95     {
96         "AVC VME Intra Frame",
97         AVC_VME_INTRA_SHADER,                   /*index*/
98         gen7_vme_intra_frame,                   
99         sizeof(gen7_vme_intra_frame),           
100         NULL
101     },
102     {
103         "AVC VME inter Frame",
104         AVC_VME_INTER_SHADER,
105         gen7_vme_inter_frame,
106         sizeof(gen7_vme_inter_frame),
107         NULL
108     },
109     {
110         "AVC VME BATCHBUFFER",
111         AVC_VME_BATCHBUFFER,
112         gen7_vme_batchbuffer,
113         sizeof(gen7_vme_batchbuffer),
114         NULL
115     },
116     {
117         "AVC VME binter Frame",
118         AVC_VME_BINTER_SHADER,
119         gen7_vme_binter_frame,
120         sizeof(gen7_vme_binter_frame),
121         NULL
122     }
123 };
124
125 static const uint32_t gen7_vme_mpeg2_inter_frame[][4] = {
126 #include "shaders/vme/mpeg2_inter_ivb.g7b"
127 };
128
129 static const uint32_t gen7_vme_mpeg2_batchbuffer[][4] = {
130 #include "shaders/vme/batchbuffer.g7b"
131 };
132
133 static struct i965_kernel gen7_vme_mpeg2_kernels[] = {
134     {
135         "MPEG2 VME inter Frame",
136         MPEG2_VME_INTER_SHADER,
137         gen7_vme_mpeg2_inter_frame,
138         sizeof(gen7_vme_mpeg2_inter_frame),
139         NULL
140     },
141     {
142         "MPEG2 VME BATCHBUFFER",
143         MPEG2_VME_BATCHBUFFER,
144         gen7_vme_mpeg2_batchbuffer,
145         sizeof(gen7_vme_mpeg2_batchbuffer),
146         NULL
147     },
148 };
149
150 /* only used for VME source surface state */
151 static void 
152 gen7_vme_source_surface_state(VADriverContextP ctx,
153                               int index,
154                               struct object_surface *obj_surface,
155                               struct intel_encoder_context *encoder_context)
156 {
157     struct gen6_vme_context *vme_context = encoder_context->vme_context;
158
159     vme_context->vme_surface2_setup(ctx,
160                                     &vme_context->gpe_context,
161                                     obj_surface,
162                                     BINDING_TABLE_OFFSET(index),
163                                     SURFACE_STATE_OFFSET(index));
164 }
165
166 static void
167 gen7_vme_media_source_surface_state(VADriverContextP ctx,
168                                     int index,
169                                     struct object_surface *obj_surface,
170                                     struct intel_encoder_context *encoder_context)
171 {
172     struct gen6_vme_context *vme_context = encoder_context->vme_context;
173
174     vme_context->vme_media_rw_surface_setup(ctx,
175                                             &vme_context->gpe_context,
176                                             obj_surface,
177                                             BINDING_TABLE_OFFSET(index),
178                                             SURFACE_STATE_OFFSET(index));
179 }
180
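/*
 * The VME output buffer holds one fixed-pitch record per macroblock.
 * The record size differs between intra and inter frames
 * (INTRA_VME_OUTPUT_IN_BYTES vs. INTER_VME_OUTPUT_IN_BYTES), so the
 * buffer is (re)allocated per frame based on the first slice type.
 */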
181 static void
182 gen7_vme_output_buffer_setup(VADriverContextP ctx,
183                              struct encode_state *encode_state,
184                              int index,
185                              struct intel_encoder_context *encoder_context)
186
187 {
188     struct i965_driver_data *i965 = i965_driver_data(ctx);
189     struct gen6_vme_context *vme_context = encoder_context->vme_context;
190     VAEncSequenceParameterBufferH264 *pSequenceParameter = (VAEncSequenceParameterBufferH264 *)encode_state->seq_param_ext->buffer;
191     VAEncSliceParameterBufferH264 *pSliceParameter = (VAEncSliceParameterBufferH264 *)encode_state->slice_params_ext[0]->buffer;
192     int is_intra = pSliceParameter->slice_type == SLICE_TYPE_I;
193     int width_in_mbs = pSequenceParameter->picture_width_in_mbs;
194     int height_in_mbs = pSequenceParameter->picture_height_in_mbs;
195
196     vme_context->vme_output.num_blocks = width_in_mbs * height_in_mbs;
197     vme_context->vme_output.pitch = 16; /* in bytes, always 16 */
198
199     if (is_intra)
200         vme_context->vme_output.size_block = INTRA_VME_OUTPUT_IN_BYTES;
201     else
202         vme_context->vme_output.size_block = INTER_VME_OUTPUT_IN_BYTES;
203
204     vme_context->vme_output.bo = dri_bo_alloc(i965->intel.bufmgr, 
205                                               "VME output buffer",
206                                               vme_context->vme_output.num_blocks * vme_context->vme_output.size_block,
207                                               0x1000);
208     assert(vme_context->vme_output.bo);
209     vme_context->vme_buffer_suface_setup(ctx,
210                                          &vme_context->gpe_context,
211                                          &vme_context->vme_output,
212                                          BINDING_TABLE_OFFSET(index),
213                                          SURFACE_STATE_OFFSET(index));
214 }
215
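/*
 * The VME batchbuffer receives one 32-byte (8-dword) MEDIA_OBJECT
 * command per macroblock plus one extra block for the trailing
 * MI_BATCH_BUFFER_END. gen7_vme_fill_vme_batchbuffer() writes it on
 * the CPU and the main batch jumps into it via MI_BATCH_BUFFER_START.
 */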
216 static void
217 gen7_vme_output_vme_batchbuffer_setup(VADriverContextP ctx,
218                                       struct encode_state *encode_state,
219                                       int index,
220                                       struct intel_encoder_context *encoder_context)
221
222 {
223     struct i965_driver_data *i965 = i965_driver_data(ctx);
224     struct gen6_vme_context *vme_context = encoder_context->vme_context;
225     VAEncSequenceParameterBufferH264 *pSequenceParameter = (VAEncSequenceParameterBufferH264 *)encode_state->seq_param_ext->buffer;
226     int width_in_mbs = pSequenceParameter->picture_width_in_mbs;
227     int height_in_mbs = pSequenceParameter->picture_height_in_mbs;
228
229     vme_context->vme_batchbuffer.num_blocks = width_in_mbs * height_in_mbs + 1;
230     vme_context->vme_batchbuffer.size_block = 32; /* 2 OWORDs */
231     vme_context->vme_batchbuffer.pitch = 16;
232     vme_context->vme_batchbuffer.bo = dri_bo_alloc(i965->intel.bufmgr, 
233                                                    "VME batchbuffer",
234                                                    vme_context->vme_batchbuffer.num_blocks * vme_context->vme_batchbuffer.size_block,
235                                                    0x1000);
236     vme_context->vme_buffer_suface_setup(ctx,
237                                          &vme_context->gpe_context,
238                                          &vme_context->vme_batchbuffer,
239                                          BINDING_TABLE_OFFSET(index),
240                                          SURFACE_STATE_OFFSET(index));
241 }
242
243 static VAStatus
244 gen7_vme_surface_setup(VADriverContextP ctx, 
245                        struct encode_state *encode_state,
246                        int is_intra,
247                        struct intel_encoder_context *encoder_context)
248 {
249     struct object_surface *obj_surface;
250
251     /* Set up surface states */
252     /* current picture for encoding */
253     obj_surface = encode_state->input_yuv_object;
254     gen7_vme_source_surface_state(ctx, 0, obj_surface, encoder_context);
255     gen7_vme_media_source_surface_state(ctx, 4, obj_surface, encoder_context);
256
257     if (!is_intra) {
258         VAEncSliceParameterBufferH264 *slice_param = (VAEncSliceParameterBufferH264 *)encode_state->slice_params_ext[0]->buffer;
259         int slice_type;
260
261         slice_type = intel_avc_enc_slice_type_fixup(slice_param->slice_type);
262         assert(slice_type != SLICE_TYPE_I && slice_type != SLICE_TYPE_SI);
263
264         intel_avc_vme_reference_state(ctx, encode_state, encoder_context, 0, 1, gen7_vme_source_surface_state);
265
266         if (slice_type == SLICE_TYPE_B)
267             intel_avc_vme_reference_state(ctx, encode_state, encoder_context, 1, 2, gen7_vme_source_surface_state);
268     }
269
270     /* VME output */
271     gen7_vme_output_buffer_setup(ctx, encode_state, 3, encoder_context);
272     gen7_vme_output_vme_batchbuffer_setup(ctx, encode_state, 5, encoder_context);
273
274     return VA_STATUS_SUCCESS;
275 }
276
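/*
 * One interface descriptor is written per VME kernel. Each descriptor
 * points at the kernel, at the shared binding table and, through the
 * sampler state pointer, at the VME state buffer; the two relocations
 * below let the kernel and VME state BOs move without the descriptors
 * being rewritten by hand.
 */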
277 static VAStatus gen7_vme_interface_setup(VADriverContextP ctx, 
278                                          struct encode_state *encode_state,
279                                          struct intel_encoder_context *encoder_context)
280 {
281     struct gen6_vme_context *vme_context = encoder_context->vme_context;
282     struct gen6_interface_descriptor_data *desc;   
283     int i;
284     dri_bo *bo;
285
286     bo = vme_context->gpe_context.idrt.bo;
287     dri_bo_map(bo, 1);
288     assert(bo->virtual);
289     desc = bo->virtual;
290
291     for (i = 0; i < vme_context->vme_kernel_sum; i++) {
292         struct i965_kernel *kernel;
293         kernel = &vme_context->gpe_context.kernels[i];
294         assert(sizeof(*desc) == 32);
295         /*Setup the descriptor table*/
296         memset(desc, 0, sizeof(*desc));
297         desc->desc0.kernel_start_pointer = (kernel->bo->offset >> 6);
298         desc->desc2.sampler_count = 1; /* FIXME: */
299         desc->desc2.sampler_state_pointer = (vme_context->vme_state.bo->offset >> 5);
300         desc->desc3.binding_table_entry_count = 1; /* FIXME: */
301         desc->desc3.binding_table_pointer = (BINDING_TABLE_OFFSET(0) >> 5);
302         desc->desc4.constant_urb_entry_read_offset = 0;
303         desc->desc4.constant_urb_entry_read_length = CURBE_URB_ENTRY_LENGTH;
304                 
305         /*kernel start*/
306         dri_bo_emit_reloc(bo,   
307                           I915_GEM_DOMAIN_INSTRUCTION, 0,
308                           0,
309                           i * sizeof(*desc) + offsetof(struct gen6_interface_descriptor_data, desc0),
310                           kernel->bo);
311         /*Sampler State(VME state pointer)*/
312         dri_bo_emit_reloc(bo,
313                           I915_GEM_DOMAIN_INSTRUCTION, 0,
314                           (1 << 2), /* sampler_count = 1, merged into desc2 by the relocation */
315                           i * sizeof(*desc) + offsetof(struct gen6_interface_descriptor_data, desc2),
316                           vme_context->vme_state.bo);
317         desc++;
318     }
319     dri_bo_unmap(bo);
320
321     return VA_STATUS_SUCCESS;
322 }
323
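/*
 * The CURBE (constant buffer) is filled with the first 128 bytes
 * (32 dwords) of vme_state_message. Dword 31 carries the per-MB MV
 * count limit: 32 by default, lowered to 16 for H.264 level 3.0 and to
 * 8 for level 3.1 and above (matching the level limits on motion
 * vectors per two consecutive macroblocks), and fixed at 2 for MPEG-2.
 */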
324 static VAStatus gen7_vme_constant_setup(VADriverContextP ctx, 
325                                         struct encode_state *encode_state,
326                                         struct intel_encoder_context *encoder_context)
327 {
328     struct gen6_vme_context *vme_context = encoder_context->vme_context;
329     unsigned char *constant_buffer;
330     unsigned int *vme_state_message;
331     int mv_num;
332
333     vme_state_message = (unsigned int *)vme_context->vme_state_message;
334     mv_num = 32;
335
336     if (encoder_context->codec == CODEC_H264) {
337         if (vme_context->h264_level >= 30) {
338             mv_num = 16;
339         
340             if (vme_context->h264_level >= 31)
341                 mv_num = 8;
342         }
343     } else if (encoder_context->codec == CODEC_MPEG2) { 
344         mv_num = 2;
345     }
346
347
348     vme_state_message[31] = mv_num;
349
350     dri_bo_map(vme_context->gpe_context.curbe.bo, 1);
351     assert(vme_context->gpe_context.curbe.bo->virtual);
352     constant_buffer = vme_context->gpe_context.curbe.bo->virtual;
353
354     /* Pass the required constant info into the constant buffer */
355     memcpy(constant_buffer, (char *)vme_context->vme_state_message, 128);
356         
357     dri_bo_unmap( vme_context->gpe_context.curbe.bo);
358
359     return VA_STATUS_SUCCESS;
360 }
361
362
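/*
 * Builds the per-frame VME state referenced by the sampler state
 * pointer. Dwords 0-13 look like the motion-search path: P/SP slices
 * at normal or high quality get the long path, everything else (intra,
 * B, or low-quality P) the short one. Dwords 14 and 16-19 carry the
 * MB/MV cost values that intel_vme_update_mbmv_cost() left in
 * vme_state_message.
 */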
363 static VAStatus gen7_vme_avc_state_setup(VADriverContextP ctx,
364                                          struct encode_state *encode_state,
365                                          int is_intra,
366                                          struct intel_encoder_context *encoder_context)
367 {
368     struct gen6_vme_context *vme_context = encoder_context->vme_context;
369     unsigned int *vme_state_message;
370     unsigned int *mb_cost_table;
371     int i;
372     VAEncSliceParameterBufferH264 *slice_param = (VAEncSliceParameterBufferH264 *)encode_state->slice_params_ext[0]->buffer;
373     unsigned int is_low_quality = (encoder_context->quality_level == ENCODER_LOW_QUALITY);
374
375     mb_cost_table = (unsigned int *)vme_context->vme_state_message;
376     //building VME state message
377     dri_bo_map(vme_context->vme_state.bo, 1);
378     assert(vme_context->vme_state.bo->virtual);
379     vme_state_message = (unsigned int *)vme_context->vme_state.bo->virtual;
380
381     if (((slice_param->slice_type == SLICE_TYPE_P) ||
382          (slice_param->slice_type == SLICE_TYPE_SP)) &&
383         !is_low_quality) {
384         vme_state_message[0] = 0x01010101;
385         vme_state_message[1] = 0x10010101;
386         vme_state_message[2] = 0x0F0F0F0F;
387         vme_state_message[3] = 0x100F0F0F;
388         vme_state_message[4] = 0x01010101;
389         vme_state_message[5] = 0x10010101;
390         vme_state_message[6] = 0x0F0F0F0F;
391         vme_state_message[7] = 0x100F0F0F;
392         vme_state_message[8] = 0x01010101;
393         vme_state_message[9] = 0x10010101;
394         vme_state_message[10] = 0x0F0F0F0F;
395         vme_state_message[11] = 0x000F0F0F;
396         vme_state_message[12] = 0x00;
397         vme_state_message[13] = 0x00;
398     } else {
399         vme_state_message[0] = 0x10010101;
400         vme_state_message[1] = 0x100F0F0F;
401         vme_state_message[2] = 0x10010101;
402         vme_state_message[3] = 0x000F0F0F;
403         vme_state_message[4] = 0;
404         vme_state_message[5] = 0;
405         vme_state_message[6] = 0;
406         vme_state_message[7] = 0;
407         vme_state_message[8] = 0;
408         vme_state_message[9] = 0;
409         vme_state_message[10] = 0;
410         vme_state_message[11] = 0;
411         vme_state_message[12] = 0;
412         vme_state_message[13] = 0;
413     }
414
415     vme_state_message[14] = (mb_cost_table[2] & 0xFFFF);
416     vme_state_message[15] = 0;
417     vme_state_message[16] = mb_cost_table[0];
418     vme_state_message[17] = mb_cost_table[1];
419     vme_state_message[18] = mb_cost_table[3];
420     vme_state_message[19] = mb_cost_table[4];
421
422     for(i = 20; i < 32; i++) {
423         vme_state_message[i] = 0;
424     }
425
426     dri_bo_unmap( vme_context->vme_state.bo);
427     return VA_STATUS_SUCCESS;
428 }
429
430 static VAStatus gen7_vme_mpeg2_state_setup(VADriverContextP ctx,
431                                            struct encode_state *encode_state,
432                                            int is_intra,
433                                            struct intel_encoder_context *encoder_context)
434 {
435     struct gen6_vme_context *vme_context = encoder_context->vme_context;
436     unsigned int *vme_state_message;
437     int i;
438     unsigned int *mb_cost_table;
439
440     mb_cost_table = (unsigned int *)vme_context->vme_state_message;
441         
442     //building VME state message
443     dri_bo_map(vme_context->vme_state.bo, 1);
444     assert(vme_context->vme_state.bo->virtual);
445     vme_state_message = (unsigned int *)vme_context->vme_state.bo->virtual;
446
447     vme_state_message[0] = 0x01010101;
448     vme_state_message[1] = 0x10010101;
449     vme_state_message[2] = 0x0F0F0F0F;
450     vme_state_message[3] = 0x100F0F0F;
451     vme_state_message[4] = 0x01010101;
452     vme_state_message[5] = 0x10010101;
453     vme_state_message[6] = 0x0F0F0F0F;
454     vme_state_message[7] = 0x100F0F0F;
455     vme_state_message[8] = 0x01010101;
456     vme_state_message[9] = 0x10010101;
457     vme_state_message[10] = 0x0F0F0F0F;
458     vme_state_message[11] = 0x000F0F0F;
459     vme_state_message[12] = 0x00;
460     vme_state_message[13] = 0x00;
461
462     vme_state_message[14] = (mb_cost_table[2] & 0xFFFF);
463     vme_state_message[15] = 0;
464     vme_state_message[16] = mb_cost_table[0];
465     vme_state_message[17] = 0;
466     vme_state_message[18] = mb_cost_table[3];
467     vme_state_message[19] = mb_cost_table[4];
468
469     for(i = 20; i < 32; i++) {
470         vme_state_message[i] = 0;
471     }
472     //vme_state_message[16] = 0x42424242;                       //cost function LUT set 0 for Intra
473
474     dri_bo_unmap( vme_context->vme_state.bo);
475     return VA_STATUS_SUCCESS;
476 }
477
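/*
 * Fills the VME batchbuffer with one MEDIA_OBJECT per macroblock. The
 * two inline-data dwords pack, respectively, mb_width/mb_y/mb_x and
 * (quality_level << 24) | (1 << 16) | (mb_intra_ub << 8) |
 * transform_8x8_mode_flag, where mb_intra_ub is the intra-prediction
 * neighbour availability mask derived from the MB position and the
 * slice boundaries. Passing quality_level here lets the kernel adapt
 * its behaviour to the selected quality level.
 */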
478 static void
479 gen7_vme_fill_vme_batchbuffer(VADriverContextP ctx, 
480                               struct encode_state *encode_state,
481                               int mb_width, int mb_height,
482                               int kernel,
483                               int transform_8x8_mode_flag,
484                               struct intel_encoder_context *encoder_context)
485 {
486     struct gen6_vme_context *vme_context = encoder_context->vme_context;
487     int mb_x = 0, mb_y = 0;
488     int i, s, j;
489     unsigned int *command_ptr;
490
491
492     dri_bo_map(vme_context->vme_batchbuffer.bo, 1);
493     command_ptr = vme_context->vme_batchbuffer.bo->virtual;
494
495     for (s = 0; s < encode_state->num_slice_params_ext; s++) {
496         VAEncSliceParameterBufferH264 *slice_param = (VAEncSliceParameterBufferH264 *)encode_state->slice_params_ext[s]->buffer;
497
498         for (j = 0; j < encode_state->slice_params_ext[s]->num_elements; j++) {
499             int slice_mb_begin = slice_param->macroblock_address;
500             int slice_mb_number = slice_param->num_macroblocks;
501             unsigned int mb_intra_ub;
502             int slice_mb_x = slice_param->macroblock_address % mb_width;
503
504             for (i = 0; i < slice_mb_number;) {
505                 int mb_count = i + slice_mb_begin;    
506
507                 mb_x = mb_count % mb_width;
508                 mb_y = mb_count / mb_width;
509                 mb_intra_ub = 0;
510
511                 if (mb_x != 0) {
512                     mb_intra_ub |= INTRA_PRED_AVAIL_FLAG_AE;
513                 }
514
515                 if (mb_y != 0) {
516                     mb_intra_ub |= INTRA_PRED_AVAIL_FLAG_B;
517
518                     if (mb_x != 0)
519                         mb_intra_ub |= INTRA_PRED_AVAIL_FLAG_D;
520
521                     if (mb_x != (mb_width -1))
522                         mb_intra_ub |= INTRA_PRED_AVAIL_FLAG_C;
523                 }
524
525                 if (i < mb_width) {
526                     if (i == 0)
527                         mb_intra_ub &= ~(INTRA_PRED_AVAIL_FLAG_AE);
528
529                     mb_intra_ub &= ~(INTRA_PRED_AVAIL_FLAG_BCD_MASK);
530
531                     if ((i == (mb_width - 1)) && slice_mb_x) {
532                         mb_intra_ub |= INTRA_PRED_AVAIL_FLAG_C;
533                     }
534                 }
535                 
536                 if ((i == mb_width) && slice_mb_x) {
537                     mb_intra_ub &= ~(INTRA_PRED_AVAIL_FLAG_D);
538                 }
539
540                 *command_ptr++ = (CMD_MEDIA_OBJECT | (8 - 2));
541                 *command_ptr++ = kernel;
542                 *command_ptr++ = 0;
543                 *command_ptr++ = 0;
544                 *command_ptr++ = 0;
545                 *command_ptr++ = 0;
546    
547                 /*inline data */
548                 *command_ptr++ = (mb_width << 16 | mb_y << 8 | mb_x);
549                 *command_ptr++ = ((encoder_context->quality_level << 24) | (1 << 16) | transform_8x8_mode_flag | (mb_intra_ub << 8));
550
551                 i += 1;
552             }
553
554             slice_param++;
555         }
556     }
557
558     *command_ptr++ = 0;
559     *command_ptr++ = MI_BATCH_BUFFER_END;
560
561     dri_bo_unmap(vme_context->vme_batchbuffer.bo);
562 }
563
564
565 static void gen7_vme_media_init(VADriverContextP ctx, struct intel_encoder_context *encoder_context)
566 {
567     struct i965_driver_data *i965 = i965_driver_data(ctx);
568     struct gen6_vme_context *vme_context = encoder_context->vme_context;
569     dri_bo *bo;
570
571     i965_gpe_context_init(ctx, &vme_context->gpe_context);
572
573     /* VME output buffer */
574     dri_bo_unreference(vme_context->vme_output.bo);
575     vme_context->vme_output.bo = NULL;
576
577     dri_bo_unreference(vme_context->vme_batchbuffer.bo);
578     vme_context->vme_batchbuffer.bo = NULL;
579
580     /* VME state */
581     dri_bo_unreference(vme_context->vme_state.bo);
582     bo = dri_bo_alloc(i965->intel.bufmgr,
583                       "Buffer",
584                       1024*16, 64);
585     assert(bo);
586     vme_context->vme_state.bo = bo;
587 }
588
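/*
 * Kernel selection: I/SI slices use the intra shader, P/SP slices the
 * inter shader, and B slices the binter shader. The hardware-scoreboard
 * walker path is used only when the quality level is not
 * ENCODER_LOW_QUALITY and every slice starts on a macroblock-row
 * boundary; otherwise the CPU-built per-MB batchbuffer is used and B
 * slices fall back to the plain inter shader.
 */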
589 static void gen7_vme_pipeline_programing(VADriverContextP ctx, 
590                                          struct encode_state *encode_state,
591                                          struct intel_encoder_context *encoder_context)
592 {
593     struct gen6_vme_context *vme_context = encoder_context->vme_context;
594     struct intel_batchbuffer *batch = encoder_context->base.batch;
595     VAEncPictureParameterBufferH264 *pPicParameter = (VAEncPictureParameterBufferH264 *)encode_state->pic_param_ext->buffer;
596     VAEncSliceParameterBufferH264 *pSliceParameter = (VAEncSliceParameterBufferH264 *)encode_state->slice_params_ext[0]->buffer;
597     VAEncSequenceParameterBufferH264 *pSequenceParameter = (VAEncSequenceParameterBufferH264 *)encode_state->seq_param_ext->buffer;
598     int width_in_mbs = pSequenceParameter->picture_width_in_mbs;
599     int height_in_mbs = pSequenceParameter->picture_height_in_mbs;
600     int s;
601     bool allow_hwscore = true;
602     int kernel_shader;
603     unsigned int is_low_quality = (encoder_context->quality_level == ENCODER_LOW_QUALITY);
604
605     if (is_low_quality)
606         allow_hwscore = false;
607     else {
608         for (s = 0; s < encode_state->num_slice_params_ext; s++) {
609             pSliceParameter = (VAEncSliceParameterBufferH264 *)encode_state->slice_params_ext[s]->buffer; 
610             if ((pSliceParameter->macroblock_address % width_in_mbs)) {
611                 allow_hwscore = false;
612                 break;
613             }
614         }
615     }
616
617     if ((pSliceParameter->slice_type == SLICE_TYPE_I) ||
618         (pSliceParameter->slice_type == SLICE_TYPE_SI)) {
619         kernel_shader = AVC_VME_INTRA_SHADER;
620     } else if ((pSliceParameter->slice_type == SLICE_TYPE_P) ||
621                (pSliceParameter->slice_type == SLICE_TYPE_SP)) {
622         kernel_shader = AVC_VME_INTER_SHADER;
623     } else {
624         kernel_shader = AVC_VME_BINTER_SHADER;
625         if (!allow_hwscore)
626             kernel_shader = AVC_VME_INTER_SHADER;
627     }
628
629     if (allow_hwscore)
630         gen7_vme_walker_fill_vme_batchbuffer(ctx, 
631                                              encode_state,
632                                              width_in_mbs, height_in_mbs,
633                                              kernel_shader,
634                                              pPicParameter->pic_fields.bits.transform_8x8_mode_flag,
635                                              encoder_context);
636         
637     else
638         gen7_vme_fill_vme_batchbuffer(ctx, 
639                                       encode_state,
640                                       width_in_mbs, height_in_mbs,
641                                       kernel_shader,
642                                       pPicParameter->pic_fields.bits.transform_8x8_mode_flag,
643                                       encoder_context);
644
645     intel_batchbuffer_start_atomic(batch, 0x1000);
646     gen6_gpe_pipeline_setup(ctx, &vme_context->gpe_context, batch);
647     BEGIN_BATCH(batch, 2);
648     OUT_BATCH(batch, MI_BATCH_BUFFER_START | (1 << 8));
649     OUT_RELOC(batch,
650               vme_context->vme_batchbuffer.bo,
651               I915_GEM_DOMAIN_COMMAND, 0, 
652               0);
653     ADVANCE_BATCH(batch);
654
655     intel_batchbuffer_end_atomic(batch);        
656 }
657
658 static VAStatus gen7_vme_prepare(VADriverContextP ctx, 
659                                  struct encode_state *encode_state,
660                                  struct intel_encoder_context *encoder_context)
661 {
662     VAStatus vaStatus = VA_STATUS_SUCCESS;
663     VAEncSliceParameterBufferH264 *pSliceParameter = (VAEncSliceParameterBufferH264 *)encode_state->slice_params_ext[0]->buffer;
664     int is_intra = pSliceParameter->slice_type == SLICE_TYPE_I;
665     VAEncSequenceParameterBufferH264 *pSequenceParameter = (VAEncSequenceParameterBufferH264 *)encode_state->seq_param_ext->buffer;
666     struct gen6_vme_context *vme_context = encoder_context->vme_context;
667
668     if (!vme_context->h264_level ||
669         (vme_context->h264_level != pSequenceParameter->level_idc)) {
670         vme_context->h264_level = pSequenceParameter->level_idc;        
671     }
672         
673     intel_vme_update_mbmv_cost(ctx, encode_state, encoder_context);
674     /* Set up all the memory objects */
675     gen7_vme_surface_setup(ctx, encode_state, is_intra, encoder_context);
676     gen7_vme_interface_setup(ctx, encode_state, encoder_context);
677     gen7_vme_constant_setup(ctx, encode_state, encoder_context);
678     gen7_vme_avc_state_setup(ctx, encode_state, is_intra, encoder_context);
679
680     /* Program the media pipeline */
681     gen7_vme_pipeline_programing(ctx, encode_state, encoder_context);
682
683     return vaStatus;
684 }
685
686 static VAStatus gen7_vme_run(VADriverContextP ctx, 
687                              struct encode_state *encode_state,
688                              struct intel_encoder_context *encoder_context)
689 {
690     struct intel_batchbuffer *batch = encoder_context->base.batch;
691
692     intel_batchbuffer_flush(batch);
693
694     return VA_STATUS_SUCCESS;
695 }
696
697 static VAStatus gen7_vme_stop(VADriverContextP ctx, 
698                               struct encode_state *encode_state,
699                               struct intel_encoder_context *encoder_context)
700 {
701     return VA_STATUS_SUCCESS;
702 }
703
704 static VAStatus
705 gen7_vme_pipeline(VADriverContextP ctx,
706                   VAProfile profile,
707                   struct encode_state *encode_state,
708                   struct intel_encoder_context *encoder_context)
709 {
710     gen7_vme_media_init(ctx, encoder_context);
711     gen7_vme_prepare(ctx, encode_state, encoder_context);
712     gen7_vme_run(ctx, encode_state, encoder_context);
713     gen7_vme_stop(ctx, encode_state, encoder_context);
714
715     return VA_STATUS_SUCCESS;
716 }
717
718 static void
719 gen7_vme_mpeg2_output_buffer_setup(VADriverContextP ctx,
720                                    struct encode_state *encode_state,
721                                    int index,
722                                    int is_intra,
723                                    struct intel_encoder_context *encoder_context)
724
725 {
726     struct i965_driver_data *i965 = i965_driver_data(ctx);
727     struct gen6_vme_context *vme_context = encoder_context->vme_context;
728     VAEncSequenceParameterBufferMPEG2 *seq_param = (VAEncSequenceParameterBufferMPEG2 *)encode_state->seq_param_ext->buffer;
729     int width_in_mbs = ALIGN(seq_param->picture_width, 16) / 16;
730     int height_in_mbs = ALIGN(seq_param->picture_height, 16) / 16;
731
732     vme_context->vme_output.num_blocks = width_in_mbs * height_in_mbs;
733     vme_context->vme_output.pitch = 16; /* in bytes, always 16 */
734
735     if (is_intra)
736         vme_context->vme_output.size_block = INTRA_VME_OUTPUT_IN_BYTES;
737     else
738         vme_context->vme_output.size_block = INTER_VME_OUTPUT_IN_BYTES;
739
740     vme_context->vme_output.bo = dri_bo_alloc(i965->intel.bufmgr,
741                                               "VME output buffer",
742                                               vme_context->vme_output.num_blocks * vme_context->vme_output.size_block,
743                                               0x1000);
744     assert(vme_context->vme_output.bo);
745     vme_context->vme_buffer_suface_setup(ctx,
746                                          &vme_context->gpe_context,
747                                          &vme_context->vme_output,
748                                          BINDING_TABLE_OFFSET(index),
749                                          SURFACE_STATE_OFFSET(index));
750 }
751
752 static void
753 gen7_vme_mpeg2_output_vme_batchbuffer_setup(VADriverContextP ctx,
754                                             struct encode_state *encode_state,
755                                             int index,
756                                             struct intel_encoder_context *encoder_context)
757
758 {
759     struct i965_driver_data *i965 = i965_driver_data(ctx);
760     struct gen6_vme_context *vme_context = encoder_context->vme_context;
761     VAEncSequenceParameterBufferMPEG2 *seq_param = (VAEncSequenceParameterBufferMPEG2 *)encode_state->seq_param_ext->buffer;
762     int width_in_mbs = ALIGN(seq_param->picture_width, 16) / 16;
763     int height_in_mbs = ALIGN(seq_param->picture_height, 16) / 16;
764
765     vme_context->vme_batchbuffer.num_blocks = width_in_mbs * height_in_mbs + 1;
766     vme_context->vme_batchbuffer.size_block = 32; /* 2 OWORDs */
767     vme_context->vme_batchbuffer.pitch = 16;
768     vme_context->vme_batchbuffer.bo = dri_bo_alloc(i965->intel.bufmgr, 
769                                                    "VME batchbuffer",
770                                                    vme_context->vme_batchbuffer.num_blocks * vme_context->vme_batchbuffer.size_block,
771                                                    0x1000);
772     vme_context->vme_buffer_suface_setup(ctx,
773                                          &vme_context->gpe_context,
774                                          &vme_context->vme_batchbuffer,
775                                          BINDING_TABLE_OFFSET(index),
776                                          SURFACE_STATE_OFFSET(index));
777 }
778
779 static VAStatus
780 gen7_vme_mpeg2_surface_setup(VADriverContextP ctx, 
781                              struct encode_state *encode_state,
782                              int is_intra,
783                              struct intel_encoder_context *encoder_context)
784 {
785     struct object_surface *obj_surface;
786
787     /* Set up surface states */
788     /* current picture for encoding */
789     obj_surface = encode_state->input_yuv_object;
790     gen7_vme_source_surface_state(ctx, 0, obj_surface, encoder_context);
791     gen7_vme_media_source_surface_state(ctx, 4, obj_surface, encoder_context);
792
793     if (!is_intra) {
794         /* reference 0 */
795         obj_surface = encode_state->reference_objects[0];
796         if (obj_surface && obj_surface->bo != NULL)
797             gen7_vme_source_surface_state(ctx, 1, obj_surface, encoder_context);
798
799         /* reference 1 */
800         obj_surface = encode_state->reference_objects[1];
801         if (obj_surface && obj_surface->bo != NULL) 
802             gen7_vme_source_surface_state(ctx, 2, obj_surface, encoder_context);
803     }
804
805     /* VME output */
806     gen7_vme_mpeg2_output_buffer_setup(ctx, encode_state, 3, is_intra, encoder_context);
807     gen7_vme_mpeg2_output_vme_batchbuffer_setup(ctx, encode_state, 5, encoder_context);
808
809     return VA_STATUS_SUCCESS;
810 }
811
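/*
 * MPEG-2 variant of the batchbuffer fill: same MEDIA_OBJECT layout as
 * the AVC path, but the inline data carries no quality level and the
 * neighbour mask is not adjusted for the first row of each slice.
 */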
812 static void
813 gen7_vme_mpeg2_fill_vme_batchbuffer(VADriverContextP ctx,
814                                     struct encode_state *encode_state,
815                                     int mb_width, int mb_height,
816                                     int kernel,
817                                     int transform_8x8_mode_flag,
818                                     struct intel_encoder_context *encoder_context)
819 {
820     struct gen6_vme_context *vme_context = encoder_context->vme_context;
821     int mb_x = 0, mb_y = 0;
822     int i, s, j;
823     unsigned int *command_ptr;
824
825     dri_bo_map(vme_context->vme_batchbuffer.bo, 1);
826     command_ptr = vme_context->vme_batchbuffer.bo->virtual;
827
828     for (s = 0; s < encode_state->num_slice_params_ext; s++) {
829         VAEncSliceParameterBufferMPEG2 *slice_param = (VAEncSliceParameterBufferMPEG2 *)encode_state->slice_params_ext[s]->buffer;
830
831         for (j = 0; j < encode_state->slice_params_ext[s]->num_elements; j++) {
832             int slice_mb_begin = slice_param->macroblock_address;
833             int slice_mb_number = slice_param->num_macroblocks;
834             unsigned int mb_intra_ub;
835
836             for (i = 0; i < slice_mb_number;) {
837                 int mb_count = i + slice_mb_begin;    
838
839                 mb_x = mb_count % mb_width;
840                 mb_y = mb_count / mb_width;
841                 mb_intra_ub = 0;
842
843                 if (mb_x != 0) {
844                     mb_intra_ub |= INTRA_PRED_AVAIL_FLAG_AE;
845                 }
846
847                 if (mb_y != 0) {
848                     mb_intra_ub |= INTRA_PRED_AVAIL_FLAG_B;
849
850                     if (mb_x != 0)
851                         mb_intra_ub |= INTRA_PRED_AVAIL_FLAG_D;
852
853                     if (mb_x != (mb_width -1))
854                         mb_intra_ub |= INTRA_PRED_AVAIL_FLAG_C;
855                 }
856
857                 
858
859                 *command_ptr++ = (CMD_MEDIA_OBJECT | (8 - 2));
860                 *command_ptr++ = kernel;
861                 *command_ptr++ = 0;
862                 *command_ptr++ = 0;
863                 *command_ptr++ = 0;
864                 *command_ptr++ = 0;
865    
866                 /*inline data */
867                 *command_ptr++ = (mb_width << 16 | mb_y << 8 | mb_x);
868                 *command_ptr++ = ( (1 << 16) | transform_8x8_mode_flag | (mb_intra_ub << 8));
869
870                 i += 1;
871             }
872
873             slice_param++;
874         }
875     }
876
877     *command_ptr++ = 0;
878     *command_ptr++ = MI_BATCH_BUFFER_END;
879
880     dri_bo_unmap(vme_context->vme_batchbuffer.bo);
881 }
882
883 static void
884 gen7_vme_mpeg2_pipeline_programing(VADriverContextP ctx, 
885                                    struct encode_state *encode_state,
886                                    int is_intra,
887                                    struct intel_encoder_context *encoder_context)
888 {
889     struct gen6_vme_context *vme_context = encoder_context->vme_context;
890     struct intel_batchbuffer *batch = encoder_context->base.batch;
891     VAEncSequenceParameterBufferMPEG2 *seq_param = (VAEncSequenceParameterBufferMPEG2 *)encode_state->seq_param_ext->buffer;
892     int width_in_mbs = ALIGN(seq_param->picture_width, 16) / 16;
893     int height_in_mbs = ALIGN(seq_param->picture_height, 16) / 16;
894
895     bool allow_hwscore = true;
896     int s;
897
898     for (s = 0; s < encode_state->num_slice_params_ext; s++) {
899         int j;
900         VAEncSliceParameterBufferMPEG2 *slice_param = (VAEncSliceParameterBufferMPEG2 *)encode_state->slice_params_ext[s]->buffer;
901
902         for (j = 0; j < encode_state->slice_params_ext[s]->num_elements; j++) {
903             if (slice_param->macroblock_address % width_in_mbs) {
904                 allow_hwscore = false;
905                 break;
906             }
907         }
908     }
909
910     if (allow_hwscore) 
911         gen7_vme_mpeg2_walker_fill_vme_batchbuffer(ctx,
912                                                    encode_state,
913                                                    width_in_mbs, height_in_mbs,
914                                                    MPEG2_VME_INTER_SHADER,
915                                                    encoder_context);
916     else
917         gen7_vme_mpeg2_fill_vme_batchbuffer(ctx, 
918                                             encode_state,
919                                             width_in_mbs, height_in_mbs,
920                                             MPEG2_VME_INTER_SHADER,
921                                             0,
922                                             encoder_context);
923
924     intel_batchbuffer_start_atomic(batch, 0x1000);
925     gen6_gpe_pipeline_setup(ctx, &vme_context->gpe_context, batch);
926     BEGIN_BATCH(batch, 2);
927     OUT_BATCH(batch, MI_BATCH_BUFFER_START | (1 << 8));
928     OUT_RELOC(batch,
929               vme_context->vme_batchbuffer.bo,
930               I915_GEM_DOMAIN_COMMAND, 0, 
931               0);
932     ADVANCE_BATCH(batch);
933
934     intel_batchbuffer_end_atomic(batch);
935 }
936
937 static VAStatus
938 gen7_vme_mpeg2_prepare(VADriverContextP ctx, 
939                        struct encode_state *encode_state,
940                        struct intel_encoder_context *encoder_context)
941 {
942     VAStatus vaStatus = VA_STATUS_SUCCESS;
943     VAEncSequenceParameterBufferMPEG2 *seq_param = (VAEncSequenceParameterBufferMPEG2 *)encode_state->seq_param_ext->buffer;
944     struct gen6_vme_context *vme_context = encoder_context->vme_context;
945
946     if ((!vme_context->mpeg2_level) ||
947         (vme_context->mpeg2_level != (seq_param->sequence_extension.bits.profile_and_level_indication & MPEG2_LEVEL_MASK))) {
948         vme_context->mpeg2_level = seq_param->sequence_extension.bits.profile_and_level_indication & MPEG2_LEVEL_MASK;
949     }
950
951     /* Set up all the memory objects */
952
953     intel_vme_mpeg2_state_setup(ctx, encode_state, encoder_context);
954     gen7_vme_mpeg2_surface_setup(ctx, encode_state, 0, encoder_context);
955     gen7_vme_interface_setup(ctx, encode_state, encoder_context);
956     gen7_vme_constant_setup(ctx, encode_state, encoder_context);
957     gen7_vme_mpeg2_state_setup(ctx, encode_state, 0, encoder_context);
958
959     /* Program the media pipeline */
960     gen7_vme_mpeg2_pipeline_programing(ctx, encode_state, 0, encoder_context);
961
962     return vaStatus;
963 }
964
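/*
 * Intra MPEG-2 slices skip the VME pass entirely; the VME output
 * buffer is still allocated (intra-sized), presumably so that later
 * encoder stages still have a valid BO to bind.
 */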
965 static VAStatus
966 gen7_vme_mpeg2_pipeline(VADriverContextP ctx,
967                         VAProfile profile,
968                         struct encode_state *encode_state,
969                         struct intel_encoder_context *encoder_context)
970 {
971     struct i965_driver_data *i965 = i965_driver_data(ctx);
972     struct gen6_vme_context *vme_context = encoder_context->vme_context;
973     VAEncSliceParameterBufferMPEG2 *slice_param = 
974         (VAEncSliceParameterBufferMPEG2 *)encode_state->slice_params_ext[0]->buffer;
975     VAEncSequenceParameterBufferMPEG2 *seq_param = 
976         (VAEncSequenceParameterBufferMPEG2 *)encode_state->seq_param_ext->buffer;
977  
978     /* No need to exec VME for an intra slice */
979     if (slice_param->is_intra_slice) {
980         if(!vme_context->vme_output.bo) {
981             int w_in_mbs = ALIGN(seq_param->picture_width, 16) / 16;
982             int h_in_mbs = ALIGN(seq_param->picture_height, 16) / 16;
983
984             vme_context->vme_output.num_blocks = w_in_mbs * h_in_mbs;
985             vme_context->vme_output.pitch = 16; /* in bytes, always 16 */
986             vme_context->vme_output.size_block = INTRA_VME_OUTPUT_IN_BYTES;
987             vme_context->vme_output.bo = dri_bo_alloc(i965->intel.bufmgr,
988                                                       "MPEG2 VME output buffer",
989                                                       vme_context->vme_output.num_blocks
990                                                       * vme_context->vme_output.size_block,
991                                                       0x1000);
992         }
993
994         return VA_STATUS_SUCCESS;
995     }
996
997     gen7_vme_media_init(ctx, encoder_context);
998     gen7_vme_mpeg2_prepare(ctx, encode_state, encoder_context);
999     gen7_vme_run(ctx, encode_state, encoder_context);
1000     gen7_vme_stop(ctx, encode_state, encoder_context);
1001
1002     return VA_STATUS_SUCCESS;
1003 }
1004
1005 static void
1006 gen7_vme_context_destroy(void *context)
1007 {
1008     struct gen6_vme_context *vme_context = context;
1009
1010     i965_gpe_context_destroy(&vme_context->gpe_context);
1011
1012     dri_bo_unreference(vme_context->vme_output.bo);
1013     vme_context->vme_output.bo = NULL;
1014
1015     dri_bo_unreference(vme_context->vme_state.bo);
1016     vme_context->vme_state.bo = NULL;
1017
1018     dri_bo_unreference(vme_context->vme_batchbuffer.bo);
1019     vme_context->vme_batchbuffer.bo = NULL;
1020
1021     if (vme_context->vme_state_message) {
1022         free(vme_context->vme_state_message);
1023         vme_context->vme_state_message = NULL;
1024     }
1025
1026     free(vme_context);
1027 }
1028
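/*
 * Per-encoder VME context setup: picks the kernel list and pipeline
 * entry point for the codec (H.264 or MPEG-2), sizes the GPE context
 * (surface states + binding table, interface descriptors, CURBE, VFE
 * state), loads the kernels and plugs in the GEN7 surface/buffer setup
 * helpers.
 */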
1029 Bool gen7_vme_context_init(VADriverContextP ctx, struct intel_encoder_context *encoder_context)
1030 {
1031     struct gen6_vme_context *vme_context = calloc(1, sizeof(struct gen6_vme_context));
1032     struct i965_kernel *vme_kernel_list = NULL;
1033
1034     vme_context->gpe_context.surface_state_binding_table.length =
1035         (SURFACE_STATE_PADDED_SIZE + sizeof(unsigned int)) * MAX_MEDIA_SURFACES_GEN6;
1036
1037     vme_context->gpe_context.idrt.max_entries = MAX_INTERFACE_DESC_GEN6;
1038     vme_context->gpe_context.idrt.entry_size = sizeof(struct gen6_interface_descriptor_data);
1039     vme_context->gpe_context.curbe.length = CURBE_TOTAL_DATA_LENGTH;
1040
1041     vme_context->gpe_context.vfe_state.max_num_threads = 60 - 1;
1042     vme_context->gpe_context.vfe_state.num_urb_entries = 16;
1043     vme_context->gpe_context.vfe_state.gpgpu_mode = 0;
1044     vme_context->gpe_context.vfe_state.urb_entry_size = 59 - 1;
1045     vme_context->gpe_context.vfe_state.curbe_allocation_size = CURBE_ALLOCATION_SIZE - 1;
1046
1047     gen7_vme_scoreboard_init(ctx, vme_context);
1048
1049     if (encoder_context->codec == CODEC_H264) {
1050         vme_kernel_list = gen7_vme_kernels;
1051         vme_context->video_coding_type = VIDEO_CODING_AVC;
1052         vme_context->vme_kernel_sum = AVC_VME_KERNEL_SUM; 
1053         encoder_context->vme_pipeline = gen7_vme_pipeline; 
1054     } else if (encoder_context->codec == CODEC_MPEG2) {
1055         vme_kernel_list = gen7_vme_mpeg2_kernels;
1056         vme_context->video_coding_type = VIDEO_CODING_MPEG2;
1057         vme_context->vme_kernel_sum = MPEG2_VME_KERNEL_SUM;
1058         encoder_context->vme_pipeline = gen7_vme_mpeg2_pipeline;
1059     } else {
1060         /* Unsupported codec */
1061         assert(0);
1062     }
1063
1064     i965_gpe_load_kernels(ctx,
1065                           &vme_context->gpe_context,
1066                           vme_kernel_list,
1067                           vme_context->vme_kernel_sum);
1068
1069     vme_context->vme_surface2_setup = gen7_gpe_surface2_setup;
1070     vme_context->vme_media_rw_surface_setup = gen7_gpe_media_rw_surface_setup;
1071     vme_context->vme_buffer_suface_setup = gen7_gpe_buffer_suface_setup;
1072
1073     encoder_context->vme_context = vme_context;
1074     encoder_context->vme_context_destroy = gen7_vme_context_destroy;
1075     vme_context->vme_state_message = malloc(VME_MSG_LENGTH * sizeof(int));
1076
1077     return True;
1078 }