src/gen7_vme.c (libva-intel-driver)
1 /*
2  * Copyright © 2010-2011 Intel Corporation
3  *
4  * Permission is hereby granted, free of charge, to any person obtaining a
5  * copy of this software and associated documentation files (the
6  * "Software"), to deal in the Software without restriction, including
7  * without limitation the rights to use, copy, modify, merge, publish,
8  * distribute, sub license, and/or sell copies of the Software, and to
9  * permit persons to whom the Software is furnished to do so, subject to
10  * the following conditions:
11  *
12  * The above copyright notice and this permission notice (including the
13  * next paragraph) shall be included in all copies or substantial portions
14  * of the Software.
15  *
16  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
17  * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
18  * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
19  * IN NO EVENT SHALL PRECISION INSIGHT AND/OR ITS SUPPLIERS BE LIABLE FOR
20  * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
21  * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
22  * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
23  *
24  * Authors:
25  *    Zhao Yakui <yakui.zhao@intel.com>
26  *
27  */
28
29 #include <stdio.h>
30 #include <stdlib.h>
31 #include <stdbool.h>
32 #include <string.h>
33 #include <assert.h>
34
35 #include "intel_batchbuffer.h"
36 #include "intel_driver.h"
37
38 #include "i965_defines.h"
39 #include "i965_drv_video.h"
40 #include "i965_encoder.h"
41 #include "gen6_vme.h"
42 #include "gen6_mfc.h"
43 #ifdef SURFACE_STATE_PADDED_SIZE
44 #undef SURFACE_STATE_PADDED_SIZE
45 #endif
46
47 #define VME_MSG_LENGTH          32
48
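/*
 * The surface-state/binding-table buffer holds MAX_MEDIA_SURFACES_GEN6 padded
 * surface-state entries first, followed by the binding table, so both macros
 * below index into the same buffer.
 */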
49 #define SURFACE_STATE_PADDED_SIZE               SURFACE_STATE_PADDED_SIZE_GEN7
50 #define SURFACE_STATE_OFFSET(index)             (SURFACE_STATE_PADDED_SIZE * index)
51 #define BINDING_TABLE_OFFSET(index)             (SURFACE_STATE_OFFSET(MAX_MEDIA_SURFACES_GEN6) + sizeof(unsigned int) * index)
52
53 #define CURBE_ALLOCATION_SIZE   37              /* in 256-bit */
54 #define CURBE_TOTAL_DATA_LENGTH (4 * 32)        /* in byte, it should be less than or equal to CURBE_ALLOCATION_SIZE * 32 */
55 #define CURBE_URB_ENTRY_LENGTH  4               /* in 256-bit, it should be less than or equal to CURBE_TOTAL_DATA_LENGTH / 32 */
56
57 enum VIDEO_CODING_TYPE{
58     VIDEO_CODING_AVC = 0,
59     VIDEO_CODING_MPEG2,
60     VIDEO_CODING_SUM
61 };
62
63 enum AVC_VME_KERNEL_TYPE{ 
64     AVC_VME_INTRA_SHADER = 0,
65     AVC_VME_INTER_SHADER,
66     AVC_VME_BATCHBUFFER,
67     AVC_VME_BINTER_SHADER,
68     AVC_VME_KERNEL_SUM
69 };
70
71 enum MPEG2_VME_KERNEL_TYPE{
72     MPEG2_VME_INTER_SHADER = 0,
73     MPEG2_VME_BATCHBUFFER,
74     MPEG2_VME_KERNEL_SUM
75 };
76  
77
78 static const uint32_t gen7_vme_intra_frame[][4] = {
79 #include "shaders/vme/intra_frame_ivb.g7b"
80 };
81
82 static const uint32_t gen7_vme_inter_frame[][4] = {
83 #include "shaders/vme/inter_frame_ivb.g7b"
84 };
85
86 static const uint32_t gen7_vme_batchbuffer[][4] = {
87 #include "shaders/vme/batchbuffer.g7b"
88 };
89
90 static const uint32_t gen7_vme_binter_frame[][4] = {
91 #include "shaders/vme/inter_bframe_ivb.g7b"
92 };
93
94 static struct i965_kernel gen7_vme_kernels[] = {
95     {
96         "AVC VME Intra Frame",
97         AVC_VME_INTRA_SHADER,                   /*index*/
98         gen7_vme_intra_frame,                   
99         sizeof(gen7_vme_intra_frame),           
100         NULL
101     },
102     {
103         "AVC VME inter Frame",
104         AVC_VME_INTER_SHADER,
105         gen7_vme_inter_frame,
106         sizeof(gen7_vme_inter_frame),
107         NULL
108     },
109     {
110         "AVC VME BATCHBUFFER",
111         AVC_VME_BATCHBUFFER,
112         gen7_vme_batchbuffer,
113         sizeof(gen7_vme_batchbuffer),
114         NULL
115     },
116     {
117         "AVC VME binter Frame",
118         AVC_VME_BINTER_SHADER,
119         gen7_vme_binter_frame,
120         sizeof(gen7_vme_binter_frame),
121         NULL
122     }
123 };
124
125 static const uint32_t gen7_vme_mpeg2_inter_frame[][4] = {
126 #include "shaders/vme/mpeg2_inter_ivb.g7b"
127 };
128
129 static const uint32_t gen7_vme_mpeg2_batchbuffer[][4] = {
130 #include "shaders/vme/batchbuffer.g7b"
131 };
132
133 static struct i965_kernel gen7_vme_mpeg2_kernels[] = {
134     {
135         "MPEG2 VME inter Frame",
136         MPEG2_VME_INTER_SHADER,
137         gen7_vme_mpeg2_inter_frame,
138         sizeof(gen7_vme_mpeg2_inter_frame),
139         NULL
140     },
141     {
142         "MPEG2 VME BATCHBUFFER",
143         MPEG2_VME_BATCHBUFFER,
144         gen7_vme_mpeg2_batchbuffer,
145         sizeof(gen7_vme_mpeg2_batchbuffer),
146         NULL
147     },
148 };
149
150 /* only used for VME source surface state */
151 static void 
152 gen7_vme_source_surface_state(VADriverContextP ctx,
153                               int index,
154                               struct object_surface *obj_surface,
155                               struct intel_encoder_context *encoder_context)
156 {
157     struct gen6_vme_context *vme_context = encoder_context->vme_context;
158
159     vme_context->vme_surface2_setup(ctx,
160                                     &vme_context->gpe_context,
161                                     obj_surface,
162                                     BINDING_TABLE_OFFSET(index),
163                                     SURFACE_STATE_OFFSET(index));
164 }
165
166 static void
167 gen7_vme_media_source_surface_state(VADriverContextP ctx,
168                                     int index,
169                                     struct object_surface *obj_surface,
170                                     struct intel_encoder_context *encoder_context)
171 {
172     struct gen6_vme_context *vme_context = encoder_context->vme_context;
173
174     vme_context->vme_media_rw_surface_setup(ctx,
175                                             &vme_context->gpe_context,
176                                             obj_surface,
177                                             BINDING_TABLE_OFFSET(index),
178                                             SURFACE_STATE_OFFSET(index));
179 }
180
181 static void
182 gen7_vme_output_buffer_setup(VADriverContextP ctx,
183                              struct encode_state *encode_state,
184                              int index,
185                              struct intel_encoder_context *encoder_context)
186
187 {
188     struct i965_driver_data *i965 = i965_driver_data(ctx);
189     struct gen6_vme_context *vme_context = encoder_context->vme_context;
190     VAEncSequenceParameterBufferH264 *pSequenceParameter = (VAEncSequenceParameterBufferH264 *)encode_state->seq_param_ext->buffer;
191     VAEncSliceParameterBufferH264 *pSliceParameter = (VAEncSliceParameterBufferH264 *)encode_state->slice_params_ext[0]->buffer;
192     int is_intra = pSliceParameter->slice_type == SLICE_TYPE_I;
193     int width_in_mbs = pSequenceParameter->picture_width_in_mbs;
194     int height_in_mbs = pSequenceParameter->picture_height_in_mbs;
195
196     vme_context->vme_output.num_blocks = width_in_mbs * height_in_mbs;
197     vme_context->vme_output.pitch = 16; /* in bytes, always 16 */
198
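    /* One VME output record per macroblock; intra frames use the smaller
       intra record size, otherwise the larger inter record is allocated. */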
199     if (is_intra)
200         vme_context->vme_output.size_block = INTRA_VME_OUTPUT_IN_BYTES;
201     else
202         vme_context->vme_output.size_block = INTER_VME_OUTPUT_IN_BYTES;
203
204     vme_context->vme_output.bo = dri_bo_alloc(i965->intel.bufmgr, 
205                                               "VME output buffer",
206                                               vme_context->vme_output.num_blocks * vme_context->vme_output.size_block,
207                                               0x1000);
208     assert(vme_context->vme_output.bo);
209     vme_context->vme_buffer_suface_setup(ctx,
210                                          &vme_context->gpe_context,
211                                          &vme_context->vme_output,
212                                          BINDING_TABLE_OFFSET(index),
213                                          SURFACE_STATE_OFFSET(index));
214 }
215
216 static void
217 gen7_vme_output_vme_batchbuffer_setup(VADriverContextP ctx,
218                                       struct encode_state *encode_state,
219                                       int index,
220                                       struct intel_encoder_context *encoder_context)
221
222 {
223     struct i965_driver_data *i965 = i965_driver_data(ctx);
224     struct gen6_vme_context *vme_context = encoder_context->vme_context;
225     VAEncSequenceParameterBufferH264 *pSequenceParameter = (VAEncSequenceParameterBufferH264 *)encode_state->seq_param_ext->buffer;
226     int width_in_mbs = pSequenceParameter->picture_width_in_mbs;
227     int height_in_mbs = pSequenceParameter->picture_height_in_mbs;
228
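    /* One 32-byte second-level batchbuffer block per macroblock, plus one
       extra block for the trailing MI_BATCH_BUFFER_END emitted by the fill
       functions. */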
229     vme_context->vme_batchbuffer.num_blocks = width_in_mbs * height_in_mbs + 1;
230     vme_context->vme_batchbuffer.size_block = 32; /* 2 OWORDs */
231     vme_context->vme_batchbuffer.pitch = 16;
232     vme_context->vme_batchbuffer.bo = dri_bo_alloc(i965->intel.bufmgr, 
233                                                    "VME batchbuffer",
234                                                    vme_context->vme_batchbuffer.num_blocks * vme_context->vme_batchbuffer.size_block,
235                                                    0x1000);
236     vme_context->vme_buffer_suface_setup(ctx,
237                                          &vme_context->gpe_context,
238                                          &vme_context->vme_batchbuffer,
239                                          BINDING_TABLE_OFFSET(index),
240                                          SURFACE_STATE_OFFSET(index));
241 }
242
243 static VAStatus
244 gen7_vme_surface_setup(VADriverContextP ctx, 
245                        struct encode_state *encode_state,
246                        int is_intra,
247                        struct intel_encoder_context *encoder_context)
248 {
249     struct object_surface *obj_surface;
250
251     /* Set up surface states */
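    /* Binding table layout used below: 0 = VME view of the input picture,
       1/2 = reference frames (via intel_avc_vme_reference_state), 3 = VME
       output, 4 = media R/W view of the input, 5 = the VME batchbuffer. */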
252     /* current picture for encoding */
253     obj_surface = encode_state->input_yuv_object;
254     gen7_vme_source_surface_state(ctx, 0, obj_surface, encoder_context);
255     gen7_vme_media_source_surface_state(ctx, 4, obj_surface, encoder_context);
256
257     if (!is_intra) {
258         VAEncSliceParameterBufferH264 *slice_param = (VAEncSliceParameterBufferH264 *)encode_state->slice_params_ext[0]->buffer;
259         int slice_type;
260
261         slice_type = intel_avc_enc_slice_type_fixup(slice_param->slice_type);
262         assert(slice_type != SLICE_TYPE_I && slice_type != SLICE_TYPE_SI);
263
264         intel_avc_vme_reference_state(ctx, encode_state, encoder_context, 0, 1, gen7_vme_source_surface_state);
265
266         if (slice_type == SLICE_TYPE_B)
267             intel_avc_vme_reference_state(ctx, encode_state, encoder_context, 1, 2, gen7_vme_source_surface_state);
268     }
269
270     /* VME output */
271     gen7_vme_output_buffer_setup(ctx, encode_state, 3, encoder_context);
272     gen7_vme_output_vme_batchbuffer_setup(ctx, encode_state, 5, encoder_context);
273
274     return VA_STATUS_SUCCESS;
275 }
276
277 static VAStatus gen7_vme_interface_setup(VADriverContextP ctx, 
278                                          struct encode_state *encode_state,
279                                          struct intel_encoder_context *encoder_context)
280 {
281     struct gen6_vme_context *vme_context = encoder_context->vme_context;
282     struct gen6_interface_descriptor_data *desc;   
283     int i;
284     dri_bo *bo;
285
286     bo = vme_context->gpe_context.idrt.bo;
287     dri_bo_map(bo, 1);
288     assert(bo->virtual);
289     desc = bo->virtual;
290
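    /* Build one interface descriptor per VME kernel: each one points at the
       kernel, the shared binding table and the VME state buffer (bound as a
       "sampler"), and describes the CURBE range the kernel reads. */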
291     for (i = 0; i < vme_context->vme_kernel_sum; i++) {
292         struct i965_kernel *kernel;
293         kernel = &vme_context->gpe_context.kernels[i];
294         assert(sizeof(*desc) == 32);
295         /* Set up the descriptor table */
296         memset(desc, 0, sizeof(*desc));
297         desc->desc0.kernel_start_pointer = (kernel->bo->offset >> 6);
298         desc->desc2.sampler_count = 1; /* FIXME: */
299         desc->desc2.sampler_state_pointer = (vme_context->vme_state.bo->offset >> 5);
300         desc->desc3.binding_table_entry_count = 1; /* FIXME: */
301         desc->desc3.binding_table_pointer = (BINDING_TABLE_OFFSET(0) >> 5);
302         desc->desc4.constant_urb_entry_read_offset = 0;
303         desc->desc4.constant_urb_entry_read_length = CURBE_URB_ENTRY_LENGTH;
304                 
305         /*kernel start*/
306         dri_bo_emit_reloc(bo,   
307                           I915_GEM_DOMAIN_INSTRUCTION, 0,
308                           0,
309                           i * sizeof(*desc) + offsetof(struct gen6_interface_descriptor_data, desc0),
310                           kernel->bo);
311         /*Sampler State(VME state pointer)*/
312         dri_bo_emit_reloc(bo,
313                           I915_GEM_DOMAIN_INSTRUCTION, 0,
314                           (1 << 2),
315                           i * sizeof(*desc) + offsetof(struct gen6_interface_descriptor_data, desc2),
316                           vme_context->vme_state.bo);
317         desc++;
318     }
319     dri_bo_unmap(bo);
320
321     return VA_STATUS_SUCCESS;
322 }
323
324 static VAStatus gen7_vme_constant_setup(VADriverContextP ctx, 
325                                         struct encode_state *encode_state,
326                                         struct intel_encoder_context *encoder_context)
327 {
328     struct gen6_vme_context *vme_context = encoder_context->vme_context;
329     unsigned char *constant_buffer;
330     unsigned int *vme_state_message;
331     int mv_num;
332
333     vme_state_message = (unsigned int *)vme_context->vme_state_message;
334     mv_num = 32;
335
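    /* Limit the number of motion vectors per macroblock by codec and level;
       the limit is passed to the kernels in dword 31 of the state message. */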
336     if (encoder_context->codec == CODEC_H264) {
337         if (vme_context->h264_level >= 30) {
338             mv_num = 16;
339         
340             if (vme_context->h264_level >= 31)
341                 mv_num = 8;
342         }
343     } else if (encoder_context->codec == CODEC_MPEG2) { 
344         mv_num = 2;
345     }
346
347
348     vme_state_message[31] = mv_num;
349
350     dri_bo_map(vme_context->gpe_context.curbe.bo, 1);
351     assert(vme_context->gpe_context.curbe.bo->virtual);
352     constant_buffer = vme_context->gpe_context.curbe.bo->virtual;
353
354     /* Pass the required constant info into the constant buffer */
355     memcpy(constant_buffer, (char *)vme_context->vme_state_message, 128);
356         
357     dri_bo_unmap( vme_context->gpe_context.curbe.bo);
358
359     return VA_STATUS_SUCCESS;
360 }
361
362
363 static VAStatus gen7_vme_avc_state_setup(VADriverContextP ctx,
364                                          struct encode_state *encode_state,
365                                          int is_intra,
366                                          struct intel_encoder_context *encoder_context)
367 {
368     struct gen6_vme_context *vme_context = encoder_context->vme_context;
369     unsigned int *vme_state_message;
370     unsigned int *mb_cost_table;
371     int i;
372     VAEncSliceParameterBufferH264 *slice_param = (VAEncSliceParameterBufferH264 *)encode_state->slice_params_ext[0]->buffer;
373
374     mb_cost_table = (unsigned int *)vme_context->vme_state_message;
375     //building VME state message
376     dri_bo_map(vme_context->vme_state.bo, 1);
377     assert(vme_context->vme_state.bo->virtual);
378     vme_state_message = (unsigned int *)vme_context->vme_state.bo->virtual;
379
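    /* Dwords 0..13 carry the VME search-path programming: P/SP slices get
       the full search pattern, other slice types the shorter one below. */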
380     if ((slice_param->slice_type == SLICE_TYPE_P) ||
381         (slice_param->slice_type == SLICE_TYPE_SP)) {
382         vme_state_message[0] = 0x01010101;
383         vme_state_message[1] = 0x10010101;
384         vme_state_message[2] = 0x0F0F0F0F;
385         vme_state_message[3] = 0x100F0F0F;
386         vme_state_message[4] = 0x01010101;
387         vme_state_message[5] = 0x10010101;
388         vme_state_message[6] = 0x0F0F0F0F;
389         vme_state_message[7] = 0x100F0F0F;
390         vme_state_message[8] = 0x01010101;
391         vme_state_message[9] = 0x10010101;
392         vme_state_message[10] = 0x0F0F0F0F;
393         vme_state_message[11] = 0x000F0F0F;
394         vme_state_message[12] = 0x00;
395         vme_state_message[13] = 0x00;
396     } else {
397         vme_state_message[0] = 0x10010101;
398         vme_state_message[1] = 0x100F0F0F;
399         vme_state_message[2] = 0x10010101;
400         vme_state_message[3] = 0x000F0F0F;
401         vme_state_message[4] = 0;
402         vme_state_message[5] = 0;
403         vme_state_message[6] = 0;
404         vme_state_message[7] = 0;
405         vme_state_message[8] = 0;
406         vme_state_message[9] = 0;
407         vme_state_message[10] = 0;
408         vme_state_message[11] = 0;
409         vme_state_message[12] = 0;
410         vme_state_message[13] = 0;
411     }
412
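    /* Dwords 14..19 come from the MB/MV cost table prepared by
       intel_vme_update_mbmv_cost(); the remaining dwords are cleared. */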
413     vme_state_message[14] = (mb_cost_table[2] & 0xFFFF);
414     vme_state_message[15] = 0;
415     vme_state_message[16] = mb_cost_table[0];
416     vme_state_message[17] = mb_cost_table[1];
417     vme_state_message[18] = mb_cost_table[3];
418     vme_state_message[19] = mb_cost_table[4];
419
420     for(i = 20; i < 32; i++) {
421         vme_state_message[i] = 0;
422     }
423
424     dri_bo_unmap( vme_context->vme_state.bo);
425     return VA_STATUS_SUCCESS;
426 }
427
428 static VAStatus gen7_vme_mpeg2_state_setup(VADriverContextP ctx,
429                                            struct encode_state *encode_state,
430                                            int is_intra,
431                                            struct intel_encoder_context *encoder_context)
432 {
433     struct gen6_vme_context *vme_context = encoder_context->vme_context;
434     unsigned int *vme_state_message;
435     int i;
436     unsigned int *mb_cost_table;
437
438     mb_cost_table = (unsigned int *)vme_context->vme_state_message;
439         
440     //building VME state message
441     dri_bo_map(vme_context->vme_state.bo, 1);
442     assert(vme_context->vme_state.bo->virtual);
443     vme_state_message = (unsigned int *)vme_context->vme_state.bo->virtual;
444
445     vme_state_message[0] = 0x01010101;
446     vme_state_message[1] = 0x10010101;
447     vme_state_message[2] = 0x0F0F0F0F;
448     vme_state_message[3] = 0x100F0F0F;
449     vme_state_message[4] = 0x01010101;
450     vme_state_message[5] = 0x10010101;
451     vme_state_message[6] = 0x0F0F0F0F;
452     vme_state_message[7] = 0x100F0F0F;
453     vme_state_message[8] = 0x01010101;
454     vme_state_message[9] = 0x10010101;
455     vme_state_message[10] = 0x0F0F0F0F;
456     vme_state_message[11] = 0x000F0F0F;
457     vme_state_message[12] = 0x00;
458     vme_state_message[13] = 0x00;
459
460     vme_state_message[14] = (mb_cost_table[2] & 0xFFFF);
461     vme_state_message[15] = 0;
462     vme_state_message[16] = mb_cost_table[0];
463     vme_state_message[17] = 0;
464     vme_state_message[18] = mb_cost_table[3];
465     vme_state_message[19] = mb_cost_table[4];
466
467     for(i = 20; i < 32; i++) {
468         vme_state_message[i] = 0;
469     }
470     //vme_state_message[16] = 0x42424242;                       //cost function LUT set 0 for Intra
471
472     dri_bo_unmap( vme_context->vme_state.bo);
473     return VA_STATUS_SUCCESS;
474 }
475
476 static void
477 gen7_vme_fill_vme_batchbuffer(VADriverContextP ctx, 
478                               struct encode_state *encode_state,
479                               int mb_width, int mb_height,
480                               int kernel,
481                               int transform_8x8_mode_flag,
482                               struct intel_encoder_context *encoder_context)
483 {
484     struct gen6_vme_context *vme_context = encoder_context->vme_context;
485     int mb_x = 0, mb_y = 0;
486     int i, s, j;
487     unsigned int *command_ptr;
488
489
490     dri_bo_map(vme_context->vme_batchbuffer.bo, 1);
491     command_ptr = vme_context->vme_batchbuffer.bo->virtual;
492
493     for (s = 0; s < encode_state->num_slice_params_ext; s++) {
494         VAEncSliceParameterBufferH264 *slice_param = (VAEncSliceParameterBufferH264 *)encode_state->slice_params_ext[s]->buffer;
495
496         for (j = 0; j < encode_state->slice_params_ext[s]->num_elements; j++) {
497             int slice_mb_begin = slice_param->macroblock_address;
498             int slice_mb_number = slice_param->num_macroblocks;
499             unsigned int mb_intra_ub;
500             int slice_mb_x = slice_param->macroblock_address % mb_width;
501
502             for (i = 0; i < slice_mb_number;) {
503                 int mb_count = i + slice_mb_begin;    
504
505                 mb_x = mb_count % mb_width;
506                 mb_y = mb_count / mb_width;
507                 mb_intra_ub = 0;
508
509                 if (mb_x != 0) {
510                     mb_intra_ub |= INTRA_PRED_AVAIL_FLAG_AE;
511                 }
512
513                 if (mb_y != 0) {
514                     mb_intra_ub |= INTRA_PRED_AVAIL_FLAG_B;
515
516                     if (mb_x != 0)
517                         mb_intra_ub |= INTRA_PRED_AVAIL_FLAG_D;
518
519                     if (mb_x != (mb_width -1))
520                         mb_intra_ub |= INTRA_PRED_AVAIL_FLAG_C;
521                 }
522
523                 if (i < mb_width) {
524                     if (i == 0)
525                         mb_intra_ub &= ~(INTRA_PRED_AVAIL_FLAG_AE);
526
527                     mb_intra_ub &= ~(INTRA_PRED_AVAIL_FLAG_BCD_MASK);
528
529                     if ((i == (mb_width - 1)) && slice_mb_x) {
530                         mb_intra_ub |= INTRA_PRED_AVAIL_FLAG_C;
531                     }
532                 }
533                 
534                 if ((i == mb_width) && slice_mb_x) {
535                     mb_intra_ub &= ~(INTRA_PRED_AVAIL_FLAG_D);
536                 }
537
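                /* Emit an 8-dword MEDIA_OBJECT per macroblock: header, the
                   interface-descriptor (kernel) index, four zeroed control
                   dwords, then two inline dwords with the MB position and
                   the intra-availability/transform flags. */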
538                 *command_ptr++ = (CMD_MEDIA_OBJECT | (8 - 2));
539                 *command_ptr++ = kernel;
540                 *command_ptr++ = 0;
541                 *command_ptr++ = 0;
542                 *command_ptr++ = 0;
543                 *command_ptr++ = 0;
544    
545                 /*inline data */
546                 *command_ptr++ = (mb_width << 16 | mb_y << 8 | mb_x);
547                 *command_ptr++ = ( (1 << 16) | transform_8x8_mode_flag | (mb_intra_ub << 8));
548
549                 i += 1;
550             }
551
552             slice_param++;
553         }
554     }
555
556     *command_ptr++ = 0;
557     *command_ptr++ = MI_BATCH_BUFFER_END;
558
559     dri_bo_unmap(vme_context->vme_batchbuffer.bo);
560 }
561
562
563 static void gen7_vme_media_init(VADriverContextP ctx, struct intel_encoder_context *encoder_context)
564 {
565     struct i965_driver_data *i965 = i965_driver_data(ctx);
566     struct gen6_vme_context *vme_context = encoder_context->vme_context;
567     dri_bo *bo;
568
569     i965_gpe_context_init(ctx, &vme_context->gpe_context);
570
571     /* VME output buffer */
572     dri_bo_unreference(vme_context->vme_output.bo);
573     vme_context->vme_output.bo = NULL;
574
575     dri_bo_unreference(vme_context->vme_batchbuffer.bo);
576     vme_context->vme_batchbuffer.bo = NULL;
577
578     /* VME state */
579     dri_bo_unreference(vme_context->vme_state.bo);
580     bo = dri_bo_alloc(i965->intel.bufmgr,
581                       "Buffer",
582                       1024*16, 64);
583     assert(bo);
584     vme_context->vme_state.bo = bo;
585 }
586
587 static void gen7_vme_pipeline_programing(VADriverContextP ctx, 
588                                          struct encode_state *encode_state,
589                                          struct intel_encoder_context *encoder_context)
590 {
591     struct gen6_vme_context *vme_context = encoder_context->vme_context;
592     struct intel_batchbuffer *batch = encoder_context->base.batch;
593     VAEncPictureParameterBufferH264 *pPicParameter = (VAEncPictureParameterBufferH264 *)encode_state->pic_param_ext->buffer;
594     VAEncSliceParameterBufferH264 *pSliceParameter = (VAEncSliceParameterBufferH264 *)encode_state->slice_params_ext[0]->buffer;
595     VAEncSequenceParameterBufferH264 *pSequenceParameter = (VAEncSequenceParameterBufferH264 *)encode_state->seq_param_ext->buffer;
596     int width_in_mbs = pSequenceParameter->picture_width_in_mbs;
597     int height_in_mbs = pSequenceParameter->picture_height_in_mbs;
598     int s;
599     bool allow_hwscore = true;
600     int kernel_shader;
601
602     for (s = 0; s < encode_state->num_slice_params_ext; s++) {
603         pSliceParameter = (VAEncSliceParameterBufferH264 *)encode_state->slice_params_ext[s]->buffer; 
604         if ((pSliceParameter->macroblock_address % width_in_mbs)) {
605             allow_hwscore = false;
606             break;
607         }
608     }
609
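    /* The hardware-scoreboard (walker) path is only usable when every slice
       starts at the beginning of a macroblock row; pick the VME kernel from
       the slice type and fall back to the plain inter kernel for B slices
       when scoreboarding is not allowed. */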
610     if ((pSliceParameter->slice_type == SLICE_TYPE_I) ||
611         (pSliceParameter->slice_type == SLICE_TYPE_SI)) {
612         kernel_shader = AVC_VME_INTRA_SHADER;
613     } else if ((pSliceParameter->slice_type == SLICE_TYPE_P) ||
614                (pSliceParameter->slice_type == SLICE_TYPE_SP)) {
615         kernel_shader = AVC_VME_INTER_SHADER;
616     } else {
617         kernel_shader = AVC_VME_BINTER_SHADER;
618         if (!allow_hwscore)
619             kernel_shader = AVC_VME_INTER_SHADER;
620     }
621
622     if (allow_hwscore)
623         gen7_vme_walker_fill_vme_batchbuffer(ctx, 
624                                              encode_state,
625                                              width_in_mbs, height_in_mbs,
626                                              kernel_shader,
627                                              pPicParameter->pic_fields.bits.transform_8x8_mode_flag,
628                                              encoder_context);
629         
630     else
631         gen7_vme_fill_vme_batchbuffer(ctx, 
632                                       encode_state,
633                                       width_in_mbs, height_in_mbs,
634                                       kernel_shader,
635                                       pPicParameter->pic_fields.bits.transform_8x8_mode_flag,
636                                       encoder_context);
637
638     intel_batchbuffer_start_atomic(batch, 0x1000);
639     gen6_gpe_pipeline_setup(ctx, &vme_context->gpe_context, batch);
640     BEGIN_BATCH(batch, 2);
641     OUT_BATCH(batch, MI_BATCH_BUFFER_START | (2 << 6));
642     OUT_RELOC(batch,
643               vme_context->vme_batchbuffer.bo,
644               I915_GEM_DOMAIN_COMMAND, 0, 
645               0);
646     ADVANCE_BATCH(batch);
647
648     intel_batchbuffer_end_atomic(batch);        
649 }
650
651 static VAStatus gen7_vme_prepare(VADriverContextP ctx, 
652                                  struct encode_state *encode_state,
653                                  struct intel_encoder_context *encoder_context)
654 {
655     VAStatus vaStatus = VA_STATUS_SUCCESS;
656     VAEncSliceParameterBufferH264 *pSliceParameter = (VAEncSliceParameterBufferH264 *)encode_state->slice_params_ext[0]->buffer;
657     int is_intra = pSliceParameter->slice_type == SLICE_TYPE_I;
658     VAEncSequenceParameterBufferH264 *pSequenceParameter = (VAEncSequenceParameterBufferH264 *)encode_state->seq_param_ext->buffer;
659     struct gen6_vme_context *vme_context = encoder_context->vme_context;
660
661     if (!vme_context->h264_level ||
662         (vme_context->h264_level != pSequenceParameter->level_idc)) {
663         vme_context->h264_level = pSequenceParameter->level_idc;        
664     }
665         
666     intel_vme_update_mbmv_cost(ctx, encode_state, encoder_context);
667     /* Set up all the memory objects */
668     gen7_vme_surface_setup(ctx, encode_state, is_intra, encoder_context);
669     gen7_vme_interface_setup(ctx, encode_state, encoder_context);
670     gen7_vme_constant_setup(ctx, encode_state, encoder_context);
671     gen7_vme_avc_state_setup(ctx, encode_state, is_intra, encoder_context);
672
673     /* Program the media pipeline */
674     gen7_vme_pipeline_programing(ctx, encode_state, encoder_context);
675
676     return vaStatus;
677 }
678
679 static VAStatus gen7_vme_run(VADriverContextP ctx, 
680                              struct encode_state *encode_state,
681                              struct intel_encoder_context *encoder_context)
682 {
683     struct intel_batchbuffer *batch = encoder_context->base.batch;
684
685     intel_batchbuffer_flush(batch);
686
687     return VA_STATUS_SUCCESS;
688 }
689
690 static VAStatus gen7_vme_stop(VADriverContextP ctx, 
691                               struct encode_state *encode_state,
692                               struct intel_encoder_context *encoder_context)
693 {
694     return VA_STATUS_SUCCESS;
695 }
696
697 static VAStatus
698 gen7_vme_pipeline(VADriverContextP ctx,
699                   VAProfile profile,
700                   struct encode_state *encode_state,
701                   struct intel_encoder_context *encoder_context)
702 {
703     gen7_vme_media_init(ctx, encoder_context);
704     gen7_vme_prepare(ctx, encode_state, encoder_context);
705     gen7_vme_run(ctx, encode_state, encoder_context);
706     gen7_vme_stop(ctx, encode_state, encoder_context);
707
708     return VA_STATUS_SUCCESS;
709 }
710
711 static void
712 gen7_vme_mpeg2_output_buffer_setup(VADriverContextP ctx,
713                                    struct encode_state *encode_state,
714                                    int index,
715                                    int is_intra,
716                                    struct intel_encoder_context *encoder_context)
717
718 {
719     struct i965_driver_data *i965 = i965_driver_data(ctx);
720     struct gen6_vme_context *vme_context = encoder_context->vme_context;
721     VAEncSequenceParameterBufferMPEG2 *seq_param = (VAEncSequenceParameterBufferMPEG2 *)encode_state->seq_param_ext->buffer;
722     int width_in_mbs = ALIGN(seq_param->picture_width, 16) / 16;
723     int height_in_mbs = ALIGN(seq_param->picture_height, 16) / 16;
724
725     vme_context->vme_output.num_blocks = width_in_mbs * height_in_mbs;
726     vme_context->vme_output.pitch = 16; /* in bytes, always 16 */
727
728     if (is_intra)
729         vme_context->vme_output.size_block = INTRA_VME_OUTPUT_IN_BYTES;
730     else
731         vme_context->vme_output.size_block = INTER_VME_OUTPUT_IN_BYTES;
732
733     vme_context->vme_output.bo = dri_bo_alloc(i965->intel.bufmgr,
734                                               "VME output buffer",
735                                               vme_context->vme_output.num_blocks * vme_context->vme_output.size_block,
736                                               0x1000);
737     assert(vme_context->vme_output.bo);
738     vme_context->vme_buffer_suface_setup(ctx,
739                                          &vme_context->gpe_context,
740                                          &vme_context->vme_output,
741                                          BINDING_TABLE_OFFSET(index),
742                                          SURFACE_STATE_OFFSET(index));
743 }
744
745 static void
746 gen7_vme_mpeg2_output_vme_batchbuffer_setup(VADriverContextP ctx,
747                                             struct encode_state *encode_state,
748                                             int index,
749                                             struct intel_encoder_context *encoder_context)
750
751 {
752     struct i965_driver_data *i965 = i965_driver_data(ctx);
753     struct gen6_vme_context *vme_context = encoder_context->vme_context;
754     VAEncSequenceParameterBufferMPEG2 *seq_param = (VAEncSequenceParameterBufferMPEG2 *)encode_state->seq_param_ext->buffer;
755     int width_in_mbs = ALIGN(seq_param->picture_width, 16) / 16;
756     int height_in_mbs = ALIGN(seq_param->picture_height, 16) / 16;
757
758     vme_context->vme_batchbuffer.num_blocks = width_in_mbs * height_in_mbs + 1;
759     vme_context->vme_batchbuffer.size_block = 32; /* 2 OWORDs */
760     vme_context->vme_batchbuffer.pitch = 16;
761     vme_context->vme_batchbuffer.bo = dri_bo_alloc(i965->intel.bufmgr, 
762                                                    "VME batchbuffer",
763                                                    vme_context->vme_batchbuffer.num_blocks * vme_context->vme_batchbuffer.size_block,
764                                                    0x1000);
765     vme_context->vme_buffer_suface_setup(ctx,
766                                          &vme_context->gpe_context,
767                                          &vme_context->vme_batchbuffer,
768                                          BINDING_TABLE_OFFSET(index),
769                                          SURFACE_STATE_OFFSET(index));
770 }
771
772 static VAStatus
773 gen7_vme_mpeg2_surface_setup(VADriverContextP ctx, 
774                              struct encode_state *encode_state,
775                              int is_intra,
776                              struct intel_encoder_context *encoder_context)
777 {
778     struct object_surface *obj_surface;
779
780     /* Set up surface states */
781     /* current picture for encoding */
782     obj_surface = encode_state->input_yuv_object;
783     gen7_vme_source_surface_state(ctx, 0, obj_surface, encoder_context);
784     gen7_vme_media_source_surface_state(ctx, 4, obj_surface, encoder_context);
785
786     if (!is_intra) {
787         /* reference 0 */
788         obj_surface = encode_state->reference_objects[0];
789         if (obj_surface && obj_surface->bo != NULL)
790             gen7_vme_source_surface_state(ctx, 1, obj_surface, encoder_context);
791
792         /* reference 1 */
793         obj_surface = encode_state->reference_objects[1];
794         if (obj_surface && obj_surface->bo != NULL) 
795             gen7_vme_source_surface_state(ctx, 2, obj_surface, encoder_context);
796     }
797
798     /* VME output */
799     gen7_vme_mpeg2_output_buffer_setup(ctx, encode_state, 3, is_intra, encoder_context);
800     gen7_vme_mpeg2_output_vme_batchbuffer_setup(ctx, encode_state, 5, encoder_context);
801
802     return VA_STATUS_SUCCESS;
803 }
804
805 static void
806 gen7_vme_mpeg2_fill_vme_batchbuffer(VADriverContextP ctx,
807                                     struct encode_state *encode_state,
808                                     int mb_width, int mb_height,
809                                     int kernel,
810                                     int transform_8x8_mode_flag,
811                                     struct intel_encoder_context *encoder_context)
812 {
813     struct gen6_vme_context *vme_context = encoder_context->vme_context;
814     int mb_x = 0, mb_y = 0;
815     int i, s, j;
816     unsigned int *command_ptr;
817
818     dri_bo_map(vme_context->vme_batchbuffer.bo, 1);
819     command_ptr = vme_context->vme_batchbuffer.bo->virtual;
820
821     for (s = 0; s < encode_state->num_slice_params_ext; s++) {
822         VAEncSliceParameterBufferMPEG2 *slice_param = (VAEncSliceParameterBufferMPEG2 *)encode_state->slice_params_ext[s]->buffer;
823
824         for (j = 0; j < encode_state->slice_params_ext[s]->num_elements; j++) {
825             int slice_mb_begin = slice_param->macroblock_address;
826             int slice_mb_number = slice_param->num_macroblocks;
827             unsigned int mb_intra_ub;
828
829             for (i = 0; i < slice_mb_number;) {
830                 int mb_count = i + slice_mb_begin;    
831
832                 mb_x = mb_count % mb_width;
833                 mb_y = mb_count / mb_width;
834                 mb_intra_ub = 0;
835
836                 if (mb_x != 0) {
837                     mb_intra_ub |= INTRA_PRED_AVAIL_FLAG_AE;
838                 }
839
840                 if (mb_y != 0) {
841                     mb_intra_ub |= INTRA_PRED_AVAIL_FLAG_B;
842
843                     if (mb_x != 0)
844                         mb_intra_ub |= INTRA_PRED_AVAIL_FLAG_D;
845
846                     if (mb_x != (mb_width -1))
847                         mb_intra_ub |= INTRA_PRED_AVAIL_FLAG_C;
848                 }
849
850                 
851
852                 *command_ptr++ = (CMD_MEDIA_OBJECT | (8 - 2));
853                 *command_ptr++ = kernel;
854                 *command_ptr++ = 0;
855                 *command_ptr++ = 0;
856                 *command_ptr++ = 0;
857                 *command_ptr++ = 0;
858    
859                 /*inline data */
860                 *command_ptr++ = (mb_width << 16 | mb_y << 8 | mb_x);
861                 *command_ptr++ = ( (1 << 16) | transform_8x8_mode_flag | (mb_intra_ub << 8));
862
863                 i += 1;
864             }
865
866             slice_param++;
867         }
868     }
869
870     *command_ptr++ = 0;
871     *command_ptr++ = MI_BATCH_BUFFER_END;
872
873     dri_bo_unmap(vme_context->vme_batchbuffer.bo);
874 }
875
876 static void
877 gen7_vme_mpeg2_pipeline_programing(VADriverContextP ctx, 
878                                    struct encode_state *encode_state,
879                                    int is_intra,
880                                    struct intel_encoder_context *encoder_context)
881 {
882     struct gen6_vme_context *vme_context = encoder_context->vme_context;
883     struct intel_batchbuffer *batch = encoder_context->base.batch;
884     VAEncSequenceParameterBufferMPEG2 *seq_param = (VAEncSequenceParameterBufferMPEG2 *)encode_state->seq_param_ext->buffer;
885     int width_in_mbs = ALIGN(seq_param->picture_width, 16) / 16;
886     int height_in_mbs = ALIGN(seq_param->picture_height, 16) / 16;
887
888     bool allow_hwscore = true;
889     int s;
890
891     for (s = 0; s < encode_state->num_slice_params_ext; s++) {
892         int j;
893         VAEncSliceParameterBufferMPEG2 *slice_param = (VAEncSliceParameterBufferMPEG2 *)encode_state->slice_params_ext[s]->buffer;
894
895         for (j = 0; j < encode_state->slice_params_ext[s]->num_elements; j++) {
896             if (slice_param->macroblock_address % width_in_mbs) {
897                 allow_hwscore = false;
898                 break;
899             }
900         }
901     }
902
903     if (allow_hwscore) 
904         gen7_vme_mpeg2_walker_fill_vme_batchbuffer(ctx,
905                                                    encode_state,
906                                                    width_in_mbs, height_in_mbs,
907                                                    MPEG2_VME_INTER_SHADER,
908                                                    encoder_context);
909     else
910         gen7_vme_mpeg2_fill_vme_batchbuffer(ctx, 
911                                             encode_state,
912                                             width_in_mbs, height_in_mbs,
913                                             MPEG2_VME_INTER_SHADER,
914                                             0,
915                                             encoder_context);
916
917     intel_batchbuffer_start_atomic(batch, 0x1000);
918     gen6_gpe_pipeline_setup(ctx, &vme_context->gpe_context, batch);
919     BEGIN_BATCH(batch, 2);
920     OUT_BATCH(batch, MI_BATCH_BUFFER_START | (2 << 6));
921     OUT_RELOC(batch,
922               vme_context->vme_batchbuffer.bo,
923               I915_GEM_DOMAIN_COMMAND, 0, 
924               0);
925     ADVANCE_BATCH(batch);
926
927     intel_batchbuffer_end_atomic(batch);
928 }
929
930 static VAStatus
931 gen7_vme_mpeg2_prepare(VADriverContextP ctx, 
932                        struct encode_state *encode_state,
933                        struct intel_encoder_context *encoder_context)
934 {
935     VAStatus vaStatus = VA_STATUS_SUCCESS;
936     VAEncSequenceParameterBufferMPEG2 *seq_param = (VAEncSequenceParameterBufferMPEG2 *)encode_state->seq_param_ext->buffer;
937     struct gen6_vme_context *vme_context = encoder_context->vme_context;
938
939     if ((!vme_context->mpeg2_level) ||
940         (vme_context->mpeg2_level != (seq_param->sequence_extension.bits.profile_and_level_indication & MPEG2_LEVEL_MASK))) {
941         vme_context->mpeg2_level = seq_param->sequence_extension.bits.profile_and_level_indication & MPEG2_LEVEL_MASK;
942     }
943
944     /* Set up all the memory objects */
945
946     intel_vme_mpeg2_state_setup(ctx, encode_state, encoder_context);
947     gen7_vme_mpeg2_surface_setup(ctx, encode_state, 0, encoder_context);
948     gen7_vme_interface_setup(ctx, encode_state, encoder_context);
949     gen7_vme_constant_setup(ctx, encode_state, encoder_context);
950     gen7_vme_mpeg2_state_setup(ctx, encode_state, 0, encoder_context);
951
952     /* Program the media pipeline */
953     gen7_vme_mpeg2_pipeline_programing(ctx, encode_state, 0, encoder_context);
954
955     return vaStatus;
956 }
957
958 static VAStatus
959 gen7_vme_mpeg2_pipeline(VADriverContextP ctx,
960                         VAProfile profile,
961                         struct encode_state *encode_state,
962                         struct intel_encoder_context *encoder_context)
963 {
964     struct i965_driver_data *i965 = i965_driver_data(ctx);
965     struct gen6_vme_context *vme_context = encoder_context->vme_context;
966     VAEncSliceParameterBufferMPEG2 *slice_param = 
967         (VAEncSliceParameterBufferMPEG2 *)encode_state->slice_params_ext[0]->buffer;
968     VAEncSequenceParameterBufferMPEG2 *seq_param = 
969         (VAEncSequenceParameterBufferMPEG2 *)encode_state->seq_param_ext->buffer;
970  
971     /* No need to run VME for an intra slice */
972     if (slice_param->is_intra_slice) {
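        /* Still allocate an (unused) VME output buffer here, presumably so
           the following PAK stage has a valid buffer to reference. */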
973         if(!vme_context->vme_output.bo) {
974             int w_in_mbs = ALIGN(seq_param->picture_width, 16) / 16;
975             int h_in_mbs = ALIGN(seq_param->picture_height, 16) / 16;
976
977             vme_context->vme_output.num_blocks = w_in_mbs * h_in_mbs;
978             vme_context->vme_output.pitch = 16; /* in bytes, always 16 */
979             vme_context->vme_output.size_block = INTRA_VME_OUTPUT_IN_BYTES;
980             vme_context->vme_output.bo = dri_bo_alloc(i965->intel.bufmgr,
981                                                       "MPEG2 VME output buffer",
982                                                       vme_context->vme_output.num_blocks
983                                                       * vme_context->vme_output.size_block,
984                                                       0x1000);
985         }
986
987         return VA_STATUS_SUCCESS;
988     }
989
990     gen7_vme_media_init(ctx, encoder_context);
991     gen7_vme_mpeg2_prepare(ctx, encode_state, encoder_context);
992     gen7_vme_run(ctx, encode_state, encoder_context);
993     gen7_vme_stop(ctx, encode_state, encoder_context);
994
995     return VA_STATUS_SUCCESS;
996 }
997
998 static void
999 gen7_vme_context_destroy(void *context)
1000 {
1001     struct gen6_vme_context *vme_context = context;
1002
1003     i965_gpe_context_destroy(&vme_context->gpe_context);
1004
1005     dri_bo_unreference(vme_context->vme_output.bo);
1006     vme_context->vme_output.bo = NULL;
1007
1008     dri_bo_unreference(vme_context->vme_state.bo);
1009     vme_context->vme_state.bo = NULL;
1010
1011     dri_bo_unreference(vme_context->vme_batchbuffer.bo);
1012     vme_context->vme_batchbuffer.bo = NULL;
1013
1014     if (vme_context->vme_state_message) {
1015         free(vme_context->vme_state_message);
1016         vme_context->vme_state_message = NULL;
1017     }
1018
1019     free(vme_context);
1020 }
1021
1022 Bool gen7_vme_context_init(VADriverContextP ctx, struct intel_encoder_context *encoder_context)
1023 {
1024     struct gen6_vme_context *vme_context = calloc(1, sizeof(struct gen6_vme_context));
1025     struct i965_kernel *vme_kernel_list = NULL;
1026
1027     vme_context->gpe_context.surface_state_binding_table.length =
1028         (SURFACE_STATE_PADDED_SIZE + sizeof(unsigned int)) * MAX_MEDIA_SURFACES_GEN6;
1029
1030     vme_context->gpe_context.idrt.max_entries = MAX_INTERFACE_DESC_GEN6;
1031     vme_context->gpe_context.idrt.entry_size = sizeof(struct gen6_interface_descriptor_data);
1032     vme_context->gpe_context.curbe.length = CURBE_TOTAL_DATA_LENGTH;
1033
1034     vme_context->gpe_context.vfe_state.max_num_threads = 60 - 1;
1035     vme_context->gpe_context.vfe_state.num_urb_entries = 16;
1036     vme_context->gpe_context.vfe_state.gpgpu_mode = 0;
1037     vme_context->gpe_context.vfe_state.urb_entry_size = 59 - 1;
1038     vme_context->gpe_context.vfe_state.curbe_allocation_size = CURBE_ALLOCATION_SIZE - 1;
1039
1040     gen7_vme_scoreboard_init(ctx, vme_context);
1041
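    /* Select the kernel set and pipeline entry point per codec: AVC uses the
       intra/inter/B-inter kernels, MPEG-2 only the inter kernel, plus the
       batchbuffer kernel in both cases. */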
1042     if (encoder_context->codec == CODEC_H264) {
1043         vme_kernel_list = gen7_vme_kernels;
1044         vme_context->video_coding_type = VIDEO_CODING_AVC;
1045         vme_context->vme_kernel_sum = AVC_VME_KERNEL_SUM; 
1046         encoder_context->vme_pipeline = gen7_vme_pipeline; 
1047     } else if (encoder_context->codec == CODEC_MPEG2) {
1048         vme_kernel_list = gen7_vme_mpeg2_kernels;
1049         vme_context->video_coding_type = VIDEO_CODING_MPEG2;
1050         vme_context->vme_kernel_sum = MPEG2_VME_KERNEL_SUM;
1051         encoder_context->vme_pipeline = gen7_vme_mpeg2_pipeline;
1052     } else {
1053         /* Unsupported codec */
1054         assert(0);
1055     }
1056
1057     i965_gpe_load_kernels(ctx,
1058                           &vme_context->gpe_context,
1059                           vme_kernel_list,
1060                           vme_context->vme_kernel_sum);
1061
1062     vme_context->vme_surface2_setup = gen7_gpe_surface2_setup;
1063     vme_context->vme_media_rw_surface_setup = gen7_gpe_media_rw_surface_setup;
1064     vme_context->vme_buffer_suface_setup = gen7_gpe_buffer_suface_setup;
1065
1066     encoder_context->vme_context = vme_context;
1067     encoder_context->vme_context_destroy = gen7_vme_context_destroy;
1068     vme_context->vme_state_message = malloc(VME_MSG_LENGTH * sizeof(int));
1069
1070     return True;
1071 }