VPP: Fix Coverity alert on uninitialized vpp_kernels
[platform/upstream/libva-intel-driver.git] / src / gen75_vme.c
1 /*
2  * Copyright © 2010-2012 Intel Corporation
3  *
4  * Permission is hereby granted, free of charge, to any person obtaining a
5  * copy of this software and associated documentation files (the
6  * "Software"), to deal in the Software without restriction, including
7  * without limitation the rights to use, copy, modify, merge, publish,
8  * distribute, sub license, and/or sell copies of the Software, and to
9  * permit persons to whom the Software is furnished to do so, subject to
10  * the following conditions:
11  *
12  * The above copyright notice and this permission notice (including the
13  * next paragraph) shall be included in all copies or substantial portions
14  * of the Software.
15  *
16  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
17  * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
18  * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
19  * IN NO EVENT SHALL PRECISION INSIGHT AND/OR ITS SUPPLIERS BE LIABLE FOR
20  * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
21  * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
22  * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
23  *
24  * Authors:
25  *    Zhao Yakui <yakui.zhao@intel.com>
26  *    Xiang Haihao <haihao.xiang@intel.com>
27  *
28  */
29
30 #include "sysdeps.h"
31
32 #include "intel_batchbuffer.h"
33 #include "intel_driver.h"
34
35 #include "i965_defines.h"
36 #include "i965_drv_video.h"
37 #include "i965_encoder.h"
38 #include "gen6_vme.h"
39 #include "gen6_mfc.h"
40
41 #define SURFACE_STATE_PADDED_SIZE               MAX(SURFACE_STATE_PADDED_SIZE_GEN6, SURFACE_STATE_PADDED_SIZE_GEN7)
42 #define SURFACE_STATE_OFFSET(index)             (SURFACE_STATE_PADDED_SIZE * index)
43 #define BINDING_TABLE_OFFSET(index)             (SURFACE_STATE_OFFSET(MAX_MEDIA_SURFACES_GEN6) + sizeof(unsigned int) * index)
44
45 #define VME_INTRA_SHADER        0
46 #define VME_INTER_SHADER        1
47 #define VME_BINTER_SHADER       3
48 #define VME_BATCHBUFFER         2
49
50 #define CURBE_ALLOCATION_SIZE   37              /* in 256-bit */
51 #define CURBE_TOTAL_DATA_LENGTH (4 * 32)        /* in byte, it should be less than or equal to CURBE_ALLOCATION_SIZE * 32 */
52 #define CURBE_URB_ENTRY_LENGTH  4               /* in 256-bit, it should be less than or equal to CURBE_TOTAL_DATA_LENGTH / 32 */
53
54 #define VME_MSG_LENGTH          32
55   
/* Precompiled Haswell VME kernel binaries, generated from the shader
 * sources under shaders/vme/.  Each row is one 128-bit (4-dword) EU
 * instruction; the .g75b files expand to comma-separated dword lists. */
static const uint32_t gen75_vme_intra_frame[][4] = {
#include "shaders/vme/intra_frame_haswell.g75b"
};

static const uint32_t gen75_vme_inter_frame[][4] = {
#include "shaders/vme/inter_frame_haswell.g75b"
};

static const uint32_t gen75_vme_inter_bframe[][4] = {
#include "shaders/vme/inter_bframe_haswell.g75b"
};

/* Batchbuffer-generation kernel shared with other codecs (see the MPEG-2
 * table below, which includes the same binary). */
static const uint32_t gen75_vme_batchbuffer[][4] = {
#include "shaders/vme/batchbuffer.g75b"
};
71
/* Kernel descriptor table for H.264 VME.  Entries are positional
 * (name, interface index, binary, binary size, bo); the bo member is
 * filled in at load time, hence NULL here.  The interface index values
 * (VME_*_SHADER / VME_BATCHBUFFER) select the interface descriptor used
 * by MEDIA_OBJECT commands, so ordering within this array need not match
 * the index values. */
static struct i965_kernel gen75_vme_kernels[] = {
    {
        "VME Intra Frame",
        VME_INTRA_SHADER, /*index*/
        gen75_vme_intra_frame,                  
        sizeof(gen75_vme_intra_frame),          
        NULL
    },
    {
        "VME inter Frame",
        VME_INTER_SHADER,
        gen75_vme_inter_frame,
        sizeof(gen75_vme_inter_frame),
        NULL
    },
    {
        "VME BATCHBUFFER",
        VME_BATCHBUFFER,
        gen75_vme_batchbuffer,
        sizeof(gen75_vme_batchbuffer),
        NULL
    },
    {
        "VME inter BFrame",
        VME_BINTER_SHADER,
        gen75_vme_inter_bframe,
        sizeof(gen75_vme_inter_bframe),
        NULL
    }
};
102
/* MPEG-2 VME kernel binaries.  The intra-frame and batchbuffer kernels
 * reuse the same binaries as the H.264 tables above; only the inter
 * kernel is MPEG-2 specific. */
static const uint32_t gen75_vme_mpeg2_intra_frame[][4] = {
#include "shaders/vme/intra_frame_haswell.g75b"
};

static const uint32_t gen75_vme_mpeg2_inter_frame[][4] = {
#include "shaders/vme/mpeg2_inter_haswell.g75b"
};

static const uint32_t gen75_vme_mpeg2_batchbuffer[][4] = {
#include "shaders/vme/batchbuffer.g75b"
};
114
/* Kernel descriptor table for MPEG-2 VME.  Same layout as
 * gen75_vme_kernels but without a B-frame inter kernel. */
static struct i965_kernel gen75_vme_mpeg2_kernels[] = {
    {
        "VME Intra Frame",
        VME_INTRA_SHADER, /*index*/
        gen75_vme_mpeg2_intra_frame,                    
        sizeof(gen75_vme_mpeg2_intra_frame),            
        NULL
    },
    {
        "VME inter Frame",
        VME_INTER_SHADER,
        gen75_vme_mpeg2_inter_frame,
        sizeof(gen75_vme_mpeg2_inter_frame),
        NULL
    },
    {
        "VME BATCHBUFFER",
        VME_BATCHBUFFER,
        gen75_vme_mpeg2_batchbuffer,
        sizeof(gen75_vme_mpeg2_batchbuffer),
        NULL
    },
};
138
139 /* only used for VME source surface state */
140 static void 
141 gen75_vme_source_surface_state(VADriverContextP ctx,
142                                int index,
143                                struct object_surface *obj_surface,
144                                struct intel_encoder_context *encoder_context)
145 {
146     struct gen6_vme_context *vme_context = encoder_context->vme_context;
147
148     vme_context->vme_surface2_setup(ctx,
149                                     &vme_context->gpe_context,
150                                     obj_surface,
151                                     BINDING_TABLE_OFFSET(index),
152                                     SURFACE_STATE_OFFSET(index));
153 }
154
155 static void
156 gen75_vme_media_source_surface_state(VADriverContextP ctx,
157                                      int index,
158                                      struct object_surface *obj_surface,
159                                      struct intel_encoder_context *encoder_context)
160 {
161     struct gen6_vme_context *vme_context = encoder_context->vme_context;
162
163     vme_context->vme_media_rw_surface_setup(ctx,
164                                             &vme_context->gpe_context,
165                                             obj_surface,
166                                             BINDING_TABLE_OFFSET(index),
167                                             SURFACE_STATE_OFFSET(index));
168 }
169
170 static void
171 gen75_vme_media_chroma_source_surface_state(VADriverContextP ctx,
172                                             int index,
173                                             struct object_surface *obj_surface,
174                                             struct intel_encoder_context *encoder_context)
175 {
176     struct gen6_vme_context *vme_context = encoder_context->vme_context;
177
178     vme_context->vme_media_chroma_surface_setup(ctx,
179                                                 &vme_context->gpe_context,
180                                                 obj_surface,
181                                                 BINDING_TABLE_OFFSET(index),
182                                                 SURFACE_STATE_OFFSET(index));
183 }
184
185 static void
186 gen75_vme_output_buffer_setup(VADriverContextP ctx,
187                               struct encode_state *encode_state,
188                               int index,
189                               struct intel_encoder_context *encoder_context)
190
191 {
192     struct i965_driver_data *i965 = i965_driver_data(ctx);
193     struct gen6_vme_context *vme_context = encoder_context->vme_context;
194     VAEncSequenceParameterBufferH264 *pSequenceParameter = (VAEncSequenceParameterBufferH264 *)encode_state->seq_param_ext->buffer;
195     VAEncSliceParameterBufferH264 *pSliceParameter = (VAEncSliceParameterBufferH264 *)encode_state->slice_params_ext[0]->buffer;
196     int is_intra = pSliceParameter->slice_type == SLICE_TYPE_I;
197     int width_in_mbs = pSequenceParameter->picture_width_in_mbs;
198     int height_in_mbs = pSequenceParameter->picture_height_in_mbs;
199
200     vme_context->vme_output.num_blocks = width_in_mbs * height_in_mbs;
201     vme_context->vme_output.pitch = 16; /* in bytes, always 16 */
202
203     if (is_intra)
204         vme_context->vme_output.size_block = INTRA_VME_OUTPUT_IN_BYTES * 2;
205     else
206         vme_context->vme_output.size_block = INTRA_VME_OUTPUT_IN_BYTES * 24;
207     /*
208      * Inter MV . 32-byte Intra search + 16 IME info + 128 IME MV + 32 IME Ref
209      * + 16 FBR Info + 128 FBR MV + 32 FBR Ref.
210      * 16 * (2 + 2 * (1 + 8 + 2))= 16 * 24.
211      */
212
213     vme_context->vme_output.bo = dri_bo_alloc(i965->intel.bufmgr, 
214                                               "VME output buffer",
215                                               vme_context->vme_output.num_blocks * vme_context->vme_output.size_block,
216                                               0x1000);
217     assert(vme_context->vme_output.bo);
218     vme_context->vme_buffer_suface_setup(ctx,
219                                          &vme_context->gpe_context,
220                                          &vme_context->vme_output,
221                                          BINDING_TABLE_OFFSET(index),
222                                          SURFACE_STATE_OFFSET(index));
223 }
224
225 static void
226 gen75_vme_output_vme_batchbuffer_setup(VADriverContextP ctx,
227                                        struct encode_state *encode_state,
228                                        int index,
229                                        struct intel_encoder_context *encoder_context)
230
231 {
232     struct i965_driver_data *i965 = i965_driver_data(ctx);
233     struct gen6_vme_context *vme_context = encoder_context->vme_context;
234     VAEncSequenceParameterBufferH264 *pSequenceParameter = (VAEncSequenceParameterBufferH264 *)encode_state->seq_param_ext->buffer;
235     int width_in_mbs = pSequenceParameter->picture_width_in_mbs;
236     int height_in_mbs = pSequenceParameter->picture_height_in_mbs;
237
238     vme_context->vme_batchbuffer.num_blocks = width_in_mbs * height_in_mbs + 1;
239     vme_context->vme_batchbuffer.size_block = 64; /* 4 OWORDs */
240     vme_context->vme_batchbuffer.pitch = 16;
241     vme_context->vme_batchbuffer.bo = dri_bo_alloc(i965->intel.bufmgr, 
242                                                    "VME batchbuffer",
243                                                    vme_context->vme_batchbuffer.num_blocks * vme_context->vme_batchbuffer.size_block,
244                                                    0x1000);
245     vme_context->vme_buffer_suface_setup(ctx,
246                                          &vme_context->gpe_context,
247                                          &vme_context->vme_batchbuffer,
248                                          BINDING_TABLE_OFFSET(index),
249                                          SURFACE_STATE_OFFSET(index));
250 }
251
252 static VAStatus
253 gen75_vme_surface_setup(VADriverContextP ctx, 
254                         struct encode_state *encode_state,
255                         int is_intra,
256                         struct intel_encoder_context *encoder_context)
257 {
258     struct object_surface *obj_surface;
259
260     /*Setup surfaces state*/
261     /* current picture for encoding */
262     obj_surface = encode_state->input_yuv_object;
263     gen75_vme_source_surface_state(ctx, 0, obj_surface, encoder_context);
264     gen75_vme_media_source_surface_state(ctx, 4, obj_surface, encoder_context);
265     gen75_vme_media_chroma_source_surface_state(ctx, 6, obj_surface, encoder_context);
266
267     if (!is_intra) {
268         VAEncSliceParameterBufferH264 *slice_param = (VAEncSliceParameterBufferH264 *)encode_state->slice_params_ext[0]->buffer;
269         int slice_type;
270
271         slice_type = intel_avc_enc_slice_type_fixup(slice_param->slice_type);
272         assert(slice_type != SLICE_TYPE_I && slice_type != SLICE_TYPE_SI);
273
274         intel_avc_vme_reference_state(ctx, encode_state, encoder_context, 0, 1, gen75_vme_source_surface_state);
275
276         if (slice_type == SLICE_TYPE_B)
277             intel_avc_vme_reference_state(ctx, encode_state, encoder_context, 1, 2, gen75_vme_source_surface_state);
278     }
279
280     /* VME output */
281     gen75_vme_output_buffer_setup(ctx, encode_state, 3, encoder_context);
282     gen75_vme_output_vme_batchbuffer_setup(ctx, encode_state, 5, encoder_context);
283
284     return VA_STATUS_SUCCESS;
285 }
286
/* Build the interface descriptor remap table (IDRT): one 32-byte
 * descriptor per loaded VME kernel, each pointing at the kernel binary
 * and the shared binding table.  A relocation is emitted for every
 * kernel-start pointer so the kernel bo address is patched at exec time. */
static VAStatus gen75_vme_interface_setup(VADriverContextP ctx, 
                                          struct encode_state *encode_state,
                                          struct intel_encoder_context *encoder_context)
{
    struct gen6_vme_context *vme_context = encoder_context->vme_context;
    struct gen6_interface_descriptor_data *desc;   
    int i;
    dri_bo *bo;

    bo = vme_context->gpe_context.idrt.bo;
    dri_bo_map(bo, 1);
    assert(bo->virtual);
    desc = bo->virtual;

    for (i = 0; i < vme_context->vme_kernel_sum; i++) {
        struct i965_kernel *kernel;
        kernel = &vme_context->gpe_context.kernels[i];
        assert(sizeof(*desc) == 32);
        /*Setup the descritor table*/
        memset(desc, 0, sizeof(*desc));
        /* kernel start pointer is in 64-byte units */
        desc->desc0.kernel_start_pointer = (kernel->bo->offset >> 6);
        desc->desc2.sampler_count = 0; /* FIXME: */
        desc->desc2.sampler_state_pointer = 0;
        desc->desc3.binding_table_entry_count = 1; /* FIXME: */
        /* binding table pointer is in 32-byte units; all kernels share table 0 */
        desc->desc3.binding_table_pointer = (BINDING_TABLE_OFFSET(0) >> 5);
        desc->desc4.constant_urb_entry_read_offset = 0;
        desc->desc4.constant_urb_entry_read_length = CURBE_URB_ENTRY_LENGTH;
                
        /*kernel start*/
        /* Relocate desc0 so the GPU sees the final kernel bo address. */
        dri_bo_emit_reloc(bo,   
                          I915_GEM_DOMAIN_INSTRUCTION, 0,
                          0,
                          i * sizeof(*desc) + offsetof(struct gen6_interface_descriptor_data, desc0),
                          kernel->bo);
        desc++;
    }
    dri_bo_unmap(bo);

    return VA_STATUS_SUCCESS;
}
327
328 static VAStatus gen75_vme_constant_setup(VADriverContextP ctx, 
329                                          struct encode_state *encode_state,
330                                          struct intel_encoder_context *encoder_context)
331 {
332     struct gen6_vme_context *vme_context = encoder_context->vme_context;
333     unsigned char *constant_buffer;
334     unsigned int *vme_state_message;
335     int mv_num = 32;
336
337     vme_state_message = (unsigned int *)vme_context->vme_state_message;
338
339     if (encoder_context->codec == CODEC_H264 ||
340         encoder_context->codec == CODEC_H264_MVC) {
341         if (vme_context->h264_level >= 30) {
342             mv_num = 16;
343         
344             if (vme_context->h264_level >= 31)
345                 mv_num = 8;
346         } 
347     } else if (encoder_context->codec == CODEC_MPEG2) {
348         mv_num = 2;
349     }
350
351     vme_state_message[31] = mv_num;
352
353     dri_bo_map(vme_context->gpe_context.curbe.bo, 1);
354     assert(vme_context->gpe_context.curbe.bo->virtual);
355     constant_buffer = vme_context->gpe_context.curbe.bo->virtual;
356
357     /* VME MV/Mb cost table is passed by using const buffer */
358     /* Now it uses the fixed search path. So it is constructed directly
359      * in the GPU shader.
360      */
361     memcpy(constant_buffer, (char *)vme_context->vme_state_message, 128);
362         
363     dri_bo_unmap(vme_context->gpe_context.curbe.bo);
364
365     return VA_STATUS_SUCCESS;
366 }
367
/* Per-QP intra macroblock mode cost words, indexed by H.264 QP (0..51).
 * Each packed dword feeds vme_state_message[0]; callers must keep the
 * index within the 52-entry range. */
static const unsigned int intra_mb_mode_cost_table[] = {
    0x31110001, // for qp0
    0x09110001, // for qp1
    0x15030001, // for qp2
    0x0b030001, // for qp3
    0x0d030011, // for qp4
    0x17210011, // for qp5
    0x41210011, // for qp6
    0x19210011, // for qp7
    0x25050003, // for qp8
    0x1b130003, // for qp9
    0x1d130003, // for qp10
    0x27070021, // for qp11
    0x51310021, // for qp12
    0x29090021, // for qp13
    0x35150005, // for qp14
    0x2b0b0013, // for qp15
    0x2d0d0013, // for qp16
    0x37170007, // for qp17
    0x61410031, // for qp18
    0x39190009, // for qp19
    0x45250015, // for qp20
    0x3b1b000b, // for qp21
    0x3d1d000d, // for qp22
    0x47270017, // for qp23
    0x71510041, // for qp24 ! center for qp=0..30
    0x49290019, // for qp25
    0x55350025, // for qp26
    0x4b2b001b, // for qp27
    0x4d2d001d, // for qp28
    0x57370027, // for qp29
    0x81610051, // for qp30
    0x57270017, // for qp31
    0x81510041, // for qp32 ! center for qp=31..51
    0x59290019, // for qp33
    0x65350025, // for qp34
    0x5b2b001b, // for qp35
    0x5d2d001d, // for qp36
    0x67370027, // for qp37
    0x91610051, // for qp38
    0x69390029, // for qp39
    0x75450035, // for qp40
    0x6b3b002b, // for qp41
    0x6d3d002d, // for qp42
    0x77470037, // for qp43
    0xa1710061, // for qp44
    0x79490039, // for qp45
    0x85550045, // for qp46
    0x7b4b003b, // for qp47
    0x7d4d003d, // for qp48
    0x87570047, // for qp49
    0xb1810071, // for qp50
    0x89590049  // for qp51
};
422
423 static void gen75_vme_state_setup_fixup(VADriverContextP ctx,
424                                         struct encode_state *encode_state,
425                                         struct intel_encoder_context *encoder_context,
426                                         unsigned int *vme_state_message)
427 {
428     struct gen6_mfc_context *mfc_context = encoder_context->mfc_context;
429     VAEncPictureParameterBufferH264 *pic_param = (VAEncPictureParameterBufferH264 *)encode_state->pic_param_ext->buffer;
430     VAEncSliceParameterBufferH264 *slice_param = (VAEncSliceParameterBufferH264 *)encode_state->slice_params_ext[0]->buffer;
431
432     if (slice_param->slice_type != SLICE_TYPE_I &&
433         slice_param->slice_type != SLICE_TYPE_SI)
434         return;
435     if (encoder_context->rate_control_mode == VA_RC_CQP)
436         vme_state_message[0] = intra_mb_mode_cost_table[pic_param->pic_init_qp + slice_param->slice_qp_delta];
437     else
438         vme_state_message[0] = intra_mb_mode_cost_table[mfc_context->bit_rate_control_context[SLICE_TYPE_I].QpPrimeY];
439 }
440
441 static VAStatus gen75_vme_vme_state_setup(VADriverContextP ctx,
442                                           struct encode_state *encode_state,
443                                           int is_intra,
444                                           struct intel_encoder_context *encoder_context)
445 {
446     struct gen6_vme_context *vme_context = encoder_context->vme_context;
447     unsigned int *vme_state_message;
448     int i;
449         
450     //pass the MV/Mb cost into VME message on HASWell
451     assert(vme_context->vme_state_message);
452     vme_state_message = (unsigned int *)vme_context->vme_state_message;
453
454     vme_state_message[0] = 0x4a4a4a4a;
455     vme_state_message[1] = 0x4a4a4a4a;
456     vme_state_message[2] = 0x4a4a4a4a;
457     vme_state_message[3] = 0x22120200;
458     vme_state_message[4] = 0x62524232;
459
460     for (i=5; i < 8; i++) {
461         vme_state_message[i] = 0;
462     }
463
464     switch (encoder_context->codec) {
465     case CODEC_H264:
466     case CODEC_H264_MVC:
467         gen75_vme_state_setup_fixup(ctx, encode_state, encoder_context, vme_state_message);
468
469         break;
470
471     default:
472         /* no fixup */
473         break;
474     }
475
476     return VA_STATUS_SUCCESS;
477 }
478
/* CPU-side construction of the second-level batchbuffer: one
 * MEDIA_OBJECT command per macroblock, walking every slice in raster
 * order.  mb_intra_ub encodes which neighbour MBs are available for
 * intra prediction (left/top/top-left/top-right) and is corrected for
 * slice boundaries on the first rows of each slice. */
static void
gen75_vme_fill_vme_batchbuffer(VADriverContextP ctx, 
                               struct encode_state *encode_state,
                               int mb_width, int mb_height,
                               int kernel,
                               int transform_8x8_mode_flag,
                               struct intel_encoder_context *encoder_context)
{
    struct gen6_vme_context *vme_context = encoder_context->vme_context;
    int mb_x = 0, mb_y = 0;
    int i, s;
    unsigned int *command_ptr;

    dri_bo_map(vme_context->vme_batchbuffer.bo, 1);
    command_ptr = vme_context->vme_batchbuffer.bo->virtual;

    for (s = 0; s < encode_state->num_slice_params_ext; s++) {
        VAEncSliceParameterBufferH264 *pSliceParameter = (VAEncSliceParameterBufferH264 *)encode_state->slice_params_ext[s]->buffer; 
        int slice_mb_begin = pSliceParameter->macroblock_address;
        int slice_mb_number = pSliceParameter->num_macroblocks;
        unsigned int mb_intra_ub;
        /* x-position of the slice's first MB; non-zero means the slice
         * starts mid-row */
        int slice_mb_x = pSliceParameter->macroblock_address % mb_width; 
        for (i = 0; i < slice_mb_number;  ) {
            int mb_count = i + slice_mb_begin;    
            mb_x = mb_count % mb_width;
            mb_y = mb_count / mb_width;
            mb_intra_ub = 0;
            /* Frame-level neighbour availability... */
            if (mb_x != 0) {
                mb_intra_ub |= INTRA_PRED_AVAIL_FLAG_AE;
            }
            if (mb_y != 0) {
                mb_intra_ub |= INTRA_PRED_AVAIL_FLAG_B;
                if (mb_x != 0)
                    mb_intra_ub |= INTRA_PRED_AVAIL_FLAG_D;
                if (mb_x != (mb_width -1))
                    mb_intra_ub |= INTRA_PRED_AVAIL_FLAG_C;
            }
            /* ...then restricted for the first row of this slice: MBs in
             * another slice are not available for intra prediction. */
            if (i < mb_width) {
                if (i == 0)
                    mb_intra_ub &= ~(INTRA_PRED_AVAIL_FLAG_AE);
                mb_intra_ub &= ~(INTRA_PRED_AVAIL_FLAG_BCD_MASK);
                if ((i == (mb_width - 1)) && slice_mb_x) {
                    mb_intra_ub |= INTRA_PRED_AVAIL_FLAG_C;
                }
            }
                
            if ((i == mb_width) && slice_mb_x) {
                mb_intra_ub &= ~(INTRA_PRED_AVAIL_FLAG_D);
            }
            /* MEDIA_OBJECT header: 8 total dwords, length field = 8 - 2 */
            *command_ptr++ = (CMD_MEDIA_OBJECT | (8 - 2));
            *command_ptr++ = kernel;
            *command_ptr++ = 0;
            *command_ptr++ = 0;
            *command_ptr++ = 0;
            *command_ptr++ = 0;
   
            /*inline data */
            *command_ptr++ = (mb_width << 16 | mb_y << 8 | mb_x);
            *command_ptr++ = ((encoder_context->quality_level << 24) | (1 << 16) | transform_8x8_mode_flag | (mb_intra_ub << 8));

            i += 1;
        } 
    }

    /* Terminate the second-level batch. */
    *command_ptr++ = 0;
    *command_ptr++ = MI_BATCH_BUFFER_END;

    dri_bo_unmap(vme_context->vme_batchbuffer.bo);
}
548
549 static void gen75_vme_media_init(VADriverContextP ctx, struct intel_encoder_context *encoder_context)
550 {
551     struct gen6_vme_context *vme_context = encoder_context->vme_context;
552
553     i965_gpe_context_init(ctx, &vme_context->gpe_context);
554
555     /* VME output buffer */
556     dri_bo_unreference(vme_context->vme_output.bo);
557     vme_context->vme_output.bo = NULL;
558
559     dri_bo_unreference(vme_context->vme_batchbuffer.bo);
560     vme_context->vme_batchbuffer.bo = NULL;
561
562     /* VME state */
563     dri_bo_unreference(vme_context->vme_state.bo);
564     vme_context->vme_state.bo = NULL;
565 }
566
567 static void gen75_vme_pipeline_programing(VADriverContextP ctx, 
568                                           struct encode_state *encode_state,
569                                           struct intel_encoder_context *encoder_context)
570 {
571     struct gen6_vme_context *vme_context = encoder_context->vme_context;
572     struct intel_batchbuffer *batch = encoder_context->base.batch;
573     VAEncPictureParameterBufferH264 *pPicParameter = (VAEncPictureParameterBufferH264 *)encode_state->pic_param_ext->buffer;
574     VAEncSliceParameterBufferH264 *pSliceParameter = (VAEncSliceParameterBufferH264 *)encode_state->slice_params_ext[0]->buffer;
575     VAEncSequenceParameterBufferH264 *pSequenceParameter = (VAEncSequenceParameterBufferH264 *)encode_state->seq_param_ext->buffer;
576     int width_in_mbs = pSequenceParameter->picture_width_in_mbs;
577     int height_in_mbs = pSequenceParameter->picture_height_in_mbs;
578     int kernel_shader;
579     bool allow_hwscore = true;
580     int s;
581     unsigned int is_low_quality = (encoder_context->quality_level == ENCODER_LOW_QUALITY);
582
583     if (is_low_quality)
584         allow_hwscore = false;
585     else {
586         for (s = 0; s < encode_state->num_slice_params_ext; s++) {
587             pSliceParameter = (VAEncSliceParameterBufferH264 *)encode_state->slice_params_ext[s]->buffer; 
588             if ((pSliceParameter->macroblock_address % width_in_mbs)) {
589                 allow_hwscore = false;
590                 break;
591             }
592         }
593     }
594
595     if ((pSliceParameter->slice_type == SLICE_TYPE_I) ||
596         (pSliceParameter->slice_type == SLICE_TYPE_I)) {
597         kernel_shader = VME_INTRA_SHADER;
598     } else if ((pSliceParameter->slice_type == SLICE_TYPE_P) ||
599                (pSliceParameter->slice_type == SLICE_TYPE_SP)) {
600         kernel_shader = VME_INTER_SHADER;
601     } else {
602         kernel_shader = VME_BINTER_SHADER;
603         if (!allow_hwscore)
604             kernel_shader = VME_INTER_SHADER;
605     }
606     if (allow_hwscore)
607         gen7_vme_walker_fill_vme_batchbuffer(ctx, 
608                                              encode_state,
609                                              width_in_mbs, height_in_mbs,
610                                              kernel_shader,
611                                              pPicParameter->pic_fields.bits.transform_8x8_mode_flag,
612                                              encoder_context);
613     else
614         gen75_vme_fill_vme_batchbuffer(ctx, 
615                                        encode_state,
616                                        width_in_mbs, height_in_mbs,
617                                        kernel_shader,
618                                        pPicParameter->pic_fields.bits.transform_8x8_mode_flag,
619                                        encoder_context);
620
621     intel_batchbuffer_start_atomic(batch, 0x1000);
622     gen6_gpe_pipeline_setup(ctx, &vme_context->gpe_context, batch);
623     BEGIN_BATCH(batch, 2);
624     OUT_BATCH(batch, MI_BATCH_BUFFER_START | (1 << 8));
625     OUT_RELOC(batch,
626               vme_context->vme_batchbuffer.bo,
627               I915_GEM_DOMAIN_COMMAND, 0, 
628               0);
629     ADVANCE_BATCH(batch);
630
631     intel_batchbuffer_end_atomic(batch);        
632 }
633
634 static VAStatus gen75_vme_prepare(VADriverContextP ctx, 
635                                   struct encode_state *encode_state,
636                                   struct intel_encoder_context *encoder_context)
637 {
638     VAStatus vaStatus = VA_STATUS_SUCCESS;
639     VAEncSliceParameterBufferH264 *pSliceParameter = (VAEncSliceParameterBufferH264 *)encode_state->slice_params_ext[0]->buffer;
640     int is_intra = pSliceParameter->slice_type == SLICE_TYPE_I;
641     VAEncSequenceParameterBufferH264 *pSequenceParameter = (VAEncSequenceParameterBufferH264 *)encode_state->seq_param_ext->buffer;
642     struct gen6_vme_context *vme_context = encoder_context->vme_context;
643
644     if (!vme_context->h264_level ||
645         (vme_context->h264_level != pSequenceParameter->level_idc)) {
646         vme_context->h264_level = pSequenceParameter->level_idc;        
647     }   
648
649     intel_vme_update_mbmv_cost(ctx, encode_state, encoder_context);
650         
651     /*Setup all the memory object*/
652     gen75_vme_surface_setup(ctx, encode_state, is_intra, encoder_context);
653     gen75_vme_interface_setup(ctx, encode_state, encoder_context);
654     //gen75_vme_vme_state_setup(ctx, encode_state, is_intra, encoder_context);
655     gen75_vme_constant_setup(ctx, encode_state, encoder_context);
656
657     /*Programing media pipeline*/
658     gen75_vme_pipeline_programing(ctx, encode_state, encoder_context);
659
660     return vaStatus;
661 }
662
663 static VAStatus gen75_vme_run(VADriverContextP ctx, 
664                               struct encode_state *encode_state,
665                               struct intel_encoder_context *encoder_context)
666 {
667     struct intel_batchbuffer *batch = encoder_context->base.batch;
668
669     intel_batchbuffer_flush(batch);
670
671     return VA_STATUS_SUCCESS;
672 }
673
/* No teardown is needed after the VME run; kept as a pipeline-stage
 * stub for symmetry with prepare/run. */
static VAStatus gen75_vme_stop(VADriverContextP ctx, 
                               struct encode_state *encode_state,
                               struct intel_encoder_context *encoder_context)
{
    return VA_STATUS_SUCCESS;
}
680
/* Top-level H.264 VME pipeline entry point: init → prepare → run → stop.
 * Intermediate statuses are not propagated; always returns
 * VA_STATUS_SUCCESS (failures inside the stages assert instead). */
static VAStatus
gen75_vme_pipeline(VADriverContextP ctx,
                   VAProfile profile,
                   struct encode_state *encode_state,
                   struct intel_encoder_context *encoder_context)
{
    gen75_vme_media_init(ctx, encoder_context);
    gen75_vme_prepare(ctx, encode_state, encoder_context);
    gen75_vme_run(ctx, encode_state, encoder_context);
    gen75_vme_stop(ctx, encode_state, encoder_context);

    return VA_STATUS_SUCCESS;
}
694
695 static void
696 gen75_vme_mpeg2_output_buffer_setup(VADriverContextP ctx,
697                                     struct encode_state *encode_state,
698                                     int index,
699                                     int is_intra,
700                                     struct intel_encoder_context *encoder_context)
701
702 {
703     struct i965_driver_data *i965 = i965_driver_data(ctx);
704     struct gen6_vme_context *vme_context = encoder_context->vme_context;
705     VAEncSequenceParameterBufferMPEG2 *seq_param = (VAEncSequenceParameterBufferMPEG2 *)encode_state->seq_param_ext->buffer;
706     int width_in_mbs = ALIGN(seq_param->picture_width, 16) / 16;
707     int height_in_mbs = ALIGN(seq_param->picture_height, 16) / 16;
708
709     vme_context->vme_output.num_blocks = width_in_mbs * height_in_mbs;
710     vme_context->vme_output.pitch = 16; /* in bytes, always 16 */
711
712     if (is_intra)
713         vme_context->vme_output.size_block = INTRA_VME_OUTPUT_IN_BYTES * 2;
714     else
715         vme_context->vme_output.size_block = INTRA_VME_OUTPUT_IN_BYTES * 24;
716     /*
717      * Inter MV . 32-byte Intra search + 16 IME info + 128 IME MV + 32 IME Ref
718      * + 16 FBR Info + 128 FBR MV + 32 FBR Ref.
719      * 16 * (2 + 2 * (1 + 8 + 2))= 16 * 24.
720      */
721
722     vme_context->vme_output.bo = dri_bo_alloc(i965->intel.bufmgr, 
723                                               "VME output buffer",
724                                               vme_context->vme_output.num_blocks * vme_context->vme_output.size_block,
725                                               0x1000);
726     assert(vme_context->vme_output.bo);
727     vme_context->vme_buffer_suface_setup(ctx,
728                                          &vme_context->gpe_context,
729                                          &vme_context->vme_output,
730                                          BINDING_TABLE_OFFSET(index),
731                                          SURFACE_STATE_OFFSET(index));
732 }
733
734 static void
735 gen75_vme_mpeg2_output_vme_batchbuffer_setup(VADriverContextP ctx,
736                                              struct encode_state *encode_state,
737                                              int index,
738                                              struct intel_encoder_context *encoder_context)
739
740 {
741     struct i965_driver_data *i965 = i965_driver_data(ctx);
742     struct gen6_vme_context *vme_context = encoder_context->vme_context;
743     VAEncSequenceParameterBufferMPEG2 *seq_param = (VAEncSequenceParameterBufferMPEG2 *)encode_state->seq_param_ext->buffer;
744     int width_in_mbs = ALIGN(seq_param->picture_width, 16) / 16;
745     int height_in_mbs = ALIGN(seq_param->picture_height, 16) / 16;
746
747     vme_context->vme_batchbuffer.num_blocks = width_in_mbs * height_in_mbs + 1;
748     vme_context->vme_batchbuffer.size_block = 64; /* 4 OWORDs */
749     vme_context->vme_batchbuffer.pitch = 16;
750     vme_context->vme_batchbuffer.bo = dri_bo_alloc(i965->intel.bufmgr, 
751                                                    "VME batchbuffer",
752                                                    vme_context->vme_batchbuffer.num_blocks * vme_context->vme_batchbuffer.size_block,
753                                                    0x1000);
754     vme_context->vme_buffer_suface_setup(ctx,
755                                          &vme_context->gpe_context,
756                                          &vme_context->vme_batchbuffer,
757                                          BINDING_TABLE_OFFSET(index),
758                                          SURFACE_STATE_OFFSET(index));
759 }
760
761 static VAStatus
762 gen75_vme_mpeg2_surface_setup(VADriverContextP ctx, 
763                               struct encode_state *encode_state,
764                               int is_intra,
765                               struct intel_encoder_context *encoder_context)
766 {
767     struct object_surface *obj_surface;
768
769     /*Setup surfaces state*/
770     /* current picture for encoding */
771     obj_surface = encode_state->input_yuv_object;
772     gen75_vme_source_surface_state(ctx, 0, obj_surface, encoder_context);
773     gen75_vme_media_source_surface_state(ctx, 4, obj_surface, encoder_context);
774     gen75_vme_media_chroma_source_surface_state(ctx, 6, obj_surface, encoder_context);
775
776     if (!is_intra) {
777         /* reference 0 */
778         obj_surface = encode_state->reference_objects[0];
779         if (obj_surface->bo != NULL)
780             gen75_vme_source_surface_state(ctx, 1, obj_surface, encoder_context);
781
782         /* reference 1 */
783         obj_surface = encode_state->reference_objects[1];
784         if (obj_surface && obj_surface->bo != NULL) 
785             gen75_vme_source_surface_state(ctx, 2, obj_surface, encoder_context);
786     }
787
788     /* VME output */
789     gen75_vme_mpeg2_output_buffer_setup(ctx, encode_state, 3, is_intra, encoder_context);
790     gen75_vme_mpeg2_output_vme_batchbuffer_setup(ctx, encode_state, 5, encoder_context);
791
792     return VA_STATUS_SUCCESS;
793 }
794
/*
 * Fill the second-level VME batchbuffer with one MEDIA_OBJECT command per
 * macroblock, walking every slice element of every slice parameter buffer.
 *
 * Each MEDIA_OBJECT carries two inline DWORDs: (mb_width, mb_y, mb_x) and
 * a flags word holding transform_8x8_mode_flag plus the intra neighbor
 * availability mask (mb_intra_ub).  The buffer is terminated with
 * MI_BATCH_BUFFER_END.
 *
 * kernel selects the interface descriptor index of the VME shader to run.
 */
static void
gen75_vme_mpeg2_fill_vme_batchbuffer(VADriverContextP ctx, 
                                     struct encode_state *encode_state,
                                     int mb_width, int mb_height,
                                     int kernel,
                                     int transform_8x8_mode_flag,
                                     struct intel_encoder_context *encoder_context)
{
    struct gen6_vme_context *vme_context = encoder_context->vme_context;
    int mb_x = 0, mb_y = 0;
    int i, s, j;
    unsigned int *command_ptr;


    /* map writable (1) so commands can be written directly into the bo */
    dri_bo_map(vme_context->vme_batchbuffer.bo, 1);
    command_ptr = vme_context->vme_batchbuffer.bo->virtual;

    for (s = 0; s < encode_state->num_slice_params_ext; s++) {
        VAEncSliceParameterBufferMPEG2 *slice_param = (VAEncSliceParameterBufferMPEG2 *)encode_state->slice_params_ext[s]->buffer;

        for (j = 0; j < encode_state->slice_params_ext[s]->num_elements; j++) {
            int slice_mb_begin = slice_param->macroblock_address;
            int slice_mb_number = slice_param->num_macroblocks;
            unsigned int mb_intra_ub;
            /* x offset of the slice start within its MB row; non-zero means
             * the slice does not begin at the left edge of the picture */
            int slice_mb_x = slice_param->macroblock_address % mb_width;

            for (i = 0; i < slice_mb_number;) {
                int mb_count = i + slice_mb_begin;    

                mb_x = mb_count % mb_width;
                mb_y = mb_count / mb_width;
                mb_intra_ub = 0;

                /* left neighbor available unless at the left picture edge */
                if (mb_x != 0) {
                    mb_intra_ub |= INTRA_PRED_AVAIL_FLAG_AE;
                }

                /* top row neighbors (B = top, D = top-left, C = top-right) */
                if (mb_y != 0) {
                    mb_intra_ub |= INTRA_PRED_AVAIL_FLAG_B;

                    if (mb_x != 0)
                        mb_intra_ub |= INTRA_PRED_AVAIL_FLAG_D;

                    if (mb_x != (mb_width -1))
                        mb_intra_ub |= INTRA_PRED_AVAIL_FLAG_C;
                }

                /* first MB row *of the slice*: neighbors above belong to a
                 * different slice, so clear them (i counts within the slice) */
                if (i < mb_width) {
                    if (i == 0)
                        mb_intra_ub &= ~(INTRA_PRED_AVAIL_FLAG_AE);

                    mb_intra_ub &= ~(INTRA_PRED_AVAIL_FLAG_BCD_MASK);

                    /* NOTE(review): when the slice starts mid-row, the last MB
                     * of its first row does have a top-right neighbor inside
                     * the slice — re-enable C in that case */
                    if ((i == (mb_width - 1)) && slice_mb_x) {
                        mb_intra_ub |= INTRA_PRED_AVAIL_FLAG_C;
                    }
                }
                
                /* second row of a mid-row-starting slice: the top-left MB is
                 * outside the slice */
                if ((i == mb_width) && slice_mb_x) {
                    mb_intra_ub &= ~(INTRA_PRED_AVAIL_FLAG_D);
                }

                /* MEDIA_OBJECT: 6 command DWORDs + 2 inline-data DWORDs */
                *command_ptr++ = (CMD_MEDIA_OBJECT | (8 - 2));
                *command_ptr++ = kernel;
                *command_ptr++ = 0;
                *command_ptr++ = 0;
                *command_ptr++ = 0;
                *command_ptr++ = 0;
   
                /*inline data */
                *command_ptr++ = (mb_width << 16 | mb_y << 8 | mb_x);
                *command_ptr++ = ( (1 << 16) | transform_8x8_mode_flag | (mb_intra_ub << 8));

                i += 1;
            }

            slice_param++;
        }
    }

    /* terminate the second-level batchbuffer */
    *command_ptr++ = 0;
    *command_ptr++ = MI_BATCH_BUFFER_END;

    dri_bo_unmap(vme_context->vme_batchbuffer.bo);
}
880
881 static void
882 gen75_vme_mpeg2_pipeline_programing(VADriverContextP ctx, 
883                                     struct encode_state *encode_state,
884                                     int is_intra,
885                                     struct intel_encoder_context *encoder_context)
886 {
887     struct gen6_vme_context *vme_context = encoder_context->vme_context;
888     struct intel_batchbuffer *batch = encoder_context->base.batch;
889     VAEncPictureParameterBufferMPEG2 *pic_param = NULL;
890     VAEncSequenceParameterBufferMPEG2 *seq_param = (VAEncSequenceParameterBufferMPEG2 *)encode_state->seq_param_ext->buffer;
891     int width_in_mbs = ALIGN(seq_param->picture_width, 16) / 16;
892     int height_in_mbs = ALIGN(seq_param->picture_height, 16) / 16;
893     bool allow_hwscore = true;
894     int s;
895     int kernel_shader;
896
897     pic_param = (VAEncPictureParameterBufferMPEG2 *)encode_state->pic_param_ext->buffer;
898
899     for (s = 0; s < encode_state->num_slice_params_ext; s++) {
900         int j;
901         VAEncSliceParameterBufferMPEG2 *slice_param = (VAEncSliceParameterBufferMPEG2 *)encode_state->slice_params_ext[s]->buffer;
902
903         for (j = 0; j < encode_state->slice_params_ext[s]->num_elements; j++) {
904             if (slice_param->macroblock_address % width_in_mbs) {
905                 allow_hwscore = false;
906                 break;
907             }
908         }
909     }
910
911     pic_param = (VAEncPictureParameterBufferMPEG2 *)encode_state->pic_param_ext->buffer;
912     if (pic_param->picture_type == VAEncPictureTypeIntra) {
913         allow_hwscore = false;
914         kernel_shader = VME_INTRA_SHADER;
915     } else {
916         kernel_shader = VME_INTER_SHADER;
917     }
918
919     if (allow_hwscore) 
920         gen7_vme_mpeg2_walker_fill_vme_batchbuffer(ctx,
921                                                    encode_state,
922                                                    width_in_mbs, height_in_mbs,
923                                                    kernel_shader,
924                                                    encoder_context);
925     else
926         gen75_vme_mpeg2_fill_vme_batchbuffer(ctx, 
927                                              encode_state,
928                                              width_in_mbs, height_in_mbs,
929                                              kernel_shader,
930                                              0,
931                                              encoder_context);
932
933     intel_batchbuffer_start_atomic(batch, 0x1000);
934     gen6_gpe_pipeline_setup(ctx, &vme_context->gpe_context, batch);
935     BEGIN_BATCH(batch, 2);
936     OUT_BATCH(batch, MI_BATCH_BUFFER_START | (1 << 8));
937     OUT_RELOC(batch,
938               vme_context->vme_batchbuffer.bo,
939               I915_GEM_DOMAIN_COMMAND, 0, 
940               0);
941     ADVANCE_BATCH(batch);
942
943     intel_batchbuffer_end_atomic(batch);        
944 }
945
946 static VAStatus 
947 gen75_vme_mpeg2_prepare(VADriverContextP ctx, 
948                         struct encode_state *encode_state,
949                         struct intel_encoder_context *encoder_context)
950 {
951     VAStatus vaStatus = VA_STATUS_SUCCESS;
952     VAEncSliceParameterBufferMPEG2 *slice_param = (VAEncSliceParameterBufferMPEG2 *)encode_state->slice_params_ext[0]->buffer;
953         
954     VAEncSequenceParameterBufferMPEG2 *seq_param = (VAEncSequenceParameterBufferMPEG2 *)encode_state->seq_param_ext->buffer;
955     struct gen6_vme_context *vme_context = encoder_context->vme_context;
956
957     if ((!vme_context->mpeg2_level) ||
958         (vme_context->mpeg2_level != (seq_param->sequence_extension.bits.profile_and_level_indication & MPEG2_LEVEL_MASK))) {
959         vme_context->mpeg2_level = seq_param->sequence_extension.bits.profile_and_level_indication & MPEG2_LEVEL_MASK;
960     }
961
962     /*Setup all the memory object*/
963     gen75_vme_mpeg2_surface_setup(ctx, encode_state, slice_param->is_intra_slice, encoder_context);
964     gen75_vme_interface_setup(ctx, encode_state, encoder_context);
965     gen75_vme_vme_state_setup(ctx, encode_state, slice_param->is_intra_slice, encoder_context);
966     intel_vme_mpeg2_state_setup(ctx, encode_state, encoder_context);
967     gen75_vme_constant_setup(ctx, encode_state, encoder_context);
968
969     /*Programing media pipeline*/
970     gen75_vme_mpeg2_pipeline_programing(ctx, encode_state, slice_param->is_intra_slice, encoder_context);
971
972     return vaStatus;
973 }
974
975 static VAStatus
976 gen75_vme_mpeg2_pipeline(VADriverContextP ctx,
977                          VAProfile profile,
978                          struct encode_state *encode_state,
979                          struct intel_encoder_context *encoder_context)
980 {
981     gen75_vme_media_init(ctx, encoder_context);
982     gen75_vme_mpeg2_prepare(ctx, encode_state, encoder_context);
983     gen75_vme_run(ctx, encode_state, encoder_context);
984     gen75_vme_stop(ctx, encode_state, encoder_context);
985
986     return VA_STATUS_SUCCESS;
987 }
988
989 static void
990 gen75_vme_context_destroy(void *context)
991 {
992     struct gen6_vme_context *vme_context = context;
993
994     i965_gpe_context_destroy(&vme_context->gpe_context);
995
996     dri_bo_unreference(vme_context->vme_output.bo);
997     vme_context->vme_output.bo = NULL;
998
999     dri_bo_unreference(vme_context->vme_state.bo);
1000     vme_context->vme_state.bo = NULL;
1001
1002     dri_bo_unreference(vme_context->vme_batchbuffer.bo);
1003     vme_context->vme_batchbuffer.bo = NULL;
1004
1005     if (vme_context->vme_state_message) {
1006         free(vme_context->vme_state_message);
1007         vme_context->vme_state_message = NULL;
1008     }
1009
1010     free(vme_context);
1011 }
1012
1013 Bool gen75_vme_context_init(VADriverContextP ctx, struct intel_encoder_context *encoder_context)
1014 {
1015     struct gen6_vme_context *vme_context = calloc(1, sizeof(struct gen6_vme_context));
1016     struct i965_kernel *vme_kernel_list = NULL;
1017     int i965_kernel_num;
1018
1019     switch (encoder_context->codec) {
1020     case CODEC_H264:
1021     case CODEC_H264_MVC:
1022         vme_kernel_list = gen75_vme_kernels;
1023         encoder_context->vme_pipeline = gen75_vme_pipeline;
1024         i965_kernel_num = sizeof(gen75_vme_kernels) / sizeof(struct i965_kernel); 
1025         break;
1026
1027     case CODEC_MPEG2:
1028         vme_kernel_list = gen75_vme_mpeg2_kernels;
1029         encoder_context->vme_pipeline = gen75_vme_mpeg2_pipeline;
1030         i965_kernel_num = sizeof(gen75_vme_mpeg2_kernels) / sizeof(struct i965_kernel); 
1031
1032         break;
1033
1034     default:
1035         /* never get here */
1036         assert(0);
1037
1038         break;
1039     }
1040     vme_context->vme_kernel_sum = i965_kernel_num;
1041     vme_context->gpe_context.surface_state_binding_table.length = (SURFACE_STATE_PADDED_SIZE + sizeof(unsigned int)) * MAX_MEDIA_SURFACES_GEN6;
1042
1043     vme_context->gpe_context.idrt.max_entries = MAX_INTERFACE_DESC_GEN6;
1044     vme_context->gpe_context.idrt.entry_size = sizeof(struct gen6_interface_descriptor_data);
1045
1046     vme_context->gpe_context.curbe.length = CURBE_TOTAL_DATA_LENGTH;
1047
1048     vme_context->gpe_context.vfe_state.max_num_threads = 60 - 1;
1049     vme_context->gpe_context.vfe_state.num_urb_entries = 64;
1050     vme_context->gpe_context.vfe_state.gpgpu_mode = 0;
1051     vme_context->gpe_context.vfe_state.urb_entry_size = 16;
1052     vme_context->gpe_context.vfe_state.curbe_allocation_size = CURBE_ALLOCATION_SIZE - 1;
1053
1054     gen7_vme_scoreboard_init(ctx, vme_context);
1055
1056     i965_gpe_load_kernels(ctx,
1057                           &vme_context->gpe_context,
1058                           vme_kernel_list,
1059                           i965_kernel_num);
1060     vme_context->vme_surface2_setup = gen7_gpe_surface2_setup;
1061     vme_context->vme_media_rw_surface_setup = gen7_gpe_media_rw_surface_setup;
1062     vme_context->vme_buffer_suface_setup = gen7_gpe_buffer_suface_setup;
1063     vme_context->vme_media_chroma_surface_setup = gen75_gpe_media_chroma_surface_setup;
1064
1065     encoder_context->vme_context = vme_context;
1066     encoder_context->vme_context_destroy = gen75_vme_context_destroy;
1067
1068     vme_context->vme_state_message = malloc(VME_MSG_LENGTH * sizeof(int));
1069
1070     return True;
1071 }