Use the right parameters to initialize bit rate context
[platform/upstream/libva-intel-driver.git] / src / gen75_vme.c
1 /*
2  * Copyright © 2010-2012 Intel Corporation
3  *
4  * Permission is hereby granted, free of charge, to any person obtaining a
5  * copy of this software and associated documentation files (the
6  * "Software"), to deal in the Software without restriction, including
7  * without limitation the rights to use, copy, modify, merge, publish,
8  * distribute, sub license, and/or sell copies of the Software, and to
9  * permit persons to whom the Software is furnished to do so, subject to
10  * the following conditions:
11  *
12  * The above copyright notice and this permission notice (including the
13  * next paragraph) shall be included in all copies or substantial portions
14  * of the Software.
15  *
16  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
17  * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
18  * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
19  * IN NO EVENT SHALL PRECISION INSIGHT AND/OR ITS SUPPLIERS BE LIABLE FOR
20  * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
21  * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
22  * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
23  *
24  * Authors:
25  *    Zhao Yakui <yakui.zhao@intel.com>
26  *    Xiang Haihao <haihao.xiang@intel.com>
27  *
28  */
29
30 #include "sysdeps.h"
31
32 #include "intel_batchbuffer.h"
33 #include "intel_driver.h"
34
35 #include "i965_defines.h"
36 #include "i965_drv_video.h"
37 #include "i965_encoder.h"
38 #include "gen6_vme.h"
39 #include "gen6_mfc.h"
40
41 #define SURFACE_STATE_PADDED_SIZE               MAX(SURFACE_STATE_PADDED_SIZE_GEN6, SURFACE_STATE_PADDED_SIZE_GEN7)
42 #define SURFACE_STATE_OFFSET(index)             (SURFACE_STATE_PADDED_SIZE * index)
43 #define BINDING_TABLE_OFFSET(index)             (SURFACE_STATE_OFFSET(MAX_MEDIA_SURFACES_GEN6) + sizeof(unsigned int) * index)
44
45 #define VME_INTRA_SHADER        0
46 #define VME_INTER_SHADER        1
47 #define VME_BINTER_SHADER       3
48 #define VME_BATCHBUFFER         2
49
50 #define CURBE_ALLOCATION_SIZE   37              /* in 256-bit */
51 #define CURBE_TOTAL_DATA_LENGTH (4 * 32)        /* in byte, it should be less than or equal to CURBE_ALLOCATION_SIZE * 32 */
52 #define CURBE_URB_ENTRY_LENGTH  4               /* in 256-bit, it should be less than or equal to CURBE_TOTAL_DATA_LENGTH / 32 */
53
54 #define VME_MSG_LENGTH          32
55   
/* Precompiled Haswell (gen7.5) GPU kernel binaries for H.264 VME,
 * generated from the EU assembly under shaders/vme/.  Each row is one
 * 128-bit instruction (4 dwords). */
static const uint32_t gen75_vme_intra_frame[][4] = {
#include "shaders/vme/intra_frame_haswell.g75b"
};

static const uint32_t gen75_vme_inter_frame[][4] = {
#include "shaders/vme/inter_frame_haswell.g75b"
};

static const uint32_t gen75_vme_inter_bframe[][4] = {
#include "shaders/vme/inter_bframe_haswell.g75b"
};

/* Host-built batchbuffer kernel used to emit per-MB MEDIA_OBJECT commands */
static const uint32_t gen75_vme_batchbuffer[][4] = {
#include "shaders/vme/batchbuffer.g75b"
};
71
/* Kernel descriptor table for H.264 encoding.  Entry order is arbitrary;
 * each kernel is selected by its VME_*_SHADER index (second field).  The
 * trailing NULL is the dri_bo pointer, filled in when the kernel is
 * uploaded. */
static struct i965_kernel gen75_vme_kernels[] = {
    {
        "VME Intra Frame",
        VME_INTRA_SHADER, /*index*/
        gen75_vme_intra_frame,
        sizeof(gen75_vme_intra_frame),
        NULL
    },
    {
        "VME inter Frame",
        VME_INTER_SHADER,
        gen75_vme_inter_frame,
        sizeof(gen75_vme_inter_frame),
        NULL
    },
    {
        "VME BATCHBUFFER",
        VME_BATCHBUFFER,
        gen75_vme_batchbuffer,
        sizeof(gen75_vme_batchbuffer),
        NULL
    },
    {
        "VME inter BFrame",
        VME_BINTER_SHADER,
        gen75_vme_inter_bframe,
        sizeof(gen75_vme_inter_bframe),
        NULL
    }
};
102
/* MPEG-2 kernel binaries.  The intra kernel and the batchbuffer kernel
 * are shared with the H.264 path; only the inter kernel is MPEG-2
 * specific. */
static const uint32_t gen75_vme_mpeg2_intra_frame[][4] = {
#include "shaders/vme/intra_frame_haswell.g75b"
};

static const uint32_t gen75_vme_mpeg2_inter_frame[][4] = {
#include "shaders/vme/mpeg2_inter_haswell.g75b"
};

static const uint32_t gen75_vme_mpeg2_batchbuffer[][4] = {
#include "shaders/vme/batchbuffer.g75b"
};
114
/* Kernel descriptor table for MPEG-2 encoding; no B-frame kernel here,
 * so the table has one fewer entry than gen75_vme_kernels. */
static struct i965_kernel gen75_vme_mpeg2_kernels[] = {
    {
        "VME Intra Frame",
        VME_INTRA_SHADER, /*index*/
        gen75_vme_mpeg2_intra_frame,
        sizeof(gen75_vme_mpeg2_intra_frame),
        NULL
    },
    {
        "VME inter Frame",
        VME_INTER_SHADER,
        gen75_vme_mpeg2_inter_frame,
        sizeof(gen75_vme_mpeg2_inter_frame),
        NULL
    },
    {
        "VME BATCHBUFFER",
        VME_BATCHBUFFER,
        gen75_vme_mpeg2_batchbuffer,
        sizeof(gen75_vme_mpeg2_batchbuffer),
        NULL
    },
};
138
139 /* only used for VME source surface state */
140 static void 
141 gen75_vme_source_surface_state(VADriverContextP ctx,
142                                int index,
143                                struct object_surface *obj_surface,
144                                struct intel_encoder_context *encoder_context)
145 {
146     struct gen6_vme_context *vme_context = encoder_context->vme_context;
147
148     vme_context->vme_surface2_setup(ctx,
149                                     &vme_context->gpe_context,
150                                     obj_surface,
151                                     BINDING_TABLE_OFFSET(index),
152                                     SURFACE_STATE_OFFSET(index));
153 }
154
155 static void
156 gen75_vme_media_source_surface_state(VADriverContextP ctx,
157                                      int index,
158                                      struct object_surface *obj_surface,
159                                      struct intel_encoder_context *encoder_context)
160 {
161     struct gen6_vme_context *vme_context = encoder_context->vme_context;
162
163     vme_context->vme_media_rw_surface_setup(ctx,
164                                             &vme_context->gpe_context,
165                                             obj_surface,
166                                             BINDING_TABLE_OFFSET(index),
167                                             SURFACE_STATE_OFFSET(index));
168 }
169
170 static void
171 gen75_vme_media_chroma_source_surface_state(VADriverContextP ctx,
172                                             int index,
173                                             struct object_surface *obj_surface,
174                                             struct intel_encoder_context *encoder_context)
175 {
176     struct gen6_vme_context *vme_context = encoder_context->vme_context;
177
178     vme_context->vme_media_chroma_surface_setup(ctx,
179                                                 &vme_context->gpe_context,
180                                                 obj_surface,
181                                                 BINDING_TABLE_OFFSET(index),
182                                                 SURFACE_STATE_OFFSET(index));
183 }
184
/*
 * Allocate the per-frame VME output buffer (one block per macroblock)
 * and expose it to the kernels as a buffer surface at binding-table
 * slot @index.  Block size depends on whether the frame is intra.
 */
static void
gen75_vme_output_buffer_setup(VADriverContextP ctx,
                              struct encode_state *encode_state,
                              int index,
                              struct intel_encoder_context *encoder_context)

{
    struct i965_driver_data *i965 = i965_driver_data(ctx);
    struct gen6_vme_context *vme_context = encoder_context->vme_context;
    VAEncSequenceParameterBufferH264 *pSequenceParameter = (VAEncSequenceParameterBufferH264 *)encode_state->seq_param_ext->buffer;
    VAEncSliceParameterBufferH264 *pSliceParameter = (VAEncSliceParameterBufferH264 *)encode_state->slice_params_ext[0]->buffer;
    /* NOTE(review): intra-ness keys off slice 0 only; a mixed I/P
     * multi-slice frame would size the buffer from slice 0's type —
     * confirm that is intended. */
    int is_intra = pSliceParameter->slice_type == SLICE_TYPE_I;
    int width_in_mbs = pSequenceParameter->picture_width_in_mbs;
    int height_in_mbs = pSequenceParameter->picture_height_in_mbs;

    vme_context->vme_output.num_blocks = width_in_mbs * height_in_mbs;
    vme_context->vme_output.pitch = 16; /* in bytes, always 16 */

    if (is_intra)
        vme_context->vme_output.size_block = INTRA_VME_OUTPUT_IN_BYTES * 2;
    else
        vme_context->vme_output.size_block = INTRA_VME_OUTPUT_IN_BYTES * 24;
    /*
     * Inter MV . 32-byte Intra search + 16 IME info + 128 IME MV + 32 IME Ref
     * + 16 FBR Info + 128 FBR MV + 32 FBR Ref.
     * 16 * (2 + 2 * (1 + 8 + 2))= 16 * 24.
     */

    vme_context->vme_output.bo = dri_bo_alloc(i965->intel.bufmgr, 
                                              "VME output buffer",
                                              vme_context->vme_output.num_blocks * vme_context->vme_output.size_block,
                                              0x1000);
    assert(vme_context->vme_output.bo);
    vme_context->vme_buffer_suface_setup(ctx,
                                         &vme_context->gpe_context,
                                         &vme_context->vme_output,
                                         BINDING_TABLE_OFFSET(index),
                                         SURFACE_STATE_OFFSET(index));
}
224
225 static void
226 gen75_vme_output_vme_batchbuffer_setup(VADriverContextP ctx,
227                                        struct encode_state *encode_state,
228                                        int index,
229                                        struct intel_encoder_context *encoder_context)
230
231 {
232     struct i965_driver_data *i965 = i965_driver_data(ctx);
233     struct gen6_vme_context *vme_context = encoder_context->vme_context;
234     VAEncSequenceParameterBufferH264 *pSequenceParameter = (VAEncSequenceParameterBufferH264 *)encode_state->seq_param_ext->buffer;
235     int width_in_mbs = pSequenceParameter->picture_width_in_mbs;
236     int height_in_mbs = pSequenceParameter->picture_height_in_mbs;
237
238     vme_context->vme_batchbuffer.num_blocks = width_in_mbs * height_in_mbs + 1;
239     vme_context->vme_batchbuffer.size_block = 64; /* 4 OWORDs */
240     vme_context->vme_batchbuffer.pitch = 16;
241     vme_context->vme_batchbuffer.bo = dri_bo_alloc(i965->intel.bufmgr, 
242                                                    "VME batchbuffer",
243                                                    vme_context->vme_batchbuffer.num_blocks * vme_context->vme_batchbuffer.size_block,
244                                                    0x1000);
245     vme_context->vme_buffer_suface_setup(ctx,
246                                          &vme_context->gpe_context,
247                                          &vme_context->vme_batchbuffer,
248                                          BINDING_TABLE_OFFSET(index),
249                                          SURFACE_STATE_OFFSET(index));
250 }
251
/*
 * Program all surface states needed by the H.264 VME kernels.
 *
 * Binding-table slot usage: 0 = current picture (VME SURFACE2),
 * 1/2 = reference pictures (set up via intel_avc_vme_reference_state),
 * 3 = VME output buffer, 4 = current luma (media), 5 = second-level
 * batchbuffer, 6 = current chroma (media).
 */
static VAStatus
gen75_vme_surface_setup(VADriverContextP ctx, 
                        struct encode_state *encode_state,
                        int is_intra,
                        struct intel_encoder_context *encoder_context)
{
    struct object_surface *obj_surface;

    /*Setup surfaces state*/
    /* current picture for encoding */
    obj_surface = encode_state->input_yuv_object;
    gen75_vme_source_surface_state(ctx, 0, obj_surface, encoder_context);
    gen75_vme_media_source_surface_state(ctx, 4, obj_surface, encoder_context);
    gen75_vme_media_chroma_source_surface_state(ctx, 6, obj_surface, encoder_context);

    if (!is_intra) {
        VAEncSliceParameterBufferH264 *slice_param = (VAEncSliceParameterBufferH264 *)encode_state->slice_params_ext[0]->buffer;
        int slice_type;

        slice_type = intel_avc_enc_slice_type_fixup(slice_param->slice_type);
        assert(slice_type != SLICE_TYPE_I && slice_type != SLICE_TYPE_SI);

        /* list 0 reference at slot 1 */
        intel_avc_vme_reference_state(ctx, encode_state, encoder_context, 0, 1, gen75_vme_source_surface_state);

        /* list 1 reference at slot 2, only for B slices */
        if (slice_type == SLICE_TYPE_B)
            intel_avc_vme_reference_state(ctx, encode_state, encoder_context, 1, 2, gen75_vme_source_surface_state);
    }

    /* VME output */
    gen75_vme_output_buffer_setup(ctx, encode_state, 3, encoder_context);
    gen75_vme_output_vme_batchbuffer_setup(ctx, encode_state, 5, encoder_context);

    return VA_STATUS_SUCCESS;
}
286
/*
 * Fill the interface descriptor remap table (IDRT): one 32-byte
 * descriptor per VME kernel, each pointing at its kernel binary, the
 * shared binding table and the CURBE layout.  A relocation is emitted
 * for every kernel-start pointer so the GPU sees the final bo address.
 */
static VAStatus gen75_vme_interface_setup(VADriverContextP ctx, 
                                          struct encode_state *encode_state,
                                          struct intel_encoder_context *encoder_context)
{
    struct gen6_vme_context *vme_context = encoder_context->vme_context;
    struct gen6_interface_descriptor_data *desc;   
    int i;
    dri_bo *bo;

    bo = vme_context->gpe_context.idrt.bo;
    dri_bo_map(bo, 1);
    assert(bo->virtual);
    desc = bo->virtual;

    for (i = 0; i < vme_context->vme_kernel_sum; i++) {
        struct i965_kernel *kernel;
        kernel = &vme_context->gpe_context.kernels[i];
        assert(sizeof(*desc) == 32);
        /*Setup the descritor table*/
        memset(desc, 0, sizeof(*desc));
        /* kernel start pointer is in 64-byte units */
        desc->desc0.kernel_start_pointer = (kernel->bo->offset >> 6);
        desc->desc2.sampler_count = 0; /* FIXME: */
        desc->desc2.sampler_state_pointer = 0;
        desc->desc3.binding_table_entry_count = 1; /* FIXME: */
        /* all kernels share the same binding table (32-byte units) */
        desc->desc3.binding_table_pointer = (BINDING_TABLE_OFFSET(0) >> 5);
        desc->desc4.constant_urb_entry_read_offset = 0;
        desc->desc4.constant_urb_entry_read_length = CURBE_URB_ENTRY_LENGTH;
                
        /*kernel start*/
        dri_bo_emit_reloc(bo,   
                          I915_GEM_DOMAIN_INSTRUCTION, 0,
                          0,
                          i * sizeof(*desc) + offsetof(struct gen6_interface_descriptor_data, desc0),
                          kernel->bo);
        desc++;
    }
    dri_bo_unmap(bo);

    return VA_STATUS_SUCCESS;
}
327
328 static VAStatus gen75_vme_constant_setup(VADriverContextP ctx, 
329                                          struct encode_state *encode_state,
330                                          struct intel_encoder_context *encoder_context)
331 {
332     struct gen6_vme_context *vme_context = encoder_context->vme_context;
333     unsigned char *constant_buffer;
334     unsigned int *vme_state_message;
335     int mv_num = 32;
336
337     vme_state_message = (unsigned int *)vme_context->vme_state_message;
338
339     if (encoder_context->codec == CODEC_H264) {
340         if (vme_context->h264_level >= 30) {
341             mv_num = 16;
342         
343             if (vme_context->h264_level >= 31)
344                 mv_num = 8;
345         } 
346     } else if (encoder_context->codec == CODEC_MPEG2) {
347         mv_num = 2;
348     }
349
350     vme_state_message[31] = mv_num;
351
352     dri_bo_map(vme_context->gpe_context.curbe.bo, 1);
353     assert(vme_context->gpe_context.curbe.bo->virtual);
354     constant_buffer = vme_context->gpe_context.curbe.bo->virtual;
355
356     /* VME MV/Mb cost table is passed by using const buffer */
357     /* Now it uses the fixed search path. So it is constructed directly
358      * in the GPU shader.
359      */
360     memcpy(constant_buffer, (char *)vme_context->vme_state_message, 128);
361         
362     dri_bo_unmap(vme_context->gpe_context.curbe.bo);
363
364     return VA_STATUS_SUCCESS;
365 }
366
/* Per-QP intra MB mode cost values (one packed dword per QP, 0..51)
 * written into vme_state_message[0] by gen75_vme_state_setup_fixup().
 * Indexed directly by the slice QP. */
static const unsigned int intra_mb_mode_cost_table[] = {
    0x31110001, // for qp0
    0x09110001, // for qp1
    0x15030001, // for qp2
    0x0b030001, // for qp3
    0x0d030011, // for qp4
    0x17210011, // for qp5
    0x41210011, // for qp6
    0x19210011, // for qp7
    0x25050003, // for qp8
    0x1b130003, // for qp9
    0x1d130003, // for qp10
    0x27070021, // for qp11
    0x51310021, // for qp12
    0x29090021, // for qp13
    0x35150005, // for qp14
    0x2b0b0013, // for qp15
    0x2d0d0013, // for qp16
    0x37170007, // for qp17
    0x61410031, // for qp18
    0x39190009, // for qp19
    0x45250015, // for qp20
    0x3b1b000b, // for qp21
    0x3d1d000d, // for qp22
    0x47270017, // for qp23
    0x71510041, // for qp24 ! center for qp=0..30
    0x49290019, // for qp25
    0x55350025, // for qp26
    0x4b2b001b, // for qp27
    0x4d2d001d, // for qp28
    0x57370027, // for qp29
    0x81610051, // for qp30
    0x57270017, // for qp31
    0x81510041, // for qp32 ! center for qp=31..51
    0x59290019, // for qp33
    0x65350025, // for qp34
    0x5b2b001b, // for qp35
    0x5d2d001d, // for qp36
    0x67370027, // for qp37
    0x91610051, // for qp38
    0x69390029, // for qp39
    0x75450035, // for qp40
    0x6b3b002b, // for qp41
    0x6d3d002d, // for qp42
    0x77470037, // for qp43
    0xa1710061, // for qp44
    0x79490039, // for qp45
    0x85550045, // for qp46
    0x7b4b003b, // for qp47
    0x7d4d003d, // for qp48
    0x87570047, // for qp49
    0xb1810071, // for qp50
    0x89590049  // for qp51
};
421
/*
 * Patch vme_state_message[0] with the intra MB mode cost for the
 * frame's QP.  Applies only to intra (I/SI) slices; all other slice
 * types return immediately.  Under CQP the QP comes from the picture
 * and slice parameters; otherwise from the bit rate control context.
 */
static void gen75_vme_state_setup_fixup(VADriverContextP ctx,
                                        struct encode_state *encode_state,
                                        struct intel_encoder_context *encoder_context,
                                        unsigned int *vme_state_message)
{
    struct gen6_mfc_context *mfc_context = encoder_context->mfc_context;
    VAEncPictureParameterBufferH264 *pic_param = (VAEncPictureParameterBufferH264 *)encode_state->pic_param_ext->buffer;
    VAEncSliceParameterBufferH264 *slice_param = (VAEncSliceParameterBufferH264 *)encode_state->slice_params_ext[0]->buffer;

    if (slice_param->slice_type != SLICE_TYPE_I &&
        slice_param->slice_type != SLICE_TYPE_SI)
        return;
    /* NOTE(review): pic_init_qp + slice_qp_delta is used unclamped as a
     * table index; values outside 0..51 would read out of bounds —
     * confirm callers guarantee the range. */
    if (encoder_context->rate_control_mode == VA_RC_CQP)
        vme_state_message[0] = intra_mb_mode_cost_table[pic_param->pic_init_qp + slice_param->slice_qp_delta];
    else
        vme_state_message[0] = intra_mb_mode_cost_table[mfc_context->bit_rate_control_context[SLICE_TYPE_I].QpPrimeY];
}
439
/*
 * Fill the first eight dwords of vme_state_message with default MV/MB
 * cost values, then let the codec-specific fixup override dword 0.
 * NOTE(review): the only call site in this file is commented out in
 * gen75_vme_prepare(); the function appears currently unused — verify
 * before relying on it.
 */
static VAStatus gen75_vme_vme_state_setup(VADriverContextP ctx,
                                          struct encode_state *encode_state,
                                          int is_intra,
                                          struct intel_encoder_context *encoder_context)
{
    struct gen6_vme_context *vme_context = encoder_context->vme_context;
    unsigned int *vme_state_message;
    int i;
        
    //pass the MV/Mb cost into VME message on HASWell
    assert(vme_context->vme_state_message);
    vme_state_message = (unsigned int *)vme_context->vme_state_message;

    /* default packed cost values (overridden per-codec below) */
    vme_state_message[0] = 0x4a4a4a4a;
    vme_state_message[1] = 0x4a4a4a4a;
    vme_state_message[2] = 0x4a4a4a4a;
    vme_state_message[3] = 0x22120200;
    vme_state_message[4] = 0x62524232;

    for (i=5; i < 8; i++) {
        vme_state_message[i] = 0;
    }

    switch (encoder_context->codec) {
    case CODEC_H264:
        gen75_vme_state_setup_fixup(ctx, encode_state, encoder_context, vme_state_message);

        break;

    default:
        /* no fixup */
        break;
    }

    return VA_STATUS_SUCCESS;
}
476
/*
 * CPU-side fill of the second-level batchbuffer: one MEDIA_OBJECT
 * command (8 dwords) per macroblock, walking each slice in raster
 * order.  mb_intra_ub carries the intra-prediction neighbor
 * availability flags for the MB; they are masked off at slice
 * boundaries because neighbors outside the slice may not be used for
 * intra prediction.  The buffer is terminated with MI_BATCH_BUFFER_END.
 */
static void
gen75_vme_fill_vme_batchbuffer(VADriverContextP ctx, 
                               struct encode_state *encode_state,
                               int mb_width, int mb_height,
                               int kernel,
                               int transform_8x8_mode_flag,
                               struct intel_encoder_context *encoder_context)
{
    struct gen6_vme_context *vme_context = encoder_context->vme_context;
    int mb_x = 0, mb_y = 0;
    int i, s;
    unsigned int *command_ptr;

    dri_bo_map(vme_context->vme_batchbuffer.bo, 1);
    command_ptr = vme_context->vme_batchbuffer.bo->virtual;

    for (s = 0; s < encode_state->num_slice_params_ext; s++) {
        VAEncSliceParameterBufferH264 *pSliceParameter = (VAEncSliceParameterBufferH264 *)encode_state->slice_params_ext[s]->buffer; 
        int slice_mb_begin = pSliceParameter->macroblock_address;
        int slice_mb_number = pSliceParameter->num_macroblocks;
        unsigned int mb_intra_ub;
        /* x offset of the slice's first MB within its row; non-zero
         * means the slice does not start on a row boundary */
        int slice_mb_x = pSliceParameter->macroblock_address % mb_width; 
        for (i = 0; i < slice_mb_number;  ) {
            int mb_count = i + slice_mb_begin;    
            mb_x = mb_count % mb_width;
            mb_y = mb_count / mb_width;
            mb_intra_ub = 0;
            /* frame-level neighbor availability */
            if (mb_x != 0) {
                mb_intra_ub |= INTRA_PRED_AVAIL_FLAG_AE;
            }
            if (mb_y != 0) {
                mb_intra_ub |= INTRA_PRED_AVAIL_FLAG_B;
                if (mb_x != 0)
                    mb_intra_ub |= INTRA_PRED_AVAIL_FLAG_D;
                if (mb_x != (mb_width -1))
                    mb_intra_ub |= INTRA_PRED_AVAIL_FLAG_C;
            }
            /* first MB row of the slice: neighbors above belong to the
             * previous slice and are unavailable */
            if (i < mb_width) {
                if (i == 0)
                    mb_intra_ub &= ~(INTRA_PRED_AVAIL_FLAG_AE);
                mb_intra_ub &= ~(INTRA_PRED_AVAIL_FLAG_BCD_MASK);
                if ((i == (mb_width - 1)) && slice_mb_x) {
                    mb_intra_ub |= INTRA_PRED_AVAIL_FLAG_C;
                }
            }
                
            if ((i == mb_width) && slice_mb_x) {
                mb_intra_ub &= ~(INTRA_PRED_AVAIL_FLAG_D);
            }
            /* MEDIA_OBJECT header: 8 dwords total, length field = 8-2 */
            *command_ptr++ = (CMD_MEDIA_OBJECT | (8 - 2));
            *command_ptr++ = kernel;
            *command_ptr++ = 0;
            *command_ptr++ = 0;
            *command_ptr++ = 0;
            *command_ptr++ = 0;
   
            /*inline data */
            *command_ptr++ = (mb_width << 16 | mb_y << 8 | mb_x);
            *command_ptr++ = ( (1 << 16) | transform_8x8_mode_flag | (mb_intra_ub << 8));

            i += 1;
        } 
    }

    *command_ptr++ = 0;
    *command_ptr++ = MI_BATCH_BUFFER_END;

    dri_bo_unmap(vme_context->vme_batchbuffer.bo);
}
546
547 static void gen75_vme_media_init(VADriverContextP ctx, struct intel_encoder_context *encoder_context)
548 {
549     struct gen6_vme_context *vme_context = encoder_context->vme_context;
550
551     i965_gpe_context_init(ctx, &vme_context->gpe_context);
552
553     /* VME output buffer */
554     dri_bo_unreference(vme_context->vme_output.bo);
555     vme_context->vme_output.bo = NULL;
556
557     dri_bo_unreference(vme_context->vme_batchbuffer.bo);
558     vme_context->vme_batchbuffer.bo = NULL;
559
560     /* VME state */
561     dri_bo_unreference(vme_context->vme_state.bo);
562     vme_context->vme_state.bo = NULL;
563 }
564
565 static void gen75_vme_pipeline_programing(VADriverContextP ctx, 
566                                           struct encode_state *encode_state,
567                                           struct intel_encoder_context *encoder_context)
568 {
569     struct gen6_vme_context *vme_context = encoder_context->vme_context;
570     struct intel_batchbuffer *batch = encoder_context->base.batch;
571     VAEncPictureParameterBufferH264 *pPicParameter = (VAEncPictureParameterBufferH264 *)encode_state->pic_param_ext->buffer;
572     VAEncSliceParameterBufferH264 *pSliceParameter = (VAEncSliceParameterBufferH264 *)encode_state->slice_params_ext[0]->buffer;
573     VAEncSequenceParameterBufferH264 *pSequenceParameter = (VAEncSequenceParameterBufferH264 *)encode_state->seq_param_ext->buffer;
574     int width_in_mbs = pSequenceParameter->picture_width_in_mbs;
575     int height_in_mbs = pSequenceParameter->picture_height_in_mbs;
576     int kernel_shader;
577     bool allow_hwscore = true;
578     int s;
579
580     for (s = 0; s < encode_state->num_slice_params_ext; s++) {
581         pSliceParameter = (VAEncSliceParameterBufferH264 *)encode_state->slice_params_ext[s]->buffer; 
582         if ((pSliceParameter->macroblock_address % width_in_mbs)) {
583             allow_hwscore = false;
584             break;
585         }
586     }
587     if ((pSliceParameter->slice_type == SLICE_TYPE_I) ||
588         (pSliceParameter->slice_type == SLICE_TYPE_I)) {
589         kernel_shader = VME_INTRA_SHADER;
590     } else if ((pSliceParameter->slice_type == SLICE_TYPE_P) ||
591                (pSliceParameter->slice_type == SLICE_TYPE_SP)) {
592         kernel_shader = VME_INTER_SHADER;
593     } else {
594         kernel_shader = VME_BINTER_SHADER;
595         if (!allow_hwscore)
596             kernel_shader = VME_INTER_SHADER;
597     }
598     if (allow_hwscore)
599         gen7_vme_walker_fill_vme_batchbuffer(ctx, 
600                                              encode_state,
601                                              width_in_mbs, height_in_mbs,
602                                              kernel_shader,
603                                              pPicParameter->pic_fields.bits.transform_8x8_mode_flag,
604                                              encoder_context);
605     else
606         gen75_vme_fill_vme_batchbuffer(ctx, 
607                                        encode_state,
608                                        width_in_mbs, height_in_mbs,
609                                        kernel_shader,
610                                        pPicParameter->pic_fields.bits.transform_8x8_mode_flag,
611                                        encoder_context);
612
613     intel_batchbuffer_start_atomic(batch, 0x1000);
614     gen6_gpe_pipeline_setup(ctx, &vme_context->gpe_context, batch);
615     BEGIN_BATCH(batch, 2);
616     OUT_BATCH(batch, MI_BATCH_BUFFER_START | (2 << 6));
617     OUT_RELOC(batch,
618               vme_context->vme_batchbuffer.bo,
619               I915_GEM_DOMAIN_COMMAND, 0, 
620               0);
621     ADVANCE_BATCH(batch);
622
623     intel_batchbuffer_end_atomic(batch);        
624 }
625
626 static VAStatus gen75_vme_prepare(VADriverContextP ctx, 
627                                   struct encode_state *encode_state,
628                                   struct intel_encoder_context *encoder_context)
629 {
630     VAStatus vaStatus = VA_STATUS_SUCCESS;
631     VAEncSliceParameterBufferH264 *pSliceParameter = (VAEncSliceParameterBufferH264 *)encode_state->slice_params_ext[0]->buffer;
632     int is_intra = pSliceParameter->slice_type == SLICE_TYPE_I;
633     VAEncSequenceParameterBufferH264 *pSequenceParameter = (VAEncSequenceParameterBufferH264 *)encode_state->seq_param_ext->buffer;
634     struct gen6_vme_context *vme_context = encoder_context->vme_context;
635
636     if (!vme_context->h264_level ||
637         (vme_context->h264_level != pSequenceParameter->level_idc)) {
638         vme_context->h264_level = pSequenceParameter->level_idc;        
639     }   
640
641     intel_vme_update_mbmv_cost(ctx, encode_state, encoder_context);
642         
643     /*Setup all the memory object*/
644     gen75_vme_surface_setup(ctx, encode_state, is_intra, encoder_context);
645     gen75_vme_interface_setup(ctx, encode_state, encoder_context);
646     //gen75_vme_vme_state_setup(ctx, encode_state, is_intra, encoder_context);
647     gen75_vme_constant_setup(ctx, encode_state, encoder_context);
648
649     /*Programing media pipeline*/
650     gen75_vme_pipeline_programing(ctx, encode_state, encoder_context);
651
652     return vaStatus;
653 }
654
655 static VAStatus gen75_vme_run(VADriverContextP ctx, 
656                               struct encode_state *encode_state,
657                               struct intel_encoder_context *encoder_context)
658 {
659     struct intel_batchbuffer *batch = encoder_context->base.batch;
660
661     intel_batchbuffer_flush(batch);
662
663     return VA_STATUS_SUCCESS;
664 }
665
/* Pipeline stop hook; nothing to tear down for gen75 VME. */
static VAStatus gen75_vme_stop(VADriverContextP ctx, 
                               struct encode_state *encode_state,
                               struct intel_encoder_context *encoder_context)
{
    return VA_STATUS_SUCCESS;
}
672
673 static VAStatus
674 gen75_vme_pipeline(VADriverContextP ctx,
675                    VAProfile profile,
676                    struct encode_state *encode_state,
677                    struct intel_encoder_context *encoder_context)
678 {
679     gen75_vme_media_init(ctx, encoder_context);
680     gen75_vme_prepare(ctx, encode_state, encoder_context);
681     gen75_vme_run(ctx, encode_state, encoder_context);
682     gen75_vme_stop(ctx, encode_state, encoder_context);
683
684     return VA_STATUS_SUCCESS;
685 }
686
/*
 * MPEG-2 variant of the VME output buffer setup: identical sizing to
 * the H.264 path but MB dimensions come from the MPEG-2 sequence
 * parameters (pixel sizes rounded up to whole MBs).
 */
static void
gen75_vme_mpeg2_output_buffer_setup(VADriverContextP ctx,
                                    struct encode_state *encode_state,
                                    int index,
                                    int is_intra,
                                    struct intel_encoder_context *encoder_context)

{
    struct i965_driver_data *i965 = i965_driver_data(ctx);
    struct gen6_vme_context *vme_context = encoder_context->vme_context;
    VAEncSequenceParameterBufferMPEG2 *seq_param = (VAEncSequenceParameterBufferMPEG2 *)encode_state->seq_param_ext->buffer;
    int width_in_mbs = ALIGN(seq_param->picture_width, 16) / 16;
    int height_in_mbs = ALIGN(seq_param->picture_height, 16) / 16;

    vme_context->vme_output.num_blocks = width_in_mbs * height_in_mbs;
    vme_context->vme_output.pitch = 16; /* in bytes, always 16 */

    if (is_intra)
        vme_context->vme_output.size_block = INTRA_VME_OUTPUT_IN_BYTES * 2;
    else
        vme_context->vme_output.size_block = INTRA_VME_OUTPUT_IN_BYTES * 24;
    /*
     * Inter MV . 32-byte Intra search + 16 IME info + 128 IME MV + 32 IME Ref
     * + 16 FBR Info + 128 FBR MV + 32 FBR Ref.
     * 16 * (2 + 2 * (1 + 8 + 2))= 16 * 24.
     */

    vme_context->vme_output.bo = dri_bo_alloc(i965->intel.bufmgr, 
                                              "VME output buffer",
                                              vme_context->vme_output.num_blocks * vme_context->vme_output.size_block,
                                              0x1000);
    assert(vme_context->vme_output.bo);
    vme_context->vme_buffer_suface_setup(ctx,
                                         &vme_context->gpe_context,
                                         &vme_context->vme_output,
                                         BINDING_TABLE_OFFSET(index),
                                         SURFACE_STATE_OFFSET(index));
}
725
726 static void
727 gen75_vme_mpeg2_output_vme_batchbuffer_setup(VADriverContextP ctx,
728                                              struct encode_state *encode_state,
729                                              int index,
730                                              struct intel_encoder_context *encoder_context)
731
732 {
733     struct i965_driver_data *i965 = i965_driver_data(ctx);
734     struct gen6_vme_context *vme_context = encoder_context->vme_context;
735     VAEncSequenceParameterBufferMPEG2 *seq_param = (VAEncSequenceParameterBufferMPEG2 *)encode_state->seq_param_ext->buffer;
736     int width_in_mbs = ALIGN(seq_param->picture_width, 16) / 16;
737     int height_in_mbs = ALIGN(seq_param->picture_height, 16) / 16;
738
739     vme_context->vme_batchbuffer.num_blocks = width_in_mbs * height_in_mbs + 1;
740     vme_context->vme_batchbuffer.size_block = 64; /* 4 OWORDs */
741     vme_context->vme_batchbuffer.pitch = 16;
742     vme_context->vme_batchbuffer.bo = dri_bo_alloc(i965->intel.bufmgr, 
743                                                    "VME batchbuffer",
744                                                    vme_context->vme_batchbuffer.num_blocks * vme_context->vme_batchbuffer.size_block,
745                                                    0x1000);
746     vme_context->vme_buffer_suface_setup(ctx,
747                                          &vme_context->gpe_context,
748                                          &vme_context->vme_batchbuffer,
749                                          BINDING_TABLE_OFFSET(index),
750                                          SURFACE_STATE_OFFSET(index));
751 }
752
753 static VAStatus
754 gen75_vme_mpeg2_surface_setup(VADriverContextP ctx, 
755                               struct encode_state *encode_state,
756                               int is_intra,
757                               struct intel_encoder_context *encoder_context)
758 {
759     struct object_surface *obj_surface;
760
761     /*Setup surfaces state*/
762     /* current picture for encoding */
763     obj_surface = encode_state->input_yuv_object;
764     gen75_vme_source_surface_state(ctx, 0, obj_surface, encoder_context);
765     gen75_vme_media_source_surface_state(ctx, 4, obj_surface, encoder_context);
766     gen75_vme_media_chroma_source_surface_state(ctx, 6, obj_surface, encoder_context);
767
768     if (!is_intra) {
769         /* reference 0 */
770         obj_surface = encode_state->reference_objects[0];
771         if (obj_surface->bo != NULL)
772             gen75_vme_source_surface_state(ctx, 1, obj_surface, encoder_context);
773
774         /* reference 1 */
775         obj_surface = encode_state->reference_objects[1];
776         if (obj_surface && obj_surface->bo != NULL) 
777             gen75_vme_source_surface_state(ctx, 2, obj_surface, encoder_context);
778     }
779
780     /* VME output */
781     gen75_vme_mpeg2_output_buffer_setup(ctx, encode_state, 3, is_intra, encoder_context);
782     gen75_vme_mpeg2_output_vme_batchbuffer_setup(ctx, encode_state, 5, encoder_context);
783
784     return VA_STATUS_SUCCESS;
785 }
786
/*
 * Fill the second-level VME batchbuffer with one MEDIA_OBJECT command per
 * macroblock, walking every MPEG-2 slice element in scan order.  For each MB
 * an intra-prediction neighbor-availability mask (mb_intra_ub) is computed
 * and passed to the kernel as inline data, together with the MB coordinates.
 *
 * NOTE(review): inside the per-slice loop, `i` is the slice-relative MB
 * index while `mb_count`/`mb_x`/`mb_y` are frame-absolute; the
 * `i < mb_width` / `i == mb_width` adjustments therefore apply to the first
 * row(s) *of each slice*, clearing top-neighbor availability across the
 * slice boundary.
 */
static void
gen75_vme_mpeg2_fill_vme_batchbuffer(VADriverContextP ctx, 
                                     struct encode_state *encode_state,
                                     int mb_width, int mb_height,
                                     int kernel,
                                     int transform_8x8_mode_flag,
                                     struct intel_encoder_context *encoder_context)
{
    struct gen6_vme_context *vme_context = encoder_context->vme_context;
    int mb_x = 0, mb_y = 0;
    int i, s, j;
    unsigned int *command_ptr;


    /* Map the batchbuffer for CPU writes (1 = writable mapping). */
    dri_bo_map(vme_context->vme_batchbuffer.bo, 1);
    command_ptr = vme_context->vme_batchbuffer.bo->virtual;

    for (s = 0; s < encode_state->num_slice_params_ext; s++) {
        VAEncSliceParameterBufferMPEG2 *slice_param = (VAEncSliceParameterBufferMPEG2 *)encode_state->slice_params_ext[s]->buffer;

        /* Slice elements are laid out contiguously; slice_param is advanced
         * at the bottom of this loop. */
        for (j = 0; j < encode_state->slice_params_ext[s]->num_elements; j++) {
            int slice_mb_begin = slice_param->macroblock_address;
            int slice_mb_number = slice_param->num_macroblocks;
            unsigned int mb_intra_ub;
            /* Column at which this slice starts; non-zero means the slice
             * begins mid-row. */
            int slice_mb_x = slice_param->macroblock_address % mb_width;

            for (i = 0; i < slice_mb_number;) {
                int mb_count = i + slice_mb_begin;    

                mb_x = mb_count % mb_width;
                mb_y = mb_count / mb_width;
                mb_intra_ub = 0;

                /* Left neighbor (A/E) available unless at the frame's left edge. */
                if (mb_x != 0) {
                    mb_intra_ub |= INTRA_PRED_AVAIL_FLAG_AE;
                }

                /* Top row neighbors (B, and diagonals D/C) available below
                 * the frame's first MB row. */
                if (mb_y != 0) {
                    mb_intra_ub |= INTRA_PRED_AVAIL_FLAG_B;

                    if (mb_x != 0)
                        mb_intra_ub |= INTRA_PRED_AVAIL_FLAG_D;

                    if (mb_x != (mb_width -1))
                        mb_intra_ub |= INTRA_PRED_AVAIL_FLAG_C;
                }

                /* First MB row of this slice: neighbors above belong to the
                 * previous slice and must not be used for intra prediction. */
                if (i < mb_width) {
                    if (i == 0)
                        mb_intra_ub &= ~(INTRA_PRED_AVAIL_FLAG_AE);

                    mb_intra_ub &= ~(INTRA_PRED_AVAIL_FLAG_BCD_MASK);

                    /* Last MB of the slice's first row when the slice starts
                     * mid-row: the top-right MB is inside this slice again. */
                    if ((i == (mb_width - 1)) && slice_mb_x) {
                        mb_intra_ub |= INTRA_PRED_AVAIL_FLAG_C;
                    }
                }
                
                /* First MB of the slice's second row when the slice starts
                 * mid-row: the top-left neighbor is outside the slice. */
                if ((i == mb_width) && slice_mb_x) {
                    mb_intra_ub &= ~(INTRA_PRED_AVAIL_FLAG_D);
                }

                /* MEDIA_OBJECT: 8 dwords total (6 header + 2 inline data). */
                *command_ptr++ = (CMD_MEDIA_OBJECT | (8 - 2));
                *command_ptr++ = kernel;
                *command_ptr++ = 0;
                *command_ptr++ = 0;
                *command_ptr++ = 0;
                *command_ptr++ = 0;
   
                /*inline data: frame width / MB coordinates, then availability
                 * mask and transform flag for the kernel. */
                *command_ptr++ = (mb_width << 16 | mb_y << 8 | mb_x);
                *command_ptr++ = ( (1 << 16) | transform_8x8_mode_flag | (mb_intra_ub << 8));

                i += 1;
            }

            slice_param++;
        }
    }

    /* Terminate the second-level batchbuffer. */
    *command_ptr++ = 0;
    *command_ptr++ = MI_BATCH_BUFFER_END;

    dri_bo_unmap(vme_context->vme_batchbuffer.bo);
}
872
873 static void
874 gen75_vme_mpeg2_pipeline_programing(VADriverContextP ctx, 
875                                     struct encode_state *encode_state,
876                                     int is_intra,
877                                     struct intel_encoder_context *encoder_context)
878 {
879     struct gen6_vme_context *vme_context = encoder_context->vme_context;
880     struct intel_batchbuffer *batch = encoder_context->base.batch;
881     VAEncPictureParameterBufferMPEG2 *pic_param = NULL;
882     VAEncSequenceParameterBufferMPEG2 *seq_param = (VAEncSequenceParameterBufferMPEG2 *)encode_state->seq_param_ext->buffer;
883     int width_in_mbs = ALIGN(seq_param->picture_width, 16) / 16;
884     int height_in_mbs = ALIGN(seq_param->picture_height, 16) / 16;
885     bool allow_hwscore = true;
886     int s;
887     int kernel_shader;
888
889     pic_param = (VAEncPictureParameterBufferMPEG2 *)encode_state->pic_param_ext->buffer;
890
891     for (s = 0; s < encode_state->num_slice_params_ext; s++) {
892         int j;
893         VAEncSliceParameterBufferMPEG2 *slice_param = (VAEncSliceParameterBufferMPEG2 *)encode_state->slice_params_ext[s]->buffer;
894
895         for (j = 0; j < encode_state->slice_params_ext[s]->num_elements; j++) {
896             if (slice_param->macroblock_address % width_in_mbs) {
897                 allow_hwscore = false;
898                 break;
899             }
900         }
901     }
902
903     pic_param = (VAEncPictureParameterBufferMPEG2 *)encode_state->pic_param_ext->buffer;
904     if (pic_param->picture_type == VAEncPictureTypeIntra) {
905         allow_hwscore = false;
906         kernel_shader = VME_INTRA_SHADER;
907     } else {
908         kernel_shader = VME_INTER_SHADER;
909     }
910
911     if (allow_hwscore) 
912         gen7_vme_mpeg2_walker_fill_vme_batchbuffer(ctx,
913                                                    encode_state,
914                                                    width_in_mbs, height_in_mbs,
915                                                    kernel_shader,
916                                                    encoder_context);
917     else
918         gen75_vme_mpeg2_fill_vme_batchbuffer(ctx, 
919                                              encode_state,
920                                              width_in_mbs, height_in_mbs,
921                                              kernel_shader,
922                                              0,
923                                              encoder_context);
924
925     intel_batchbuffer_start_atomic(batch, 0x1000);
926     gen6_gpe_pipeline_setup(ctx, &vme_context->gpe_context, batch);
927     BEGIN_BATCH(batch, 2);
928     OUT_BATCH(batch, MI_BATCH_BUFFER_START | (2 << 6));
929     OUT_RELOC(batch,
930               vme_context->vme_batchbuffer.bo,
931               I915_GEM_DOMAIN_COMMAND, 0, 
932               0);
933     ADVANCE_BATCH(batch);
934
935     intel_batchbuffer_end_atomic(batch);        
936 }
937
938 static VAStatus 
939 gen75_vme_mpeg2_prepare(VADriverContextP ctx, 
940                         struct encode_state *encode_state,
941                         struct intel_encoder_context *encoder_context)
942 {
943     VAStatus vaStatus = VA_STATUS_SUCCESS;
944     VAEncSliceParameterBufferMPEG2 *slice_param = (VAEncSliceParameterBufferMPEG2 *)encode_state->slice_params_ext[0]->buffer;
945         
946     VAEncSequenceParameterBufferMPEG2 *seq_param = (VAEncSequenceParameterBufferMPEG2 *)encode_state->seq_param_ext->buffer;
947     struct gen6_vme_context *vme_context = encoder_context->vme_context;
948
949     if ((!vme_context->mpeg2_level) ||
950         (vme_context->mpeg2_level != (seq_param->sequence_extension.bits.profile_and_level_indication & MPEG2_LEVEL_MASK))) {
951         vme_context->mpeg2_level = seq_param->sequence_extension.bits.profile_and_level_indication & MPEG2_LEVEL_MASK;
952     }
953
954     /*Setup all the memory object*/
955     gen75_vme_mpeg2_surface_setup(ctx, encode_state, slice_param->is_intra_slice, encoder_context);
956     gen75_vme_interface_setup(ctx, encode_state, encoder_context);
957     gen75_vme_vme_state_setup(ctx, encode_state, slice_param->is_intra_slice, encoder_context);
958     intel_vme_mpeg2_state_setup(ctx, encode_state, encoder_context);
959     gen75_vme_constant_setup(ctx, encode_state, encoder_context);
960
961     /*Programing media pipeline*/
962     gen75_vme_mpeg2_pipeline_programing(ctx, encode_state, slice_param->is_intra_slice, encoder_context);
963
964     return vaStatus;
965 }
966
/*
 * Top-level MPEG-2 VME pipeline entry point (installed as
 * encoder_context->vme_pipeline for CODEC_MPEG2): initialize the media
 * state, prepare all buffers and commands, then submit and flush the batch.
 * `profile` is accepted for interface compatibility but not used here.
 */
static VAStatus
gen75_vme_mpeg2_pipeline(VADriverContextP ctx,
                         VAProfile profile,
                         struct encode_state *encode_state,
                         struct intel_encoder_context *encoder_context)
{
    gen75_vme_media_init(ctx, encoder_context);
    gen75_vme_mpeg2_prepare(ctx, encode_state, encoder_context);
    gen75_vme_run(ctx, encode_state, encoder_context);
    gen75_vme_stop(ctx, encode_state, encoder_context);

    return VA_STATUS_SUCCESS;
}
980
981 static void
982 gen75_vme_context_destroy(void *context)
983 {
984     struct gen6_vme_context *vme_context = context;
985
986     i965_gpe_context_destroy(&vme_context->gpe_context);
987
988     dri_bo_unreference(vme_context->vme_output.bo);
989     vme_context->vme_output.bo = NULL;
990
991     dri_bo_unreference(vme_context->vme_state.bo);
992     vme_context->vme_state.bo = NULL;
993
994     dri_bo_unreference(vme_context->vme_batchbuffer.bo);
995     vme_context->vme_batchbuffer.bo = NULL;
996
997     if (vme_context->vme_state_message) {
998         free(vme_context->vme_state_message);
999         vme_context->vme_state_message = NULL;
1000     }
1001
1002     free(vme_context);
1003 }
1004
1005 Bool gen75_vme_context_init(VADriverContextP ctx, struct intel_encoder_context *encoder_context)
1006 {
1007     struct gen6_vme_context *vme_context = calloc(1, sizeof(struct gen6_vme_context));
1008     struct i965_kernel *vme_kernel_list = NULL;
1009     int i965_kernel_num;
1010
1011     switch (encoder_context->codec) {
1012     case CODEC_H264:
1013         vme_kernel_list = gen75_vme_kernels;
1014         encoder_context->vme_pipeline = gen75_vme_pipeline;
1015         i965_kernel_num = sizeof(gen75_vme_kernels) / sizeof(struct i965_kernel); 
1016         break;
1017
1018     case CODEC_MPEG2:
1019         vme_kernel_list = gen75_vme_mpeg2_kernels;
1020         encoder_context->vme_pipeline = gen75_vme_mpeg2_pipeline;
1021         i965_kernel_num = sizeof(gen75_vme_mpeg2_kernels) / sizeof(struct i965_kernel); 
1022
1023         break;
1024
1025     default:
1026         /* never get here */
1027         assert(0);
1028
1029         break;
1030     }
1031     vme_context->vme_kernel_sum = i965_kernel_num;
1032     vme_context->gpe_context.surface_state_binding_table.length = (SURFACE_STATE_PADDED_SIZE + sizeof(unsigned int)) * MAX_MEDIA_SURFACES_GEN6;
1033
1034     vme_context->gpe_context.idrt.max_entries = MAX_INTERFACE_DESC_GEN6;
1035     vme_context->gpe_context.idrt.entry_size = sizeof(struct gen6_interface_descriptor_data);
1036
1037     vme_context->gpe_context.curbe.length = CURBE_TOTAL_DATA_LENGTH;
1038
1039     vme_context->gpe_context.vfe_state.max_num_threads = 60 - 1;
1040     vme_context->gpe_context.vfe_state.num_urb_entries = 64;
1041     vme_context->gpe_context.vfe_state.gpgpu_mode = 0;
1042     vme_context->gpe_context.vfe_state.urb_entry_size = 16;
1043     vme_context->gpe_context.vfe_state.curbe_allocation_size = CURBE_ALLOCATION_SIZE - 1;
1044
1045     gen7_vme_scoreboard_init(ctx, vme_context);
1046
1047     i965_gpe_load_kernels(ctx,
1048                           &vme_context->gpe_context,
1049                           vme_kernel_list,
1050                           i965_kernel_num);
1051     vme_context->vme_surface2_setup = gen7_gpe_surface2_setup;
1052     vme_context->vme_media_rw_surface_setup = gen7_gpe_media_rw_surface_setup;
1053     vme_context->vme_buffer_suface_setup = gen7_gpe_buffer_suface_setup;
1054     vme_context->vme_media_chroma_surface_setup = gen75_gpe_media_chroma_surface_setup;
1055
1056     encoder_context->vme_context = vme_context;
1057     encoder_context->vme_context_destroy = gen75_vme_context_destroy;
1058
1059     vme_context->vme_state_message = malloc(VME_MSG_LENGTH * sizeof(int));
1060
1061     return True;
1062 }