e8527c37a237003c2121d352b7702dbae75b516b
[platform/upstream/libva-intel-driver.git] / src / gen75_vme.c
1 /*
2  * Copyright © 2010-2012 Intel Corporation
3  *
4  * Permission is hereby granted, free of charge, to any person obtaining a
5  * copy of this software and associated documentation files (the
6  * "Software"), to deal in the Software without restriction, including
7  * without limitation the rights to use, copy, modify, merge, publish,
8  * distribute, sub license, and/or sell copies of the Software, and to
9  * permit persons to whom the Software is furnished to do so, subject to
10  * the following conditions:
11  *
12  * The above copyright notice and this permission notice (including the
13  * next paragraph) shall be included in all copies or substantial portions
14  * of the Software.
15  *
16  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
17  * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
18  * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
19  * IN NO EVENT SHALL PRECISION INSIGHT AND/OR ITS SUPPLIERS BE LIABLE FOR
20  * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
21  * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
22  * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
23  *
24  * Authors:
25  *    Zhao Yakui <yakui.zhao@intel.com>
26  *    Xiang Haihao <haihao.xiang@intel.com>
27  *
28  */
29
30 #include "sysdeps.h"
31
32 #include "intel_batchbuffer.h"
33 #include "intel_driver.h"
34
35 #include "i965_defines.h"
36 #include "i965_drv_video.h"
37 #include "i965_encoder.h"
38 #include "gen6_vme.h"
39 #include "gen6_mfc.h"
40
41 #define SURFACE_STATE_PADDED_SIZE               MAX(SURFACE_STATE_PADDED_SIZE_GEN6, SURFACE_STATE_PADDED_SIZE_GEN7)
42 #define SURFACE_STATE_OFFSET(index)             (SURFACE_STATE_PADDED_SIZE * index)
43 #define BINDING_TABLE_OFFSET(index)             (SURFACE_STATE_OFFSET(MAX_MEDIA_SURFACES_GEN6) + sizeof(unsigned int) * index)
44
45 #define VME_INTRA_SHADER        0
46 #define VME_INTER_SHADER        1
47 #define VME_BINTER_SHADER       3
48 #define VME_BATCHBUFFER         2
49
50 #define CURBE_ALLOCATION_SIZE   37              /* in 256-bit */
51 #define CURBE_TOTAL_DATA_LENGTH (4 * 32)        /* in byte, it should be less than or equal to CURBE_ALLOCATION_SIZE * 32 */
52 #define CURBE_URB_ENTRY_LENGTH  4               /* in 256-bit, it should be less than or equal to CURBE_TOTAL_DATA_LENGTH / 32 */
53
54 #define VME_MSG_LENGTH          32
55   
/* Precompiled Haswell VME kernel binaries; each .g75b file expands to an
 * initializer list of 4-dword instruction words. */
static const uint32_t gen75_vme_intra_frame[][4] = {
#include "shaders/vme/intra_frame_haswell.g75b"
};

static const uint32_t gen75_vme_inter_frame[][4] = {
#include "shaders/vme/inter_frame_haswell.g75b"
};

static const uint32_t gen75_vme_inter_bframe[][4] = {
#include "shaders/vme/inter_bframe_haswell.g75b"
};

static const uint32_t gen75_vme_batchbuffer[][4] = {
#include "shaders/vme/batchbuffer.g75b"
};
71
/*
 * H.264 VME kernel table.  The second field is the interface descriptor
 * index (VME_*_SHADER / VME_BATCHBUFFER); note VME_BATCHBUFFER is 2 and
 * VME_BINTER_SHADER is 3, so the table order intentionally interleaves them.
 * NOTE(review): the trailing NULL presumably becomes the kernel's dri_bo
 * once the shader is uploaded -- verify against struct i965_kernel.
 */
static struct i965_kernel gen75_vme_kernels[] = {
    {
        "VME Intra Frame",
        VME_INTRA_SHADER, /*index*/
        gen75_vme_intra_frame,
        sizeof(gen75_vme_intra_frame),
        NULL
    },
    {
        "VME inter Frame",
        VME_INTER_SHADER,
        gen75_vme_inter_frame,
        sizeof(gen75_vme_inter_frame),
        NULL
    },
    {
        "VME BATCHBUFFER",
        VME_BATCHBUFFER,
        gen75_vme_batchbuffer,
        sizeof(gen75_vme_batchbuffer),
        NULL
    },
    {
        "VME inter BFrame",
        VME_BINTER_SHADER,
        gen75_vme_inter_bframe,
        sizeof(gen75_vme_inter_bframe),
        NULL
    }
};
102
/* MPEG-2 VME kernel binaries.  The intra and batchbuffer kernels reuse the
 * same binaries as the H.264 path; only the inter kernel is MPEG-2 specific. */
static const uint32_t gen75_vme_mpeg2_intra_frame[][4] = {
#include "shaders/vme/intra_frame_haswell.g75b"
};

static const uint32_t gen75_vme_mpeg2_inter_frame[][4] = {
#include "shaders/vme/mpeg2_inter_haswell.g75b"
};

static const uint32_t gen75_vme_mpeg2_batchbuffer[][4] = {
#include "shaders/vme/batchbuffer.g75b"
};
114
/*
 * MPEG-2 VME kernel table.  Unlike the H.264 table there is no B-inter
 * kernel; the batchbuffer kernel (interface index 2) is the last entry.
 */
static struct i965_kernel gen75_vme_mpeg2_kernels[] = {
    {
        "VME Intra Frame",
        VME_INTRA_SHADER, /*index*/
        gen75_vme_mpeg2_intra_frame,
        sizeof(gen75_vme_mpeg2_intra_frame),
        NULL
    },
    {
        "VME inter Frame",
        VME_INTER_SHADER,
        gen75_vme_mpeg2_inter_frame,
        sizeof(gen75_vme_mpeg2_inter_frame),
        NULL
    },
    {
        "VME BATCHBUFFER",
        VME_BATCHBUFFER,
        gen75_vme_mpeg2_batchbuffer,
        sizeof(gen75_vme_mpeg2_batchbuffer),
        NULL
    },
};
138
139 /* only used for VME source surface state */
140 static void 
141 gen75_vme_source_surface_state(VADriverContextP ctx,
142                                int index,
143                                struct object_surface *obj_surface,
144                                struct intel_encoder_context *encoder_context)
145 {
146     struct gen6_vme_context *vme_context = encoder_context->vme_context;
147
148     vme_context->vme_surface2_setup(ctx,
149                                     &vme_context->gpe_context,
150                                     obj_surface,
151                                     BINDING_TABLE_OFFSET(index),
152                                     SURFACE_STATE_OFFSET(index));
153 }
154
155 static void
156 gen75_vme_media_source_surface_state(VADriverContextP ctx,
157                                      int index,
158                                      struct object_surface *obj_surface,
159                                      struct intel_encoder_context *encoder_context)
160 {
161     struct gen6_vme_context *vme_context = encoder_context->vme_context;
162
163     vme_context->vme_media_rw_surface_setup(ctx,
164                                             &vme_context->gpe_context,
165                                             obj_surface,
166                                             BINDING_TABLE_OFFSET(index),
167                                             SURFACE_STATE_OFFSET(index));
168 }
169
170 static void
171 gen75_vme_media_chroma_source_surface_state(VADriverContextP ctx,
172                                             int index,
173                                             struct object_surface *obj_surface,
174                                             struct intel_encoder_context *encoder_context)
175 {
176     struct gen6_vme_context *vme_context = encoder_context->vme_context;
177
178     vme_context->vme_media_chroma_surface_setup(ctx,
179                                                 &vme_context->gpe_context,
180                                                 obj_surface,
181                                                 BINDING_TABLE_OFFSET(index),
182                                                 SURFACE_STATE_OFFSET(index));
183 }
184
185 static void
186 gen75_vme_output_buffer_setup(VADriverContextP ctx,
187                               struct encode_state *encode_state,
188                               int index,
189                               struct intel_encoder_context *encoder_context)
190
191 {
192     struct i965_driver_data *i965 = i965_driver_data(ctx);
193     struct gen6_vme_context *vme_context = encoder_context->vme_context;
194     VAEncSequenceParameterBufferH264 *pSequenceParameter = (VAEncSequenceParameterBufferH264 *)encode_state->seq_param_ext->buffer;
195     VAEncSliceParameterBufferH264 *pSliceParameter = (VAEncSliceParameterBufferH264 *)encode_state->slice_params_ext[0]->buffer;
196     int is_intra = pSliceParameter->slice_type == SLICE_TYPE_I;
197     int width_in_mbs = pSequenceParameter->picture_width_in_mbs;
198     int height_in_mbs = pSequenceParameter->picture_height_in_mbs;
199
200     vme_context->vme_output.num_blocks = width_in_mbs * height_in_mbs;
201     vme_context->vme_output.pitch = 16; /* in bytes, always 16 */
202
203     if (is_intra)
204         vme_context->vme_output.size_block = INTRA_VME_OUTPUT_IN_BYTES * 2;
205     else
206         vme_context->vme_output.size_block = INTRA_VME_OUTPUT_IN_BYTES * 24;
207     /*
208      * Inter MV . 32-byte Intra search + 16 IME info + 128 IME MV + 32 IME Ref
209      * + 16 FBR Info + 128 FBR MV + 32 FBR Ref.
210      * 16 * (2 + 2 * (1 + 8 + 2))= 16 * 24.
211      */
212
213     vme_context->vme_output.bo = dri_bo_alloc(i965->intel.bufmgr, 
214                                               "VME output buffer",
215                                               vme_context->vme_output.num_blocks * vme_context->vme_output.size_block,
216                                               0x1000);
217     assert(vme_context->vme_output.bo);
218     vme_context->vme_buffer_suface_setup(ctx,
219                                          &vme_context->gpe_context,
220                                          &vme_context->vme_output,
221                                          BINDING_TABLE_OFFSET(index),
222                                          SURFACE_STATE_OFFSET(index));
223 }
224
225 static void
226 gen75_vme_output_vme_batchbuffer_setup(VADriverContextP ctx,
227                                        struct encode_state *encode_state,
228                                        int index,
229                                        struct intel_encoder_context *encoder_context)
230
231 {
232     struct i965_driver_data *i965 = i965_driver_data(ctx);
233     struct gen6_vme_context *vme_context = encoder_context->vme_context;
234     VAEncSequenceParameterBufferH264 *pSequenceParameter = (VAEncSequenceParameterBufferH264 *)encode_state->seq_param_ext->buffer;
235     int width_in_mbs = pSequenceParameter->picture_width_in_mbs;
236     int height_in_mbs = pSequenceParameter->picture_height_in_mbs;
237
238     vme_context->vme_batchbuffer.num_blocks = width_in_mbs * height_in_mbs + 1;
239     vme_context->vme_batchbuffer.size_block = 64; /* 4 OWORDs */
240     vme_context->vme_batchbuffer.pitch = 16;
241     vme_context->vme_batchbuffer.bo = dri_bo_alloc(i965->intel.bufmgr, 
242                                                    "VME batchbuffer",
243                                                    vme_context->vme_batchbuffer.num_blocks * vme_context->vme_batchbuffer.size_block,
244                                                    0x1000);
245     vme_context->vme_buffer_suface_setup(ctx,
246                                          &vme_context->gpe_context,
247                                          &vme_context->vme_batchbuffer,
248                                          BINDING_TABLE_OFFSET(index),
249                                          SURFACE_STATE_OFFSET(index));
250 }
251
252 static VAStatus
253 gen75_vme_surface_setup(VADriverContextP ctx, 
254                         struct encode_state *encode_state,
255                         int is_intra,
256                         struct intel_encoder_context *encoder_context)
257 {
258     struct object_surface *obj_surface;
259
260     /*Setup surfaces state*/
261     /* current picture for encoding */
262     obj_surface = encode_state->input_yuv_object;
263     gen75_vme_source_surface_state(ctx, 0, obj_surface, encoder_context);
264     gen75_vme_media_source_surface_state(ctx, 4, obj_surface, encoder_context);
265     gen75_vme_media_chroma_source_surface_state(ctx, 6, obj_surface, encoder_context);
266
267     if (!is_intra) {
268         VAEncSliceParameterBufferH264 *slice_param = (VAEncSliceParameterBufferH264 *)encode_state->slice_params_ext[0]->buffer;
269         int slice_type;
270
271         slice_type = intel_avc_enc_slice_type_fixup(slice_param->slice_type);
272         assert(slice_type != SLICE_TYPE_I && slice_type != SLICE_TYPE_SI);
273
274         intel_avc_vme_reference_state(ctx, encode_state, encoder_context, 0, 1, gen75_vme_source_surface_state);
275
276         if (slice_type == SLICE_TYPE_B)
277             intel_avc_vme_reference_state(ctx, encode_state, encoder_context, 1, 2, gen75_vme_source_surface_state);
278     }
279
280     /* VME output */
281     gen75_vme_output_buffer_setup(ctx, encode_state, 3, encoder_context);
282     gen75_vme_output_vme_batchbuffer_setup(ctx, encode_state, 5, encoder_context);
283
284     return VA_STATUS_SUCCESS;
285 }
286
287 static VAStatus gen75_vme_interface_setup(VADriverContextP ctx, 
288                                           struct encode_state *encode_state,
289                                           struct intel_encoder_context *encoder_context)
290 {
291     struct gen6_vme_context *vme_context = encoder_context->vme_context;
292     struct gen6_interface_descriptor_data *desc;   
293     int i;
294     dri_bo *bo;
295
296     bo = vme_context->gpe_context.idrt.bo;
297     dri_bo_map(bo, 1);
298     assert(bo->virtual);
299     desc = bo->virtual;
300
301     for (i = 0; i < vme_context->vme_kernel_sum; i++) {
302         struct i965_kernel *kernel;
303         kernel = &vme_context->gpe_context.kernels[i];
304         assert(sizeof(*desc) == 32);
305         /*Setup the descritor table*/
306         memset(desc, 0, sizeof(*desc));
307         desc->desc0.kernel_start_pointer = (kernel->bo->offset >> 6);
308         desc->desc2.sampler_count = 0; /* FIXME: */
309         desc->desc2.sampler_state_pointer = 0;
310         desc->desc3.binding_table_entry_count = 1; /* FIXME: */
311         desc->desc3.binding_table_pointer = (BINDING_TABLE_OFFSET(0) >> 5);
312         desc->desc4.constant_urb_entry_read_offset = 0;
313         desc->desc4.constant_urb_entry_read_length = CURBE_URB_ENTRY_LENGTH;
314                 
315         /*kernel start*/
316         dri_bo_emit_reloc(bo,   
317                           I915_GEM_DOMAIN_INSTRUCTION, 0,
318                           0,
319                           i * sizeof(*desc) + offsetof(struct gen6_interface_descriptor_data, desc0),
320                           kernel->bo);
321         desc++;
322     }
323     dri_bo_unmap(bo);
324
325     return VA_STATUS_SUCCESS;
326 }
327
328 static VAStatus gen75_vme_constant_setup(VADriverContextP ctx, 
329                                          struct encode_state *encode_state,
330                                          struct intel_encoder_context *encoder_context)
331 {
332     struct gen6_vme_context *vme_context = encoder_context->vme_context;
333     unsigned char *constant_buffer;
334     unsigned int *vme_state_message;
335     int mv_num = 32;
336
337     vme_state_message = (unsigned int *)vme_context->vme_state_message;
338
339     if (encoder_context->codec == CODEC_H264 ||
340         encoder_context->codec == CODEC_H264_MVC) {
341         if (vme_context->h264_level >= 30) {
342             mv_num = 16;
343         
344             if (vme_context->h264_level >= 31)
345                 mv_num = 8;
346         } 
347     } else if (encoder_context->codec == CODEC_MPEG2) {
348         mv_num = 2;
349     }
350
351     vme_state_message[31] = mv_num;
352
353     dri_bo_map(vme_context->gpe_context.curbe.bo, 1);
354     assert(vme_context->gpe_context.curbe.bo->virtual);
355     constant_buffer = vme_context->gpe_context.curbe.bo->virtual;
356
357     /* VME MV/Mb cost table is passed by using const buffer */
358     /* Now it uses the fixed search path. So it is constructed directly
359      * in the GPU shader.
360      */
361     memcpy(constant_buffer, (char *)vme_context->vme_state_message, 128);
362         
363     dri_bo_unmap(vme_context->gpe_context.curbe.bo);
364
365     return VA_STATUS_SUCCESS;
366 }
367
/*
 * Per-QP intra MB mode cost words, loaded into vme_state_message[0] by
 * gen75_vme_state_setup_fixup().  Indexed by QP 0..51 (52 entries).
 * NOTE(review): the packing of the four cost bytes is consumed by the VME
 * intra kernel -- confirm the layout against the shader sources.
 */
static const unsigned int intra_mb_mode_cost_table[] = {
    0x31110001, // for qp0
    0x09110001, // for qp1
    0x15030001, // for qp2
    0x0b030001, // for qp3
    0x0d030011, // for qp4
    0x17210011, // for qp5
    0x41210011, // for qp6
    0x19210011, // for qp7
    0x25050003, // for qp8
    0x1b130003, // for qp9
    0x1d130003, // for qp10
    0x27070021, // for qp11
    0x51310021, // for qp12
    0x29090021, // for qp13
    0x35150005, // for qp14
    0x2b0b0013, // for qp15
    0x2d0d0013, // for qp16
    0x37170007, // for qp17
    0x61410031, // for qp18
    0x39190009, // for qp19
    0x45250015, // for qp20
    0x3b1b000b, // for qp21
    0x3d1d000d, // for qp22
    0x47270017, // for qp23
    0x71510041, // for qp24 ! center for qp=0..30
    0x49290019, // for qp25
    0x55350025, // for qp26
    0x4b2b001b, // for qp27
    0x4d2d001d, // for qp28
    0x57370027, // for qp29
    0x81610051, // for qp30
    0x57270017, // for qp31
    0x81510041, // for qp32 ! center for qp=31..51
    0x59290019, // for qp33
    0x65350025, // for qp34
    0x5b2b001b, // for qp35
    0x5d2d001d, // for qp36
    0x67370027, // for qp37
    0x91610051, // for qp38
    0x69390029, // for qp39
    0x75450035, // for qp40
    0x6b3b002b, // for qp41
    0x6d3d002d, // for qp42
    0x77470037, // for qp43
    0xa1710061, // for qp44
    0x79490039, // for qp45
    0x85550045, // for qp46
    0x7b4b003b, // for qp47
    0x7d4d003d, // for qp48
    0x87570047, // for qp49
    0xb1810071, // for qp50
    0x89590049  // for qp51
};
422
423 static void gen75_vme_state_setup_fixup(VADriverContextP ctx,
424                                         struct encode_state *encode_state,
425                                         struct intel_encoder_context *encoder_context,
426                                         unsigned int *vme_state_message)
427 {
428     struct gen6_mfc_context *mfc_context = encoder_context->mfc_context;
429     VAEncPictureParameterBufferH264 *pic_param = (VAEncPictureParameterBufferH264 *)encode_state->pic_param_ext->buffer;
430     VAEncSliceParameterBufferH264 *slice_param = (VAEncSliceParameterBufferH264 *)encode_state->slice_params_ext[0]->buffer;
431
432     if (slice_param->slice_type != SLICE_TYPE_I &&
433         slice_param->slice_type != SLICE_TYPE_SI)
434         return;
435     if (encoder_context->rate_control_mode == VA_RC_CQP)
436         vme_state_message[0] = intra_mb_mode_cost_table[pic_param->pic_init_qp + slice_param->slice_qp_delta];
437     else
438         vme_state_message[0] = intra_mb_mode_cost_table[mfc_context->bit_rate_control_context[SLICE_TYPE_I].QpPrimeY];
439 }
440
441 static VAStatus gen75_vme_vme_state_setup(VADriverContextP ctx,
442                                           struct encode_state *encode_state,
443                                           int is_intra,
444                                           struct intel_encoder_context *encoder_context)
445 {
446     struct gen6_vme_context *vme_context = encoder_context->vme_context;
447     unsigned int *vme_state_message;
448     int i;
449         
450     //pass the MV/Mb cost into VME message on HASWell
451     assert(vme_context->vme_state_message);
452     vme_state_message = (unsigned int *)vme_context->vme_state_message;
453
454     vme_state_message[0] = 0x4a4a4a4a;
455     vme_state_message[1] = 0x4a4a4a4a;
456     vme_state_message[2] = 0x4a4a4a4a;
457     vme_state_message[3] = 0x22120200;
458     vme_state_message[4] = 0x62524232;
459
460     for (i=5; i < 8; i++) {
461         vme_state_message[i] = 0;
462     }
463
464     switch (encoder_context->codec) {
465     case CODEC_H264:
466     case CODEC_H264_MVC:
467         gen75_vme_state_setup_fixup(ctx, encode_state, encoder_context, vme_state_message);
468
469         break;
470
471     default:
472         /* no fixup */
473         break;
474     }
475
476     return VA_STATUS_SUCCESS;
477 }
478
/*
 * Build the second-level batch buffer: one MEDIA_OBJECT command per
 * macroblock, each carrying the MB position and its intra-prediction
 * neighbor-availability flags as inline data.  Ends the buffer with
 * MI_BATCH_BUFFER_END.
 */
static void
gen75_vme_fill_vme_batchbuffer(VADriverContextP ctx, 
                               struct encode_state *encode_state,
                               int mb_width, int mb_height,
                               int kernel,
                               int transform_8x8_mode_flag,
                               struct intel_encoder_context *encoder_context)
{
    struct gen6_vme_context *vme_context = encoder_context->vme_context;
    int mb_x = 0, mb_y = 0;
    int i, s;
    unsigned int *command_ptr;

    dri_bo_map(vme_context->vme_batchbuffer.bo, 1);
    command_ptr = vme_context->vme_batchbuffer.bo->virtual;

    for (s = 0; s < encode_state->num_slice_params_ext; s++) {
        VAEncSliceParameterBufferH264 *pSliceParameter = (VAEncSliceParameterBufferH264 *)encode_state->slice_params_ext[s]->buffer; 
        int slice_mb_begin = pSliceParameter->macroblock_address;
        int slice_mb_number = pSliceParameter->num_macroblocks;
        unsigned int mb_intra_ub;
        /* Non-zero when the slice starts mid-row. */
        int slice_mb_x = pSliceParameter->macroblock_address % mb_width; 
        for (i = 0; i < slice_mb_number;  ) {
            int mb_count = i + slice_mb_begin;    
            mb_x = mb_count % mb_width;
            mb_y = mb_count / mb_width;
            mb_intra_ub = 0;
            /* Frame-level neighbor availability: left (A/E), top (B),
             * top-left (D), top-right (C). */
            if (mb_x != 0) {
                mb_intra_ub |= INTRA_PRED_AVAIL_FLAG_AE;
            }
            if (mb_y != 0) {
                mb_intra_ub |= INTRA_PRED_AVAIL_FLAG_B;
                if (mb_x != 0)
                    mb_intra_ub |= INTRA_PRED_AVAIL_FLAG_D;
                if (mb_x != (mb_width -1))
                    mb_intra_ub |= INTRA_PRED_AVAIL_FLAG_C;
            }
            /* First MB row of the slice: neighbors above belong to another
             * slice and may not be used for intra prediction. */
            if (i < mb_width) {
                if (i == 0)
                    mb_intra_ub &= ~(INTRA_PRED_AVAIL_FLAG_AE);
                mb_intra_ub &= ~(INTRA_PRED_AVAIL_FLAG_BCD_MASK);
                if ((i == (mb_width - 1)) && slice_mb_x) {
                    mb_intra_ub |= INTRA_PRED_AVAIL_FLAG_C;
                }
            }
                
            /* Second row of a mid-row slice: top-left is outside the slice. */
            if ((i == mb_width) && slice_mb_x) {
                mb_intra_ub &= ~(INTRA_PRED_AVAIL_FLAG_D);
            }
            /* MEDIA_OBJECT: 8 dwords total (length field is dwords - 2). */
            *command_ptr++ = (CMD_MEDIA_OBJECT | (8 - 2));
            *command_ptr++ = kernel;
            *command_ptr++ = 0;
            *command_ptr++ = 0;
            *command_ptr++ = 0;
            *command_ptr++ = 0;
   
            /*inline data */
            *command_ptr++ = (mb_width << 16 | mb_y << 8 | mb_x);
            *command_ptr++ = ( (1 << 16) | transform_8x8_mode_flag | (mb_intra_ub << 8));

            i += 1;
        } 
    }

    *command_ptr++ = 0;
    *command_ptr++ = MI_BATCH_BUFFER_END;

    dri_bo_unmap(vme_context->vme_batchbuffer.bo);
}
548
549 static void gen75_vme_media_init(VADriverContextP ctx, struct intel_encoder_context *encoder_context)
550 {
551     struct gen6_vme_context *vme_context = encoder_context->vme_context;
552
553     i965_gpe_context_init(ctx, &vme_context->gpe_context);
554
555     /* VME output buffer */
556     dri_bo_unreference(vme_context->vme_output.bo);
557     vme_context->vme_output.bo = NULL;
558
559     dri_bo_unreference(vme_context->vme_batchbuffer.bo);
560     vme_context->vme_batchbuffer.bo = NULL;
561
562     /* VME state */
563     dri_bo_unreference(vme_context->vme_state.bo);
564     vme_context->vme_state.bo = NULL;
565 }
566
567 static void gen75_vme_pipeline_programing(VADriverContextP ctx, 
568                                           struct encode_state *encode_state,
569                                           struct intel_encoder_context *encoder_context)
570 {
571     struct gen6_vme_context *vme_context = encoder_context->vme_context;
572     struct intel_batchbuffer *batch = encoder_context->base.batch;
573     VAEncPictureParameterBufferH264 *pPicParameter = (VAEncPictureParameterBufferH264 *)encode_state->pic_param_ext->buffer;
574     VAEncSliceParameterBufferH264 *pSliceParameter = (VAEncSliceParameterBufferH264 *)encode_state->slice_params_ext[0]->buffer;
575     VAEncSequenceParameterBufferH264 *pSequenceParameter = (VAEncSequenceParameterBufferH264 *)encode_state->seq_param_ext->buffer;
576     int width_in_mbs = pSequenceParameter->picture_width_in_mbs;
577     int height_in_mbs = pSequenceParameter->picture_height_in_mbs;
578     int kernel_shader;
579     bool allow_hwscore = true;
580     int s;
581
582     for (s = 0; s < encode_state->num_slice_params_ext; s++) {
583         pSliceParameter = (VAEncSliceParameterBufferH264 *)encode_state->slice_params_ext[s]->buffer; 
584         if ((pSliceParameter->macroblock_address % width_in_mbs)) {
585             allow_hwscore = false;
586             break;
587         }
588     }
589     if ((pSliceParameter->slice_type == SLICE_TYPE_I) ||
590         (pSliceParameter->slice_type == SLICE_TYPE_I)) {
591         kernel_shader = VME_INTRA_SHADER;
592     } else if ((pSliceParameter->slice_type == SLICE_TYPE_P) ||
593                (pSliceParameter->slice_type == SLICE_TYPE_SP)) {
594         kernel_shader = VME_INTER_SHADER;
595     } else {
596         kernel_shader = VME_BINTER_SHADER;
597         if (!allow_hwscore)
598             kernel_shader = VME_INTER_SHADER;
599     }
600     if (allow_hwscore)
601         gen7_vme_walker_fill_vme_batchbuffer(ctx, 
602                                              encode_state,
603                                              width_in_mbs, height_in_mbs,
604                                              kernel_shader,
605                                              pPicParameter->pic_fields.bits.transform_8x8_mode_flag,
606                                              encoder_context);
607     else
608         gen75_vme_fill_vme_batchbuffer(ctx, 
609                                        encode_state,
610                                        width_in_mbs, height_in_mbs,
611                                        kernel_shader,
612                                        pPicParameter->pic_fields.bits.transform_8x8_mode_flag,
613                                        encoder_context);
614
615     intel_batchbuffer_start_atomic(batch, 0x1000);
616     gen6_gpe_pipeline_setup(ctx, &vme_context->gpe_context, batch);
617     BEGIN_BATCH(batch, 2);
618     OUT_BATCH(batch, MI_BATCH_BUFFER_START | (1 << 8));
619     OUT_RELOC(batch,
620               vme_context->vme_batchbuffer.bo,
621               I915_GEM_DOMAIN_COMMAND, 0, 
622               0);
623     ADVANCE_BATCH(batch);
624
625     intel_batchbuffer_end_atomic(batch);        
626 }
627
628 static VAStatus gen75_vme_prepare(VADriverContextP ctx, 
629                                   struct encode_state *encode_state,
630                                   struct intel_encoder_context *encoder_context)
631 {
632     VAStatus vaStatus = VA_STATUS_SUCCESS;
633     VAEncSliceParameterBufferH264 *pSliceParameter = (VAEncSliceParameterBufferH264 *)encode_state->slice_params_ext[0]->buffer;
634     int is_intra = pSliceParameter->slice_type == SLICE_TYPE_I;
635     VAEncSequenceParameterBufferH264 *pSequenceParameter = (VAEncSequenceParameterBufferH264 *)encode_state->seq_param_ext->buffer;
636     struct gen6_vme_context *vme_context = encoder_context->vme_context;
637
638     if (!vme_context->h264_level ||
639         (vme_context->h264_level != pSequenceParameter->level_idc)) {
640         vme_context->h264_level = pSequenceParameter->level_idc;        
641     }   
642
643     intel_vme_update_mbmv_cost(ctx, encode_state, encoder_context);
644         
645     /*Setup all the memory object*/
646     gen75_vme_surface_setup(ctx, encode_state, is_intra, encoder_context);
647     gen75_vme_interface_setup(ctx, encode_state, encoder_context);
648     //gen75_vme_vme_state_setup(ctx, encode_state, is_intra, encoder_context);
649     gen75_vme_constant_setup(ctx, encode_state, encoder_context);
650
651     /*Programing media pipeline*/
652     gen75_vme_pipeline_programing(ctx, encode_state, encoder_context);
653
654     return vaStatus;
655 }
656
657 static VAStatus gen75_vme_run(VADriverContextP ctx, 
658                               struct encode_state *encode_state,
659                               struct intel_encoder_context *encoder_context)
660 {
661     struct intel_batchbuffer *batch = encoder_context->base.batch;
662
663     intel_batchbuffer_flush(batch);
664
665     return VA_STATUS_SUCCESS;
666 }
667
668 static VAStatus gen75_vme_stop(VADriverContextP ctx, 
669                                struct encode_state *encode_state,
670                                struct intel_encoder_context *encoder_context)
671 {
672     return VA_STATUS_SUCCESS;
673 }
674
675 static VAStatus
676 gen75_vme_pipeline(VADriverContextP ctx,
677                    VAProfile profile,
678                    struct encode_state *encode_state,
679                    struct intel_encoder_context *encoder_context)
680 {
681     gen75_vme_media_init(ctx, encoder_context);
682     gen75_vme_prepare(ctx, encode_state, encoder_context);
683     gen75_vme_run(ctx, encode_state, encoder_context);
684     gen75_vme_stop(ctx, encode_state, encoder_context);
685
686     return VA_STATUS_SUCCESS;
687 }
688
689 static void
690 gen75_vme_mpeg2_output_buffer_setup(VADriverContextP ctx,
691                                     struct encode_state *encode_state,
692                                     int index,
693                                     int is_intra,
694                                     struct intel_encoder_context *encoder_context)
695
696 {
697     struct i965_driver_data *i965 = i965_driver_data(ctx);
698     struct gen6_vme_context *vme_context = encoder_context->vme_context;
699     VAEncSequenceParameterBufferMPEG2 *seq_param = (VAEncSequenceParameterBufferMPEG2 *)encode_state->seq_param_ext->buffer;
700     int width_in_mbs = ALIGN(seq_param->picture_width, 16) / 16;
701     int height_in_mbs = ALIGN(seq_param->picture_height, 16) / 16;
702
703     vme_context->vme_output.num_blocks = width_in_mbs * height_in_mbs;
704     vme_context->vme_output.pitch = 16; /* in bytes, always 16 */
705
706     if (is_intra)
707         vme_context->vme_output.size_block = INTRA_VME_OUTPUT_IN_BYTES * 2;
708     else
709         vme_context->vme_output.size_block = INTRA_VME_OUTPUT_IN_BYTES * 24;
710     /*
711      * Inter MV . 32-byte Intra search + 16 IME info + 128 IME MV + 32 IME Ref
712      * + 16 FBR Info + 128 FBR MV + 32 FBR Ref.
713      * 16 * (2 + 2 * (1 + 8 + 2))= 16 * 24.
714      */
715
716     vme_context->vme_output.bo = dri_bo_alloc(i965->intel.bufmgr, 
717                                               "VME output buffer",
718                                               vme_context->vme_output.num_blocks * vme_context->vme_output.size_block,
719                                               0x1000);
720     assert(vme_context->vme_output.bo);
721     vme_context->vme_buffer_suface_setup(ctx,
722                                          &vme_context->gpe_context,
723                                          &vme_context->vme_output,
724                                          BINDING_TABLE_OFFSET(index),
725                                          SURFACE_STATE_OFFSET(index));
726 }
727
728 static void
729 gen75_vme_mpeg2_output_vme_batchbuffer_setup(VADriverContextP ctx,
730                                              struct encode_state *encode_state,
731                                              int index,
732                                              struct intel_encoder_context *encoder_context)
733
734 {
735     struct i965_driver_data *i965 = i965_driver_data(ctx);
736     struct gen6_vme_context *vme_context = encoder_context->vme_context;
737     VAEncSequenceParameterBufferMPEG2 *seq_param = (VAEncSequenceParameterBufferMPEG2 *)encode_state->seq_param_ext->buffer;
738     int width_in_mbs = ALIGN(seq_param->picture_width, 16) / 16;
739     int height_in_mbs = ALIGN(seq_param->picture_height, 16) / 16;
740
741     vme_context->vme_batchbuffer.num_blocks = width_in_mbs * height_in_mbs + 1;
742     vme_context->vme_batchbuffer.size_block = 64; /* 4 OWORDs */
743     vme_context->vme_batchbuffer.pitch = 16;
744     vme_context->vme_batchbuffer.bo = dri_bo_alloc(i965->intel.bufmgr, 
745                                                    "VME batchbuffer",
746                                                    vme_context->vme_batchbuffer.num_blocks * vme_context->vme_batchbuffer.size_block,
747                                                    0x1000);
748     vme_context->vme_buffer_suface_setup(ctx,
749                                          &vme_context->gpe_context,
750                                          &vme_context->vme_batchbuffer,
751                                          BINDING_TABLE_OFFSET(index),
752                                          SURFACE_STATE_OFFSET(index));
753 }
754
755 static VAStatus
756 gen75_vme_mpeg2_surface_setup(VADriverContextP ctx, 
757                               struct encode_state *encode_state,
758                               int is_intra,
759                               struct intel_encoder_context *encoder_context)
760 {
761     struct object_surface *obj_surface;
762
763     /*Setup surfaces state*/
764     /* current picture for encoding */
765     obj_surface = encode_state->input_yuv_object;
766     gen75_vme_source_surface_state(ctx, 0, obj_surface, encoder_context);
767     gen75_vme_media_source_surface_state(ctx, 4, obj_surface, encoder_context);
768     gen75_vme_media_chroma_source_surface_state(ctx, 6, obj_surface, encoder_context);
769
770     if (!is_intra) {
771         /* reference 0 */
772         obj_surface = encode_state->reference_objects[0];
773         if (obj_surface->bo != NULL)
774             gen75_vme_source_surface_state(ctx, 1, obj_surface, encoder_context);
775
776         /* reference 1 */
777         obj_surface = encode_state->reference_objects[1];
778         if (obj_surface && obj_surface->bo != NULL) 
779             gen75_vme_source_surface_state(ctx, 2, obj_surface, encoder_context);
780     }
781
782     /* VME output */
783     gen75_vme_mpeg2_output_buffer_setup(ctx, encode_state, 3, is_intra, encoder_context);
784     gen75_vme_mpeg2_output_vme_batchbuffer_setup(ctx, encode_state, 5, encoder_context);
785
786     return VA_STATUS_SUCCESS;
787 }
788
/*
 * Fill the VME dispatch batchbuffer with one CMD_MEDIA_OBJECT per
 * macroblock, walking every MPEG-2 slice in scan order.  Each command
 * carries 2 DWORDs of inline data: the MB position and the
 * intra-prediction neighbor-availability flags (mb_intra_ub).
 */
static void
gen75_vme_mpeg2_fill_vme_batchbuffer(VADriverContextP ctx, 
                                     struct encode_state *encode_state,
                                     int mb_width, int mb_height,
                                     int kernel,
                                     int transform_8x8_mode_flag,
                                     struct intel_encoder_context *encoder_context)
{
    struct gen6_vme_context *vme_context = encoder_context->vme_context;
    int mb_x = 0, mb_y = 0;
    int i, s, j;
    unsigned int *command_ptr;


    /* Map for write (second arg = 1) and emit commands directly into the BO */
    dri_bo_map(vme_context->vme_batchbuffer.bo, 1);
    command_ptr = vme_context->vme_batchbuffer.bo->virtual;

    for (s = 0; s < encode_state->num_slice_params_ext; s++) {
        VAEncSliceParameterBufferMPEG2 *slice_param = (VAEncSliceParameterBufferMPEG2 *)encode_state->slice_params_ext[s]->buffer;

        for (j = 0; j < encode_state->slice_params_ext[s]->num_elements; j++) {
            int slice_mb_begin = slice_param->macroblock_address;
            int slice_mb_number = slice_param->num_macroblocks;
            unsigned int mb_intra_ub;
            /* Non-zero when the slice does not start at a row boundary */
            int slice_mb_x = slice_param->macroblock_address % mb_width;

            for (i = 0; i < slice_mb_number;) {
                int mb_count = i + slice_mb_begin;    

                /* Frame-absolute MB coordinates */
                mb_x = mb_count % mb_width;
                mb_y = mb_count / mb_width;
                mb_intra_ub = 0;

                /* Left neighbor (A/E) available unless in column 0 */
                if (mb_x != 0) {
                    mb_intra_ub |= INTRA_PRED_AVAIL_FLAG_AE;
                }

                /* Top row neighbors (B/C/D) available unless in row 0 */
                if (mb_y != 0) {
                    mb_intra_ub |= INTRA_PRED_AVAIL_FLAG_B;

                    if (mb_x != 0)
                        mb_intra_ub |= INTRA_PRED_AVAIL_FLAG_D;

                    if (mb_x != (mb_width -1))
                        mb_intra_ub |= INTRA_PRED_AVAIL_FLAG_C;
                }

                /*
                 * First MB row *of this slice* (i is the slice-relative
                 * index): neighbors in a different slice must be treated
                 * as unavailable for intra prediction.
                 */
                if (i < mb_width) {
                    if (i == 0)
                        mb_intra_ub &= ~(INTRA_PRED_AVAIL_FLAG_AE);

                    mb_intra_ub &= ~(INTRA_PRED_AVAIL_FLAG_BCD_MASK);

                    /* Mid-row slice start: top-right (C) is within this slice */
                    if ((i == (mb_width - 1)) && slice_mb_x) {
                        mb_intra_ub |= INTRA_PRED_AVAIL_FLAG_C;
                    }
                }
                
                /* Second row of a mid-row slice: top-left (D) is outside the slice */
                if ((i == mb_width) && slice_mb_x) {
                    mb_intra_ub &= ~(INTRA_PRED_AVAIL_FLAG_D);
                }

                /* MEDIA_OBJECT: 8 DWORDs total, DWord length field = 8 - 2 */
                *command_ptr++ = (CMD_MEDIA_OBJECT | (8 - 2));
                *command_ptr++ = kernel;
                *command_ptr++ = 0;
                *command_ptr++ = 0;
                *command_ptr++ = 0;
                *command_ptr++ = 0;
   
                /*inline data */
                *command_ptr++ = (mb_width << 16 | mb_y << 8 | mb_x);
                *command_ptr++ = ( (1 << 16) | transform_8x8_mode_flag | (mb_intra_ub << 8));

                i += 1;
            }

            slice_param++;
        }
    }

    /* Terminate the batchbuffer (padded with one NOOP DWord) */
    *command_ptr++ = 0;
    *command_ptr++ = MI_BATCH_BUFFER_END;

    dri_bo_unmap(vme_context->vme_batchbuffer.bo);
}
874
875 static void
876 gen75_vme_mpeg2_pipeline_programing(VADriverContextP ctx, 
877                                     struct encode_state *encode_state,
878                                     int is_intra,
879                                     struct intel_encoder_context *encoder_context)
880 {
881     struct gen6_vme_context *vme_context = encoder_context->vme_context;
882     struct intel_batchbuffer *batch = encoder_context->base.batch;
883     VAEncPictureParameterBufferMPEG2 *pic_param = NULL;
884     VAEncSequenceParameterBufferMPEG2 *seq_param = (VAEncSequenceParameterBufferMPEG2 *)encode_state->seq_param_ext->buffer;
885     int width_in_mbs = ALIGN(seq_param->picture_width, 16) / 16;
886     int height_in_mbs = ALIGN(seq_param->picture_height, 16) / 16;
887     bool allow_hwscore = true;
888     int s;
889     int kernel_shader;
890
891     pic_param = (VAEncPictureParameterBufferMPEG2 *)encode_state->pic_param_ext->buffer;
892
893     for (s = 0; s < encode_state->num_slice_params_ext; s++) {
894         int j;
895         VAEncSliceParameterBufferMPEG2 *slice_param = (VAEncSliceParameterBufferMPEG2 *)encode_state->slice_params_ext[s]->buffer;
896
897         for (j = 0; j < encode_state->slice_params_ext[s]->num_elements; j++) {
898             if (slice_param->macroblock_address % width_in_mbs) {
899                 allow_hwscore = false;
900                 break;
901             }
902         }
903     }
904
905     pic_param = (VAEncPictureParameterBufferMPEG2 *)encode_state->pic_param_ext->buffer;
906     if (pic_param->picture_type == VAEncPictureTypeIntra) {
907         allow_hwscore = false;
908         kernel_shader = VME_INTRA_SHADER;
909     } else {
910         kernel_shader = VME_INTER_SHADER;
911     }
912
913     if (allow_hwscore) 
914         gen7_vme_mpeg2_walker_fill_vme_batchbuffer(ctx,
915                                                    encode_state,
916                                                    width_in_mbs, height_in_mbs,
917                                                    kernel_shader,
918                                                    encoder_context);
919     else
920         gen75_vme_mpeg2_fill_vme_batchbuffer(ctx, 
921                                              encode_state,
922                                              width_in_mbs, height_in_mbs,
923                                              kernel_shader,
924                                              0,
925                                              encoder_context);
926
927     intel_batchbuffer_start_atomic(batch, 0x1000);
928     gen6_gpe_pipeline_setup(ctx, &vme_context->gpe_context, batch);
929     BEGIN_BATCH(batch, 2);
930     OUT_BATCH(batch, MI_BATCH_BUFFER_START | (1 << 8));
931     OUT_RELOC(batch,
932               vme_context->vme_batchbuffer.bo,
933               I915_GEM_DOMAIN_COMMAND, 0, 
934               0);
935     ADVANCE_BATCH(batch);
936
937     intel_batchbuffer_end_atomic(batch);        
938 }
939
940 static VAStatus 
941 gen75_vme_mpeg2_prepare(VADriverContextP ctx, 
942                         struct encode_state *encode_state,
943                         struct intel_encoder_context *encoder_context)
944 {
945     VAStatus vaStatus = VA_STATUS_SUCCESS;
946     VAEncSliceParameterBufferMPEG2 *slice_param = (VAEncSliceParameterBufferMPEG2 *)encode_state->slice_params_ext[0]->buffer;
947         
948     VAEncSequenceParameterBufferMPEG2 *seq_param = (VAEncSequenceParameterBufferMPEG2 *)encode_state->seq_param_ext->buffer;
949     struct gen6_vme_context *vme_context = encoder_context->vme_context;
950
951     if ((!vme_context->mpeg2_level) ||
952         (vme_context->mpeg2_level != (seq_param->sequence_extension.bits.profile_and_level_indication & MPEG2_LEVEL_MASK))) {
953         vme_context->mpeg2_level = seq_param->sequence_extension.bits.profile_and_level_indication & MPEG2_LEVEL_MASK;
954     }
955
956     /*Setup all the memory object*/
957     gen75_vme_mpeg2_surface_setup(ctx, encode_state, slice_param->is_intra_slice, encoder_context);
958     gen75_vme_interface_setup(ctx, encode_state, encoder_context);
959     gen75_vme_vme_state_setup(ctx, encode_state, slice_param->is_intra_slice, encoder_context);
960     intel_vme_mpeg2_state_setup(ctx, encode_state, encoder_context);
961     gen75_vme_constant_setup(ctx, encode_state, encoder_context);
962
963     /*Programing media pipeline*/
964     gen75_vme_mpeg2_pipeline_programing(ctx, encode_state, slice_param->is_intra_slice, encoder_context);
965
966     return vaStatus;
967 }
968
969 static VAStatus
970 gen75_vme_mpeg2_pipeline(VADriverContextP ctx,
971                          VAProfile profile,
972                          struct encode_state *encode_state,
973                          struct intel_encoder_context *encoder_context)
974 {
975     gen75_vme_media_init(ctx, encoder_context);
976     gen75_vme_mpeg2_prepare(ctx, encode_state, encoder_context);
977     gen75_vme_run(ctx, encode_state, encoder_context);
978     gen75_vme_stop(ctx, encode_state, encoder_context);
979
980     return VA_STATUS_SUCCESS;
981 }
982
983 static void
984 gen75_vme_context_destroy(void *context)
985 {
986     struct gen6_vme_context *vme_context = context;
987
988     i965_gpe_context_destroy(&vme_context->gpe_context);
989
990     dri_bo_unreference(vme_context->vme_output.bo);
991     vme_context->vme_output.bo = NULL;
992
993     dri_bo_unreference(vme_context->vme_state.bo);
994     vme_context->vme_state.bo = NULL;
995
996     dri_bo_unreference(vme_context->vme_batchbuffer.bo);
997     vme_context->vme_batchbuffer.bo = NULL;
998
999     if (vme_context->vme_state_message) {
1000         free(vme_context->vme_state_message);
1001         vme_context->vme_state_message = NULL;
1002     }
1003
1004     free(vme_context);
1005 }
1006
1007 Bool gen75_vme_context_init(VADriverContextP ctx, struct intel_encoder_context *encoder_context)
1008 {
1009     struct gen6_vme_context *vme_context = calloc(1, sizeof(struct gen6_vme_context));
1010     struct i965_kernel *vme_kernel_list = NULL;
1011     int i965_kernel_num;
1012
1013     switch (encoder_context->codec) {
1014     case CODEC_H264:
1015     case CODEC_H264_MVC:
1016         vme_kernel_list = gen75_vme_kernels;
1017         encoder_context->vme_pipeline = gen75_vme_pipeline;
1018         i965_kernel_num = sizeof(gen75_vme_kernels) / sizeof(struct i965_kernel); 
1019         break;
1020
1021     case CODEC_MPEG2:
1022         vme_kernel_list = gen75_vme_mpeg2_kernels;
1023         encoder_context->vme_pipeline = gen75_vme_mpeg2_pipeline;
1024         i965_kernel_num = sizeof(gen75_vme_mpeg2_kernels) / sizeof(struct i965_kernel); 
1025
1026         break;
1027
1028     default:
1029         /* never get here */
1030         assert(0);
1031
1032         break;
1033     }
1034     vme_context->vme_kernel_sum = i965_kernel_num;
1035     vme_context->gpe_context.surface_state_binding_table.length = (SURFACE_STATE_PADDED_SIZE + sizeof(unsigned int)) * MAX_MEDIA_SURFACES_GEN6;
1036
1037     vme_context->gpe_context.idrt.max_entries = MAX_INTERFACE_DESC_GEN6;
1038     vme_context->gpe_context.idrt.entry_size = sizeof(struct gen6_interface_descriptor_data);
1039
1040     vme_context->gpe_context.curbe.length = CURBE_TOTAL_DATA_LENGTH;
1041
1042     vme_context->gpe_context.vfe_state.max_num_threads = 60 - 1;
1043     vme_context->gpe_context.vfe_state.num_urb_entries = 64;
1044     vme_context->gpe_context.vfe_state.gpgpu_mode = 0;
1045     vme_context->gpe_context.vfe_state.urb_entry_size = 16;
1046     vme_context->gpe_context.vfe_state.curbe_allocation_size = CURBE_ALLOCATION_SIZE - 1;
1047
1048     gen7_vme_scoreboard_init(ctx, vme_context);
1049
1050     i965_gpe_load_kernels(ctx,
1051                           &vme_context->gpe_context,
1052                           vme_kernel_list,
1053                           i965_kernel_num);
1054     vme_context->vme_surface2_setup = gen7_gpe_surface2_setup;
1055     vme_context->vme_media_rw_surface_setup = gen7_gpe_media_rw_surface_setup;
1056     vme_context->vme_buffer_suface_setup = gen7_gpe_buffer_suface_setup;
1057     vme_context->vme_media_chroma_surface_setup = gen75_gpe_media_chroma_surface_setup;
1058
1059     encoder_context->vme_context = vme_context;
1060     encoder_context->vme_context_destroy = gen75_vme_context_destroy;
1061
1062     vme_context->vme_state_message = malloc(VME_MSG_LENGTH * sizeof(int));
1063
1064     return True;
1065 }