Fixed 1080p issue and add transform 8x8 support.
[platform/upstream/libva-intel-driver.git] / src / gen6_mfc.c
1 /*
2  * Copyright © 2010-2011 Intel Corporation
3  *
4  * Permission is hereby granted, free of charge, to any person obtaining a
5  * copy of this software and associated documentation files (the
6  * "Software"), to deal in the Software without restriction, including
7  * without limitation the rights to use, copy, modify, merge, publish,
8  * distribute, sub license, and/or sell copies of the Software, and to
9  * permit persons to whom the Software is furnished to do so, subject to
10  * the following conditions:
11  *
12  * The above copyright notice and this permission notice (including the
13  * next paragraph) shall be included in all copies or substantial portions
14  * of the Software.
15  *
16  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
17  * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
18  * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
19  * IN NO EVENT SHALL PRECISION INSIGHT AND/OR ITS SUPPLIERS BE LIABLE FOR
20  * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
21  * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
22  * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
23  *
24  * Authors:
25  *    Zhou Chang <chang.zhou@intel.com>
26  *
27  */
28
29 #include <stdio.h>
30 #include <stdlib.h>
31 #include <string.h>
32 #include <assert.h>
33
34 #include "assert.h"
35 #include "intel_batchbuffer.h"
36 #include "i965_defines.h"
37 #include "i965_structs.h"
38 #include "i965_drv_video.h"
39 #include "i965_encoder.h"
40 #include "i965_encoder_utils.h"
41
42 static void
43 gen6_mfc_pipe_mode_select(VADriverContextP ctx, struct gen6_encoder_context *gen6_encoder_context)
44 {
45     struct intel_batchbuffer *batch = gen6_encoder_context->base.batch;
46
47     BEGIN_BCS_BATCH(batch, 4);
48
49     OUT_BCS_BATCH(batch, MFX_PIPE_MODE_SELECT | (4 - 2));
50     OUT_BCS_BATCH(batch,
51                   (1 << 10) | /* disable Stream-Out , advanced QP/bitrate control need enable it*/
52                   (1 << 9)  | /* Post Deblocking Output */
53                   (0 << 8)  | /* Pre Deblocking Output */
54                   (0 << 7)  | /* disable TLB prefectch */
55                   (0 << 5)  | /* not in stitch mode */
56                   (1 << 4)  | /* encoding mode */
57                   (2 << 0));  /* Standard Select: AVC */
58     OUT_BCS_BATCH(batch,
59                   (0 << 20) | /* round flag in PB slice */
60                   (0 << 19) | /* round flag in Intra8x8 */
61                   (0 << 7)  | /* expand NOA bus flag */
62                   (1 << 6)  | /* must be 1 */
63                   (0 << 5)  | /* disable clock gating for NOA */
64                   (0 << 4)  | /* terminate if AVC motion and POC table error occurs */
65                   (0 << 3)  | /* terminate if AVC mbdata error occurs */
66                   (0 << 2)  | /* terminate if AVC CABAC/CAVLC decode error occurs */
67                   (0 << 1)  | /* AVC long field motion vector */
68                   (0 << 0));  /* always calculate AVC ILDB boundary strength */
69     OUT_BCS_BATCH(batch, 0);
70
71     ADVANCE_BCS_BATCH(batch);
72 }
73
74 static void
75 gen7_mfc_pipe_mode_select(VADriverContextP ctx,
76                           int standard_select,
77                           struct gen6_encoder_context *gen6_encoder_context)
78 {
79     struct intel_batchbuffer *batch = gen6_encoder_context->base.batch;
80
81     assert(standard_select == MFX_FORMAT_MPEG2 ||
82            standard_select == MFX_FORMAT_AVC);
83
84     BEGIN_BCS_BATCH(batch, 5);
85     OUT_BCS_BATCH(batch, MFX_PIPE_MODE_SELECT | (5 - 2));
86     OUT_BCS_BATCH(batch,
87                   (MFX_LONG_MODE << 17) | /* Must be long format for encoder */
88                   (MFD_MODE_VLD << 15) | /* VLD mode */
89                   (0 << 10) | /* disable Stream-Out */
90                   (1 << 9)  | /* Post Deblocking Output */
91                   (0 << 8)  | /* Pre Deblocking Output */
92                   (0 << 5)  | /* not in stitch mode */
93                   (1 << 4)  | /* encoding mode */
94                   (standard_select << 0));  /* standard select: avc or mpeg2 */
95     OUT_BCS_BATCH(batch,
96                   (0 << 7)  | /* expand NOA bus flag */
97                   (0 << 6)  | /* disable slice-level clock gating */
98                   (0 << 5)  | /* disable clock gating for NOA */
99                   (0 << 4)  | /* terminate if AVC motion and POC table error occurs */
100                   (0 << 3)  | /* terminate if AVC mbdata error occurs */
101                   (0 << 2)  | /* terminate if AVC CABAC/CAVLC decode error occurs */
102                   (0 << 1)  |
103                   (0 << 0));
104     OUT_BCS_BATCH(batch, 0);
105     OUT_BCS_BATCH(batch, 0);
106
107     ADVANCE_BCS_BATCH(batch);
108 }
109
110 static void
111 gen6_mfc_surface_state(VADriverContextP ctx, struct gen6_encoder_context *gen6_encoder_context)
112 {
113     struct intel_batchbuffer *batch = gen6_encoder_context->base.batch;
114     struct gen6_mfc_context *mfc_context = &gen6_encoder_context->mfc_context;
115
116     BEGIN_BCS_BATCH(batch, 6);
117
118     OUT_BCS_BATCH(batch, MFX_SURFACE_STATE | (6 - 2));
119     OUT_BCS_BATCH(batch, 0);
120     OUT_BCS_BATCH(batch,
121                   ((mfc_context->surface_state.height - 1) << 19) |
122                   ((mfc_context->surface_state.width - 1) << 6));
123     OUT_BCS_BATCH(batch,
124                   (MFX_SURFACE_PLANAR_420_8 << 28) | /* 420 planar YUV surface */
125                   (1 << 27) | /* must be 1 for interleave U/V, hardware requirement */
126                   (0 << 22) | /* surface object control state, FIXME??? */
127                   ((mfc_context->surface_state.w_pitch - 1) << 3) | /* pitch */
128                   (0 << 2)  | /* must be 0 for interleave U/V */
129                   (1 << 1)  | /* must be y-tiled */
130                   (I965_TILEWALK_YMAJOR << 0));                         /* tile walk, TILEWALK_YMAJOR */
131     OUT_BCS_BATCH(batch,
132                   (0 << 16) |                                                           /* must be 0 for interleave U/V */
133                   (mfc_context->surface_state.h_pitch));                /* y offset for U(cb) */
134     OUT_BCS_BATCH(batch, 0);
135     ADVANCE_BCS_BATCH(batch);
136 }
137
138 static void
139 gen7_mfc_surface_state(VADriverContextP ctx, struct gen6_encoder_context *gen6_encoder_context)
140 {
141     struct intel_batchbuffer *batch = gen6_encoder_context->base.batch;
142     struct gen6_mfc_context *mfc_context = &gen6_encoder_context->mfc_context;
143
144     BEGIN_BCS_BATCH(batch, 6);
145
146     OUT_BCS_BATCH(batch, MFX_SURFACE_STATE | (6 - 2));
147     OUT_BCS_BATCH(batch, 0);
148     OUT_BCS_BATCH(batch,
149                   ((mfc_context->surface_state.height - 1) << 18) |
150                   ((mfc_context->surface_state.width - 1) << 4));
151     OUT_BCS_BATCH(batch,
152                   (MFX_SURFACE_PLANAR_420_8 << 28) | /* 420 planar YUV surface */
153                   (1 << 27) | /* must be 1 for interleave U/V, hardware requirement */
154                   (0 << 22) | /* surface object control state, FIXME??? */
155                   ((mfc_context->surface_state.w_pitch - 1) << 3) | /* pitch */
156                   (0 << 2)  | /* must be 0 for interleave U/V */
157                   (1 << 1)  | /* must be tiled */
158                   (I965_TILEWALK_YMAJOR << 0));  /* tile walk, TILEWALK_YMAJOR */
159     OUT_BCS_BATCH(batch,
160                   (0 << 16) |                                                           /* must be 0 for interleave U/V */
161                   (mfc_context->surface_state.h_pitch));                /* y offset for U(cb) */
162     OUT_BCS_BATCH(batch, 0);
163     ADVANCE_BCS_BATCH(batch);
164 }
165
166 static void
167 gen6_mfc_pipe_buf_addr_state(VADriverContextP ctx, struct gen6_encoder_context *gen6_encoder_context)
168 {
169     struct intel_batchbuffer *batch = gen6_encoder_context->base.batch;
170     struct gen6_mfc_context *mfc_context = &gen6_encoder_context->mfc_context;
171     int i;
172
173     BEGIN_BCS_BATCH(batch, 24);
174
175     OUT_BCS_BATCH(batch, MFX_PIPE_BUF_ADDR_STATE | (24 - 2));
176
177     OUT_BCS_BATCH(batch, 0);                                                                                    /* pre output addr   */
178
179     OUT_BCS_RELOC(batch, mfc_context->post_deblocking_output.bo,
180                   I915_GEM_DOMAIN_INSTRUCTION, I915_GEM_DOMAIN_INSTRUCTION,
181                   0);                                                                                   /* post output addr  */ 
182
183     OUT_BCS_RELOC(batch, mfc_context->uncompressed_picture_source.bo,
184                   I915_GEM_DOMAIN_INSTRUCTION, I915_GEM_DOMAIN_INSTRUCTION,
185                   0);                                                                                   /* uncompressed data */
186     OUT_BCS_RELOC(batch, mfc_context->macroblock_status_buffer.bo,
187                                   I915_GEM_DOMAIN_INSTRUCTION, I915_GEM_DOMAIN_INSTRUCTION,
188                                   0);                                                                                   /* StreamOut data*/
189     OUT_BCS_RELOC(batch, mfc_context->intra_row_store_scratch_buffer.bo,
190                   I915_GEM_DOMAIN_INSTRUCTION, I915_GEM_DOMAIN_INSTRUCTION,
191                   0);   
192     OUT_BCS_RELOC(batch, mfc_context->deblocking_filter_row_store_scratch_buffer.bo,
193                   I915_GEM_DOMAIN_INSTRUCTION, I915_GEM_DOMAIN_INSTRUCTION,
194                   0);
195     /* 7..22 Reference pictures*/
196     for (i = 0; i < ARRAY_ELEMS(mfc_context->reference_surfaces); i++) {
197         if ( mfc_context->reference_surfaces[i].bo != NULL) {
198             OUT_BCS_RELOC(batch, mfc_context->reference_surfaces[i].bo,
199                           I915_GEM_DOMAIN_INSTRUCTION, I915_GEM_DOMAIN_INSTRUCTION,
200                           0);                   
201         } else {
202             OUT_BCS_BATCH(batch, 0);
203         }
204     }
205     OUT_BCS_RELOC(batch, mfc_context->macroblock_status_buffer.bo,
206                                   I915_GEM_DOMAIN_INSTRUCTION, I915_GEM_DOMAIN_INSTRUCTION,
207                                   0);                                                                                   /* Macroblock status buffer*/
208
209     ADVANCE_BCS_BATCH(batch);
210 }
211
212 static void
213 gen6_mfc_ind_obj_base_addr_state(VADriverContextP ctx, struct gen6_encoder_context *gen6_encoder_context)
214 {
215     struct intel_batchbuffer *batch = gen6_encoder_context->base.batch;
216     struct gen6_mfc_context *mfc_context = &gen6_encoder_context->mfc_context;
217     struct gen6_vme_context *vme_context = &gen6_encoder_context->vme_context;
218
219     BEGIN_BCS_BATCH(batch, 11);
220
221     OUT_BCS_BATCH(batch, MFX_IND_OBJ_BASE_ADDR_STATE | (11 - 2));
222     OUT_BCS_BATCH(batch, 0);
223     OUT_BCS_BATCH(batch, 0);
224     /* MFX Indirect MV Object Base Address */
225     OUT_BCS_RELOC(batch, vme_context->vme_output.bo, I915_GEM_DOMAIN_INSTRUCTION, 0, 0);
226     OUT_BCS_BATCH(batch, 0);    
227     OUT_BCS_BATCH(batch, 0);
228     OUT_BCS_BATCH(batch, 0);
229     OUT_BCS_BATCH(batch, 0);
230     OUT_BCS_BATCH(batch, 0);
231     /*MFC Indirect PAK-BSE Object Base Address for Encoder*/    
232     OUT_BCS_RELOC(batch,
233                   mfc_context->mfc_indirect_pak_bse_object.bo,
234                   I915_GEM_DOMAIN_INSTRUCTION, I915_GEM_DOMAIN_INSTRUCTION,
235                   0);
236     OUT_BCS_RELOC(batch,
237                   mfc_context->mfc_indirect_pak_bse_object.bo,
238                   I915_GEM_DOMAIN_INSTRUCTION, I915_GEM_DOMAIN_INSTRUCTION,
239                   mfc_context->mfc_indirect_pak_bse_object.end_offset);
240
241     ADVANCE_BCS_BATCH(batch);
242 }
243
244 static void
245 gen7_mfc_ind_obj_base_addr_state(VADriverContextP ctx, struct gen6_encoder_context *gen6_encoder_context)
246 {
247     struct intel_batchbuffer *batch = gen6_encoder_context->base.batch;
248     struct gen6_mfc_context *mfc_context = &gen6_encoder_context->mfc_context;
249     struct gen6_vme_context *vme_context = &gen6_encoder_context->vme_context;
250
251     BEGIN_BCS_BATCH(batch, 11);
252
253     OUT_BCS_BATCH(batch, MFX_IND_OBJ_BASE_ADDR_STATE | (11 - 2));
254     OUT_BCS_BATCH(batch, 0);
255     OUT_BCS_BATCH(batch, 0);
256     /* MFX Indirect MV Object Base Address */
257     OUT_BCS_RELOC(batch, vme_context->vme_output.bo, I915_GEM_DOMAIN_INSTRUCTION, 0, 0);
258     OUT_BCS_BATCH(batch, 0x80000000); /* must set, up to 2G */
259     OUT_BCS_BATCH(batch, 0);
260     OUT_BCS_BATCH(batch, 0);
261     OUT_BCS_BATCH(batch, 0);
262     OUT_BCS_BATCH(batch, 0);
263     /*MFC Indirect PAK-BSE Object Base Address for Encoder*/    
264     OUT_BCS_RELOC(batch,
265                   mfc_context->mfc_indirect_pak_bse_object.bo,
266                   I915_GEM_DOMAIN_INSTRUCTION, I915_GEM_DOMAIN_INSTRUCTION,
267                   0);
268     OUT_BCS_RELOC(batch,
269                   mfc_context->mfc_indirect_pak_bse_object.bo,
270                   I915_GEM_DOMAIN_INSTRUCTION, I915_GEM_DOMAIN_INSTRUCTION,
271                   mfc_context->mfc_indirect_pak_bse_object.end_offset);
272
273     ADVANCE_BCS_BATCH(batch);
274 }
275
276 static void
277 gen6_mfc_bsp_buf_base_addr_state(VADriverContextP ctx, struct gen6_encoder_context *gen6_encoder_context)
278 {
279     struct intel_batchbuffer *batch = gen6_encoder_context->base.batch;
280     struct gen6_mfc_context *mfc_context = &gen6_encoder_context->mfc_context;
281
282     BEGIN_BCS_BATCH(batch, 4);
283
284     OUT_BCS_BATCH(batch, MFX_BSP_BUF_BASE_ADDR_STATE | (4 - 2));
285     OUT_BCS_RELOC(batch, mfc_context->bsd_mpc_row_store_scratch_buffer.bo,
286                   I915_GEM_DOMAIN_INSTRUCTION, I915_GEM_DOMAIN_INSTRUCTION,
287                   0);
288     OUT_BCS_BATCH(batch, 0);
289     OUT_BCS_BATCH(batch, 0);
290
291     ADVANCE_BCS_BATCH(batch);
292 }
293
294 static void
295 gen6_mfc_avc_img_state(VADriverContextP ctx,struct encode_state *encode_state,
296                        struct gen6_encoder_context *gen6_encoder_context)
297 {
298     struct intel_batchbuffer *batch = gen6_encoder_context->base.batch;
299     struct gen6_mfc_context *mfc_context = &gen6_encoder_context->mfc_context;
300     VAEncSequenceParameterBufferH264 *pSequenceParameter = (VAEncSequenceParameterBufferH264 *)encode_state->seq_param_ext->buffer;
301     VAEncPictureParameterBufferH264 *pPicParameter = (VAEncPictureParameterBufferH264 *)encode_state->pic_param_ext->buffer;
302     int width_in_mbs = (mfc_context->surface_state.width + 15) / 16;
303     int height_in_mbs = (mfc_context->surface_state.height + 15) / 16;
304
305     BEGIN_BCS_BATCH(batch, 13);
306     OUT_BCS_BATCH(batch, MFX_AVC_IMG_STATE | (13 - 2));
307     OUT_BCS_BATCH(batch, 
308                   ((width_in_mbs * height_in_mbs) & 0xFFFF));
309     OUT_BCS_BATCH(batch, 
310                   (height_in_mbs << 16) | 
311                   (width_in_mbs << 0));
312     OUT_BCS_BATCH(batch, 
313                   (0 << 24) |     /*Second Chroma QP Offset*/
314                   (0 << 16) |     /*Chroma QP Offset*/
315                   (0 << 14) |   /*Max-bit conformance Intra flag*/
316                   (0 << 13) |   /*Max Macroblock size conformance Inter flag*/
317                   (1 << 12) |   /*Should always be written as "1" */
318                   (0 << 10) |   /*QM Preset FLag */
319                   (0 << 8)  |   /*Image Structure*/
320                   (0 << 0) );   /*Current Decoed Image Frame Store ID, reserved in Encode mode*/
321     OUT_BCS_BATCH(batch,
322                   (400 << 16) |   /*Mininum Frame size*/        
323                   (0 << 15) |   /*Disable reading of Macroblock Status Buffer*/
324                   (0 << 14) |   /*Load BitStream Pointer only once, 1 slic 1 frame*/
325                   (0 << 13) |   /*CABAC 0 word insertion test enable*/
326                   (1 << 12) |   /*MVUnpackedEnable,compliant to DXVA*/
327                   (1 << 10) |   /*Chroma Format IDC, 4:2:0*/
328                   (1 << 7)  |   /*0:CAVLC encoding mode,1:CABAC*/
329                   (0 << 6)  |   /*Only valid for VLD decoding mode*/
330                   (0 << 5)  |   /*Constrained Intra Predition Flag, from PPS*/
331                   (pSequenceParameter->direct_8x8_inference_flag << 4)  |   /*Direct 8x8 inference flag*/
332                   (pPicParameter->pic_fields.bits.transform_8x8_mode_flag << 3)  |   /*8x8 or 4x4 IDCT Transform Mode Flag*/
333                   (1 << 2)  |   /*Frame MB only flag*/
334                   (0 << 1)  |   /*MBAFF mode is in active*/
335                   (0 << 0) );   /*Field picture flag*/
336     OUT_BCS_BATCH(batch, 
337                   (1<<16)   |   /*Frame Size Rate Control Flag*/  
338                   (1<<12)   |   
339                   (1<<9)    |   /*MB level Rate Control Enabling Flag*/
340                   (1 << 3)  |   /*FrameBitRateMinReportMask*/
341                   (1 << 2)  |   /*FrameBitRateMaxReportMask*/
342                   (1 << 1)  |   /*InterMBMaxSizeReportMask*/
343                   (1 << 0) );   /*IntraMBMaxSizeReportMask*/
344     OUT_BCS_BATCH(batch,                        /*Inter and Intra Conformance Max size limit*/
345                   (0x0600 << 16) |              /*InterMbMaxSz 192 Byte*/
346                   (0x0800) );                   /*IntraMbMaxSz 256 Byte*/
347     OUT_BCS_BATCH(batch, 0x00000000);   /*Reserved : MBZReserved*/
348     OUT_BCS_BATCH(batch, 0x01020304);   /*Slice QP Delta for bitrate control*/                  
349     OUT_BCS_BATCH(batch, 0xFEFDFCFB);           
350     OUT_BCS_BATCH(batch, 0x80601004);   /*MAX = 128KB, MIN = 64KB*/
351     OUT_BCS_BATCH(batch, 0x00800001);   
352     OUT_BCS_BATCH(batch, 0);
353
354     ADVANCE_BCS_BATCH(batch);
355 }
356
357 static void
358 gen7_mfc_avc_img_state(VADriverContextP ctx, struct gen6_encoder_context *gen6_encoder_context)
359 {
360     struct intel_batchbuffer *batch = gen6_encoder_context->base.batch;
361     struct gen6_mfc_context *mfc_context = &gen6_encoder_context->mfc_context;
362
363     int width_in_mbs = (mfc_context->surface_state.width + 15) / 16;
364     int height_in_mbs = (mfc_context->surface_state.height + 15) / 16;
365
366     BEGIN_BCS_BATCH(batch, 16);
367     OUT_BCS_BATCH(batch, MFX_AVC_IMG_STATE | (16 - 2));
368     OUT_BCS_BATCH(batch,
369                   ((width_in_mbs * height_in_mbs) & 0xFFFF));
370     OUT_BCS_BATCH(batch, 
371                   ((height_in_mbs - 1) << 16) | 
372                   ((width_in_mbs - 1) << 0));
373     OUT_BCS_BATCH(batch, 
374                   (0 << 24) |   /* Second Chroma QP Offset */
375                   (0 << 16) |   /* Chroma QP Offset */
376                   (0 << 14) |   /* Max-bit conformance Intra flag */
377                   (0 << 13) |   /* Max Macroblock size conformance Inter flag */
378                   (0 << 12) |   /* FIXME: Weighted_Pred_Flag */
379                   (0 << 10) |   /* FIXME: Weighted_BiPred_Idc */
380                   (0 << 8)  |   /* FIXME: Image Structure */
381                   (0 << 0) );   /* Current Decoed Image Frame Store ID, reserved in Encode mode */
382     OUT_BCS_BATCH(batch,
383                   (0 << 16) |   /* Mininum Frame size */
384                   (0 << 15) |   /* Disable reading of Macroblock Status Buffer */
385                   (0 << 14) |   /* Load BitStream Pointer only once, 1 slic 1 frame */
386                   (0 << 13) |   /* CABAC 0 word insertion test enable */
387                   (1 << 12) |   /* MVUnpackedEnable,compliant to DXVA */
388                   (1 << 10) |   /* Chroma Format IDC, 4:2:0 */
389                   (0 << 9)  |   /* FIXME: MbMvFormatFlag */
390                   (1 << 7)  |   /* 0:CAVLC encoding mode,1:CABAC */
391                   (0 << 6)  |   /* Only valid for VLD decoding mode */
392                   (0 << 5)  |   /* Constrained Intra Predition Flag, from PPS */
393                   (0 << 4)  |   /* Direct 8x8 inference flag */
394                   (0 << 3)  |   /* Only 8x8 IDCT Transform Mode Flag */
395                   (1 << 2)  |   /* Frame MB only flag */
396                   (0 << 1)  |   /* MBAFF mode is in active */
397                   (0 << 0));    /* Field picture flag */
398     OUT_BCS_BATCH(batch, 0);    /* Mainly about MB rate control and debug, just ignoring */
399     OUT_BCS_BATCH(batch,        /* Inter and Intra Conformance Max size limit */
400                   (0xBB8 << 16) |       /* InterMbMaxSz */
401                   (0xEE8) );            /* IntraMbMaxSz */
402     OUT_BCS_BATCH(batch, 0);            /* Reserved */
403     OUT_BCS_BATCH(batch, 0);            /* Slice QP Delta for bitrate control */
404     OUT_BCS_BATCH(batch, 0);            /* Slice QP Delta for bitrate control */        
405     OUT_BCS_BATCH(batch, 0x8C000000);
406     OUT_BCS_BATCH(batch, 0x00010000);
407     OUT_BCS_BATCH(batch, 0);
408     OUT_BCS_BATCH(batch, 0);
409     OUT_BCS_BATCH(batch, 0);
410     OUT_BCS_BATCH(batch, 0);
411
412     ADVANCE_BCS_BATCH(batch);
413 }
414
415 static void gen6_mfc_avc_directmode_state(VADriverContextP ctx, struct gen6_encoder_context *gen6_encoder_context)
416 {
417     struct intel_batchbuffer *batch = gen6_encoder_context->base.batch;
418     struct gen6_mfc_context *mfc_context = &gen6_encoder_context->mfc_context;
419
420     int i;
421
422     BEGIN_BCS_BATCH(batch, 69);
423
424     OUT_BCS_BATCH(batch, MFX_AVC_DIRECTMODE_STATE | (69 - 2));
425
426     /* Reference frames and Current frames */
427     for(i = 0; i < NUM_MFC_DMV_BUFFERS; i++) {
428         if ( mfc_context->direct_mv_buffers[i].bo != NULL) { 
429             OUT_BCS_RELOC(batch, mfc_context->direct_mv_buffers[i].bo,
430                   I915_GEM_DOMAIN_INSTRUCTION, 0,
431                   0);
432          } else {
433              OUT_BCS_BATCH(batch, 0);
434          }
435     }
436
437     /* POL list */
438     for(i = 0; i < 32; i++) {
439         OUT_BCS_BATCH(batch, i/2);
440     }
441     OUT_BCS_BATCH(batch, 0);
442     OUT_BCS_BATCH(batch, 0);
443
444     ADVANCE_BCS_BATCH(batch);
445 }
446
447 static void gen6_mfc_avc_slice_state(VADriverContextP ctx,
448                                      int slice_type,
449                                      struct encode_state *encode_state,
450                                      struct gen6_encoder_context *gen6_encoder_context,
451                                      int rate_control_enable,
452                                      int qp)
453 {
454     struct intel_batchbuffer *batch = gen6_encoder_context->base.batch;
455     struct gen6_mfc_context *mfc_context = &gen6_encoder_context->mfc_context;
456     VAEncSliceParameterBufferH264 *pSliceParameter = (VAEncSliceParameterBufferH264 *)encode_state->slice_params_ext[0]->buffer; /* TODO: multi slices support */
457     int bit_rate_control_target;
458     if ( slice_type == SLICE_TYPE_I )
459         bit_rate_control_target = 0;
460     else
461         bit_rate_control_target = 1;
462     int maxQpN = mfc_context->bit_rate_control_context[bit_rate_control_target].MaxQpNegModifier;
463     int maxQpP = mfc_context->bit_rate_control_context[bit_rate_control_target].MaxQpPosModifier;
464     unsigned char correct[6];
465     int i;
466
467     for (i = 0; i < 6; i++)
468         correct[i] = mfc_context->bit_rate_control_context[bit_rate_control_target].Correct[i];
469     unsigned char grow = mfc_context->bit_rate_control_context[bit_rate_control_target].GrowInit + 
470                          (mfc_context->bit_rate_control_context[bit_rate_control_target].GrowResistance << 4);
471     unsigned char shrink = mfc_context->bit_rate_control_context[bit_rate_control_target].ShrinkInit + 
472                          (mfc_context->bit_rate_control_context[bit_rate_control_target].ShrinkResistance << 4);
473
474     BEGIN_BCS_BATCH(batch, 11);;
475
476     OUT_BCS_BATCH(batch, MFX_AVC_SLICE_STATE | (11 - 2) );
477
478         OUT_BCS_BATCH(batch, slice_type);                       /*Slice Type: I:P:B Slice*/
479
480     if ( slice_type == SLICE_TYPE_I ) {
481         OUT_BCS_BATCH(batch, 0);                        /*no reference frames and pred_weight_table*/
482     } else {
483         OUT_BCS_BATCH(batch, 0x00010000);       /*1 reference frame*/
484     }
485
486     OUT_BCS_BATCH(batch, 
487                   (pSliceParameter->direct_spatial_mv_pred_flag<<29) |             /*Direct Prediction Type*/
488                   (0<<24) |                /*Enable deblocking operation*/
489                   (qp<<16) |                    /*Slice Quantization Parameter*/
490                   0x0202 );
491     OUT_BCS_BATCH(batch, 0);                    /*First MB X&Y , the postion of current slice*/
492     OUT_BCS_BATCH(batch, ( ((mfc_context->surface_state.height+15)/16) << 16) );
493
494     OUT_BCS_BATCH(batch, 
495                   (rate_control_enable<<31) |           /*in CBR mode RateControlCounterEnable = enable*/
496                   (1<<30) |             /*ResetRateControlCounter*/
497                   (0<<28) |             /*RC Triggle Mode = Always Rate Control*/
498                   (4<<24) |     /*RC Stable Tolerance, middle level*/
499                   (rate_control_enable<<23) |     /*RC Panic Enable*/                 
500                   (0<<22) |     /*QP mode, don't modfiy CBP*/
501                   (0<<21) |     /*MB Type Direct Conversion Enabled*/ 
502                   (0<<20) |     /*MB Type Skip Conversion Enabled*/ 
503                   (1<<19) |     /*IsLastSlice*/
504                   (0<<18) |     /*BitstreamOutputFlag Compressed BitStream Output Disable Flag 0:enable 1:disable*/
505                   (1<<17) |         /*HeaderPresentFlag*/       
506                   (1<<16) |         /*SliceData PresentFlag*/
507                   (1<<15) |         /*TailPresentFlag*/
508                   (1<<13) |         /*RBSP NAL TYPE*/   
509                   (0<<12) );    /*CabacZeroWordInsertionEnable*/
510         
511     OUT_BCS_BATCH(batch, mfc_context->mfc_indirect_pak_bse_object.offset);
512
513     OUT_BCS_BATCH(batch, (maxQpN<<24) |     /*Target QP - 24 is lowest QP*/ 
514                          (maxQpP<<16) |     /*Target QP + 20 is highest QP*/
515                          (shrink<<8)  |
516                          (grow<<0));   
517     OUT_BCS_BATCH(batch, (correct[5] << 20) |
518                          (correct[4] << 16) |
519                          (correct[3] << 12) |
520                          (correct[2] << 8) |
521                          (correct[1] << 4) |
522                          (correct[0] << 0));
523     OUT_BCS_BATCH(batch, 0);
524
525     ADVANCE_BCS_BATCH(batch);
526 }
527 static void gen6_mfc_avc_qm_state(VADriverContextP ctx, struct gen6_encoder_context *gen6_encoder_context)
528 {
529     struct intel_batchbuffer *batch = gen6_encoder_context->base.batch;
530     int i;
531
532     BEGIN_BCS_BATCH(batch, 58);
533
534     OUT_BCS_BATCH(batch, MFX_AVC_QM_STATE | 56);
535     OUT_BCS_BATCH(batch, 0xFF ) ; 
536     for( i = 0; i < 56; i++) {
537         OUT_BCS_BATCH(batch, 0x10101010); 
538     }   
539
540     ADVANCE_BCS_BATCH(batch);
541 }
542
543 static void gen6_mfc_avc_fqm_state(VADriverContextP ctx, struct gen6_encoder_context *gen6_encoder_context)
544 {
545     struct intel_batchbuffer *batch = gen6_encoder_context->base.batch;
546     int i;
547
548     BEGIN_BCS_BATCH(batch, 113);
549     OUT_BCS_BATCH(batch, MFC_AVC_FQM_STATE | (113 - 2));
550
551     for(i = 0; i < 112;i++) {
552         OUT_BCS_BATCH(batch, 0x10001000);
553     }   
554
555     ADVANCE_BCS_BATCH(batch);   
556 }
557
558 static void
559 gen7_mfc_qm_state(VADriverContextP ctx,
560                   int qm_type,
561                   unsigned int *qm,
562                   int qm_length,
563                   struct gen6_encoder_context *gen6_encoder_context)
564 {
565     struct intel_batchbuffer *batch = gen6_encoder_context->base.batch;
566     unsigned int qm_buffer[16];
567
568     assert(qm_length <= 16);
569     assert(sizeof(*qm) == 4);
570     memcpy(qm_buffer, qm, qm_length * 4);
571
572     BEGIN_BCS_BATCH(batch, 18);
573     OUT_BCS_BATCH(batch, MFX_QM_STATE | (18 - 2));
574     OUT_BCS_BATCH(batch, qm_type << 0);
575     intel_batchbuffer_data(batch, qm_buffer, 16 * 4);
576     ADVANCE_BCS_BATCH(batch);
577 }
578
579 static void gen7_mfc_avc_qm_state(VADriverContextP ctx, struct gen6_encoder_context *gen6_encoder_context)
580 {
581     unsigned int qm[16] = {
582         0x10101010, 0x10101010, 0x10101010, 0x10101010,
583         0x10101010, 0x10101010, 0x10101010, 0x10101010,
584         0x10101010, 0x10101010, 0x10101010, 0x10101010,
585         0x10101010, 0x10101010, 0x10101010, 0x10101010
586     };
587
588     gen7_mfc_qm_state(ctx, MFX_QM_AVC_4X4_INTRA_MATRIX, qm, 12, gen6_encoder_context);
589     gen7_mfc_qm_state(ctx, MFX_QM_AVC_4X4_INTER_MATRIX, qm, 12, gen6_encoder_context);
590     gen7_mfc_qm_state(ctx, MFX_QM_AVC_8x8_INTRA_MATRIX, qm, 16, gen6_encoder_context);
591     gen7_mfc_qm_state(ctx, MFX_QM_AVC_8x8_INTER_MATRIX, qm, 16, gen6_encoder_context);
592 }
593
594 static void
595 gen7_mfc_fqm_state(VADriverContextP ctx,
596                    int fqm_type,
597                    unsigned int *fqm,
598                    int fqm_length,
599                    struct gen6_encoder_context *gen6_encoder_context)
600 {
601     struct intel_batchbuffer *batch = gen6_encoder_context->base.batch;
602     unsigned int fqm_buffer[32];
603
604     assert(fqm_length <= 32);
605     assert(sizeof(*fqm) == 4);
606     memcpy(fqm_buffer, fqm, fqm_length * 4);
607
608     BEGIN_BCS_BATCH(batch, 34);
609     OUT_BCS_BATCH(batch, MFX_FQM_STATE | (34 - 2));
610     OUT_BCS_BATCH(batch, fqm_type << 0);
611     intel_batchbuffer_data(batch, fqm_buffer, 32 * 4);
612     ADVANCE_BCS_BATCH(batch);
613 }
614
615 static void gen7_mfc_avc_fqm_state(VADriverContextP ctx, struct gen6_encoder_context *gen6_encoder_context)
616 {
617     unsigned int qm[32] = {
618         0x10001000, 0x10001000, 0x10001000, 0x10001000,
619         0x10001000, 0x10001000, 0x10001000, 0x10001000,
620         0x10001000, 0x10001000, 0x10001000, 0x10001000,
621         0x10001000, 0x10001000, 0x10001000, 0x10001000,
622         0x10001000, 0x10001000, 0x10001000, 0x10001000,
623         0x10001000, 0x10001000, 0x10001000, 0x10001000,
624         0x10001000, 0x10001000, 0x10001000, 0x10001000,
625         0x10001000, 0x10001000, 0x10001000, 0x10001000
626     };
627
628     gen7_mfc_fqm_state(ctx, MFX_QM_AVC_4X4_INTRA_MATRIX, qm, 24, gen6_encoder_context);
629     gen7_mfc_fqm_state(ctx, MFX_QM_AVC_4X4_INTER_MATRIX, qm, 24, gen6_encoder_context);
630     gen7_mfc_fqm_state(ctx, MFX_QM_AVC_8x8_INTRA_MATRIX, qm, 32, gen6_encoder_context);
631     gen7_mfc_fqm_state(ctx, MFX_QM_AVC_8x8_INTER_MATRIX, qm, 32, gen6_encoder_context);
632 }
633
634 static void gen6_mfc_avc_ref_idx_state(VADriverContextP ctx, struct gen6_encoder_context *gen6_encoder_context)
635 {
636     struct intel_batchbuffer *batch = gen6_encoder_context->base.batch;
637     int i;
638
639         BEGIN_BCS_BATCH(batch, 10);
640         OUT_BCS_BATCH(batch, MFX_AVC_REF_IDX_STATE | 8); 
641         OUT_BCS_BATCH(batch, 0);                  //Select L0
642         OUT_BCS_BATCH(batch, 0x80808020);         //Only 1 reference
643         for(i = 0; i < 7; i++) {
644                 OUT_BCS_BATCH(batch, 0x80808080);
645         }   
646         ADVANCE_BCS_BATCH(batch);
647
648         BEGIN_BCS_BATCH(batch, 10);
649         OUT_BCS_BATCH(batch, MFX_AVC_REF_IDX_STATE | 8); 
650         OUT_BCS_BATCH(batch, 1);                  //Select L1
651         OUT_BCS_BATCH(batch, 0x80808022);         //Only 1 reference
652         for(i = 0; i < 7; i++) {
653                 OUT_BCS_BATCH(batch, 0x80808080);
654         }   
655         ADVANCE_BCS_BATCH(batch);
656 }
657         
658 static void
659 gen6_mfc_avc_insert_object(VADriverContextP ctx, struct gen6_encoder_context *gen6_encoder_context,
660                            unsigned int *insert_data, int lenght_in_dws, int data_bits_in_last_dw,
661                            int skip_emul_byte_count, int is_last_header, int is_end_of_slice, int emulation_flag)
662 {
663     struct intel_batchbuffer *batch = gen6_encoder_context->base.batch;
664
665     BEGIN_BCS_BATCH(batch, lenght_in_dws + 2);
666
667     OUT_BCS_BATCH(batch, MFC_AVC_INSERT_OBJECT | (lenght_in_dws + 2 - 2));
668     OUT_BCS_BATCH(batch,
669                   (0 << 16) |   /* always start at offset 0 */
670                   (data_bits_in_last_dw << 8) |
671                   (skip_emul_byte_count << 4) |
672                   (!!emulation_flag << 3) |
673                   ((!!is_last_header) << 2) |
674                   ((!!is_end_of_slice) << 1) |
675                   (0 << 0));    /* FIXME: ??? */
676
677     intel_batchbuffer_data(batch, insert_data, lenght_in_dws * 4);
678     ADVANCE_BCS_BATCH(batch);
679 }
680
681 static int
682 gen6_mfc_avc_pak_object_intra(VADriverContextP ctx, int x, int y, int end_mb, int qp,unsigned int *msg,
683                               struct gen6_encoder_context *gen6_encoder_context,
684                               unsigned char target_mb_size, unsigned char max_mb_size)
685 {
686     struct intel_batchbuffer *batch = gen6_encoder_context->base.batch;
687     int len_in_dwords = 11;
688
689     BEGIN_BCS_BATCH(batch, len_in_dwords);
690
691     OUT_BCS_BATCH(batch, MFC_AVC_PAK_OBJECT | (len_in_dwords - 2));
692     OUT_BCS_BATCH(batch, 0);
693     OUT_BCS_BATCH(batch, 0);
694     OUT_BCS_BATCH(batch, 
695                   (0 << 24) |           /* PackedMvNum, Debug*/
696                   (0 << 20) |           /* No motion vector */
697                   (1 << 19) |           /* CbpDcY */
698                   (1 << 18) |           /* CbpDcU */
699                   (1 << 17) |           /* CbpDcV */
700                   (msg[0] & 0xFFFF) );
701
702     OUT_BCS_BATCH(batch, (0xFFFF<<16) | (y << 8) | x);          /* Code Block Pattern for Y*/
703     OUT_BCS_BATCH(batch, 0x000F000F);                                                   /* Code Block Pattern */                
704     OUT_BCS_BATCH(batch, (0 << 27) | (end_mb << 26) | qp);      /* Last MB */
705
706     /*Stuff for Intra MB*/
707     OUT_BCS_BATCH(batch, msg[1]);                       /* We using Intra16x16 no 4x4 predmode*/        
708     OUT_BCS_BATCH(batch, msg[2]);       
709     OUT_BCS_BATCH(batch, msg[3]&0xFC);          
710     
711     /*MaxSizeInWord and TargetSzieInWord*/
712     OUT_BCS_BATCH(batch, (max_mb_size << 24) |
713                          (target_mb_size << 16) );
714
715     ADVANCE_BCS_BATCH(batch);
716
717     return len_in_dwords;
718 }
719
720 static int gen6_mfc_avc_pak_object_inter(VADriverContextP ctx, int x, int y, int end_mb, int qp, unsigned int offset,
721                                          struct gen6_encoder_context *gen6_encoder_context,
722                                          unsigned char target_mb_size,unsigned char max_mb_size, int slice_type)
723 {
724     struct intel_batchbuffer *batch = gen6_encoder_context->base.batch;
725     int len_in_dwords = 11;
726
727     BEGIN_BCS_BATCH(batch, len_in_dwords);
728
729     OUT_BCS_BATCH(batch, MFC_AVC_PAK_OBJECT | (len_in_dwords - 2));
730
731     OUT_BCS_BATCH(batch, 32);         /* 32 MV*/
732     OUT_BCS_BATCH(batch, offset);
733
734     OUT_BCS_BATCH(batch, 
735                   (1 << 24) |     /* PackedMvNum, Debug*/
736                   (4 << 20) |     /* 8 MV, SNB don't use it*/
737                   (1 << 19) |     /* CbpDcY */
738                   (1 << 18) |     /* CbpDcU */
739                   (1 << 17) |     /* CbpDcV */
740                   (0 << 15) |     /* Transform8x8Flag = 0*/
741                   (0 << 14) |     /* Frame based*/
742                   (0 << 13) |     /* Inter MB */
743                   (1 << 8)  |     /* MbType = P_L0_16x16 */   
744                   (0 << 7)  |     /* MBZ for frame */
745                   (0 << 6)  |     /* MBZ */
746                   (2 << 4)  |     /* MBZ for inter*/
747                   (0 << 3)  |     /* MBZ */
748                   (0 << 2)  |     /* SkipMbFlag */
749                   (0 << 0));      /* InterMbMode */
750
751     OUT_BCS_BATCH(batch, (0xFFFF<<16) | (y << 8) | x);        /* Code Block Pattern for Y*/
752     OUT_BCS_BATCH(batch, 0x000F000F);                         /* Code Block Pattern */  
753 #if 0 
754     if ( slice_type == SLICE_TYPE_B) {
755         OUT_BCS_BATCH(batch, (0xF<<28) | (end_mb << 26) | qp);  /* Last MB */
756     } else {
757         OUT_BCS_BATCH(batch, (end_mb << 26) | qp);      /* Last MB */
758     }
759 #else
760     OUT_BCS_BATCH(batch, (end_mb << 26) | qp);  /* Last MB */
761 #endif
762
763
764     /*Stuff for Inter MB*/
765     OUT_BCS_BATCH(batch, 0x0);        
766     OUT_BCS_BATCH(batch, 0x0);    
767     OUT_BCS_BATCH(batch, 0x0);        
768
769     /*MaxSizeInWord and TargetSzieInWord*/
770     OUT_BCS_BATCH(batch, (max_mb_size << 24) |
771                          (target_mb_size << 16) );
772
773     ADVANCE_BCS_BATCH(batch);
774
775     return len_in_dwords;
776 }
777
778 static void gen6_mfc_init(VADriverContextP ctx, struct gen6_encoder_context *gen6_encoder_context)
779 {
780     struct i965_driver_data *i965 = i965_driver_data(ctx);
781     struct gen6_mfc_context *mfc_context = &gen6_encoder_context->mfc_context;
782     dri_bo *bo;
783     int i;
784
785     /*Encode common setup for MFC*/
786     dri_bo_unreference(mfc_context->post_deblocking_output.bo);
787     mfc_context->post_deblocking_output.bo = NULL;
788
789     dri_bo_unreference(mfc_context->pre_deblocking_output.bo);
790     mfc_context->pre_deblocking_output.bo = NULL;
791
792     dri_bo_unreference(mfc_context->uncompressed_picture_source.bo);
793     mfc_context->uncompressed_picture_source.bo = NULL;
794
795     dri_bo_unreference(mfc_context->mfc_indirect_pak_bse_object.bo); 
796     mfc_context->mfc_indirect_pak_bse_object.bo = NULL;
797
798     for (i = 0; i < NUM_MFC_DMV_BUFFERS; i++){
799         if ( mfc_context->direct_mv_buffers[i].bo != NULL);
800             dri_bo_unreference(mfc_context->direct_mv_buffers[i].bo);
801         mfc_context->direct_mv_buffers[i].bo = NULL;
802     }
803
804     for (i = 0; i < MAX_MFC_REFERENCE_SURFACES; i++){
805         if (mfc_context->reference_surfaces[i].bo != NULL)
806             dri_bo_unreference(mfc_context->reference_surfaces[i].bo);
807         mfc_context->reference_surfaces[i].bo = NULL;  
808     }
809
810     dri_bo_unreference(mfc_context->intra_row_store_scratch_buffer.bo);
811     bo = dri_bo_alloc(i965->intel.bufmgr,
812                       "Buffer",
813                       128 * 64,
814                       64);
815     assert(bo);
816     mfc_context->intra_row_store_scratch_buffer.bo = bo;
817
818     dri_bo_unreference(mfc_context->macroblock_status_buffer.bo);
819     bo = dri_bo_alloc(i965->intel.bufmgr,
820                       "Buffer",
821                       128*128*16,
822                       64);
823     assert(bo);
824     mfc_context->macroblock_status_buffer.bo = bo;
825
826     dri_bo_unreference(mfc_context->deblocking_filter_row_store_scratch_buffer.bo);
827     bo = dri_bo_alloc(i965->intel.bufmgr,
828                       "Buffer",
829                       49152,  /* 6 * 128 * 64 */
830                       64);
831     assert(bo);
832     mfc_context->deblocking_filter_row_store_scratch_buffer.bo = bo;
833
834     dri_bo_unreference(mfc_context->bsd_mpc_row_store_scratch_buffer.bo);
835     bo = dri_bo_alloc(i965->intel.bufmgr,
836                       "Buffer",
837                       12288, /* 1.5 * 128 * 64 */
838                       0x1000);
839     assert(bo);
840     mfc_context->bsd_mpc_row_store_scratch_buffer.bo = bo;
841 }
842
843 void gen6_mfc_avc_pipeline_programing(VADriverContextP ctx,
844                                       struct encode_state *encode_state,
845                                       struct gen6_encoder_context *gen6_encoder_context)
846 {
847     struct i965_driver_data *i965 = i965_driver_data(ctx);
848     struct intel_batchbuffer *batch = gen6_encoder_context->base.batch;
849     struct gen6_mfc_context *mfc_context = &gen6_encoder_context->mfc_context;
850     struct gen6_vme_context *vme_context = &gen6_encoder_context->vme_context;
851     VAEncSequenceParameterBufferH264 *pSequenceParameter = (VAEncSequenceParameterBufferH264 *)encode_state->seq_param_ext->buffer;
852     VAEncPictureParameterBufferH264 *pPicParameter = (VAEncPictureParameterBufferH264 *)encode_state->pic_param_ext->buffer;
853     VAEncSliceParameterBufferH264 *pSliceParameter = (VAEncSliceParameterBufferH264 *)encode_state->slice_params_ext[0]->buffer; /* FIXME: multi slices */
854     VAEncH264DecRefPicMarkingBuffer *pDecRefPicMarking = NULL;
855     unsigned int *msg = NULL, offset = 0;
856     int emit_new_state = 1, object_len_in_bytes;
857     int is_intra = pSliceParameter->slice_type == SLICE_TYPE_I;
858     int width_in_mbs = (mfc_context->surface_state.width + 15) / 16;
859     int height_in_mbs = (mfc_context->surface_state.height + 15) / 16;
860     int x,y;
861     int rate_control_mode = pSequenceParameter->rate_control_method; 
862     unsigned char target_mb_size = mfc_context->bit_rate_control_context[1-is_intra].TargetSizeInWord;
863     unsigned char max_mb_size = mfc_context->bit_rate_control_context[1-is_intra].MaxSizeInWord;
864     int qp = pPicParameter->pic_init_qp + pSliceParameter->slice_qp_delta;
865     unsigned char *slice_header = NULL;
866     int slice_header_length_in_bits = 0;
867     unsigned int tail_data[] = { 0x0 };
868
869     if (encode_state->dec_ref_pic_marking)
870         pDecRefPicMarking = (VAEncH264DecRefPicMarkingBuffer *)encode_state->dec_ref_pic_marking->buffer;
871
872     slice_header_length_in_bits = build_avc_slice_header(pSequenceParameter, pPicParameter, pSliceParameter, pDecRefPicMarking, &slice_header);
873
874     if ( rate_control_mode == 0) {
875         qp = mfc_context->bit_rate_control_context[1-is_intra].QpPrimeY;
876     }
877
878     intel_batchbuffer_start_atomic_bcs(batch, 0x1000); 
879     
880     if (is_intra) {
881         dri_bo_map(vme_context->vme_output.bo , 1);
882         msg = (unsigned int *)vme_context->vme_output.bo->virtual;
883     }
884
885     for (y = 0; y < height_in_mbs; y++) {
886         for (x = 0; x < width_in_mbs; x++) { 
887             int last_mb = (y == (height_in_mbs-1)) && ( x == (width_in_mbs-1) );
888             
889             if (emit_new_state) {
890                 intel_batchbuffer_emit_mi_flush(batch);
891                 
892                 if (IS_GEN7(i965->intel.device_id)) {
893                     gen7_mfc_pipe_mode_select(ctx, MFX_FORMAT_AVC, gen6_encoder_context);
894                     gen7_mfc_surface_state(ctx, gen6_encoder_context);
895                     gen7_mfc_ind_obj_base_addr_state(ctx, gen6_encoder_context);
896                 } else {
897                     gen6_mfc_pipe_mode_select(ctx, gen6_encoder_context);
898                     gen6_mfc_surface_state(ctx, gen6_encoder_context);
899                     gen6_mfc_ind_obj_base_addr_state(ctx, gen6_encoder_context);
900                 }
901
902                 gen6_mfc_pipe_buf_addr_state(ctx, gen6_encoder_context);
903                 gen6_mfc_bsp_buf_base_addr_state(ctx, gen6_encoder_context);
904
905                 if (IS_GEN7(i965->intel.device_id)) {
906                     gen7_mfc_avc_img_state(ctx, gen6_encoder_context);
907                     gen7_mfc_avc_qm_state(ctx, gen6_encoder_context);
908                     gen7_mfc_avc_fqm_state(ctx, gen6_encoder_context);
909                 } else {
910                     gen6_mfc_avc_img_state(ctx, encode_state,gen6_encoder_context);
911                     gen6_mfc_avc_qm_state(ctx, gen6_encoder_context);
912                     gen6_mfc_avc_fqm_state(ctx, gen6_encoder_context);
913                 }
914
915                 gen6_mfc_avc_directmode_state(ctx, gen6_encoder_context); 
916                 gen6_mfc_avc_ref_idx_state(ctx, gen6_encoder_context);
917                 gen6_mfc_avc_slice_state(ctx, pSliceParameter->slice_type, 
918                                          encode_state, gen6_encoder_context, 
919                                          rate_control_mode == 0, pPicParameter->pic_init_qp + pSliceParameter->slice_qp_delta);
920
921                 if (encode_state->packed_header_data[VAEncPackedHeaderSPS]) {
922                     VAEncPackedHeaderParameterBuffer *param = NULL;
923                     unsigned int *header_data = (unsigned int *)encode_state->packed_header_data[VAEncPackedHeaderSPS]->buffer;
924                     unsigned int length_in_bits;
925
926                     assert(encode_state->packed_header_param[VAEncPackedHeaderSPS]);
927                     param = (VAEncPackedHeaderParameterBuffer *)encode_state->packed_header_param[VAEncPackedHeaderSPS]->buffer;
928                     length_in_bits = param->length_in_bits[0];
929
930                     gen6_mfc_avc_insert_object(ctx, 
931                                                gen6_encoder_context,
932                                                header_data,
933                                                ALIGN(length_in_bits, 32) >> 5,
934                                                length_in_bits & 0x1f,
935                                                param->skip_emulation_check_count,
936                                                0,
937                                                0,
938                                                param->insert_emulation_bytes);
939                 }
940
941                 if (encode_state->packed_header_data[VAEncPackedHeaderPPS]) {
942                     VAEncPackedHeaderParameterBuffer *param = NULL;
943                     unsigned int *header_data = (unsigned int *)encode_state->packed_header_data[VAEncPackedHeaderPPS]->buffer;
944                     unsigned int length_in_bits;
945
946                     assert(encode_state->packed_header_param[VAEncPackedHeaderPPS]);
947                     param = (VAEncPackedHeaderParameterBuffer *)encode_state->packed_header_param[VAEncPackedHeaderPPS]->buffer;
948                     length_in_bits = param->length_in_bits[0];
949
950                     gen6_mfc_avc_insert_object(ctx, 
951                                                gen6_encoder_context,
952                                                header_data,
953                                                ALIGN(length_in_bits, 32) >> 5,
954                                                length_in_bits & 0x1f,
955                                                param->skip_emulation_check_count,
956                                                0,
957                                                0,
958                                                param->insert_emulation_bytes);
959                 }
960
961                 gen6_mfc_avc_insert_object(ctx, gen6_encoder_context,
962                                            (unsigned int *)slice_header, ALIGN(slice_header_length_in_bits, 32) >> 5, slice_header_length_in_bits & 0x1f,
963                                            5,  /* first 5 bytes are start code + nal unit type */
964                                            1, 0, 1);
965                 emit_new_state = 0;
966             }
967
968             if (is_intra) {
969                 assert(msg);
970                 object_len_in_bytes = gen6_mfc_avc_pak_object_intra(ctx, x, y, last_mb, qp, msg, gen6_encoder_context,target_mb_size, max_mb_size);
971                 msg += 4;
972             } else {
973                 object_len_in_bytes = gen6_mfc_avc_pak_object_inter(ctx, x, y, last_mb, qp, offset, gen6_encoder_context, target_mb_size, max_mb_size, pSliceParameter->slice_type);
974                 offset += 64;
975             }
976
977             if (intel_batchbuffer_check_free_space(batch, object_len_in_bytes) == 0) {
978                 assert(0);
979                 intel_batchbuffer_end_atomic(batch);
980                 intel_batchbuffer_flush(batch);
981                 emit_new_state = 1;
982                 intel_batchbuffer_start_atomic_bcs(batch, 0x1000);
983             }
984         }
985     }
986
987     gen6_mfc_avc_insert_object(ctx, gen6_encoder_context,
988                                tail_data, sizeof(tail_data) >> 2, 32,
989                                sizeof(tail_data), 1, 1, 1);
990
991     if (is_intra)
992         dri_bo_unmap(vme_context->vme_output.bo);
993
994     free(slice_header);
995
996     intel_batchbuffer_end_atomic(batch);
997 }
998
999 static void 
1000 gen6_mfc_free_avc_surface(void **data)
1001 {
1002     struct gen6_mfc_avc_surface_aux *avc_surface = *data;
1003
1004     if (!avc_surface)
1005         return;
1006
1007     dri_bo_unreference(avc_surface->dmv_top);
1008     avc_surface->dmv_top = NULL;
1009     dri_bo_unreference(avc_surface->dmv_bottom);
1010     avc_surface->dmv_bottom = NULL;
1011
1012     free(avc_surface);
1013     *data = NULL;
1014 }
1015
1016 static void gen6_mfc_bit_rate_control_context_init(struct encode_state *encode_state, 
1017                                                    struct gen6_mfc_context *mfc_context) 
1018 {
1019     VAEncSequenceParameterBufferH264 *pSequenceParameter = (VAEncSequenceParameterBufferH264 *)encode_state->seq_param_ext->buffer;
1020     
1021     int width_in_mbs = (mfc_context->surface_state.width + 15) / 16;
1022     int height_in_mbs = (mfc_context->surface_state.height + 15) / 16;
1023     float fps =  pSequenceParameter->time_scale * 0.5 / pSequenceParameter->num_units_in_tick ;
1024     int inter_mb_size = pSequenceParameter->bits_per_second * 1.0 / (fps+4.0) / width_in_mbs / height_in_mbs;
1025     int intra_mb_size = inter_mb_size * 5.0;
1026     int i;
1027     
1028     mfc_context->bit_rate_control_context[0].target_mb_size = intra_mb_size;
1029     mfc_context->bit_rate_control_context[0].target_frame_size = intra_mb_size * width_in_mbs * height_in_mbs;
1030     mfc_context->bit_rate_control_context[1].target_mb_size = inter_mb_size;
1031     mfc_context->bit_rate_control_context[1].target_frame_size = inter_mb_size * width_in_mbs * height_in_mbs;
1032
1033     for(i = 0 ; i < 2; i++) {
1034         mfc_context->bit_rate_control_context[i].QpPrimeY = 26;
1035         mfc_context->bit_rate_control_context[i].MaxQpNegModifier = 6;
1036         mfc_context->bit_rate_control_context[i].MaxQpPosModifier = 6;
1037         mfc_context->bit_rate_control_context[i].GrowInit = 6;
1038         mfc_context->bit_rate_control_context[i].GrowResistance = 4;
1039         mfc_context->bit_rate_control_context[i].ShrinkInit = 6;
1040         mfc_context->bit_rate_control_context[i].ShrinkResistance = 4;
1041         
1042         mfc_context->bit_rate_control_context[i].Correct[0] = 8;
1043         mfc_context->bit_rate_control_context[i].Correct[1] = 4;
1044         mfc_context->bit_rate_control_context[i].Correct[2] = 2;
1045         mfc_context->bit_rate_control_context[i].Correct[3] = 2;
1046         mfc_context->bit_rate_control_context[i].Correct[4] = 4;
1047         mfc_context->bit_rate_control_context[i].Correct[5] = 8;
1048     }
1049     
1050     mfc_context->bit_rate_control_context[0].TargetSizeInWord = (intra_mb_size + 16)/ 16;
1051     mfc_context->bit_rate_control_context[1].TargetSizeInWord = (inter_mb_size + 16)/ 16;
1052
1053     mfc_context->bit_rate_control_context[0].MaxSizeInWord = mfc_context->bit_rate_control_context[0].TargetSizeInWord * 1.5;
1054     mfc_context->bit_rate_control_context[1].MaxSizeInWord = mfc_context->bit_rate_control_context[1].TargetSizeInWord * 1.5;
1055 }
1056
1057 static int gen6_mfc_bit_rate_control_context_update(struct encode_state *encode_state, 
1058                                                    struct gen6_mfc_context *mfc_context,
1059                                                    int current_frame_size) 
1060 {
1061     VAEncSliceParameterBufferH264 *pSliceParameter = (VAEncSliceParameterBufferH264 *)encode_state->slice_params_ext[0]->buffer; 
1062     int control_index = 1 - (pSliceParameter->slice_type == SLICE_TYPE_I);
1063     int oldQp = mfc_context->bit_rate_control_context[control_index].QpPrimeY;
1064
1065     /*
1066     printf("conrol_index = %d, start_qp = %d, result = %d, target = %d\n", control_index, 
1067             mfc_context->bit_rate_control_context[control_index].QpPrimeY, current_frame_size,
1068             mfc_context->bit_rate_control_context[control_index].target_frame_size );
1069     */
1070
1071     if ( current_frame_size > mfc_context->bit_rate_control_context[control_index].target_frame_size * 4.0 ) {
1072         mfc_context->bit_rate_control_context[control_index].QpPrimeY += 4;
1073     } else if ( current_frame_size > mfc_context->bit_rate_control_context[control_index].target_frame_size * 2.0 ) {
1074         mfc_context->bit_rate_control_context[control_index].QpPrimeY += 3;
1075     } else if ( current_frame_size > mfc_context->bit_rate_control_context[control_index].target_frame_size * 1.50 ) {
1076         mfc_context->bit_rate_control_context[control_index].QpPrimeY += 2;
1077     } else if ( current_frame_size > mfc_context->bit_rate_control_context[control_index].target_frame_size * 1.20 ) {
1078         mfc_context->bit_rate_control_context[control_index].QpPrimeY ++;
1079     } else if (current_frame_size < mfc_context->bit_rate_control_context[control_index].target_frame_size * 0.30 )  {
1080          mfc_context->bit_rate_control_context[control_index].QpPrimeY -= 3;
1081     } else if (current_frame_size < mfc_context->bit_rate_control_context[control_index].target_frame_size * 0.50 )  {
1082          mfc_context->bit_rate_control_context[control_index].QpPrimeY -= 2;
1083     } else if (current_frame_size < mfc_context->bit_rate_control_context[control_index].target_frame_size * 0.80 )  {
1084          mfc_context->bit_rate_control_context[control_index].QpPrimeY --;
1085     }
1086     
1087     if ( mfc_context->bit_rate_control_context[control_index].QpPrimeY > 51)
1088         mfc_context->bit_rate_control_context[control_index].QpPrimeY = 51;
1089     if ( mfc_context->bit_rate_control_context[control_index].QpPrimeY < 1)
1090         mfc_context->bit_rate_control_context[control_index].QpPrimeY = 1;
1091  
1092     if ( mfc_context->bit_rate_control_context[control_index].QpPrimeY != oldQp)
1093         return 0;
1094
1095     return 1;
1096 }
1097
1098 static VAStatus gen6_mfc_avc_prepare(VADriverContextP ctx, 
1099                                      struct encode_state *encode_state,
1100                                      struct gen6_encoder_context *gen6_encoder_context)
1101 {
1102     struct i965_driver_data *i965 = i965_driver_data(ctx);
1103     struct gen6_mfc_context *mfc_context = &gen6_encoder_context->mfc_context;
1104     struct object_surface *obj_surface; 
1105     struct object_buffer *obj_buffer;
1106     struct gen6_mfc_avc_surface_aux* gen6_avc_surface;
1107     dri_bo *bo;
1108     VAEncPictureParameterBufferH264 *pPicParameter = (VAEncPictureParameterBufferH264 *)encode_state->pic_param_ext->buffer;
1109     VAStatus vaStatus = VA_STATUS_SUCCESS;
1110         int i;
1111
1112     /*Setup all the input&output object*/
1113
1114     /* Setup current frame and current direct mv buffer*/
1115     obj_surface = SURFACE(pPicParameter->CurrPic.picture_id);
1116     assert(obj_surface);
1117     i965_check_alloc_surface_bo(ctx, obj_surface, 1, VA_FOURCC('N','V','1','2'));
1118     if ( obj_surface->private_data == NULL) {
1119         gen6_avc_surface = calloc(sizeof(struct gen6_mfc_avc_surface_aux), 1);
1120         gen6_avc_surface->dmv_top = 
1121             dri_bo_alloc(i965->intel.bufmgr,
1122                     "Buffer",
1123                     68*8192, 
1124                     64);
1125         gen6_avc_surface->dmv_bottom = 
1126             dri_bo_alloc(i965->intel.bufmgr,
1127                             "Buffer",
1128                             68*8192, 
1129                             64);
1130         assert(gen6_avc_surface->dmv_top);
1131         assert(gen6_avc_surface->dmv_bottom);
1132         obj_surface->private_data = (void *)gen6_avc_surface;
1133         obj_surface->free_private_data = (void *)gen6_mfc_free_avc_surface; 
1134     }
1135     gen6_avc_surface = (struct gen6_mfc_avc_surface_aux*) obj_surface->private_data;
1136     mfc_context->direct_mv_buffers[NUM_MFC_DMV_BUFFERS - 2].bo = gen6_avc_surface->dmv_top;
1137     mfc_context->direct_mv_buffers[NUM_MFC_DMV_BUFFERS - 1].bo = gen6_avc_surface->dmv_bottom;
1138         dri_bo_reference(gen6_avc_surface->dmv_top);
1139         dri_bo_reference(gen6_avc_surface->dmv_bottom);
1140
1141     mfc_context->post_deblocking_output.bo = obj_surface->bo;
1142     dri_bo_reference(mfc_context->post_deblocking_output.bo);
1143
1144     mfc_context->surface_state.width = obj_surface->orig_width;
1145     mfc_context->surface_state.height = obj_surface->orig_height;
1146     mfc_context->surface_state.w_pitch = obj_surface->width;
1147     mfc_context->surface_state.h_pitch = obj_surface->height;
1148     
1149     /* Setup reference frames and direct mv buffers*/
1150     for(i = 0; i < MAX_MFC_REFERENCE_SURFACES; i++) {
1151                 if ( pPicParameter->ReferenceFrames[i].picture_id != VA_INVALID_ID ) { 
1152                         obj_surface = SURFACE(pPicParameter->ReferenceFrames[i].picture_id);
1153                         assert(obj_surface);
1154                         if (obj_surface->bo != NULL) {
1155                                 mfc_context->reference_surfaces[i].bo = obj_surface->bo;
1156                                 dri_bo_reference(obj_surface->bo);
1157                         }
1158             /* Check DMV buffer */
1159             if ( obj_surface->private_data == NULL) {
1160                 
1161                 gen6_avc_surface = calloc(sizeof(struct gen6_mfc_avc_surface_aux), 1);
1162                 gen6_avc_surface->dmv_top = 
1163                     dri_bo_alloc(i965->intel.bufmgr,
1164                             "Buffer",
1165                             68*8192, 
1166                             64);
1167                 gen6_avc_surface->dmv_bottom = 
1168                     dri_bo_alloc(i965->intel.bufmgr,
1169                             "Buffer",
1170                             68*8192, 
1171                             64);
1172                 assert(gen6_avc_surface->dmv_top);
1173                 assert(gen6_avc_surface->dmv_bottom);
1174                 obj_surface->private_data = gen6_avc_surface;
1175                 obj_surface->free_private_data = gen6_mfc_free_avc_surface; 
1176             }
1177     
1178             gen6_avc_surface = (struct gen6_mfc_avc_surface_aux*) obj_surface->private_data;
1179             /* Setup DMV buffer */
1180             mfc_context->direct_mv_buffers[i*2].bo = gen6_avc_surface->dmv_top;
1181             mfc_context->direct_mv_buffers[i*2+1].bo = gen6_avc_surface->dmv_bottom; 
1182             dri_bo_reference(gen6_avc_surface->dmv_top);
1183             dri_bo_reference(gen6_avc_surface->dmv_bottom);
1184                 } else {
1185                         break;
1186                 }
1187         }
1188         
1189     obj_surface = SURFACE(encode_state->current_render_target);
1190     assert(obj_surface && obj_surface->bo);
1191     mfc_context->uncompressed_picture_source.bo = obj_surface->bo;
1192     dri_bo_reference(mfc_context->uncompressed_picture_source.bo);
1193
1194     obj_buffer = BUFFER (pPicParameter->CodedBuf); /* FIXME: fix this later */
1195     bo = obj_buffer->buffer_store->bo;
1196     assert(bo);
1197     mfc_context->mfc_indirect_pak_bse_object.bo = bo;
1198     mfc_context->mfc_indirect_pak_bse_object.offset = ALIGN(sizeof(VACodedBufferSegment), 64);
1199     mfc_context->mfc_indirect_pak_bse_object.end_offset = ALIGN (obj_buffer->size_element - 0x1000, 0x1000);
1200     dri_bo_reference(mfc_context->mfc_indirect_pak_bse_object.bo);
1201
1202     /*Programing bit rate control */
1203     if ( mfc_context->bit_rate_control_context[0].MaxSizeInWord == 0 )
1204         gen6_mfc_bit_rate_control_context_init(encode_state, mfc_context);
1205
1206     /*Programing bcs pipeline*/
1207     gen6_mfc_avc_pipeline_programing(ctx, encode_state, gen6_encoder_context);  //filling the pipeline
1208         
1209     return vaStatus;
1210 }
1211
1212 static VAStatus gen6_mfc_run(VADriverContextP ctx, 
1213                              struct encode_state *encode_state,
1214                              struct gen6_encoder_context *gen6_encoder_context)
1215 {
1216     struct intel_batchbuffer *batch = gen6_encoder_context->base.batch;
1217
1218     intel_batchbuffer_flush(batch);             //run the pipeline
1219
1220     return VA_STATUS_SUCCESS;
1221 }
1222
1223 static VAStatus gen6_mfc_stop(VADriverContextP ctx, 
1224                               struct encode_state *encode_state,
1225                               struct gen6_encoder_context *gen6_encoder_context,
1226                               int *encoded_bits_size)
1227 {
1228     struct gen6_mfc_context *mfc_context = &gen6_encoder_context->mfc_context;
1229     unsigned int *status_mem;
1230     unsigned int buffer_size_bits = 0;
1231     int width_in_mbs = (mfc_context->surface_state.width + 15) / 16;
1232     int height_in_mbs = (mfc_context->surface_state.height + 15) / 16;
1233     int i;
1234
1235     dri_bo_map(mfc_context->macroblock_status_buffer.bo, 1);
1236     status_mem = (unsigned int *)mfc_context->macroblock_status_buffer.bo->virtual;
1237     //Detecting encoder buffer size and bit rate control result
1238     for(i = 0; i < width_in_mbs * height_in_mbs; i++) {
1239         unsigned short current_mb = status_mem[1] >> 16;
1240         buffer_size_bits += current_mb;
1241         status_mem += 4;
1242     }    
1243     dri_bo_unmap(mfc_context->macroblock_status_buffer.bo);
1244
1245     *encoded_bits_size = buffer_size_bits;
1246
1247     return VA_STATUS_SUCCESS;
1248 }
1249
1250 static VAStatus
1251 gen6_mfc_avc_encode_picture(VADriverContextP ctx, 
1252                             struct encode_state *encode_state,
1253                             struct gen6_encoder_context *gen6_encoder_context)
1254 {
1255     VAEncSequenceParameterBufferH264 *pSequenceParameter = (VAEncSequenceParameterBufferH264 *)encode_state->seq_param_ext->buffer;
1256     struct gen6_mfc_context *mfc_context = &gen6_encoder_context->mfc_context;
1257     int rate_control_mode = pSequenceParameter->rate_control_method;  
1258     int MAX_CBR_INTERATE = 4;
1259     int current_frame_bits_size;
1260     int i;
1261  
1262     for(i = 0; i < MAX_CBR_INTERATE; i++) {
1263         gen6_mfc_init(ctx, gen6_encoder_context);
1264         gen6_mfc_avc_prepare(ctx, encode_state, gen6_encoder_context);
1265         gen6_mfc_run(ctx, encode_state, gen6_encoder_context);
1266         gen6_mfc_stop(ctx, encode_state, gen6_encoder_context, &current_frame_bits_size);
1267         if ( rate_control_mode == 0) {
1268             if ( gen6_mfc_bit_rate_control_context_update( encode_state, mfc_context, current_frame_bits_size) )
1269                 break;
1270         } else {
1271             break;
1272         }
1273     }
1274
1275     return VA_STATUS_SUCCESS;
1276 }
1277
1278 VAStatus
1279 gen6_mfc_pipeline(VADriverContextP ctx,
1280                   VAProfile profile,
1281                   struct encode_state *encode_state,
1282                   struct gen6_encoder_context *gen6_encoder_context)
1283 {
1284     VAStatus vaStatus;
1285
1286     switch (profile) {
1287     case VAProfileH264Baseline:
1288         vaStatus = gen6_mfc_avc_encode_picture(ctx, encode_state, gen6_encoder_context);
1289         break;
1290
1291         /* FIXME: add for other profile */
1292     default:
1293         vaStatus = VA_STATUS_ERROR_UNSUPPORTED_PROFILE;
1294         break;
1295     }
1296
1297     return vaStatus;
1298 }
1299
1300 Bool gen6_mfc_context_init(VADriverContextP ctx, struct gen6_mfc_context *mfc_context)
1301 {
1302     return True;
1303 }
1304
1305 Bool gen6_mfc_context_destroy(struct gen6_mfc_context *mfc_context)
1306 {
1307     int i;
1308
1309     dri_bo_unreference(mfc_context->post_deblocking_output.bo);
1310     mfc_context->post_deblocking_output.bo = NULL;
1311
1312     dri_bo_unreference(mfc_context->pre_deblocking_output.bo);
1313     mfc_context->pre_deblocking_output.bo = NULL;
1314
1315     dri_bo_unreference(mfc_context->uncompressed_picture_source.bo);
1316     mfc_context->uncompressed_picture_source.bo = NULL;
1317
1318     dri_bo_unreference(mfc_context->mfc_indirect_pak_bse_object.bo); 
1319     mfc_context->mfc_indirect_pak_bse_object.bo = NULL;
1320
1321     for (i = 0; i < NUM_MFC_DMV_BUFFERS; i++){
1322         dri_bo_unreference(mfc_context->direct_mv_buffers[i].bo);
1323         mfc_context->direct_mv_buffers[i].bo = NULL;
1324     }
1325
1326     dri_bo_unreference(mfc_context->intra_row_store_scratch_buffer.bo);
1327     mfc_context->intra_row_store_scratch_buffer.bo = NULL;
1328
1329         dri_bo_unreference(mfc_context->macroblock_status_buffer.bo);
1330         mfc_context->macroblock_status_buffer.bo = NULL;
1331
1332     dri_bo_unreference(mfc_context->deblocking_filter_row_store_scratch_buffer.bo);
1333     mfc_context->deblocking_filter_row_store_scratch_buffer.bo = NULL;
1334
1335     dri_bo_unreference(mfc_context->bsd_mpc_row_store_scratch_buffer.bo);
1336     mfc_context->bsd_mpc_row_store_scratch_buffer.bo = NULL;
1337
1338
1339     for (i = 0; i < MAX_MFC_REFERENCE_SURFACES; i++){
1340         dri_bo_unreference(mfc_context->reference_surfaces[i].bo);
1341         mfc_context->reference_surfaces[i].bo = NULL;  
1342     }
1343
1344     return True;
1345 }