Merge branch 'master' into vaapi-ext
[platform/upstream/libva-intel-driver.git] / src / gen6_mfc.c
1 /*
2  * Copyright © 2010-2011 Intel Corporation
3  *
4  * Permission is hereby granted, free of charge, to any person obtaining a
5  * copy of this software and associated documentation files (the
6  * "Software"), to deal in the Software without restriction, including
7  * without limitation the rights to use, copy, modify, merge, publish,
8  * distribute, sub license, and/or sell copies of the Software, and to
9  * permit persons to whom the Software is furnished to do so, subject to
10  * the following conditions:
11  *
12  * The above copyright notice and this permission notice (including the
13  * next paragraph) shall be included in all copies or substantial portions
14  * of the Software.
15  *
16  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
17  * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
18  * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
19  * IN NO EVENT SHALL PRECISION INSIGHT AND/OR ITS SUPPLIERS BE LIABLE FOR
20  * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
21  * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
22  * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
23  *
24  * Authors:
25  *    Zhou Chang <chang.zhou@intel.com>
26  *
27  */
28
29 #include <stdio.h>
30 #include <stdlib.h>
31 #include <string.h>
32 #include <assert.h>
33
34 #include "assert.h"
35 #include "intel_batchbuffer.h"
36 #include "i965_defines.h"
37 #include "i965_structs.h"
38 #include "i965_drv_video.h"
39 #include "i965_encoder.h"
40 #include "i965_encoder_utils.h"
41
42 static void
43 gen6_mfc_pipe_mode_select(VADriverContextP ctx, struct gen6_encoder_context *gen6_encoder_context)
44 {
45     struct intel_batchbuffer *batch = gen6_encoder_context->base.batch;
46
47     BEGIN_BCS_BATCH(batch, 4);
48
49     OUT_BCS_BATCH(batch, MFX_PIPE_MODE_SELECT | (4 - 2));
50     OUT_BCS_BATCH(batch,
51                   (1 << 10) | /* disable Stream-Out , advanced QP/bitrate control need enable it*/
52                   (1 << 9)  | /* Post Deblocking Output */
53                   (0 << 8)  | /* Pre Deblocking Output */
54                   (0 << 7)  | /* disable TLB prefectch */
55                   (0 << 5)  | /* not in stitch mode */
56                   (1 << 4)  | /* encoding mode */
57                   (2 << 0));  /* Standard Select: AVC */
58     OUT_BCS_BATCH(batch,
59                   (0 << 20) | /* round flag in PB slice */
60                   (0 << 19) | /* round flag in Intra8x8 */
61                   (0 << 7)  | /* expand NOA bus flag */
62                   (1 << 6)  | /* must be 1 */
63                   (0 << 5)  | /* disable clock gating for NOA */
64                   (0 << 4)  | /* terminate if AVC motion and POC table error occurs */
65                   (0 << 3)  | /* terminate if AVC mbdata error occurs */
66                   (0 << 2)  | /* terminate if AVC CABAC/CAVLC decode error occurs */
67                   (0 << 1)  | /* AVC long field motion vector */
68                   (0 << 0));  /* always calculate AVC ILDB boundary strength */
69     OUT_BCS_BATCH(batch, 0);
70
71     ADVANCE_BCS_BATCH(batch);
72 }
73
74 static void
75 gen7_mfc_pipe_mode_select(VADriverContextP ctx,
76                           int standard_select,
77                           struct gen6_encoder_context *gen6_encoder_context)
78 {
79     struct intel_batchbuffer *batch = gen6_encoder_context->base.batch;
80
81     assert(standard_select == MFX_FORMAT_MPEG2 ||
82            standard_select == MFX_FORMAT_AVC);
83
84     BEGIN_BCS_BATCH(batch, 5);
85     OUT_BCS_BATCH(batch, MFX_PIPE_MODE_SELECT | (5 - 2));
86     OUT_BCS_BATCH(batch,
87                   (MFX_LONG_MODE << 17) | /* Must be long format for encoder */
88                   (MFD_MODE_VLD << 15) | /* VLD mode */
89                   (0 << 10) | /* disable Stream-Out */
90                   (1 << 9)  | /* Post Deblocking Output */
91                   (0 << 8)  | /* Pre Deblocking Output */
92                   (0 << 5)  | /* not in stitch mode */
93                   (1 << 4)  | /* encoding mode */
94                   (standard_select << 0));  /* standard select: avc or mpeg2 */
95     OUT_BCS_BATCH(batch,
96                   (0 << 7)  | /* expand NOA bus flag */
97                   (0 << 6)  | /* disable slice-level clock gating */
98                   (0 << 5)  | /* disable clock gating for NOA */
99                   (0 << 4)  | /* terminate if AVC motion and POC table error occurs */
100                   (0 << 3)  | /* terminate if AVC mbdata error occurs */
101                   (0 << 2)  | /* terminate if AVC CABAC/CAVLC decode error occurs */
102                   (0 << 1)  |
103                   (0 << 0));
104     OUT_BCS_BATCH(batch, 0);
105     OUT_BCS_BATCH(batch, 0);
106
107     ADVANCE_BCS_BATCH(batch);
108 }
109
110 static void
111 gen6_mfc_surface_state(VADriverContextP ctx, struct gen6_encoder_context *gen6_encoder_context)
112 {
113     struct intel_batchbuffer *batch = gen6_encoder_context->base.batch;
114     struct gen6_mfc_context *mfc_context = &gen6_encoder_context->mfc_context;
115
116     BEGIN_BCS_BATCH(batch, 6);
117
118     OUT_BCS_BATCH(batch, MFX_SURFACE_STATE | (6 - 2));
119     OUT_BCS_BATCH(batch, 0);
120     OUT_BCS_BATCH(batch,
121                   ((mfc_context->surface_state.height - 1) << 19) |
122                   ((mfc_context->surface_state.width - 1) << 6));
123     OUT_BCS_BATCH(batch,
124                   (MFX_SURFACE_PLANAR_420_8 << 28) | /* 420 planar YUV surface */
125                   (1 << 27) | /* must be 1 for interleave U/V, hardware requirement */
126                   (0 << 22) | /* surface object control state, FIXME??? */
127                   ((mfc_context->surface_state.w_pitch - 1) << 3) | /* pitch */
128                   (0 << 2)  | /* must be 0 for interleave U/V */
129                   (1 << 1)  | /* must be y-tiled */
130                   (I965_TILEWALK_YMAJOR << 0));                         /* tile walk, TILEWALK_YMAJOR */
131     OUT_BCS_BATCH(batch,
132                   (0 << 16) |                                                           /* must be 0 for interleave U/V */
133                   (mfc_context->surface_state.h_pitch));                /* y offset for U(cb) */
134     OUT_BCS_BATCH(batch, 0);
135     ADVANCE_BCS_BATCH(batch);
136 }
137
138 static void
139 gen7_mfc_surface_state(VADriverContextP ctx, struct gen6_encoder_context *gen6_encoder_context)
140 {
141     struct intel_batchbuffer *batch = gen6_encoder_context->base.batch;
142     struct gen6_mfc_context *mfc_context = &gen6_encoder_context->mfc_context;
143
144     BEGIN_BCS_BATCH(batch, 6);
145
146     OUT_BCS_BATCH(batch, MFX_SURFACE_STATE | (6 - 2));
147     OUT_BCS_BATCH(batch, 0);
148     OUT_BCS_BATCH(batch,
149                   ((mfc_context->surface_state.height - 1) << 18) |
150                   ((mfc_context->surface_state.width - 1) << 4));
151     OUT_BCS_BATCH(batch,
152                   (MFX_SURFACE_PLANAR_420_8 << 28) | /* 420 planar YUV surface */
153                   (1 << 27) | /* must be 1 for interleave U/V, hardware requirement */
154                   (0 << 22) | /* surface object control state, FIXME??? */
155                   ((mfc_context->surface_state.w_pitch - 1) << 3) | /* pitch */
156                   (0 << 2)  | /* must be 0 for interleave U/V */
157                   (1 << 1)  | /* must be tiled */
158                   (I965_TILEWALK_YMAJOR << 0));  /* tile walk, TILEWALK_YMAJOR */
159     OUT_BCS_BATCH(batch,
160                   (0 << 16) |                                                           /* must be 0 for interleave U/V */
161                   (mfc_context->surface_state.h_pitch));                /* y offset for U(cb) */
162     OUT_BCS_BATCH(batch, 0);
163     ADVANCE_BCS_BATCH(batch);
164 }
165
166 static void
167 gen6_mfc_pipe_buf_addr_state(VADriverContextP ctx, struct gen6_encoder_context *gen6_encoder_context)
168 {
169     struct intel_batchbuffer *batch = gen6_encoder_context->base.batch;
170     struct gen6_mfc_context *mfc_context = &gen6_encoder_context->mfc_context;
171     int i;
172
173     BEGIN_BCS_BATCH(batch, 24);
174
175     OUT_BCS_BATCH(batch, MFX_PIPE_BUF_ADDR_STATE | (24 - 2));
176
177     OUT_BCS_BATCH(batch, 0);                                                                                    /* pre output addr   */
178
179     OUT_BCS_RELOC(batch, mfc_context->post_deblocking_output.bo,
180                   I915_GEM_DOMAIN_INSTRUCTION, I915_GEM_DOMAIN_INSTRUCTION,
181                   0);                                                                                   /* post output addr  */ 
182
183     OUT_BCS_RELOC(batch, mfc_context->uncompressed_picture_source.bo,
184                   I915_GEM_DOMAIN_INSTRUCTION, I915_GEM_DOMAIN_INSTRUCTION,
185                   0);                                                                                   /* uncompressed data */
186     OUT_BCS_RELOC(batch, mfc_context->macroblock_status_buffer.bo,
187                                   I915_GEM_DOMAIN_INSTRUCTION, I915_GEM_DOMAIN_INSTRUCTION,
188                                   0);                                                                                   /* StreamOut data*/
189     OUT_BCS_RELOC(batch, mfc_context->intra_row_store_scratch_buffer.bo,
190                   I915_GEM_DOMAIN_INSTRUCTION, I915_GEM_DOMAIN_INSTRUCTION,
191                   0);   
192     OUT_BCS_RELOC(batch, mfc_context->deblocking_filter_row_store_scratch_buffer.bo,
193                   I915_GEM_DOMAIN_INSTRUCTION, I915_GEM_DOMAIN_INSTRUCTION,
194                   0);
195     /* 7..22 Reference pictures*/
196     for (i = 0; i < ARRAY_ELEMS(mfc_context->reference_surfaces); i++) {
197         if ( mfc_context->reference_surfaces[i].bo != NULL) {
198             OUT_BCS_RELOC(batch, mfc_context->reference_surfaces[i].bo,
199                           I915_GEM_DOMAIN_INSTRUCTION, I915_GEM_DOMAIN_INSTRUCTION,
200                           0);                   
201         } else {
202             OUT_BCS_BATCH(batch, 0);
203         }
204     }
205     OUT_BCS_RELOC(batch, mfc_context->macroblock_status_buffer.bo,
206                                   I915_GEM_DOMAIN_INSTRUCTION, I915_GEM_DOMAIN_INSTRUCTION,
207                                   0);                                                                                   /* Macroblock status buffer*/
208
209     ADVANCE_BCS_BATCH(batch);
210 }
211
212 static void
213 gen6_mfc_ind_obj_base_addr_state(VADriverContextP ctx, struct gen6_encoder_context *gen6_encoder_context)
214 {
215     struct intel_batchbuffer *batch = gen6_encoder_context->base.batch;
216     struct gen6_mfc_context *mfc_context = &gen6_encoder_context->mfc_context;
217     struct gen6_vme_context *vme_context = &gen6_encoder_context->vme_context;
218
219     BEGIN_BCS_BATCH(batch, 11);
220
221     OUT_BCS_BATCH(batch, MFX_IND_OBJ_BASE_ADDR_STATE | (11 - 2));
222     OUT_BCS_BATCH(batch, 0);
223     OUT_BCS_BATCH(batch, 0);
224     /* MFX Indirect MV Object Base Address */
225     OUT_BCS_RELOC(batch, vme_context->vme_output.bo, I915_GEM_DOMAIN_INSTRUCTION, 0, 0);
226     OUT_BCS_BATCH(batch, 0);    
227     OUT_BCS_BATCH(batch, 0);
228     OUT_BCS_BATCH(batch, 0);
229     OUT_BCS_BATCH(batch, 0);
230     OUT_BCS_BATCH(batch, 0);
231     /*MFC Indirect PAK-BSE Object Base Address for Encoder*/    
232     OUT_BCS_RELOC(batch,
233                   mfc_context->mfc_indirect_pak_bse_object.bo,
234                   I915_GEM_DOMAIN_INSTRUCTION, I915_GEM_DOMAIN_INSTRUCTION,
235                   0);
236     OUT_BCS_RELOC(batch,
237                   mfc_context->mfc_indirect_pak_bse_object.bo,
238                   I915_GEM_DOMAIN_INSTRUCTION, I915_GEM_DOMAIN_INSTRUCTION,
239                   mfc_context->mfc_indirect_pak_bse_object.end_offset);
240
241     ADVANCE_BCS_BATCH(batch);
242 }
243
244 static void
245 gen7_mfc_ind_obj_base_addr_state(VADriverContextP ctx, struct gen6_encoder_context *gen6_encoder_context)
246 {
247     struct intel_batchbuffer *batch = gen6_encoder_context->base.batch;
248     struct gen6_mfc_context *mfc_context = &gen6_encoder_context->mfc_context;
249     struct gen6_vme_context *vme_context = &gen6_encoder_context->vme_context;
250
251     BEGIN_BCS_BATCH(batch, 11);
252
253     OUT_BCS_BATCH(batch, MFX_IND_OBJ_BASE_ADDR_STATE | (11 - 2));
254     OUT_BCS_BATCH(batch, 0);
255     OUT_BCS_BATCH(batch, 0);
256     /* MFX Indirect MV Object Base Address */
257     OUT_BCS_RELOC(batch, vme_context->vme_output.bo, I915_GEM_DOMAIN_INSTRUCTION, 0, 0);
258     OUT_BCS_BATCH(batch, 0x80000000); /* must set, up to 2G */
259     OUT_BCS_BATCH(batch, 0);
260     OUT_BCS_BATCH(batch, 0);
261     OUT_BCS_BATCH(batch, 0);
262     OUT_BCS_BATCH(batch, 0);
263     /*MFC Indirect PAK-BSE Object Base Address for Encoder*/    
264     OUT_BCS_RELOC(batch,
265                   mfc_context->mfc_indirect_pak_bse_object.bo,
266                   I915_GEM_DOMAIN_INSTRUCTION, I915_GEM_DOMAIN_INSTRUCTION,
267                   0);
268     OUT_BCS_RELOC(batch,
269                   mfc_context->mfc_indirect_pak_bse_object.bo,
270                   I915_GEM_DOMAIN_INSTRUCTION, I915_GEM_DOMAIN_INSTRUCTION,
271                   mfc_context->mfc_indirect_pak_bse_object.end_offset);
272
273     ADVANCE_BCS_BATCH(batch);
274 }
275
276 static void
277 gen6_mfc_bsp_buf_base_addr_state(VADriverContextP ctx, struct gen6_encoder_context *gen6_encoder_context)
278 {
279     struct intel_batchbuffer *batch = gen6_encoder_context->base.batch;
280     struct gen6_mfc_context *mfc_context = &gen6_encoder_context->mfc_context;
281
282     BEGIN_BCS_BATCH(batch, 4);
283
284     OUT_BCS_BATCH(batch, MFX_BSP_BUF_BASE_ADDR_STATE | (4 - 2));
285     OUT_BCS_RELOC(batch, mfc_context->bsd_mpc_row_store_scratch_buffer.bo,
286                   I915_GEM_DOMAIN_INSTRUCTION, I915_GEM_DOMAIN_INSTRUCTION,
287                   0);
288     OUT_BCS_BATCH(batch, 0);
289     OUT_BCS_BATCH(batch, 0);
290
291     ADVANCE_BCS_BATCH(batch);
292 }
293
294 static void
295 gen6_mfc_avc_img_state(VADriverContextP ctx,struct encode_state *encode_state,
296                        struct gen6_encoder_context *gen6_encoder_context)
297 {
298     struct intel_batchbuffer *batch = gen6_encoder_context->base.batch;
299     struct gen6_mfc_context *mfc_context = &gen6_encoder_context->mfc_context;
300     VAEncSequenceParameterBufferH264 *pSequenceParameter = (VAEncSequenceParameterBufferH264 *)encode_state->seq_param_ext->buffer;
301     int width_in_mbs = (mfc_context->surface_state.width + 15) / 16;
302     int height_in_mbs = (mfc_context->surface_state.height + 15) / 16;
303
304     BEGIN_BCS_BATCH(batch, 13);
305     OUT_BCS_BATCH(batch, MFX_AVC_IMG_STATE | (13 - 2));
306     OUT_BCS_BATCH(batch, 
307                   ((width_in_mbs * height_in_mbs) & 0xFFFF));
308     OUT_BCS_BATCH(batch, 
309                   (height_in_mbs << 16) | 
310                   (width_in_mbs << 0));
311     OUT_BCS_BATCH(batch, 
312                   (0 << 24) |     /*Second Chroma QP Offset*/
313                   (0 << 16) |     /*Chroma QP Offset*/
314                   (0 << 14) |   /*Max-bit conformance Intra flag*/
315                   (0 << 13) |   /*Max Macroblock size conformance Inter flag*/
316                   (1 << 12) |   /*Should always be written as "1" */
317                   (0 << 10) |   /*QM Preset FLag */
318                   (0 << 8)  |   /*Image Structure*/
319                   (0 << 0) );   /*Current Decoed Image Frame Store ID, reserved in Encode mode*/
320     OUT_BCS_BATCH(batch,
321                   (400 << 16) |   /*Mininum Frame size*/        
322                   (0 << 15) |   /*Disable reading of Macroblock Status Buffer*/
323                   (0 << 14) |   /*Load BitStream Pointer only once, 1 slic 1 frame*/
324                   (0 << 13) |   /*CABAC 0 word insertion test enable*/
325                   (1 << 12) |   /*MVUnpackedEnable,compliant to DXVA*/
326                   (1 << 10) |   /*Chroma Format IDC, 4:2:0*/
327                   (1 << 7)  |   /*0:CAVLC encoding mode,1:CABAC*/
328                   (0 << 6)  |   /*Only valid for VLD decoding mode*/
329                   (0 << 5)  |   /*Constrained Intra Predition Flag, from PPS*/
330                   (pSequenceParameter->direct_8x8_inference_flag << 4)  |   /*Direct 8x8 inference flag*/
331                   (0 << 3)  |   /*Only 8x8 IDCT Transform Mode Flag*/
332                   (1 << 2)  |   /*Frame MB only flag*/
333                   (0 << 1)  |   /*MBAFF mode is in active*/
334                   (0 << 0) );   /*Field picture flag*/
335     OUT_BCS_BATCH(batch, 
336                   (1<<16)   |   /*Frame Size Rate Control Flag*/  
337                   (1<<12)   |   
338                   (1<<9)    |   /*MB level Rate Control Enabling Flag*/
339                   (1 << 3)  |   /*FrameBitRateMinReportMask*/
340                   (1 << 2)  |   /*FrameBitRateMaxReportMask*/
341                   (1 << 1)  |   /*InterMBMaxSizeReportMask*/
342                   (1 << 0) );   /*IntraMBMaxSizeReportMask*/
343     OUT_BCS_BATCH(batch,                        /*Inter and Intra Conformance Max size limit*/
344                   (0x0600 << 16) |              /*InterMbMaxSz 192 Byte*/
345                   (0x0800) );                   /*IntraMbMaxSz 256 Byte*/
346     OUT_BCS_BATCH(batch, 0x00000000);   /*Reserved : MBZReserved*/
347     OUT_BCS_BATCH(batch, 0x01020304);   /*Slice QP Delta for bitrate control*/                  
348     OUT_BCS_BATCH(batch, 0xFEFDFCFB);           
349     OUT_BCS_BATCH(batch, 0x80601004);   /*MAX = 128KB, MIN = 64KB*/
350     OUT_BCS_BATCH(batch, 0x00800001);   
351     OUT_BCS_BATCH(batch, 0);
352
353     ADVANCE_BCS_BATCH(batch);
354 }
355
356 static void
357 gen7_mfc_avc_img_state(VADriverContextP ctx, struct gen6_encoder_context *gen6_encoder_context)
358 {
359     struct intel_batchbuffer *batch = gen6_encoder_context->base.batch;
360     struct gen6_mfc_context *mfc_context = &gen6_encoder_context->mfc_context;
361
362     int width_in_mbs = (mfc_context->surface_state.width + 15) / 16;
363     int height_in_mbs = (mfc_context->surface_state.height + 15) / 16;
364
365     BEGIN_BCS_BATCH(batch, 16);
366     OUT_BCS_BATCH(batch, MFX_AVC_IMG_STATE | (16 - 2));
367     OUT_BCS_BATCH(batch,
368                   ((width_in_mbs * height_in_mbs) & 0xFFFF));
369     OUT_BCS_BATCH(batch, 
370                   ((height_in_mbs - 1) << 16) | 
371                   ((width_in_mbs - 1) << 0));
372     OUT_BCS_BATCH(batch, 
373                   (0 << 24) |   /* Second Chroma QP Offset */
374                   (0 << 16) |   /* Chroma QP Offset */
375                   (0 << 14) |   /* Max-bit conformance Intra flag */
376                   (0 << 13) |   /* Max Macroblock size conformance Inter flag */
377                   (0 << 12) |   /* FIXME: Weighted_Pred_Flag */
378                   (0 << 10) |   /* FIXME: Weighted_BiPred_Idc */
379                   (0 << 8)  |   /* FIXME: Image Structure */
380                   (0 << 0) );   /* Current Decoed Image Frame Store ID, reserved in Encode mode */
381     OUT_BCS_BATCH(batch,
382                   (0 << 16) |   /* Mininum Frame size */
383                   (0 << 15) |   /* Disable reading of Macroblock Status Buffer */
384                   (0 << 14) |   /* Load BitStream Pointer only once, 1 slic 1 frame */
385                   (0 << 13) |   /* CABAC 0 word insertion test enable */
386                   (1 << 12) |   /* MVUnpackedEnable,compliant to DXVA */
387                   (1 << 10) |   /* Chroma Format IDC, 4:2:0 */
388                   (0 << 9)  |   /* FIXME: MbMvFormatFlag */
389                   (1 << 7)  |   /* 0:CAVLC encoding mode,1:CABAC */
390                   (0 << 6)  |   /* Only valid for VLD decoding mode */
391                   (0 << 5)  |   /* Constrained Intra Predition Flag, from PPS */
392                   (0 << 4)  |   /* Direct 8x8 inference flag */
393                   (0 << 3)  |   /* Only 8x8 IDCT Transform Mode Flag */
394                   (1 << 2)  |   /* Frame MB only flag */
395                   (0 << 1)  |   /* MBAFF mode is in active */
396                   (0 << 0));    /* Field picture flag */
397     OUT_BCS_BATCH(batch, 0);    /* Mainly about MB rate control and debug, just ignoring */
398     OUT_BCS_BATCH(batch,        /* Inter and Intra Conformance Max size limit */
399                   (0xBB8 << 16) |       /* InterMbMaxSz */
400                   (0xEE8) );            /* IntraMbMaxSz */
401     OUT_BCS_BATCH(batch, 0);            /* Reserved */
402     OUT_BCS_BATCH(batch, 0);            /* Slice QP Delta for bitrate control */
403     OUT_BCS_BATCH(batch, 0);            /* Slice QP Delta for bitrate control */        
404     OUT_BCS_BATCH(batch, 0x8C000000);
405     OUT_BCS_BATCH(batch, 0x00010000);
406     OUT_BCS_BATCH(batch, 0);
407     OUT_BCS_BATCH(batch, 0);
408     OUT_BCS_BATCH(batch, 0);
409     OUT_BCS_BATCH(batch, 0);
410
411     ADVANCE_BCS_BATCH(batch);
412 }
413
414 static void gen6_mfc_avc_directmode_state(VADriverContextP ctx, struct gen6_encoder_context *gen6_encoder_context)
415 {
416     struct intel_batchbuffer *batch = gen6_encoder_context->base.batch;
417     struct gen6_mfc_context *mfc_context = &gen6_encoder_context->mfc_context;
418
419     int i;
420
421     BEGIN_BCS_BATCH(batch, 69);
422
423     OUT_BCS_BATCH(batch, MFX_AVC_DIRECTMODE_STATE | (69 - 2));
424
425     /* Reference frames and Current frames */
426     for(i = 0; i < NUM_MFC_DMV_BUFFERS; i++) {
427         if ( mfc_context->direct_mv_buffers[i].bo != NULL) { 
428             OUT_BCS_RELOC(batch, mfc_context->direct_mv_buffers[i].bo,
429                   I915_GEM_DOMAIN_INSTRUCTION, 0,
430                   0);
431          } else {
432              OUT_BCS_BATCH(batch, 0);
433          }
434     }
435
436     /* POL list */
437     for(i = 0; i < 32; i++) {
438         OUT_BCS_BATCH(batch, i/2);
439     }
440     OUT_BCS_BATCH(batch, 0);
441     OUT_BCS_BATCH(batch, 0);
442
443     ADVANCE_BCS_BATCH(batch);
444 }
445
446 static void gen6_mfc_avc_slice_state(VADriverContextP ctx,
447                                      int slice_type,
448                                      struct encode_state *encode_state,
449                                      struct gen6_encoder_context *gen6_encoder_context,
450                                      int rate_control_enable,
451                                      int qp)
452 {
453     struct intel_batchbuffer *batch = gen6_encoder_context->base.batch;
454     struct gen6_mfc_context *mfc_context = &gen6_encoder_context->mfc_context;
455     VAEncSliceParameterBufferH264 *pSliceParameter = (VAEncSliceParameterBufferH264 *)encode_state->slice_params_ext[0]->buffer; /* TODO: multi slices support */
456     int bit_rate_control_target;
457     if ( slice_type == SLICE_TYPE_I )
458         bit_rate_control_target = 0;
459     else
460         bit_rate_control_target = 1;
461     int maxQpN = mfc_context->bit_rate_control_context[bit_rate_control_target].MaxQpNegModifier;
462     int maxQpP = mfc_context->bit_rate_control_context[bit_rate_control_target].MaxQpPosModifier;
463     unsigned char correct[6];
464     int i;
465
466     for (i = 0; i < 6; i++)
467         correct[i] = mfc_context->bit_rate_control_context[bit_rate_control_target].Correct[i];
468     unsigned char grow = mfc_context->bit_rate_control_context[bit_rate_control_target].GrowInit + 
469                          (mfc_context->bit_rate_control_context[bit_rate_control_target].GrowResistance << 4);
470     unsigned char shrink = mfc_context->bit_rate_control_context[bit_rate_control_target].ShrinkInit + 
471                          (mfc_context->bit_rate_control_context[bit_rate_control_target].ShrinkResistance << 4);
472
473     BEGIN_BCS_BATCH(batch, 11);;
474
475     OUT_BCS_BATCH(batch, MFX_AVC_SLICE_STATE | (11 - 2) );
476
477         OUT_BCS_BATCH(batch, slice_type);                       /*Slice Type: I:P:B Slice*/
478
479     if ( slice_type == SLICE_TYPE_I ) {
480         OUT_BCS_BATCH(batch, 0);                        /*no reference frames and pred_weight_table*/
481     } else {
482         OUT_BCS_BATCH(batch, 0x00010000);       /*1 reference frame*/
483     }
484
485     OUT_BCS_BATCH(batch, 
486                   (pSliceParameter->direct_spatial_mv_pred_flag<<29) |             /*Direct Prediction Type*/
487                   (0<<24) |                /*Enable deblocking operation*/
488                   (qp<<16) |                    /*Slice Quantization Parameter*/
489                   0x0202 );
490     OUT_BCS_BATCH(batch, 0);                    /*First MB X&Y , the postion of current slice*/
491     OUT_BCS_BATCH(batch, ( ((mfc_context->surface_state.height+15)/16) << 16) );
492
493     OUT_BCS_BATCH(batch, 
494                   (rate_control_enable<<31) |           /*in CBR mode RateControlCounterEnable = enable*/
495                   (1<<30) |             /*ResetRateControlCounter*/
496                   (0<<28) |             /*RC Triggle Mode = Always Rate Control*/
497                   (4<<24) |     /*RC Stable Tolerance, middle level*/
498                   (rate_control_enable<<23) |     /*RC Panic Enable*/                 
499                   (0<<22) |     /*QP mode, don't modfiy CBP*/
500                   (0<<21) |     /*MB Type Direct Conversion Enabled*/ 
501                   (0<<20) |     /*MB Type Skip Conversion Enabled*/ 
502                   (1<<19) |     /*IsLastSlice*/
503                   (0<<18) |     /*BitstreamOutputFlag Compressed BitStream Output Disable Flag 0:enable 1:disable*/
504                   (1<<17) |         /*HeaderPresentFlag*/       
505                   (1<<16) |         /*SliceData PresentFlag*/
506                   (1<<15) |         /*TailPresentFlag*/
507                   (1<<13) |         /*RBSP NAL TYPE*/   
508                   (0<<12) );    /*CabacZeroWordInsertionEnable*/
509         
510     OUT_BCS_BATCH(batch, mfc_context->mfc_indirect_pak_bse_object.offset);
511
512     OUT_BCS_BATCH(batch, (maxQpN<<24) |     /*Target QP - 24 is lowest QP*/ 
513                          (maxQpP<<16) |     /*Target QP + 20 is highest QP*/
514                          (shrink<<8)  |
515                          (grow<<0));   
516     OUT_BCS_BATCH(batch, (correct[5] << 20) |
517                          (correct[4] << 16) |
518                          (correct[3] << 12) |
519                          (correct[2] << 8) |
520                          (correct[1] << 4) |
521                          (correct[0] << 0));
522     OUT_BCS_BATCH(batch, 0);
523
524     ADVANCE_BCS_BATCH(batch);
525 }
526 static void gen6_mfc_avc_qm_state(VADriverContextP ctx, struct gen6_encoder_context *gen6_encoder_context)
527 {
528     struct intel_batchbuffer *batch = gen6_encoder_context->base.batch;
529     int i;
530
531     BEGIN_BCS_BATCH(batch, 58);
532
533     OUT_BCS_BATCH(batch, MFX_AVC_QM_STATE | 56);
534     OUT_BCS_BATCH(batch, 0xFF ) ; 
535     for( i = 0; i < 56; i++) {
536         OUT_BCS_BATCH(batch, 0x10101010); 
537     }   
538
539     ADVANCE_BCS_BATCH(batch);
540 }
541
542 static void gen6_mfc_avc_fqm_state(VADriverContextP ctx, struct gen6_encoder_context *gen6_encoder_context)
543 {
544     struct intel_batchbuffer *batch = gen6_encoder_context->base.batch;
545     int i;
546
547     BEGIN_BCS_BATCH(batch, 113);
548     OUT_BCS_BATCH(batch, MFC_AVC_FQM_STATE | (113 - 2));
549
550     for(i = 0; i < 112;i++) {
551         OUT_BCS_BATCH(batch, 0x10001000);
552     }   
553
554     ADVANCE_BCS_BATCH(batch);   
555 }
556
557 static void
558 gen7_mfc_qm_state(VADriverContextP ctx,
559                   int qm_type,
560                   unsigned int *qm,
561                   int qm_length,
562                   struct gen6_encoder_context *gen6_encoder_context)
563 {
564     struct intel_batchbuffer *batch = gen6_encoder_context->base.batch;
565     unsigned int qm_buffer[16];
566
567     assert(qm_length <= 16);
568     assert(sizeof(*qm) == 4);
569     memcpy(qm_buffer, qm, qm_length * 4);
570
571     BEGIN_BCS_BATCH(batch, 18);
572     OUT_BCS_BATCH(batch, MFX_QM_STATE | (18 - 2));
573     OUT_BCS_BATCH(batch, qm_type << 0);
574     intel_batchbuffer_data(batch, qm_buffer, 16 * 4);
575     ADVANCE_BCS_BATCH(batch);
576 }
577
578 static void gen7_mfc_avc_qm_state(VADriverContextP ctx, struct gen6_encoder_context *gen6_encoder_context)
579 {
580     unsigned int qm[16] = {
581         0x10101010, 0x10101010, 0x10101010, 0x10101010,
582         0x10101010, 0x10101010, 0x10101010, 0x10101010,
583         0x10101010, 0x10101010, 0x10101010, 0x10101010,
584         0x10101010, 0x10101010, 0x10101010, 0x10101010
585     };
586
587     gen7_mfc_qm_state(ctx, MFX_QM_AVC_4X4_INTRA_MATRIX, qm, 12, gen6_encoder_context);
588     gen7_mfc_qm_state(ctx, MFX_QM_AVC_4X4_INTER_MATRIX, qm, 12, gen6_encoder_context);
589     gen7_mfc_qm_state(ctx, MFX_QM_AVC_8x8_INTRA_MATRIX, qm, 16, gen6_encoder_context);
590     gen7_mfc_qm_state(ctx, MFX_QM_AVC_8x8_INTER_MATRIX, qm, 16, gen6_encoder_context);
591 }
592
593 static void
594 gen7_mfc_fqm_state(VADriverContextP ctx,
595                    int fqm_type,
596                    unsigned int *fqm,
597                    int fqm_length,
598                    struct gen6_encoder_context *gen6_encoder_context)
599 {
600     struct intel_batchbuffer *batch = gen6_encoder_context->base.batch;
601     unsigned int fqm_buffer[32];
602
603     assert(fqm_length <= 32);
604     assert(sizeof(*fqm) == 4);
605     memcpy(fqm_buffer, fqm, fqm_length * 4);
606
607     BEGIN_BCS_BATCH(batch, 34);
608     OUT_BCS_BATCH(batch, MFX_FQM_STATE | (34 - 2));
609     OUT_BCS_BATCH(batch, fqm_type << 0);
610     intel_batchbuffer_data(batch, fqm_buffer, 32 * 4);
611     ADVANCE_BCS_BATCH(batch);
612 }
613
614 static void gen7_mfc_avc_fqm_state(VADriverContextP ctx, struct gen6_encoder_context *gen6_encoder_context)
615 {
616     unsigned int qm[32] = {
617         0x10001000, 0x10001000, 0x10001000, 0x10001000,
618         0x10001000, 0x10001000, 0x10001000, 0x10001000,
619         0x10001000, 0x10001000, 0x10001000, 0x10001000,
620         0x10001000, 0x10001000, 0x10001000, 0x10001000,
621         0x10001000, 0x10001000, 0x10001000, 0x10001000,
622         0x10001000, 0x10001000, 0x10001000, 0x10001000,
623         0x10001000, 0x10001000, 0x10001000, 0x10001000,
624         0x10001000, 0x10001000, 0x10001000, 0x10001000
625     };
626
627     gen7_mfc_fqm_state(ctx, MFX_QM_AVC_4X4_INTRA_MATRIX, qm, 24, gen6_encoder_context);
628     gen7_mfc_fqm_state(ctx, MFX_QM_AVC_4X4_INTER_MATRIX, qm, 24, gen6_encoder_context);
629     gen7_mfc_fqm_state(ctx, MFX_QM_AVC_8x8_INTRA_MATRIX, qm, 32, gen6_encoder_context);
630     gen7_mfc_fqm_state(ctx, MFX_QM_AVC_8x8_INTER_MATRIX, qm, 32, gen6_encoder_context);
631 }
632
633 static void gen6_mfc_avc_ref_idx_state(VADriverContextP ctx, struct gen6_encoder_context *gen6_encoder_context)
634 {
635     struct intel_batchbuffer *batch = gen6_encoder_context->base.batch;
636     int i;
637
638         BEGIN_BCS_BATCH(batch, 10);
639         OUT_BCS_BATCH(batch, MFX_AVC_REF_IDX_STATE | 8); 
640         OUT_BCS_BATCH(batch, 0);                  //Select L0
641         OUT_BCS_BATCH(batch, 0x80808020);         //Only 1 reference
642         for(i = 0; i < 7; i++) {
643                 OUT_BCS_BATCH(batch, 0x80808080);
644         }   
645         ADVANCE_BCS_BATCH(batch);
646
647         BEGIN_BCS_BATCH(batch, 10);
648         OUT_BCS_BATCH(batch, MFX_AVC_REF_IDX_STATE | 8); 
649         OUT_BCS_BATCH(batch, 1);                  //Select L1
650         OUT_BCS_BATCH(batch, 0x80808022);         //Only 1 reference
651         for(i = 0; i < 7; i++) {
652                 OUT_BCS_BATCH(batch, 0x80808080);
653         }   
654         ADVANCE_BCS_BATCH(batch);
655 }
656         
657 static void
658 gen6_mfc_avc_insert_object(VADriverContextP ctx, struct gen6_encoder_context *gen6_encoder_context,
659                            unsigned int *insert_data, int lenght_in_dws, int data_bits_in_last_dw,
660                            int skip_emul_byte_count, int is_last_header, int is_end_of_slice, int emulation_flag)
661 {
662     struct intel_batchbuffer *batch = gen6_encoder_context->base.batch;
663
664     BEGIN_BCS_BATCH(batch, lenght_in_dws + 2);
665
666     OUT_BCS_BATCH(batch, MFC_AVC_INSERT_OBJECT | (lenght_in_dws + 2 - 2));
667     OUT_BCS_BATCH(batch,
668                   (0 << 16) |   /* always start at offset 0 */
669                   (data_bits_in_last_dw << 8) |
670                   (skip_emul_byte_count << 4) |
671                   (!!emulation_flag << 3) |
672                   ((!!is_last_header) << 2) |
673                   ((!!is_end_of_slice) << 1) |
674                   (0 << 0));    /* FIXME: ??? */
675
676     intel_batchbuffer_data(batch, insert_data, lenght_in_dws * 4);
677     ADVANCE_BCS_BATCH(batch);
678 }
679
680 static int
681 gen6_mfc_avc_pak_object_intra(VADriverContextP ctx, int x, int y, int end_mb, int qp,unsigned int *msg,
682                               struct gen6_encoder_context *gen6_encoder_context,
683                               unsigned char target_mb_size, unsigned char max_mb_size)
684 {
685     struct intel_batchbuffer *batch = gen6_encoder_context->base.batch;
686     int len_in_dwords = 11;
687
688     BEGIN_BCS_BATCH(batch, len_in_dwords);
689
690     OUT_BCS_BATCH(batch, MFC_AVC_PAK_OBJECT | (len_in_dwords - 2));
691     OUT_BCS_BATCH(batch, 0);
692     OUT_BCS_BATCH(batch, 0);
693     OUT_BCS_BATCH(batch, 
694                   (0 << 24) |           /* PackedMvNum, Debug*/
695                   (0 << 20) |           /* No motion vector */
696                   (1 << 19) |           /* CbpDcY */
697                   (1 << 18) |           /* CbpDcU */
698                   (1 << 17) |           /* CbpDcV */
699                   (msg[0] & 0xFFFF) );
700
701     OUT_BCS_BATCH(batch, (0xFFFF<<16) | (y << 8) | x);          /* Code Block Pattern for Y*/
702     OUT_BCS_BATCH(batch, 0x000F000F);                                                   /* Code Block Pattern */                
703     OUT_BCS_BATCH(batch, (0 << 27) | (end_mb << 26) | qp);      /* Last MB */
704
705     /*Stuff for Intra MB*/
706     OUT_BCS_BATCH(batch, msg[1]);                       /* We using Intra16x16 no 4x4 predmode*/        
707     OUT_BCS_BATCH(batch, msg[2]);       
708     OUT_BCS_BATCH(batch, msg[3]&0xFC);          
709     
710     /*MaxSizeInWord and TargetSzieInWord*/
711     OUT_BCS_BATCH(batch, (max_mb_size << 24) |
712                          (target_mb_size << 16) );
713
714     ADVANCE_BCS_BATCH(batch);
715
716     return len_in_dwords;
717 }
718
719 static int gen6_mfc_avc_pak_object_inter(VADriverContextP ctx, int x, int y, int end_mb, int qp, unsigned int offset,
720                                          struct gen6_encoder_context *gen6_encoder_context,
721                                          unsigned char target_mb_size,unsigned char max_mb_size, int slice_type)
722 {
723     struct intel_batchbuffer *batch = gen6_encoder_context->base.batch;
724     int len_in_dwords = 11;
725
726     BEGIN_BCS_BATCH(batch, len_in_dwords);
727
728     OUT_BCS_BATCH(batch, MFC_AVC_PAK_OBJECT | (len_in_dwords - 2));
729
730     OUT_BCS_BATCH(batch, 32);         /* 32 MV*/
731     OUT_BCS_BATCH(batch, offset);
732
733     OUT_BCS_BATCH(batch, 
734                   (1 << 24) |     /* PackedMvNum, Debug*/
735                   (4 << 20) |     /* 8 MV, SNB don't use it*/
736                   (1 << 19) |     /* CbpDcY */
737                   (1 << 18) |     /* CbpDcU */
738                   (1 << 17) |     /* CbpDcV */
739                   (0 << 15) |     /* Transform8x8Flag = 0*/
740                   (0 << 14) |     /* Frame based*/
741                   (0 << 13) |     /* Inter MB */
742                   (1 << 8)  |     /* MbType = P_L0_16x16 */   
743                   (0 << 7)  |     /* MBZ for frame */
744                   (0 << 6)  |     /* MBZ */
745                   (2 << 4)  |     /* MBZ for inter*/
746                   (0 << 3)  |     /* MBZ */
747                   (0 << 2)  |     /* SkipMbFlag */
748                   (0 << 0));      /* InterMbMode */
749
750     OUT_BCS_BATCH(batch, (0xFFFF<<16) | (y << 8) | x);        /* Code Block Pattern for Y*/
751     OUT_BCS_BATCH(batch, 0x000F000F);                         /* Code Block Pattern */  
752 #if 0 
753     if ( slice_type == SLICE_TYPE_B) {
754         OUT_BCS_BATCH(batch, (0xF<<28) | (end_mb << 26) | qp);  /* Last MB */
755     } else {
756         OUT_BCS_BATCH(batch, (end_mb << 26) | qp);      /* Last MB */
757     }
758 #else
759     OUT_BCS_BATCH(batch, (end_mb << 26) | qp);  /* Last MB */
760 #endif
761
762
763     /*Stuff for Inter MB*/
764     OUT_BCS_BATCH(batch, 0x0);        
765     OUT_BCS_BATCH(batch, 0x0);    
766     OUT_BCS_BATCH(batch, 0x0);        
767
768     /*MaxSizeInWord and TargetSzieInWord*/
769     OUT_BCS_BATCH(batch, (max_mb_size << 24) |
770                          (target_mb_size << 16) );
771
772     ADVANCE_BCS_BATCH(batch);
773
774     return len_in_dwords;
775 }
776
777 static void gen6_mfc_init(VADriverContextP ctx, struct gen6_encoder_context *gen6_encoder_context)
778 {
779     struct i965_driver_data *i965 = i965_driver_data(ctx);
780     struct gen6_mfc_context *mfc_context = &gen6_encoder_context->mfc_context;
781     dri_bo *bo;
782     int i;
783
784     /*Encode common setup for MFC*/
785     dri_bo_unreference(mfc_context->post_deblocking_output.bo);
786     mfc_context->post_deblocking_output.bo = NULL;
787
788     dri_bo_unreference(mfc_context->pre_deblocking_output.bo);
789     mfc_context->pre_deblocking_output.bo = NULL;
790
791     dri_bo_unreference(mfc_context->uncompressed_picture_source.bo);
792     mfc_context->uncompressed_picture_source.bo = NULL;
793
794     dri_bo_unreference(mfc_context->mfc_indirect_pak_bse_object.bo); 
795     mfc_context->mfc_indirect_pak_bse_object.bo = NULL;
796
797     for (i = 0; i < NUM_MFC_DMV_BUFFERS; i++){
798         if ( mfc_context->direct_mv_buffers[i].bo != NULL);
799             dri_bo_unreference(mfc_context->direct_mv_buffers[i].bo);
800         mfc_context->direct_mv_buffers[i].bo = NULL;
801     }
802
803     for (i = 0; i < MAX_MFC_REFERENCE_SURFACES; i++){
804         if (mfc_context->reference_surfaces[i].bo != NULL)
805             dri_bo_unreference(mfc_context->reference_surfaces[i].bo);
806         mfc_context->reference_surfaces[i].bo = NULL;  
807     }
808
809     dri_bo_unreference(mfc_context->intra_row_store_scratch_buffer.bo);
810     bo = dri_bo_alloc(i965->intel.bufmgr,
811                       "Buffer",
812                       128 * 64,
813                       64);
814     assert(bo);
815     mfc_context->intra_row_store_scratch_buffer.bo = bo;
816
817     dri_bo_unreference(mfc_context->macroblock_status_buffer.bo);
818     bo = dri_bo_alloc(i965->intel.bufmgr,
819                       "Buffer",
820                       8*9600,
821                       64);
822     assert(bo);
823     mfc_context->macroblock_status_buffer.bo = bo;
824
825     dri_bo_unreference(mfc_context->deblocking_filter_row_store_scratch_buffer.bo);
826     bo = dri_bo_alloc(i965->intel.bufmgr,
827                       "Buffer",
828                       49152,  /* 6 * 128 * 64 */
829                       64);
830     assert(bo);
831     mfc_context->deblocking_filter_row_store_scratch_buffer.bo = bo;
832
833     dri_bo_unreference(mfc_context->bsd_mpc_row_store_scratch_buffer.bo);
834     bo = dri_bo_alloc(i965->intel.bufmgr,
835                       "Buffer",
836                       12288, /* 1.5 * 128 * 64 */
837                       0x1000);
838     assert(bo);
839     mfc_context->bsd_mpc_row_store_scratch_buffer.bo = bo;
840 }
841
842 void gen6_mfc_avc_pipeline_programing(VADriverContextP ctx,
843                                       struct encode_state *encode_state,
844                                       struct gen6_encoder_context *gen6_encoder_context)
845 {
846     struct i965_driver_data *i965 = i965_driver_data(ctx);
847     struct intel_batchbuffer *batch = gen6_encoder_context->base.batch;
848     struct gen6_mfc_context *mfc_context = &gen6_encoder_context->mfc_context;
849     struct gen6_vme_context *vme_context = &gen6_encoder_context->vme_context;
850     VAEncSequenceParameterBufferH264 *pSequenceParameter = (VAEncSequenceParameterBufferH264 *)encode_state->seq_param_ext->buffer;
851     VAEncPictureParameterBufferH264 *pPicParameter = (VAEncPictureParameterBufferH264 *)encode_state->pic_param_ext->buffer;
852     VAEncSliceParameterBufferH264 *pSliceParameter = (VAEncSliceParameterBufferH264 *)encode_state->slice_params_ext[0]->buffer; /* FIXME: multi slices */
853     VAEncH264DecRefPicMarkingBuffer *pDecRefPicMarking = NULL;
854     unsigned int *msg = NULL, offset = 0;
855     int emit_new_state = 1, object_len_in_bytes;
856     int is_intra = pSliceParameter->slice_type == SLICE_TYPE_I;
857     int width_in_mbs = (mfc_context->surface_state.width + 15) / 16;
858     int height_in_mbs = (mfc_context->surface_state.height + 15) / 16;
859     int x,y;
860     int rate_control_mode = pSequenceParameter->rate_control_method; 
861     unsigned char target_mb_size = mfc_context->bit_rate_control_context[1-is_intra].TargetSizeInWord;
862     unsigned char max_mb_size = mfc_context->bit_rate_control_context[1-is_intra].MaxSizeInWord;
863     int qp = pPicParameter->pic_init_qp + pSliceParameter->slice_qp_delta;
864     unsigned char *slice_header = NULL;
865     int slice_header_length_in_bits = 0;
866     unsigned int tail_data[] = { 0x0 };
867
868     if (encode_state->dec_ref_pic_marking)
869         pDecRefPicMarking = (VAEncH264DecRefPicMarkingBuffer *)encode_state->dec_ref_pic_marking->buffer;
870
871     slice_header_length_in_bits = build_avc_slice_header(pSequenceParameter, pPicParameter, pSliceParameter, pDecRefPicMarking, &slice_header);
872
873     if ( rate_control_mode == 0) {
874         qp = mfc_context->bit_rate_control_context[1-is_intra].QpPrimeY;
875     }
876
877     intel_batchbuffer_start_atomic_bcs(batch, 0x1000); 
878     
879     if (is_intra) {
880         dri_bo_map(vme_context->vme_output.bo , 1);
881         msg = (unsigned int *)vme_context->vme_output.bo->virtual;
882     }
883
884     for (y = 0; y < height_in_mbs; y++) {
885         for (x = 0; x < width_in_mbs; x++) { 
886             int last_mb = (y == (height_in_mbs-1)) && ( x == (width_in_mbs-1) );
887             
888             if (emit_new_state) {
889                 intel_batchbuffer_emit_mi_flush(batch);
890                 
891                 if (IS_GEN7(i965->intel.device_id)) {
892                     gen7_mfc_pipe_mode_select(ctx, MFX_FORMAT_AVC, gen6_encoder_context);
893                     gen7_mfc_surface_state(ctx, gen6_encoder_context);
894                     gen7_mfc_ind_obj_base_addr_state(ctx, gen6_encoder_context);
895                 } else {
896                     gen6_mfc_pipe_mode_select(ctx, gen6_encoder_context);
897                     gen6_mfc_surface_state(ctx, gen6_encoder_context);
898                     gen6_mfc_ind_obj_base_addr_state(ctx, gen6_encoder_context);
899                 }
900
901                 gen6_mfc_pipe_buf_addr_state(ctx, gen6_encoder_context);
902                 gen6_mfc_bsp_buf_base_addr_state(ctx, gen6_encoder_context);
903
904                 if (IS_GEN7(i965->intel.device_id)) {
905                     gen7_mfc_avc_img_state(ctx, gen6_encoder_context);
906                     gen7_mfc_avc_qm_state(ctx, gen6_encoder_context);
907                     gen7_mfc_avc_fqm_state(ctx, gen6_encoder_context);
908                 } else {
909                     gen6_mfc_avc_img_state(ctx, encode_state,gen6_encoder_context);
910                     gen6_mfc_avc_qm_state(ctx, gen6_encoder_context);
911                     gen6_mfc_avc_fqm_state(ctx, gen6_encoder_context);
912                 }
913
914                 gen6_mfc_avc_directmode_state(ctx, gen6_encoder_context); 
915                 gen6_mfc_avc_ref_idx_state(ctx, gen6_encoder_context);
916                 gen6_mfc_avc_slice_state(ctx, pSliceParameter->slice_type, 
917                                          encode_state, gen6_encoder_context, 
918                                          rate_control_mode == 0, pPicParameter->pic_init_qp + pSliceParameter->slice_qp_delta);
919
920                 if (encode_state->packed_header_data[VAEncPackedHeaderSPS]) {
921                     VAEncPackedHeaderParameterBuffer *param = NULL;
922                     unsigned int *header_data = (unsigned int *)encode_state->packed_header_data[VAEncPackedHeaderSPS]->buffer;
923                     unsigned int length_in_bits;
924
925                     assert(encode_state->packed_header_param[VAEncPackedHeaderSPS]);
926                     param = (VAEncPackedHeaderParameterBuffer *)encode_state->packed_header_param[VAEncPackedHeaderSPS]->buffer;
927                     length_in_bits = param->length_in_bits[0];
928
929                     gen6_mfc_avc_insert_object(ctx, 
930                                                gen6_encoder_context,
931                                                header_data,
932                                                ALIGN(length_in_bits, 32) >> 5,
933                                                length_in_bits & 0x1f,
934                                                param->skip_emulation_check_count,
935                                                0,
936                                                0,
937                                                param->insert_emulation_bytes);
938                 }
939
940                 if (encode_state->packed_header_data[VAEncPackedHeaderPPS]) {
941                     VAEncPackedHeaderParameterBuffer *param = NULL;
942                     unsigned int *header_data = (unsigned int *)encode_state->packed_header_data[VAEncPackedHeaderPPS]->buffer;
943                     unsigned int length_in_bits;
944
945                     assert(encode_state->packed_header_param[VAEncPackedHeaderPPS]);
946                     param = (VAEncPackedHeaderParameterBuffer *)encode_state->packed_header_param[VAEncPackedHeaderPPS]->buffer;
947                     length_in_bits = param->length_in_bits[0];
948
949                     gen6_mfc_avc_insert_object(ctx, 
950                                                gen6_encoder_context,
951                                                header_data,
952                                                ALIGN(length_in_bits, 32) >> 5,
953                                                length_in_bits & 0x1f,
954                                                param->skip_emulation_check_count,
955                                                0,
956                                                0,
957                                                param->insert_emulation_bytes);
958                 }
959
960                 gen6_mfc_avc_insert_object(ctx, gen6_encoder_context,
961                                            (unsigned int *)slice_header, ALIGN(slice_header_length_in_bits, 32) >> 5, slice_header_length_in_bits & 0x1f,
962                                            5,  /* first 5 bytes are start code + nal unit type */
963                                            1, 0, 1);
964                 emit_new_state = 0;
965             }
966
967             if (is_intra) {
968                 assert(msg);
969                 object_len_in_bytes = gen6_mfc_avc_pak_object_intra(ctx, x, y, last_mb, qp, msg, gen6_encoder_context,target_mb_size, max_mb_size);
970                 msg += 4;
971             } else {
972                 object_len_in_bytes = gen6_mfc_avc_pak_object_inter(ctx, x, y, last_mb, qp, offset, gen6_encoder_context, target_mb_size, max_mb_size, pSliceParameter->slice_type);
973                 offset += 64;
974             }
975
976             if (intel_batchbuffer_check_free_space(batch, object_len_in_bytes) == 0) {
977                 assert(0);
978                 intel_batchbuffer_end_atomic(batch);
979                 intel_batchbuffer_flush(batch);
980                 emit_new_state = 1;
981                 intel_batchbuffer_start_atomic_bcs(batch, 0x1000);
982             }
983         }
984     }
985
986     gen6_mfc_avc_insert_object(ctx, gen6_encoder_context,
987                                tail_data, sizeof(tail_data) >> 2, 32,
988                                sizeof(tail_data), 1, 1, 1);
989
990     if (is_intra)
991         dri_bo_unmap(vme_context->vme_output.bo);
992
993     free(slice_header);
994
995     intel_batchbuffer_end_atomic(batch);
996 }
997
998 static void 
999 gen6_mfc_free_avc_surface(void **data)
1000 {
1001     struct gen6_mfc_avc_surface_aux *avc_surface = *data;
1002
1003     if (!avc_surface)
1004         return;
1005
1006     dri_bo_unreference(avc_surface->dmv_top);
1007     avc_surface->dmv_top = NULL;
1008     dri_bo_unreference(avc_surface->dmv_bottom);
1009     avc_surface->dmv_bottom = NULL;
1010
1011     free(avc_surface);
1012     *data = NULL;
1013 }
1014
1015 static void gen6_mfc_bit_rate_control_context_init(struct encode_state *encode_state, 
1016                                                    struct gen6_mfc_context *mfc_context) 
1017 {
1018     VAEncSequenceParameterBufferH264 *pSequenceParameter = (VAEncSequenceParameterBufferH264 *)encode_state->seq_param_ext->buffer;
1019     
1020     int width_in_mbs = (mfc_context->surface_state.width + 15) / 16;
1021     int height_in_mbs = (mfc_context->surface_state.height + 15) / 16;
1022     float fps =  pSequenceParameter->time_scale * 0.5 / pSequenceParameter->num_units_in_tick ;
1023     int inter_mb_size = pSequenceParameter->bits_per_second * 1.0 / (fps+4.0) / width_in_mbs / height_in_mbs;
1024     int intra_mb_size = inter_mb_size * 5.0;
1025     int i;
1026     
1027     mfc_context->bit_rate_control_context[0].target_mb_size = intra_mb_size;
1028     mfc_context->bit_rate_control_context[0].target_frame_size = intra_mb_size * width_in_mbs * height_in_mbs;
1029     mfc_context->bit_rate_control_context[1].target_mb_size = inter_mb_size;
1030     mfc_context->bit_rate_control_context[1].target_frame_size = inter_mb_size * width_in_mbs * height_in_mbs;
1031
1032     for(i = 0 ; i < 2; i++) {
1033         mfc_context->bit_rate_control_context[i].QpPrimeY = 26;
1034         mfc_context->bit_rate_control_context[i].MaxQpNegModifier = 6;
1035         mfc_context->bit_rate_control_context[i].MaxQpPosModifier = 6;
1036         mfc_context->bit_rate_control_context[i].GrowInit = 6;
1037         mfc_context->bit_rate_control_context[i].GrowResistance = 4;
1038         mfc_context->bit_rate_control_context[i].ShrinkInit = 6;
1039         mfc_context->bit_rate_control_context[i].ShrinkResistance = 4;
1040         
1041         mfc_context->bit_rate_control_context[i].Correct[0] = 8;
1042         mfc_context->bit_rate_control_context[i].Correct[1] = 4;
1043         mfc_context->bit_rate_control_context[i].Correct[2] = 2;
1044         mfc_context->bit_rate_control_context[i].Correct[3] = 2;
1045         mfc_context->bit_rate_control_context[i].Correct[4] = 4;
1046         mfc_context->bit_rate_control_context[i].Correct[5] = 8;
1047     }
1048     
1049     mfc_context->bit_rate_control_context[0].TargetSizeInWord = (intra_mb_size + 16)/ 16;
1050     mfc_context->bit_rate_control_context[1].TargetSizeInWord = (inter_mb_size + 16)/ 16;
1051
1052     mfc_context->bit_rate_control_context[0].MaxSizeInWord = mfc_context->bit_rate_control_context[0].TargetSizeInWord * 1.5;
1053     mfc_context->bit_rate_control_context[1].MaxSizeInWord = mfc_context->bit_rate_control_context[1].TargetSizeInWord * 1.5;
1054 }
1055
1056 static int gen6_mfc_bit_rate_control_context_update(struct encode_state *encode_state, 
1057                                                    struct gen6_mfc_context *mfc_context,
1058                                                    int current_frame_size) 
1059 {
1060     VAEncSliceParameterBufferH264 *pSliceParameter = (VAEncSliceParameterBufferH264 *)encode_state->slice_params_ext[0]->buffer; 
1061     int control_index = 1 - (pSliceParameter->slice_type == SLICE_TYPE_I);
1062     int oldQp = mfc_context->bit_rate_control_context[control_index].QpPrimeY;
1063
1064     /*
1065     printf("conrol_index = %d, start_qp = %d, result = %d, target = %d\n", control_index, 
1066             mfc_context->bit_rate_control_context[control_index].QpPrimeY, current_frame_size,
1067             mfc_context->bit_rate_control_context[control_index].target_frame_size );
1068     */
1069
1070     if ( current_frame_size > mfc_context->bit_rate_control_context[control_index].target_frame_size * 4.0 ) {
1071         mfc_context->bit_rate_control_context[control_index].QpPrimeY += 4;
1072     } else if ( current_frame_size > mfc_context->bit_rate_control_context[control_index].target_frame_size * 2.0 ) {
1073         mfc_context->bit_rate_control_context[control_index].QpPrimeY += 3;
1074     } else if ( current_frame_size > mfc_context->bit_rate_control_context[control_index].target_frame_size * 1.50 ) {
1075         mfc_context->bit_rate_control_context[control_index].QpPrimeY += 2;
1076     } else if ( current_frame_size > mfc_context->bit_rate_control_context[control_index].target_frame_size * 1.20 ) {
1077         mfc_context->bit_rate_control_context[control_index].QpPrimeY ++;
1078     } else if (current_frame_size < mfc_context->bit_rate_control_context[control_index].target_frame_size * 0.30 )  {
1079          mfc_context->bit_rate_control_context[control_index].QpPrimeY -= 3;
1080     } else if (current_frame_size < mfc_context->bit_rate_control_context[control_index].target_frame_size * 0.50 )  {
1081          mfc_context->bit_rate_control_context[control_index].QpPrimeY -= 2;
1082     } else if (current_frame_size < mfc_context->bit_rate_control_context[control_index].target_frame_size * 0.80 )  {
1083          mfc_context->bit_rate_control_context[control_index].QpPrimeY --;
1084     }
1085     
1086     if ( mfc_context->bit_rate_control_context[control_index].QpPrimeY > 51)
1087         mfc_context->bit_rate_control_context[control_index].QpPrimeY = 51;
1088     if ( mfc_context->bit_rate_control_context[control_index].QpPrimeY < 1)
1089         mfc_context->bit_rate_control_context[control_index].QpPrimeY = 1;
1090  
1091     if ( mfc_context->bit_rate_control_context[control_index].QpPrimeY != oldQp)
1092         return 0;
1093
1094     return 1;
1095 }
1096
1097 static VAStatus gen6_mfc_avc_prepare(VADriverContextP ctx, 
1098                                      struct encode_state *encode_state,
1099                                      struct gen6_encoder_context *gen6_encoder_context)
1100 {
1101     struct i965_driver_data *i965 = i965_driver_data(ctx);
1102     struct gen6_mfc_context *mfc_context = &gen6_encoder_context->mfc_context;
1103     struct object_surface *obj_surface; 
1104     struct object_buffer *obj_buffer;
1105     struct gen6_mfc_avc_surface_aux* gen6_avc_surface;
1106     dri_bo *bo;
1107     VAEncPictureParameterBufferH264 *pPicParameter = (VAEncPictureParameterBufferH264 *)encode_state->pic_param_ext->buffer;
1108     VAStatus vaStatus = VA_STATUS_SUCCESS;
1109         int i;
1110
1111     /*Setup all the input&output object*/
1112
1113     /* Setup current frame and current direct mv buffer*/
1114     obj_surface = SURFACE(pPicParameter->CurrPic.picture_id);
1115     assert(obj_surface);
1116     i965_check_alloc_surface_bo(ctx, obj_surface, 1, VA_FOURCC('N','V','1','2'));
1117     if ( obj_surface->private_data == NULL) {
1118         gen6_avc_surface = calloc(sizeof(struct gen6_mfc_avc_surface_aux), 1);
1119         gen6_avc_surface->dmv_top = 
1120             dri_bo_alloc(i965->intel.bufmgr,
1121                     "Buffer",
1122                     68*8192, 
1123                     64);
1124         gen6_avc_surface->dmv_bottom = 
1125             dri_bo_alloc(i965->intel.bufmgr,
1126                             "Buffer",
1127                             68*8192, 
1128                             64);
1129         assert(gen6_avc_surface->dmv_top);
1130         assert(gen6_avc_surface->dmv_bottom);
1131         obj_surface->private_data = (void *)gen6_avc_surface;
1132         obj_surface->free_private_data = (void *)gen6_mfc_free_avc_surface; 
1133     }
1134     gen6_avc_surface = (struct gen6_mfc_avc_surface_aux*) obj_surface->private_data;
1135     mfc_context->direct_mv_buffers[NUM_MFC_DMV_BUFFERS - 2].bo = gen6_avc_surface->dmv_top;
1136     mfc_context->direct_mv_buffers[NUM_MFC_DMV_BUFFERS - 1].bo = gen6_avc_surface->dmv_bottom;
1137         dri_bo_reference(gen6_avc_surface->dmv_top);
1138         dri_bo_reference(gen6_avc_surface->dmv_bottom);
1139
1140     mfc_context->post_deblocking_output.bo = obj_surface->bo;
1141     dri_bo_reference(mfc_context->post_deblocking_output.bo);
1142
1143     mfc_context->surface_state.width = obj_surface->orig_width;
1144     mfc_context->surface_state.height = obj_surface->orig_height;
1145     mfc_context->surface_state.w_pitch = obj_surface->width;
1146     mfc_context->surface_state.h_pitch = obj_surface->height;
1147     
1148     /* Setup reference frames and direct mv buffers*/
1149     for(i = 0; i < MAX_MFC_REFERENCE_SURFACES; i++) {
1150                 if ( pPicParameter->ReferenceFrames[i].picture_id != VA_INVALID_ID ) { 
1151                         obj_surface = SURFACE(pPicParameter->ReferenceFrames[i].picture_id);
1152                         assert(obj_surface);
1153                         if (obj_surface->bo != NULL) {
1154                                 mfc_context->reference_surfaces[i].bo = obj_surface->bo;
1155                                 dri_bo_reference(obj_surface->bo);
1156                         }
1157             /* Check DMV buffer */
1158             if ( obj_surface->private_data == NULL) {
1159                 
1160                 gen6_avc_surface = calloc(sizeof(struct gen6_mfc_avc_surface_aux), 1);
1161                 gen6_avc_surface->dmv_top = 
1162                     dri_bo_alloc(i965->intel.bufmgr,
1163                             "Buffer",
1164                             68*8192, 
1165                             64);
1166                 gen6_avc_surface->dmv_bottom = 
1167                     dri_bo_alloc(i965->intel.bufmgr,
1168                             "Buffer",
1169                             68*8192, 
1170                             64);
1171                 assert(gen6_avc_surface->dmv_top);
1172                 assert(gen6_avc_surface->dmv_bottom);
1173                 obj_surface->private_data = gen6_avc_surface;
1174                 obj_surface->free_private_data = gen6_mfc_free_avc_surface; 
1175             }
1176     
1177             gen6_avc_surface = (struct gen6_mfc_avc_surface_aux*) obj_surface->private_data;
1178             /* Setup DMV buffer */
1179             mfc_context->direct_mv_buffers[i*2].bo = gen6_avc_surface->dmv_top;
1180             mfc_context->direct_mv_buffers[i*2+1].bo = gen6_avc_surface->dmv_bottom; 
1181             dri_bo_reference(gen6_avc_surface->dmv_top);
1182             dri_bo_reference(gen6_avc_surface->dmv_bottom);
1183                 } else {
1184                         break;
1185                 }
1186         }
1187         
1188     obj_surface = SURFACE(encode_state->current_render_target);
1189     assert(obj_surface && obj_surface->bo);
1190     mfc_context->uncompressed_picture_source.bo = obj_surface->bo;
1191     dri_bo_reference(mfc_context->uncompressed_picture_source.bo);
1192
1193     obj_buffer = BUFFER (pPicParameter->CodedBuf); /* FIXME: fix this later */
1194     bo = obj_buffer->buffer_store->bo;
1195     assert(bo);
1196     mfc_context->mfc_indirect_pak_bse_object.bo = bo;
1197     mfc_context->mfc_indirect_pak_bse_object.offset = ALIGN(sizeof(VACodedBufferSegment), 64);
1198     mfc_context->mfc_indirect_pak_bse_object.end_offset = ALIGN (obj_buffer->size_element - 0x1000, 0x1000);
1199     dri_bo_reference(mfc_context->mfc_indirect_pak_bse_object.bo);
1200
1201     /*Programing bit rate control */
1202     if ( mfc_context->bit_rate_control_context[0].MaxSizeInWord == 0 )
1203         gen6_mfc_bit_rate_control_context_init(encode_state, mfc_context);
1204
1205     /*Programing bcs pipeline*/
1206     gen6_mfc_avc_pipeline_programing(ctx, encode_state, gen6_encoder_context);  //filling the pipeline
1207         
1208     return vaStatus;
1209 }
1210
1211 static VAStatus gen6_mfc_run(VADriverContextP ctx, 
1212                              struct encode_state *encode_state,
1213                              struct gen6_encoder_context *gen6_encoder_context)
1214 {
1215     struct intel_batchbuffer *batch = gen6_encoder_context->base.batch;
1216
1217     intel_batchbuffer_flush(batch);             //run the pipeline
1218
1219     return VA_STATUS_SUCCESS;
1220 }
1221
1222 static VAStatus gen6_mfc_stop(VADriverContextP ctx, 
1223                               struct encode_state *encode_state,
1224                               struct gen6_encoder_context *gen6_encoder_context,
1225                               int *encoded_bits_size)
1226 {
1227     struct gen6_mfc_context *mfc_context = &gen6_encoder_context->mfc_context;
1228     unsigned int *status_mem;
1229     unsigned int buffer_size_bits = 0;
1230     int width_in_mbs = (mfc_context->surface_state.width + 15) / 16;
1231     int height_in_mbs = (mfc_context->surface_state.height + 15) / 16;
1232     int i;
1233
1234     dri_bo_map(mfc_context->macroblock_status_buffer.bo, 1);
1235     status_mem = (unsigned int *)mfc_context->macroblock_status_buffer.bo->virtual;
1236     //Detecting encoder buffer size and bit rate control result
1237     for(i = 0; i < width_in_mbs * height_in_mbs; i++) {
1238         unsigned short current_mb = status_mem[1] >> 16;
1239         buffer_size_bits += current_mb;
1240         status_mem += 4;
1241     }    
1242     dri_bo_unmap(mfc_context->macroblock_status_buffer.bo);
1243
1244     *encoded_bits_size = buffer_size_bits;
1245
1246     return VA_STATUS_SUCCESS;
1247 }
1248
1249 static VAStatus
1250 gen6_mfc_avc_encode_picture(VADriverContextP ctx, 
1251                             struct encode_state *encode_state,
1252                             struct gen6_encoder_context *gen6_encoder_context)
1253 {
1254     VAEncSequenceParameterBufferH264 *pSequenceParameter = (VAEncSequenceParameterBufferH264 *)encode_state->seq_param_ext->buffer;
1255     struct gen6_mfc_context *mfc_context = &gen6_encoder_context->mfc_context;
1256     int rate_control_mode = pSequenceParameter->rate_control_method;  
1257     int MAX_CBR_INTERATE = 4;
1258     int current_frame_bits_size;
1259     int i;
1260  
1261     for(i = 0; i < MAX_CBR_INTERATE; i++) {
1262         gen6_mfc_init(ctx, gen6_encoder_context);
1263         gen6_mfc_avc_prepare(ctx, encode_state, gen6_encoder_context);
1264         gen6_mfc_run(ctx, encode_state, gen6_encoder_context);
1265         gen6_mfc_stop(ctx, encode_state, gen6_encoder_context, &current_frame_bits_size);
1266         if ( rate_control_mode == 0) {
1267             if ( gen6_mfc_bit_rate_control_context_update( encode_state, mfc_context, current_frame_bits_size) )
1268                 break;
1269         } else {
1270             break;
1271         }
1272     }
1273
1274     return VA_STATUS_SUCCESS;
1275 }
1276
1277 VAStatus
1278 gen6_mfc_pipeline(VADriverContextP ctx,
1279                   VAProfile profile,
1280                   struct encode_state *encode_state,
1281                   struct gen6_encoder_context *gen6_encoder_context)
1282 {
1283     VAStatus vaStatus;
1284
1285     switch (profile) {
1286     case VAProfileH264Baseline:
1287         vaStatus = gen6_mfc_avc_encode_picture(ctx, encode_state, gen6_encoder_context);
1288         break;
1289
1290         /* FIXME: add for other profile */
1291     default:
1292         vaStatus = VA_STATUS_ERROR_UNSUPPORTED_PROFILE;
1293         break;
1294     }
1295
1296     return vaStatus;
1297 }
1298
1299 Bool gen6_mfc_context_init(VADriverContextP ctx, struct gen6_mfc_context *mfc_context)
1300 {
1301     return True;
1302 }
1303
1304 Bool gen6_mfc_context_destroy(struct gen6_mfc_context *mfc_context)
1305 {
1306     int i;
1307
1308     dri_bo_unreference(mfc_context->post_deblocking_output.bo);
1309     mfc_context->post_deblocking_output.bo = NULL;
1310
1311     dri_bo_unreference(mfc_context->pre_deblocking_output.bo);
1312     mfc_context->pre_deblocking_output.bo = NULL;
1313
1314     dri_bo_unreference(mfc_context->uncompressed_picture_source.bo);
1315     mfc_context->uncompressed_picture_source.bo = NULL;
1316
1317     dri_bo_unreference(mfc_context->mfc_indirect_pak_bse_object.bo); 
1318     mfc_context->mfc_indirect_pak_bse_object.bo = NULL;
1319
1320     for (i = 0; i < NUM_MFC_DMV_BUFFERS; i++){
1321         dri_bo_unreference(mfc_context->direct_mv_buffers[i].bo);
1322         mfc_context->direct_mv_buffers[i].bo = NULL;
1323     }
1324
1325     dri_bo_unreference(mfc_context->intra_row_store_scratch_buffer.bo);
1326     mfc_context->intra_row_store_scratch_buffer.bo = NULL;
1327
1328         dri_bo_unreference(mfc_context->macroblock_status_buffer.bo);
1329         mfc_context->macroblock_status_buffer.bo = NULL;
1330
1331     dri_bo_unreference(mfc_context->deblocking_filter_row_store_scratch_buffer.bo);
1332     mfc_context->deblocking_filter_row_store_scratch_buffer.bo = NULL;
1333
1334     dri_bo_unreference(mfc_context->bsd_mpc_row_store_scratch_buffer.bo);
1335     mfc_context->bsd_mpc_row_store_scratch_buffer.bo = NULL;
1336
1337
1338     for (i = 0; i < MAX_MFC_REFERENCE_SURFACES; i++){
1339         dri_bo_unreference(mfc_context->reference_surfaces[i].bo);
1340         mfc_context->reference_surfaces[i].bo = NULL;  
1341     }
1342
1343     return True;
1344 }