i965_drv_video: update packed header interface
[platform/upstream/libva-intel-driver.git] / src / gen6_mfc.c
1 /*
2  * Copyright © 2010-2011 Intel Corporation
3  *
4  * Permission is hereby granted, free of charge, to any person obtaining a
5  * copy of this software and associated documentation files (the
6  * "Software"), to deal in the Software without restriction, including
7  * without limitation the rights to use, copy, modify, merge, publish,
8  * distribute, sub license, and/or sell copies of the Software, and to
9  * permit persons to whom the Software is furnished to do so, subject to
10  * the following conditions:
11  *
12  * The above copyright notice and this permission notice (including the
13  * next paragraph) shall be included in all copies or substantial portions
14  * of the Software.
15  *
16  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
17  * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
18  * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
19  * IN NO EVENT SHALL PRECISION INSIGHT AND/OR ITS SUPPLIERS BE LIABLE FOR
20  * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
21  * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
22  * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
23  *
24  * Authors:
25  *    Zhou Chang <chang.zhou@intel.com>
26  *
27  */
28
29 #include <stdio.h>
30 #include <stdlib.h>
31 #include <string.h>
32 #include <assert.h>
33
34 #include "assert.h"
35 #include "intel_batchbuffer.h"
36 #include "i965_defines.h"
37 #include "i965_structs.h"
38 #include "i965_drv_video.h"
39 #include "i965_encoder.h"
40 #include "i965_encoder_utils.h"
41
42 static void
43 gen6_mfc_pipe_mode_select(VADriverContextP ctx, struct gen6_encoder_context *gen6_encoder_context)
44 {
45     struct intel_batchbuffer *batch = gen6_encoder_context->base.batch;
46
47     BEGIN_BCS_BATCH(batch, 4);
48
49     OUT_BCS_BATCH(batch, MFX_PIPE_MODE_SELECT | (4 - 2));
50     OUT_BCS_BATCH(batch,
51                   (0 << 10) | /* disable Stream-Out , advanced QP/bitrate control need enable it*/
52                   (1 << 9)  | /* Post Deblocking Output */
53                   (0 << 8)  | /* Pre Deblocking Output */
54                   (0 << 7)  | /* disable TLB prefectch */
55                   (0 << 5)  | /* not in stitch mode */
56                   (1 << 4)  | /* encoding mode */
57                   (2 << 0));  /* Standard Select: AVC */
58     OUT_BCS_BATCH(batch,
59                   (0 << 20) | /* round flag in PB slice */
60                   (0 << 19) | /* round flag in Intra8x8 */
61                   (0 << 7)  | /* expand NOA bus flag */
62                   (1 << 6)  | /* must be 1 */
63                   (0 << 5)  | /* disable clock gating for NOA */
64                   (0 << 4)  | /* terminate if AVC motion and POC table error occurs */
65                   (0 << 3)  | /* terminate if AVC mbdata error occurs */
66                   (0 << 2)  | /* terminate if AVC CABAC/CAVLC decode error occurs */
67                   (0 << 1)  | /* AVC long field motion vector */
68                   (0 << 0));  /* always calculate AVC ILDB boundary strength */
69     OUT_BCS_BATCH(batch, 0);
70
71     ADVANCE_BCS_BATCH(batch);
72 }
73
74 static void
75 gen7_mfc_pipe_mode_select(VADriverContextP ctx,
76                           int standard_select,
77                           struct gen6_encoder_context *gen6_encoder_context)
78 {
79     struct intel_batchbuffer *batch = gen6_encoder_context->base.batch;
80
81     assert(standard_select == MFX_FORMAT_MPEG2 ||
82            standard_select == MFX_FORMAT_AVC);
83
84     BEGIN_BCS_BATCH(batch, 5);
85     OUT_BCS_BATCH(batch, MFX_PIPE_MODE_SELECT | (5 - 2));
86     OUT_BCS_BATCH(batch,
87                   (MFX_LONG_MODE << 17) | /* Must be long format for encoder */
88                   (MFD_MODE_VLD << 15) | /* VLD mode */
89                   (0 << 10) | /* disable Stream-Out */
90                   (1 << 9)  | /* Post Deblocking Output */
91                   (0 << 8)  | /* Pre Deblocking Output */
92                   (0 << 5)  | /* not in stitch mode */
93                   (1 << 4)  | /* encoding mode */
94                   (standard_select << 0));  /* standard select: avc or mpeg2 */
95     OUT_BCS_BATCH(batch,
96                   (0 << 7)  | /* expand NOA bus flag */
97                   (0 << 6)  | /* disable slice-level clock gating */
98                   (0 << 5)  | /* disable clock gating for NOA */
99                   (0 << 4)  | /* terminate if AVC motion and POC table error occurs */
100                   (0 << 3)  | /* terminate if AVC mbdata error occurs */
101                   (0 << 2)  | /* terminate if AVC CABAC/CAVLC decode error occurs */
102                   (0 << 1)  |
103                   (0 << 0));
104     OUT_BCS_BATCH(batch, 0);
105     OUT_BCS_BATCH(batch, 0);
106
107     ADVANCE_BCS_BATCH(batch);
108 }
109
110 static void
111 gen6_mfc_surface_state(VADriverContextP ctx, struct gen6_encoder_context *gen6_encoder_context)
112 {
113     struct intel_batchbuffer *batch = gen6_encoder_context->base.batch;
114     struct gen6_mfc_context *mfc_context = &gen6_encoder_context->mfc_context;
115
116     BEGIN_BCS_BATCH(batch, 6);
117
118     OUT_BCS_BATCH(batch, MFX_SURFACE_STATE | (6 - 2));
119     OUT_BCS_BATCH(batch, 0);
120     OUT_BCS_BATCH(batch,
121                   ((mfc_context->surface_state.height - 1) << 19) |
122                   ((mfc_context->surface_state.width - 1) << 6));
123     OUT_BCS_BATCH(batch,
124                   (MFX_SURFACE_PLANAR_420_8 << 28) | /* 420 planar YUV surface */
125                   (1 << 27) | /* must be 1 for interleave U/V, hardware requirement */
126                   (0 << 22) | /* surface object control state, FIXME??? */
127                   ((mfc_context->surface_state.w_pitch - 1) << 3) | /* pitch */
128                   (0 << 2)  | /* must be 0 for interleave U/V */
129                   (1 << 1)  | /* must be y-tiled */
130                   (I965_TILEWALK_YMAJOR << 0));                         /* tile walk, TILEWALK_YMAJOR */
131     OUT_BCS_BATCH(batch,
132                   (0 << 16) |                                                           /* must be 0 for interleave U/V */
133                   (mfc_context->surface_state.h_pitch));                /* y offset for U(cb) */
134     OUT_BCS_BATCH(batch, 0);
135     ADVANCE_BCS_BATCH(batch);
136 }
137
138 static void
139 gen7_mfc_surface_state(VADriverContextP ctx, struct gen6_encoder_context *gen6_encoder_context)
140 {
141     struct intel_batchbuffer *batch = gen6_encoder_context->base.batch;
142     struct gen6_mfc_context *mfc_context = &gen6_encoder_context->mfc_context;
143
144     BEGIN_BCS_BATCH(batch, 6);
145
146     OUT_BCS_BATCH(batch, MFX_SURFACE_STATE | (6 - 2));
147     OUT_BCS_BATCH(batch, 0);
148     OUT_BCS_BATCH(batch,
149                   ((mfc_context->surface_state.height - 1) << 18) |
150                   ((mfc_context->surface_state.width - 1) << 4));
151     OUT_BCS_BATCH(batch,
152                   (MFX_SURFACE_PLANAR_420_8 << 28) | /* 420 planar YUV surface */
153                   (1 << 27) | /* must be 1 for interleave U/V, hardware requirement */
154                   (0 << 22) | /* surface object control state, FIXME??? */
155                   ((mfc_context->surface_state.w_pitch - 1) << 3) | /* pitch */
156                   (0 << 2)  | /* must be 0 for interleave U/V */
157                   (1 << 1)  | /* must be tiled */
158                   (I965_TILEWALK_YMAJOR << 0));  /* tile walk, TILEWALK_YMAJOR */
159     OUT_BCS_BATCH(batch,
160                   (0 << 16) |                                                           /* must be 0 for interleave U/V */
161                   (mfc_context->surface_state.h_pitch));                /* y offset for U(cb) */
162     OUT_BCS_BATCH(batch, 0);
163     ADVANCE_BCS_BATCH(batch);
164 }
165
166 static void
167 gen6_mfc_pipe_buf_addr_state(VADriverContextP ctx, struct gen6_encoder_context *gen6_encoder_context)
168 {
169     struct intel_batchbuffer *batch = gen6_encoder_context->base.batch;
170     struct gen6_mfc_context *mfc_context = &gen6_encoder_context->mfc_context;
171     int i;
172
173     BEGIN_BCS_BATCH(batch, 24);
174
175     OUT_BCS_BATCH(batch, MFX_PIPE_BUF_ADDR_STATE | (24 - 2));
176
177     OUT_BCS_BATCH(batch, 0);                                                                                    /* pre output addr   */
178
179     OUT_BCS_RELOC(batch, mfc_context->post_deblocking_output.bo,
180                   I915_GEM_DOMAIN_INSTRUCTION, I915_GEM_DOMAIN_INSTRUCTION,
181                   0);                                                                                   /* post output addr  */ 
182
183     OUT_BCS_RELOC(batch, mfc_context->uncompressed_picture_source.bo,
184                   I915_GEM_DOMAIN_INSTRUCTION, I915_GEM_DOMAIN_INSTRUCTION,
185                   0);                                                                                   /* uncompressed data */
186     OUT_BCS_RELOC(batch, mfc_context->macroblock_status_buffer.bo,
187                                   I915_GEM_DOMAIN_INSTRUCTION, I915_GEM_DOMAIN_INSTRUCTION,
188                                   0);                                                                                   /* StreamOut data*/
189     OUT_BCS_RELOC(batch, mfc_context->intra_row_store_scratch_buffer.bo,
190                   I915_GEM_DOMAIN_INSTRUCTION, I915_GEM_DOMAIN_INSTRUCTION,
191                   0);   
192     OUT_BCS_RELOC(batch, mfc_context->deblocking_filter_row_store_scratch_buffer.bo,
193                   I915_GEM_DOMAIN_INSTRUCTION, I915_GEM_DOMAIN_INSTRUCTION,
194                   0);
195     /* 7..22 Reference pictures*/
196     for (i = 0; i < ARRAY_ELEMS(mfc_context->reference_surfaces); i++) {
197         if ( mfc_context->reference_surfaces[i].bo != NULL) {
198             OUT_BCS_RELOC(batch, mfc_context->reference_surfaces[i].bo,
199                           I915_GEM_DOMAIN_INSTRUCTION, I915_GEM_DOMAIN_INSTRUCTION,
200                           0);                   
201         } else {
202             OUT_BCS_BATCH(batch, 0);
203         }
204     }
205     OUT_BCS_RELOC(batch, mfc_context->macroblock_status_buffer.bo,
206                                   I915_GEM_DOMAIN_INSTRUCTION, I915_GEM_DOMAIN_INSTRUCTION,
207                                   0);                                                                                   /* Macroblock status buffer*/
208
209     ADVANCE_BCS_BATCH(batch);
210 }
211
212 static void
213 gen6_mfc_ind_obj_base_addr_state(VADriverContextP ctx, struct gen6_encoder_context *gen6_encoder_context)
214 {
215     struct intel_batchbuffer *batch = gen6_encoder_context->base.batch;
216     struct gen6_mfc_context *mfc_context = &gen6_encoder_context->mfc_context;
217     struct gen6_vme_context *vme_context = &gen6_encoder_context->vme_context;
218
219     BEGIN_BCS_BATCH(batch, 11);
220
221     OUT_BCS_BATCH(batch, MFX_IND_OBJ_BASE_ADDR_STATE | (11 - 2));
222     OUT_BCS_BATCH(batch, 0);
223     OUT_BCS_BATCH(batch, 0);
224     /* MFX Indirect MV Object Base Address */
225     OUT_BCS_RELOC(batch, vme_context->vme_output.bo, I915_GEM_DOMAIN_INSTRUCTION, 0, 0);
226     OUT_BCS_BATCH(batch, 0);    
227     OUT_BCS_BATCH(batch, 0);
228     OUT_BCS_BATCH(batch, 0);
229     OUT_BCS_BATCH(batch, 0);
230     OUT_BCS_BATCH(batch, 0);
231     /*MFC Indirect PAK-BSE Object Base Address for Encoder*/    
232     OUT_BCS_RELOC(batch,
233                   mfc_context->mfc_indirect_pak_bse_object.bo,
234                   I915_GEM_DOMAIN_INSTRUCTION, I915_GEM_DOMAIN_INSTRUCTION,
235                   0);
236     OUT_BCS_RELOC(batch,
237                   mfc_context->mfc_indirect_pak_bse_object.bo,
238                   I915_GEM_DOMAIN_INSTRUCTION, I915_GEM_DOMAIN_INSTRUCTION,
239                   mfc_context->mfc_indirect_pak_bse_object.end_offset);
240
241     ADVANCE_BCS_BATCH(batch);
242 }
243
244 static void
245 gen7_mfc_ind_obj_base_addr_state(VADriverContextP ctx, struct gen6_encoder_context *gen6_encoder_context)
246 {
247     struct intel_batchbuffer *batch = gen6_encoder_context->base.batch;
248     struct gen6_mfc_context *mfc_context = &gen6_encoder_context->mfc_context;
249     struct gen6_vme_context *vme_context = &gen6_encoder_context->vme_context;
250
251     BEGIN_BCS_BATCH(batch, 11);
252
253     OUT_BCS_BATCH(batch, MFX_IND_OBJ_BASE_ADDR_STATE | (11 - 2));
254     OUT_BCS_BATCH(batch, 0);
255     OUT_BCS_BATCH(batch, 0);
256     /* MFX Indirect MV Object Base Address */
257     OUT_BCS_RELOC(batch, vme_context->vme_output.bo, I915_GEM_DOMAIN_INSTRUCTION, 0, 0);
258     OUT_BCS_BATCH(batch, 0x80000000); /* must set, up to 2G */
259     OUT_BCS_BATCH(batch, 0);
260     OUT_BCS_BATCH(batch, 0);
261     OUT_BCS_BATCH(batch, 0);
262     OUT_BCS_BATCH(batch, 0);
263     /*MFC Indirect PAK-BSE Object Base Address for Encoder*/    
264     OUT_BCS_RELOC(batch,
265                   mfc_context->mfc_indirect_pak_bse_object.bo,
266                   I915_GEM_DOMAIN_INSTRUCTION, I915_GEM_DOMAIN_INSTRUCTION,
267                   0);
268     OUT_BCS_RELOC(batch,
269                   mfc_context->mfc_indirect_pak_bse_object.bo,
270                   I915_GEM_DOMAIN_INSTRUCTION, I915_GEM_DOMAIN_INSTRUCTION,
271                   mfc_context->mfc_indirect_pak_bse_object.end_offset);
272
273     ADVANCE_BCS_BATCH(batch);
274 }
275
276 static void
277 gen6_mfc_bsp_buf_base_addr_state(VADriverContextP ctx, struct gen6_encoder_context *gen6_encoder_context)
278 {
279     struct intel_batchbuffer *batch = gen6_encoder_context->base.batch;
280     struct gen6_mfc_context *mfc_context = &gen6_encoder_context->mfc_context;
281
282     BEGIN_BCS_BATCH(batch, 4);
283
284     OUT_BCS_BATCH(batch, MFX_BSP_BUF_BASE_ADDR_STATE | (4 - 2));
285     OUT_BCS_RELOC(batch, mfc_context->bsd_mpc_row_store_scratch_buffer.bo,
286                   I915_GEM_DOMAIN_INSTRUCTION, I915_GEM_DOMAIN_INSTRUCTION,
287                   0);
288     OUT_BCS_BATCH(batch, 0);
289     OUT_BCS_BATCH(batch, 0);
290
291     ADVANCE_BCS_BATCH(batch);
292 }
293
294 static void
295 gen6_mfc_avc_img_state(VADriverContextP ctx,struct encode_state *encode_state,
296                        struct gen6_encoder_context *gen6_encoder_context)
297 {
298     struct intel_batchbuffer *batch = gen6_encoder_context->base.batch;
299     struct gen6_mfc_context *mfc_context = &gen6_encoder_context->mfc_context;
300     VAEncSequenceParameterBufferH264Ext *pSequenceParameter = (VAEncSequenceParameterBufferH264Ext *)encode_state->seq_param_ext->buffer;
301     int width_in_mbs = (mfc_context->surface_state.width + 15) / 16;
302     int height_in_mbs = (mfc_context->surface_state.height + 15) / 16;
303
304     BEGIN_BCS_BATCH(batch, 13);
305     OUT_BCS_BATCH(batch, MFX_AVC_IMG_STATE | (13 - 2));
306     OUT_BCS_BATCH(batch, 
307                   ((width_in_mbs * height_in_mbs) & 0xFFFF));
308     OUT_BCS_BATCH(batch, 
309                   (height_in_mbs << 16) | 
310                   (width_in_mbs << 0));
311     OUT_BCS_BATCH(batch, 
312                   (0 << 24) |     /*Second Chroma QP Offset*/
313                   (0 << 16) |     /*Chroma QP Offset*/
314                   (0 << 14) |   /*Max-bit conformance Intra flag*/
315                   (0 << 13) |   /*Max Macroblock size conformance Inter flag*/
316                   (1 << 12) |   /*Should always be written as "1" */
317                   (0 << 10) |   /*QM Preset FLag */
318                   (0 << 8)  |   /*Image Structure*/
319                   (0 << 0) );   /*Current Decoed Image Frame Store ID, reserved in Encode mode*/
320     OUT_BCS_BATCH(batch,
321                   (400 << 16) |   /*Mininum Frame size*/        
322                   (0 << 15) |   /*Disable reading of Macroblock Status Buffer*/
323                   (0 << 14) |   /*Load BitStream Pointer only once, 1 slic 1 frame*/
324                   (0 << 13) |   /*CABAC 0 word insertion test enable*/
325                   (1 << 12) |   /*MVUnpackedEnable,compliant to DXVA*/
326                   (1 << 10) |   /*Chroma Format IDC, 4:2:0*/
327                   (1 << 7)  |   /*0:CAVLC encoding mode,1:CABAC*/
328                   (0 << 6)  |   /*Only valid for VLD decoding mode*/
329                   (0 << 5)  |   /*Constrained Intra Predition Flag, from PPS*/
330                   (pSequenceParameter->direct_8x8_inference_flag << 4)  |   /*Direct 8x8 inference flag*/
331                   (0 << 3)  |   /*Only 8x8 IDCT Transform Mode Flag*/
332                   (1 << 2)  |   /*Frame MB only flag*/
333                   (0 << 1)  |   /*MBAFF mode is in active*/
334                   (0 << 0) );   /*Field picture flag*/
335     OUT_BCS_BATCH(batch, 
336                   (1<<16)   |   /*Frame Size Rate Control Flag*/  
337                   (1<<12)   |   
338                   (1<<9)    |   /*MB level Rate Control Enabling Flag*/
339                   (1 << 3)  |   /*FrameBitRateMinReportMask*/
340                   (1 << 2)  |   /*FrameBitRateMaxReportMask*/
341                   (1 << 1)  |   /*InterMBMaxSizeReportMask*/
342                   (1 << 0) );   /*IntraMBMaxSizeReportMask*/
343     OUT_BCS_BATCH(batch,                        /*Inter and Intra Conformance Max size limit*/
344                   (0x0600 << 16) |              /*InterMbMaxSz 192 Byte*/
345                   (0x0800) );                   /*IntraMbMaxSz 256 Byte*/
346     OUT_BCS_BATCH(batch, 0x00000000);   /*Reserved : MBZReserved*/
347     OUT_BCS_BATCH(batch, 0x01020304);   /*Slice QP Delta for bitrate control*/                  
348     OUT_BCS_BATCH(batch, 0xFEFDFCFB);           
349     OUT_BCS_BATCH(batch, 0x80601004);   /*MAX = 128KB, MIN = 64KB*/
350     OUT_BCS_BATCH(batch, 0x00800001);   
351     OUT_BCS_BATCH(batch, 0);
352
353     ADVANCE_BCS_BATCH(batch);
354 }
355
356 static void
357 gen7_mfc_avc_img_state(VADriverContextP ctx, struct gen6_encoder_context *gen6_encoder_context)
358 {
359     struct intel_batchbuffer *batch = gen6_encoder_context->base.batch;
360     struct gen6_mfc_context *mfc_context = &gen6_encoder_context->mfc_context;
361
362     int width_in_mbs = (mfc_context->surface_state.width + 15) / 16;
363     int height_in_mbs = (mfc_context->surface_state.height + 15) / 16;
364
365     BEGIN_BCS_BATCH(batch, 16);
366     OUT_BCS_BATCH(batch, MFX_AVC_IMG_STATE | (16 - 2));
367     OUT_BCS_BATCH(batch,
368                   ((width_in_mbs * height_in_mbs) & 0xFFFF));
369     OUT_BCS_BATCH(batch, 
370                   ((height_in_mbs - 1) << 16) | 
371                   ((width_in_mbs - 1) << 0));
372     OUT_BCS_BATCH(batch, 
373                   (0 << 24) |   /* Second Chroma QP Offset */
374                   (0 << 16) |   /* Chroma QP Offset */
375                   (0 << 14) |   /* Max-bit conformance Intra flag */
376                   (0 << 13) |   /* Max Macroblock size conformance Inter flag */
377                   (0 << 12) |   /* FIXME: Weighted_Pred_Flag */
378                   (0 << 10) |   /* FIXME: Weighted_BiPred_Idc */
379                   (0 << 8)  |   /* FIXME: Image Structure */
380                   (0 << 0) );   /* Current Decoed Image Frame Store ID, reserved in Encode mode */
381     OUT_BCS_BATCH(batch,
382                   (0 << 16) |   /* Mininum Frame size */
383                   (0 << 15) |   /* Disable reading of Macroblock Status Buffer */
384                   (0 << 14) |   /* Load BitStream Pointer only once, 1 slic 1 frame */
385                   (0 << 13) |   /* CABAC 0 word insertion test enable */
386                   (1 << 12) |   /* MVUnpackedEnable,compliant to DXVA */
387                   (1 << 10) |   /* Chroma Format IDC, 4:2:0 */
388                   (0 << 9)  |   /* FIXME: MbMvFormatFlag */
389                   (1 << 7)  |   /* 0:CAVLC encoding mode,1:CABAC */
390                   (0 << 6)  |   /* Only valid for VLD decoding mode */
391                   (0 << 5)  |   /* Constrained Intra Predition Flag, from PPS */
392                   (0 << 4)  |   /* Direct 8x8 inference flag */
393                   (0 << 3)  |   /* Only 8x8 IDCT Transform Mode Flag */
394                   (1 << 2)  |   /* Frame MB only flag */
395                   (0 << 1)  |   /* MBAFF mode is in active */
396                   (0 << 0));    /* Field picture flag */
397     OUT_BCS_BATCH(batch, 0);    /* Mainly about MB rate control and debug, just ignoring */
398     OUT_BCS_BATCH(batch,        /* Inter and Intra Conformance Max size limit */
399                   (0xBB8 << 16) |       /* InterMbMaxSz */
400                   (0xEE8) );            /* IntraMbMaxSz */
401     OUT_BCS_BATCH(batch, 0);            /* Reserved */
402     OUT_BCS_BATCH(batch, 0);            /* Slice QP Delta for bitrate control */
403     OUT_BCS_BATCH(batch, 0);            /* Slice QP Delta for bitrate control */        
404     OUT_BCS_BATCH(batch, 0x8C000000);
405     OUT_BCS_BATCH(batch, 0x00010000);
406     OUT_BCS_BATCH(batch, 0);
407     OUT_BCS_BATCH(batch, 0);
408     OUT_BCS_BATCH(batch, 0);
409     OUT_BCS_BATCH(batch, 0);
410
411     ADVANCE_BCS_BATCH(batch);
412 }
413
414 static void gen6_mfc_avc_directmode_state(VADriverContextP ctx, struct gen6_encoder_context *gen6_encoder_context)
415 {
416     struct intel_batchbuffer *batch = gen6_encoder_context->base.batch;
417     struct gen6_mfc_context *mfc_context = &gen6_encoder_context->mfc_context;
418
419     int i;
420
421     BEGIN_BCS_BATCH(batch, 69);
422
423     OUT_BCS_BATCH(batch, MFX_AVC_DIRECTMODE_STATE | (69 - 2));
424
425     /* Reference frames and Current frames */
426     for(i = 0; i < NUM_MFC_DMV_BUFFERS; i++) {
427         if ( mfc_context->direct_mv_buffers[i].bo != NULL) { 
428             OUT_BCS_RELOC(batch, mfc_context->direct_mv_buffers[i].bo,
429                   I915_GEM_DOMAIN_INSTRUCTION, 0,
430                   0);
431          } else {
432              OUT_BCS_BATCH(batch, 0);
433          }
434     }
435
436     /* POL list */
437     for(i = 0; i < 32; i++) {
438         OUT_BCS_BATCH(batch, i/2);
439     }
440     OUT_BCS_BATCH(batch, 0);
441     OUT_BCS_BATCH(batch, 0);
442
443     ADVANCE_BCS_BATCH(batch);
444 }
445
446 static void gen6_mfc_avc_slice_state(VADriverContextP ctx,
447                                      int slice_type,
448                                      struct encode_state *encode_state,
449                                      struct gen6_encoder_context *gen6_encoder_context,
450                                      int rate_control_enable,
451                                      int qp)
452 {
453     struct intel_batchbuffer *batch = gen6_encoder_context->base.batch;
454     struct gen6_mfc_context *mfc_context = &gen6_encoder_context->mfc_context;
455     VAEncSliceParameterBufferH264Ext *pSliceParameter = (VAEncSliceParameterBufferH264Ext *)encode_state->slice_params_ext[0]->buffer; /* TODO: multi slices support */
456
457     BEGIN_BCS_BATCH(batch, 11);;
458
459     OUT_BCS_BATCH(batch, MFX_AVC_SLICE_STATE | (11 - 2) );
460
461         OUT_BCS_BATCH(batch, slice_type);                       /*Slice Type: I:P:B Slice*/
462
463     if ( slice_type == SLICE_TYPE_I )
464         OUT_BCS_BATCH(batch, 0);                        /*no reference frames and pred_weight_table*/
465     else 
466         OUT_BCS_BATCH(batch, 0x00010000);       /*1 reference frame*/
467
468     OUT_BCS_BATCH(batch, 
469                   (pSliceParameter->direct_spatial_mv_pred_flag<<29) |             /*Direct Prediction Type*/
470                   (0<<24) |                /*Enable deblocking operation*/
471                   (qp<<16) |                    /*Slice Quantization Parameter*/
472                   0x0202 );
473     OUT_BCS_BATCH(batch, 0);                    /*First MB X&Y , the postion of current slice*/
474     OUT_BCS_BATCH(batch, ( ((mfc_context->surface_state.height+15)/16) << 16) );
475
476     OUT_BCS_BATCH(batch, 
477                   (rate_control_enable<<31) |           /*in CBR mode RateControlCounterEnable = enable*/
478                   (1<<30) |             /*ResetRateControlCounter*/
479                   (0<<28) |             /*RC Triggle Mode = Always Rate Control*/
480                   (8<<24) |     /*RC Stable Tolerance, middle level*/
481                   (rate_control_enable<<23) |     /*RC Panic Enable*/                 
482                   (0<<22) |     /*QP mode, don't modfiy CBP*/
483                   (0<<21) |     /*MB Type Direct Conversion Disable*/ 
484                   (0<<20) |     /*MB Type Skip Conversion Disable*/ 
485                   (1<<19) |     /*IsLastSlice*/
486                   (0<<18) |     /*BitstreamOutputFlag Compressed BitStream Output Disable Flag 0:enable 1:disable*/
487                   (1<<17) |         /*HeaderPresentFlag*/       
488                   (1<<16) |         /*SliceData PresentFlag*/
489                   (1<<15) |         /*TailPresentFlag*/
490                   (1<<13) |         /*RBSP NAL TYPE*/   
491                   (0<<12) );    /*CabacZeroWordInsertionEnable*/
492         
493     OUT_BCS_BATCH(batch, mfc_context->mfc_indirect_pak_bse_object.offset);
494
495     OUT_BCS_BATCH(batch, (24<<24) |     /*Target QP - 24 is lowest QP*/ 
496                          (20<<16) |     /*Target QP + 20 is highest QP*/
497                          (8<<12)  |
498                          (8<<8)   |
499                          (8<<4)   |
500                          (8<<0));
501     OUT_BCS_BATCH(batch, 0x08888888);   
502     OUT_BCS_BATCH(batch, 0);
503
504     ADVANCE_BCS_BATCH(batch);
505 }
506 static void gen6_mfc_avc_qm_state(VADriverContextP ctx, struct gen6_encoder_context *gen6_encoder_context)
507 {
508     struct intel_batchbuffer *batch = gen6_encoder_context->base.batch;
509     int i;
510
511     BEGIN_BCS_BATCH(batch, 58);
512
513     OUT_BCS_BATCH(batch, MFX_AVC_QM_STATE | 56);
514     OUT_BCS_BATCH(batch, 0xFF ) ; 
515     for( i = 0; i < 56; i++) {
516         OUT_BCS_BATCH(batch, 0x10101010); 
517     }   
518
519     ADVANCE_BCS_BATCH(batch);
520 }
521
522 static void gen6_mfc_avc_fqm_state(VADriverContextP ctx, struct gen6_encoder_context *gen6_encoder_context)
523 {
524     struct intel_batchbuffer *batch = gen6_encoder_context->base.batch;
525     int i;
526
527     BEGIN_BCS_BATCH(batch, 113);
528     OUT_BCS_BATCH(batch, MFC_AVC_FQM_STATE | (113 - 2));
529
530     for(i = 0; i < 112;i++) {
531         OUT_BCS_BATCH(batch, 0x10001000);
532     }   
533
534     ADVANCE_BCS_BATCH(batch);   
535 }
536
537 static void
538 gen7_mfc_qm_state(VADriverContextP ctx,
539                   int qm_type,
540                   unsigned int *qm,
541                   int qm_length,
542                   struct gen6_encoder_context *gen6_encoder_context)
543 {
544     struct intel_batchbuffer *batch = gen6_encoder_context->base.batch;
545     unsigned int qm_buffer[16];
546
547     assert(qm_length <= 16);
548     assert(sizeof(*qm) == 4);
549     memcpy(qm_buffer, qm, qm_length * 4);
550
551     BEGIN_BCS_BATCH(batch, 18);
552     OUT_BCS_BATCH(batch, MFX_QM_STATE | (18 - 2));
553     OUT_BCS_BATCH(batch, qm_type << 0);
554     intel_batchbuffer_data(batch, qm_buffer, 16 * 4);
555     ADVANCE_BCS_BATCH(batch);
556 }
557
558 static void gen7_mfc_avc_qm_state(VADriverContextP ctx, struct gen6_encoder_context *gen6_encoder_context)
559 {
560     unsigned int qm[16] = {
561         0x10101010, 0x10101010, 0x10101010, 0x10101010,
562         0x10101010, 0x10101010, 0x10101010, 0x10101010,
563         0x10101010, 0x10101010, 0x10101010, 0x10101010,
564         0x10101010, 0x10101010, 0x10101010, 0x10101010
565     };
566
567     gen7_mfc_qm_state(ctx, MFX_QM_AVC_4X4_INTRA_MATRIX, qm, 12, gen6_encoder_context);
568     gen7_mfc_qm_state(ctx, MFX_QM_AVC_4X4_INTER_MATRIX, qm, 12, gen6_encoder_context);
569     gen7_mfc_qm_state(ctx, MFX_QM_AVC_8x8_INTRA_MATRIX, qm, 16, gen6_encoder_context);
570     gen7_mfc_qm_state(ctx, MFX_QM_AVC_8x8_INTER_MATRIX, qm, 16, gen6_encoder_context);
571 }
572
573 static void
574 gen7_mfc_fqm_state(VADriverContextP ctx,
575                    int fqm_type,
576                    unsigned int *fqm,
577                    int fqm_length,
578                    struct gen6_encoder_context *gen6_encoder_context)
579 {
580     struct intel_batchbuffer *batch = gen6_encoder_context->base.batch;
581     unsigned int fqm_buffer[32];
582
583     assert(fqm_length <= 32);
584     assert(sizeof(*fqm) == 4);
585     memcpy(fqm_buffer, fqm, fqm_length * 4);
586
587     BEGIN_BCS_BATCH(batch, 34);
588     OUT_BCS_BATCH(batch, MFX_FQM_STATE | (34 - 2));
589     OUT_BCS_BATCH(batch, fqm_type << 0);
590     intel_batchbuffer_data(batch, fqm_buffer, 32 * 4);
591     ADVANCE_BCS_BATCH(batch);
592 }
593
594 static void gen7_mfc_avc_fqm_state(VADriverContextP ctx, struct gen6_encoder_context *gen6_encoder_context)
595 {
596     unsigned int qm[32] = {
597         0x10001000, 0x10001000, 0x10001000, 0x10001000,
598         0x10001000, 0x10001000, 0x10001000, 0x10001000,
599         0x10001000, 0x10001000, 0x10001000, 0x10001000,
600         0x10001000, 0x10001000, 0x10001000, 0x10001000,
601         0x10001000, 0x10001000, 0x10001000, 0x10001000,
602         0x10001000, 0x10001000, 0x10001000, 0x10001000,
603         0x10001000, 0x10001000, 0x10001000, 0x10001000,
604         0x10001000, 0x10001000, 0x10001000, 0x10001000
605     };
606
607     gen7_mfc_fqm_state(ctx, MFX_QM_AVC_4X4_INTRA_MATRIX, qm, 24, gen6_encoder_context);
608     gen7_mfc_fqm_state(ctx, MFX_QM_AVC_4X4_INTER_MATRIX, qm, 24, gen6_encoder_context);
609     gen7_mfc_fqm_state(ctx, MFX_QM_AVC_8x8_INTRA_MATRIX, qm, 32, gen6_encoder_context);
610     gen7_mfc_fqm_state(ctx, MFX_QM_AVC_8x8_INTER_MATRIX, qm, 32, gen6_encoder_context);
611 }
612
613 static void gen6_mfc_avc_ref_idx_state(VADriverContextP ctx, struct gen6_encoder_context *gen6_encoder_context)
614 {
615     struct intel_batchbuffer *batch = gen6_encoder_context->base.batch;
616     int i;
617
618         BEGIN_BCS_BATCH(batch, 10);
619         OUT_BCS_BATCH(batch, MFX_AVC_REF_IDX_STATE | 8); 
620         OUT_BCS_BATCH(batch, 0);                  //Select L0
621         OUT_BCS_BATCH(batch, 0x80808020);         //Only 1 reference
622         for(i = 0; i < 7; i++) {
623                 OUT_BCS_BATCH(batch, 0x80808080);
624         }   
625         ADVANCE_BCS_BATCH(batch);
626
627         BEGIN_BCS_BATCH(batch, 10);
628         OUT_BCS_BATCH(batch, MFX_AVC_REF_IDX_STATE | 8); 
629         OUT_BCS_BATCH(batch, 1);                  //Select L1
630         OUT_BCS_BATCH(batch, 0x80808022);         //Only 1 reference
631         for(i = 0; i < 7; i++) {
632                 OUT_BCS_BATCH(batch, 0x80808080);
633         }   
634         ADVANCE_BCS_BATCH(batch);
635 }
636         
637 static void
638 gen6_mfc_avc_insert_object(VADriverContextP ctx, struct gen6_encoder_context *gen6_encoder_context,
639                            unsigned int *insert_data, int lenght_in_dws, int data_bits_in_last_dw,
640                            int skip_emul_byte_count, int is_last_header, int is_end_of_slice, int emulation_flag)
641 {
642     struct intel_batchbuffer *batch = gen6_encoder_context->base.batch;
643
644     BEGIN_BCS_BATCH(batch, lenght_in_dws + 2);
645
646     OUT_BCS_BATCH(batch, MFC_AVC_INSERT_OBJECT | (lenght_in_dws + 2 - 2));
647     OUT_BCS_BATCH(batch,
648                   (0 << 16) |   /* always start at offset 0 */
649                   (data_bits_in_last_dw << 8) |
650                   (skip_emul_byte_count << 4) |
651                   (!!emulation_flag << 3) |
652                   ((!!is_last_header) << 2) |
653                   ((!!is_end_of_slice) << 1) |
654                   (0 << 0));    /* FIXME: ??? */
655
656     intel_batchbuffer_data(batch, insert_data, lenght_in_dws * 4);
657     ADVANCE_BCS_BATCH(batch);
658 }
659
660 static int
661 gen6_mfc_avc_pak_object_intra(VADriverContextP ctx, int x, int y, int end_mb, int qp,unsigned int *msg,
662                               struct gen6_encoder_context *gen6_encoder_context,
663                               int intra_mb_size_in_bits)
664 {
665     struct intel_batchbuffer *batch = gen6_encoder_context->base.batch;
666     int len_in_dwords = 11;
667     unsigned char target_mb_size = intra_mb_size_in_bits / 16;     //In Words
668     unsigned char max_mb_size = target_mb_size * 2 > 255? 255: target_mb_size * 2 ;
669
670     BEGIN_BCS_BATCH(batch, len_in_dwords);
671
672     OUT_BCS_BATCH(batch, MFC_AVC_PAK_OBJECT | (len_in_dwords - 2));
673     OUT_BCS_BATCH(batch, 0);
674     OUT_BCS_BATCH(batch, 0);
675     OUT_BCS_BATCH(batch, 
676                   (0 << 24) |           /* PackedMvNum, Debug*/
677                   (0 << 20) |           /* No motion vector */
678                   (1 << 19) |           /* CbpDcY */
679                   (1 << 18) |           /* CbpDcU */
680                   (1 << 17) |           /* CbpDcV */
681                   (msg[0] & 0xFFFF) );
682
683     OUT_BCS_BATCH(batch, (0xFFFF<<16) | (y << 8) | x);          /* Code Block Pattern for Y*/
684     OUT_BCS_BATCH(batch, 0x000F000F);                                                   /* Code Block Pattern */                
685     OUT_BCS_BATCH(batch, (0 << 27) | (end_mb << 26) | qp);      /* Last MB */
686
687     /*Stuff for Intra MB*/
688     OUT_BCS_BATCH(batch, msg[1]);                       /* We using Intra16x16 no 4x4 predmode*/        
689     OUT_BCS_BATCH(batch, msg[2]);       
690     OUT_BCS_BATCH(batch, msg[3]&0xFC);          
691     
692     /*MaxSizeInWord and TargetSzieInWord*/
693     OUT_BCS_BATCH(batch, (max_mb_size << 24) |
694                          (target_mb_size << 16) );
695
696     ADVANCE_BCS_BATCH(batch);
697
698     return len_in_dwords;
699 }
700
701 static int gen6_mfc_avc_pak_object_inter(VADriverContextP ctx, int x, int y, int end_mb, int qp, unsigned int offset,
702                                          struct gen6_encoder_context *gen6_encoder_context,
703                                          int inter_mb_size_in_bits, int slice_type)
704 {
705     struct intel_batchbuffer *batch = gen6_encoder_context->base.batch;
706     int len_in_dwords = 11;
707     unsigned char target_mb_size = inter_mb_size_in_bits / 16;     //In Words
708     unsigned char max_mb_size = target_mb_size * 16 > 255? 255: target_mb_size * 16 ;
709
710     BEGIN_BCS_BATCH(batch, len_in_dwords);
711
712     OUT_BCS_BATCH(batch, MFC_AVC_PAK_OBJECT | (len_in_dwords - 2));
713
714     OUT_BCS_BATCH(batch, 32);         /* 32 MV*/
715     OUT_BCS_BATCH(batch, offset);
716
717     OUT_BCS_BATCH(batch, 
718                   (1 << 24) |     /* PackedMvNum, Debug*/
719                   (4 << 20) |     /* 8 MV, SNB don't use it*/
720                   (1 << 19) |     /* CbpDcY */
721                   (1 << 18) |     /* CbpDcU */
722                   (1 << 17) |     /* CbpDcV */
723                   (0 << 15) |     /* Transform8x8Flag = 0*/
724                   (0 << 14) |     /* Frame based*/
725                   (0 << 13) |     /* Inter MB */
726                   (1 << 8)  |     /* MbType = P_L0_16x16 */   
727                   (0 << 7)  |     /* MBZ for frame */
728                   (0 << 6)  |     /* MBZ */
729                   (2 << 4)  |     /* MBZ for inter*/
730                   (0 << 3)  |     /* MBZ */
731                   (0 << 2)  |     /* SkipMbFlag */
732                   (0 << 0));      /* InterMbMode */
733
734     OUT_BCS_BATCH(batch, (0xFFFF<<16) | (y << 8) | x);        /* Code Block Pattern for Y*/
735     OUT_BCS_BATCH(batch, 0x000F000F);                         /* Code Block Pattern */  
736 #if 0 
737     if ( slice_type == SLICE_TYPE_B) {
738         OUT_BCS_BATCH(batch, (0xF<<28) | (end_mb << 26) | qp);  /* Last MB */
739     } else {
740         OUT_BCS_BATCH(batch, (end_mb << 26) | qp);      /* Last MB */
741     }
742 #else
743     OUT_BCS_BATCH(batch, (end_mb << 26) | qp);  /* Last MB */
744 #endif
745
746
747     /*Stuff for Inter MB*/
748     OUT_BCS_BATCH(batch, 0x0);        
749     OUT_BCS_BATCH(batch, 0x0);    
750     OUT_BCS_BATCH(batch, 0x0);        
751
752     /*MaxSizeInWord and TargetSzieInWord*/
753     OUT_BCS_BATCH(batch, (max_mb_size << 24) |
754                          (target_mb_size << 16) );
755
756     ADVANCE_BCS_BATCH(batch);
757
758     return len_in_dwords;
759 }
760
761 static void gen6_mfc_init(VADriverContextP ctx, struct gen6_encoder_context *gen6_encoder_context)
762 {
763     struct i965_driver_data *i965 = i965_driver_data(ctx);
764     struct gen6_mfc_context *mfc_context = &gen6_encoder_context->mfc_context;
765     dri_bo *bo;
766     int i;
767
768     /*Encode common setup for MFC*/
769     dri_bo_unreference(mfc_context->post_deblocking_output.bo);
770     mfc_context->post_deblocking_output.bo = NULL;
771
772     dri_bo_unreference(mfc_context->pre_deblocking_output.bo);
773     mfc_context->pre_deblocking_output.bo = NULL;
774
775     dri_bo_unreference(mfc_context->uncompressed_picture_source.bo);
776     mfc_context->uncompressed_picture_source.bo = NULL;
777
778     dri_bo_unreference(mfc_context->mfc_indirect_pak_bse_object.bo); 
779     mfc_context->mfc_indirect_pak_bse_object.bo = NULL;
780
781     for (i = 0; i < NUM_MFC_DMV_BUFFERS; i++){
782         if ( mfc_context->direct_mv_buffers[i].bo != NULL);
783             dri_bo_unreference(mfc_context->direct_mv_buffers[i].bo);
784         mfc_context->direct_mv_buffers[i].bo = NULL;
785     }
786
787     for (i = 0; i < MAX_MFC_REFERENCE_SURFACES; i++){
788         if (mfc_context->reference_surfaces[i].bo != NULL)
789             dri_bo_unreference(mfc_context->reference_surfaces[i].bo);
790         mfc_context->reference_surfaces[i].bo = NULL;  
791     }
792
793     dri_bo_unreference(mfc_context->intra_row_store_scratch_buffer.bo);
794     bo = dri_bo_alloc(i965->intel.bufmgr,
795                       "Buffer",
796                       128 * 64,
797                       64);
798     assert(bo);
799     mfc_context->intra_row_store_scratch_buffer.bo = bo;
800
801     dri_bo_unreference(mfc_context->macroblock_status_buffer.bo);
802     bo = dri_bo_alloc(i965->intel.bufmgr,
803                       "Buffer",
804                       4*9600,
805                       64);
806     assert(bo);
807     mfc_context->macroblock_status_buffer.bo = bo;
808
809     dri_bo_unreference(mfc_context->deblocking_filter_row_store_scratch_buffer.bo);
810     bo = dri_bo_alloc(i965->intel.bufmgr,
811                       "Buffer",
812                       49152,  /* 6 * 128 * 64 */
813                       64);
814     assert(bo);
815     mfc_context->deblocking_filter_row_store_scratch_buffer.bo = bo;
816
817     dri_bo_unreference(mfc_context->bsd_mpc_row_store_scratch_buffer.bo);
818     bo = dri_bo_alloc(i965->intel.bufmgr,
819                       "Buffer",
820                       12288, /* 1.5 * 128 * 64 */
821                       0x1000);
822     assert(bo);
823     mfc_context->bsd_mpc_row_store_scratch_buffer.bo = bo;
824 }
825
826 void gen6_mfc_avc_pipeline_programing(VADriverContextP ctx,
827                                       struct encode_state *encode_state,
828                                       struct gen6_encoder_context *gen6_encoder_context)
829 {
830     struct i965_driver_data *i965 = i965_driver_data(ctx);
831     struct intel_batchbuffer *batch = gen6_encoder_context->base.batch;
832     struct gen6_mfc_context *mfc_context = &gen6_encoder_context->mfc_context;
833     struct gen6_vme_context *vme_context = &gen6_encoder_context->vme_context;
834     VAEncSequenceParameterBufferH264Ext *pSequenceParameter = (VAEncSequenceParameterBufferH264Ext *)encode_state->seq_param_ext->buffer;
835     VAEncPictureParameterBufferH264Ext *pPicParameter = (VAEncPictureParameterBufferH264Ext *)encode_state->pic_param_ext->buffer;
836     VAEncSliceParameterBufferH264Ext *pSliceParameter = (VAEncSliceParameterBufferH264Ext *)encode_state->slice_params_ext[0]->buffer; /* FIXME: multi slices */
837     VAEncH264DecRefPicMarkingBuffer *pDecRefPicMarking = NULL;
838     unsigned int *msg = NULL, offset = 0;
839     int emit_new_state = 1, object_len_in_bytes;
840     int is_intra = pSliceParameter->slice_type == SLICE_TYPE_I;
841     int width_in_mbs = (mfc_context->surface_state.width + 15) / 16;
842     int height_in_mbs = (mfc_context->surface_state.height + 15) / 16;
843     int x,y;
844     int rate_control_mode = pSequenceParameter->rate_control_method; 
845     float fps =  pSequenceParameter->time_scale * 0.5 / pSequenceParameter->num_units_in_tick ;
846     int inter_mb_size = pSequenceParameter->bits_per_second * 1.0 / fps / width_in_mbs / height_in_mbs;
847     int intra_mb_size = inter_mb_size * 5.0;
848     int qp = pPicParameter->pic_init_qp;
849     unsigned char *slice_header = NULL;
850     int slice_header_length_in_bits = 0;
851     unsigned int tail_data[] = { 0x0 };
852
853     if (encode_state->dec_ref_pic_marking)
854         pDecRefPicMarking = (VAEncH264DecRefPicMarkingBuffer *)encode_state->dec_ref_pic_marking->buffer;
855
856     slice_header_length_in_bits = build_avc_slice_header(pSequenceParameter, pPicParameter, pSliceParameter, pDecRefPicMarking, &slice_header);
857
858     if ( rate_control_mode != 2) {
859         qp = 26;
860         if ( intra_mb_size > 384*8)         //ONE MB raw data is 384 bytes
861             intra_mb_size = 384*8;
862         if ( inter_mb_size > 256*8)
863             intra_mb_size = 256*8;
864     }
865
866     intel_batchbuffer_start_atomic_bcs(batch, 0x1000); 
867     
868     if (is_intra) {
869         dri_bo_map(vme_context->vme_output.bo , 1);
870         msg = (unsigned int *)vme_context->vme_output.bo->virtual;
871     }
872
873     for (y = 0; y < height_in_mbs; y++) {
874         for (x = 0; x < width_in_mbs; x++) { 
875             int last_mb = (y == (height_in_mbs-1)) && ( x == (width_in_mbs-1) );
876             
877             if (emit_new_state) {
878                 intel_batchbuffer_emit_mi_flush(batch);
879                 
880                 if (IS_GEN7(i965->intel.device_id)) {
881                     gen7_mfc_pipe_mode_select(ctx, MFX_FORMAT_AVC, gen6_encoder_context);
882                     gen7_mfc_surface_state(ctx, gen6_encoder_context);
883                     gen7_mfc_ind_obj_base_addr_state(ctx, gen6_encoder_context);
884                 } else {
885                     gen6_mfc_pipe_mode_select(ctx, gen6_encoder_context);
886                     gen6_mfc_surface_state(ctx, gen6_encoder_context);
887                     gen6_mfc_ind_obj_base_addr_state(ctx, gen6_encoder_context);
888                 }
889
890                 gen6_mfc_pipe_buf_addr_state(ctx, gen6_encoder_context);
891                 gen6_mfc_bsp_buf_base_addr_state(ctx, gen6_encoder_context);
892
893                 if (IS_GEN7(i965->intel.device_id)) {
894                     gen7_mfc_avc_img_state(ctx, gen6_encoder_context);
895                     gen7_mfc_avc_qm_state(ctx, gen6_encoder_context);
896                     gen7_mfc_avc_fqm_state(ctx, gen6_encoder_context);
897                 } else {
898                     gen6_mfc_avc_img_state(ctx, encode_state,gen6_encoder_context);
899                     gen6_mfc_avc_qm_state(ctx, gen6_encoder_context);
900                     gen6_mfc_avc_fqm_state(ctx, gen6_encoder_context);
901                 }
902
903                 gen6_mfc_avc_directmode_state(ctx, gen6_encoder_context); 
904                 gen6_mfc_avc_ref_idx_state(ctx, gen6_encoder_context);
905                 gen6_mfc_avc_slice_state(ctx, pSliceParameter->slice_type, 
906                                          encode_state, gen6_encoder_context, 
907                                          rate_control_mode == 0, qp);
908
909                 if (encode_state->packed_header_data[VAEncPackedHeaderSPS]) {
910                     VAEncPackedHeaderParameterBuffer *param = NULL;
911                     unsigned int *header_data = (unsigned int *)encode_state->packed_header_data[VAEncPackedHeaderSPS]->buffer;
912                     unsigned int length_in_bits;
913
914                     assert(encode_state->packed_header_param[VAEncPackedHeaderSPS]);
915                     param = (VAEncPackedHeaderParameterBuffer *)encode_state->packed_header_param[VAEncPackedHeaderSPS]->buffer;
916                     length_in_bits = param->length_in_bits[0];
917
918                     gen6_mfc_avc_insert_object(ctx, 
919                                                gen6_encoder_context,
920                                                header_data,
921                                                ALIGN(length_in_bits, 32) >> 5,
922                                                length_in_bits & 0x1f,
923                                                param->skip_emulation_check_count,
924                                                0,
925                                                0,
926                                                param->insert_emulation_bytes);
927                 }
928
929                 if (encode_state->packed_header_data[VAEncPackedHeaderPPS]) {
930                     VAEncPackedHeaderParameterBuffer *param = NULL;
931                     unsigned int *header_data = (unsigned int *)encode_state->packed_header_data[VAEncPackedHeaderPPS]->buffer;
932                     unsigned int length_in_bits;
933
934                     assert(encode_state->packed_header_param[VAEncPackedHeaderPPS]);
935                     param = (VAEncPackedHeaderParameterBuffer *)encode_state->packed_header_param[VAEncPackedHeaderPPS]->buffer;
936                     length_in_bits = param->length_in_bits[0];
937
938                     gen6_mfc_avc_insert_object(ctx, 
939                                                gen6_encoder_context,
940                                                header_data,
941                                                ALIGN(length_in_bits, 32) >> 5,
942                                                length_in_bits & 0x1f,
943                                                param->skip_emulation_check_count,
944                                                0,
945                                                0,
946                                                param->insert_emulation_bytes);
947                 }
948
949                 gen6_mfc_avc_insert_object(ctx, gen6_encoder_context,
950                                            (unsigned int *)slice_header, ALIGN(slice_header_length_in_bits, 32) >> 5, slice_header_length_in_bits & 0x1f,
951                                            5,  /* first 5 bytes are start code + nal unit type */
952                                            1, 0, 1);
953                 emit_new_state = 0;
954             }
955
956             if (is_intra) {
957                 assert(msg);
958                 object_len_in_bytes = gen6_mfc_avc_pak_object_intra(ctx, x, y, last_mb, qp, msg, gen6_encoder_context, intra_mb_size);
959                 msg += 4;
960             } else {
961                 object_len_in_bytes = gen6_mfc_avc_pak_object_inter(ctx, x, y, last_mb, qp, offset, gen6_encoder_context, inter_mb_size, pSliceParameter->slice_type);
962                 offset += 64;
963             }
964
965             if (intel_batchbuffer_check_free_space(batch, object_len_in_bytes) == 0) {
966                 assert(0);
967                 intel_batchbuffer_end_atomic(batch);
968                 intel_batchbuffer_flush(batch);
969                 emit_new_state = 1;
970                 intel_batchbuffer_start_atomic_bcs(batch, 0x1000);
971             }
972         }
973     }
974
975     gen6_mfc_avc_insert_object(ctx, gen6_encoder_context,
976                                tail_data, sizeof(tail_data) >> 2, 32,
977                                sizeof(tail_data), 1, 1, 1);
978
979     if (is_intra)
980         dri_bo_unmap(vme_context->vme_output.bo);
981
982     free(slice_header);
983
984     intel_batchbuffer_end_atomic(batch);
985 }
986
987 static void 
988 gen6_mfc_free_avc_surface(void **data)
989 {
990     struct gen6_mfc_avc_surface_aux *avc_surface = *data;
991
992     if (!avc_surface)
993         return;
994
995     dri_bo_unreference(avc_surface->dmv_top);
996     avc_surface->dmv_top = NULL;
997     dri_bo_unreference(avc_surface->dmv_bottom);
998     avc_surface->dmv_bottom = NULL;
999
1000     free(avc_surface);
1001     *data = NULL;
1002 }
1003
1004 static VAStatus gen6_mfc_avc_prepare(VADriverContextP ctx, 
1005                                      struct encode_state *encode_state,
1006                                      struct gen6_encoder_context *gen6_encoder_context)
1007 {
1008     struct i965_driver_data *i965 = i965_driver_data(ctx);
1009     struct gen6_mfc_context *mfc_context = &gen6_encoder_context->mfc_context;
1010     struct object_surface *obj_surface; 
1011     struct object_buffer *obj_buffer;
1012     struct gen6_mfc_avc_surface_aux* gen6_avc_surface;
1013     dri_bo *bo;
1014     VAEncPictureParameterBufferH264Ext *pPicParameter = (VAEncPictureParameterBufferH264Ext *)encode_state->pic_param_ext->buffer;
1015     VAStatus vaStatus = VA_STATUS_SUCCESS;
1016         int i;
1017
1018     /*Setup all the input&output object*/
1019
1020     /* Setup current frame and current direct mv buffer*/
1021     obj_surface = SURFACE(pPicParameter->CurrPic.picture_id);
1022     assert(obj_surface);
1023     i965_check_alloc_surface_bo(ctx, obj_surface, 1, VA_FOURCC('N','V','1','2'));
1024     if ( obj_surface->private_data == NULL) {
1025         gen6_avc_surface = calloc(sizeof(struct gen6_mfc_avc_surface_aux), 1);
1026         gen6_avc_surface->dmv_top = 
1027             dri_bo_alloc(i965->intel.bufmgr,
1028                     "Buffer",
1029                     68*8192, 
1030                     64);
1031         gen6_avc_surface->dmv_bottom = 
1032             dri_bo_alloc(i965->intel.bufmgr,
1033                             "Buffer",
1034                             68*8192, 
1035                             64);
1036         assert(gen6_avc_surface->dmv_top);
1037         assert(gen6_avc_surface->dmv_bottom);
1038         obj_surface->private_data = (void *)gen6_avc_surface;
1039         obj_surface->free_private_data = (void *)gen6_mfc_free_avc_surface; 
1040     }
1041     gen6_avc_surface = (struct gen6_mfc_avc_surface_aux*) obj_surface->private_data;
1042     mfc_context->direct_mv_buffers[NUM_MFC_DMV_BUFFERS - 2].bo = gen6_avc_surface->dmv_top;
1043     mfc_context->direct_mv_buffers[NUM_MFC_DMV_BUFFERS - 1].bo = gen6_avc_surface->dmv_bottom;
1044         dri_bo_reference(gen6_avc_surface->dmv_top);
1045         dri_bo_reference(gen6_avc_surface->dmv_bottom);
1046
1047     mfc_context->post_deblocking_output.bo = obj_surface->bo;
1048     dri_bo_reference(mfc_context->post_deblocking_output.bo);
1049
1050     mfc_context->surface_state.width = obj_surface->orig_width;
1051     mfc_context->surface_state.height = obj_surface->orig_height;
1052     mfc_context->surface_state.w_pitch = obj_surface->width;
1053     mfc_context->surface_state.h_pitch = obj_surface->height;
1054     
1055     /* Setup reference frames and direct mv buffers*/
1056     for(i = 0; i < MAX_MFC_REFERENCE_SURFACES; i++) {
1057                 if ( pPicParameter->ReferenceFrames[i].picture_id != VA_INVALID_ID ) { 
1058                         obj_surface = SURFACE(pPicParameter->ReferenceFrames[i].picture_id);
1059                         assert(obj_surface);
1060                         if (obj_surface->bo != NULL) {
1061                                 mfc_context->reference_surfaces[i].bo = obj_surface->bo;
1062                                 dri_bo_reference(obj_surface->bo);
1063                         }
1064             /* Check DMV buffer */
1065             if ( obj_surface->private_data == NULL) {
1066                 
1067                 gen6_avc_surface = calloc(sizeof(struct gen6_mfc_avc_surface_aux), 1);
1068                 gen6_avc_surface->dmv_top = 
1069                     dri_bo_alloc(i965->intel.bufmgr,
1070                             "Buffer",
1071                             68*8192, 
1072                             64);
1073                 gen6_avc_surface->dmv_bottom = 
1074                     dri_bo_alloc(i965->intel.bufmgr,
1075                             "Buffer",
1076                             68*8192, 
1077                             64);
1078                 assert(gen6_avc_surface->dmv_top);
1079                 assert(gen6_avc_surface->dmv_bottom);
1080                 obj_surface->private_data = gen6_avc_surface;
1081                 obj_surface->free_private_data = gen6_mfc_free_avc_surface; 
1082             }
1083     
1084             gen6_avc_surface = (struct gen6_mfc_avc_surface_aux*) obj_surface->private_data;
1085             /* Setup DMV buffer */
1086             mfc_context->direct_mv_buffers[i*2].bo = gen6_avc_surface->dmv_top;
1087             mfc_context->direct_mv_buffers[i*2+1].bo = gen6_avc_surface->dmv_bottom; 
1088             dri_bo_reference(gen6_avc_surface->dmv_top);
1089             dri_bo_reference(gen6_avc_surface->dmv_bottom);
1090                 } else {
1091                         break;
1092                 }
1093         }
1094         
1095     obj_surface = SURFACE(encode_state->current_render_target);
1096     assert(obj_surface && obj_surface->bo);
1097     mfc_context->uncompressed_picture_source.bo = obj_surface->bo;
1098     dri_bo_reference(mfc_context->uncompressed_picture_source.bo);
1099
1100     obj_buffer = BUFFER (pPicParameter->CodedBuf); /* FIXME: fix this later */
1101     bo = obj_buffer->buffer_store->bo;
1102     assert(bo);
1103     mfc_context->mfc_indirect_pak_bse_object.bo = bo;
1104     mfc_context->mfc_indirect_pak_bse_object.offset = ALIGN(sizeof(VACodedBufferSegment), 64);
1105     mfc_context->mfc_indirect_pak_bse_object.end_offset = ALIGN (obj_buffer->size_element - 0x1000, 0x1000);
1106     dri_bo_reference(mfc_context->mfc_indirect_pak_bse_object.bo);
1107
1108     /*Programing bcs pipeline*/
1109     gen6_mfc_avc_pipeline_programing(ctx, encode_state, gen6_encoder_context);  //filling the pipeline
1110         
1111     return vaStatus;
1112 }
1113
1114 static VAStatus gen6_mfc_run(VADriverContextP ctx, 
1115                              struct encode_state *encode_state,
1116                              struct gen6_encoder_context *gen6_encoder_context)
1117 {
1118     struct intel_batchbuffer *batch = gen6_encoder_context->base.batch;
1119
1120     intel_batchbuffer_flush(batch);             //run the pipeline
1121
1122     return VA_STATUS_SUCCESS;
1123 }
1124
1125 static VAStatus gen6_mfc_stop(VADriverContextP ctx, 
1126                               struct encode_state *encode_state,
1127                               struct gen6_encoder_context *gen6_encoder_context)
1128 {
1129 #if 0
1130     struct i965_driver_data *i965 = i965_driver_data(ctx);
1131     struct gen6_mfc_context *mfc_context = &gen6_encoder_context->mfc_context;
1132         
1133     VAEncPictureParameterBufferH264Ext *pPicParameter = (VAEncPictureParameterBufferH264Ext *)encode_state->pic_param_ext->buffer;
1134         
1135     struct object_surface *obj_surface = SURFACE(pPicParameter->reconstructed_picture);
1136     //struct object_surface *obj_surface = SURFACE(pPicParameter->reference_picture[0]);
1137     //struct object_surface *obj_surface = SURFACE(encode_state->current_render_target);
1138     my_debug(obj_surface);
1139
1140 #endif
1141
1142     return VA_STATUS_SUCCESS;
1143 }
1144
1145 static VAStatus
1146 gen6_mfc_avc_encode_picture(VADriverContextP ctx, 
1147                             struct encode_state *encode_state,
1148                             struct gen6_encoder_context *gen6_encoder_context)
1149 {
1150     gen6_mfc_init(ctx, gen6_encoder_context);
1151     gen6_mfc_avc_prepare(ctx, encode_state, gen6_encoder_context);
1152     gen6_mfc_run(ctx, encode_state, gen6_encoder_context);
1153     gen6_mfc_stop(ctx, encode_state, gen6_encoder_context);
1154
1155     return VA_STATUS_SUCCESS;
1156 }
1157
1158 VAStatus
1159 gen6_mfc_pipeline(VADriverContextP ctx,
1160                   VAProfile profile,
1161                   struct encode_state *encode_state,
1162                   struct gen6_encoder_context *gen6_encoder_context)
1163 {
1164     VAStatus vaStatus;
1165
1166     switch (profile) {
1167     case VAProfileH264Baseline:
1168         vaStatus = gen6_mfc_avc_encode_picture(ctx, encode_state, gen6_encoder_context);
1169         break;
1170
1171         /* FIXME: add for other profile */
1172     default:
1173         vaStatus = VA_STATUS_ERROR_UNSUPPORTED_PROFILE;
1174         break;
1175     }
1176
1177     return vaStatus;
1178 }
1179
1180 Bool gen6_mfc_context_init(VADriverContextP ctx, struct gen6_mfc_context *mfc_context)
1181 {
1182     return True;
1183 }
1184
1185 Bool gen6_mfc_context_destroy(struct gen6_mfc_context *mfc_context)
1186 {
1187     int i;
1188
1189     dri_bo_unreference(mfc_context->post_deblocking_output.bo);
1190     mfc_context->post_deblocking_output.bo = NULL;
1191
1192     dri_bo_unreference(mfc_context->pre_deblocking_output.bo);
1193     mfc_context->pre_deblocking_output.bo = NULL;
1194
1195     dri_bo_unreference(mfc_context->uncompressed_picture_source.bo);
1196     mfc_context->uncompressed_picture_source.bo = NULL;
1197
1198     dri_bo_unreference(mfc_context->mfc_indirect_pak_bse_object.bo); 
1199     mfc_context->mfc_indirect_pak_bse_object.bo = NULL;
1200
1201     for (i = 0; i < NUM_MFC_DMV_BUFFERS; i++){
1202         dri_bo_unreference(mfc_context->direct_mv_buffers[i].bo);
1203         mfc_context->direct_mv_buffers[i].bo = NULL;
1204     }
1205
1206     dri_bo_unreference(mfc_context->intra_row_store_scratch_buffer.bo);
1207     mfc_context->intra_row_store_scratch_buffer.bo = NULL;
1208
1209         dri_bo_unreference(mfc_context->macroblock_status_buffer.bo);
1210         mfc_context->macroblock_status_buffer.bo = NULL;
1211
1212     dri_bo_unreference(mfc_context->deblocking_filter_row_store_scratch_buffer.bo);
1213     mfc_context->deblocking_filter_row_store_scratch_buffer.bo = NULL;
1214
1215     dri_bo_unreference(mfc_context->bsd_mpc_row_store_scratch_buffer.bo);
1216     mfc_context->bsd_mpc_row_store_scratch_buffer.bo = NULL;
1217
1218
1219     for (i = 0; i < MAX_MFC_REFERENCE_SURFACES; i++){
1220         dri_bo_unreference(mfc_context->reference_surfaces[i].bo);
1221         mfc_context->reference_surfaces[i].bo = NULL;  
1222     }
1223
1224     return True;
1225 }