i965_drv_video: insert PPS/SPS header into coded buffer
[profile/ivi/vaapi-intel-driver.git] / src / gen6_mfc.c
1 /*
2  * Copyright © 2010-2011 Intel Corporation
3  *
4  * Permission is hereby granted, free of charge, to any person obtaining a
5  * copy of this software and associated documentation files (the
6  * "Software"), to deal in the Software without restriction, including
7  * without limitation the rights to use, copy, modify, merge, publish,
8  * distribute, sub license, and/or sell copies of the Software, and to
9  * permit persons to whom the Software is furnished to do so, subject to
10  * the following conditions:
11  *
12  * The above copyright notice and this permission notice (including the
13  * next paragraph) shall be included in all copies or substantial portions
14  * of the Software.
15  *
16  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
17  * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
18  * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
19  * IN NO EVENT SHALL PRECISION INSIGHT AND/OR ITS SUPPLIERS BE LIABLE FOR
20  * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
21  * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
22  * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
23  *
24  * Authors:
25  *    Zhou Chang <chang.zhou@intel.com>
26  *
27  */
28
29 #include <stdio.h>
30 #include <stdlib.h>
31 #include <string.h>
32 #include <assert.h>
33
34 #include "assert.h"
35 #include "intel_batchbuffer.h"
36 #include "i965_defines.h"
37 #include "i965_structs.h"
38 #include "i965_drv_video.h"
39 #include "i965_encoder.h"
40 #include "i965_encoder_utils.h"
41
42 static void
43 gen6_mfc_pipe_mode_select(VADriverContextP ctx, struct gen6_encoder_context *gen6_encoder_context)
44 {
45     struct intel_batchbuffer *batch = gen6_encoder_context->base.batch;
46
47     BEGIN_BCS_BATCH(batch, 4);
48
49     OUT_BCS_BATCH(batch, MFX_PIPE_MODE_SELECT | (4 - 2));
50     OUT_BCS_BATCH(batch,
51                   (0 << 10) | /* disable Stream-Out , advanced QP/bitrate control need enable it*/
52                   (1 << 9)  | /* Post Deblocking Output */
53                   (0 << 8)  | /* Pre Deblocking Output */
54                   (0 << 7)  | /* disable TLB prefectch */
55                   (0 << 5)  | /* not in stitch mode */
56                   (1 << 4)  | /* encoding mode */
57                   (2 << 0));  /* Standard Select: AVC */
58     OUT_BCS_BATCH(batch,
59                   (0 << 20) | /* round flag in PB slice */
60                   (0 << 19) | /* round flag in Intra8x8 */
61                   (0 << 7)  | /* expand NOA bus flag */
62                   (1 << 6)  | /* must be 1 */
63                   (0 << 5)  | /* disable clock gating for NOA */
64                   (0 << 4)  | /* terminate if AVC motion and POC table error occurs */
65                   (0 << 3)  | /* terminate if AVC mbdata error occurs */
66                   (0 << 2)  | /* terminate if AVC CABAC/CAVLC decode error occurs */
67                   (0 << 1)  | /* AVC long field motion vector */
68                   (0 << 0));  /* always calculate AVC ILDB boundary strength */
69     OUT_BCS_BATCH(batch, 0);
70
71     ADVANCE_BCS_BATCH(batch);
72 }
73
74 static void
75 gen7_mfc_pipe_mode_select(VADriverContextP ctx,
76                           int standard_select,
77                           struct gen6_encoder_context *gen6_encoder_context)
78 {
79     struct intel_batchbuffer *batch = gen6_encoder_context->base.batch;
80
81     assert(standard_select == MFX_FORMAT_MPEG2 ||
82            standard_select == MFX_FORMAT_AVC);
83
84     BEGIN_BCS_BATCH(batch, 5);
85     OUT_BCS_BATCH(batch, MFX_PIPE_MODE_SELECT | (5 - 2));
86     OUT_BCS_BATCH(batch,
87                   (MFX_LONG_MODE << 17) | /* Must be long format for encoder */
88                   (MFD_MODE_VLD << 15) | /* VLD mode */
89                   (0 << 10) | /* disable Stream-Out */
90                   (1 << 9)  | /* Post Deblocking Output */
91                   (0 << 8)  | /* Pre Deblocking Output */
92                   (0 << 5)  | /* not in stitch mode */
93                   (1 << 4)  | /* encoding mode */
94                   (standard_select << 0));  /* standard select: avc or mpeg2 */
95     OUT_BCS_BATCH(batch,
96                   (0 << 7)  | /* expand NOA bus flag */
97                   (0 << 6)  | /* disable slice-level clock gating */
98                   (0 << 5)  | /* disable clock gating for NOA */
99                   (0 << 4)  | /* terminate if AVC motion and POC table error occurs */
100                   (0 << 3)  | /* terminate if AVC mbdata error occurs */
101                   (0 << 2)  | /* terminate if AVC CABAC/CAVLC decode error occurs */
102                   (0 << 1)  |
103                   (0 << 0));
104     OUT_BCS_BATCH(batch, 0);
105     OUT_BCS_BATCH(batch, 0);
106
107     ADVANCE_BCS_BATCH(batch);
108 }
109
110 static void
111 gen6_mfc_surface_state(VADriverContextP ctx, struct gen6_encoder_context *gen6_encoder_context)
112 {
113     struct intel_batchbuffer *batch = gen6_encoder_context->base.batch;
114     struct gen6_mfc_context *mfc_context = &gen6_encoder_context->mfc_context;
115
116     BEGIN_BCS_BATCH(batch, 6);
117
118     OUT_BCS_BATCH(batch, MFX_SURFACE_STATE | (6 - 2));
119     OUT_BCS_BATCH(batch, 0);
120     OUT_BCS_BATCH(batch,
121                   ((mfc_context->surface_state.height - 1) << 19) |
122                   ((mfc_context->surface_state.width - 1) << 6));
123     OUT_BCS_BATCH(batch,
124                   (MFX_SURFACE_PLANAR_420_8 << 28) | /* 420 planar YUV surface */
125                   (1 << 27) | /* must be 1 for interleave U/V, hardware requirement */
126                   (0 << 22) | /* surface object control state, FIXME??? */
127                   ((mfc_context->surface_state.w_pitch - 1) << 3) | /* pitch */
128                   (0 << 2)  | /* must be 0 for interleave U/V */
129                   (1 << 1)  | /* must be y-tiled */
130                   (I965_TILEWALK_YMAJOR << 0));                         /* tile walk, TILEWALK_YMAJOR */
131     OUT_BCS_BATCH(batch,
132                   (0 << 16) |                                                           /* must be 0 for interleave U/V */
133                   (mfc_context->surface_state.h_pitch));                /* y offset for U(cb) */
134     OUT_BCS_BATCH(batch, 0);
135     ADVANCE_BCS_BATCH(batch);
136 }
137
138 static void
139 gen7_mfc_surface_state(VADriverContextP ctx, struct gen6_encoder_context *gen6_encoder_context)
140 {
141     struct intel_batchbuffer *batch = gen6_encoder_context->base.batch;
142     struct gen6_mfc_context *mfc_context = &gen6_encoder_context->mfc_context;
143
144     BEGIN_BCS_BATCH(batch, 6);
145
146     OUT_BCS_BATCH(batch, MFX_SURFACE_STATE | (6 - 2));
147     OUT_BCS_BATCH(batch, 0);
148     OUT_BCS_BATCH(batch,
149                   ((mfc_context->surface_state.height - 1) << 18) |
150                   ((mfc_context->surface_state.width - 1) << 4));
151     OUT_BCS_BATCH(batch,
152                   (MFX_SURFACE_PLANAR_420_8 << 28) | /* 420 planar YUV surface */
153                   (1 << 27) | /* must be 1 for interleave U/V, hardware requirement */
154                   (0 << 22) | /* surface object control state, FIXME??? */
155                   ((mfc_context->surface_state.w_pitch - 1) << 3) | /* pitch */
156                   (0 << 2)  | /* must be 0 for interleave U/V */
157                   (1 << 1)  | /* must be tiled */
158                   (I965_TILEWALK_YMAJOR << 0));  /* tile walk, TILEWALK_YMAJOR */
159     OUT_BCS_BATCH(batch,
160                   (0 << 16) |                                                           /* must be 0 for interleave U/V */
161                   (mfc_context->surface_state.h_pitch));                /* y offset for U(cb) */
162     OUT_BCS_BATCH(batch, 0);
163     ADVANCE_BCS_BATCH(batch);
164 }
165
166 static void
167 gen6_mfc_pipe_buf_addr_state(VADriverContextP ctx, struct gen6_encoder_context *gen6_encoder_context)
168 {
169     struct intel_batchbuffer *batch = gen6_encoder_context->base.batch;
170     struct gen6_mfc_context *mfc_context = &gen6_encoder_context->mfc_context;
171     int i;
172
173     BEGIN_BCS_BATCH(batch, 24);
174
175     OUT_BCS_BATCH(batch, MFX_PIPE_BUF_ADDR_STATE | (24 - 2));
176
177     OUT_BCS_BATCH(batch, 0);                                                                                    /* pre output addr   */
178
179     OUT_BCS_RELOC(batch, mfc_context->post_deblocking_output.bo,
180                   I915_GEM_DOMAIN_INSTRUCTION, I915_GEM_DOMAIN_INSTRUCTION,
181                   0);                                                                                   /* post output addr  */ 
182
183     OUT_BCS_RELOC(batch, mfc_context->uncompressed_picture_source.bo,
184                   I915_GEM_DOMAIN_INSTRUCTION, I915_GEM_DOMAIN_INSTRUCTION,
185                   0);                                                                                   /* uncompressed data */
186     OUT_BCS_RELOC(batch, mfc_context->macroblock_status_buffer.bo,
187                                   I915_GEM_DOMAIN_INSTRUCTION, I915_GEM_DOMAIN_INSTRUCTION,
188                                   0);                                                                                   /* StreamOut data*/
189     OUT_BCS_RELOC(batch, mfc_context->intra_row_store_scratch_buffer.bo,
190                   I915_GEM_DOMAIN_INSTRUCTION, I915_GEM_DOMAIN_INSTRUCTION,
191                   0);   
192     OUT_BCS_RELOC(batch, mfc_context->deblocking_filter_row_store_scratch_buffer.bo,
193                   I915_GEM_DOMAIN_INSTRUCTION, I915_GEM_DOMAIN_INSTRUCTION,
194                   0);
195     /* 7..22 Reference pictures*/
196     for (i = 0; i < ARRAY_ELEMS(mfc_context->reference_surfaces); i++) {
197         if ( mfc_context->reference_surfaces[i].bo != NULL) {
198             OUT_BCS_RELOC(batch, mfc_context->reference_surfaces[i].bo,
199                           I915_GEM_DOMAIN_INSTRUCTION, I915_GEM_DOMAIN_INSTRUCTION,
200                           0);                   
201         } else {
202             OUT_BCS_BATCH(batch, 0);
203         }
204     }
205     OUT_BCS_RELOC(batch, mfc_context->macroblock_status_buffer.bo,
206                                   I915_GEM_DOMAIN_INSTRUCTION, I915_GEM_DOMAIN_INSTRUCTION,
207                                   0);                                                                                   /* Macroblock status buffer*/
208
209     ADVANCE_BCS_BATCH(batch);
210 }
211
212 static void
213 gen6_mfc_ind_obj_base_addr_state(VADriverContextP ctx, struct gen6_encoder_context *gen6_encoder_context)
214 {
215     struct intel_batchbuffer *batch = gen6_encoder_context->base.batch;
216     struct gen6_mfc_context *mfc_context = &gen6_encoder_context->mfc_context;
217     struct gen6_vme_context *vme_context = &gen6_encoder_context->vme_context;
218
219     BEGIN_BCS_BATCH(batch, 11);
220
221     OUT_BCS_BATCH(batch, MFX_IND_OBJ_BASE_ADDR_STATE | (11 - 2));
222     OUT_BCS_BATCH(batch, 0);
223     OUT_BCS_BATCH(batch, 0);
224     /* MFX Indirect MV Object Base Address */
225     OUT_BCS_RELOC(batch, vme_context->vme_output.bo, I915_GEM_DOMAIN_INSTRUCTION, 0, 0);
226     OUT_BCS_BATCH(batch, 0);    
227     OUT_BCS_BATCH(batch, 0);
228     OUT_BCS_BATCH(batch, 0);
229     OUT_BCS_BATCH(batch, 0);
230     OUT_BCS_BATCH(batch, 0);
231     /*MFC Indirect PAK-BSE Object Base Address for Encoder*/    
232     OUT_BCS_RELOC(batch,
233                   mfc_context->mfc_indirect_pak_bse_object.bo,
234                   I915_GEM_DOMAIN_INSTRUCTION, I915_GEM_DOMAIN_INSTRUCTION,
235                   0);
236     OUT_BCS_RELOC(batch,
237                   mfc_context->mfc_indirect_pak_bse_object.bo,
238                   I915_GEM_DOMAIN_INSTRUCTION, I915_GEM_DOMAIN_INSTRUCTION,
239                   mfc_context->mfc_indirect_pak_bse_object.end_offset);
240
241     ADVANCE_BCS_BATCH(batch);
242 }
243
244 static void
245 gen7_mfc_ind_obj_base_addr_state(VADriverContextP ctx, struct gen6_encoder_context *gen6_encoder_context)
246 {
247     struct intel_batchbuffer *batch = gen6_encoder_context->base.batch;
248     struct gen6_mfc_context *mfc_context = &gen6_encoder_context->mfc_context;
249     struct gen6_vme_context *vme_context = &gen6_encoder_context->vme_context;
250
251     BEGIN_BCS_BATCH(batch, 11);
252
253     OUT_BCS_BATCH(batch, MFX_IND_OBJ_BASE_ADDR_STATE | (11 - 2));
254     OUT_BCS_BATCH(batch, 0);
255     OUT_BCS_BATCH(batch, 0);
256     /* MFX Indirect MV Object Base Address */
257     OUT_BCS_RELOC(batch, vme_context->vme_output.bo, I915_GEM_DOMAIN_INSTRUCTION, 0, 0);
258     OUT_BCS_BATCH(batch, 0x80000000); /* must set, up to 2G */
259     OUT_BCS_BATCH(batch, 0);
260     OUT_BCS_BATCH(batch, 0);
261     OUT_BCS_BATCH(batch, 0);
262     OUT_BCS_BATCH(batch, 0);
263     /*MFC Indirect PAK-BSE Object Base Address for Encoder*/    
264     OUT_BCS_RELOC(batch,
265                   mfc_context->mfc_indirect_pak_bse_object.bo,
266                   I915_GEM_DOMAIN_INSTRUCTION, I915_GEM_DOMAIN_INSTRUCTION,
267                   0);
268     OUT_BCS_RELOC(batch,
269                   mfc_context->mfc_indirect_pak_bse_object.bo,
270                   I915_GEM_DOMAIN_INSTRUCTION, I915_GEM_DOMAIN_INSTRUCTION,
271                   mfc_context->mfc_indirect_pak_bse_object.end_offset);
272
273     ADVANCE_BCS_BATCH(batch);
274 }
275
276 static void
277 gen6_mfc_bsp_buf_base_addr_state(VADriverContextP ctx, struct gen6_encoder_context *gen6_encoder_context)
278 {
279     struct intel_batchbuffer *batch = gen6_encoder_context->base.batch;
280     struct gen6_mfc_context *mfc_context = &gen6_encoder_context->mfc_context;
281
282     BEGIN_BCS_BATCH(batch, 4);
283
284     OUT_BCS_BATCH(batch, MFX_BSP_BUF_BASE_ADDR_STATE | (4 - 2));
285     OUT_BCS_RELOC(batch, mfc_context->bsd_mpc_row_store_scratch_buffer.bo,
286                   I915_GEM_DOMAIN_INSTRUCTION, I915_GEM_DOMAIN_INSTRUCTION,
287                   0);
288     OUT_BCS_BATCH(batch, 0);
289     OUT_BCS_BATCH(batch, 0);
290
291     ADVANCE_BCS_BATCH(batch);
292 }
293
294 static void
295 gen6_mfc_avc_img_state(VADriverContextP ctx,struct encode_state *encode_state,
296                        struct gen6_encoder_context *gen6_encoder_context)
297 {
298     struct intel_batchbuffer *batch = gen6_encoder_context->base.batch;
299     struct gen6_mfc_context *mfc_context = &gen6_encoder_context->mfc_context;
300     VAEncSequenceParameterBufferH264Ext *pSequenceParameter = (VAEncSequenceParameterBufferH264Ext *)encode_state->seq_param_ext->buffer;
301     int width_in_mbs = (mfc_context->surface_state.width + 15) / 16;
302     int height_in_mbs = (mfc_context->surface_state.height + 15) / 16;
303
304     BEGIN_BCS_BATCH(batch, 13);
305     OUT_BCS_BATCH(batch, MFX_AVC_IMG_STATE | (13 - 2));
306     OUT_BCS_BATCH(batch, 
307                   ((width_in_mbs * height_in_mbs) & 0xFFFF));
308     OUT_BCS_BATCH(batch, 
309                   (height_in_mbs << 16) | 
310                   (width_in_mbs << 0));
311     OUT_BCS_BATCH(batch, 
312                   (0 << 24) |     /*Second Chroma QP Offset*/
313                   (0 << 16) |     /*Chroma QP Offset*/
314                   (0 << 14) |   /*Max-bit conformance Intra flag*/
315                   (0 << 13) |   /*Max Macroblock size conformance Inter flag*/
316                   (1 << 12) |   /*Should always be written as "1" */
317                   (0 << 10) |   /*QM Preset FLag */
318                   (0 << 8)  |   /*Image Structure*/
319                   (0 << 0) );   /*Current Decoed Image Frame Store ID, reserved in Encode mode*/
320     OUT_BCS_BATCH(batch,
321                   (400 << 16) |   /*Mininum Frame size*/        
322                   (0 << 15) |   /*Disable reading of Macroblock Status Buffer*/
323                   (0 << 14) |   /*Load BitStream Pointer only once, 1 slic 1 frame*/
324                   (0 << 13) |   /*CABAC 0 word insertion test enable*/
325                   (1 << 12) |   /*MVUnpackedEnable,compliant to DXVA*/
326                   (1 << 10) |   /*Chroma Format IDC, 4:2:0*/
327                   (1 << 7)  |   /*0:CAVLC encoding mode,1:CABAC*/
328                   (0 << 6)  |   /*Only valid for VLD decoding mode*/
329                   (0 << 5)  |   /*Constrained Intra Predition Flag, from PPS*/
330                   (pSequenceParameter->direct_8x8_inference_flag << 4)  |   /*Direct 8x8 inference flag*/
331                   (0 << 3)  |   /*Only 8x8 IDCT Transform Mode Flag*/
332                   (1 << 2)  |   /*Frame MB only flag*/
333                   (0 << 1)  |   /*MBAFF mode is in active*/
334                   (0 << 0) );   /*Field picture flag*/
335     OUT_BCS_BATCH(batch, 
336                   (1<<16)   |   /*Frame Size Rate Control Flag*/  
337                   (1<<12)   |   
338                   (1<<9)    |   /*MB level Rate Control Enabling Flag*/
339                   (1 << 3)  |   /*FrameBitRateMinReportMask*/
340                   (1 << 2)  |   /*FrameBitRateMaxReportMask*/
341                   (1 << 1)  |   /*InterMBMaxSizeReportMask*/
342                   (1 << 0) );   /*IntraMBMaxSizeReportMask*/
343     OUT_BCS_BATCH(batch,                        /*Inter and Intra Conformance Max size limit*/
344                   (0x0600 << 16) |              /*InterMbMaxSz 192 Byte*/
345                   (0x0800) );                   /*IntraMbMaxSz 256 Byte*/
346     OUT_BCS_BATCH(batch, 0x00000000);   /*Reserved : MBZReserved*/
347     OUT_BCS_BATCH(batch, 0x01020304);   /*Slice QP Delta for bitrate control*/                  
348     OUT_BCS_BATCH(batch, 0xFEFDFCFB);           
349     OUT_BCS_BATCH(batch, 0x80601004);   /*MAX = 128KB, MIN = 64KB*/
350     OUT_BCS_BATCH(batch, 0x00800001);   
351     OUT_BCS_BATCH(batch, 0);
352
353     ADVANCE_BCS_BATCH(batch);
354 }
355
356 static void
357 gen7_mfc_avc_img_state(VADriverContextP ctx, struct gen6_encoder_context *gen6_encoder_context)
358 {
359     struct intel_batchbuffer *batch = gen6_encoder_context->base.batch;
360     struct gen6_mfc_context *mfc_context = &gen6_encoder_context->mfc_context;
361
362     int width_in_mbs = (mfc_context->surface_state.width + 15) / 16;
363     int height_in_mbs = (mfc_context->surface_state.height + 15) / 16;
364
365     BEGIN_BCS_BATCH(batch, 16);
366     OUT_BCS_BATCH(batch, MFX_AVC_IMG_STATE | (16 - 2));
367     OUT_BCS_BATCH(batch,
368                   ((width_in_mbs * height_in_mbs) & 0xFFFF));
369     OUT_BCS_BATCH(batch, 
370                   ((height_in_mbs - 1) << 16) | 
371                   ((width_in_mbs - 1) << 0));
372     OUT_BCS_BATCH(batch, 
373                   (0 << 24) |   /* Second Chroma QP Offset */
374                   (0 << 16) |   /* Chroma QP Offset */
375                   (0 << 14) |   /* Max-bit conformance Intra flag */
376                   (0 << 13) |   /* Max Macroblock size conformance Inter flag */
377                   (0 << 12) |   /* FIXME: Weighted_Pred_Flag */
378                   (0 << 10) |   /* FIXME: Weighted_BiPred_Idc */
379                   (0 << 8)  |   /* FIXME: Image Structure */
380                   (0 << 0) );   /* Current Decoed Image Frame Store ID, reserved in Encode mode */
381     OUT_BCS_BATCH(batch,
382                   (0 << 16) |   /* Mininum Frame size */
383                   (0 << 15) |   /* Disable reading of Macroblock Status Buffer */
384                   (0 << 14) |   /* Load BitStream Pointer only once, 1 slic 1 frame */
385                   (0 << 13) |   /* CABAC 0 word insertion test enable */
386                   (1 << 12) |   /* MVUnpackedEnable,compliant to DXVA */
387                   (1 << 10) |   /* Chroma Format IDC, 4:2:0 */
388                   (0 << 9)  |   /* FIXME: MbMvFormatFlag */
389                   (1 << 7)  |   /* 0:CAVLC encoding mode,1:CABAC */
390                   (0 << 6)  |   /* Only valid for VLD decoding mode */
391                   (0 << 5)  |   /* Constrained Intra Predition Flag, from PPS */
392                   (0 << 4)  |   /* Direct 8x8 inference flag */
393                   (0 << 3)  |   /* Only 8x8 IDCT Transform Mode Flag */
394                   (1 << 2)  |   /* Frame MB only flag */
395                   (0 << 1)  |   /* MBAFF mode is in active */
396                   (0 << 0));    /* Field picture flag */
397     OUT_BCS_BATCH(batch, 0);    /* Mainly about MB rate control and debug, just ignoring */
398     OUT_BCS_BATCH(batch,        /* Inter and Intra Conformance Max size limit */
399                   (0xBB8 << 16) |       /* InterMbMaxSz */
400                   (0xEE8) );            /* IntraMbMaxSz */
401     OUT_BCS_BATCH(batch, 0);            /* Reserved */
402     OUT_BCS_BATCH(batch, 0);            /* Slice QP Delta for bitrate control */
403     OUT_BCS_BATCH(batch, 0);            /* Slice QP Delta for bitrate control */        
404     OUT_BCS_BATCH(batch, 0x8C000000);
405     OUT_BCS_BATCH(batch, 0x00010000);
406     OUT_BCS_BATCH(batch, 0);
407     OUT_BCS_BATCH(batch, 0);
408     OUT_BCS_BATCH(batch, 0);
409     OUT_BCS_BATCH(batch, 0);
410
411     ADVANCE_BCS_BATCH(batch);
412 }
413
414 static void gen6_mfc_avc_directmode_state(VADriverContextP ctx, struct gen6_encoder_context *gen6_encoder_context)
415 {
416     struct intel_batchbuffer *batch = gen6_encoder_context->base.batch;
417     struct gen6_mfc_context *mfc_context = &gen6_encoder_context->mfc_context;
418
419     int i;
420
421     BEGIN_BCS_BATCH(batch, 69);
422
423     OUT_BCS_BATCH(batch, MFX_AVC_DIRECTMODE_STATE | (69 - 2));
424
425     /* Reference frames and Current frames */
426     for(i = 0; i < NUM_MFC_DMV_BUFFERS; i++) {
427         if ( mfc_context->direct_mv_buffers[i].bo != NULL) { 
428             OUT_BCS_RELOC(batch, mfc_context->direct_mv_buffers[i].bo,
429                   I915_GEM_DOMAIN_INSTRUCTION, 0,
430                   0);
431          } else {
432              OUT_BCS_BATCH(batch, 0);
433          }
434     }
435
436     /* POL list */
437     for(i = 0; i < 32; i++) {
438         OUT_BCS_BATCH(batch, i/2);
439     }
440     OUT_BCS_BATCH(batch, 0);
441     OUT_BCS_BATCH(batch, 0);
442
443     ADVANCE_BCS_BATCH(batch);
444 }
445
446 static void gen6_mfc_avc_slice_state(VADriverContextP ctx,
447                                      int slice_type,
448                                      struct encode_state *encode_state,
449                                      struct gen6_encoder_context *gen6_encoder_context,
450                                      int rate_control_enable,
451                                      int qp)
452 {
453     struct intel_batchbuffer *batch = gen6_encoder_context->base.batch;
454     struct gen6_mfc_context *mfc_context = &gen6_encoder_context->mfc_context;
455     VAEncSliceParameterBufferH264Ext *pSliceParameter = (VAEncSliceParameterBufferH264Ext *)encode_state->slice_params_ext[0]->buffer; /* TODO: multi slices support */
456
457     BEGIN_BCS_BATCH(batch, 11);;
458
459     OUT_BCS_BATCH(batch, MFX_AVC_SLICE_STATE | (11 - 2) );
460
461         OUT_BCS_BATCH(batch, slice_type);                       /*Slice Type: I:P:B Slice*/
462
463     if ( slice_type == SLICE_TYPE_I )
464         OUT_BCS_BATCH(batch, 0);                        /*no reference frames and pred_weight_table*/
465     else 
466         OUT_BCS_BATCH(batch, 0x00010000);       /*1 reference frame*/
467
468     OUT_BCS_BATCH(batch, 
469                   (pSliceParameter->direct_spatial_mv_pred_flag<<29) |             /*Direct Prediction Type*/
470                   (0<<24) |                /*Enable deblocking operation*/
471                   (qp<<16) |                    /*Slice Quantization Parameter*/
472                   0x0202 );
473     OUT_BCS_BATCH(batch, 0);                    /*First MB X&Y , the postion of current slice*/
474     OUT_BCS_BATCH(batch, ( ((mfc_context->surface_state.height+15)/16) << 16) );
475
476     OUT_BCS_BATCH(batch, 
477                   (rate_control_enable<<31) |           /*in CBR mode RateControlCounterEnable = enable*/
478                   (1<<30) |             /*ResetRateControlCounter*/
479                   (0<<28) |             /*RC Triggle Mode = Always Rate Control*/
480                   (8<<24) |     /*RC Stable Tolerance, middle level*/
481                   (rate_control_enable<<23) |     /*RC Panic Enable*/                 
482                   (0<<22) |     /*QP mode, don't modfiy CBP*/
483                   (0<<21) |     /*MB Type Direct Conversion Disable*/ 
484                   (0<<20) |     /*MB Type Skip Conversion Disable*/ 
485                   (1<<19) |     /*IsLastSlice*/
486                   (0<<18) |     /*BitstreamOutputFlag Compressed BitStream Output Disable Flag 0:enable 1:disable*/
487                   (1<<17) |         /*HeaderPresentFlag*/       
488                   (1<<16) |         /*SliceData PresentFlag*/
489                   (1<<15) |         /*TailPresentFlag*/
490                   (1<<13) |         /*RBSP NAL TYPE*/   
491                   (0<<12) );    /*CabacZeroWordInsertionEnable*/
492         
493     OUT_BCS_BATCH(batch, mfc_context->mfc_indirect_pak_bse_object.offset);
494
495     OUT_BCS_BATCH(batch, (24<<24) |     /*Target QP - 24 is lowest QP*/ 
496                          (20<<16) |     /*Target QP + 20 is highest QP*/
497                          (8<<12)  |
498                          (8<<8)   |
499                          (8<<4)   |
500                          (8<<0));
501     OUT_BCS_BATCH(batch, 0x08888888);   
502     OUT_BCS_BATCH(batch, 0);
503
504     ADVANCE_BCS_BATCH(batch);
505 }
506 static void gen6_mfc_avc_qm_state(VADriverContextP ctx, struct gen6_encoder_context *gen6_encoder_context)
507 {
508     struct intel_batchbuffer *batch = gen6_encoder_context->base.batch;
509     int i;
510
511     BEGIN_BCS_BATCH(batch, 58);
512
513     OUT_BCS_BATCH(batch, MFX_AVC_QM_STATE | 56);
514     OUT_BCS_BATCH(batch, 0xFF ) ; 
515     for( i = 0; i < 56; i++) {
516         OUT_BCS_BATCH(batch, 0x10101010); 
517     }   
518
519     ADVANCE_BCS_BATCH(batch);
520 }
521
522 static void gen6_mfc_avc_fqm_state(VADriverContextP ctx, struct gen6_encoder_context *gen6_encoder_context)
523 {
524     struct intel_batchbuffer *batch = gen6_encoder_context->base.batch;
525     int i;
526
527     BEGIN_BCS_BATCH(batch, 113);
528     OUT_BCS_BATCH(batch, MFC_AVC_FQM_STATE | (113 - 2));
529
530     for(i = 0; i < 112;i++) {
531         OUT_BCS_BATCH(batch, 0x10001000);
532     }   
533
534     ADVANCE_BCS_BATCH(batch);   
535 }
536
537 static void
538 gen7_mfc_qm_state(VADriverContextP ctx,
539                   int qm_type,
540                   unsigned int *qm,
541                   int qm_length,
542                   struct gen6_encoder_context *gen6_encoder_context)
543 {
544     struct intel_batchbuffer *batch = gen6_encoder_context->base.batch;
545     unsigned int qm_buffer[16];
546
547     assert(qm_length <= 16);
548     assert(sizeof(*qm) == 4);
549     memcpy(qm_buffer, qm, qm_length * 4);
550
551     BEGIN_BCS_BATCH(batch, 18);
552     OUT_BCS_BATCH(batch, MFX_QM_STATE | (18 - 2));
553     OUT_BCS_BATCH(batch, qm_type << 0);
554     intel_batchbuffer_data(batch, qm_buffer, 16 * 4);
555     ADVANCE_BCS_BATCH(batch);
556 }
557
558 static void gen7_mfc_avc_qm_state(VADriverContextP ctx, struct gen6_encoder_context *gen6_encoder_context)
559 {
560     unsigned int qm[16] = {
561         0x10101010, 0x10101010, 0x10101010, 0x10101010,
562         0x10101010, 0x10101010, 0x10101010, 0x10101010,
563         0x10101010, 0x10101010, 0x10101010, 0x10101010,
564         0x10101010, 0x10101010, 0x10101010, 0x10101010
565     };
566
567     gen7_mfc_qm_state(ctx, MFX_QM_AVC_4X4_INTRA_MATRIX, qm, 12, gen6_encoder_context);
568     gen7_mfc_qm_state(ctx, MFX_QM_AVC_4X4_INTER_MATRIX, qm, 12, gen6_encoder_context);
569     gen7_mfc_qm_state(ctx, MFX_QM_AVC_8x8_INTRA_MATRIX, qm, 16, gen6_encoder_context);
570     gen7_mfc_qm_state(ctx, MFX_QM_AVC_8x8_INTER_MATRIX, qm, 16, gen6_encoder_context);
571 }
572
573 static void
574 gen7_mfc_fqm_state(VADriverContextP ctx,
575                    int fqm_type,
576                    unsigned int *fqm,
577                    int fqm_length,
578                    struct gen6_encoder_context *gen6_encoder_context)
579 {
580     struct intel_batchbuffer *batch = gen6_encoder_context->base.batch;
581     unsigned int fqm_buffer[32];
582
583     assert(fqm_length <= 32);
584     assert(sizeof(*fqm) == 4);
585     memcpy(fqm_buffer, fqm, fqm_length * 4);
586
587     BEGIN_BCS_BATCH(batch, 34);
588     OUT_BCS_BATCH(batch, MFX_FQM_STATE | (34 - 2));
589     OUT_BCS_BATCH(batch, fqm_type << 0);
590     intel_batchbuffer_data(batch, fqm_buffer, 32 * 4);
591     ADVANCE_BCS_BATCH(batch);
592 }
593
594 static void gen7_mfc_avc_fqm_state(VADriverContextP ctx, struct gen6_encoder_context *gen6_encoder_context)
595 {
596     unsigned int qm[32] = {
597         0x10001000, 0x10001000, 0x10001000, 0x10001000,
598         0x10001000, 0x10001000, 0x10001000, 0x10001000,
599         0x10001000, 0x10001000, 0x10001000, 0x10001000,
600         0x10001000, 0x10001000, 0x10001000, 0x10001000,
601         0x10001000, 0x10001000, 0x10001000, 0x10001000,
602         0x10001000, 0x10001000, 0x10001000, 0x10001000,
603         0x10001000, 0x10001000, 0x10001000, 0x10001000,
604         0x10001000, 0x10001000, 0x10001000, 0x10001000
605     };
606
607     gen7_mfc_fqm_state(ctx, MFX_QM_AVC_4X4_INTRA_MATRIX, qm, 24, gen6_encoder_context);
608     gen7_mfc_fqm_state(ctx, MFX_QM_AVC_4X4_INTER_MATRIX, qm, 24, gen6_encoder_context);
609     gen7_mfc_fqm_state(ctx, MFX_QM_AVC_8x8_INTRA_MATRIX, qm, 32, gen6_encoder_context);
610     gen7_mfc_fqm_state(ctx, MFX_QM_AVC_8x8_INTER_MATRIX, qm, 32, gen6_encoder_context);
611 }
612
613 static void gen6_mfc_avc_ref_idx_state(VADriverContextP ctx, struct gen6_encoder_context *gen6_encoder_context)
614 {
615     struct intel_batchbuffer *batch = gen6_encoder_context->base.batch;
616     int i;
617
618         BEGIN_BCS_BATCH(batch, 10);
619         OUT_BCS_BATCH(batch, MFX_AVC_REF_IDX_STATE | 8); 
620         OUT_BCS_BATCH(batch, 0);                  //Select L0
621         OUT_BCS_BATCH(batch, 0x80808020);         //Only 1 reference
622         for(i = 0; i < 7; i++) {
623                 OUT_BCS_BATCH(batch, 0x80808080);
624         }   
625         ADVANCE_BCS_BATCH(batch);
626
627         BEGIN_BCS_BATCH(batch, 10);
628         OUT_BCS_BATCH(batch, MFX_AVC_REF_IDX_STATE | 8); 
629         OUT_BCS_BATCH(batch, 1);                  //Select L1
630         OUT_BCS_BATCH(batch, 0x80808022);         //Only 1 reference
631         for(i = 0; i < 7; i++) {
632                 OUT_BCS_BATCH(batch, 0x80808080);
633         }   
634         ADVANCE_BCS_BATCH(batch);
635 }
636         
637 static void
638 gen6_mfc_avc_insert_object(VADriverContextP ctx, struct gen6_encoder_context *gen6_encoder_context,
639                            unsigned int *insert_data, int lenght_in_dws, int data_bits_in_last_dw,
640                            int skip_emul_byte_count, int is_last_header, int is_end_of_slice, int emulataion_flag)
641 {
642     struct intel_batchbuffer *batch = gen6_encoder_context->base.batch;
643
644     BEGIN_BCS_BATCH(batch, lenght_in_dws + 2);
645
646     OUT_BCS_BATCH(batch, MFC_AVC_INSERT_OBJECT | (lenght_in_dws + 2 - 2));
647     OUT_BCS_BATCH(batch,
648                   (0 << 16) |   /* always start at offset 0 */
649                   (data_bits_in_last_dw << 8) |
650                   (skip_emul_byte_count << 4) |
651                   (emulataion_flag << 3) |
652                   ((!!is_last_header) << 2) |
653                   ((!!is_end_of_slice) << 1) |
654                   (0 << 0));    /* FIXME: ??? */
655
656     intel_batchbuffer_data(batch, insert_data, lenght_in_dws * 4);
657     ADVANCE_BCS_BATCH(batch);
658 }
659
660 static int
661 gen6_mfc_avc_pak_object_intra(VADriverContextP ctx, int x, int y, int end_mb, int qp,unsigned int *msg,
662                               struct gen6_encoder_context *gen6_encoder_context,
663                               int intra_mb_size_in_bits)
664 {
665     struct intel_batchbuffer *batch = gen6_encoder_context->base.batch;
666     int len_in_dwords = 11;
667     unsigned char target_mb_size = intra_mb_size_in_bits / 16;     //In Words
668     unsigned char max_mb_size = target_mb_size * 2 > 255? 255: target_mb_size * 2 ;
669
670     BEGIN_BCS_BATCH(batch, len_in_dwords);
671
672     OUT_BCS_BATCH(batch, MFC_AVC_PAK_OBJECT | (len_in_dwords - 2));
673     OUT_BCS_BATCH(batch, 0);
674     OUT_BCS_BATCH(batch, 0);
675     OUT_BCS_BATCH(batch, 
676                   (0 << 24) |           /* PackedMvNum, Debug*/
677                   (0 << 20) |           /* No motion vector */
678                   (1 << 19) |           /* CbpDcY */
679                   (1 << 18) |           /* CbpDcU */
680                   (1 << 17) |           /* CbpDcV */
681                   (msg[0] & 0xFFFF) );
682
683     OUT_BCS_BATCH(batch, (0xFFFF<<16) | (y << 8) | x);          /* Code Block Pattern for Y*/
684     OUT_BCS_BATCH(batch, 0x000F000F);                                                   /* Code Block Pattern */                
685     OUT_BCS_BATCH(batch, (0 << 27) | (end_mb << 26) | qp);      /* Last MB */
686
687     /*Stuff for Intra MB*/
688     OUT_BCS_BATCH(batch, msg[1]);                       /* We using Intra16x16 no 4x4 predmode*/        
689     OUT_BCS_BATCH(batch, msg[2]);       
690     OUT_BCS_BATCH(batch, msg[3]&0xFC);          
691     
692     /*MaxSizeInWord and TargetSzieInWord*/
693     OUT_BCS_BATCH(batch, (max_mb_size << 24) |
694                          (target_mb_size << 16) );
695
696     ADVANCE_BCS_BATCH(batch);
697
698     return len_in_dwords;
699 }
700
701 static int gen6_mfc_avc_pak_object_inter(VADriverContextP ctx, int x, int y, int end_mb, int qp, unsigned int offset,
702                                          struct gen6_encoder_context *gen6_encoder_context,
703                                          int inter_mb_size_in_bits, int slice_type)
704 {
705     struct intel_batchbuffer *batch = gen6_encoder_context->base.batch;
706     int len_in_dwords = 11;
707     unsigned char target_mb_size = inter_mb_size_in_bits / 16;     //In Words
708     unsigned char max_mb_size = target_mb_size * 16 > 255? 255: target_mb_size * 16 ;
709
710     BEGIN_BCS_BATCH(batch, len_in_dwords);
711
712     OUT_BCS_BATCH(batch, MFC_AVC_PAK_OBJECT | (len_in_dwords - 2));
713
714     OUT_BCS_BATCH(batch, 32);         /* 32 MV*/
715     OUT_BCS_BATCH(batch, offset);
716
717     OUT_BCS_BATCH(batch, 
718                   (1 << 24) |     /* PackedMvNum, Debug*/
719                   (4 << 20) |     /* 8 MV, SNB don't use it*/
720                   (1 << 19) |     /* CbpDcY */
721                   (1 << 18) |     /* CbpDcU */
722                   (1 << 17) |     /* CbpDcV */
723                   (0 << 15) |     /* Transform8x8Flag = 0*/
724                   (0 << 14) |     /* Frame based*/
725                   (0 << 13) |     /* Inter MB */
726                   (1 << 8)  |     /* MbType = P_L0_16x16 */   
727                   (0 << 7)  |     /* MBZ for frame */
728                   (0 << 6)  |     /* MBZ */
729                   (2 << 4)  |     /* MBZ for inter*/
730                   (0 << 3)  |     /* MBZ */
731                   (0 << 2)  |     /* SkipMbFlag */
732                   (0 << 0));      /* InterMbMode */
733
734     OUT_BCS_BATCH(batch, (0xFFFF<<16) | (y << 8) | x);        /* Code Block Pattern for Y*/
735     OUT_BCS_BATCH(batch, 0x000F000F);                         /* Code Block Pattern */   
736     if ( slice_type == SLICE_TYPE_B) {
737         OUT_BCS_BATCH(batch, (0xF<<28) | (end_mb << 26) | qp);  /* Last MB */
738     } else {
739         OUT_BCS_BATCH(batch, (end_mb << 26) | qp);      /* Last MB */
740     }
741
742
743     /*Stuff for Inter MB*/
744     OUT_BCS_BATCH(batch, 0x0);        
745     OUT_BCS_BATCH(batch, 0x0);    
746     OUT_BCS_BATCH(batch, 0x0);        
747
748     /*MaxSizeInWord and TargetSzieInWord*/
749     OUT_BCS_BATCH(batch, (max_mb_size << 24) |
750                          (target_mb_size << 16) );
751
752     ADVANCE_BCS_BATCH(batch);
753
754     return len_in_dwords;
755 }
756
757 static void gen6_mfc_init(VADriverContextP ctx, struct gen6_encoder_context *gen6_encoder_context)
758 {
759     struct i965_driver_data *i965 = i965_driver_data(ctx);
760     struct gen6_mfc_context *mfc_context = &gen6_encoder_context->mfc_context;
761     dri_bo *bo;
762     int i;
763
764     /*Encode common setup for MFC*/
765     dri_bo_unreference(mfc_context->post_deblocking_output.bo);
766     mfc_context->post_deblocking_output.bo = NULL;
767
768     dri_bo_unreference(mfc_context->pre_deblocking_output.bo);
769     mfc_context->pre_deblocking_output.bo = NULL;
770
771     dri_bo_unreference(mfc_context->uncompressed_picture_source.bo);
772     mfc_context->uncompressed_picture_source.bo = NULL;
773
774     dri_bo_unreference(mfc_context->mfc_indirect_pak_bse_object.bo); 
775     mfc_context->mfc_indirect_pak_bse_object.bo = NULL;
776
777     for (i = 0; i < NUM_MFC_DMV_BUFFERS; i++){
778         if ( mfc_context->direct_mv_buffers[i].bo != NULL);
779             dri_bo_unreference(mfc_context->direct_mv_buffers[i].bo);
780         mfc_context->direct_mv_buffers[i].bo = NULL;
781     }
782
783     for (i = 0; i < MAX_MFC_REFERENCE_SURFACES; i++){
784         if (mfc_context->reference_surfaces[i].bo != NULL)
785             dri_bo_unreference(mfc_context->reference_surfaces[i].bo);
786         mfc_context->reference_surfaces[i].bo = NULL;  
787     }
788
789     dri_bo_unreference(mfc_context->intra_row_store_scratch_buffer.bo);
790     bo = dri_bo_alloc(i965->intel.bufmgr,
791                       "Buffer",
792                       128 * 64,
793                       64);
794     assert(bo);
795     mfc_context->intra_row_store_scratch_buffer.bo = bo;
796
797     dri_bo_unreference(mfc_context->macroblock_status_buffer.bo);
798     bo = dri_bo_alloc(i965->intel.bufmgr,
799                       "Buffer",
800                       4*9600,
801                       64);
802     assert(bo);
803     mfc_context->macroblock_status_buffer.bo = bo;
804
805     dri_bo_unreference(mfc_context->deblocking_filter_row_store_scratch_buffer.bo);
806     bo = dri_bo_alloc(i965->intel.bufmgr,
807                       "Buffer",
808                       49152,  /* 6 * 128 * 64 */
809                       64);
810     assert(bo);
811     mfc_context->deblocking_filter_row_store_scratch_buffer.bo = bo;
812
813     dri_bo_unreference(mfc_context->bsd_mpc_row_store_scratch_buffer.bo);
814     bo = dri_bo_alloc(i965->intel.bufmgr,
815                       "Buffer",
816                       12288, /* 1.5 * 128 * 64 */
817                       0x1000);
818     assert(bo);
819     mfc_context->bsd_mpc_row_store_scratch_buffer.bo = bo;
820 }
821
/*
 * Header that precedes a packed SPS/PPS payload in an encode buffer.
 * The pipeline programming code reads the payload from the bytes that
 * immediately follow this header.
 */
struct packed_data_format {
    unsigned int length_in_bits;    /* payload length, in bits */
    unsigned char flag;             /* bit 0 is forwarded as the insert object's emulation flag */
    unsigned char num_skip_bytes;   /* forwarded as the insert object's skip-emulation byte count */
    unsigned char pad[2];           /* padding; not read by the code in this file */
};
829
830 void gen6_mfc_avc_pipeline_programing(VADriverContextP ctx,
831                                       struct encode_state *encode_state,
832                                       struct gen6_encoder_context *gen6_encoder_context)
833 {
834     struct i965_driver_data *i965 = i965_driver_data(ctx);
835     struct intel_batchbuffer *batch = gen6_encoder_context->base.batch;
836     struct gen6_mfc_context *mfc_context = &gen6_encoder_context->mfc_context;
837     struct gen6_vme_context *vme_context = &gen6_encoder_context->vme_context;
838     VAEncSequenceParameterBufferH264Ext *pSequenceParameter = (VAEncSequenceParameterBufferH264Ext *)encode_state->seq_param_ext->buffer;
839     VAEncPictureParameterBufferH264Ext *pPicParameter = (VAEncPictureParameterBufferH264Ext *)encode_state->pic_param_ext->buffer;
840     VAEncSliceParameterBufferH264Ext *pSliceParameter = (VAEncSliceParameterBufferH264Ext *)encode_state->slice_params_ext[0]->buffer; /* FIXME: multi slices */
841     VAEncH264DecRefPicMarkingBuffer *pDecRefPicMarking = NULL;
842     unsigned int *msg = NULL, offset = 0;
843     int emit_new_state = 1, object_len_in_bytes;
844     int is_intra = pSliceParameter->slice_type == SLICE_TYPE_I;
845     int width_in_mbs = (mfc_context->surface_state.width + 15) / 16;
846     int height_in_mbs = (mfc_context->surface_state.height + 15) / 16;
847     int x,y;
848     int rate_control_mode = pSequenceParameter->rate_control_method; 
849     float fps =  pSequenceParameter->time_scale * 0.5 / pSequenceParameter->num_units_in_tick ;
850     int inter_mb_size = pSequenceParameter->bits_per_second * 1.0 / fps / width_in_mbs / height_in_mbs;
851     int intra_mb_size = inter_mb_size * 5.0;
852     int qp = pPicParameter->pic_init_qp;
853     unsigned char *slice_header = NULL;
854     int slice_header_length_in_bits = 0;
855     unsigned int tail_data[] = { 0x0 };
856     struct packed_data_format *packed_sps = NULL, *packed_pps = NULL;
857
858     if (encode_state->dec_ref_pic_marking)
859         pDecRefPicMarking = (VAEncH264DecRefPicMarkingBuffer *)encode_state->dec_ref_pic_marking->buffer;
860
861     if (encode_state->packed_sps)
862         packed_sps = (struct packed_data_format *)encode_state->packed_sps->buffer;
863
864     if (encode_state->packed_pps)
865         packed_pps = (struct packed_data_format *)encode_state->packed_pps->buffer;
866
867     slice_header_length_in_bits = build_avc_slice_header(pSequenceParameter, pPicParameter, pSliceParameter, pDecRefPicMarking, &slice_header);
868
869     if ( rate_control_mode != 2) {
870         qp = 26;
871         if ( intra_mb_size > 384*8)         //ONE MB raw data is 384 bytes
872             intra_mb_size = 384*8;
873         if ( inter_mb_size > 256*8)
874             intra_mb_size = 256*8;
875     }
876
877     intel_batchbuffer_start_atomic_bcs(batch, 0x1000); 
878     
879     if (is_intra) {
880         dri_bo_map(vme_context->vme_output.bo , 1);
881         msg = (unsigned int *)vme_context->vme_output.bo->virtual;
882     }
883
884     for (y = 0; y < height_in_mbs; y++) {
885         for (x = 0; x < width_in_mbs; x++) { 
886             int last_mb = (y == (height_in_mbs-1)) && ( x == (width_in_mbs-1) );
887             
888             if (emit_new_state) {
889                 intel_batchbuffer_emit_mi_flush(batch);
890                 
891                 if (IS_GEN7(i965->intel.device_id)) {
892                     gen7_mfc_pipe_mode_select(ctx, MFX_FORMAT_AVC, gen6_encoder_context);
893                     gen7_mfc_surface_state(ctx, gen6_encoder_context);
894                     gen7_mfc_ind_obj_base_addr_state(ctx, gen6_encoder_context);
895                 } else {
896                     gen6_mfc_pipe_mode_select(ctx, gen6_encoder_context);
897                     gen6_mfc_surface_state(ctx, gen6_encoder_context);
898                     gen6_mfc_ind_obj_base_addr_state(ctx, gen6_encoder_context);
899                 }
900
901                 gen6_mfc_pipe_buf_addr_state(ctx, gen6_encoder_context);
902                 gen6_mfc_bsp_buf_base_addr_state(ctx, gen6_encoder_context);
903
904                 if (IS_GEN7(i965->intel.device_id)) {
905                     gen7_mfc_avc_img_state(ctx, gen6_encoder_context);
906                     gen7_mfc_avc_qm_state(ctx, gen6_encoder_context);
907                     gen7_mfc_avc_fqm_state(ctx, gen6_encoder_context);
908                 } else {
909                     gen6_mfc_avc_img_state(ctx, encode_state,gen6_encoder_context);
910                     gen6_mfc_avc_qm_state(ctx, gen6_encoder_context);
911                     gen6_mfc_avc_fqm_state(ctx, gen6_encoder_context);
912                 }
913
914                 gen6_mfc_avc_directmode_state(ctx, gen6_encoder_context); 
915                 gen6_mfc_avc_ref_idx_state(ctx, gen6_encoder_context);
916                 gen6_mfc_avc_slice_state(ctx, pSliceParameter->slice_type, 
917                                          encode_state, gen6_encoder_context, 
918                                          rate_control_mode == 0, qp);
919
920                 if (packed_sps) {
921                     gen6_mfc_avc_insert_object(ctx, gen6_encoder_context,
922                                                (unsigned int *)(packed_sps + 1), ALIGN(packed_sps->length_in_bits, 32) >> 5, packed_sps->length_in_bits & 0x1f,
923                                                packed_sps->num_skip_bytes, 0, 0, !!(packed_sps->flag & 0x1));
924                 }
925
926                 if (packed_pps) {
927                     gen6_mfc_avc_insert_object(ctx, gen6_encoder_context,
928                                                (unsigned int *)(packed_pps + 1), ALIGN(packed_pps->length_in_bits, 32) >> 5, packed_pps->length_in_bits & 0x1f,
929                                                packed_pps->num_skip_bytes, 0, 0, !!(packed_pps->flag & 0x1));
930                 }
931
932                 gen6_mfc_avc_insert_object(ctx, gen6_encoder_context,
933                                            (unsigned int *)slice_header, ALIGN(slice_header_length_in_bits, 32) >> 5, slice_header_length_in_bits & 0x1f,
934                                            5,  /* first 5 bytes are start code + nal unit type */
935                                            1, 0, 1);
936                 emit_new_state = 0;
937             }
938
939             if (is_intra) {
940                 assert(msg);
941                 object_len_in_bytes = gen6_mfc_avc_pak_object_intra(ctx, x, y, last_mb, qp, msg, gen6_encoder_context, intra_mb_size);
942                 msg += 4;
943             } else {
944                 object_len_in_bytes = gen6_mfc_avc_pak_object_inter(ctx, x, y, last_mb, qp, offset, gen6_encoder_context, inter_mb_size, pSliceParameter->slice_type);
945                 offset += 64;
946             }
947
948             if (intel_batchbuffer_check_free_space(batch, object_len_in_bytes) == 0) {
949                 assert(0);
950                 intel_batchbuffer_end_atomic(batch);
951                 intel_batchbuffer_flush(batch);
952                 emit_new_state = 1;
953                 intel_batchbuffer_start_atomic_bcs(batch, 0x1000);
954             }
955         }
956     }
957
958     gen6_mfc_avc_insert_object(ctx, gen6_encoder_context,
959                                tail_data, sizeof(tail_data) >> 2, 32,
960                                sizeof(tail_data), 1, 1, 1);
961
962     if (is_intra)
963         dri_bo_unmap(vme_context->vme_output.bo);
964
965     free(slice_header);
966
967     intel_batchbuffer_end_atomic(batch);
968 }
969
970 static void 
971 gen6_mfc_free_avc_surface(void **data)
972 {
973     struct gen6_mfc_avc_surface_aux *avc_surface = *data;
974
975     if (!avc_surface)
976         return;
977
978     dri_bo_unreference(avc_surface->dmv_top);
979     avc_surface->dmv_top = NULL;
980     dri_bo_unreference(avc_surface->dmv_bottom);
981     avc_surface->dmv_bottom = NULL;
982
983     free(avc_surface);
984     *data = NULL;
985 }
986
987 static VAStatus gen6_mfc_avc_prepare(VADriverContextP ctx, 
988                                      struct encode_state *encode_state,
989                                      struct gen6_encoder_context *gen6_encoder_context)
990 {
991     struct i965_driver_data *i965 = i965_driver_data(ctx);
992     struct gen6_mfc_context *mfc_context = &gen6_encoder_context->mfc_context;
993     struct object_surface *obj_surface; 
994     struct object_buffer *obj_buffer;
995     struct gen6_mfc_avc_surface_aux* gen6_avc_surface;
996     dri_bo *bo;
997     VAEncPictureParameterBufferH264Ext *pPicParameter = (VAEncPictureParameterBufferH264Ext *)encode_state->pic_param_ext->buffer;
998     VAStatus vaStatus = VA_STATUS_SUCCESS;
999         int i;
1000
1001     /*Setup all the input&output object*/
1002
1003     /* Setup current frame and current direct mv buffer*/
1004     obj_surface = SURFACE(pPicParameter->CurrPic.picture_id);
1005     assert(obj_surface);
1006     i965_check_alloc_surface_bo(ctx, obj_surface, 1, VA_FOURCC('N','V','1','2'));
1007     if ( obj_surface->private_data == NULL) {
1008         gen6_avc_surface = calloc(sizeof(struct gen6_mfc_avc_surface_aux), 1);
1009         gen6_avc_surface->dmv_top = 
1010             dri_bo_alloc(i965->intel.bufmgr,
1011                     "Buffer",
1012                     68*8192, 
1013                     64);
1014         gen6_avc_surface->dmv_bottom = 
1015             dri_bo_alloc(i965->intel.bufmgr,
1016                             "Buffer",
1017                             68*8192, 
1018                             64);
1019         assert(gen6_avc_surface->dmv_top);
1020         assert(gen6_avc_surface->dmv_bottom);
1021         obj_surface->private_data = (void *)gen6_avc_surface;
1022         obj_surface->free_private_data = (void *)gen6_mfc_free_avc_surface; 
1023     }
1024     gen6_avc_surface = (struct gen6_mfc_avc_surface_aux*) obj_surface->private_data;
1025     mfc_context->direct_mv_buffers[NUM_MFC_DMV_BUFFERS - 2].bo = gen6_avc_surface->dmv_top;
1026     mfc_context->direct_mv_buffers[NUM_MFC_DMV_BUFFERS - 1].bo = gen6_avc_surface->dmv_bottom;
1027         dri_bo_reference(gen6_avc_surface->dmv_top);
1028         dri_bo_reference(gen6_avc_surface->dmv_bottom);
1029
1030     mfc_context->post_deblocking_output.bo = obj_surface->bo;
1031     dri_bo_reference(mfc_context->post_deblocking_output.bo);
1032
1033     mfc_context->surface_state.width = obj_surface->orig_width;
1034     mfc_context->surface_state.height = obj_surface->orig_height;
1035     mfc_context->surface_state.w_pitch = obj_surface->width;
1036     mfc_context->surface_state.h_pitch = obj_surface->height;
1037     
1038     /* Setup reference frames and direct mv buffers*/
1039     for(i = 0; i < MAX_MFC_REFERENCE_SURFACES; i++) {
1040                 if ( pPicParameter->ReferenceFrames[i].picture_id != VA_INVALID_ID ) { 
1041                         obj_surface = SURFACE(pPicParameter->ReferenceFrames[i].picture_id);
1042                         assert(obj_surface);
1043                         if (obj_surface->bo != NULL) {
1044                                 mfc_context->reference_surfaces[i].bo = obj_surface->bo;
1045                                 dri_bo_reference(obj_surface->bo);
1046                         }
1047             /* Check DMV buffer */
1048             if ( obj_surface->private_data == NULL) {
1049                 
1050                 gen6_avc_surface = calloc(sizeof(struct gen6_mfc_avc_surface_aux), 1);
1051                 gen6_avc_surface->dmv_top = 
1052                     dri_bo_alloc(i965->intel.bufmgr,
1053                             "Buffer",
1054                             68*8192, 
1055                             64);
1056                 gen6_avc_surface->dmv_bottom = 
1057                     dri_bo_alloc(i965->intel.bufmgr,
1058                             "Buffer",
1059                             68*8192, 
1060                             64);
1061                 assert(gen6_avc_surface->dmv_top);
1062                 assert(gen6_avc_surface->dmv_bottom);
1063                 obj_surface->private_data = gen6_avc_surface;
1064                 obj_surface->free_private_data = gen6_mfc_free_avc_surface; 
1065             }
1066     
1067             gen6_avc_surface = (struct gen6_mfc_avc_surface_aux*) obj_surface->private_data;
1068             /* Setup DMV buffer */
1069             mfc_context->direct_mv_buffers[i*2].bo = gen6_avc_surface->dmv_top;
1070             mfc_context->direct_mv_buffers[i*2+1].bo = gen6_avc_surface->dmv_bottom; 
1071             dri_bo_reference(gen6_avc_surface->dmv_top);
1072             dri_bo_reference(gen6_avc_surface->dmv_bottom);
1073                 } else {
1074                         break;
1075                 }
1076         }
1077         
1078     obj_surface = SURFACE(encode_state->current_render_target);
1079     assert(obj_surface && obj_surface->bo);
1080     mfc_context->uncompressed_picture_source.bo = obj_surface->bo;
1081     dri_bo_reference(mfc_context->uncompressed_picture_source.bo);
1082
1083     obj_buffer = BUFFER (pPicParameter->CodedBuf); /* FIXME: fix this later */
1084     bo = obj_buffer->buffer_store->bo;
1085     assert(bo);
1086     mfc_context->mfc_indirect_pak_bse_object.bo = bo;
1087     mfc_context->mfc_indirect_pak_bse_object.offset = ALIGN(sizeof(VACodedBufferSegment), 64);
1088     mfc_context->mfc_indirect_pak_bse_object.end_offset = ALIGN (obj_buffer->size_element - 0x1000, 0x1000);
1089     dri_bo_reference(mfc_context->mfc_indirect_pak_bse_object.bo);
1090
1091     /*Programing bcs pipeline*/
1092     gen6_mfc_avc_pipeline_programing(ctx, encode_state, gen6_encoder_context);  //filling the pipeline
1093         
1094     return vaStatus;
1095 }
1096
1097 static VAStatus gen6_mfc_run(VADriverContextP ctx, 
1098                              struct encode_state *encode_state,
1099                              struct gen6_encoder_context *gen6_encoder_context)
1100 {
1101     struct intel_batchbuffer *batch = gen6_encoder_context->base.batch;
1102
1103     intel_batchbuffer_flush(batch);             //run the pipeline
1104
1105     return VA_STATUS_SUCCESS;
1106 }
1107
1108 static VAStatus gen6_mfc_stop(VADriverContextP ctx, 
1109                               struct encode_state *encode_state,
1110                               struct gen6_encoder_context *gen6_encoder_context)
1111 {
1112 #if 0
1113     struct i965_driver_data *i965 = i965_driver_data(ctx);
1114     struct gen6_mfc_context *mfc_context = &gen6_encoder_context->mfc_context;
1115         
1116     VAEncPictureParameterBufferH264Ext *pPicParameter = (VAEncPictureParameterBufferH264Ext *)encode_state->pic_param_ext->buffer;
1117         
1118     struct object_surface *obj_surface = SURFACE(pPicParameter->reconstructed_picture);
1119     //struct object_surface *obj_surface = SURFACE(pPicParameter->reference_picture[0]);
1120     //struct object_surface *obj_surface = SURFACE(encode_state->current_render_target);
1121     my_debug(obj_surface);
1122
1123 #endif
1124
1125     return VA_STATUS_SUCCESS;
1126 }
1127
1128 static VAStatus
1129 gen6_mfc_avc_encode_picture(VADriverContextP ctx, 
1130                             struct encode_state *encode_state,
1131                             struct gen6_encoder_context *gen6_encoder_context)
1132 {
1133     gen6_mfc_init(ctx, gen6_encoder_context);
1134     gen6_mfc_avc_prepare(ctx, encode_state, gen6_encoder_context);
1135     gen6_mfc_run(ctx, encode_state, gen6_encoder_context);
1136     gen6_mfc_stop(ctx, encode_state, gen6_encoder_context);
1137
1138     return VA_STATUS_SUCCESS;
1139 }
1140
1141 VAStatus
1142 gen6_mfc_pipeline(VADriverContextP ctx,
1143                   VAProfile profile,
1144                   struct encode_state *encode_state,
1145                   struct gen6_encoder_context *gen6_encoder_context)
1146 {
1147     VAStatus vaStatus;
1148
1149     switch (profile) {
1150     case VAProfileH264Baseline:
1151         vaStatus = gen6_mfc_avc_encode_picture(ctx, encode_state, gen6_encoder_context);
1152         break;
1153
1154         /* FIXME: add for other profile */
1155     default:
1156         vaStatus = VA_STATUS_ERROR_UNSUPPORTED_PROFILE;
1157         break;
1158     }
1159
1160     return vaStatus;
1161 }
1162
1163 Bool gen6_mfc_context_init(VADriverContextP ctx, struct gen6_mfc_context *mfc_context)
1164 {
1165     return True;
1166 }
1167
1168 Bool gen6_mfc_context_destroy(struct gen6_mfc_context *mfc_context)
1169 {
1170     int i;
1171
1172     dri_bo_unreference(mfc_context->post_deblocking_output.bo);
1173     mfc_context->post_deblocking_output.bo = NULL;
1174
1175     dri_bo_unreference(mfc_context->pre_deblocking_output.bo);
1176     mfc_context->pre_deblocking_output.bo = NULL;
1177
1178     dri_bo_unreference(mfc_context->uncompressed_picture_source.bo);
1179     mfc_context->uncompressed_picture_source.bo = NULL;
1180
1181     dri_bo_unreference(mfc_context->mfc_indirect_pak_bse_object.bo); 
1182     mfc_context->mfc_indirect_pak_bse_object.bo = NULL;
1183
1184     for (i = 0; i < NUM_MFC_DMV_BUFFERS; i++){
1185         dri_bo_unreference(mfc_context->direct_mv_buffers[i].bo);
1186         mfc_context->direct_mv_buffers[i].bo = NULL;
1187     }
1188
1189     dri_bo_unreference(mfc_context->intra_row_store_scratch_buffer.bo);
1190     mfc_context->intra_row_store_scratch_buffer.bo = NULL;
1191
1192         dri_bo_unreference(mfc_context->macroblock_status_buffer.bo);
1193         mfc_context->macroblock_status_buffer.bo = NULL;
1194
1195     dri_bo_unreference(mfc_context->deblocking_filter_row_store_scratch_buffer.bo);
1196     mfc_context->deblocking_filter_row_store_scratch_buffer.bo = NULL;
1197
1198     dri_bo_unreference(mfc_context->bsd_mpc_row_store_scratch_buffer.bo);
1199     mfc_context->bsd_mpc_row_store_scratch_buffer.bo = NULL;
1200
1201     return True;
1202 }