i965_drv_video: append trail data as the indicator of slice
[platform/upstream/libva-intel-driver.git] / src / gen6_mfc.c
1 /*
2  * Copyright © 2010-2011 Intel Corporation
3  *
4  * Permission is hereby granted, free of charge, to any person obtaining a
5  * copy of this software and associated documentation files (the
6  * "Software"), to deal in the Software without restriction, including
7  * without limitation the rights to use, copy, modify, merge, publish,
8  * distribute, sub license, and/or sell copies of the Software, and to
9  * permit persons to whom the Software is furnished to do so, subject to
10  * the following conditions:
11  *
12  * The above copyright notice and this permission notice (including the
13  * next paragraph) shall be included in all copies or substantial portions
14  * of the Software.
15  *
16  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
17  * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
18  * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
19  * IN NO EVENT SHALL PRECISION INSIGHT AND/OR ITS SUPPLIERS BE LIABLE FOR
20  * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
21  * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
22  * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
23  *
24  * Authors:
25  *    Zhou Chang <chang.zhou@intel.com>
26  *
27  */
28
29 #include <stdio.h>
30 #include <stdlib.h>
31 #include <string.h>
32 #include <assert.h>
33
34 #include "assert.h"
35 #include "intel_batchbuffer.h"
36 #include "i965_defines.h"
37 #include "i965_structs.h"
38 #include "i965_drv_video.h"
39 #include "i965_encoder.h"
40 #include "i965_encoder_utils.h"
41
42 static void
43 gen6_mfc_pipe_mode_select(VADriverContextP ctx, struct gen6_encoder_context *gen6_encoder_context)
44 {
45     struct intel_batchbuffer *batch = gen6_encoder_context->base.batch;
46
47     BEGIN_BCS_BATCH(batch, 4);
48
49     OUT_BCS_BATCH(batch, MFX_PIPE_MODE_SELECT | (4 - 2));
50     OUT_BCS_BATCH(batch,
51                   (0 << 10) | /* disable Stream-Out , advanced QP/bitrate control need enable it*/
52                   (1 << 9)  | /* Post Deblocking Output */
53                   (0 << 8)  | /* Pre Deblocking Output */
54                   (0 << 7)  | /* disable TLB prefectch */
55                   (0 << 5)  | /* not in stitch mode */
56                   (1 << 4)  | /* encoding mode */
57                   (2 << 0));  /* Standard Select: AVC */
58     OUT_BCS_BATCH(batch,
59                   (0 << 20) | /* round flag in PB slice */
60                   (0 << 19) | /* round flag in Intra8x8 */
61                   (0 << 7)  | /* expand NOA bus flag */
62                   (1 << 6)  | /* must be 1 */
63                   (0 << 5)  | /* disable clock gating for NOA */
64                   (0 << 4)  | /* terminate if AVC motion and POC table error occurs */
65                   (0 << 3)  | /* terminate if AVC mbdata error occurs */
66                   (0 << 2)  | /* terminate if AVC CABAC/CAVLC decode error occurs */
67                   (0 << 1)  | /* AVC long field motion vector */
68                   (0 << 0));  /* always calculate AVC ILDB boundary strength */
69     OUT_BCS_BATCH(batch, 0);
70
71     ADVANCE_BCS_BATCH(batch);
72 }
73
74 static void
75 gen7_mfc_pipe_mode_select(VADriverContextP ctx,
76                           int standard_select,
77                           struct gen6_encoder_context *gen6_encoder_context)
78 {
79     struct intel_batchbuffer *batch = gen6_encoder_context->base.batch;
80
81     assert(standard_select == MFX_FORMAT_MPEG2 ||
82            standard_select == MFX_FORMAT_AVC);
83
84     BEGIN_BCS_BATCH(batch, 5);
85     OUT_BCS_BATCH(batch, MFX_PIPE_MODE_SELECT | (5 - 2));
86     OUT_BCS_BATCH(batch,
87                   (MFX_LONG_MODE << 17) | /* Must be long format for encoder */
88                   (MFD_MODE_VLD << 15) | /* VLD mode */
89                   (0 << 10) | /* disable Stream-Out */
90                   (1 << 9)  | /* Post Deblocking Output */
91                   (0 << 8)  | /* Pre Deblocking Output */
92                   (0 << 5)  | /* not in stitch mode */
93                   (1 << 4)  | /* encoding mode */
94                   (standard_select << 0));  /* standard select: avc or mpeg2 */
95     OUT_BCS_BATCH(batch,
96                   (0 << 7)  | /* expand NOA bus flag */
97                   (0 << 6)  | /* disable slice-level clock gating */
98                   (0 << 5)  | /* disable clock gating for NOA */
99                   (0 << 4)  | /* terminate if AVC motion and POC table error occurs */
100                   (0 << 3)  | /* terminate if AVC mbdata error occurs */
101                   (0 << 2)  | /* terminate if AVC CABAC/CAVLC decode error occurs */
102                   (0 << 1)  |
103                   (0 << 0));
104     OUT_BCS_BATCH(batch, 0);
105     OUT_BCS_BATCH(batch, 0);
106
107     ADVANCE_BCS_BATCH(batch);
108 }
109
110 static void
111 gen6_mfc_surface_state(VADriverContextP ctx, struct gen6_encoder_context *gen6_encoder_context)
112 {
113     struct intel_batchbuffer *batch = gen6_encoder_context->base.batch;
114     struct gen6_mfc_context *mfc_context = &gen6_encoder_context->mfc_context;
115
116     BEGIN_BCS_BATCH(batch, 6);
117
118     OUT_BCS_BATCH(batch, MFX_SURFACE_STATE | (6 - 2));
119     OUT_BCS_BATCH(batch, 0);
120     OUT_BCS_BATCH(batch,
121                   ((mfc_context->surface_state.height - 1) << 19) |
122                   ((mfc_context->surface_state.width - 1) << 6));
123     OUT_BCS_BATCH(batch,
124                   (MFX_SURFACE_PLANAR_420_8 << 28) | /* 420 planar YUV surface */
125                   (1 << 27) | /* must be 1 for interleave U/V, hardware requirement */
126                   (0 << 22) | /* surface object control state, FIXME??? */
127                   ((mfc_context->surface_state.w_pitch - 1) << 3) | /* pitch */
128                   (0 << 2)  | /* must be 0 for interleave U/V */
129                   (1 << 1)  | /* must be y-tiled */
130                   (I965_TILEWALK_YMAJOR << 0));                         /* tile walk, TILEWALK_YMAJOR */
131     OUT_BCS_BATCH(batch,
132                   (0 << 16) |                                                           /* must be 0 for interleave U/V */
133                   (mfc_context->surface_state.h_pitch));                /* y offset for U(cb) */
134     OUT_BCS_BATCH(batch, 0);
135     ADVANCE_BCS_BATCH(batch);
136 }
137
138 static void
139 gen7_mfc_surface_state(VADriverContextP ctx, struct gen6_encoder_context *gen6_encoder_context)
140 {
141     struct intel_batchbuffer *batch = gen6_encoder_context->base.batch;
142     struct gen6_mfc_context *mfc_context = &gen6_encoder_context->mfc_context;
143
144     BEGIN_BCS_BATCH(batch, 6);
145
146     OUT_BCS_BATCH(batch, MFX_SURFACE_STATE | (6 - 2));
147     OUT_BCS_BATCH(batch, 0);
148     OUT_BCS_BATCH(batch,
149                   ((mfc_context->surface_state.height - 1) << 18) |
150                   ((mfc_context->surface_state.width - 1) << 4));
151     OUT_BCS_BATCH(batch,
152                   (MFX_SURFACE_PLANAR_420_8 << 28) | /* 420 planar YUV surface */
153                   (1 << 27) | /* must be 1 for interleave U/V, hardware requirement */
154                   (0 << 22) | /* surface object control state, FIXME??? */
155                   ((mfc_context->surface_state.w_pitch - 1) << 3) | /* pitch */
156                   (0 << 2)  | /* must be 0 for interleave U/V */
157                   (1 << 1)  | /* must be tiled */
158                   (I965_TILEWALK_YMAJOR << 0));  /* tile walk, TILEWALK_YMAJOR */
159     OUT_BCS_BATCH(batch,
160                   (0 << 16) |                                                           /* must be 0 for interleave U/V */
161                   (mfc_context->surface_state.h_pitch));                /* y offset for U(cb) */
162     OUT_BCS_BATCH(batch, 0);
163     ADVANCE_BCS_BATCH(batch);
164 }
165
166 static void
167 gen6_mfc_pipe_buf_addr_state(VADriverContextP ctx, struct gen6_encoder_context *gen6_encoder_context)
168 {
169     struct intel_batchbuffer *batch = gen6_encoder_context->base.batch;
170     struct gen6_mfc_context *mfc_context = &gen6_encoder_context->mfc_context;
171     int i;
172
173     BEGIN_BCS_BATCH(batch, 24);
174
175     OUT_BCS_BATCH(batch, MFX_PIPE_BUF_ADDR_STATE | (24 - 2));
176
177     OUT_BCS_BATCH(batch, 0);                                                                                    /* pre output addr   */
178
179     OUT_BCS_RELOC(batch, mfc_context->post_deblocking_output.bo,
180                   I915_GEM_DOMAIN_INSTRUCTION, I915_GEM_DOMAIN_INSTRUCTION,
181                   0);                                                                                   /* post output addr  */ 
182
183     OUT_BCS_RELOC(batch, mfc_context->uncompressed_picture_source.bo,
184                   I915_GEM_DOMAIN_INSTRUCTION, I915_GEM_DOMAIN_INSTRUCTION,
185                   0);                                                                                   /* uncompressed data */
186     OUT_BCS_RELOC(batch, mfc_context->macroblock_status_buffer.bo,
187                                   I915_GEM_DOMAIN_INSTRUCTION, I915_GEM_DOMAIN_INSTRUCTION,
188                                   0);                                                                                   /* StreamOut data*/
189     OUT_BCS_RELOC(batch, mfc_context->intra_row_store_scratch_buffer.bo,
190                   I915_GEM_DOMAIN_INSTRUCTION, I915_GEM_DOMAIN_INSTRUCTION,
191                   0);   
192     OUT_BCS_RELOC(batch, mfc_context->deblocking_filter_row_store_scratch_buffer.bo,
193                   I915_GEM_DOMAIN_INSTRUCTION, I915_GEM_DOMAIN_INSTRUCTION,
194                   0);
195     /* 7..22 Reference pictures*/
196     for (i = 0; i < ARRAY_ELEMS(mfc_context->reference_surfaces); i++) {
197         if ( mfc_context->reference_surfaces[i].bo != NULL) {
198             OUT_BCS_RELOC(batch, mfc_context->reference_surfaces[i].bo,
199                           I915_GEM_DOMAIN_INSTRUCTION, I915_GEM_DOMAIN_INSTRUCTION,
200                           0);                   
201         } else {
202             OUT_BCS_BATCH(batch, 0);
203         }
204     }
205     OUT_BCS_RELOC(batch, mfc_context->macroblock_status_buffer.bo,
206                                   I915_GEM_DOMAIN_INSTRUCTION, I915_GEM_DOMAIN_INSTRUCTION,
207                                   0);                                                                                   /* Macroblock status buffer*/
208
209     ADVANCE_BCS_BATCH(batch);
210 }
211
212 static void
213 gen6_mfc_ind_obj_base_addr_state(VADriverContextP ctx, struct gen6_encoder_context *gen6_encoder_context)
214 {
215     struct intel_batchbuffer *batch = gen6_encoder_context->base.batch;
216     struct gen6_vme_context *vme_context = &gen6_encoder_context->vme_context;
217
218     BEGIN_BCS_BATCH(batch, 11);
219
220     OUT_BCS_BATCH(batch, MFX_IND_OBJ_BASE_ADDR_STATE | (11 - 2));
221     OUT_BCS_BATCH(batch, 0);
222     OUT_BCS_BATCH(batch, 0);
223     /* MFX Indirect MV Object Base Address */
224     OUT_BCS_RELOC(batch, vme_context->vme_output.bo, I915_GEM_DOMAIN_INSTRUCTION, 0, 0);
225     OUT_BCS_BATCH(batch, 0);    
226     OUT_BCS_BATCH(batch, 0);
227     OUT_BCS_BATCH(batch, 0);
228     OUT_BCS_BATCH(batch, 0);
229     OUT_BCS_BATCH(batch, 0);
230     /*MFC Indirect PAK-BSE Object Base Address for Encoder*/    
231     OUT_BCS_BATCH(batch, 0);
232     OUT_BCS_BATCH(batch, 0);
233
234     ADVANCE_BCS_BATCH(batch);
235 }
236
237 static void
238 gen7_mfc_ind_obj_base_addr_state(VADriverContextP ctx, struct gen6_encoder_context *gen6_encoder_context)
239 {
240     struct intel_batchbuffer *batch = gen6_encoder_context->base.batch;
241     struct gen6_vme_context *vme_context = &gen6_encoder_context->vme_context;
242
243     BEGIN_BCS_BATCH(batch, 11);
244
245     OUT_BCS_BATCH(batch, MFX_IND_OBJ_BASE_ADDR_STATE | (11 - 2));
246     OUT_BCS_BATCH(batch, 0);
247     OUT_BCS_BATCH(batch, 0);
248     /* MFX Indirect MV Object Base Address */
249     OUT_BCS_RELOC(batch, vme_context->vme_output.bo, I915_GEM_DOMAIN_INSTRUCTION, 0, 0);
250     OUT_BCS_BATCH(batch, 0x80000000); /* must set, up to 2G */
251     OUT_BCS_BATCH(batch, 0);
252     OUT_BCS_BATCH(batch, 0);
253     OUT_BCS_BATCH(batch, 0);
254     OUT_BCS_BATCH(batch, 0);
255     /*MFC Indirect PAK-BSE Object Base Address for Encoder*/    
256     OUT_BCS_BATCH(batch, 0);
257     OUT_BCS_BATCH(batch, 0x80000000); /* must set, up to 2G */
258
259     ADVANCE_BCS_BATCH(batch);
260 }
261
262 static void
263 gen6_mfc_bsp_buf_base_addr_state(VADriverContextP ctx, struct gen6_encoder_context *gen6_encoder_context)
264 {
265     struct intel_batchbuffer *batch = gen6_encoder_context->base.batch;
266     struct gen6_mfc_context *mfc_context = &gen6_encoder_context->mfc_context;
267
268     BEGIN_BCS_BATCH(batch, 4);
269
270     OUT_BCS_BATCH(batch, MFX_BSP_BUF_BASE_ADDR_STATE | (4 - 2));
271     OUT_BCS_RELOC(batch, mfc_context->bsd_mpc_row_store_scratch_buffer.bo,
272                   I915_GEM_DOMAIN_INSTRUCTION, I915_GEM_DOMAIN_INSTRUCTION,
273                   0);
274     OUT_BCS_BATCH(batch, 0);
275     OUT_BCS_BATCH(batch, 0);
276
277     ADVANCE_BCS_BATCH(batch);
278 }
279
280 static void
281 gen6_mfc_avc_img_state(VADriverContextP ctx, struct gen6_encoder_context *gen6_encoder_context)
282 {
283     struct intel_batchbuffer *batch = gen6_encoder_context->base.batch;
284     struct gen6_mfc_context *mfc_context = &gen6_encoder_context->mfc_context;
285
286     int width_in_mbs = (mfc_context->surface_state.width + 15) / 16;
287     int height_in_mbs = (mfc_context->surface_state.height + 15) / 16;
288
289     BEGIN_BCS_BATCH(batch, 13);
290     OUT_BCS_BATCH(batch, MFX_AVC_IMG_STATE | (13 - 2));
291     OUT_BCS_BATCH(batch, 
292                   ((width_in_mbs * height_in_mbs) & 0xFFFF));
293     OUT_BCS_BATCH(batch, 
294                   (height_in_mbs << 16) | 
295                   (width_in_mbs << 0));
296     OUT_BCS_BATCH(batch, 
297                   (0 << 24) |     /*Second Chroma QP Offset*/
298                   (0 << 16) |     /*Chroma QP Offset*/
299                   (0 << 14) |   /*Max-bit conformance Intra flag*/
300                   (0 << 13) |   /*Max Macroblock size conformance Inter flag*/
301                   (1 << 12) |   /*Should always be written as "1" */
302                   (0 << 10) |   /*QM Preset FLag */
303                   (0 << 8)  |   /*Image Structure*/
304                   (0 << 0) );   /*Current Decoed Image Frame Store ID, reserved in Encode mode*/
305     OUT_BCS_BATCH(batch,
306                   (400 << 16) |   /*Mininum Frame size*/        
307                   (0 << 15) |   /*Disable reading of Macroblock Status Buffer*/
308                   (0 << 14) |   /*Load BitStream Pointer only once, 1 slic 1 frame*/
309                   (0 << 13) |   /*CABAC 0 word insertion test enable*/
310                   (1 << 12) |   /*MVUnpackedEnable,compliant to DXVA*/
311                   (1 << 10) |   /*Chroma Format IDC, 4:2:0*/
312                   (1 << 7)  |   /*0:CAVLC encoding mode,1:CABAC*/
313                   (0 << 6)  |   /*Only valid for VLD decoding mode*/
314                   (0 << 5)  |   /*Constrained Intra Predition Flag, from PPS*/
315                   (0 << 4)  |   /*Direct 8x8 inference flag*/
316                   (0 << 3)  |   /*Only 8x8 IDCT Transform Mode Flag*/
317                   (1 << 2)  |   /*Frame MB only flag*/
318                   (0 << 1)  |   /*MBAFF mode is in active*/
319                   (0 << 0) );   /*Field picture flag*/
320     OUT_BCS_BATCH(batch, 
321                   (1<<16)   |   /*Frame Size Rate Control Flag*/  
322                   (1<<12)   |   
323                   (1<<9)    |   /*MB level Rate Control Enabling Flag*/
324                   (1 << 3)  |   /*FrameBitRateMinReportMask*/
325                   (1 << 2)  |   /*FrameBitRateMaxReportMask*/
326                   (1 << 1)  |   /*InterMBMaxSizeReportMask*/
327                   (1 << 0) );   /*IntraMBMaxSizeReportMask*/
328     OUT_BCS_BATCH(batch,                        /*Inter and Intra Conformance Max size limit*/
329                   (0x0600 << 16) |              /*InterMbMaxSz 192 Byte*/
330                   (0x0800) );                   /*IntraMbMaxSz 256 Byte*/
331     OUT_BCS_BATCH(batch, 0x00000000);   /*Reserved : MBZReserved*/
332     OUT_BCS_BATCH(batch, 0x01020304);   /*Slice QP Delta for bitrate control*/                  
333     OUT_BCS_BATCH(batch, 0xFEFDFCFB);           
334     OUT_BCS_BATCH(batch, 0x80601004);   /*MAX = 128KB, MIN = 64KB*/
335     OUT_BCS_BATCH(batch, 0x00800001);   
336     OUT_BCS_BATCH(batch, 0);
337
338     ADVANCE_BCS_BATCH(batch);
339 }
340
341 static void
342 gen7_mfc_avc_img_state(VADriverContextP ctx, struct gen6_encoder_context *gen6_encoder_context)
343 {
344     struct intel_batchbuffer *batch = gen6_encoder_context->base.batch;
345     struct gen6_mfc_context *mfc_context = &gen6_encoder_context->mfc_context;
346
347     int width_in_mbs = (mfc_context->surface_state.width + 15) / 16;
348     int height_in_mbs = (mfc_context->surface_state.height + 15) / 16;
349
350     BEGIN_BCS_BATCH(batch, 16);
351     OUT_BCS_BATCH(batch, MFX_AVC_IMG_STATE | (16 - 2));
352     OUT_BCS_BATCH(batch,
353                   ((width_in_mbs * height_in_mbs) & 0xFFFF));
354     OUT_BCS_BATCH(batch, 
355                   ((height_in_mbs - 1) << 16) | 
356                   ((width_in_mbs - 1) << 0));
357     OUT_BCS_BATCH(batch, 
358                   (0 << 24) |   /* Second Chroma QP Offset */
359                   (0 << 16) |   /* Chroma QP Offset */
360                   (0 << 14) |   /* Max-bit conformance Intra flag */
361                   (0 << 13) |   /* Max Macroblock size conformance Inter flag */
362                   (0 << 12) |   /* FIXME: Weighted_Pred_Flag */
363                   (0 << 10) |   /* FIXME: Weighted_BiPred_Idc */
364                   (0 << 8)  |   /* FIXME: Image Structure */
365                   (0 << 0) );   /* Current Decoed Image Frame Store ID, reserved in Encode mode */
366     OUT_BCS_BATCH(batch,
367                   (0 << 16) |   /* Mininum Frame size */
368                   (0 << 15) |   /* Disable reading of Macroblock Status Buffer */
369                   (0 << 14) |   /* Load BitStream Pointer only once, 1 slic 1 frame */
370                   (0 << 13) |   /* CABAC 0 word insertion test enable */
371                   (1 << 12) |   /* MVUnpackedEnable,compliant to DXVA */
372                   (1 << 10) |   /* Chroma Format IDC, 4:2:0 */
373                   (0 << 9)  |   /* FIXME: MbMvFormatFlag */
374                   (1 << 7)  |   /* 0:CAVLC encoding mode,1:CABAC */
375                   (0 << 6)  |   /* Only valid for VLD decoding mode */
376                   (0 << 5)  |   /* Constrained Intra Predition Flag, from PPS */
377                   (0 << 4)  |   /* Direct 8x8 inference flag */
378                   (0 << 3)  |   /* Only 8x8 IDCT Transform Mode Flag */
379                   (1 << 2)  |   /* Frame MB only flag */
380                   (0 << 1)  |   /* MBAFF mode is in active */
381                   (0 << 0));    /* Field picture flag */
382     OUT_BCS_BATCH(batch, 0);    /* Mainly about MB rate control and debug, just ignoring */
383     OUT_BCS_BATCH(batch,        /* Inter and Intra Conformance Max size limit */
384                   (0xBB8 << 16) |       /* InterMbMaxSz */
385                   (0xEE8) );            /* IntraMbMaxSz */
386     OUT_BCS_BATCH(batch, 0);            /* Reserved */
387     OUT_BCS_BATCH(batch, 0);            /* Slice QP Delta for bitrate control */
388     OUT_BCS_BATCH(batch, 0);            /* Slice QP Delta for bitrate control */        
389     OUT_BCS_BATCH(batch, 0x8C000000);
390     OUT_BCS_BATCH(batch, 0x00010000);
391     OUT_BCS_BATCH(batch, 0);
392     OUT_BCS_BATCH(batch, 0);
393     OUT_BCS_BATCH(batch, 0);
394     OUT_BCS_BATCH(batch, 0);
395
396     ADVANCE_BCS_BATCH(batch);
397 }
398
399 static void gen6_mfc_avc_directmode_state(VADriverContextP ctx, struct gen6_encoder_context *gen6_encoder_context)
400 {
401     struct intel_batchbuffer *batch = gen6_encoder_context->base.batch;
402     int i;
403
404     BEGIN_BCS_BATCH(batch, 69);
405
406     OUT_BCS_BATCH(batch, MFX_AVC_DIRECTMODE_STATE | (69 - 2));
407     //TODO: reference DMV
408     for(i = 0; i < 16; i++){
409         OUT_BCS_BATCH(batch, 0);
410         OUT_BCS_BATCH(batch, 0);
411     }
412
413     //TODO: current DMV just for test
414 #if 0
415     OUT_BCS_RELOC(batch, mfc_context->direct_mv_buffers[0].bo,
416                   I915_GEM_DOMAIN_INSTRUCTION, I915_GEM_DOMAIN_INSTRUCTION,
417                   0);
418 #else
419     //drm_intel_bo_pin(mfc_context->direct_mv_buffers[0].bo, 0x1000);
420     //OUT_BCS_BATCH(batch, mfc_context->direct_mv_buffers[0].bo->offset);
421     OUT_BCS_BATCH(batch, 0);
422 #endif
423
424
425     OUT_BCS_BATCH(batch, 0);
426
427     //TODO: POL list
428     for(i = 0; i < 34; i++) {
429         OUT_BCS_BATCH(batch, 0);
430     }
431
432     ADVANCE_BCS_BATCH(batch);
433 }
434
435 static void gen6_mfc_avc_slice_state(VADriverContextP ctx,
436                                      int slice_type,
437                                      struct gen6_encoder_context *gen6_encoder_context,
438                                      int rate_control_enable,
439                                      int qp)
440 {
441     struct intel_batchbuffer *batch = gen6_encoder_context->base.batch;
442     struct gen6_mfc_context *mfc_context = &gen6_encoder_context->mfc_context;
443
444     BEGIN_BCS_BATCH(batch, 11);;
445
446     OUT_BCS_BATCH(batch, MFX_AVC_SLICE_STATE | (11 - 2) );
447
448         OUT_BCS_BATCH(batch, slice_type);                       /*Slice Type: I:P:B Slice*/
449
450     if ( slice_type == SLICE_TYPE_I )
451         OUT_BCS_BATCH(batch, 0);                        /*no reference frames and pred_weight_table*/
452     else 
453         OUT_BCS_BATCH(batch, 0x00010000);       /*1 reference frame*/
454
455     OUT_BCS_BATCH(batch, (0<<24) |                /*Enable deblocking operation*/
456                   (qp<<16) |                    /*Slice Quantization Parameter*/
457                   0x0202 );
458     OUT_BCS_BATCH(batch, 0);                    /*First MB X&Y , the postion of current slice*/
459     OUT_BCS_BATCH(batch, ( ((mfc_context->surface_state.height+15)/16) << 16) );
460
461     OUT_BCS_BATCH(batch, 
462                   (rate_control_enable<<31) |           /*in CBR mode RateControlCounterEnable = enable*/
463                   (1<<30) |             /*ResetRateControlCounter*/
464                   (0<<28) |             /*RC Triggle Mode = Always Rate Control*/
465                   (8<<24) |     /*RC Stable Tolerance, middle level*/
466                   (rate_control_enable<<23) |     /*RC Panic Enable*/                  
467                   (0<<22) |     /*QP mode, don't modfiy CBP*/
468                   (1<<19) |     /*IsLastSlice*/
469                   (0<<18) |     /*BitstreamOutputFlag Compressed BitStream Output Disable Flag 0:enable 1:disable*/
470                   (1<<17) |         /*HeaderPresentFlag*/       
471                   (1<<16) |         /*SliceData PresentFlag*/
472                   (1<<15) |         /*TailPresentFlag*/
473                   (1<<13) |         /*RBSP NAL TYPE*/   
474                   (0<<12) );    /*CabacZeroWordInsertionEnable*/
475         
476     OUT_BCS_RELOC(batch, mfc_context->mfc_indirect_pak_bse_object.bo,
477                   I915_GEM_DOMAIN_INSTRUCTION, I915_GEM_DOMAIN_INSTRUCTION,
478                   mfc_context->mfc_indirect_pak_bse_object.offset);
479
480     OUT_BCS_BATCH(batch, (24<<24) |     /*Target QP - 24 is lowest QP*/ 
481                          (20<<16) |     /*Target QP + 20 is highest QP*/
482                          (8<<12)  |
483                          (8<<8)   |
484                          (8<<4)   |
485                          (8<<0));
486     OUT_BCS_BATCH(batch, 0x08888888);   
487     OUT_BCS_BATCH(batch, 0);
488
489     ADVANCE_BCS_BATCH(batch);
490 }
491 static void gen6_mfc_avc_qm_state(VADriverContextP ctx, struct gen6_encoder_context *gen6_encoder_context)
492 {
493     struct intel_batchbuffer *batch = gen6_encoder_context->base.batch;
494     int i;
495
496     BEGIN_BCS_BATCH(batch, 58);
497
498     OUT_BCS_BATCH(batch, MFX_AVC_QM_STATE | 56);
499     OUT_BCS_BATCH(batch, 0xFF ) ; 
500     for( i = 0; i < 56; i++) {
501         OUT_BCS_BATCH(batch, 0x10101010); 
502     }   
503
504     ADVANCE_BCS_BATCH(batch);
505 }
506
507 static void gen6_mfc_avc_fqm_state(VADriverContextP ctx, struct gen6_encoder_context *gen6_encoder_context)
508 {
509     struct intel_batchbuffer *batch = gen6_encoder_context->base.batch;
510     int i;
511
512     BEGIN_BCS_BATCH(batch, 113);
513     OUT_BCS_BATCH(batch, MFC_AVC_FQM_STATE | (113 - 2));
514
515     for(i = 0; i < 112;i++) {
516         OUT_BCS_BATCH(batch, 0x10001000);
517     }   
518
519     ADVANCE_BCS_BATCH(batch);   
520 }
521
522 static void
523 gen7_mfc_qm_state(VADriverContextP ctx,
524                   int qm_type,
525                   unsigned int *qm,
526                   int qm_length,
527                   struct gen6_encoder_context *gen6_encoder_context)
528 {
529     struct intel_batchbuffer *batch = gen6_encoder_context->base.batch;
530     unsigned int qm_buffer[16];
531
532     assert(qm_length <= 16);
533     assert(sizeof(*qm) == 4);
534     memcpy(qm_buffer, qm, qm_length * 4);
535
536     BEGIN_BCS_BATCH(batch, 18);
537     OUT_BCS_BATCH(batch, MFX_QM_STATE | (18 - 2));
538     OUT_BCS_BATCH(batch, qm_type << 0);
539     intel_batchbuffer_data(batch, qm_buffer, 16 * 4);
540     ADVANCE_BCS_BATCH(batch);
541 }
542
543 static void gen7_mfc_avc_qm_state(VADriverContextP ctx, struct gen6_encoder_context *gen6_encoder_context)
544 {
545     unsigned int qm[16] = {
546         0x10101010, 0x10101010, 0x10101010, 0x10101010,
547         0x10101010, 0x10101010, 0x10101010, 0x10101010,
548         0x10101010, 0x10101010, 0x10101010, 0x10101010,
549         0x10101010, 0x10101010, 0x10101010, 0x10101010
550     };
551
552     gen7_mfc_qm_state(ctx, MFX_QM_AVC_4X4_INTRA_MATRIX, qm, 12, gen6_encoder_context);
553     gen7_mfc_qm_state(ctx, MFX_QM_AVC_4X4_INTER_MATRIX, qm, 12, gen6_encoder_context);
554     gen7_mfc_qm_state(ctx, MFX_QM_AVC_8x8_INTRA_MATRIX, qm, 16, gen6_encoder_context);
555     gen7_mfc_qm_state(ctx, MFX_QM_AVC_8x8_INTER_MATRIX, qm, 16, gen6_encoder_context);
556 }
557
558 static void
559 gen7_mfc_fqm_state(VADriverContextP ctx,
560                    int fqm_type,
561                    unsigned int *fqm,
562                    int fqm_length,
563                    struct gen6_encoder_context *gen6_encoder_context)
564 {
565     struct intel_batchbuffer *batch = gen6_encoder_context->base.batch;
566     unsigned int fqm_buffer[32];
567
568     assert(fqm_length <= 32);
569     assert(sizeof(*fqm) == 4);
570     memcpy(fqm_buffer, fqm, fqm_length * 4);
571
572     BEGIN_BCS_BATCH(batch, 34);
573     OUT_BCS_BATCH(batch, MFX_FQM_STATE | (34 - 2));
574     OUT_BCS_BATCH(batch, fqm_type << 0);
575     intel_batchbuffer_data(batch, fqm_buffer, 32 * 4);
576     ADVANCE_BCS_BATCH(batch);
577 }
578
579 static void gen7_mfc_avc_fqm_state(VADriverContextP ctx, struct gen6_encoder_context *gen6_encoder_context)
580 {
581     unsigned int qm[32] = {
582         0x10001000, 0x10001000, 0x10001000, 0x10001000,
583         0x10001000, 0x10001000, 0x10001000, 0x10001000,
584         0x10001000, 0x10001000, 0x10001000, 0x10001000,
585         0x10001000, 0x10001000, 0x10001000, 0x10001000,
586         0x10001000, 0x10001000, 0x10001000, 0x10001000,
587         0x10001000, 0x10001000, 0x10001000, 0x10001000,
588         0x10001000, 0x10001000, 0x10001000, 0x10001000,
589         0x10001000, 0x10001000, 0x10001000, 0x10001000
590     };
591
592     gen7_mfc_fqm_state(ctx, MFX_QM_AVC_4X4_INTRA_MATRIX, qm, 24, gen6_encoder_context);
593     gen7_mfc_fqm_state(ctx, MFX_QM_AVC_4X4_INTER_MATRIX, qm, 24, gen6_encoder_context);
594     gen7_mfc_fqm_state(ctx, MFX_QM_AVC_8x8_INTRA_MATRIX, qm, 32, gen6_encoder_context);
595     gen7_mfc_fqm_state(ctx, MFX_QM_AVC_8x8_INTER_MATRIX, qm, 32, gen6_encoder_context);
596 }
597
598 static void gen6_mfc_avc_ref_idx_state(VADriverContextP ctx, struct gen6_encoder_context *gen6_encoder_context)
599 {
600     struct intel_batchbuffer *batch = gen6_encoder_context->base.batch;
601     int i;
602
603         BEGIN_BCS_BATCH(batch, 10);
604         OUT_BCS_BATCH(batch, MFX_AVC_REF_IDX_STATE | 8); 
605         OUT_BCS_BATCH(batch, 0);                  //Select L0
606         OUT_BCS_BATCH(batch, 0x80808020);         //Only 1 reference
607         for(i = 0; i < 7; i++) {
608                 OUT_BCS_BATCH(batch, 0x80808080);
609         }   
610         ADVANCE_BCS_BATCH(batch);
611
612         BEGIN_BCS_BATCH(batch, 10);
613         OUT_BCS_BATCH(batch, MFX_AVC_REF_IDX_STATE | 8); 
614         OUT_BCS_BATCH(batch, 1);                  //Select L1
615         OUT_BCS_BATCH(batch, 0x80808022);         //Only 1 reference
616         for(i = 0; i < 7; i++) {
617                 OUT_BCS_BATCH(batch, 0x80808080);
618         }   
619         ADVANCE_BCS_BATCH(batch);
620 }
621         
622 static void
623 gen6_mfc_avc_insert_object(VADriverContextP ctx, struct gen6_encoder_context *gen6_encoder_context,
624                            unsigned int *insert_data, int lenght_in_dws, int data_bits_in_last_dw,
625                            int skip_emul_byte_count, int is_last_header, int is_end_of_slice)
626 {
627     struct intel_batchbuffer *batch = gen6_encoder_context->base.batch;
628
629     BEGIN_BCS_BATCH(batch, lenght_in_dws + 2);
630
631     OUT_BCS_BATCH(batch, MFC_AVC_INSERT_OBJECT | (lenght_in_dws + 2 - 2));
632     OUT_BCS_BATCH(batch,
633                   (0 << 16) |   /* always start at offset 0 */
634                   (data_bits_in_last_dw << 8) |
635                   (skip_emul_byte_count << 4) |
636                   (1 << 3) |    /* FIXME: ??? */
637                   ((!!is_last_header) << 2) |
638                   ((!!is_end_of_slice) << 1) |
639                   (0 << 0));    /* FIXME: ??? */
640
641     intel_batchbuffer_data(batch, insert_data, lenght_in_dws * 4);
642     ADVANCE_BCS_BATCH(batch);
643 }
644
645 static int
646 gen6_mfc_avc_pak_object_intra(VADriverContextP ctx, int x, int y, int end_mb, int qp,unsigned int *msg,
647                               struct gen6_encoder_context *gen6_encoder_context,
648                               int intra_mb_size_in_bits)
649 {
650     struct intel_batchbuffer *batch = gen6_encoder_context->base.batch;
651     int len_in_dwords = 11;
652     unsigned char target_mb_size = intra_mb_size_in_bits / 16;     //In Words
653     unsigned char max_mb_size = target_mb_size * 2 > 255? 255: target_mb_size * 2 ;
654
655     BEGIN_BCS_BATCH(batch, len_in_dwords);
656
657     OUT_BCS_BATCH(batch, MFC_AVC_PAK_OBJECT | (len_in_dwords - 2));
658     OUT_BCS_BATCH(batch, 0);
659     OUT_BCS_BATCH(batch, 0);
660     OUT_BCS_BATCH(batch, 
661                   (0 << 24) |           /* PackedMvNum, Debug*/
662                   (0 << 20) |           /* No motion vector */
663                   (1 << 19) |           /* CbpDcY */
664                   (1 << 18) |           /* CbpDcU */
665                   (1 << 17) |           /* CbpDcV */
666                   (msg[0] & 0xFFFF) );
667
668     OUT_BCS_BATCH(batch, (0xFFFF<<16) | (y << 8) | x);          /* Code Block Pattern for Y*/
669     OUT_BCS_BATCH(batch, 0x000F000F);                                                   /* Code Block Pattern */                
670     OUT_BCS_BATCH(batch, (0 << 27) | (end_mb << 26) | qp);      /* Last MB */
671
672     /*Stuff for Intra MB*/
673     OUT_BCS_BATCH(batch, msg[1]);                       /* We using Intra16x16 no 4x4 predmode*/        
674     OUT_BCS_BATCH(batch, msg[2]);       
675     OUT_BCS_BATCH(batch, msg[3]&0xFC);          
676     
677     /*MaxSizeInWord and TargetSzieInWord*/
678     OUT_BCS_BATCH(batch, (max_mb_size << 24) |
679                          (target_mb_size << 16) );
680
681     ADVANCE_BCS_BATCH(batch);
682
683     return len_in_dwords;
684 }
685
686 static int gen6_mfc_avc_pak_object_inter(VADriverContextP ctx, int x, int y, int end_mb, int qp, unsigned int offset,
687                                          struct gen6_encoder_context *gen6_encoder_context,
688                                          int inter_mb_size_in_bits)
689 {
690     struct intel_batchbuffer *batch = gen6_encoder_context->base.batch;
691     int len_in_dwords = 11;
692     unsigned char target_mb_size = inter_mb_size_in_bits / 16;     //In Words
693     unsigned char max_mb_size = target_mb_size * 16 > 255? 255: target_mb_size * 16 ;
694
695     BEGIN_BCS_BATCH(batch, len_in_dwords);
696
697     OUT_BCS_BATCH(batch, MFC_AVC_PAK_OBJECT | (len_in_dwords - 2));
698
699     OUT_BCS_BATCH(batch, 32);         /* 32 MV*/
700     OUT_BCS_BATCH(batch, offset);
701
702     OUT_BCS_BATCH(batch, 
703                   (1 << 24) |     /* PackedMvNum, Debug*/
704                   (4 << 20) |     /* 8 MV, SNB don't use it*/
705                   (1 << 19) |     /* CbpDcY */
706                   (1 << 18) |     /* CbpDcU */
707                   (1 << 17) |     /* CbpDcV */
708                   (0 << 15) |     /* Transform8x8Flag = 0*/
709                   (0 << 14) |     /* Frame based*/
710                   (0 << 13) |     /* Inter MB */
711                   (1 << 8)  |     /* MbType = P_L0_16x16 */   
712                   (0 << 7)  |     /* MBZ for frame */
713                   (0 << 6)  |     /* MBZ */
714                   (2 << 4)  |     /* MBZ for inter*/
715                   (0 << 3)  |     /* MBZ */
716                   (0 << 2)  |     /* SkipMbFlag */
717                   (0 << 0));      /* InterMbMode */
718
719     OUT_BCS_BATCH(batch, (0xFFFF<<16) | (y << 8) | x);        /* Code Block Pattern for Y*/
720     OUT_BCS_BATCH(batch, 0x000F000F);                         /* Code Block Pattern */    
721     OUT_BCS_BATCH(batch, (0 << 27) | (end_mb << 26) | qp);      /* Last MB */
722
723     /*Stuff for Inter MB*/
724     OUT_BCS_BATCH(batch, 0x0);        
725     OUT_BCS_BATCH(batch, 0x0);    
726     OUT_BCS_BATCH(batch, 0x0);        
727
728     /*MaxSizeInWord and TargetSzieInWord*/
729     OUT_BCS_BATCH(batch, (max_mb_size << 24) |
730                          (target_mb_size << 16) );
731
732     ADVANCE_BCS_BATCH(batch);
733
734     return len_in_dwords;
735 }
736
737 static void gen6_mfc_init(VADriverContextP ctx, struct gen6_encoder_context *gen6_encoder_context)
738 {
739     struct i965_driver_data *i965 = i965_driver_data(ctx);
740     struct gen6_mfc_context *mfc_context = &gen6_encoder_context->mfc_context;
741     dri_bo *bo;
742     int i;
743
744     /*Encode common setup for MFC*/
745     dri_bo_unreference(mfc_context->post_deblocking_output.bo);
746     mfc_context->post_deblocking_output.bo = NULL;
747
748     dri_bo_unreference(mfc_context->pre_deblocking_output.bo);
749     mfc_context->pre_deblocking_output.bo = NULL;
750
751     dri_bo_unreference(mfc_context->uncompressed_picture_source.bo);
752     mfc_context->uncompressed_picture_source.bo = NULL;
753
754     dri_bo_unreference(mfc_context->mfc_indirect_pak_bse_object.bo); 
755     mfc_context->mfc_indirect_pak_bse_object.bo = NULL;
756
757     for (i = 0; i < NUM_MFC_DMV_BUFFERS; i++){
758         dri_bo_unreference(mfc_context->direct_mv_buffers[i].bo);
759         mfc_context->direct_mv_buffers[i].bo = NULL;
760     }
761
762     for (i = 0; i < MAX_MFC_REFERENCE_SURFACES; i++){
763         if (mfc_context->reference_surfaces[i].bo != NULL)
764             dri_bo_unreference(mfc_context->reference_surfaces[i].bo);
765         mfc_context->reference_surfaces[i].bo = NULL;  
766     }
767
768     dri_bo_unreference(mfc_context->intra_row_store_scratch_buffer.bo);
769     bo = dri_bo_alloc(i965->intel.bufmgr,
770                       "Buffer",
771                       128 * 64,
772                       64);
773     assert(bo);
774     mfc_context->intra_row_store_scratch_buffer.bo = bo;
775
776     dri_bo_unreference(mfc_context->macroblock_status_buffer.bo);
777     bo = dri_bo_alloc(i965->intel.bufmgr,
778                       "Buffer",
779                       4*9600,
780                       64);
781     assert(bo);
782     mfc_context->macroblock_status_buffer.bo = bo;
783
784     dri_bo_unreference(mfc_context->deblocking_filter_row_store_scratch_buffer.bo);
785     bo = dri_bo_alloc(i965->intel.bufmgr,
786                       "Buffer",
787                       49152,  /* 6 * 128 * 64 */
788                       64);
789     assert(bo);
790     mfc_context->deblocking_filter_row_store_scratch_buffer.bo = bo;
791
792     dri_bo_unreference(mfc_context->bsd_mpc_row_store_scratch_buffer.bo);
793     bo = dri_bo_alloc(i965->intel.bufmgr,
794                       "Buffer",
795                       12288, /* 1.5 * 128 * 64 */
796                       0x1000);
797     assert(bo);
798     mfc_context->bsd_mpc_row_store_scratch_buffer.bo = bo;
799 }
800
801 void gen6_mfc_avc_pipeline_programing(VADriverContextP ctx,
802                                       struct encode_state *encode_state,
803                                       struct gen6_encoder_context *gen6_encoder_context)
804 {
805     struct i965_driver_data *i965 = i965_driver_data(ctx);
806     struct intel_batchbuffer *batch = gen6_encoder_context->base.batch;
807     struct gen6_mfc_context *mfc_context = &gen6_encoder_context->mfc_context;
808     struct gen6_vme_context *vme_context = &gen6_encoder_context->vme_context;
809     VAEncSequenceParameterBufferH264Ext *pSequenceParameter = (VAEncSequenceParameterBufferH264Ext *)encode_state->seq_param_ext->buffer;
810     VAEncPictureParameterBufferH264Ext *pPicParameter = (VAEncPictureParameterBufferH264Ext *)encode_state->pic_param_ext->buffer;
811     VAEncSliceParameterBufferH264Ext *pSliceParameter = (VAEncSliceParameterBufferH264Ext *)encode_state->slice_params_ext[0]->buffer; /* FIXME: multi slices */
812     VAEncH264DecRefPicMarkingBuffer *pDecRefPicMarking = NULL;
813     unsigned int *msg = NULL, offset = 0;
814     int emit_new_state = 1, object_len_in_bytes;
815     int is_intra = pSliceParameter->slice_type == SLICE_TYPE_I;
816     int width_in_mbs = (mfc_context->surface_state.width + 15) / 16;
817     int height_in_mbs = (mfc_context->surface_state.height + 15) / 16;
818     int x,y;
819     int rate_control_mode = pSequenceParameter->rate_control_method; 
820     float fps =  pSequenceParameter->time_scale * 0.5 / pSequenceParameter->num_units_in_tick ;
821     int inter_mb_size = pSequenceParameter->bits_per_second * 1.0 / fps / width_in_mbs / height_in_mbs;
822     int intra_mb_size = inter_mb_size * 5.0;
823     int qp = pPicParameter->pic_init_qp;
824     unsigned char *slice_header = NULL;
825     int slice_header_length_in_bits = 0;
826     unsigned int tail_data[] = { 0x0 };
827
828     if (encode_state->dec_ref_pic_marking)
829         pDecRefPicMarking = (VAEncH264DecRefPicMarkingBuffer *)encode_state->dec_ref_pic_marking->buffer;
830
831     slice_header_length_in_bits = build_avc_slice_header(pSequenceParameter, pPicParameter, pSliceParameter, pDecRefPicMarking, &slice_header);
832
833     if ( rate_control_mode != 2) {
834         qp = 26;
835         if ( intra_mb_size > 384*8)         //ONE MB raw data is 384 bytes
836             intra_mb_size = 384*8;
837         if ( inter_mb_size > 256*8)
838             intra_mb_size = 256*8;
839     }
840
841     intel_batchbuffer_start_atomic_bcs(batch, 0x1000); 
842     
843     if (is_intra) {
844         dri_bo_map(vme_context->vme_output.bo , 1);
845         msg = (unsigned int *)vme_context->vme_output.bo->virtual;
846     }
847
848     for (y = 0; y < height_in_mbs; y++) {
849         for (x = 0; x < width_in_mbs; x++) { 
850             int last_mb = (y == (height_in_mbs-1)) && ( x == (width_in_mbs-1) );
851             
852             if (emit_new_state) {
853                 intel_batchbuffer_emit_mi_flush(batch);
854                 
855                 if (IS_GEN7(i965->intel.device_id)) {
856                     gen7_mfc_pipe_mode_select(ctx, MFX_FORMAT_AVC, gen6_encoder_context);
857                     gen7_mfc_surface_state(ctx, gen6_encoder_context);
858                     gen7_mfc_ind_obj_base_addr_state(ctx, gen6_encoder_context);
859                 } else {
860                     gen6_mfc_pipe_mode_select(ctx, gen6_encoder_context);
861                     gen6_mfc_surface_state(ctx, gen6_encoder_context);
862                     gen6_mfc_ind_obj_base_addr_state(ctx, gen6_encoder_context);
863                 }
864
865                 gen6_mfc_pipe_buf_addr_state(ctx, gen6_encoder_context);
866                 gen6_mfc_bsp_buf_base_addr_state(ctx, gen6_encoder_context);
867
868                 if (IS_GEN7(i965->intel.device_id)) {
869                     gen7_mfc_avc_img_state(ctx, gen6_encoder_context);
870                     gen7_mfc_avc_qm_state(ctx, gen6_encoder_context);
871                     gen7_mfc_avc_fqm_state(ctx, gen6_encoder_context);
872                 } else {
873                     gen6_mfc_avc_img_state(ctx, gen6_encoder_context);
874                     gen6_mfc_avc_qm_state(ctx, gen6_encoder_context);
875                     gen6_mfc_avc_fqm_state(ctx, gen6_encoder_context);
876                 }
877
878                 gen6_mfc_avc_ref_idx_state(ctx, gen6_encoder_context);
879                 gen6_mfc_avc_slice_state(ctx, pSliceParameter->slice_type, gen6_encoder_context, rate_control_mode == 0, qp);
880                 gen6_mfc_avc_insert_object(ctx, gen6_encoder_context,
881                                            (unsigned int *)slice_header, ALIGN(slice_header_length_in_bits, 32) >> 5, slice_header_length_in_bits & 0x1f,
882                                            5, 1, 0); /* first 5 bytes are start code + nal unit type */
883                 emit_new_state = 0;
884             }
885
886             if (is_intra) {
887                 assert(msg);
888                 object_len_in_bytes = gen6_mfc_avc_pak_object_intra(ctx, x, y, last_mb, qp, msg, gen6_encoder_context, intra_mb_size);
889                 msg += 4;
890             } else {
891                 object_len_in_bytes = gen6_mfc_avc_pak_object_inter(ctx, x, y, last_mb, qp, offset, gen6_encoder_context, inter_mb_size);
892                 offset += 64;
893             }
894
895             if (intel_batchbuffer_check_free_space(batch, object_len_in_bytes) == 0) {
896                 assert(0);
897                 intel_batchbuffer_end_atomic(batch);
898                 intel_batchbuffer_flush(batch);
899                 emit_new_state = 1;
900                 intel_batchbuffer_start_atomic_bcs(batch, 0x1000);
901             }
902         }
903     }
904
905     gen6_mfc_avc_insert_object(ctx, gen6_encoder_context,
906                                tail_data, sizeof(tail_data) >> 2, 32,
907                                sizeof(tail_data), 1, 1);
908
909     if (is_intra)
910         dri_bo_unmap(vme_context->vme_output.bo);
911
912     free(slice_header);
913
914     intel_batchbuffer_end_atomic(batch);
915 }
916
917 static VAStatus gen6_mfc_avc_prepare(VADriverContextP ctx, 
918                                      struct encode_state *encode_state,
919                                      struct gen6_encoder_context *gen6_encoder_context)
920 {
921     struct i965_driver_data *i965 = i965_driver_data(ctx);
922     struct gen6_mfc_context *mfc_context = &gen6_encoder_context->mfc_context;
923     struct object_surface *obj_surface; 
924     struct object_buffer *obj_buffer;
925     dri_bo *bo;
926     VAEncPictureParameterBufferH264Ext *pPicParameter = (VAEncPictureParameterBufferH264Ext *)encode_state->pic_param_ext->buffer;
927     VAStatus vaStatus = VA_STATUS_SUCCESS;
928         int i;
929
930     /*Setup all the input&output object*/
931     obj_surface = SURFACE(pPicParameter->CurrPic.picture_id);
932     assert(obj_surface);
933     i965_check_alloc_surface_bo(ctx, obj_surface, 1, VA_FOURCC('N','V','1','2'));
934
935     mfc_context->post_deblocking_output.bo = obj_surface->bo;
936     dri_bo_reference(mfc_context->post_deblocking_output.bo);
937
938     mfc_context->surface_state.width = obj_surface->orig_width;
939     mfc_context->surface_state.height = obj_surface->orig_height;
940     mfc_context->surface_state.w_pitch = obj_surface->width;
941     mfc_context->surface_state.h_pitch = obj_surface->height;
942
943     for(i = 0; i < MAX_MFC_REFERENCE_SURFACES; i++) {
944                 if ( pPicParameter->ReferenceFrames[i].picture_id != VA_INVALID_ID ) { 
945                         obj_surface = SURFACE(pPicParameter->ReferenceFrames[i].picture_id);
946                         assert(obj_surface);
947                         if (obj_surface->bo != NULL) {
948                                 mfc_context->reference_surfaces[i].bo = obj_surface->bo;
949                                 dri_bo_reference(obj_surface->bo);
950                         }
951                 } else {
952                         break;
953                 }
954         }
955         
956     obj_surface = SURFACE(encode_state->current_render_target);
957     assert(obj_surface && obj_surface->bo);
958     mfc_context->uncompressed_picture_source.bo = obj_surface->bo;
959     dri_bo_reference(mfc_context->uncompressed_picture_source.bo);
960
961     obj_buffer = BUFFER (pPicParameter->CodedBuf); /* FIXME: fix this later */
962     bo = obj_buffer->buffer_store->bo;
963     assert(bo);
964     mfc_context->mfc_indirect_pak_bse_object.bo = bo;
965     mfc_context->mfc_indirect_pak_bse_object.offset = ALIGN(sizeof(VACodedBufferSegment), 64);
966     dri_bo_reference(mfc_context->mfc_indirect_pak_bse_object.bo);
967
968     /*Programing bcs pipeline*/
969     gen6_mfc_avc_pipeline_programing(ctx, encode_state, gen6_encoder_context);  //filling the pipeline
970         
971     return vaStatus;
972 }
973
974 static VAStatus gen6_mfc_run(VADriverContextP ctx, 
975                              struct encode_state *encode_state,
976                              struct gen6_encoder_context *gen6_encoder_context)
977 {
978     struct intel_batchbuffer *batch = gen6_encoder_context->base.batch;
979
980     intel_batchbuffer_flush(batch);             //run the pipeline
981
982     return VA_STATUS_SUCCESS;
983 }
984
985 static VAStatus gen6_mfc_stop(VADriverContextP ctx, 
986                               struct encode_state *encode_state,
987                               struct gen6_encoder_context *gen6_encoder_context)
988 {
989 #if 0
990     struct i965_driver_data *i965 = i965_driver_data(ctx);
991     struct gen6_mfc_context *mfc_context = &gen6_encoder_context->mfc_context;
992         
993     VAEncPictureParameterBufferH264Ext *pPicParameter = (VAEncPictureParameterBufferH264Ext *)encode_state->pic_param_ext->buffer;
994         
995     struct object_surface *obj_surface = SURFACE(pPicParameter->reconstructed_picture);
996     //struct object_surface *obj_surface = SURFACE(pPicParameter->reference_picture[0]);
997     //struct object_surface *obj_surface = SURFACE(encode_state->current_render_target);
998     my_debug(obj_surface);
999
1000 #endif
1001
1002     return VA_STATUS_SUCCESS;
1003 }
1004
1005 static VAStatus
1006 gen6_mfc_avc_encode_picture(VADriverContextP ctx, 
1007                             struct encode_state *encode_state,
1008                             struct gen6_encoder_context *gen6_encoder_context)
1009 {
1010     gen6_mfc_init(ctx, gen6_encoder_context);
1011     gen6_mfc_avc_prepare(ctx, encode_state, gen6_encoder_context);
1012     gen6_mfc_run(ctx, encode_state, gen6_encoder_context);
1013     gen6_mfc_stop(ctx, encode_state, gen6_encoder_context);
1014
1015     return VA_STATUS_SUCCESS;
1016 }
1017
1018 VAStatus
1019 gen6_mfc_pipeline(VADriverContextP ctx,
1020                   VAProfile profile,
1021                   struct encode_state *encode_state,
1022                   struct gen6_encoder_context *gen6_encoder_context)
1023 {
1024     VAStatus vaStatus;
1025
1026     switch (profile) {
1027     case VAProfileH264Baseline:
1028         vaStatus = gen6_mfc_avc_encode_picture(ctx, encode_state, gen6_encoder_context);
1029         break;
1030
1031         /* FIXME: add for other profile */
1032     default:
1033         vaStatus = VA_STATUS_ERROR_UNSUPPORTED_PROFILE;
1034         break;
1035     }
1036
1037     return vaStatus;
1038 }
1039
1040 Bool gen6_mfc_context_init(VADriverContextP ctx, struct gen6_mfc_context *mfc_context)
1041 {
1042     return True;
1043 }
1044
1045 Bool gen6_mfc_context_destroy(struct gen6_mfc_context *mfc_context)
1046 {
1047     int i;
1048
1049     dri_bo_unreference(mfc_context->post_deblocking_output.bo);
1050     mfc_context->post_deblocking_output.bo = NULL;
1051
1052     dri_bo_unreference(mfc_context->pre_deblocking_output.bo);
1053     mfc_context->pre_deblocking_output.bo = NULL;
1054
1055     dri_bo_unreference(mfc_context->uncompressed_picture_source.bo);
1056     mfc_context->uncompressed_picture_source.bo = NULL;
1057
1058     dri_bo_unreference(mfc_context->mfc_indirect_pak_bse_object.bo); 
1059     mfc_context->mfc_indirect_pak_bse_object.bo = NULL;
1060
1061     for (i = 0; i < NUM_MFC_DMV_BUFFERS; i++){
1062         dri_bo_unreference(mfc_context->direct_mv_buffers[i].bo);
1063         mfc_context->direct_mv_buffers[i].bo = NULL;
1064     }
1065
1066     dri_bo_unreference(mfc_context->intra_row_store_scratch_buffer.bo);
1067     mfc_context->intra_row_store_scratch_buffer.bo = NULL;
1068
1069         dri_bo_unreference(mfc_context->macroblock_status_buffer.bo);
1070         mfc_context->macroblock_status_buffer.bo = NULL;
1071
1072     dri_bo_unreference(mfc_context->deblocking_filter_row_store_scratch_buffer.bo);
1073     mfc_context->deblocking_filter_row_store_scratch_buffer.bo = NULL;
1074
1075     dri_bo_unreference(mfc_context->bsd_mpc_row_store_scratch_buffer.bo);
1076     mfc_context->bsd_mpc_row_store_scratch_buffer.bo = NULL;
1077
1078     return True;
1079 }