i965_drv_video: improved MV quality for VME
[platform/upstream/libva.git] / i965_drv_video / gen6_mfc.c
1 /*
2  * Copyright © 2010 Intel Corporation
3  *
4  * Permission is hereby granted, free of charge, to any person obtaining a
5  * copy of this software and associated documentation files (the
6  * "Software"), to deal in the Software without restriction, including
7  * without limitation the rights to use, copy, modify, merge, publish,
8  * distribute, sub license, and/or sell copies of the Software, and to
9  * permit persons to whom the Software is furnished to do so, subject to
10  * the following conditions:
11  *
12  * The above copyright notice and this permission notice (including the
13  * next paragraph) shall be included in all copies or substantial portions
14  * of the Software.
15  *
16  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
17  * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
18  * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
19  * IN NO EVENT SHALL PRECISION INSIGHT AND/OR ITS SUPPLIERS BE LIABLE FOR
20  * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
21  * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
22  * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
23  *
24  * Authors:
25  *    Zhou Chang <chang.zhou@intel.com>
26  *
27  */
28
29 #include <stdio.h>
30 #include <stdlib.h>
31 #include <string.h>
32 #include <assert.h>
33
34 #include "assert.h"
35 #include "intel_batchbuffer.h"
36 #include "i965_defines.h"
37 #include "i965_structs.h"
38 #include "i965_drv_video.h"
39 #include "i965_encoder.h"
40
41 static void
42 gen6_mfc_pipe_mode_select(VADriverContextP ctx, struct gen6_encoder_context *gen6_encoder_context)
43 {
44     struct intel_batchbuffer *batch = gen6_encoder_context->base.batch;
45
46     BEGIN_BCS_BATCH(batch,4);
47
48     OUT_BCS_BATCH(batch, MFX_PIPE_MODE_SELECT | (4 - 2));
49     OUT_BCS_BATCH(batch,
50                   (0 << 10) | /* disable Stream-Out */
51                   (1 << 9)  | /* Post Deblocking Output */
52                   (0 << 8)  | /* Pre Deblocking Output */
53                   (0 << 7)  | /* disable TLB prefectch */
54                   (0 << 5)  | /* not in stitch mode */
55                   (1 << 4)  | /* encoding mode */
56                   (2 << 0));  /* Standard Select: AVC */
57     OUT_BCS_BATCH(batch,
58                   (0 << 20) | /* round flag in PB slice */
59                   (0 << 19) | /* round flag in Intra8x8 */
60                   (0 << 7)  | /* expand NOA bus flag */
61                   (1 << 6)  | /* must be 1 */
62                   (0 << 5)  | /* disable clock gating for NOA */
63                   (0 << 4)  | /* terminate if AVC motion and POC table error occurs */
64                   (0 << 3)  | /* terminate if AVC mbdata error occurs */
65                   (0 << 2)  | /* terminate if AVC CABAC/CAVLC decode error occurs */
66                   (0 << 1)  | /* AVC long field motion vector */
67                   (0 << 0));  /* always calculate AVC ILDB boundary strength */
68     OUT_BCS_BATCH(batch, 0);
69
70     ADVANCE_BCS_BATCH(batch);
71 }
72
73 static void
74 gen6_mfc_surface_state(VADriverContextP ctx, struct gen6_encoder_context *gen6_encoder_context)
75 {
76     struct intel_batchbuffer *batch = gen6_encoder_context->base.batch;
77     struct gen6_mfc_context *mfc_context = &gen6_encoder_context->mfc_context;
78
79     BEGIN_BCS_BATCH(batch, 6);
80
81     OUT_BCS_BATCH(batch, MFX_SURFACE_STATE | (6 - 2));
82     OUT_BCS_BATCH(batch, 0);
83     OUT_BCS_BATCH(batch,
84                   ((mfc_context->surface_state.height - 1) << 19) |
85                   ((mfc_context->surface_state.width - 1) << 6));
86     OUT_BCS_BATCH(batch,
87                   (MFX_SURFACE_PLANAR_420_8 << 28) | /* 420 planar YUV surface */
88                   (1 << 27) | /* must be 1 for interleave U/V, hardware requirement */
89                   (0 << 22) | /* surface object control state, FIXME??? */
90                   ((mfc_context->surface_state.w_pitch - 1) << 3) | /* pitch */
91                   (0 << 2)  | /* must be 0 for interleave U/V */
92                   (1 << 1)  | /* must be y-tiled */
93                   (I965_TILEWALK_YMAJOR << 0));                         /* tile walk, TILEWALK_YMAJOR */
94     OUT_BCS_BATCH(batch,
95                   (0 << 16) |                                                           /* must be 0 for interleave U/V */
96                   (mfc_context->surface_state.h_pitch));                /* y offset for U(cb) */
97     OUT_BCS_BATCH(batch, 0);
98     ADVANCE_BCS_BATCH(batch);
99 }
100
101 static void
102 gen6_mfc_pipe_buf_addr_state(VADriverContextP ctx, struct gen6_encoder_context *gen6_encoder_context)
103 {
104     struct intel_batchbuffer *batch = gen6_encoder_context->base.batch;
105     struct gen6_mfc_context *mfc_context = &gen6_encoder_context->mfc_context;
106     int i;
107
108     BEGIN_BCS_BATCH(batch, 24);
109
110     OUT_BCS_BATCH(batch, MFX_PIPE_BUF_ADDR_STATE | (24 - 2));
111
112     OUT_BCS_BATCH(batch, 0);                                                                                    /* pre output addr   */
113
114     OUT_BCS_RELOC(batch, mfc_context->post_deblocking_output.bo,
115                   I915_GEM_DOMAIN_INSTRUCTION, I915_GEM_DOMAIN_INSTRUCTION,
116                   0);                                                                                   /* post output addr  */ 
117
118     OUT_BCS_RELOC(batch, mfc_context->uncompressed_picture_source.bo,
119                   I915_GEM_DOMAIN_INSTRUCTION, I915_GEM_DOMAIN_INSTRUCTION,
120                   0);                                                                                   /* uncompressed data */
121
122     OUT_BCS_BATCH(batch, 0);                                                                                    /* StreamOut data*/
123     OUT_BCS_RELOC(batch, mfc_context->intra_row_store_scratch_buffer.bo,
124                   I915_GEM_DOMAIN_INSTRUCTION, I915_GEM_DOMAIN_INSTRUCTION,
125                   0);   
126     OUT_BCS_RELOC(batch, mfc_context->deblocking_filter_row_store_scratch_buffer.bo,
127                   I915_GEM_DOMAIN_INSTRUCTION, I915_GEM_DOMAIN_INSTRUCTION,
128                   0);
129     /* 7..22 Reference pictures*/
130     for (i = 0; i < ARRAY_ELEMS(mfc_context->reference_surfaces); i++) {
131         if ( mfc_context->reference_surfaces[i].bo != NULL) {
132             OUT_BCS_RELOC(batch, mfc_context->reference_surfaces[i].bo,
133                           I915_GEM_DOMAIN_INSTRUCTION, I915_GEM_DOMAIN_INSTRUCTION,
134                           0);                   
135         } else {
136             OUT_BCS_BATCH(batch, 0);
137         }
138     }
139     OUT_BCS_BATCH(batch, 0);                                                                                    /* no block status  */
140
141     ADVANCE_BCS_BATCH(batch);
142 }
143
144 static void
145 gen6_mfc_ind_obj_base_addr_state(VADriverContextP ctx, struct gen6_encoder_context *gen6_encoder_context)
146 {
147     struct intel_batchbuffer *batch = gen6_encoder_context->base.batch;
148     struct gen6_vme_context *vme_context = &gen6_encoder_context->vme_context;
149
150     BEGIN_BCS_BATCH(batch, 11);
151
152     OUT_BCS_BATCH(batch, MFX_IND_OBJ_BASE_ADDR_STATE | (11 - 2));
153     OUT_BCS_BATCH(batch, 0);
154     OUT_BCS_BATCH(batch, 0);
155     /* MFX Indirect MV Object Base Address */
156     OUT_BCS_RELOC(batch, vme_context->vme_output.bo, I915_GEM_DOMAIN_INSTRUCTION, 0, 0);
157     OUT_BCS_BATCH(batch, 0);    
158     OUT_BCS_BATCH(batch, 0);
159     OUT_BCS_BATCH(batch, 0);
160     OUT_BCS_BATCH(batch, 0);
161     OUT_BCS_BATCH(batch, 0);
162     /*MFC Indirect PAK-BSE Object Base Address for Encoder*/    
163     OUT_BCS_BATCH(batch, 0);
164     OUT_BCS_BATCH(batch, 0);
165
166     ADVANCE_BCS_BATCH(batch);
167 }
168
169 static void
170 gen6_mfc_bsp_buf_base_addr_state(VADriverContextP ctx, struct gen6_encoder_context *gen6_encoder_context)
171 {
172     struct intel_batchbuffer *batch = gen6_encoder_context->base.batch;
173     struct gen6_mfc_context *mfc_context = &gen6_encoder_context->mfc_context;
174
175     BEGIN_BCS_BATCH(batch, 4);
176
177     OUT_BCS_BATCH(batch, MFX_BSP_BUF_BASE_ADDR_STATE | (4 - 2));
178     OUT_BCS_RELOC(batch, mfc_context->bsd_mpc_row_store_scratch_buffer.bo,
179                   I915_GEM_DOMAIN_INSTRUCTION, I915_GEM_DOMAIN_INSTRUCTION,
180                   0);
181     OUT_BCS_BATCH(batch, 0);
182     OUT_BCS_BATCH(batch, 0);
183
184     ADVANCE_BCS_BATCH(batch);
185 }
186
187 static void
188 gen6_mfc_avc_img_state(VADriverContextP ctx, struct gen6_encoder_context *gen6_encoder_context)
189 {
190     struct intel_batchbuffer *batch = gen6_encoder_context->base.batch;
191     struct gen6_mfc_context *mfc_context = &gen6_encoder_context->mfc_context;
192
193     int width_in_mbs = (mfc_context->surface_state.width + 15) / 16;
194     int height_in_mbs = (mfc_context->surface_state.height + 15) / 16;
195
196     BEGIN_BCS_BATCH(batch, 13);
197     OUT_BCS_BATCH(batch, MFX_AVC_IMG_STATE | (13 - 2));
198     OUT_BCS_BATCH(batch, 
199                   ((width_in_mbs * height_in_mbs) & 0xFFFF));
200     OUT_BCS_BATCH(batch, 
201                   (height_in_mbs << 16) | 
202                   (width_in_mbs << 0));
203     OUT_BCS_BATCH(batch, 
204                   (0 << 24) |     /*Second Chroma QP Offset*/
205                   (0 << 16) |     /*Chroma QP Offset*/
206                   (0 << 14) |   /*Max-bit conformance Intra flag*/
207                   (0 << 13) |   /*Max Macroblock size conformance Inter flag*/
208                   (1 << 12) |   /*Should always be written as "1" */
209                   (0 << 10) |   /*QM Preset FLag */
210                   (0 << 8)  |   /*Image Structure*/
211                   (0 << 0) );   /*Current Decoed Image Frame Store ID, reserved in Encode mode*/
212     OUT_BCS_BATCH(batch,
213                   (0 << 16) |   /*Mininum Frame size*/  
214                   (0 << 15) |     /*Disable reading of Macroblock Status Buffer*/
215                   (0 << 14) |   /*Load BitStream Pointer only once, 1 slic 1 frame*/
216                   (0 << 13) |   /*CABAC 0 word insertion test enable*/
217                   (1 << 12) |   /*MVUnpackedEnable,compliant to DXVA*/
218                   (1 << 10) |   /*Chroma Format IDC, 4:2:0*/
219                   (1 << 7)  |   /*0:CAVLC encoding mode,1:CABAC*/
220                   (0 << 6)  |   /*Only valid for VLD decoding mode*/
221                   (0 << 5)  |   /*Constrained Intra Predition Flag, from PPS*/
222                   (0 << 4)  |   /*Direct 8x8 inference flag*/
223                   (0 << 3)  |   /*Only 8x8 IDCT Transform Mode Flag*/
224                   (1 << 2)  |   /*Frame MB only flag*/
225                   (0 << 1)  |   /*MBAFF mode is in active*/
226                   (0 << 0) );   /*Field picture flag*/
227     OUT_BCS_BATCH(batch, 0);            /*Mainly about MB rate control and debug, just ignoring*/
228     OUT_BCS_BATCH(batch,                        /*Inter and Intra Conformance Max size limit*/
229                   (0xBB8 << 16) |               /*InterMbMaxSz*/
230                   (0xEE8) );                    /*IntraMbMaxSz*/
231     OUT_BCS_BATCH(batch, 0);            /*Reserved*/
232     OUT_BCS_BATCH(batch, 0);            /*Slice QP Delta for bitrate control*/
233     OUT_BCS_BATCH(batch, 0);            /*Slice QP Delta for bitrate control*/  
234     OUT_BCS_BATCH(batch, 0x8C000000);
235     OUT_BCS_BATCH(batch, 0x00010000);
236     OUT_BCS_BATCH(batch, 0);
237
238     ADVANCE_BCS_BATCH(batch);
239 }
240
241
242 static void gen6_mfc_avc_directmode_state(VADriverContextP ctx, struct gen6_encoder_context *gen6_encoder_context)
243 {
244     struct intel_batchbuffer *batch = gen6_encoder_context->base.batch;
245     int i;
246
247     BEGIN_BCS_BATCH(batch, 69);
248
249     OUT_BCS_BATCH(batch, MFX_AVC_DIRECTMODE_STATE | (69 - 2));
250     //TODO: reference DMV
251     for(i = 0; i < 16; i++){
252         OUT_BCS_BATCH(batch, 0);
253         OUT_BCS_BATCH(batch, 0);
254     }
255
256     //TODO: current DMV just for test
257 #if 0
258     OUT_BCS_RELOC(batch, mfc_context->direct_mv_buffers[0].bo,
259                   I915_GEM_DOMAIN_INSTRUCTION, I915_GEM_DOMAIN_INSTRUCTION,
260                   0);
261 #else
262     //drm_intel_bo_pin(mfc_context->direct_mv_buffers[0].bo, 0x1000);
263     //OUT_BCS_BATCH(batch, mfc_context->direct_mv_buffers[0].bo->offset);
264     OUT_BCS_BATCH(batch, 0);
265 #endif
266
267
268     OUT_BCS_BATCH(batch, 0);
269
270     //TODO: POL list
271     for(i = 0; i < 34; i++) {
272         OUT_BCS_BATCH(batch, 0);
273     }
274
275     ADVANCE_BCS_BATCH(batch);
276 }
277
278 static void gen6_mfc_avc_slice_state(VADriverContextP ctx,
279                                      int intra_slice,
280                                      struct gen6_encoder_context *gen6_encoder_context)
281 {
282     struct intel_batchbuffer *batch = gen6_encoder_context->base.batch;
283     struct gen6_mfc_context *mfc_context = &gen6_encoder_context->mfc_context;
284
285     BEGIN_BCS_BATCH(batch, 11);;
286
287     OUT_BCS_BATCH(batch, MFX_AVC_SLICE_STATE | (11 - 2) );
288
289     if ( intra_slice )
290         OUT_BCS_BATCH(batch, 2);                        /*Slice Type: I Slice*/
291     else
292         OUT_BCS_BATCH(batch, 0);                        /*Slice Type: P Slice*/
293
294     if ( intra_slice )
295         OUT_BCS_BATCH(batch, 0);                        /*no reference frames and pred_weight_table*/
296     else 
297         OUT_BCS_BATCH(batch, 0x00010000);       /*1 reference frame*/
298
299     OUT_BCS_BATCH(batch, (0<<24) |                /*Enable deblocking operation*/
300                   (26<<16) |                    /*Slice Quantization Parameter*/
301                   0x0202 );
302     OUT_BCS_BATCH(batch, 0);                    /*First MB X&Y , the postion of current slice*/
303     OUT_BCS_BATCH(batch, ( ((mfc_context->surface_state.height+15)/16) << 16) );
304
305     OUT_BCS_BATCH(batch, 
306                   (0<<31) |             /*RateControlCounterEnable = disable*/
307                   (1<<30) |             /*ResetRateControlCounter*/
308                   (2<<28) |             /*RC Triggle Mode = Loose Rate Control*/
309                   (1<<19) |             /*IsLastSlice*/
310                   (0<<18) |             /*BitstreamOutputFlag Compressed BitStream Output Disable Flag 0:enable 1:disable*/
311                   (0<<17) |             /*HeaderPresentFlag*/   
312                   (1<<16) |             /*SliceData PresentFlag*/
313                   (0<<15) |             /*TailPresentFlag*/
314                   (1<<13) |             /*RBSP NAL TYPE*/       
315                   (0<<12) );            /*CabacZeroWordInsertionEnable*/
316         
317     OUT_BCS_RELOC(batch, mfc_context->mfc_indirect_pak_bse_object.bo,
318                   I915_GEM_DOMAIN_INSTRUCTION, I915_GEM_DOMAIN_INSTRUCTION,
319                   mfc_context->mfc_indirect_pak_bse_object.offset);
320
321     OUT_BCS_BATCH(batch, 0);
322     OUT_BCS_BATCH(batch, 0);
323     OUT_BCS_BATCH(batch, 0);
324
325     ADVANCE_BCS_BATCH(batch);
326 }
327 static void gen6_mfc_avc_qm_state(VADriverContextP ctx, struct gen6_encoder_context *gen6_encoder_context)
328 {
329     struct intel_batchbuffer *batch = gen6_encoder_context->base.batch;
330     int i;
331
332     BEGIN_BCS_BATCH(batch, 58);
333
334     OUT_BCS_BATCH(batch, MFX_AVC_QM_STATE | 56);
335     OUT_BCS_BATCH(batch, 0xFF ) ; 
336     for( i = 0; i < 56; i++) {
337         OUT_BCS_BATCH(batch, 0x10101010); 
338     }   
339
340     ADVANCE_BCS_BATCH(batch);
341 }
342
343 static void gen6_mfc_avc_fqm_state(VADriverContextP ctx, struct gen6_encoder_context *gen6_encoder_context)
344 {
345     struct intel_batchbuffer *batch = gen6_encoder_context->base.batch;
346     int i;
347
348     BEGIN_BCS_BATCH(batch, 113);
349     OUT_BCS_BATCH(batch, MFC_AVC_FQM_STATE | (113 - 2));
350
351     for(i = 0; i < 112;i++) {
352         OUT_BCS_BATCH(batch, 0x10001000);
353     }   
354
355     ADVANCE_BCS_BATCH(batch);   
356 }
357
358 static void gen6_mfc_avc_ref_idx_state(VADriverContextP ctx, struct gen6_encoder_context *gen6_encoder_context)
359 {
360     struct intel_batchbuffer *batch = gen6_encoder_context->base.batch;
361     int i;
362
363     BEGIN_BCS_BATCH(batch, 10);
364
365     OUT_BCS_BATCH(batch, MFX_AVC_REF_IDX_STATE | 8);
366     OUT_BCS_BATCH(batch, 0);                  //Select L0
367
368     OUT_BCS_BATCH(batch, 0x80808000);         //Only 1 reference
369     for(i = 0; i < 7; i++) {
370         OUT_BCS_BATCH(batch, 0x80808080);
371     }
372
373     ADVANCE_BCS_BATCH(batch);
374 }
375         
376         
377 static void
378 gen6_mfc_avc_insert_object(VADriverContextP ctx, int flush_data, struct gen6_encoder_context *gen6_encoder_context)
379 {
380     struct intel_batchbuffer *batch = gen6_encoder_context->base.batch;
381
382     BEGIN_BCS_BATCH(batch, 4);
383
384     OUT_BCS_BATCH(batch, MFC_AVC_INSERT_OBJECT | (4 -2 ) );
385     OUT_BCS_BATCH(batch, (32<<8) | 
386                   (1 << 3) |
387                   (1 << 2) |
388                   (flush_data << 1) |
389                   (1<<0) );
390     OUT_BCS_BATCH(batch, 0x00000003);
391     OUT_BCS_BATCH(batch, 0xABCD1234);
392
393     ADVANCE_BCS_BATCH(batch);
394 }
395
396 static int
397 gen6_mfc_avc_pak_object_intra(VADriverContextP ctx, int x, int y, int end_mb, int qp,unsigned int *msg,
398                               struct gen6_encoder_context *gen6_encoder_context)
399 {
400     struct intel_batchbuffer *batch = gen6_encoder_context->base.batch;
401     int len_in_dwords = 11;
402
403     BEGIN_BCS_BATCH(batch, len_in_dwords);
404
405     OUT_BCS_BATCH(batch, MFC_AVC_PAK_OBJECT | (len_in_dwords - 2));
406     OUT_BCS_BATCH(batch, 0);
407     OUT_BCS_BATCH(batch, 0);
408     OUT_BCS_BATCH(batch, 
409                   (0 << 24) |           /* PackedMvNum, Debug*/
410                   (0 << 20) |           /* No motion vector */
411                   (1 << 19) |           /* CbpDcY */
412                   (1 << 18) |           /* CbpDcU */
413                   (1 << 17) |           /* CbpDcV */
414                   (msg[0] & 0xFFFF) );
415
416     OUT_BCS_BATCH(batch, (0xFFFF<<16) | (y << 8) | x);          /* Code Block Pattern for Y*/
417     OUT_BCS_BATCH(batch, 0x000F000F);                                                   /* Code Block Pattern */                
418     OUT_BCS_BATCH(batch, (0 << 27) | (end_mb << 26) | qp);      /* Last MB */
419
420     /*Stuff for Intra MB*/
421     OUT_BCS_BATCH(batch, msg[1]);                       /* We using Intra16x16 no 4x4 predmode*/        
422     OUT_BCS_BATCH(batch, msg[2]);       
423     OUT_BCS_BATCH(batch, msg[3]&0xFC);          
424
425     OUT_BCS_BATCH(batch, 0x8040000);    /*MaxSizeInWord and TargetSzieInWord*/
426
427     ADVANCE_BCS_BATCH(batch);
428
429     return len_in_dwords;
430 }
431
432 static int gen6_mfc_avc_pak_object_inter(VADriverContextP ctx, int x, int y, int end_mb, int qp, unsigned int offset,
433                                          struct gen6_encoder_context *gen6_encoder_context)
434 {
435     struct intel_batchbuffer *batch = gen6_encoder_context->base.batch;
436     int len_in_dwords = 11;
437
438     BEGIN_BCS_BATCH(batch, len_in_dwords);
439
440     OUT_BCS_BATCH(batch, MFC_AVC_PAK_OBJECT | (len_in_dwords - 2));
441
442     OUT_BCS_BATCH(batch, 32);         /* 32 MV*/
443     OUT_BCS_BATCH(batch, offset);
444
445     OUT_BCS_BATCH(batch, 
446                   (1 << 24) |     /* PackedMvNum, Debug*/
447                   (4 << 20) |     /* 8 MV, SNB don't use it*/
448                   (1 << 19) |     /* CbpDcY */
449                   (1 << 18) |     /* CbpDcU */
450                   (1 << 17) |     /* CbpDcV */
451                   (0 << 15) |     /* Transform8x8Flag = 0*/
452                   (0 << 14) |     /* Frame based*/
453                   (0 << 13) |     /* Inter MB */
454                   (1 << 8)  |     /* MbType = P_L0_16x16 */   
455                   (0 << 7)  |     /* MBZ for frame */
456                   (0 << 6)  |     /* MBZ */
457                   (2 << 4)  |     /* MBZ for inter*/
458                   (0 << 3)  |     /* MBZ */
459                   (0 << 2)  |     /* SkipMbFlag */
460                   (0 << 0));      /* InterMbMode */
461
462     OUT_BCS_BATCH(batch, (0xFFFF<<16) | (y << 8) | x);        /* Code Block Pattern for Y*/
463     OUT_BCS_BATCH(batch, 0x000F000F);                         /* Code Block Pattern */    
464     OUT_BCS_BATCH(batch, (0 << 27) | (end_mb << 26) | qp);    /* Last MB */
465
466     /*Stuff for Inter MB*/
467     OUT_BCS_BATCH(batch, 0x0);        
468     OUT_BCS_BATCH(batch, 0x0);    
469     OUT_BCS_BATCH(batch, 0x0);        
470
471     OUT_BCS_BATCH(batch, 0xF0020000); /*MaxSizeInWord and TargetSzieInWord*/
472
473     ADVANCE_BCS_BATCH(batch);
474
475     return len_in_dwords;
476 }
477
478 static void gen6_mfc_init(VADriverContextP ctx, struct gen6_encoder_context *gen6_encoder_context)
479 {
480     struct i965_driver_data *i965 = i965_driver_data(ctx);
481     struct gen6_mfc_context *mfc_context = &gen6_encoder_context->mfc_context;
482     dri_bo *bo;
483     int i;
484
485     /*Encode common setup for MFC*/
486     dri_bo_unreference(mfc_context->post_deblocking_output.bo);
487     mfc_context->post_deblocking_output.bo = NULL;
488
489     dri_bo_unreference(mfc_context->pre_deblocking_output.bo);
490     mfc_context->pre_deblocking_output.bo = NULL;
491
492     dri_bo_unreference(mfc_context->uncompressed_picture_source.bo);
493     mfc_context->uncompressed_picture_source.bo = NULL;
494
495     dri_bo_unreference(mfc_context->mfc_indirect_pak_bse_object.bo); 
496     mfc_context->mfc_indirect_pak_bse_object.bo = NULL;
497
498     for (i = 0; i < NUM_MFC_DMV_BUFFERS; i++){
499         dri_bo_unreference(mfc_context->direct_mv_buffers[i].bo);
500         mfc_context->direct_mv_buffers[i].bo = NULL;
501     }
502
503     for (i = 0; i < MAX_MFC_REFERENCE_SURFACES; i++){
504         if (mfc_context->reference_surfaces[i].bo != NULL)
505             dri_bo_unreference(mfc_context->reference_surfaces[i].bo);
506         mfc_context->reference_surfaces[i].bo = NULL;  
507     }
508
509     dri_bo_unreference(mfc_context->intra_row_store_scratch_buffer.bo);
510     bo = dri_bo_alloc(i965->intel.bufmgr,
511                       "Buffer",
512                       128 * 64,
513                       64);
514     assert(bo);
515     mfc_context->intra_row_store_scratch_buffer.bo = bo;
516
517     dri_bo_unreference(mfc_context->deblocking_filter_row_store_scratch_buffer.bo);
518     bo = dri_bo_alloc(i965->intel.bufmgr,
519                       "Buffer",
520                       49152,  /* 6 * 128 * 64 */
521                       64);
522     assert(bo);
523     mfc_context->deblocking_filter_row_store_scratch_buffer.bo = bo;
524
525     dri_bo_unreference(mfc_context->bsd_mpc_row_store_scratch_buffer.bo);
526     bo = dri_bo_alloc(i965->intel.bufmgr,
527                       "Buffer",
528                       12288, /* 1.5 * 128 * 64 */
529                       0x1000);
530     assert(bo);
531     mfc_context->bsd_mpc_row_store_scratch_buffer.bo = bo;
532 }
533
534 void gen6_mfc_avc_pipeline_programing(VADriverContextP ctx,
535                                       struct encode_state *encode_state,
536                                       struct gen6_encoder_context *gen6_encoder_context)
537 {
538     struct intel_batchbuffer *batch = gen6_encoder_context->base.batch;
539     struct gen6_mfc_context *mfc_context = &gen6_encoder_context->mfc_context;
540     struct gen6_vme_context *vme_context = &gen6_encoder_context->vme_context;
541     VAEncSequenceParameterBufferH264 *pSequenceParameter = (VAEncSequenceParameterBufferH264 *)encode_state->seq_param->buffer;
542     VAEncSliceParameterBuffer *pSliceParameter = (VAEncSliceParameterBuffer *)encode_state->slice_params[0]->buffer; /* FIXME: multi slices */
543     unsigned int *msg = NULL, offset = 0;
544     int emit_new_state = 1, object_len_in_bytes;
545     int is_intra = pSliceParameter->slice_flags.bits.is_intra;
546     int width_in_mbs = (mfc_context->surface_state.width + 15) / 16;
547     int height_in_mbs = (mfc_context->surface_state.height + 15) / 16;
548     int x,y;
549
550     intel_batchbuffer_start_atomic_bcs(batch, 0x1000); 
551
552     if (is_intra) {
553         dri_bo_map(vme_context->vme_output.bo , 1);
554         msg = (unsigned int *)vme_context->vme_output.bo->virtual;
555     }
556
557     for (y = 0; y < height_in_mbs; y++) {
558         for (x = 0; x < width_in_mbs; x++) { 
559             int last_mb = (y == (height_in_mbs-1)) && ( x == (width_in_mbs-1) );
560             int qp = pSequenceParameter->initial_qp;
561
562             if (emit_new_state) {
563                 intel_batchbuffer_emit_mi_flush(batch);
564                 gen6_mfc_pipe_mode_select(ctx, gen6_encoder_context);
565                 gen6_mfc_surface_state(ctx, gen6_encoder_context);
566                 gen6_mfc_pipe_buf_addr_state(ctx, gen6_encoder_context);
567                 gen6_mfc_ind_obj_base_addr_state(ctx, gen6_encoder_context);
568                 gen6_mfc_bsp_buf_base_addr_state(ctx, gen6_encoder_context);
569                 gen6_mfc_avc_img_state(ctx, gen6_encoder_context);
570                 gen6_mfc_avc_qm_state(ctx, gen6_encoder_context);
571                 gen6_mfc_avc_fqm_state(ctx, gen6_encoder_context);
572                 gen6_mfc_avc_ref_idx_state(ctx, gen6_encoder_context);
573                 /*gen6_mfc_avc_directmode_state(ctx);*/
574                 gen6_mfc_avc_slice_state(ctx, is_intra, gen6_encoder_context);
575                 /*gen6_mfc_avc_insert_object(ctx, 0);*/
576                 emit_new_state = 0;
577             }
578
579             if (is_intra) {
580                 assert(msg);
581                 object_len_in_bytes = gen6_mfc_avc_pak_object_intra(ctx, x, y, last_mb, qp, msg, gen6_encoder_context);
582                 msg += 4;
583             } else {
584                 object_len_in_bytes = gen6_mfc_avc_pak_object_inter(ctx, x, y, last_mb, qp, offset, gen6_encoder_context);
585                 offset += 64;
586             }
587
588             if (intel_batchbuffer_check_free_space(batch, object_len_in_bytes) == 0) {
589                 intel_batchbuffer_end_atomic(batch);
590                 intel_batchbuffer_flush(batch);
591                 emit_new_state = 1;
592                 intel_batchbuffer_start_atomic_bcs(batch, 0x1000);
593             }
594         }
595     }
596
597     if (is_intra)
598         dri_bo_unmap(vme_context->vme_output.bo);
599         
600     intel_batchbuffer_end_atomic(batch);
601 }
602
603 static VAStatus gen6_mfc_avc_prepare(VADriverContextP ctx, 
604                                      struct encode_state *encode_state,
605                                      struct gen6_encoder_context *gen6_encoder_context)
606 {
607     struct i965_driver_data *i965 = i965_driver_data(ctx);
608     struct gen6_mfc_context *mfc_context = &gen6_encoder_context->mfc_context;
609     struct object_surface *obj_surface; 
610     struct object_buffer *obj_buffer;
611     dri_bo *bo;
612     VAEncPictureParameterBufferH264 *pPicParameter = (VAEncPictureParameterBufferH264 *)encode_state->pic_param->buffer;
613     VAStatus vaStatus = VA_STATUS_SUCCESS;
614
615     /*Setup all the input&output object*/
616     obj_surface = SURFACE(pPicParameter->reconstructed_picture);
617     assert(obj_surface);
618     i965_check_alloc_surface_bo(ctx, obj_surface, 1);
619
620     mfc_context->post_deblocking_output.bo = obj_surface->bo;
621     dri_bo_reference(mfc_context->post_deblocking_output.bo);
622
623     mfc_context->surface_state.width = obj_surface->orig_width;
624     mfc_context->surface_state.height = obj_surface->orig_height;
625     mfc_context->surface_state.w_pitch = obj_surface->width;
626     mfc_context->surface_state.h_pitch = obj_surface->height;
627
628     obj_surface = SURFACE(pPicParameter->reference_picture);
629     assert(obj_surface);
630     if (obj_surface->bo != NULL) {
631         mfc_context->reference_surfaces[0].bo = obj_surface->bo;
632         dri_bo_reference(obj_surface->bo);
633     }
634         
635     obj_surface = SURFACE(encode_state->current_render_target);
636     assert(obj_surface && obj_surface->bo);
637     mfc_context->uncompressed_picture_source.bo = obj_surface->bo;
638     dri_bo_reference(mfc_context->uncompressed_picture_source.bo);
639
640     obj_buffer = BUFFER (pPicParameter->coded_buf); /* FIXME: fix this later */
641     bo = obj_buffer->buffer_store->bo;
642     assert(bo);
643     mfc_context->mfc_indirect_pak_bse_object.bo = bo;
644     mfc_context->mfc_indirect_pak_bse_object.offset = ALIGN(sizeof(VACodedBufferSegment), 64);
645     dri_bo_reference(mfc_context->mfc_indirect_pak_bse_object.bo);
646
647     /*Programing bcs pipeline*/
648     gen6_mfc_avc_pipeline_programing(ctx, encode_state, gen6_encoder_context);  //filling the pipeline
649         
650     return vaStatus;
651 }
652
653 static VAStatus gen6_mfc_run(VADriverContextP ctx, 
654                              struct encode_state *encode_state,
655                              struct gen6_encoder_context *gen6_encoder_context)
656 {
657     struct intel_batchbuffer *batch = gen6_encoder_context->base.batch;
658
659     intel_batchbuffer_flush(batch);             //run the pipeline
660
661     return VA_STATUS_SUCCESS;
662 }
663
664 static VAStatus gen6_mfc_stop(VADriverContextP ctx, 
665                               struct encode_state *encode_state,
666                               struct gen6_encoder_context *gen6_encoder_context)
667 {
668 #if 0
669     struct i965_driver_data *i965 = i965_driver_data(ctx);
670     struct gen6_mfc_context *mfc_context = &gen6_encoder_context->mfc_context;
671         
672     VAEncPictureParameterBufferH264 *pPicParameter = (VAEncPictureParameterBufferH264 *)encode_state->pic_param->buffer;
673         
674     struct object_surface *obj_surface = SURFACE(pPicParameter->reconstructed_picture);
675     //struct object_surface *obj_surface = SURFACE(pPicParameter->reference_picture[0]);
676     //struct object_surface *obj_surface = SURFACE(encode_state->current_render_target);
677     my_debug(obj_surface);
678
679 #endif
680
681     return VA_STATUS_SUCCESS;
682 }
683
684 static VAStatus
685 gen6_mfc_avc_encode_picture(VADriverContextP ctx, 
686                             struct encode_state *encode_state,
687                             struct gen6_encoder_context *gen6_encoder_context)
688 {
689     gen6_mfc_init(ctx, gen6_encoder_context);
690     gen6_mfc_avc_prepare(ctx, encode_state, gen6_encoder_context);
691     gen6_mfc_run(ctx, encode_state, gen6_encoder_context);
692     gen6_mfc_stop(ctx, encode_state, gen6_encoder_context);
693
694     return VA_STATUS_SUCCESS;
695 }
696
697 VAStatus
698 gen6_mfc_pipeline(VADriverContextP ctx,
699                   VAProfile profile,
700                   struct encode_state *encode_state,
701                   struct gen6_encoder_context *gen6_encoder_context)
702 {
703     VAStatus vaStatus;
704
705     switch (profile) {
706     case VAProfileH264Baseline:
707         vaStatus = gen6_mfc_avc_encode_picture(ctx, encode_state, gen6_encoder_context);
708         break;
709
710         /* FIXME: add for other profile */
711     default:
712         vaStatus = VA_STATUS_ERROR_UNSUPPORTED_PROFILE;
713         break;
714     }
715
716     return vaStatus;
717 }
718
719 Bool gen6_mfc_context_init(VADriverContextP ctx, struct gen6_mfc_context *mfc_context)
720 {
721     return True;
722 }
723
724 Bool gen6_mfc_context_destroy(struct gen6_mfc_context *mfc_context)
725 {
726     int i;
727
728     dri_bo_unreference(mfc_context->post_deblocking_output.bo);
729     mfc_context->post_deblocking_output.bo = NULL;
730
731     dri_bo_unreference(mfc_context->pre_deblocking_output.bo);
732     mfc_context->pre_deblocking_output.bo = NULL;
733
734     dri_bo_unreference(mfc_context->uncompressed_picture_source.bo);
735     mfc_context->uncompressed_picture_source.bo = NULL;
736
737     dri_bo_unreference(mfc_context->mfc_indirect_pak_bse_object.bo); 
738     mfc_context->mfc_indirect_pak_bse_object.bo = NULL;
739
740     for (i = 0; i < NUM_MFC_DMV_BUFFERS; i++){
741         dri_bo_unreference(mfc_context->direct_mv_buffers[i].bo);
742         mfc_context->direct_mv_buffers[i].bo = NULL;
743     }
744
745     dri_bo_unreference(mfc_context->intra_row_store_scratch_buffer.bo);
746     mfc_context->intra_row_store_scratch_buffer.bo = NULL;
747
748     dri_bo_unreference(mfc_context->deblocking_filter_row_store_scratch_buffer.bo);
749     mfc_context->deblocking_filter_row_store_scratch_buffer.bo = NULL;
750
751     dri_bo_unreference(mfc_context->bsd_mpc_row_store_scratch_buffer.bo);
752     mfc_context->bsd_mpc_row_store_scratch_buffer.bo = NULL;
753
754     return True;
755 }