Avoid depending on va_backend.h for some files
[platform/upstream/libva-intel-driver.git] / src / gen6_vme.c
1 /*
2  * Copyright © 2010-2011 Intel Corporation
3  *
4  * Permission is hereby granted, free of charge, to any person obtaining a
5  * copy of this software and associated documentation files (the
6  * "Software"), to deal in the Software without restriction, including
7  * without limitation the rights to use, copy, modify, merge, publish,
8  * distribute, sub license, and/or sell copies of the Software, and to
9  * permit persons to whom the Software is furnished to do so, subject to
10  * the following conditions:
11  *
12  * The above copyright notice and this permission notice (including the
13  * next paragraph) shall be included in all copies or substantial portions
14  * of the Software.
15  *
16  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
17  * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
18  * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
19  * IN NO EVENT SHALL PRECISION INSIGHT AND/OR ITS SUPPLIERS BE LIABLE FOR
20  * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
21  * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
22  * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
23  *
24  * Authors:
25  *    Zhou Chang <chang.zhou@intel.com>
26  *
27  */
28
29 #include <stdio.h>
30 #include <string.h>
31 #include <assert.h>
32
33 #include "intel_batchbuffer.h"
34 #include "intel_driver.h"
35
36 #include "i965_defines.h"
37 #include "i965_drv_video.h"
38 #include "gen6_vme.h"
39 #include "i965_encoder.h"
40
/*
 * Per-slot surface state sizes.  A binding-table slot may hold either a
 * normal surface state or an "advanced" (state2) surface state, so every
 * slot is padded to the larger of the two, across both Gen6 and Gen7.
 */
#define SURFACE_STATE_PADDED_SIZE_0_GEN7        ALIGN(sizeof(struct gen7_surface_state), 32)
#define SURFACE_STATE_PADDED_SIZE_1_GEN7        ALIGN(sizeof(struct gen7_surface_state2), 32)
#define SURFACE_STATE_PADDED_SIZE_GEN7          MAX(SURFACE_STATE_PADDED_SIZE_0_GEN7, SURFACE_STATE_PADDED_SIZE_1_GEN7)

#define SURFACE_STATE_PADDED_SIZE_0_GEN6        ALIGN(sizeof(struct i965_surface_state), 32)
#define SURFACE_STATE_PADDED_SIZE_1_GEN6        ALIGN(sizeof(struct i965_surface_state2), 32)
/* Fixed: second operand was mistakenly SURFACE_STATE_PADDED_SIZE_1_GEN7 */
#define SURFACE_STATE_PADDED_SIZE_GEN6          MAX(SURFACE_STATE_PADDED_SIZE_0_GEN6, SURFACE_STATE_PADDED_SIZE_1_GEN6)

#define SURFACE_STATE_PADDED_SIZE               MAX(SURFACE_STATE_PADDED_SIZE_GEN6, SURFACE_STATE_PADDED_SIZE_GEN7)
#define SURFACE_STATE_OFFSET(index)             (SURFACE_STATE_PADDED_SIZE * (index))
#define BINDING_TABLE_OFFSET                    SURFACE_STATE_OFFSET(MAX_MEDIA_SURFACES_GEN6)

/* Interface descriptor indices, see gen6_vme_kernels[] / gen7_vme_kernels[] */
#define VME_INTRA_SHADER        0
#define VME_INTER_SHADER        1

#define CURBE_ALLOCATION_SIZE   37              /* in 256-bit */
#define CURBE_TOTAL_DATA_LENGTH (4 * 32)        /* in byte, it should be less than or equal to CURBE_ALLOCATION_SIZE * 32 */
#define CURBE_URB_ENTRY_LENGTH  4               /* in 256-bit, it should be less than or equal to CURBE_TOTAL_DATA_LENGTH / 32 */
59   
/* Gen6 (Sandy Bridge) VME kernel binaries, generated from the shader
 * sources under shaders/vme/ — one 4-DWORD instruction per row. */
static const uint32_t gen6_vme_intra_frame[][4] = {
#include "shaders/vme/intra_frame.g6b"
};

static const uint32_t gen6_vme_inter_frame[][4] = {
#include "shaders/vme/inter_frame.g6b"
};

/* Gen6 kernel table, indexed by VME_INTRA_SHADER / VME_INTER_SHADER.
 * The trailing NULL is filled in later with the kernel's bo (see the
 * kernel->bo use in gen6_vme_interface_setup()). */
static struct i965_kernel gen6_vme_kernels[] = {
    {
        "VME Intra Frame",
        VME_INTRA_SHADER,                                                                               /*index*/
        gen6_vme_intra_frame,
        sizeof(gen6_vme_intra_frame),
        NULL
    },
    {
        "VME inter Frame",
        VME_INTER_SHADER,
        gen6_vme_inter_frame,
        sizeof(gen6_vme_inter_frame),
        NULL
    }
};
84
/* Gen7 (Ivy Bridge) VME kernel binaries, generated from the shader
 * sources under shaders/vme/ — one 4-DWORD instruction per row. */
static const uint32_t gen7_vme_intra_frame[][4] = {
#include "shaders/vme/intra_frame.g7b"
};

static const uint32_t gen7_vme_inter_frame[][4] = {
#include "shaders/vme/inter_frame.g7b"
};

/* Gen7 kernel table, indexed by VME_INTRA_SHADER / VME_INTER_SHADER. */
static struct i965_kernel gen7_vme_kernels[] = {
    {
        "VME Intra Frame",
        VME_INTRA_SHADER,                                                                               /*index*/
        gen7_vme_intra_frame,
        sizeof(gen7_vme_intra_frame),
        NULL
    },
    {
        "VME inter Frame",
        VME_INTER_SHADER,
        gen7_vme_inter_frame,
        sizeof(gen7_vme_inter_frame),
        NULL
    }
};
109
110 static void
111 gen6_vme_set_common_surface_tiling(struct i965_surface_state *ss, unsigned int tiling)
112 {
113     switch (tiling) {
114     case I915_TILING_NONE:
115         ss->ss3.tiled_surface = 0;
116         ss->ss3.tile_walk = 0;
117         break;
118     case I915_TILING_X:
119         ss->ss3.tiled_surface = 1;
120         ss->ss3.tile_walk = I965_TILEWALK_XMAJOR;
121         break;
122     case I915_TILING_Y:
123         ss->ss3.tiled_surface = 1;
124         ss->ss3.tile_walk = I965_TILEWALK_YMAJOR;
125         break;
126     }
127 }
128
129 static void
130 gen6_vme_set_source_surface_tiling(struct i965_surface_state2 *ss, unsigned int tiling)
131 {
132     switch (tiling) {
133     case I915_TILING_NONE:
134         ss->ss2.tiled_surface = 0;
135         ss->ss2.tile_walk = 0;
136         break;
137     case I915_TILING_X:
138         ss->ss2.tiled_surface = 1;
139         ss->ss2.tile_walk = I965_TILEWALK_XMAJOR;
140         break;
141     case I915_TILING_Y:
142         ss->ss2.tiled_surface = 1;
143         ss->ss2.tile_walk = I965_TILEWALK_YMAJOR;
144         break;
145     }
146 }
147
/* only used for VME source surface state */
/*
 * Program an "advanced" surface state (i965_surface_state2) at binding
 * table slot `index`, describing obj_surface as an interleaved-chroma
 * planar 4:2:0 picture (MFX_SURFACE_PLANAR_420_8) for the VME unit.
 * A relocation is recorded so the base address in ss0 is patched to the
 * surface bo's real GPU offset at execbuffer time, and the binding table
 * entry for `index` is pointed at the new state.
 */
static void gen6_vme_source_surface_state(VADriverContextP ctx,
                                          int index,
                                          struct object_surface *obj_surface,
                                          struct gen6_encoder_context *gen6_encoder_context)
{
    struct gen6_vme_context *vme_context = &gen6_encoder_context->vme_context;
    struct i965_surface_state2 *ss;
    dri_bo *bo;
    int w, h, w_pitch, h_pitch;
    unsigned int tiling, swizzle;

    assert(obj_surface->bo);
    dri_bo_get_tiling(obj_surface->bo, &tiling, &swizzle);

    /* orig_* are the visible picture dimensions; width/height are the
     * (aligned) allocation pitch/height of the bo */
    w = obj_surface->orig_width;
    h = obj_surface->orig_height;
    w_pitch = obj_surface->width;
    h_pitch = obj_surface->height;

    bo = vme_context->surface_state_binding_table.bo;
    dri_bo_map(bo, 1);
    assert(bo->virtual);

    /* Surface states and the binding table live in the same bo, each
     * state at SURFACE_STATE_OFFSET(slot) */
    ss = (struct i965_surface_state2 *)((char *)bo->virtual + SURFACE_STATE_OFFSET(index));
    memset(ss, 0, sizeof(*ss));

    /* CPU-side copy; the reloc below rewrites this at exec time */
    ss->ss0.surface_base_address = obj_surface->bo->offset;

    ss->ss1.cbcr_pixel_offset_v_direction = 2;
    ss->ss1.width = w - 1;
    ss->ss1.height = h - 1;

    ss->ss2.surface_format = MFX_SURFACE_PLANAR_420_8;
    ss->ss2.interleave_chroma = 1;
    ss->ss2.pitch = w_pitch - 1;
    ss->ss2.half_pitch_for_chroma = 0;

    gen6_vme_set_source_surface_tiling(ss, tiling);

    /* UV offset for interleave mode */
    ss->ss3.x_offset_for_cb = 0;
    ss->ss3.y_offset_for_cb = h_pitch;

    dri_bo_emit_reloc(bo,
                      I915_GEM_DOMAIN_RENDER, 0,
                      0,
                      SURFACE_STATE_OFFSET(index) + offsetof(struct i965_surface_state2, ss0),
                      obj_surface->bo);

    /* Point binding table entry `index` at this surface state */
    ((unsigned int *)((char *)bo->virtual + BINDING_TABLE_OFFSET))[index] = SURFACE_STATE_OFFSET(index);
    dri_bo_unmap(bo);
}
201
202 static void
203 gen6_vme_media_source_surface_state(VADriverContextP ctx,
204                                     int index,
205                                     struct object_surface *obj_surface,
206                                     struct gen6_encoder_context *gen6_encoder_context)
207 {
208     struct gen6_vme_context *vme_context = &gen6_encoder_context->vme_context;
209     struct i965_surface_state *ss;
210     dri_bo *bo;
211     int w, h, w_pitch;
212     unsigned int tiling, swizzle;
213
214     w = obj_surface->orig_width;
215     h = obj_surface->orig_height;
216     w_pitch = obj_surface->width;
217
218     /* Y plane */
219     dri_bo_get_tiling(obj_surface->bo, &tiling, &swizzle);
220
221     bo = vme_context->surface_state_binding_table.bo;
222     dri_bo_map(bo, True);
223     assert(bo->virtual);
224
225     ss = (struct i965_surface_state *)((char *)bo->virtual + SURFACE_STATE_OFFSET(index));
226     memset(ss, 0, sizeof(*ss));
227     ss->ss0.surface_type = I965_SURFACE_2D;
228     ss->ss0.surface_format = I965_SURFACEFORMAT_R8_UNORM;
229     ss->ss1.base_addr = obj_surface->bo->offset;
230     ss->ss2.width = w / 4 - 1;
231     ss->ss2.height = h - 1;
232     ss->ss3.pitch = w_pitch - 1;
233     gen6_vme_set_common_surface_tiling(ss, tiling);
234     dri_bo_emit_reloc(bo,
235                       I915_GEM_DOMAIN_RENDER, 
236                       0,
237                       0,
238                       SURFACE_STATE_OFFSET(index) + offsetof(struct i965_surface_state, ss1),
239                       obj_surface->bo);
240
241     ((unsigned int *)((char *)bo->virtual + BINDING_TABLE_OFFSET))[index] = SURFACE_STATE_OFFSET(index);
242     dri_bo_unmap(bo);
243 }
244
/*
 * Allocate the VME output buffer and describe it as a BUFFER surface at
 * binding table slot `index`.  Output is one 16-byte (OWORD) record per
 * macroblock for intra slices and four records per macroblock for inter
 * slices.  Always returns VA_STATUS_SUCCESS (allocation failure only
 * trips the assert).
 */
static VAStatus
gen6_vme_output_buffer_setup(VADriverContextP ctx,
                             struct encode_state *encode_state,
                             int index,
                             struct gen6_encoder_context *gen6_encoder_context)

{
    struct i965_driver_data *i965 = i965_driver_data(ctx);
    struct gen6_vme_context *vme_context = &gen6_encoder_context->vme_context;
    struct i965_surface_state *ss;
    dri_bo *bo;
    VAEncSequenceParameterBufferH264 *pSequenceParameter = (VAEncSequenceParameterBufferH264 *)encode_state->seq_param->buffer;
    VAEncSliceParameterBuffer *pSliceParameter = (VAEncSliceParameterBuffer *)encode_state->slice_params[0]->buffer;
    int is_intra = pSliceParameter->slice_flags.bits.is_intra;
    int width_in_mbs = pSequenceParameter->picture_width_in_mbs;
    int height_in_mbs = pSequenceParameter->picture_height_in_mbs;
    int num_entries;

    if ( is_intra ) {
        vme_context->vme_output.num_blocks = width_in_mbs * height_in_mbs;
    } else {
        vme_context->vme_output.num_blocks = width_in_mbs * height_in_mbs * 4;
    }
    vme_context->vme_output.size_block = 16; /* an OWORD */
    vme_context->vme_output.pitch = ALIGN(vme_context->vme_output.size_block, 16);
    bo = dri_bo_alloc(i965->intel.bufmgr,
                      "VME output buffer",
                      vme_context->vme_output.num_blocks * vme_context->vme_output.pitch,
                      0x1000);
    assert(bo);
    vme_context->vme_output.bo = bo;

    bo = vme_context->surface_state_binding_table.bo;
    dri_bo_map(bo, 1);
    assert(bo->virtual);

    ss = (struct i965_surface_state *)((char *)bo->virtual + SURFACE_STATE_OFFSET(index));
    memset(ss, 0, sizeof(*ss));

    /* always use 16 bytes as pitch on Sandy Bridge */
    num_entries = vme_context->vme_output.num_blocks * vme_context->vme_output.pitch / 16;
    ss->ss0.render_cache_read_mode = 1;
    ss->ss0.surface_type = I965_SURFACE_BUFFER;
    ss->ss1.base_addr = vme_context->vme_output.bo->offset;  /* patched by the reloc below */
    /* BUFFER surfaces encode (num_entries - 1) split across the
     * width (7 bits), height (13 bits) and depth fields */
    ss->ss2.width = ((num_entries - 1) & 0x7f);
    ss->ss2.height = (((num_entries - 1) >> 7) & 0x1fff);
    ss->ss3.depth = (((num_entries - 1) >> 20) & 0x7f);
    ss->ss3.pitch = vme_context->vme_output.pitch - 1;
    dri_bo_emit_reloc(bo,
                      I915_GEM_DOMAIN_RENDER, I915_GEM_DOMAIN_RENDER,
                      0,
                      SURFACE_STATE_OFFSET(index) + offsetof(struct i965_surface_state, ss1),
                      vme_context->vme_output.bo);


    /* Point binding table entry `index` at this surface state */
    ((unsigned int *)((char *)bo->virtual + BINDING_TABLE_OFFSET))[index] = SURFACE_STATE_OFFSET(index);
    dri_bo_unmap(bo);
    return VA_STATUS_SUCCESS;
}
304
305 static VAStatus gen6_vme_surface_setup(VADriverContextP ctx, 
306                                        struct encode_state *encode_state,
307                                        int is_intra,
308                                        struct gen6_encoder_context *gen6_encoder_context)
309 {
310     struct i965_driver_data *i965 = i965_driver_data(ctx);
311     struct object_surface *obj_surface;
312     VAEncPictureParameterBufferH264 *pPicParameter = (VAEncPictureParameterBufferH264 *)encode_state->pic_param->buffer;
313
314     /*Setup surfaces state*/
315     /* current picture for encoding */
316     obj_surface = SURFACE(encode_state->current_render_target);
317     assert(obj_surface);
318     gen6_vme_source_surface_state(ctx, 0, obj_surface, gen6_encoder_context);
319     gen6_vme_media_source_surface_state(ctx, 4, obj_surface, gen6_encoder_context);
320
321     if ( ! is_intra ) {
322         /* reference 0 */
323         obj_surface = SURFACE(pPicParameter->reference_picture);
324         assert(obj_surface);
325         gen6_vme_source_surface_state(ctx, 1, obj_surface, gen6_encoder_context);
326         /* reference 1, FIXME: */
327         // obj_surface = SURFACE(pPicParameter->reference_picture);
328         // assert(obj_surface);
329         //gen6_vme_source_surface_state(ctx, 2, obj_surface);
330     }
331
332     /* VME output */
333     gen6_vme_output_buffer_setup(ctx, encode_state, 3, gen6_encoder_context);
334
335     return VA_STATUS_SUCCESS;
336 }
337
338 /*
339  * Surface state for IvyBridge
340  */
341 static void
342 gen7_vme_set_common_surface_tiling(struct gen7_surface_state *ss, unsigned int tiling)
343 {
344     switch (tiling) {
345     case I915_TILING_NONE:
346         ss->ss0.tiled_surface = 0;
347         ss->ss0.tile_walk = 0;
348         break;
349     case I915_TILING_X:
350         ss->ss0.tiled_surface = 1;
351         ss->ss0.tile_walk = I965_TILEWALK_XMAJOR;
352         break;
353     case I915_TILING_Y:
354         ss->ss0.tiled_surface = 1;
355         ss->ss0.tile_walk = I965_TILEWALK_YMAJOR;
356         break;
357     }
358 }
359
360 static void
361 gen7_vme_set_source_surface_tiling(struct gen7_surface_state2 *ss, unsigned int tiling)
362 {
363     switch (tiling) {
364     case I915_TILING_NONE:
365         ss->ss2.tiled_surface = 0;
366         ss->ss2.tile_walk = 0;
367         break;
368     case I915_TILING_X:
369         ss->ss2.tiled_surface = 1;
370         ss->ss2.tile_walk = I965_TILEWALK_XMAJOR;
371         break;
372     case I915_TILING_Y:
373         ss->ss2.tiled_surface = 1;
374         ss->ss2.tile_walk = I965_TILEWALK_YMAJOR;
375         break;
376     }
377 }
378
/* only used for VME source surface state */
/*
 * Gen7 (Ivy Bridge) counterpart of gen6_vme_source_surface_state():
 * program a gen7_surface_state2 at binding table slot `index`,
 * describing obj_surface as an interleaved-chroma planar 4:2:0 picture
 * for the VME unit, record a relocation for the base address in ss0, and
 * point the binding table entry at the new state.
 */
static void gen7_vme_source_surface_state(VADriverContextP ctx,
                                          int index,
                                          struct object_surface *obj_surface,
                                          struct gen6_encoder_context *gen6_encoder_context)
{
    struct gen6_vme_context *vme_context = &gen6_encoder_context->vme_context;
    struct gen7_surface_state2 *ss;
    dri_bo *bo;
    int w, h, w_pitch, h_pitch;
    unsigned int tiling, swizzle;

    assert(obj_surface->bo);

    /* orig_* are the visible picture dimensions; width/height are the
     * (aligned) allocation pitch/height of the bo */
    w = obj_surface->orig_width;
    h = obj_surface->orig_height;
    w_pitch = obj_surface->width;
    h_pitch = obj_surface->height;

    bo = vme_context->surface_state_binding_table.bo;
    dri_bo_map(bo, 1);
    assert(bo->virtual);

    ss = (struct gen7_surface_state2 *)((char *)bo->virtual + SURFACE_STATE_OFFSET(index));
    memset(ss, 0, sizeof(*ss));

    /* CPU-side copy; the reloc below rewrites this at exec time */
    ss->ss0.surface_base_address = obj_surface->bo->offset;

    ss->ss1.cbcr_pixel_offset_v_direction = 2;
    ss->ss1.width = w - 1;
    ss->ss1.height = h - 1;

    ss->ss2.surface_format = MFX_SURFACE_PLANAR_420_8;
    ss->ss2.interleave_chroma = 1;
    ss->ss2.pitch = w_pitch - 1;
    ss->ss2.half_pitch_for_chroma = 0;

    dri_bo_get_tiling(obj_surface->bo, &tiling, &swizzle);
    gen7_vme_set_source_surface_tiling(ss, tiling);

    /* UV offset for interleave mode */
    ss->ss3.x_offset_for_cb = 0;
    ss->ss3.y_offset_for_cb = h_pitch;

    dri_bo_emit_reloc(bo,
                      I915_GEM_DOMAIN_RENDER, 0,
                      0,
                      SURFACE_STATE_OFFSET(index) + offsetof(struct gen7_surface_state2, ss0),
                      obj_surface->bo);

    /* Point binding table entry `index` at this surface state */
    ((unsigned int *)((char *)bo->virtual + BINDING_TABLE_OFFSET))[index] = SURFACE_STATE_OFFSET(index);
    dri_bo_unmap(bo);
}
432
433 static void
434 gen7_vme_media_source_surface_state(VADriverContextP ctx,
435                                     int index,
436                                     struct object_surface *obj_surface,
437                                     struct gen6_encoder_context *gen6_encoder_context)
438 {
439     struct gen6_vme_context *vme_context = &gen6_encoder_context->vme_context;
440     struct gen7_surface_state *ss;
441     dri_bo *bo;
442     int w, h, w_pitch;
443     unsigned int tiling, swizzle;
444
445     /* Y plane */
446     w = obj_surface->orig_width;
447     h = obj_surface->orig_height;
448     w_pitch = obj_surface->width;
449
450     bo = vme_context->surface_state_binding_table.bo;
451     dri_bo_map(bo, True);
452     assert(bo->virtual);
453
454     ss = (struct gen7_surface_state *)((char *)bo->virtual + SURFACE_STATE_OFFSET(index));
455     memset(ss, 0, sizeof(*ss));
456
457     ss->ss0.surface_type = I965_SURFACE_2D;
458     ss->ss0.surface_format = I965_SURFACEFORMAT_R8_UNORM;
459
460     ss->ss1.base_addr = obj_surface->bo->offset;
461
462     ss->ss2.width = w / 4 - 1;
463     ss->ss2.height = h - 1;
464
465     ss->ss3.pitch = w_pitch - 1;
466
467     dri_bo_get_tiling(obj_surface->bo, &tiling, &swizzle);
468     gen7_vme_set_common_surface_tiling(ss, tiling);
469
470     dri_bo_emit_reloc(bo,
471                       I915_GEM_DOMAIN_RENDER, 0,
472                       0,
473                       SURFACE_STATE_OFFSET(index) + offsetof(struct gen7_surface_state, ss1),
474                       obj_surface->bo);
475
476     ((unsigned int *)((char *)bo->virtual + BINDING_TABLE_OFFSET))[index] = SURFACE_STATE_OFFSET(index);
477     dri_bo_unmap(bo);
478 }
479
480 static VAStatus
481 gen7_vme_output_buffer_setup(VADriverContextP ctx,
482                              struct encode_state *encode_state,
483                              int index,
484                              struct gen6_encoder_context *gen6_encoder_context)
485
486 {
487     struct i965_driver_data *i965 = i965_driver_data(ctx);
488     struct gen6_vme_context *vme_context = &gen6_encoder_context->vme_context;
489     struct gen7_surface_state *ss;
490     dri_bo *bo;
491     VAEncSequenceParameterBufferH264 *pSequenceParameter = (VAEncSequenceParameterBufferH264 *)encode_state->seq_param->buffer;
492     VAEncSliceParameterBuffer *pSliceParameter = (VAEncSliceParameterBuffer *)encode_state->slice_params[0]->buffer;
493     int is_intra = pSliceParameter->slice_flags.bits.is_intra;
494     int width_in_mbs = pSequenceParameter->picture_width_in_mbs;
495     int height_in_mbs = pSequenceParameter->picture_height_in_mbs;
496     int num_entries;
497
498     if ( is_intra ) {
499         vme_context->vme_output.num_blocks = width_in_mbs * height_in_mbs;
500     } else {
501         vme_context->vme_output.num_blocks = width_in_mbs * height_in_mbs * 4;
502     }
503     vme_context->vme_output.size_block = 16; /* an OWORD */
504     vme_context->vme_output.pitch = ALIGN(vme_context->vme_output.size_block, 16);
505     bo = dri_bo_alloc(i965->intel.bufmgr, 
506                       "VME output buffer",
507                       vme_context->vme_output.num_blocks * vme_context->vme_output.pitch,
508                       0x1000);
509     assert(bo);
510     vme_context->vme_output.bo = bo;
511
512     bo = vme_context->surface_state_binding_table.bo;
513     dri_bo_map(bo, 1);
514     assert(bo->virtual);
515
516     ss = (struct gen7_surface_state *)((char *)bo->virtual + SURFACE_STATE_OFFSET(index));
517     ss = bo->virtual;
518     memset(ss, 0, sizeof(*ss));
519
520     /* always use 16 bytes as pitch on Sandy Bridge */
521     num_entries = vme_context->vme_output.num_blocks * vme_context->vme_output.pitch / 16;
522
523     ss->ss0.surface_type = I965_SURFACE_BUFFER;
524
525     ss->ss1.base_addr = vme_context->vme_output.bo->offset;
526
527     ss->ss2.width = ((num_entries - 1) & 0x7f);
528     ss->ss2.height = (((num_entries - 1) >> 7) & 0x3fff);
529     ss->ss3.depth = (((num_entries - 1) >> 21) & 0x3f);
530
531     ss->ss3.pitch = vme_context->vme_output.pitch - 1;
532
533     dri_bo_emit_reloc(bo,
534                       I915_GEM_DOMAIN_RENDER, I915_GEM_DOMAIN_RENDER,
535                       0,
536                       SURFACE_STATE_OFFSET(index) + offsetof(struct gen7_surface_state, ss1),
537                       vme_context->vme_output.bo);
538
539     ((unsigned int *)((char *)bo->virtual + BINDING_TABLE_OFFSET))[index] = SURFACE_STATE_OFFSET(index);
540     dri_bo_unmap(bo);
541
542     return VA_STATUS_SUCCESS;
543 }
544
545 static VAStatus gen7_vme_surface_setup(VADriverContextP ctx, 
546                                        struct encode_state *encode_state,
547                                        int is_intra,
548                                        struct gen6_encoder_context *gen6_encoder_context)
549 {
550     struct i965_driver_data *i965 = i965_driver_data(ctx);
551     struct object_surface *obj_surface;
552     VAEncPictureParameterBufferH264 *pPicParameter = (VAEncPictureParameterBufferH264 *)encode_state->pic_param->buffer;
553
554     /*Setup surfaces state*/
555     /* current picture for encoding */
556     obj_surface = SURFACE(encode_state->current_render_target);
557     assert(obj_surface);
558     gen7_vme_source_surface_state(ctx, 1, obj_surface, gen6_encoder_context);
559     gen7_vme_media_source_surface_state(ctx, 4, obj_surface, gen6_encoder_context);
560
561     if ( ! is_intra ) {
562         /* reference 0 */
563         obj_surface = SURFACE(pPicParameter->reference_picture);
564         assert(obj_surface);
565         gen7_vme_source_surface_state(ctx, 2, obj_surface, gen6_encoder_context);
566         /* reference 1, FIXME: */
567         // obj_surface = SURFACE(pPicParameter->reference_picture);
568         // assert(obj_surface);
569         //gen7_vme_source_surface_state(ctx, 3, obj_surface);
570     }
571
572     /* VME output */
573     gen7_vme_output_buffer_setup(ctx, encode_state, 0, gen6_encoder_context);
574
575     return VA_STATUS_SUCCESS;
576 }
577
/*
 * Fill the interface descriptor remap table (IDRT): one 32-byte
 * descriptor per VME kernel, each pointing at its kernel binary, the
 * shared VME state bo (programmed as the "sampler state") and the common
 * binding table.  Relocations are recorded for the kernel and VME-state
 * pointers so the GPU addresses are patched at execbuffer time.
 */
static VAStatus gen6_vme_interface_setup(VADriverContextP ctx,
                                         struct encode_state *encode_state,
                                         struct gen6_encoder_context *gen6_encoder_context)
{
    struct gen6_vme_context *vme_context = &gen6_encoder_context->vme_context;
    struct gen6_interface_descriptor_data *desc;
    int i;
    dri_bo *bo;

    bo = vme_context->idrt.bo;
    dri_bo_map(bo, 1);
    assert(bo->virtual);
    desc = bo->virtual;

    for (i = 0; i < GEN6_VME_KERNEL_NUMBER; i++) {
        struct i965_kernel *kernel;
        kernel = &vme_context->vme_kernels[i];
        assert(sizeof(*desc) == 32);
        /*Setup the descritor table*/
        memset(desc, 0, sizeof(*desc));
        desc->desc0.kernel_start_pointer = (kernel->bo->offset >> 6);  /* 64-byte aligned */
        desc->desc2.sampler_count = 1; /* FIXME: */
        desc->desc2.sampler_state_pointer = (vme_context->vme_state.bo->offset >> 5);  /* 32-byte aligned */
        desc->desc3.binding_table_entry_count = 1; /* FIXME: */
        desc->desc3.binding_table_pointer = (BINDING_TABLE_OFFSET >> 5);
        desc->desc4.constant_urb_entry_read_offset = 0;
        desc->desc4.constant_urb_entry_read_length = CURBE_URB_ENTRY_LENGTH;

        /*kernel start*/
        dri_bo_emit_reloc(bo,
                          I915_GEM_DOMAIN_INSTRUCTION, 0,
                          0,
                          i * sizeof(*desc) + offsetof(struct gen6_interface_descriptor_data, desc0),
                          kernel->bo);
        /*Sampler State(VME state pointer)*/
        dri_bo_emit_reloc(bo,
                          I915_GEM_DOMAIN_INSTRUCTION, 0,
                          (1 << 2),                                                                     /* delta: keeps the sampler_count bits around the patched pointer */
                          i * sizeof(*desc) + offsetof(struct gen6_interface_descriptor_data, desc2),
                          vme_context->vme_state.bo);
        desc++;
    }
    dri_bo_unmap(bo);

    return VA_STATUS_SUCCESS;
}
624
625 static VAStatus gen6_vme_constant_setup(VADriverContextP ctx, 
626                                         struct encode_state *encode_state,
627                                         struct gen6_encoder_context *gen6_encoder_context)
628 {
629     struct gen6_vme_context *vme_context = &gen6_encoder_context->vme_context;
630     unsigned char *constant_buffer;
631
632     dri_bo_map(vme_context->curbe.bo, 1);
633     assert(vme_context->curbe.bo->virtual);
634     constant_buffer = vme_context->curbe.bo->virtual;
635         
636     /*TODO copy buffer into CURB*/
637
638     dri_bo_unmap( vme_context->curbe.bo);
639
640     return VA_STATUS_SUCCESS;
641 }
642
643 static VAStatus gen6_vme_vme_state_setup(VADriverContextP ctx,
644                                          struct encode_state *encode_state,
645                                          int is_intra,
646                                          struct gen6_encoder_context *gen6_encoder_context)
647 {
648     struct gen6_vme_context *vme_context = &gen6_encoder_context->vme_context;
649     unsigned int *vme_state_message;
650     int i;
651         
652     //building VME state message
653     dri_bo_map(vme_context->vme_state.bo, 1);
654     assert(vme_context->vme_state.bo->virtual);
655     vme_state_message = (unsigned int *)vme_context->vme_state.bo->virtual;
656         
657         vme_state_message[0] = 0x10010101;
658         vme_state_message[1] = 0x100F0F0F;
659         vme_state_message[2] = 0x10010101;
660         vme_state_message[3] = 0x000F0F0F;
661         for(i = 4; i < 14; i++) {
662                 vme_state_message[i] = 0x00000000;
663         }       
664
665     for(i = 14; i < 32; i++) {
666         vme_state_message[i] = 0x00000000;
667     }
668
669     //vme_state_message[16] = 0x42424242;                       //cost function LUT set 0 for Intra
670
671     dri_bo_unmap( vme_context->vme_state.bo);
672     return VA_STATUS_SUCCESS;
673 }
674
/* Select the Media pipeline before issuing any media commands. */
static void gen6_vme_pipeline_select(VADriverContextP ctx, struct gen6_encoder_context *gen6_encoder_context)
{
    struct intel_batchbuffer *batch = gen6_encoder_context->base.batch;

    BEGIN_BATCH(batch, 1);
    OUT_BATCH(batch, CMD_PIPELINE_SELECT | PIPELINE_SELECT_MEDIA);
    ADVANCE_BATCH(batch);
}
683
/*
 * Emit CMD_STATE_BASE_ADDRESS (10 DWORDs).  Only the surface state base
 * is pointed at a real buffer — the combined surface-state/binding-table
 * bo — via a relocation; all other bases are 0 with the modify bit set,
 * and the access upper bounds are effectively unbounded.
 */
static void gen6_vme_state_base_address(VADriverContextP ctx, struct gen6_encoder_context *gen6_encoder_context)
{
    struct gen6_vme_context *vme_context = &gen6_encoder_context->vme_context;
    struct intel_batchbuffer *batch = gen6_encoder_context->base.batch;

    BEGIN_BATCH(batch, 10);

    OUT_BATCH(batch, CMD_STATE_BASE_ADDRESS | 8);

    OUT_BATCH(batch, 0 | BASE_ADDRESS_MODIFY);                          //General State Base Address
    OUT_RELOC(batch, vme_context->surface_state_binding_table.bo, I915_GEM_DOMAIN_INSTRUCTION, 0, BASE_ADDRESS_MODIFY); /* Surface state base address */
    OUT_BATCH(batch, 0 | BASE_ADDRESS_MODIFY);                          //Dynamic State Base Address
    OUT_BATCH(batch, 0 | BASE_ADDRESS_MODIFY);                          //Indirect Object Base Address
    OUT_BATCH(batch, 0 | BASE_ADDRESS_MODIFY);                          //Instruction Base Address

    OUT_BATCH(batch, 0xFFFFF000 | BASE_ADDRESS_MODIFY);         //General State Access Upper Bound
    OUT_BATCH(batch, 0xFFFFF000 | BASE_ADDRESS_MODIFY);         //Dynamic State Access Upper Bound
    OUT_BATCH(batch, 0xFFFFF000 | BASE_ADDRESS_MODIFY);         //Indirect Object Access Upper Bound
    OUT_BATCH(batch, 0xFFFFF000 | BASE_ADDRESS_MODIFY);         //Instruction Access Upper Bound

    /*
      OUT_BATCH(batch, 0 | BASE_ADDRESS_MODIFY);                                //LLC Coherent Base Address
      OUT_BATCH(batch, 0xFFFFF000 | BASE_ADDRESS_MODIFY );              //LLC Coherent Upper Bound
    */

    ADVANCE_BATCH(batch);
}
711
/*
 * Emit CMD_MEDIA_VFE_STATE (8 DWORDs): maximum thread count, URB entry
 * configuration and CURBE allocation size, all taken from
 * vme_context->vfe_state.  The scoreboard DWORDs are left disabled.
 */
static void gen6_vme_vfe_state(VADriverContextP ctx, struct gen6_encoder_context *gen6_encoder_context)
{
    struct intel_batchbuffer *batch = gen6_encoder_context->base.batch;
    struct gen6_vme_context *vme_context = &gen6_encoder_context->vme_context;

    BEGIN_BATCH(batch, 8);

    OUT_BATCH(batch, CMD_MEDIA_VFE_STATE | 6);                                  /*Gen6 CMD_MEDIA_STATE_POINTERS = CMD_MEDIA_STATE */
    OUT_BATCH(batch, 0);                                                                                                /*Scratch Space Base Pointer and Space*/
    OUT_BATCH(batch, (vme_context->vfe_state.max_num_threads << 16)
              | (vme_context->vfe_state.num_urb_entries << 8)
              | (vme_context->vfe_state.gpgpu_mode << 2) );     /*Maximum Number of Threads , Number of URB Entries, MEDIA Mode*/
    OUT_BATCH(batch, 0);                                                                                                /*Debug: Object ID*/
    OUT_BATCH(batch, (vme_context->vfe_state.urb_entry_size << 16)
              | vme_context->vfe_state.curbe_allocation_size);                          /*URB Entry Allocation Size , CURBE Allocation Size*/
    OUT_BATCH(batch, 0);                                                                                        /*Disable Scoreboard*/
    OUT_BATCH(batch, 0);                                                                                        /*Disable Scoreboard*/
    OUT_BATCH(batch, 0);                                                                                        /*Disable Scoreboard*/

    ADVANCE_BATCH(batch);

}
734
/*
 * Emit CMD_MEDIA_CURBE_LOAD (4 DWORDs), pointing the hardware at the
 * CURBE buffer of CURBE_TOTAL_DATA_LENGTH bytes (see
 * gen6_vme_constant_setup() / gen6_vme_media_init()).
 */
static void gen6_vme_curbe_load(VADriverContextP ctx, struct gen6_encoder_context *gen6_encoder_context)
{
    struct intel_batchbuffer *batch = gen6_encoder_context->base.batch;
    struct gen6_vme_context *vme_context = &gen6_encoder_context->vme_context;

    BEGIN_BATCH(batch, 4);

    OUT_BATCH(batch, CMD_MEDIA_CURBE_LOAD | 2);
    OUT_BATCH(batch, 0);

    OUT_BATCH(batch, CURBE_TOTAL_DATA_LENGTH);
    OUT_RELOC(batch, vme_context->curbe.bo, I915_GEM_DOMAIN_INSTRUCTION, 0, 0);

    ADVANCE_BATCH(batch);
}
750
/*
 * Emit CMD_MEDIA_INTERFACE_LOAD (4 DWORDs), loading the interface
 * descriptor remap table filled in by gen6_vme_interface_setup() — one
 * gen6_interface_descriptor_data entry per VME kernel.
 */
static void gen6_vme_idrt(VADriverContextP ctx, struct gen6_encoder_context *gen6_encoder_context)
{
    struct intel_batchbuffer *batch = gen6_encoder_context->base.batch;
    struct gen6_vme_context *vme_context = &gen6_encoder_context->vme_context;

    BEGIN_BATCH(batch, 4);

    OUT_BATCH(batch, CMD_MEDIA_INTERFACE_LOAD | 2);
    OUT_BATCH(batch, 0);
    OUT_BATCH(batch, GEN6_VME_KERNEL_NUMBER * sizeof(struct gen6_interface_descriptor_data));
    OUT_RELOC(batch, vme_context->idrt.bo, I915_GEM_DOMAIN_INSTRUCTION, 0, 0);

    ADVANCE_BATCH(batch);
}
765
766 static int gen6_vme_media_object(VADriverContextP ctx, 
767                                  struct encode_state *encode_state,
768                                  int mb_x, int mb_y,
769                                  int kernel,
770                                  struct gen6_encoder_context *gen6_encoder_context)
771 {
772     struct i965_driver_data *i965 = i965_driver_data(ctx);
773     struct intel_batchbuffer *batch = gen6_encoder_context->base.batch;
774     struct object_surface *obj_surface = SURFACE(encode_state->current_render_target);
775     int mb_width = ALIGN(obj_surface->orig_width, 16) / 16;
776     int len_in_dowrds = 6 + 1;
777
778     BEGIN_BATCH(batch, len_in_dowrds);
779     
780     OUT_BATCH(batch, CMD_MEDIA_OBJECT | (len_in_dowrds - 2));
781     OUT_BATCH(batch, kernel);           /*Interface Descriptor Offset*/ 
782     OUT_BATCH(batch, 0);
783     OUT_BATCH(batch, 0);
784     OUT_BATCH(batch, 0);
785     OUT_BATCH(batch, 0);
786    
787     /*inline data */
788     OUT_BATCH(batch, mb_width << 16 | mb_y << 8 | mb_x);                        /*M0.0 Refrence0 X,Y, not used in Intra*/
789     ADVANCE_BATCH(batch);
790
791     return len_in_dowrds * 4;
792 }
793
794 static void gen6_vme_media_init(VADriverContextP ctx, struct gen6_encoder_context *gen6_encoder_context)
795 {
796     struct i965_driver_data *i965 = i965_driver_data(ctx);
797     struct gen6_vme_context *vme_context = &gen6_encoder_context->vme_context;
798     dri_bo *bo;
799
800     /* constant buffer */
801     dri_bo_unreference(vme_context->curbe.bo);
802     bo = dri_bo_alloc(i965->intel.bufmgr,
803                       "Buffer",
804                       CURBE_TOTAL_DATA_LENGTH, 64);
805     assert(bo);
806     vme_context->curbe.bo = bo;
807
808     dri_bo_unreference(vme_context->surface_state_binding_table.bo);
809     bo = dri_bo_alloc(i965->intel.bufmgr,
810                       "surface state & binding table",
811                       (SURFACE_STATE_PADDED_SIZE + sizeof(unsigned int)) * MAX_MEDIA_SURFACES_GEN6,
812                       4096);
813     assert(bo);
814     vme_context->surface_state_binding_table.bo = bo;
815
816     /* interface descriptor remapping table */
817     dri_bo_unreference(vme_context->idrt.bo);
818     bo = dri_bo_alloc(i965->intel.bufmgr, 
819                       "Buffer", 
820                       MAX_INTERFACE_DESC_GEN6 * sizeof(struct gen6_interface_descriptor_data), 16);
821     assert(bo);
822     vme_context->idrt.bo = bo;
823
824     /* VME output buffer */
825     dri_bo_unreference(vme_context->vme_output.bo);
826     vme_context->vme_output.bo = NULL;
827
828     /* VME state */
829     dri_bo_unreference(vme_context->vme_state.bo);
830     bo = dri_bo_alloc(i965->intel.bufmgr,
831                       "Buffer",
832                       1024*16, 64);
833     assert(bo);
834     vme_context->vme_state.bo = bo;
835
836     vme_context->vfe_state.max_num_threads = 60 - 1;
837     vme_context->vfe_state.num_urb_entries = 16;
838     vme_context->vfe_state.gpgpu_mode = 0;
839     vme_context->vfe_state.urb_entry_size = 59 - 1;
840     vme_context->vfe_state.curbe_allocation_size = CURBE_ALLOCATION_SIZE - 1;
841 }
842
/*
 * Walk every macroblock of the frame and emit one MEDIA_OBJECT per MB,
 * re-emitting the full pipeline state (flush, pipeline select, base
 * addresses, VFE, CURBE, IDRT) whenever a fresh batch buffer is started.
 *
 * NOTE(review): the free-space check runs *after* each media object has
 * already been emitted, and assert(0) aborts debug builds before the
 * flush/restart recovery below it — confirm the 0x1000 atomic reserve
 * is always large enough that the check never fires in practice.
 */
static void gen6_vme_pipeline_programing(VADriverContextP ctx, 
                                         struct encode_state *encode_state,
                                         struct gen6_encoder_context *gen6_encoder_context)
{
    struct intel_batchbuffer *batch = gen6_encoder_context->base.batch;
    VAEncSliceParameterBuffer *pSliceParameter = (VAEncSliceParameterBuffer *)encode_state->slice_params[0]->buffer;
    VAEncSequenceParameterBufferH264 *pSequenceParameter = (VAEncSequenceParameterBufferH264 *)encode_state->seq_param->buffer;
    int is_intra = pSliceParameter->slice_flags.bits.is_intra;
    int width_in_mbs = pSequenceParameter->picture_width_in_mbs;
    int height_in_mbs = pSequenceParameter->picture_height_in_mbs;
    int emit_new_state = 1, object_len_in_bytes;
    int x, y;

    intel_batchbuffer_start_atomic(batch, 0x1000);

    for(y = 0; y < height_in_mbs; y++){
        for(x = 0; x < width_in_mbs; x++){      

            if (emit_new_state) {
                /*Step1: MI_FLUSH/PIPE_CONTROL*/
                intel_batchbuffer_emit_mi_flush(batch);

                /*Step2: State command PIPELINE_SELECT*/
                gen6_vme_pipeline_select(ctx, gen6_encoder_context);

                /*Step3: State commands configuring pipeline states*/
                gen6_vme_state_base_address(ctx, gen6_encoder_context);
                gen6_vme_vfe_state(ctx, gen6_encoder_context);
                gen6_vme_curbe_load(ctx, gen6_encoder_context);
                gen6_vme_idrt(ctx, gen6_encoder_context);

                emit_new_state = 0;
            }

            /*Step4: Primitive commands*/
            /* Intra and inter prediction use different kernels. */
            object_len_in_bytes = gen6_vme_media_object(ctx, encode_state, x, y, is_intra ? VME_INTRA_SHADER : VME_INTER_SHADER, gen6_encoder_context);

            /* Out of batch space: flush what we have and restart with a
             * fresh batch, which requires re-emitting all pipeline state. */
            if (intel_batchbuffer_check_free_space(batch, object_len_in_bytes) == 0) {
                assert(0);
                intel_batchbuffer_end_atomic(batch);    
                intel_batchbuffer_flush(batch);
                emit_new_state = 1;
                intel_batchbuffer_start_atomic(batch, 0x1000);
            }
        }
    }

    intel_batchbuffer_end_atomic(batch);        
}
892
893 static VAStatus gen6_vme_prepare(VADriverContextP ctx, 
894                                  struct encode_state *encode_state,
895                                  struct gen6_encoder_context *gen6_encoder_context)
896 {
897     struct i965_driver_data *i965 = i965_driver_data(ctx);
898     VAStatus vaStatus = VA_STATUS_SUCCESS;
899     VAEncSliceParameterBuffer *pSliceParameter = (VAEncSliceParameterBuffer *)encode_state->slice_params[0]->buffer;
900     int is_intra = pSliceParameter->slice_flags.bits.is_intra;
901         
902     /*Setup all the memory object*/
903     if (IS_GEN7(i965->intel.device_id))
904         gen7_vme_surface_setup(ctx, encode_state, is_intra, gen6_encoder_context);
905     else
906         gen6_vme_surface_setup(ctx, encode_state, is_intra, gen6_encoder_context);
907
908     gen6_vme_interface_setup(ctx, encode_state, gen6_encoder_context);
909     gen6_vme_constant_setup(ctx, encode_state, gen6_encoder_context);
910     gen6_vme_vme_state_setup(ctx, encode_state, is_intra, gen6_encoder_context);
911
912     /*Programing media pipeline*/
913     gen6_vme_pipeline_programing(ctx, encode_state, gen6_encoder_context);
914
915     return vaStatus;
916 }
917
918 static VAStatus gen6_vme_run(VADriverContextP ctx, 
919                              struct encode_state *encode_state,
920                              struct gen6_encoder_context *gen6_encoder_context)
921 {
922     struct intel_batchbuffer *batch = gen6_encoder_context->base.batch;
923
924     intel_batchbuffer_flush(batch);
925
926     return VA_STATUS_SUCCESS;
927 }
928
929 static VAStatus gen6_vme_stop(VADriverContextP ctx, 
930                               struct encode_state *encode_state,
931                               struct gen6_encoder_context *gen6_encoder_context)
932 {
933     return VA_STATUS_SUCCESS;
934 }
935
936 VAStatus gen6_vme_pipeline(VADriverContextP ctx,
937                            VAProfile profile,
938                            struct encode_state *encode_state,
939                            struct gen6_encoder_context *gen6_encoder_context)
940 {
941     gen6_vme_media_init(ctx, gen6_encoder_context);
942     gen6_vme_prepare(ctx, encode_state, gen6_encoder_context);
943     gen6_vme_run(ctx, encode_state, gen6_encoder_context);
944     gen6_vme_stop(ctx, encode_state, gen6_encoder_context);
945
946     return VA_STATUS_SUCCESS;
947 }
948
949 Bool gen6_vme_context_init(VADriverContextP ctx, struct gen6_vme_context *vme_context)
950 {
951     struct i965_driver_data *i965 = i965_driver_data(ctx);
952     int i;
953
954     if (IS_GEN7(i965->intel.device_id))
955         memcpy(vme_context->vme_kernels, gen7_vme_kernels, sizeof(vme_context->vme_kernels));
956     else
957         memcpy(vme_context->vme_kernels, gen6_vme_kernels, sizeof(vme_context->vme_kernels));
958
959     for (i = 0; i < GEN6_VME_KERNEL_NUMBER; i++) {
960         /*Load kernel into GPU memory*/ 
961         struct i965_kernel *kernel = &vme_context->vme_kernels[i];
962
963         kernel->bo = dri_bo_alloc(i965->intel.bufmgr, 
964                                   kernel->name, 
965                                   kernel->size,
966                                   0x1000);
967         assert(kernel->bo);
968         dri_bo_subdata(kernel->bo, 0, kernel->size, kernel->bin);
969     }
970     
971     return True;
972 }
973
974 Bool gen6_vme_context_destroy(struct gen6_vme_context *vme_context)
975 {
976     int i;
977
978     dri_bo_unreference(vme_context->idrt.bo);
979     vme_context->idrt.bo = NULL;
980
981     dri_bo_unreference(vme_context->surface_state_binding_table.bo);
982     vme_context->surface_state_binding_table.bo = NULL;
983
984     dri_bo_unreference(vme_context->curbe.bo);
985     vme_context->curbe.bo = NULL;
986
987     dri_bo_unreference(vme_context->vme_output.bo);
988     vme_context->vme_output.bo = NULL;
989
990     dri_bo_unreference(vme_context->vme_state.bo);
991     vme_context->vme_state.bo = NULL;
992
993     for (i = 0; i < GEN6_VME_KERNEL_NUMBER; i++) {
994         /*Load kernel into GPU memory*/ 
995         struct i965_kernel *kernel = &vme_context->vme_kernels[i];
996
997         dri_bo_unreference(kernel->bo);
998         kernel->bo = NULL;
999     }
1000
1001     return True;
1002 }