2 * Copyright © 2011 Intel Corporation
4 * Permission is hereby granted, free of charge, to any person obtaining a
5 * copy of this software and associated documentation files (the
6 * "Software"), to deal in the Software without restriction, including
7 * without limitation the rights to use, copy, modify, merge, publish,
8 * distribute, sub license, and/or sell copies of the Software, and to
9 * permit persons to whom the Software is furnished to do so, subject to
10 * the following conditions:
12 * The above copyright notice and this permission notice (including the
13 * next paragraph) shall be included in all copies or substantial portions
16 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
17 * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
18 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
19 * IN NO EVENT SHALL PRECISION INSIGHT AND/OR ITS SUPPLIERS BE LIABLE FOR
20 * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
21 * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
22 * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
25 * Li Xiaowei <xiaowei.a.li@intel.com>
33 #include "intel_batchbuffer.h"
34 #include "intel_driver.h"
36 #include "i965_defines.h"
37 #include "i965_drv_video.h"
38 #include "gen75_vpp_gpe.h"
/*
 * Sizing and layout constants for the GPE state buffers used in this file.
 * NOTE(review): this listing carries embedded line numbers with gaps, so some
 * original lines are not visible here.
 */
/* Interface-descriptor table capacity and number of surface-state slots. */
40 #define MAX_INTERFACE_DESC_GEN6 MAX_GPE_KERNELS
41 #define MAX_MEDIA_SURFACES_GEN6 34
/*
 * One surface-state slot is as large as the bigger of the two gen7
 * surface-state structs, each passed through ALIGN(..., 32)
 * (presumably rounding up to a multiple of 32 bytes -- ALIGN is defined elsewhere).
 */
43 #define SURFACE_STATE_PADDED_SIZE_0_GEN7 ALIGN(sizeof(struct gen7_surface_state), 32)
44 #define SURFACE_STATE_PADDED_SIZE_1_GEN7 ALIGN(sizeof(struct gen7_surface_state2), 32)
45 #define SURFACE_STATE_PADDED_SIZE MAX(SURFACE_STATE_PADDED_SIZE_0_GEN7, SURFACE_STATE_PADDED_SIZE_1_GEN7)
/* Byte offset of surface-state slot `index`. */
47 #define SURFACE_STATE_OFFSET(index) (SURFACE_STATE_PADDED_SIZE * (index))
/*
 * Byte offset of binding-table entry `index`: the table begins right after
 * MAX_MEDIA_SURFACES_GEN6 surface-state slots, one unsigned int per entry.
 */
48 #define BINDING_TABLE_OFFSET(index) (SURFACE_STATE_OFFSET(MAX_MEDIA_SURFACES_GEN6) + sizeof(unsigned int) * (index))
/*
 * CURBE (constant buffer) sizing. CURBE_TOTAL_DATA_LENGTH becomes
 * curbe.length and CURBE_ALLOCATION_SIZE - 1 becomes
 * vfe_state.curbe_allocation_size in gen75_gpe_context_init().
 * CURBE_URB_ENTRY_LENGTH is not referenced in the visible part of this file.
 */
50 #define CURBE_ALLOCATION_SIZE 37
51 #define CURBE_TOTAL_DATA_LENGTH (4 * 32)
52 #define CURBE_URB_ENTRY_LENGTH 4
/*
 * Forward declarations of the surface create/destroy entry points; their
 * definitions are not in the visible part of this file. They are called
 * below for the temporary surface used by the sharpening filter.
 * NOTE(review): the return types and several parameters of both prototypes
 * are not visible in this listing.
 */
55 i965_CreateSurfaces(VADriverContextP ctx,
60 VASurfaceID *surfaces);
63 i965_DestroySurfaces(VADriverContextP ctx,
64 VASurfaceID *surface_list,
/*
 * Shader tables for the three sharpening passes. Each table is an array of
 * rows of four unsigned ints whose contents come from the included .g75b
 * file.
 */
67 /* Shaders information for sharpening */
68 static const unsigned int gen75_gpe_sharpening_h_blur[][4] = {
69 #include "shaders/post_processing/gen75/sharpening_h_blur.g75b"
71 static const unsigned int gen75_gpe_sharpening_v_blur[][4] = {
72 #include "shaders/post_processing/gen75/sharpening_v_blur.g75b"
74 static const unsigned int gen75_gpe_sharpening_unmask[][4] = {
75 #include "shaders/post_processing/gen75/sharpening_unmask.g75b"
/*
 * Kernel descriptors passed to i965_gpe_load_kernels() by
 * gen75_gpe_process_sharpening(). The entry order (horizontal blur,
 * vertical blur, unmask) matches the sub_shader_index values 0, 1 and 2
 * that the sharpening steps select.
 * NOTE(review): each entry has further fields on lines that are not visible
 * in this listing.
 */
77 static struct i965_kernel gen75_vpp_sharpening_kernels[] = {
79 "vpp: sharpening(horizontal blur)",
81 gen75_gpe_sharpening_h_blur,
82 sizeof(gen75_gpe_sharpening_h_blur),
86 "vpp: sharpening(vertical blur)",
88 gen75_gpe_sharpening_v_blur,
89 sizeof(gen75_gpe_sharpening_v_blur),
93 "vpp: sharpening(unmask)",
95 gen75_gpe_sharpening_unmask,
96 sizeof(gen75_gpe_sharpening_unmask),
/*
 * Bind every surface the kernels access into the surface-state / binding
 * table area of the GPE context.
 *
 * Slot layout (binding-table index == surface-state index):
 *   2k, 2k+1               luma and chroma of input surface k
 *   input_surface_sum      luma of the output surface
 *   input_surface_sum + 1  chroma of the output surface
 *   input_surface_sum + 2  kernel return buffer
 *
 * Always returns VA_STATUS_SUCCESS.
 * NOTE(review): the declaration of `i` and one argument of each surface
 * setup call (presumably the surface object) are on lines that are not
 * visible in this listing.
 */
102 gpe_surfaces_setup(VADriverContextP ctx,
103 struct vpp_gpe_context *vpp_gpe_ctx)
105 struct i965_driver_data *i965 = i965_driver_data(ctx);
106 struct object_surface *obj_surface;
/* Two slots per input: the current frame plus all forward/backward references. */
108 unsigned char input_surface_sum = (1 + vpp_gpe_ctx->forward_surf_sum +
109 vpp_gpe_ctx->backward_surf_sum) * 2;
111 /* Binding input NV12 surfaces (Luma + Chroma)*/
112 for( i = 0; i < input_surface_sum; i += 2){
/* i advances by two, so i/2 is the index of the input surface. */
113 obj_surface = SURFACE(vpp_gpe_ctx->surface_input[i/2]);
115 vpp_gpe_ctx->vpp_media_rw_surface_setup(ctx,
116 &vpp_gpe_ctx->gpe_ctx,
118 BINDING_TABLE_OFFSET(i),
119 SURFACE_STATE_OFFSET(i));
121 vpp_gpe_ctx->vpp_media_chroma_surface_setup(ctx,
122 &vpp_gpe_ctx->gpe_ctx,
124 BINDING_TABLE_OFFSET(i + 1),
125 SURFACE_STATE_OFFSET(i + 1));
128 /* Binding output NV12 surface(Luma + Chroma) */
129 obj_surface = SURFACE(vpp_gpe_ctx->surface_output);
131 vpp_gpe_ctx->vpp_media_rw_surface_setup(ctx,
132 &vpp_gpe_ctx->gpe_ctx,
134 BINDING_TABLE_OFFSET(input_surface_sum),
135 SURFACE_STATE_OFFSET(input_surface_sum));
136 vpp_gpe_ctx->vpp_media_chroma_surface_setup(ctx,
137 &vpp_gpe_ctx->gpe_ctx,
139 BINDING_TABLE_OFFSET(input_surface_sum + 1),
140 SURFACE_STATE_OFFSET(input_surface_sum + 1));
141 /* Bind kernel return buffer surface */
142 vpp_gpe_ctx->vpp_buffer_surface_setup(ctx,
143 &vpp_gpe_ctx->gpe_ctx,
144 &vpp_gpe_ctx->vpp_kernel_return,
145 BINDING_TABLE_OFFSET((input_surface_sum + 2)),
146 SURFACE_STATE_OFFSET(input_surface_sum + 2));
148 return VA_STATUS_SUCCESS;
/*
 * Write one interface descriptor per loaded sub-shader into the interface
 * descriptor table buffer (gpe_ctx.idrt.bo). Each descriptor points at its
 * kernel and at the start of the shared binding table, and a relocation is
 * emitted for each descriptor at its desc0 field.
 *
 * Always returns VA_STATUS_SUCCESS.
 * NOTE(review): the declaration of `i`, the assignment of `desc`, and the
 * remaining dri_bo_emit_reloc() arguments are on lines that are not visible
 * in this listing.
 */
152 gpe_interface_setup(VADriverContextP ctx,
153 struct vpp_gpe_context *vpp_gpe_ctx)
155 struct gen6_interface_descriptor_data *desc;
156 dri_bo *bo = vpp_gpe_ctx->gpe_ctx.idrt.bo;
163 /* Set up the descriptor table */
164 for(i = 0; i < vpp_gpe_ctx->sub_shader_sum; i++){
165 struct i965_kernel *kernel = &vpp_gpe_ctx->gpe_ctx.kernels[i];
166 assert(sizeof(*desc) == 32);
167 memset(desc, 0, sizeof(*desc));
/* The kernel offset is stored shifted right by 6 bits. */
168 desc->desc0.kernel_start_pointer = (kernel->bo->offset >> 6);
169 desc->desc2.sampler_count = 0; /* FIXME: */
/*
 * No sampler state is used. Store integer 0 rather than the pointer
 * constant NULL: the neighbouring descriptor fields are all written as
 * integers, and NULL would be an implicit pointer-to-integer conversion.
 * NOTE(review): assumes sampler_state_pointer is an integer bit-field --
 * confirm against the struct definition.
 */
170 desc->desc2.sampler_state_pointer = 0;
171 desc->desc3.binding_table_entry_count = 6; /* FIXME: */
/* The binding table offset is stored shifted right by 5 bits. */
172 desc->desc3.binding_table_pointer = (BINDING_TABLE_OFFSET(0) >> 5);
173 desc->desc4.constant_urb_entry_read_offset = 0;
174 desc->desc4.constant_urb_entry_read_length = 0;
176 dri_bo_emit_reloc(bo,
177 I915_GEM_DOMAIN_INSTRUCTION, 0,
179 i* sizeof(*desc) + offsetof(struct gen6_interface_descriptor_data, desc0),
186 return VA_STATUS_SUCCESS;
/*
 * Map the CURBE buffer object for writing, then unmap it again.
 * The visible lines in between copy kernel_param_size bytes from
 * kernel_param into the mapped buffer.
 *
 * Always returns VA_STATUS_SUCCESS.
 * NOTE(review): the source lines directly before and after the copy are
 * missing from this listing, so it cannot be confirmed here that the copy is
 * unconditional (or even active). If it is live, kernel_param_size is not
 * checked against the CURBE buffer length before the memcpy.
 * NOTE(review): the mapping is only validated by assert(), which is compiled
 * out when NDEBUG is defined.
 */
190 gpe_constant_setup(VADriverContextP ctx,
191 struct vpp_gpe_context *vpp_gpe_ctx){
192 dri_bo_map(vpp_gpe_ctx->gpe_ctx.curbe.bo, 1);
193 assert(vpp_gpe_ctx->gpe_ctx.curbe.bo->virtual);
194 /* Copy buffer into CURBE */
196 unsigned char* constant_buffer = vpp_gpe_ctx->gpe_ctx.curbe.bo->virtual;
197 memcpy(constant_buffer, vpp_gpe_ctx->kernel_param,
198 vpp_gpe_ctx->kernel_param_size);
200 dri_bo_unmap(vpp_gpe_ctx->gpe_ctx.curbe.bo);
202 return VA_STATUS_SUCCESS;
/*
 * Fill the second-level batch buffer (vpp_batchbuffer.bo) with one
 * MEDIA_OBJECT command per thread, each carrying that thread's inline
 * parameter block, followed by MI_BATCH_BUFFER_END.
 *
 * Always returns VA_STATUS_SUCCESS.
 * NOTE(review): the command dwords between sub_shader_index and the inline
 * data, and the loop's braces, are on lines that are not visible in this
 * listing. MI_BATCH_BUFFER_END is presumably written once, after the loop.
 * NOTE(review): the result of dri_bo_map() is not checked before
 * bo->virtual is written through.
 */
206 gpe_fill_thread_parameters(VADriverContextP ctx,
207 struct vpp_gpe_context *vpp_gpe_ctx)
209 unsigned int *command_ptr;
210 unsigned int i, size = vpp_gpe_ctx->thread_param_size;
211 unsigned char* position = NULL;
213 /* Thread inline data setting*/
214 dri_bo_map(vpp_gpe_ctx->vpp_batchbuffer.bo, 1);
215 command_ptr = vpp_gpe_ctx->vpp_batchbuffer.bo->virtual;
217 for(i = 0; i < vpp_gpe_ctx->thread_num; i ++)
/*
 * Command length field: inline data in dwords plus 6 header dwords,
 * minus 2. The division assumes `size` is a multiple of sizeof(int).
 */
219 *command_ptr++ = (CMD_MEDIA_OBJECT | (size/sizeof(int) + 6 - 2));
220 *command_ptr++ = vpp_gpe_ctx->sub_shader_index;
226 /* copy thread inline data */
/* Thread i's parameters start at byte offset size * i in thread_param. */
227 position =(unsigned char*)(vpp_gpe_ctx->thread_param + size * i);
228 memcpy(command_ptr, position, size);
229 command_ptr += size/sizeof(int);
233 *command_ptr++ = MI_BATCH_BUFFER_END;
235 dri_bo_unmap(vpp_gpe_ctx->vpp_batchbuffer.bo);
237 return VA_STATUS_SUCCESS;
/*
 * Build the first-level batch: flush, emit the generic gen6 GPE pipeline
 * state, fill the per-thread command buffer, then chain to that buffer with
 * MI_BATCH_BUFFER_START. Everything is emitted inside one atomic batch
 * section.
 *
 * Always returns VA_STATUS_SUCCESS; the status returned by
 * gpe_fill_thread_parameters() is not inspected.
 * NOTE(review): the final OUT_RELOC argument is on a line that is not
 * visible in this listing.
 */
241 gpe_pipeline_setup(VADriverContextP ctx,
242 struct vpp_gpe_context *vpp_gpe_ctx)
244 intel_batchbuffer_start_atomic(vpp_gpe_ctx->batch, 0x1000);
245 intel_batchbuffer_emit_mi_flush(vpp_gpe_ctx->batch);
247 gen6_gpe_pipeline_setup(ctx, &vpp_gpe_ctx->gpe_ctx, vpp_gpe_ctx->batch);
249 gpe_fill_thread_parameters(ctx, vpp_gpe_ctx);
/* Two dwords: the start command and the relocated address of the thread command buffer. */
251 BEGIN_BATCH(vpp_gpe_ctx->batch, 2);
252 OUT_BATCH(vpp_gpe_ctx->batch, MI_BATCH_BUFFER_START | (2 << 6));
253 OUT_RELOC(vpp_gpe_ctx->batch,
254 vpp_gpe_ctx->vpp_batchbuffer.bo,
255 I915_GEM_DOMAIN_COMMAND, 0,
257 ADVANCE_BATCH(vpp_gpe_ctx->batch);
259 intel_batchbuffer_end_atomic(vpp_gpe_ctx->batch);
261 return VA_STATUS_SUCCESS;
/*
 * Allocate the per-run buffers and initialise the GPE context:
 *   - the thread command buffer, sized for thread_num MEDIA_OBJECT commands
 *     (6 header dwords plus thread_param_size bytes each) plus 16 bytes;
 *   - the kernel return buffer, 16 bytes per thread.
 * Buffers from a previous run are released first.
 *
 * Always returns VA_STATUS_SUCCESS.
 * NOTE(review): the declaration of `bo` and the name argument of the first
 * dri_bo_alloc() call are on lines that are not visible in this listing.
 * NOTE(review): the dri_bo_alloc() results are not checked for NULL.
 */
265 gpe_process_init(VADriverContextP ctx,
266 struct vpp_gpe_context *vpp_gpe_ctx)
268 struct i965_driver_data *i965 = i965_driver_data(ctx);
271 unsigned int batch_buf_size = vpp_gpe_ctx->thread_num *
272 (vpp_gpe_ctx->thread_param_size + 6 * sizeof(int)) + 16;
274 vpp_gpe_ctx->vpp_kernel_return.num_blocks = vpp_gpe_ctx->thread_num;
275 vpp_gpe_ctx->vpp_kernel_return.size_block = 16;
276 vpp_gpe_ctx->vpp_kernel_return.pitch = 1;
277 unsigned int kernel_return_size = vpp_gpe_ctx->vpp_kernel_return.num_blocks
278 * vpp_gpe_ctx->vpp_kernel_return.size_block;
/*
 * dri_bo_alloc() hands back a buffer that the caller already owns one
 * reference to. The context keeps exactly that reference; it is dropped by
 * the dri_bo_unreference() at the top of the next run or in
 * gen75_gpe_context_destroy(). Taking a second reference here would never
 * be balanced and would leak the buffer on every call.
 */
280 dri_bo_unreference(vpp_gpe_ctx->vpp_batchbuffer.bo);
281 bo = dri_bo_alloc(i965->intel.bufmgr,
283 batch_buf_size, 0x1000);
284 vpp_gpe_ctx->vpp_batchbuffer.bo = bo;
287 dri_bo_unreference(vpp_gpe_ctx->vpp_kernel_return.bo);
288 bo = dri_bo_alloc(i965->intel.bufmgr,
289 "vpp kernel return buffer",
290 kernel_return_size, 0x1000);
291 vpp_gpe_ctx->vpp_kernel_return.bo = bo;
294 i965_gpe_context_init(ctx, &vpp_gpe_ctx->gpe_ctx);
296 return VA_STATUS_SUCCESS;
/*
 * Prepare one kernel run: bind surfaces, fill the interface descriptor
 * table, set up the constant buffer, then program the media pipeline.
 *
 * Always returns VA_STATUS_SUCCESS; the statuses returned by the four
 * helpers are not inspected.
 */
300 gpe_process_prepare(VADriverContextP ctx,
301 struct vpp_gpe_context *vpp_gpe_ctx)
303 /* Set up all the memory objects */
304 gpe_surfaces_setup(ctx, vpp_gpe_ctx);
305 gpe_interface_setup(ctx, vpp_gpe_ctx);
306 gpe_constant_setup(ctx, vpp_gpe_ctx);
308 /* Programming the media pipeline */
309 gpe_pipeline_setup(ctx, vpp_gpe_ctx);
311 return VA_STATUS_SUCCESS;
/*
 * Submit the prepared work by flushing the context's batch buffer.
 * Always returns VA_STATUS_SUCCESS.
 */
315 gpe_process_run(VADriverContextP ctx,
316 struct vpp_gpe_context *vpp_gpe_ctx)
318 intel_batchbuffer_flush(vpp_gpe_ctx->batch);
320 return VA_STATUS_SUCCESS;
/*
 * Run one kernel pass with the parameters currently stored in vpp_gpe_ctx:
 * init, prepare, then run. The three statuses are OR-ed together into
 * va_status.
 * NOTE(review): the return statement is on a line that is not visible in
 * this listing; presumably va_status is returned.
 * NOTE(review): prepare and run are executed even when an earlier stage
 * reported a non-success status.
 */
324 gen75_gpe_process(VADriverContextP ctx,
325 struct vpp_gpe_context * vpp_gpe_ctx)
327 VAStatus va_status = VA_STATUS_SUCCESS;
328 va_status = gpe_process_init(ctx, vpp_gpe_ctx);
329 va_status |=gpe_process_prepare(ctx, vpp_gpe_ctx);
330 va_status |=gpe_process_run(ctx, vpp_gpe_ctx);
/*
 * Apply the sharpening filter in three kernel passes:
 *   1. horizontal blur  (kernel 0): original input -> original output
 *   2. vertical blur    (kernel 1): original output -> temporary surface
 *   3. unmask           (kernel 2): original input + temporary surface
 *                                   -> original output
 * For each pass a block of per-thread parameters is built, one
 * ThreadParameterSharpening per thread, and gen75_gpe_process() is run.
 *
 * The filter strength is read from the first filter buffer of the pipeline
 * parameters and scaled to an integer amount (intensity * 128).
 *
 * NOTE(review): declarations of `i` and `pos`, the closing braces, some call
 * arguments and the return statement are on lines that are not visible in
 * this listing.
 * NOTE(review): none of the three malloc() results is checked before the
 * buffer is written.
 * NOTE(review): the status of pass 2 is discarded and the status of pass 1
 * is overwritten by pass 3, so only the last pass can report failure.
 * NOTE(review): thread_param is left pointing at freed memory after each
 * pass.
 * NOTE(review): obj_buf is dereferenced without a NULL check, and the
 * intensity range is only enforced by assert().
 */
336 gen75_gpe_process_sharpening(VADriverContextP ctx,
337 struct vpp_gpe_context * vpp_gpe_ctx)
339 VAStatus va_status = VA_STATUS_SUCCESS;
340 struct i965_driver_data *i965 = i965_driver_data(ctx);
/* Remember the caller's surfaces; passes 2 and 3 rewire input/output. */
341 VASurfaceID origin_in_surf_id = vpp_gpe_ctx-> surface_input[0];
342 VASurfaceID origin_out_surf_id = vpp_gpe_ctx-> surface_output;
344 VAProcPipelineParameterBuffer* pipe = vpp_gpe_ctx->pipeline_param;
345 VABufferID *filter_ids = (VABufferID*)pipe->filters ;
346 struct object_buffer *obj_buf = BUFFER((*(filter_ids + 0)));
347 VAProcFilterParameterBuffer* filter =
348 (VAProcFilterParameterBuffer*)obj_buf-> buffer_store->buffer;
349 float sharpening_intensity = filter->value;
351 ThreadParameterSharpening thr_param;
352 unsigned int thr_param_size = sizeof(ThreadParameterSharpening);
/* The three sharpening kernels are loaded only on the first frame. */
356 if(vpp_gpe_ctx->is_first_frame){
357 vpp_gpe_ctx->sub_shader_sum = 3;
358 i965_gpe_load_kernels(ctx,
359 &vpp_gpe_ctx->gpe_ctx,
360 gen75_vpp_sharpening_kernels,
361 vpp_gpe_ctx->sub_shader_sum);
/* Create the temporary surface once; it is reused until the context is destroyed. */
364 if(!vpp_gpe_ctx->surface_tmp){
365 va_status = i965_CreateSurfaces(ctx,
366 vpp_gpe_ctx->in_frame_w,
367 vpp_gpe_ctx->in_frame_h,
370 &vpp_gpe_ctx->surface_tmp);
371 assert(va_status == VA_STATUS_SUCCESS);
373 struct object_surface * obj_surf = SURFACE(vpp_gpe_ctx->surface_tmp);
374 i965_check_alloc_surface_bo(ctx, obj_surf, 1, VA_FOURCC('N','V','1','2'),
378 assert(sharpening_intensity >= 0.0 && sharpening_intensity <= 1.0);
/* Both amounts get the same value: intensity in [0, 1] scaled to [0, 128]. */
379 thr_param.l_amount = (unsigned int)(sharpening_intensity * 128);
380 thr_param.d_amount = (unsigned int)(sharpening_intensity * 128);
382 thr_param.base.pic_width = vpp_gpe_ctx->in_frame_w;
383 thr_param.base.pic_height = vpp_gpe_ctx->in_frame_h;
385 /* Step 1: horizontal blur process */
386 vpp_gpe_ctx->forward_surf_sum = 0;
387 vpp_gpe_ctx->backward_surf_sum = 0;
/* One thread per 16 rows; integer division drops any remainder rows. */
389 vpp_gpe_ctx->thread_num = vpp_gpe_ctx->in_frame_h/16;
390 vpp_gpe_ctx->thread_param_size = thr_param_size;
391 vpp_gpe_ctx->thread_param = (unsigned char*) malloc(vpp_gpe_ctx->thread_param_size
392 *vpp_gpe_ctx->thread_num);
393 pos = vpp_gpe_ctx->thread_param;
394 for( i = 0 ; i < vpp_gpe_ctx->thread_num; i++){
395 thr_param.base.v_pos = 16 * i;
396 thr_param.base.h_pos = 0;
397 memcpy(pos, &thr_param, thr_param_size);
398 pos += thr_param_size;
401 vpp_gpe_ctx->sub_shader_index = 0;
402 va_status = gen75_gpe_process(ctx, vpp_gpe_ctx);
403 free(vpp_gpe_ctx->thread_param);
405 /* Step 2: vertical blur process */
/* The result of step 1 becomes the input; the temporary surface receives the output. */
406 vpp_gpe_ctx->surface_input[0] = vpp_gpe_ctx->surface_output;
407 vpp_gpe_ctx->surface_output = vpp_gpe_ctx->surface_tmp;
408 vpp_gpe_ctx->forward_surf_sum = 0;
409 vpp_gpe_ctx->backward_surf_sum = 0;
/* One thread per 16 columns; integer division drops any remainder columns. */
411 vpp_gpe_ctx->thread_num = vpp_gpe_ctx->in_frame_w/16;
412 vpp_gpe_ctx->thread_param_size = thr_param_size;
413 vpp_gpe_ctx->thread_param = (unsigned char*) malloc(vpp_gpe_ctx->thread_param_size
414 *vpp_gpe_ctx->thread_num);
415 pos = vpp_gpe_ctx->thread_param;
416 for( i = 0 ; i < vpp_gpe_ctx->thread_num; i++){
417 thr_param.base.v_pos = 0;
418 thr_param.base.h_pos = 16 * i;
419 memcpy(pos, &thr_param, thr_param_size);
420 pos += thr_param_size;
423 vpp_gpe_ctx->sub_shader_index = 1;
424 gen75_gpe_process(ctx, vpp_gpe_ctx);
425 free(vpp_gpe_ctx->thread_param);
427 /* Step 3: apply the blur to original surface */
/* Restore the caller's surfaces and pass the blurred image as one forward reference. */
428 vpp_gpe_ctx->surface_input[0] = origin_in_surf_id;
429 vpp_gpe_ctx->surface_input[1] = vpp_gpe_ctx->surface_tmp;
430 vpp_gpe_ctx->surface_output = origin_out_surf_id;
431 vpp_gpe_ctx->forward_surf_sum = 1;
432 vpp_gpe_ctx->backward_surf_sum = 0;
/* One thread per 4 rows; integer division drops any remainder rows. */
434 vpp_gpe_ctx->thread_num = vpp_gpe_ctx->in_frame_h/4;
435 vpp_gpe_ctx->thread_param_size = thr_param_size;
436 vpp_gpe_ctx->thread_param = (unsigned char*) malloc(vpp_gpe_ctx->thread_param_size
437 *vpp_gpe_ctx->thread_num);
438 pos = vpp_gpe_ctx->thread_param;
439 for( i = 0 ; i < vpp_gpe_ctx->thread_num; i++){
440 thr_param.base.v_pos = 4 * i;
441 thr_param.base.h_pos = 0;
442 memcpy(pos, &thr_param, thr_param_size);
443 pos += thr_param_size;
446 vpp_gpe_ctx->sub_shader_index = 2;
447 va_status = gen75_gpe_process(ctx, vpp_gpe_ctx);
448 free(vpp_gpe_ctx->thread_param);
/*
 * Entry point for one VPP frame on the GPE path.
 *
 * Collects the input surface and its forward/backward reference surfaces
 * from the pipeline parameters into vpp_gpe_ctx->surface_input[]
 * (index 0 = current frame, then forward references, then backward
 * references), records the frame size from the input surface, and
 * dispatches on the type of the first filter. Only VAProcFilterSharpening
 * is handled in the visible code. is_first_frame is cleared afterwards.
 *
 * NOTE(review): the declaration of `i`, the loop braces, the branch
 * structure around the VA_STATUS_ERROR_ATTR_NOT_SUPPORTED assignment
 * (presumably an `else`) and the return statement are on lines that are not
 * visible in this listing.
 * NOTE(review): the filter count and reference count limits are only
 * enforced by assert(); obj_buf and obj_surface are dereferenced without
 * NULL checks.
 */
453 VAStatus gen75_gpe_process_picture(VADriverContextP ctx,
454 struct vpp_gpe_context * vpp_gpe_ctx)
456 VAStatus va_status = VA_STATUS_SUCCESS;
457 struct i965_driver_data *i965 = i965_driver_data(ctx);
458 VAProcPipelineParameterBuffer* pipe = vpp_gpe_ctx->pipeline_param;
461 assert(pipe->num_filters == 1);
462 VABufferID *filter_ids = (VABufferID*)pipe->filters ;
463 struct object_buffer *obj_buf = BUFFER((*(filter_ids + 0)));
464 VAProcFilterParameterBuffer* filter =
465 (VAProcFilterParameterBuffer*)obj_buf-> buffer_store->buffer;
467 assert(pipe->num_forward_references + pipe->num_backward_references <= 4);
468 vpp_gpe_ctx->surface_input[0] = pipe->surface;
470 vpp_gpe_ctx->forward_surf_sum = 0;
471 vpp_gpe_ctx->backward_surf_sum = 0;
/* Forward references occupy surface_input[1 .. num_forward_references]. */
473 for(i = 0; i < pipe->num_forward_references; i ++)
475 vpp_gpe_ctx->surface_input[i + 1] = pipe->forward_references[i];
476 vpp_gpe_ctx->forward_surf_sum ++;
/* Backward references follow directly after the forward ones. */
479 for(i = 0; i < pipe->num_backward_references; i ++)
481 vpp_gpe_ctx->surface_input[vpp_gpe_ctx->forward_surf_sum + 1 + i ] =
482 pipe->backward_references[i];
483 vpp_gpe_ctx->backward_surf_sum ++;
486 struct object_surface *obj_surface = SURFACE(vpp_gpe_ctx->surface_input[0]);
487 vpp_gpe_ctx->in_frame_w = obj_surface->orig_width;
488 vpp_gpe_ctx->in_frame_h = obj_surface->orig_height;
490 if(filter->type == VAProcFilterSharpening) {
491 va_status = gen75_gpe_process_sharpening(ctx, vpp_gpe_ctx);
493 va_status = VA_STATUS_ERROR_ATTR_NOT_SUPPORTED;
496 vpp_gpe_ctx->is_first_frame = 0;
/*
 * Release everything owned by a VPP GPE context: the thread command buffer,
 * the kernel return buffer, the generic GPE context state, the temporary
 * sharpening surface (if one was created) and the batch buffer.
 *
 * NOTE(review): closing braces and any statements after the batch release
 * are on lines that are not visible in this listing.
 */
502 gen75_gpe_context_destroy(VADriverContextP ctx,
503 struct vpp_gpe_context *vpp_gpe_ctx)
505 dri_bo_unreference(vpp_gpe_ctx->vpp_batchbuffer.bo);
506 vpp_gpe_ctx->vpp_batchbuffer.bo = NULL;
508 dri_bo_unreference(vpp_gpe_ctx->vpp_kernel_return.bo);
509 vpp_gpe_ctx->vpp_kernel_return.bo = NULL;
511 i965_gpe_context_destroy(&vpp_gpe_ctx->gpe_ctx);
513 if(vpp_gpe_ctx->surface_tmp){
514 i965_DestroySurfaces(ctx, &vpp_gpe_ctx->surface_tmp, 1);
/* surface_tmp is a surface ID, not a pointer: clear it with integer 0. */
515 vpp_gpe_ctx->surface_tmp = 0;
/*
 * The batch was created with intel_batchbuffer_new(), so it must be torn
 * down with the matching intel_batchbuffer_free(); a plain free() would
 * release only the wrapper struct and leak whatever the batch object owns.
 */
518 intel_batchbuffer_free(vpp_gpe_ctx->batch);
/*
 * Allocate and configure a VPP GPE context.
 *
 * Sets the sizes of the surface-state/binding-table area, interface
 * descriptor table and CURBE, fills in the VFE state parameters, installs
 * the gen7/gen75 surface setup callbacks, creates a render-ring batch buffer
 * and marks the context as not yet having processed a frame.
 *
 * Returns the new context cast to struct hw_context *.
 * NOTE(review): the calloc() result is written through without a NULL
 * check, and the intel_batchbuffer_new() result is not checked either.
 */
524 gen75_gpe_context_init(VADriverContextP ctx)
526 struct i965_driver_data *i965 = i965_driver_data(ctx);
527 struct vpp_gpe_context *vpp_gpe_ctx = calloc(1, sizeof(struct vpp_gpe_context));
528 struct i965_gpe_context *gpe_ctx = &(vpp_gpe_ctx->gpe_ctx);
/* Room for MAX_MEDIA_SURFACES_GEN6 surface-state slots plus one binding-table entry each. */
530 gpe_ctx->surface_state_binding_table.length =
531 (SURFACE_STATE_PADDED_SIZE + sizeof(unsigned int)) * MAX_MEDIA_SURFACES_GEN6;
532 gpe_ctx->idrt.max_entries = MAX_INTERFACE_DESC_GEN6;
533 gpe_ctx->idrt.entry_size = sizeof(struct gen6_interface_descriptor_data);
535 gpe_ctx->curbe.length = CURBE_TOTAL_DATA_LENGTH;
/* Several VFE fields are written as (value - 1); presumably the hardware encoding is minus-one -- confirm. */
537 gpe_ctx->vfe_state.max_num_threads = 60 - 1;
538 gpe_ctx->vfe_state.num_urb_entries = 16;
539 gpe_ctx->vfe_state.gpgpu_mode = 0;
540 gpe_ctx->vfe_state.urb_entry_size = 59 - 1;
541 gpe_ctx->vfe_state.curbe_allocation_size = CURBE_ALLOCATION_SIZE - 1;
/* Surface setup callbacks used by gpe_surfaces_setup(). */
543 vpp_gpe_ctx->vpp_surface2_setup = gen7_gpe_surface2_setup;
544 vpp_gpe_ctx->vpp_media_rw_surface_setup = gen7_gpe_media_rw_surface_setup;
545 vpp_gpe_ctx->vpp_buffer_surface_setup = gen7_gpe_buffer_suface_setup;
546 vpp_gpe_ctx->vpp_media_chroma_surface_setup = gen75_gpe_media_chroma_surface_setup;
548 vpp_gpe_ctx->batch = intel_batchbuffer_new(&i965->intel, I915_EXEC_RENDER, 0);
/* Causes the kernels to be loaded on the first processed frame. */
550 vpp_gpe_ctx->is_first_frame = 1;
552 return (struct hw_context *)vpp_gpe_ctx;