/*
 * Copyright © 2011 Intel Corporation
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the
 * "Software"), to deal in the Software without restriction, including
 * without limitation the rights to use, copy, modify, merge, publish,
 * distribute, sub license, and/or sell copies of the Software, and to
 * permit persons to whom the Software is furnished to do so, subject to
 * the following conditions:
 *
 * The above copyright notice and this permission notice (including the
 * next paragraph) shall be included in all copies or substantial portions
 * of the Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
 * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
 * IN NO EVENT SHALL PRECISION INSIGHT AND/OR ITS SUPPLIERS BE LIABLE FOR
 * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
 * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
 * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
 *
 * Authors:
 *    Li Xiaowei <xiaowei.a.li@intel.com>
 *
 */
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <assert.h>

#include "intel_batchbuffer.h"
#include "intel_driver.h"

#include "i965_structs.h"
#include "i965_defines.h"
#include "i965_drv_video.h"
#include "gen75_vpp_gpe.h"

#define MAX_INTERFACE_DESC_GEN6 MAX_GPE_KERNELS
#define MAX_MEDIA_SURFACES_GEN6 34

#define SURFACE_STATE_OFFSET_GEN7(index) (SURFACE_STATE_PADDED_SIZE_GEN7 * (index))
#define BINDING_TABLE_OFFSET_GEN7(index) (SURFACE_STATE_OFFSET_GEN7(MAX_MEDIA_SURFACES_GEN6) + sizeof(unsigned int) * (index))

#define SURFACE_STATE_OFFSET_GEN8(index) (SURFACE_STATE_PADDED_SIZE_GEN8 * (index))
#define BINDING_TABLE_OFFSET_GEN8(index) (SURFACE_STATE_OFFSET_GEN8(MAX_MEDIA_SURFACES_GEN6) + sizeof(unsigned int) * (index))

#define CURBE_ALLOCATION_SIZE 37
#define CURBE_TOTAL_DATA_LENGTH (4 * 32)
#define CURBE_URB_ENTRY_LENGTH 4

extern VAStatus
i965_CreateSurfaces(VADriverContextP ctx, int width, int height, int format,
                    int num_surfaces, VASurfaceID *surfaces);

/* Shader binaries for the sharpening filter (Haswell) */
static const unsigned int gen75_gpe_sharpening_h_blur[][4] = {
#include "shaders/post_processing/gen75/sharpening_h_blur.g75b"
};
static const unsigned int gen75_gpe_sharpening_v_blur[][4] = {
#include "shaders/post_processing/gen75/sharpening_v_blur.g75b"
};
static const unsigned int gen75_gpe_sharpening_unmask[][4] = {
#include "shaders/post_processing/gen75/sharpening_unmask.g75b"
};
static struct i965_kernel gen75_vpp_sharpening_kernels[] = {
    { "vpp: sharpening(horizontal blur)", VPP_GPE_SHARPENING,
      gen75_gpe_sharpening_h_blur, sizeof(gen75_gpe_sharpening_h_blur), NULL },
    { "vpp: sharpening(vertical blur)", VPP_GPE_SHARPENING,
      gen75_gpe_sharpening_v_blur, sizeof(gen75_gpe_sharpening_v_blur), NULL },
    { "vpp: sharpening(unmask)", VPP_GPE_SHARPENING,
      gen75_gpe_sharpening_unmask, sizeof(gen75_gpe_sharpening_unmask), NULL },
};

/* Sharpening shader binaries for Broadwell */
static const unsigned int gen8_gpe_sharpening_h_blur[][4] = {
#include "shaders/post_processing/gen8/sharpening_h_blur.g8b"
};
static const unsigned int gen8_gpe_sharpening_v_blur[][4] = {
#include "shaders/post_processing/gen8/sharpening_v_blur.g8b"
};
static const unsigned int gen8_gpe_sharpening_unmask[][4] = {
#include "shaders/post_processing/gen8/sharpening_unmask.g8b"
};

static struct i965_kernel gen8_vpp_sharpening_kernels[] = {
    { "vpp: sharpening(horizontal blur)", VPP_GPE_SHARPENING,
      gen8_gpe_sharpening_h_blur, sizeof(gen8_gpe_sharpening_h_blur), NULL },
    { "vpp: sharpening(vertical blur)", VPP_GPE_SHARPENING,
      gen8_gpe_sharpening_v_blur, sizeof(gen8_gpe_sharpening_v_blur), NULL },
    { "vpp: sharpening(unmask)", VPP_GPE_SHARPENING,
      gen8_gpe_sharpening_unmask, sizeof(gen8_gpe_sharpening_unmask), NULL },
};

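/*
 * Bind all surfaces for one VPP pass on Haswell. Each NV12 surface takes
 * two binding-table entries (Y plane, then interleaved UV plane): the
 * current input plus any forward/backward reference inputs come first,
 * followed by the output surface and the kernel return buffer.
 */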
static VAStatus
gen75_gpe_process_surfaces_setup(VADriverContextP ctx,
                                 struct vpp_gpe_context *vpp_gpe_ctx)
{
    struct object_surface *obj_surface;
    unsigned int i = 0;
    unsigned char input_surface_sum = (1 + vpp_gpe_ctx->forward_surf_sum +
                                       vpp_gpe_ctx->backward_surf_sum) * 2;

    /* Bind the input NV12 surfaces (Luma + Chroma) */
    for (i = 0; i < input_surface_sum; i += 2) {
        obj_surface = vpp_gpe_ctx->surface_input_object[i / 2];
        assert(obj_surface);
        gen7_gpe_media_rw_surface_setup(ctx,
                                        &vpp_gpe_ctx->gpe_ctx,
                                        obj_surface,
                                        BINDING_TABLE_OFFSET_GEN7(i),
                                        SURFACE_STATE_OFFSET_GEN7(i));

        gen75_gpe_media_chroma_surface_setup(ctx,
                                             &vpp_gpe_ctx->gpe_ctx,
                                             obj_surface,
                                             BINDING_TABLE_OFFSET_GEN7(i + 1),
                                             SURFACE_STATE_OFFSET_GEN7(i + 1));
    }

    /* Bind the output NV12 surface (Luma + Chroma) */
    obj_surface = vpp_gpe_ctx->surface_output_object;
    assert(obj_surface);
    gen7_gpe_media_rw_surface_setup(ctx,
                                    &vpp_gpe_ctx->gpe_ctx,
                                    obj_surface,
                                    BINDING_TABLE_OFFSET_GEN7(input_surface_sum),
                                    SURFACE_STATE_OFFSET_GEN7(input_surface_sum));
    gen75_gpe_media_chroma_surface_setup(ctx,
                                         &vpp_gpe_ctx->gpe_ctx,
                                         obj_surface,
                                         BINDING_TABLE_OFFSET_GEN7(input_surface_sum + 1),
                                         SURFACE_STATE_OFFSET_GEN7(input_surface_sum + 1));

    /* Bind the kernel return buffer surface ("suface" is the helper's
     * actual spelling) */
    gen7_gpe_buffer_suface_setup(ctx,
                                 &vpp_gpe_ctx->gpe_ctx,
                                 &vpp_gpe_ctx->vpp_kernel_return,
                                 BINDING_TABLE_OFFSET_GEN7(input_surface_sum + 2),
                                 SURFACE_STATE_OFFSET_GEN7(input_surface_sum + 2));

    return VA_STATUS_SUCCESS;
}

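/*
 * Fill the interface descriptor table: one descriptor per loaded
 * sub-shader, all sharing one binding table. The kernel start pointer is
 * emitted as a relocation since the kernel bo address is only fixed at
 * execbuffer time.
 */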
static VAStatus
gen75_gpe_process_interface_setup(VADriverContextP ctx,
                                  struct vpp_gpe_context *vpp_gpe_ctx)
{
    struct gen6_interface_descriptor_data *desc;
    dri_bo *bo = vpp_gpe_ctx->gpe_ctx.idrt.bo;
    int i;

    dri_bo_map(bo, 1);
    assert(bo->virtual);
    desc = bo->virtual;

    /* Set up the descriptor table */
    for (i = 0; i < vpp_gpe_ctx->sub_shader_sum; i++) {
        struct i965_kernel *kernel = &vpp_gpe_ctx->gpe_ctx.kernels[i];
        assert(sizeof(*desc) == 32);
        memset(desc, 0, sizeof(*desc));
        desc->desc0.kernel_start_pointer = (kernel->bo->offset >> 6);
        desc->desc2.sampler_count = 0; /* FIXME: */
        desc->desc2.sampler_state_pointer = 0;
        desc->desc3.binding_table_entry_count = 6; /* FIXME: */
        desc->desc3.binding_table_pointer = (BINDING_TABLE_OFFSET_GEN7(0) >> 5);
        desc->desc4.constant_urb_entry_read_offset = 0;
        desc->desc4.constant_urb_entry_read_length = 0;

        /* Relocate the kernel start pointer against the kernel bo */
        dri_bo_emit_reloc(bo,
                          I915_GEM_DOMAIN_INSTRUCTION, 0,
                          0,
                          i * sizeof(*desc) + offsetof(struct gen6_interface_descriptor_data, desc0),
                          kernel->bo);
        desc++;
    }

    dri_bo_unmap(bo);

    return VA_STATUS_SUCCESS;
}

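/*
 * Copy the per-kernel constants into the CURBE buffer. Note this helper
 * is not wired into the prepare path below (the constant-setup call there
 * is commented out).
 */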
static VAStatus
gen75_gpe_process_constant_fill(VADriverContextP ctx,
                                struct vpp_gpe_context *vpp_gpe_ctx)
{
    dri_bo_map(vpp_gpe_ctx->gpe_ctx.curbe.bo, 1);
    assert(vpp_gpe_ctx->gpe_ctx.curbe.bo->virtual);
    unsigned char *constant_buffer = vpp_gpe_ctx->gpe_ctx.curbe.bo->virtual;
    memcpy(constant_buffer, vpp_gpe_ctx->kernel_param,
           vpp_gpe_ctx->kernel_param_size);
    dri_bo_unmap(vpp_gpe_ctx->gpe_ctx.curbe.bo);

    return VA_STATUS_SUCCESS;
}

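/*
 * Build the second-level batch buffer: one MEDIA_OBJECT command per GPU
 * thread (6 header dwords plus size/4 dwords of inline thread
 * parameters), terminated with MI_BATCH_BUFFER_END.
 */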
static VAStatus
gen75_gpe_process_parameters_fill(VADriverContextP ctx,
                                  struct vpp_gpe_context *vpp_gpe_ctx)
{
    unsigned int *command_ptr;
    unsigned int i, size = vpp_gpe_ctx->thread_param_size;
    unsigned char *position = NULL;

    /* Thread inline data setting */
    dri_bo_map(vpp_gpe_ctx->vpp_batchbuffer.bo, 1);
    command_ptr = vpp_gpe_ctx->vpp_batchbuffer.bo->virtual;

    for (i = 0; i < vpp_gpe_ctx->thread_num; i++) {
        *command_ptr++ = (CMD_MEDIA_OBJECT | (size / sizeof(int) + 6 - 2));
        *command_ptr++ = vpp_gpe_ctx->sub_shader_index;
        *command_ptr++ = 0;
        *command_ptr++ = 0;
        *command_ptr++ = 0;
        *command_ptr++ = 0;

        /* copy thread inline data */
        position = (unsigned char *)(vpp_gpe_ctx->thread_param + size * i);
        memcpy(command_ptr, position, size);
        command_ptr += size / sizeof(int);
    }

    *command_ptr++ = 0;
    *command_ptr++ = MI_BATCH_BUFFER_END;

    dri_bo_unmap(vpp_gpe_ctx->vpp_batchbuffer.bo);

    return VA_STATUS_SUCCESS;
}

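/*
 * Program the media pipeline state and chain execution into the
 * second-level batch buffer built by gen75_gpe_process_parameters_fill().
 */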
static VAStatus
gen75_gpe_process_pipeline_setup(VADriverContextP ctx,
                                 struct vpp_gpe_context *vpp_gpe_ctx)
{
    intel_batchbuffer_start_atomic(vpp_gpe_ctx->batch, 0x1000);
    intel_batchbuffer_emit_mi_flush(vpp_gpe_ctx->batch);

    gen6_gpe_pipeline_setup(ctx, &vpp_gpe_ctx->gpe_ctx, vpp_gpe_ctx->batch);

    gen75_gpe_process_parameters_fill(ctx, vpp_gpe_ctx);

    BEGIN_BATCH(vpp_gpe_ctx->batch, 2);
    OUT_BATCH(vpp_gpe_ctx->batch, MI_BATCH_BUFFER_START | (1 << 8));
    OUT_RELOC(vpp_gpe_ctx->batch,
              vpp_gpe_ctx->vpp_batchbuffer.bo,
              I915_GEM_DOMAIN_COMMAND, 0,
              0);
    ADVANCE_BATCH(vpp_gpe_ctx->batch);

    intel_batchbuffer_end_atomic(vpp_gpe_ctx->batch);

    return VA_STATUS_SUCCESS;
}

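/*
 * (Re)allocate the per-run buffer objects: the second-level batch buffer,
 * sized for thread_num MEDIA_OBJECT commands, and a kernel return buffer
 * of 16 bytes per thread.
 */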
static VAStatus
gen75_gpe_process_init(VADriverContextP ctx,
                       struct vpp_gpe_context *vpp_gpe_ctx)
{
    struct i965_driver_data *i965 = i965_driver_data(ctx);
    dri_bo *bo;

    unsigned int batch_buf_size = vpp_gpe_ctx->thread_num *
                                  (vpp_gpe_ctx->thread_param_size + 6 * sizeof(int)) + 16;

    vpp_gpe_ctx->vpp_kernel_return.num_blocks = vpp_gpe_ctx->thread_num;
    vpp_gpe_ctx->vpp_kernel_return.size_block = 16;
    vpp_gpe_ctx->vpp_kernel_return.pitch = 1;
    unsigned int kernel_return_size = vpp_gpe_ctx->vpp_kernel_return.num_blocks
                                      * vpp_gpe_ctx->vpp_kernel_return.size_block;

    dri_bo_unreference(vpp_gpe_ctx->vpp_batchbuffer.bo);
    bo = dri_bo_alloc(i965->intel.bufmgr,
                      "vpp batch buffer",
                      batch_buf_size, 0x1000);
    vpp_gpe_ctx->vpp_batchbuffer.bo = bo;
    dri_bo_reference(vpp_gpe_ctx->vpp_batchbuffer.bo);

    dri_bo_unreference(vpp_gpe_ctx->vpp_kernel_return.bo);
    bo = dri_bo_alloc(i965->intel.bufmgr,
                      "vpp kernel return buffer",
                      kernel_return_size, 0x1000);
    vpp_gpe_ctx->vpp_kernel_return.bo = bo;
    dri_bo_reference(vpp_gpe_ctx->vpp_kernel_return.bo);

    vpp_gpe_ctx->gpe_context_init(ctx, &vpp_gpe_ctx->gpe_ctx);

    return VA_STATUS_SUCCESS;
}

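/* Set up all memory objects and program the media pipeline. */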
static VAStatus
gen75_gpe_process_prepare(VADriverContextP ctx,
                          struct vpp_gpe_context *vpp_gpe_ctx)
{
    /* Set up all the memory objects */
    gen75_gpe_process_surfaces_setup(ctx, vpp_gpe_ctx);
    gen75_gpe_process_interface_setup(ctx, vpp_gpe_ctx);
    //gen75_gpe_process_constant_setup(ctx, vpp_gpe_ctx);

    /* Program the media pipeline */
    gen75_gpe_process_pipeline_setup(ctx, vpp_gpe_ctx);

    return VA_STATUS_SUCCESS;
}

static VAStatus
gen75_gpe_process_run(VADriverContextP ctx,
                      struct vpp_gpe_context *vpp_gpe_ctx)
{
    intel_batchbuffer_flush(vpp_gpe_ctx->batch);

    return VA_STATUS_SUCCESS;
}

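/* Run one full GPE pass on Haswell: init buffers, prepare state, submit. */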
static VAStatus
gen75_gpe_process(VADriverContextP ctx,
                  struct vpp_gpe_context *vpp_gpe_ctx)
{
    VAStatus va_status = VA_STATUS_SUCCESS;

    va_status = gen75_gpe_process_init(ctx, vpp_gpe_ctx);
    if (va_status != VA_STATUS_SUCCESS)
        return va_status;

    va_status = gen75_gpe_process_prepare(ctx, vpp_gpe_ctx);
    if (va_status != VA_STATUS_SUCCESS)
        return va_status;

    va_status = gen75_gpe_process_run(ctx, vpp_gpe_ctx);
    if (va_status != VA_STATUS_SUCCESS)
        return va_status;

    return VA_STATUS_SUCCESS;
}

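/* Gen8 (Broadwell) counterparts of the Haswell helpers above. */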
static VAStatus
gen8_gpe_process_surfaces_setup(VADriverContextP ctx,
                                struct vpp_gpe_context *vpp_gpe_ctx)
{
    struct object_surface *obj_surface;
    unsigned int i = 0;
    unsigned char input_surface_sum = (1 + vpp_gpe_ctx->forward_surf_sum +
                                       vpp_gpe_ctx->backward_surf_sum) * 2;

    /* Bind the input NV12 surfaces (Luma + Chroma) */
    for (i = 0; i < input_surface_sum; i += 2) {
        obj_surface = vpp_gpe_ctx->surface_input_object[i / 2];
        assert(obj_surface);
        gen8_gpe_media_rw_surface_setup(ctx,
                                        &vpp_gpe_ctx->gpe_ctx,
                                        obj_surface,
                                        BINDING_TABLE_OFFSET_GEN8(i),
                                        SURFACE_STATE_OFFSET_GEN8(i));

        gen8_gpe_media_chroma_surface_setup(ctx,
                                            &vpp_gpe_ctx->gpe_ctx,
                                            obj_surface,
                                            BINDING_TABLE_OFFSET_GEN8(i + 1),
                                            SURFACE_STATE_OFFSET_GEN8(i + 1));
    }

    /* Bind the output NV12 surface (Luma + Chroma) */
    obj_surface = vpp_gpe_ctx->surface_output_object;
    assert(obj_surface);
    gen8_gpe_media_rw_surface_setup(ctx,
                                    &vpp_gpe_ctx->gpe_ctx,
                                    obj_surface,
                                    BINDING_TABLE_OFFSET_GEN8(input_surface_sum),
                                    SURFACE_STATE_OFFSET_GEN8(input_surface_sum));
    gen8_gpe_media_chroma_surface_setup(ctx,
                                        &vpp_gpe_ctx->gpe_ctx,
                                        obj_surface,
                                        BINDING_TABLE_OFFSET_GEN8(input_surface_sum + 1),
                                        SURFACE_STATE_OFFSET_GEN8(input_surface_sum + 1));

    /* Bind the kernel return buffer surface */
    gen7_gpe_buffer_suface_setup(ctx,
                                 &vpp_gpe_ctx->gpe_ctx,
                                 &vpp_gpe_ctx->vpp_kernel_return,
                                 BINDING_TABLE_OFFSET_GEN8(input_surface_sum + 2),
                                 SURFACE_STATE_OFFSET_GEN8(input_surface_sum + 2));

    return VA_STATUS_SUCCESS;
}

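/*
 * On Gen8 the interface descriptors live inside the dynamic state buffer
 * at idrt_offset, and kernel_start_pointer is a state-base-relative
 * offset rather than a graphics address, so no relocation is emitted
 * here, unlike the Haswell path.
 */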
static VAStatus
gen8_gpe_process_interface_setup(VADriverContextP ctx,
                                 struct vpp_gpe_context *vpp_gpe_ctx)
{
    struct gen8_interface_descriptor_data *desc;
    dri_bo *bo = vpp_gpe_ctx->gpe_ctx.dynamic_state.bo;
    int i;

    dri_bo_map(bo, 1);
    assert(bo->virtual);
    desc = (struct gen8_interface_descriptor_data *)(bo->virtual
                                                     + vpp_gpe_ctx->gpe_ctx.idrt_offset);

    /* Set up the descriptor table */
    for (i = 0; i < vpp_gpe_ctx->sub_shader_sum; i++) {
        struct i965_kernel *kernel;
        kernel = &vpp_gpe_ctx->gpe_ctx.kernels[i];
        assert(sizeof(*desc) == 32);
        memset(desc, 0, sizeof(*desc));
        desc->desc0.kernel_start_pointer = kernel->kernel_offset >> 6;
        desc->desc3.sampler_count = 0; /* FIXME: */
        desc->desc3.sampler_state_pointer = 0;
        desc->desc4.binding_table_entry_count = 6; /* FIXME: */
        desc->desc4.binding_table_pointer = (BINDING_TABLE_OFFSET_GEN8(0) >> 5);
        desc->desc5.constant_urb_entry_read_offset = 0;
        desc->desc5.constant_urb_entry_read_length = 0;

        desc++;
    }

    dri_bo_unmap(bo);

    return VA_STATUS_SUCCESS;
}

static VAStatus
gen8_gpe_process_constant_fill(VADriverContextP ctx,
                               struct vpp_gpe_context *vpp_gpe_ctx)
{
    dri_bo_map(vpp_gpe_ctx->gpe_ctx.dynamic_state.bo, 1);
    assert(vpp_gpe_ctx->gpe_ctx.dynamic_state.bo->virtual);
    unsigned char *constant_buffer = vpp_gpe_ctx->gpe_ctx.dynamic_state.bo->virtual;
    memcpy(constant_buffer, vpp_gpe_ctx->kernel_param,
           vpp_gpe_ctx->kernel_param_size);
    dri_bo_unmap(vpp_gpe_ctx->gpe_ctx.dynamic_state.bo);

    return VA_STATUS_SUCCESS;
}

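/*
 * Same second-level batch as the Haswell version, except that a
 * MEDIA_STATE_FLUSH is emitted after each MEDIA_OBJECT command.
 */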
static VAStatus
gen8_gpe_process_parameters_fill(VADriverContextP ctx,
                                 struct vpp_gpe_context *vpp_gpe_ctx)
{
    unsigned int *command_ptr;
    unsigned int i, size = vpp_gpe_ctx->thread_param_size;
    unsigned char *position = NULL;

    /* Thread inline data setting */
    dri_bo_map(vpp_gpe_ctx->vpp_batchbuffer.bo, 1);
    command_ptr = vpp_gpe_ctx->vpp_batchbuffer.bo->virtual;

    for (i = 0; i < vpp_gpe_ctx->thread_num; i++) {
        *command_ptr++ = (CMD_MEDIA_OBJECT | (size / sizeof(int) + 6 - 2));
        *command_ptr++ = vpp_gpe_ctx->sub_shader_index;
        *command_ptr++ = 0;
        *command_ptr++ = 0;
        *command_ptr++ = 0;
        *command_ptr++ = 0;

        /* copy thread inline data */
        position = (unsigned char *)(vpp_gpe_ctx->thread_param + size * i);
        memcpy(command_ptr, position, size);
        command_ptr += size / sizeof(int);

        *command_ptr++ = CMD_MEDIA_STATE_FLUSH;
        *command_ptr++ = 0;
    }

    *command_ptr++ = 0;
    *command_ptr++ = MI_BATCH_BUFFER_END;

    dri_bo_unmap(vpp_gpe_ctx->vpp_batchbuffer.bo);

    return VA_STATUS_SUCCESS;
}

static VAStatus
gen8_gpe_process_pipeline_setup(VADriverContextP ctx,
                                struct vpp_gpe_context *vpp_gpe_ctx)
{
    intel_batchbuffer_start_atomic(vpp_gpe_ctx->batch, 0x1000);
    intel_batchbuffer_emit_mi_flush(vpp_gpe_ctx->batch);

    gen8_gpe_pipeline_setup(ctx, &vpp_gpe_ctx->gpe_ctx, vpp_gpe_ctx->batch);

    gen8_gpe_process_parameters_fill(ctx, vpp_gpe_ctx);

    /* BDW MI_BATCH_BUFFER_START is 3 dwords (48-bit graphics address),
     * hence the extra zero dword after the relocation */
    BEGIN_BATCH(vpp_gpe_ctx->batch, 3);
    OUT_BATCH(vpp_gpe_ctx->batch, MI_BATCH_BUFFER_START | (1 << 8) | (1 << 0));
    OUT_RELOC(vpp_gpe_ctx->batch,
              vpp_gpe_ctx->vpp_batchbuffer.bo,
              I915_GEM_DOMAIN_COMMAND, 0,
              0);
    OUT_BATCH(vpp_gpe_ctx->batch, 0);

    ADVANCE_BATCH(vpp_gpe_ctx->batch);

    intel_batchbuffer_end_atomic(vpp_gpe_ctx->batch);

    return VA_STATUS_SUCCESS;
}

static VAStatus
gen8_gpe_process_init(VADriverContextP ctx,
                      struct vpp_gpe_context *vpp_gpe_ctx)
{
    struct i965_driver_data *i965 = i965_driver_data(ctx);
    dri_bo *bo;

    unsigned int batch_buf_size = vpp_gpe_ctx->thread_num *
                                  (vpp_gpe_ctx->thread_param_size + 6 * sizeof(int)) + 16;

    vpp_gpe_ctx->vpp_kernel_return.num_blocks = vpp_gpe_ctx->thread_num;
    vpp_gpe_ctx->vpp_kernel_return.size_block = 16;
    vpp_gpe_ctx->vpp_kernel_return.pitch = 1;

    unsigned int kernel_return_size = vpp_gpe_ctx->vpp_kernel_return.num_blocks
                                      * vpp_gpe_ctx->vpp_kernel_return.size_block;

    dri_bo_unreference(vpp_gpe_ctx->vpp_batchbuffer.bo);
    bo = dri_bo_alloc(i965->intel.bufmgr,
                      "vpp batch buffer",
                      batch_buf_size, 0x1000);
    vpp_gpe_ctx->vpp_batchbuffer.bo = bo;
    dri_bo_reference(vpp_gpe_ctx->vpp_batchbuffer.bo);

    dri_bo_unreference(vpp_gpe_ctx->vpp_kernel_return.bo);
    bo = dri_bo_alloc(i965->intel.bufmgr,
                      "vpp kernel return buffer",
                      kernel_return_size, 0x1000);
    vpp_gpe_ctx->vpp_kernel_return.bo = bo;
    dri_bo_reference(vpp_gpe_ctx->vpp_kernel_return.bo);

    vpp_gpe_ctx->gpe_context_init(ctx, &vpp_gpe_ctx->gpe_ctx);

    return VA_STATUS_SUCCESS;
}

static VAStatus
gen8_gpe_process_prepare(VADriverContextP ctx,
                         struct vpp_gpe_context *vpp_gpe_ctx)
{
    /* Set up all the memory objects */
    gen8_gpe_process_surfaces_setup(ctx, vpp_gpe_ctx);
    gen8_gpe_process_interface_setup(ctx, vpp_gpe_ctx);
    //gen8_gpe_process_constant_setup(ctx, vpp_gpe_ctx);

    /* Program the media pipeline */
    gen8_gpe_process_pipeline_setup(ctx, vpp_gpe_ctx);

    return VA_STATUS_SUCCESS;
}

static VAStatus
gen8_gpe_process_run(VADriverContextP ctx,
                     struct vpp_gpe_context *vpp_gpe_ctx)
{
    intel_batchbuffer_flush(vpp_gpe_ctx->batch);

    return VA_STATUS_SUCCESS;
}

static VAStatus
gen8_gpe_process(VADriverContextP ctx,
                 struct vpp_gpe_context *vpp_gpe_ctx)
{
    VAStatus va_status = VA_STATUS_SUCCESS;

    va_status = gen8_gpe_process_init(ctx, vpp_gpe_ctx);
    if (va_status != VA_STATUS_SUCCESS)
        return va_status;

    va_status = gen8_gpe_process_prepare(ctx, vpp_gpe_ctx);
    if (va_status != VA_STATUS_SUCCESS)
        return va_status;

    va_status = gen8_gpe_process_run(ctx, vpp_gpe_ctx);
    if (va_status != VA_STATUS_SUCCESS)
        return va_status;

    return VA_STATUS_SUCCESS;
}

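/* Dispatch one GPE pass to the generation-specific implementation. */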
static VAStatus
vpp_gpe_process(VADriverContextP ctx,
                struct vpp_gpe_context *vpp_gpe_ctx)
{
    struct i965_driver_data *i965 = i965_driver_data(ctx);

    if (IS_HASWELL(i965->intel.device_info))
        return gen75_gpe_process(ctx, vpp_gpe_ctx);
    else if (IS_GEN8(i965->intel.device_info))
        return gen8_gpe_process(ctx, vpp_gpe_ctx);

    return VA_STATUS_ERROR_UNIMPLEMENTED;
}

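/*
 * Sharpening as a three-pass unsharp mask:
 *   1. horizontal blur: original input -> output surface,
 *   2. vertical blur:   output surface -> temporary surface,
 *   3. unmask:          blend the blurred copy (bound as a reference
 *                       input) against the original input into the real
 *                       output surface.
 * Each pass fills a fresh thread-parameter array (one entry per 16-row,
 * 16-column, or 4-row block respectively) and submits it through
 * vpp_gpe_process().
 */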
static VAStatus
vpp_gpe_process_sharpening(VADriverContextP ctx,
                           struct vpp_gpe_context *vpp_gpe_ctx)
{
    VAStatus va_status = VA_STATUS_SUCCESS;
    struct i965_driver_data *i965 = i965_driver_data(ctx);
    struct object_surface *origin_in_obj_surface = vpp_gpe_ctx->surface_input_object[0];
    struct object_surface *origin_out_obj_surface = vpp_gpe_ctx->surface_output_object;

    VAProcPipelineParameterBuffer *pipe = vpp_gpe_ctx->pipeline_param;
    VABufferID *filter_ids = (VABufferID *)pipe->filters;
    struct object_buffer *obj_buf = BUFFER(*(filter_ids + 0));

    assert(obj_buf && obj_buf->buffer_store && obj_buf->buffer_store->buffer);

    if (!obj_buf ||
        !obj_buf->buffer_store ||
        !obj_buf->buffer_store->buffer)
        goto error;

    VAProcFilterParameterBuffer *filter =
        (VAProcFilterParameterBuffer *)obj_buf->buffer_store->buffer;
    float sharpening_intensity = filter->value;

    ThreadParameterSharpening thr_param;
    unsigned int thr_param_size = sizeof(ThreadParameterSharpening);
    unsigned int i;
    unsigned char *pos;

    if (vpp_gpe_ctx->is_first_frame) {
        vpp_gpe_ctx->sub_shader_sum = 3;
        struct i965_kernel *vpp_kernels;
        if (IS_HASWELL(i965->intel.device_info))
            vpp_kernels = gen75_vpp_sharpening_kernels;
        else if (IS_GEN8(i965->intel.device_info))
            vpp_kernels = gen8_vpp_sharpening_kernels;
        else /* guard against vpp_kernels being used uninitialized */
            return VA_STATUS_ERROR_UNIMPLEMENTED;

        vpp_gpe_ctx->gpe_load_kernels(ctx,
                                      &vpp_gpe_ctx->gpe_ctx,
                                      vpp_kernels,
                                      vpp_gpe_ctx->sub_shader_sum);
    }

    if (vpp_gpe_ctx->surface_tmp == VA_INVALID_ID) {
        va_status = i965_CreateSurfaces(ctx,
                                        vpp_gpe_ctx->in_frame_w,
                                        vpp_gpe_ctx->in_frame_h,
                                        VA_RT_FORMAT_YUV420,
                                        1,
                                        &vpp_gpe_ctx->surface_tmp);
        assert(va_status == VA_STATUS_SUCCESS);

        struct object_surface *obj_surf = SURFACE(vpp_gpe_ctx->surface_tmp);
        assert(obj_surf);

        if (obj_surf) {
            i965_check_alloc_surface_bo(ctx, obj_surf, 1, VA_FOURCC_NV12,
                                        SUBSAMPLE_YUV420);
            vpp_gpe_ctx->surface_tmp_object = obj_surf;
        }
    }

    assert(sharpening_intensity >= 0.0 && sharpening_intensity <= 1.0);
    thr_param.l_amount = (unsigned int)(sharpening_intensity * 128);
    thr_param.d_amount = (unsigned int)(sharpening_intensity * 128);

    thr_param.base.pic_width = vpp_gpe_ctx->in_frame_w;
    thr_param.base.pic_height = vpp_gpe_ctx->in_frame_h;

    /* Step 1: horizontal blur process */
    vpp_gpe_ctx->forward_surf_sum = 0;
    vpp_gpe_ctx->backward_surf_sum = 0;

    vpp_gpe_ctx->thread_num = vpp_gpe_ctx->in_frame_h / 16;
    vpp_gpe_ctx->thread_param_size = thr_param_size;
    vpp_gpe_ctx->thread_param = (unsigned char *)malloc(vpp_gpe_ctx->thread_param_size
                                                        * vpp_gpe_ctx->thread_num);
    pos = vpp_gpe_ctx->thread_param;

    if (!pos) {
        return VA_STATUS_ERROR_ALLOCATION_FAILED;
    }

    for (i = 0; i < vpp_gpe_ctx->thread_num; i++) {
        thr_param.base.v_pos = 16 * i;
        thr_param.base.h_pos = 0;
        memcpy(pos, &thr_param, thr_param_size);
        pos += thr_param_size;
    }

    vpp_gpe_ctx->sub_shader_index = 0;
    va_status = vpp_gpe_process(ctx, vpp_gpe_ctx);
    free(vpp_gpe_ctx->thread_param);

    /* Step 2: vertical blur process */
    vpp_gpe_ctx->surface_input_object[0] = vpp_gpe_ctx->surface_output_object;
    vpp_gpe_ctx->surface_output_object = vpp_gpe_ctx->surface_tmp_object;
    vpp_gpe_ctx->forward_surf_sum = 0;
    vpp_gpe_ctx->backward_surf_sum = 0;

    vpp_gpe_ctx->thread_num = vpp_gpe_ctx->in_frame_w / 16;
    vpp_gpe_ctx->thread_param_size = thr_param_size;
    vpp_gpe_ctx->thread_param = (unsigned char *)malloc(vpp_gpe_ctx->thread_param_size
                                                        * vpp_gpe_ctx->thread_num);
    pos = vpp_gpe_ctx->thread_param;

    if (!pos) {
        return VA_STATUS_ERROR_ALLOCATION_FAILED;
    }

    for (i = 0; i < vpp_gpe_ctx->thread_num; i++) {
        thr_param.base.v_pos = 0;
        thr_param.base.h_pos = 16 * i;
        memcpy(pos, &thr_param, thr_param_size);
        pos += thr_param_size;
    }

    vpp_gpe_ctx->sub_shader_index = 1;
    vpp_gpe_process(ctx, vpp_gpe_ctx);
    free(vpp_gpe_ctx->thread_param);

    /* Step 3: apply the blur to the original surface */
    vpp_gpe_ctx->surface_input_object[0] = origin_in_obj_surface;
    vpp_gpe_ctx->surface_input_object[1] = vpp_gpe_ctx->surface_tmp_object;
    vpp_gpe_ctx->surface_output_object = origin_out_obj_surface;
    vpp_gpe_ctx->forward_surf_sum = 1;
    vpp_gpe_ctx->backward_surf_sum = 0;

    vpp_gpe_ctx->thread_num = vpp_gpe_ctx->in_frame_h / 4;
    vpp_gpe_ctx->thread_param_size = thr_param_size;
    vpp_gpe_ctx->thread_param = (unsigned char *)malloc(vpp_gpe_ctx->thread_param_size
                                                        * vpp_gpe_ctx->thread_num);
    pos = vpp_gpe_ctx->thread_param;

    if (!pos) {
        return VA_STATUS_ERROR_ALLOCATION_FAILED;
    }

    for (i = 0; i < vpp_gpe_ctx->thread_num; i++) {
        thr_param.base.v_pos = 4 * i;
        thr_param.base.h_pos = 0;
        memcpy(pos, &thr_param, thr_param_size);
        pos += thr_param_size;
    }

    vpp_gpe_ctx->sub_shader_index = 2;
    va_status = vpp_gpe_process(ctx, vpp_gpe_ctx);
    free(vpp_gpe_ctx->thread_param);

    return va_status;

error:
    return VA_STATUS_ERROR_INVALID_PARAMETER;
}

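/*
 * Per-frame entry point: validate the filter chain, collect the pipeline
 * input and reference surfaces, then run the sharpening filter, the only
 * filter type implemented on this path.
 */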
VAStatus vpp_gpe_process_picture(VADriverContextP ctx,
                                 struct vpp_gpe_context *vpp_gpe_ctx)
{
    VAStatus va_status = VA_STATUS_SUCCESS;
    struct i965_driver_data *i965 = i965_driver_data(ctx);
    VAProcPipelineParameterBuffer *pipe = vpp_gpe_ctx->pipeline_param;
    VAProcFilterParameterBuffer *filter = NULL;
    unsigned int i = 0;
    struct object_surface *obj_surface = NULL;

    if (pipe->num_filters && !pipe->filters)
        goto error;

    for (i = 0; i < pipe->num_filters; i++) {
        struct object_buffer *obj_buf = BUFFER(pipe->filters[i]);

        assert(obj_buf && obj_buf->buffer_store && obj_buf->buffer_store->buffer);

        if (!obj_buf ||
            !obj_buf->buffer_store ||
            !obj_buf->buffer_store->buffer)
            goto error;

        filter = (VAProcFilterParameterBuffer *)obj_buf->buffer_store->buffer;
        if (filter->type == VAProcFilterSharpening) {
            break;
        }
    }

    assert(pipe->num_forward_references + pipe->num_backward_references <= 4);
    vpp_gpe_ctx->surface_input_object[0] = vpp_gpe_ctx->surface_pipeline_input_object;

    vpp_gpe_ctx->forward_surf_sum = 0;
    vpp_gpe_ctx->backward_surf_sum = 0;

    for (i = 0; i < pipe->num_forward_references; i++) {
        obj_surface = SURFACE(pipe->forward_references[i]);

        assert(obj_surface);
        vpp_gpe_ctx->surface_input_object[i + 1] = obj_surface;
        vpp_gpe_ctx->forward_surf_sum++;
    }

    for (i = 0; i < pipe->num_backward_references; i++) {
        obj_surface = SURFACE(pipe->backward_references[i]);

        assert(obj_surface);
        vpp_gpe_ctx->surface_input_object[vpp_gpe_ctx->forward_surf_sum + 1 + i] = obj_surface;
        vpp_gpe_ctx->backward_surf_sum++;
    }

    obj_surface = vpp_gpe_ctx->surface_input_object[0];
    vpp_gpe_ctx->in_frame_w = obj_surface->orig_width;
    vpp_gpe_ctx->in_frame_h = obj_surface->orig_height;

    if (filter && filter->type == VAProcFilterSharpening) {
        va_status = vpp_gpe_process_sharpening(ctx, vpp_gpe_ctx);
    } else {
        va_status = VA_STATUS_ERROR_ATTR_NOT_SUPPORTED;
    }

    vpp_gpe_ctx->is_first_frame = 0;

    return va_status;

error:
    return VA_STATUS_ERROR_INVALID_PARAMETER;
}

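/* Release the buffer objects, GPE context, temporary surface and batch
 * owned by this VPP context. */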
void
vpp_gpe_context_destroy(VADriverContextP ctx,
                        struct vpp_gpe_context *vpp_gpe_ctx)
{
    dri_bo_unreference(vpp_gpe_ctx->vpp_batchbuffer.bo);
    vpp_gpe_ctx->vpp_batchbuffer.bo = NULL;

    dri_bo_unreference(vpp_gpe_ctx->vpp_kernel_return.bo);
    vpp_gpe_ctx->vpp_kernel_return.bo = NULL;

    vpp_gpe_ctx->gpe_context_destroy(&vpp_gpe_ctx->gpe_ctx);

    if (vpp_gpe_ctx->surface_tmp != VA_INVALID_ID) {
        assert(vpp_gpe_ctx->surface_tmp_object != NULL);
        i965_DestroySurfaces(ctx, &vpp_gpe_ctx->surface_tmp, 1);
        vpp_gpe_ctx->surface_tmp = VA_INVALID_ID;
        vpp_gpe_ctx->surface_tmp_object = NULL;
    }

    /* The batch came from intel_batchbuffer_new(), so release it with
     * intel_batchbuffer_free() instead of a bare free(), which would leak
     * the underlying bo. */
    intel_batchbuffer_free(vpp_gpe_ctx->batch);

    free(vpp_gpe_ctx);
}

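/*
 * Allocate and initialize a VPP GPE context; only Haswell and Broadwell
 * are supported. The generation-specific GPE callbacks and state sizes
 * are selected here.
 */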
struct vpp_gpe_context *
vpp_gpe_context_init(VADriverContextP ctx)
{
    struct i965_driver_data *i965 = i965_driver_data(ctx);
    struct vpp_gpe_context *vpp_gpe_ctx = calloc(1, sizeof(struct vpp_gpe_context));
    struct i965_gpe_context *gpe_ctx = &(vpp_gpe_ctx->gpe_ctx);

    assert(IS_HASWELL(i965->intel.device_info) ||
           IS_GEN8(i965->intel.device_info));

    vpp_gpe_ctx->surface_tmp = VA_INVALID_ID;
    vpp_gpe_ctx->surface_tmp_object = NULL;
    vpp_gpe_ctx->batch = intel_batchbuffer_new(&i965->intel, I915_EXEC_RENDER, 0);
    vpp_gpe_ctx->is_first_frame = 1;

    gpe_ctx->vfe_state.max_num_threads = 60 - 1;
    gpe_ctx->vfe_state.num_urb_entries = 16;
    gpe_ctx->vfe_state.gpgpu_mode = 0;
    gpe_ctx->vfe_state.urb_entry_size = 59 - 1;
    gpe_ctx->vfe_state.curbe_allocation_size = CURBE_ALLOCATION_SIZE - 1;

    if (IS_HASWELL(i965->intel.device_info)) {
        vpp_gpe_ctx->gpe_context_init = i965_gpe_context_init;
        vpp_gpe_ctx->gpe_context_destroy = i965_gpe_context_destroy;
        vpp_gpe_ctx->gpe_load_kernels = i965_gpe_load_kernels;
        gpe_ctx->surface_state_binding_table.length =
            (SURFACE_STATE_PADDED_SIZE_GEN7 + sizeof(unsigned int)) * MAX_MEDIA_SURFACES_GEN6;

        gpe_ctx->curbe.length = CURBE_TOTAL_DATA_LENGTH;
        gpe_ctx->idrt.max_entries = MAX_INTERFACE_DESC_GEN6;
        gpe_ctx->idrt.entry_size = sizeof(struct gen6_interface_descriptor_data);

    } else if (IS_GEN8(i965->intel.device_info)) {
        vpp_gpe_ctx->gpe_context_init = gen8_gpe_context_init;
        vpp_gpe_ctx->gpe_context_destroy = gen8_gpe_context_destroy;
        vpp_gpe_ctx->gpe_load_kernels = gen8_gpe_load_kernels;
        gpe_ctx->surface_state_binding_table.length =
            (SURFACE_STATE_PADDED_SIZE_GEN8 + sizeof(unsigned int)) * MAX_MEDIA_SURFACES_GEN6;

        gpe_ctx->curbe_size = CURBE_TOTAL_DATA_LENGTH;
        gpe_ctx->idrt_size = sizeof(struct gen8_interface_descriptor_data) * MAX_INTERFACE_DESC_GEN6;