/*
 * Copyright © 2011 Intel Corporation
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the
 * "Software"), to deal in the Software without restriction, including
 * without limitation the rights to use, copy, modify, merge, publish,
 * distribute, sub license, and/or sell copies of the Software, and to
 * permit persons to whom the Software is furnished to do so, subject to
 * the following conditions:
 *
 * The above copyright notice and this permission notice (including the
 * next paragraph) shall be included in all copies or substantial portions
 * of the Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
 * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
 * IN NO EVENT SHALL PRECISION INSIGHT AND/OR ITS SUPPLIERS BE LIABLE FOR
 * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
 * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
 * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
 *
 * Authors:
 *   Li Xiaowei <xiaowei.a.li@intel.com>
 */
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <assert.h>

#include "intel_batchbuffer.h"
#include "intel_driver.h"

#include "i965_structs.h"
#include "i965_defines.h"
#include "i965_drv_video.h"
#include "gen75_vpp_gpe.h"
#define MAX_INTERFACE_DESC_GEN6      MAX_GPE_KERNELS
#define MAX_MEDIA_SURFACES_GEN6      34

#define SURFACE_STATE_OFFSET_GEN7(index)   (SURFACE_STATE_PADDED_SIZE_GEN7 * (index))
#define BINDING_TABLE_OFFSET_GEN7(index)   (SURFACE_STATE_OFFSET_GEN7(MAX_MEDIA_SURFACES_GEN6) + sizeof(unsigned int) * (index))

#define SURFACE_STATE_OFFSET_GEN8(index)   (SURFACE_STATE_PADDED_SIZE_GEN8 * (index))
#define BINDING_TABLE_OFFSET_GEN8(index)   (SURFACE_STATE_OFFSET_GEN8(MAX_MEDIA_SURFACES_GEN6) + sizeof(unsigned int) * (index))
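
/*
 * Layout implied by the macros above: MAX_MEDIA_SURFACES_GEN6 padded
 * surface-state entries come first in the buffer, and the binding table
 * (one dword per slot, holding the offset of the matching surface state)
 * starts immediately after them.
 */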
#define CURBE_ALLOCATION_SIZE   37
#define CURBE_TOTAL_DATA_LENGTH (4 * 32)
#define CURBE_URB_ENTRY_LENGTH  4
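
/*
 * CURBE_ALLOCATION_SIZE and CURBE_URB_ENTRY_LENGTH appear to be counts of
 * 256-bit URB units (the granularity MEDIA_VFE_STATE works in), while
 * CURBE_TOTAL_DATA_LENGTH is a byte count used to size the constant
 * buffer below.
 */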
/* Shader binaries for the sharpening filter */
static const unsigned int gen75_gpe_sharpening_h_blur[][4] = {
#include "shaders/post_processing/gen75/sharpening_h_blur.g75b"
};
static const unsigned int gen75_gpe_sharpening_v_blur[][4] = {
#include "shaders/post_processing/gen75/sharpening_v_blur.g75b"
};
static const unsigned int gen75_gpe_sharpening_unmask[][4] = {
#include "shaders/post_processing/gen75/sharpening_unmask.g75b"
};

static struct i965_kernel gen75_vpp_sharpening_kernels[] = {
    {
        "vpp: sharpening(horizontal blur)",
        VPP_GPE_SHARPENING,
        gen75_gpe_sharpening_h_blur,
        sizeof(gen75_gpe_sharpening_h_blur),
        NULL
    },
    {
        "vpp: sharpening(vertical blur)",
        VPP_GPE_SHARPENING,
        gen75_gpe_sharpening_v_blur,
        sizeof(gen75_gpe_sharpening_v_blur),
        NULL
    },
    {
        "vpp: sharpening(unmask)",
        VPP_GPE_SHARPENING,
        gen75_gpe_sharpening_unmask,
        sizeof(gen75_gpe_sharpening_unmask),
        NULL
    },
};
/* sharpening kernels for Broadwell */
static const unsigned int gen8_gpe_sharpening_h_blur[][4] = {
#include "shaders/post_processing/gen8/sharpening_h_blur.g8b"
};
static const unsigned int gen8_gpe_sharpening_v_blur[][4] = {
#include "shaders/post_processing/gen8/sharpening_v_blur.g8b"
};
static const unsigned int gen8_gpe_sharpening_unmask[][4] = {
#include "shaders/post_processing/gen8/sharpening_unmask.g8b"
};

static struct i965_kernel gen8_vpp_sharpening_kernels[] = {
    {
        "vpp: sharpening(horizontal blur)",
        VPP_GPE_SHARPENING,
        gen8_gpe_sharpening_h_blur,
        sizeof(gen8_gpe_sharpening_h_blur),
        NULL
    },
    {
        "vpp: sharpening(vertical blur)",
        VPP_GPE_SHARPENING,
        gen8_gpe_sharpening_v_blur,
        sizeof(gen8_gpe_sharpening_v_blur),
        NULL
    },
    {
        "vpp: sharpening(unmask)",
        VPP_GPE_SHARPENING,
        gen8_gpe_sharpening_unmask,
        sizeof(gen8_gpe_sharpening_unmask),
        NULL
    },
};
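
/*
 * Binding-table slot assignment used by both surfaces_setup variants
 * below: with n input NV12 surfaces (current frame plus forward/backward
 * references), slots 0..2n-1 hold their luma/chroma pairs, slots 2n and
 * 2n+1 hold the output luma/chroma, and slot 2n+2 holds the kernel
 * return buffer.
 */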
static VAStatus
gen75_gpe_process_surfaces_setup(VADriverContextP ctx,
                                 struct vpp_gpe_context *vpp_gpe_ctx)
{
    struct object_surface *obj_surface;
    unsigned int i = 0;
    unsigned char input_surface_sum = (1 + vpp_gpe_ctx->forward_surf_sum +
                                       vpp_gpe_ctx->backward_surf_sum) * 2;

    /* Bind input NV12 surfaces (Luma + Chroma) */
    for (i = 0; i < input_surface_sum; i += 2) {
        obj_surface = vpp_gpe_ctx->surface_input_object[i / 2];

        gen7_gpe_media_rw_surface_setup(ctx,
                                        &vpp_gpe_ctx->gpe_ctx,
                                        obj_surface,
                                        BINDING_TABLE_OFFSET_GEN7(i),
                                        SURFACE_STATE_OFFSET_GEN7(i));

        gen75_gpe_media_chroma_surface_setup(ctx,
                                             &vpp_gpe_ctx->gpe_ctx,
                                             obj_surface,
                                             BINDING_TABLE_OFFSET_GEN7(i + 1),
                                             SURFACE_STATE_OFFSET_GEN7(i + 1));
    }

    /* Bind output NV12 surface (Luma + Chroma) */
    obj_surface = vpp_gpe_ctx->surface_output_object;

    gen7_gpe_media_rw_surface_setup(ctx,
                                    &vpp_gpe_ctx->gpe_ctx,
                                    obj_surface,
                                    BINDING_TABLE_OFFSET_GEN7(input_surface_sum),
                                    SURFACE_STATE_OFFSET_GEN7(input_surface_sum));
    gen75_gpe_media_chroma_surface_setup(ctx,
                                         &vpp_gpe_ctx->gpe_ctx,
                                         obj_surface,
                                         BINDING_TABLE_OFFSET_GEN7(input_surface_sum + 1),
                                         SURFACE_STATE_OFFSET_GEN7(input_surface_sum + 1));

    /* Bind the kernel return buffer surface */
    gen7_gpe_buffer_suface_setup(ctx,
                                 &vpp_gpe_ctx->gpe_ctx,
                                 &vpp_gpe_ctx->vpp_kernel_return,
                                 BINDING_TABLE_OFFSET_GEN7(input_surface_sum + 2),
                                 SURFACE_STATE_OFFSET_GEN7(input_surface_sum + 2));

    return VA_STATUS_SUCCESS;
}
static VAStatus
gen75_gpe_process_interface_setup(VADriverContextP ctx,
                                  struct vpp_gpe_context *vpp_gpe_ctx)
{
    struct gen6_interface_descriptor_data *desc;
    dri_bo *bo = vpp_gpe_ctx->gpe_ctx.idrt.bo;
    int i;

    dri_bo_map(bo, 1);
    assert(bo->virtual);
    desc = bo->virtual;

    /* Set up the descriptor table */
    for (i = 0; i < vpp_gpe_ctx->sub_shader_sum; i++) {
        struct i965_kernel *kernel = &vpp_gpe_ctx->gpe_ctx.kernels[i];
        assert(sizeof(*desc) == 32);
        memset(desc, 0, sizeof(*desc));
        desc->desc0.kernel_start_pointer = (kernel->bo->offset >> 6);
        desc->desc2.sampler_count = 0; /* FIXME: */
        desc->desc2.sampler_state_pointer = 0;
        desc->desc3.binding_table_entry_count = 6; /* FIXME: */
        desc->desc3.binding_table_pointer = (BINDING_TABLE_OFFSET_GEN7(0) >> 5);
        desc->desc4.constant_urb_entry_read_offset = 0;
        desc->desc4.constant_urb_entry_read_length = 0;

        /* Patch the kernel address into desc0 when the bo is relocated */
        dri_bo_emit_reloc(bo,
                          I915_GEM_DOMAIN_INSTRUCTION, 0,
                          0,
                          i * sizeof(*desc) + offsetof(struct gen6_interface_descriptor_data, desc0),
                          kernel->bo);
        desc++;
    }

    dri_bo_unmap(bo);

    return VA_STATUS_SUCCESS;
}
static VAStatus
gen75_gpe_process_constant_fill(VADriverContextP ctx,
                                struct vpp_gpe_context *vpp_gpe_ctx)
{
    dri_bo_map(vpp_gpe_ctx->gpe_ctx.curbe.bo, 1);
    assert(vpp_gpe_ctx->gpe_ctx.curbe.bo->virtual);
    unsigned char *constant_buffer = vpp_gpe_ctx->gpe_ctx.curbe.bo->virtual;
    memcpy(constant_buffer, vpp_gpe_ctx->kernel_param,
           vpp_gpe_ctx->kernel_param_size);
    dri_bo_unmap(vpp_gpe_ctx->gpe_ctx.curbe.bo);

    return VA_STATUS_SUCCESS;
}
static VAStatus
gen75_gpe_process_parameters_fill(VADriverContextP ctx,
                                  struct vpp_gpe_context *vpp_gpe_ctx)
{
    unsigned int *command_ptr;
    unsigned int i, size = vpp_gpe_ctx->thread_param_size;
    unsigned char *position = NULL;

    /* Fill in the per-thread inline data */
    dri_bo_map(vpp_gpe_ctx->vpp_batchbuffer.bo, 1);
    command_ptr = vpp_gpe_ctx->vpp_batchbuffer.bo->virtual;

    for (i = 0; i < vpp_gpe_ctx->thread_num; i++) {
        /* MEDIA_OBJECT: 6 header dwords plus the inline data */
        *command_ptr++ = (CMD_MEDIA_OBJECT | (size / sizeof(int) + 6 - 2));
        *command_ptr++ = vpp_gpe_ctx->sub_shader_index;
        *command_ptr++ = 0;
        *command_ptr++ = 0;
        *command_ptr++ = 0;
        *command_ptr++ = 0;

        /* copy thread inline data */
        position = (unsigned char *)(vpp_gpe_ctx->thread_param + size * i);
        memcpy(command_ptr, position, size);
        command_ptr += size / sizeof(int);
    }

    *command_ptr++ = 0;
    *command_ptr++ = MI_BATCH_BUFFER_END;

    dri_bo_unmap(vpp_gpe_ctx->vpp_batchbuffer.bo);

    return VA_STATUS_SUCCESS;
}
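
/*
 * Each MEDIA_OBJECT written above is 6 header dwords plus the inline
 * thread parameters, which is exactly the per-thread term used when
 * gen75_gpe_process_init() sizes vpp_batchbuffer; the extra 16 bytes
 * there cover the trailing pad dword and MI_BATCH_BUFFER_END.
 */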
static VAStatus
gen75_gpe_process_pipeline_setup(VADriverContextP ctx,
                                 struct vpp_gpe_context *vpp_gpe_ctx)
{
    intel_batchbuffer_start_atomic(vpp_gpe_ctx->batch, 0x1000);
    intel_batchbuffer_emit_mi_flush(vpp_gpe_ctx->batch);

    gen6_gpe_pipeline_setup(ctx, &vpp_gpe_ctx->gpe_ctx, vpp_gpe_ctx->batch);

    gen75_gpe_process_parameters_fill(ctx, vpp_gpe_ctx);

    BEGIN_BATCH(vpp_gpe_ctx->batch, 2);
    OUT_BATCH(vpp_gpe_ctx->batch, MI_BATCH_BUFFER_START | (1 << 8));
    OUT_RELOC(vpp_gpe_ctx->batch,
              vpp_gpe_ctx->vpp_batchbuffer.bo,
              I915_GEM_DOMAIN_COMMAND, 0,
              0);
    ADVANCE_BATCH(vpp_gpe_ctx->batch);

    intel_batchbuffer_end_atomic(vpp_gpe_ctx->batch);

    return VA_STATUS_SUCCESS;
}
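
/*
 * The MEDIA_OBJECT stream lives in a separate, chained batch:
 * MI_BATCH_BUFFER_START above jumps into vpp_batchbuffer as the last
 * command of the primary batch, and the MI_BATCH_BUFFER_END written there
 * terminates the submission. On Haswell, bit 8 of MI_BATCH_BUFFER_START
 * is presumably the address-space indicator (PPGTT), which is why
 * (1 << 8) is set here.
 */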
static VAStatus
gen75_gpe_process_init(VADriverContextP ctx,
                       struct vpp_gpe_context *vpp_gpe_ctx)
{
    struct i965_driver_data *i965 = i965_driver_data(ctx);
    dri_bo *bo;

    unsigned int batch_buf_size = vpp_gpe_ctx->thread_num *
        (vpp_gpe_ctx->thread_param_size + 6 * sizeof(int)) + 16;

    vpp_gpe_ctx->vpp_kernel_return.num_blocks = vpp_gpe_ctx->thread_num;
    vpp_gpe_ctx->vpp_kernel_return.size_block = 16;
    vpp_gpe_ctx->vpp_kernel_return.pitch = 1;
    unsigned int kernel_return_size = vpp_gpe_ctx->vpp_kernel_return.num_blocks
        * vpp_gpe_ctx->vpp_kernel_return.size_block;

    dri_bo_unreference(vpp_gpe_ctx->vpp_batchbuffer.bo);
    bo = dri_bo_alloc(i965->intel.bufmgr,
                      "vpp batch buffer",
                      batch_buf_size, 0x1000);
    vpp_gpe_ctx->vpp_batchbuffer.bo = bo;
    dri_bo_reference(vpp_gpe_ctx->vpp_batchbuffer.bo);

    dri_bo_unreference(vpp_gpe_ctx->vpp_kernel_return.bo);
    bo = dri_bo_alloc(i965->intel.bufmgr,
                      "vpp kernel return buffer",
                      kernel_return_size, 0x1000);
    vpp_gpe_ctx->vpp_kernel_return.bo = bo;
    dri_bo_reference(vpp_gpe_ctx->vpp_kernel_return.bo);

    vpp_gpe_ctx->gpe_context_init(ctx, &vpp_gpe_ctx->gpe_ctx);

    return VA_STATUS_SUCCESS;
}
static VAStatus
gen75_gpe_process_prepare(VADriverContextP ctx,
                          struct vpp_gpe_context *vpp_gpe_ctx)
{
    /* Set up all the memory objects */
    gen75_gpe_process_surfaces_setup(ctx, vpp_gpe_ctx);
    gen75_gpe_process_interface_setup(ctx, vpp_gpe_ctx);
    //gen75_gpe_process_constant_setup(ctx, vpp_gpe_ctx);

    /* Program the media pipeline */
    gen75_gpe_process_pipeline_setup(ctx, vpp_gpe_ctx);

    return VA_STATUS_SUCCESS;
}
static VAStatus
gen75_gpe_process_run(VADriverContextP ctx,
                      struct vpp_gpe_context *vpp_gpe_ctx)
{
    intel_batchbuffer_flush(vpp_gpe_ctx->batch);

    return VA_STATUS_SUCCESS;
}
static VAStatus
gen75_gpe_process(VADriverContextP ctx,
                  struct vpp_gpe_context *vpp_gpe_ctx)
{
    VAStatus va_status = VA_STATUS_SUCCESS;

    va_status = gen75_gpe_process_init(ctx, vpp_gpe_ctx);
    if (va_status != VA_STATUS_SUCCESS)
        return va_status;

    va_status = gen75_gpe_process_prepare(ctx, vpp_gpe_ctx);
    if (va_status != VA_STATUS_SUCCESS)
        return va_status;

    va_status = gen75_gpe_process_run(ctx, vpp_gpe_ctx);
    if (va_status != VA_STATUS_SUCCESS)
        return va_status;

    return VA_STATUS_SUCCESS;
}
static VAStatus
gen8_gpe_process_surfaces_setup(VADriverContextP ctx,
                                struct vpp_gpe_context *vpp_gpe_ctx)
{
    struct object_surface *obj_surface;
    unsigned int i = 0;
    unsigned char input_surface_sum = (1 + vpp_gpe_ctx->forward_surf_sum +
                                       vpp_gpe_ctx->backward_surf_sum) * 2;

    /* Bind input NV12 surfaces (Luma + Chroma) */
    for (i = 0; i < input_surface_sum; i += 2) {
        obj_surface = vpp_gpe_ctx->surface_input_object[i / 2];

        gen8_gpe_media_rw_surface_setup(ctx,
                                        &vpp_gpe_ctx->gpe_ctx,
                                        obj_surface,
                                        BINDING_TABLE_OFFSET_GEN8(i),
                                        SURFACE_STATE_OFFSET_GEN8(i));

        gen8_gpe_media_chroma_surface_setup(ctx,
                                            &vpp_gpe_ctx->gpe_ctx,
                                            obj_surface,
                                            BINDING_TABLE_OFFSET_GEN8(i + 1),
                                            SURFACE_STATE_OFFSET_GEN8(i + 1));
    }

    /* Bind output NV12 surface (Luma + Chroma) */
    obj_surface = vpp_gpe_ctx->surface_output_object;

    gen8_gpe_media_rw_surface_setup(ctx,
                                    &vpp_gpe_ctx->gpe_ctx,
                                    obj_surface,
                                    BINDING_TABLE_OFFSET_GEN8(input_surface_sum),
                                    SURFACE_STATE_OFFSET_GEN8(input_surface_sum));
    gen8_gpe_media_chroma_surface_setup(ctx,
                                        &vpp_gpe_ctx->gpe_ctx,
                                        obj_surface,
                                        BINDING_TABLE_OFFSET_GEN8(input_surface_sum + 1),
                                        SURFACE_STATE_OFFSET_GEN8(input_surface_sum + 1));

    /* Bind the kernel return buffer surface */
    gen7_gpe_buffer_suface_setup(ctx,
                                 &vpp_gpe_ctx->gpe_ctx,
                                 &vpp_gpe_ctx->vpp_kernel_return,
                                 BINDING_TABLE_OFFSET_GEN8(input_surface_sum + 2),
                                 SURFACE_STATE_OFFSET_GEN8(input_surface_sum + 2));

    return VA_STATUS_SUCCESS;
}
static VAStatus
gen8_gpe_process_interface_setup(VADriverContextP ctx,
                                 struct vpp_gpe_context *vpp_gpe_ctx)
{
    struct gen8_interface_descriptor_data *desc;
    dri_bo *bo = vpp_gpe_ctx->gpe_ctx.dynamic_state.bo;
    int i;

    dri_bo_map(bo, 1);
    assert(bo->virtual);
    desc = (struct gen8_interface_descriptor_data *)(bo->virtual
                                                     + vpp_gpe_ctx->gpe_ctx.idrt_offset);

    /* Set up the descriptor table */
    for (i = 0; i < vpp_gpe_ctx->sub_shader_sum; i++) {
        struct i965_kernel *kernel;
        kernel = &vpp_gpe_ctx->gpe_ctx.kernels[i];
        assert(sizeof(*desc) == 32);
        memset(desc, 0, sizeof(*desc));
        /* On gen8 the kernel start pointer is an offset into the kernel
         * heap in the dynamic state buffer, so no relocation is needed. */
        desc->desc0.kernel_start_pointer = kernel->kernel_offset >> 6;
        desc->desc3.sampler_count = 0; /* FIXME: */
        desc->desc3.sampler_state_pointer = 0;
        desc->desc4.binding_table_entry_count = 6; /* FIXME: */
        desc->desc4.binding_table_pointer = (BINDING_TABLE_OFFSET_GEN8(0) >> 5);
        desc->desc5.constant_urb_entry_read_offset = 0;
        desc->desc5.constant_urb_entry_read_length = 0;

        desc++;
    }

    dri_bo_unmap(bo);

    return VA_STATUS_SUCCESS;
}
static VAStatus
gen8_gpe_process_constant_fill(VADriverContextP ctx,
                               struct vpp_gpe_context *vpp_gpe_ctx)
{
    dri_bo_map(vpp_gpe_ctx->gpe_ctx.dynamic_state.bo, 1);
    assert(vpp_gpe_ctx->gpe_ctx.dynamic_state.bo->virtual);
    unsigned char *constant_buffer = vpp_gpe_ctx->gpe_ctx.dynamic_state.bo->virtual;
    memcpy(constant_buffer, vpp_gpe_ctx->kernel_param,
           vpp_gpe_ctx->kernel_param_size);
    dri_bo_unmap(vpp_gpe_ctx->gpe_ctx.dynamic_state.bo);

    return VA_STATUS_SUCCESS;
}
static VAStatus
gen8_gpe_process_parameters_fill(VADriverContextP ctx,
                                 struct vpp_gpe_context *vpp_gpe_ctx)
{
    unsigned int *command_ptr;
    unsigned int i, size = vpp_gpe_ctx->thread_param_size;
    unsigned char *position = NULL;

    /* Fill in the per-thread inline data */
    dri_bo_map(vpp_gpe_ctx->vpp_batchbuffer.bo, 1);
    command_ptr = vpp_gpe_ctx->vpp_batchbuffer.bo->virtual;

    for (i = 0; i < vpp_gpe_ctx->thread_num; i++) {
        /* MEDIA_OBJECT: 6 header dwords plus the inline data */
        *command_ptr++ = (CMD_MEDIA_OBJECT | (size / sizeof(int) + 6 - 2));
        *command_ptr++ = vpp_gpe_ctx->sub_shader_index;
        *command_ptr++ = 0;
        *command_ptr++ = 0;
        *command_ptr++ = 0;
        *command_ptr++ = 0;

        /* copy thread inline data */
        position = (unsigned char *)(vpp_gpe_ctx->thread_param + size * i);
        memcpy(command_ptr, position, size);
        command_ptr += size / sizeof(int);

        *command_ptr++ = CMD_MEDIA_STATE_FLUSH;
        *command_ptr++ = 0;
    }

    *command_ptr++ = 0;
    *command_ptr++ = MI_BATCH_BUFFER_END;

    dri_bo_unmap(vpp_gpe_ctx->vpp_batchbuffer.bo);

    return VA_STATUS_SUCCESS;
}
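
/*
 * Unlike the gen75 path, each MEDIA_OBJECT on gen8 is followed by a
 * two-dword MEDIA_STATE_FLUSH, so gen8_gpe_process_init() has to reserve
 * two extra dwords per thread when sizing vpp_batchbuffer.
 */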
static VAStatus
gen8_gpe_process_pipeline_setup(VADriverContextP ctx,
                                struct vpp_gpe_context *vpp_gpe_ctx)
{
    intel_batchbuffer_start_atomic(vpp_gpe_ctx->batch, 0x1000);
    intel_batchbuffer_emit_mi_flush(vpp_gpe_ctx->batch);

    gen8_gpe_pipeline_setup(ctx, &vpp_gpe_ctx->gpe_ctx, vpp_gpe_ctx->batch);

    gen8_gpe_process_parameters_fill(ctx, vpp_gpe_ctx);

    BEGIN_BATCH(vpp_gpe_ctx->batch, 3);
    OUT_BATCH(vpp_gpe_ctx->batch, MI_BATCH_BUFFER_START | (1 << 8) | (1 << 0));
    OUT_RELOC(vpp_gpe_ctx->batch,
              vpp_gpe_ctx->vpp_batchbuffer.bo,
              I915_GEM_DOMAIN_COMMAND, 0,
              0);
    OUT_BATCH(vpp_gpe_ctx->batch, 0);

    ADVANCE_BATCH(vpp_gpe_ctx->batch);

    intel_batchbuffer_end_atomic(vpp_gpe_ctx->batch);

    return VA_STATUS_SUCCESS;
}
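
/*
 * gen8 note: MI_BATCH_BUFFER_START grows to three dwords for 48-bit
 * addressing, so the (1 << 0) above is the dword-length field rather
 * than a flag, and the extra OUT_BATCH(0) supplies the upper dword of
 * the batch address.
 */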
static VAStatus
gen8_gpe_process_init(VADriverContextP ctx,
                      struct vpp_gpe_context *vpp_gpe_ctx)
{
    struct i965_driver_data *i965 = i965_driver_data(ctx);
    dri_bo *bo;

    /* Reserve two extra dwords per thread for the MEDIA_STATE_FLUSH that
     * gen8_gpe_process_parameters_fill() emits after each MEDIA_OBJECT;
     * the gen75 sizing of 6 dwords per object would be overrun here. */
    unsigned int batch_buf_size = vpp_gpe_ctx->thread_num *
        (vpp_gpe_ctx->thread_param_size + 8 * sizeof(int)) + 16;

    vpp_gpe_ctx->vpp_kernel_return.num_blocks = vpp_gpe_ctx->thread_num;
    vpp_gpe_ctx->vpp_kernel_return.size_block = 16;
    vpp_gpe_ctx->vpp_kernel_return.pitch = 1;

    unsigned int kernel_return_size = vpp_gpe_ctx->vpp_kernel_return.num_blocks
        * vpp_gpe_ctx->vpp_kernel_return.size_block;

    dri_bo_unreference(vpp_gpe_ctx->vpp_batchbuffer.bo);
    bo = dri_bo_alloc(i965->intel.bufmgr,
                      "vpp batch buffer",
                      batch_buf_size, 0x1000);
    vpp_gpe_ctx->vpp_batchbuffer.bo = bo;
    dri_bo_reference(vpp_gpe_ctx->vpp_batchbuffer.bo);

    dri_bo_unreference(vpp_gpe_ctx->vpp_kernel_return.bo);
    bo = dri_bo_alloc(i965->intel.bufmgr,
                      "vpp kernel return buffer",
                      kernel_return_size, 0x1000);
    vpp_gpe_ctx->vpp_kernel_return.bo = bo;
    dri_bo_reference(vpp_gpe_ctx->vpp_kernel_return.bo);

    vpp_gpe_ctx->gpe_context_init(ctx, &vpp_gpe_ctx->gpe_ctx);

    return VA_STATUS_SUCCESS;
}
static VAStatus
gen8_gpe_process_prepare(VADriverContextP ctx,
                         struct vpp_gpe_context *vpp_gpe_ctx)
{
    /* Set up all the memory objects */
    gen8_gpe_process_surfaces_setup(ctx, vpp_gpe_ctx);
    gen8_gpe_process_interface_setup(ctx, vpp_gpe_ctx);
    //gen8_gpe_process_constant_setup(ctx, vpp_gpe_ctx);

    /* Program the media pipeline */
    gen8_gpe_process_pipeline_setup(ctx, vpp_gpe_ctx);

    return VA_STATUS_SUCCESS;
}
static VAStatus
gen8_gpe_process_run(VADriverContextP ctx,
                     struct vpp_gpe_context *vpp_gpe_ctx)
{
    intel_batchbuffer_flush(vpp_gpe_ctx->batch);

    return VA_STATUS_SUCCESS;
}
static VAStatus
gen8_gpe_process(VADriverContextP ctx,
                 struct vpp_gpe_context *vpp_gpe_ctx)
{
    VAStatus va_status = VA_STATUS_SUCCESS;

    va_status = gen8_gpe_process_init(ctx, vpp_gpe_ctx);
    if (va_status != VA_STATUS_SUCCESS)
        return va_status;

    va_status = gen8_gpe_process_prepare(ctx, vpp_gpe_ctx);
    if (va_status != VA_STATUS_SUCCESS)
        return va_status;

    va_status = gen8_gpe_process_run(ctx, vpp_gpe_ctx);
    if (va_status != VA_STATUS_SUCCESS)
        return va_status;

    return VA_STATUS_SUCCESS;
}
static VAStatus
vpp_gpe_process(VADriverContextP ctx,
                struct vpp_gpe_context *vpp_gpe_ctx)
{
    struct i965_driver_data *i965 = i965_driver_data(ctx);

    if (IS_HASWELL(i965->intel.device_info))
        return gen75_gpe_process(ctx, vpp_gpe_ctx);
    else if (IS_GEN8(i965->intel.device_info))
        return gen8_gpe_process(ctx, vpp_gpe_ctx);

    return VA_STATUS_ERROR_UNIMPLEMENTED;
}
static VAStatus
vpp_gpe_process_sharpening(VADriverContextP ctx,
                           struct vpp_gpe_context *vpp_gpe_ctx)
{
    VAStatus va_status = VA_STATUS_SUCCESS;
    struct i965_driver_data *i965 = i965_driver_data(ctx);
    struct object_surface *origin_in_obj_surface = vpp_gpe_ctx->surface_input_object[0];
    struct object_surface *origin_out_obj_surface = vpp_gpe_ctx->surface_output_object;

    VAProcPipelineParameterBuffer *pipe = vpp_gpe_ctx->pipeline_param;
    VABufferID *filter_ids = (VABufferID *)pipe->filters;
    struct object_buffer *obj_buf = BUFFER(*(filter_ids + 0));

    assert(obj_buf && obj_buf->buffer_store && obj_buf->buffer_store->buffer);

    if (!obj_buf ||
        !obj_buf->buffer_store ||
        !obj_buf->buffer_store->buffer)
        goto error;

    VAProcFilterParameterBuffer *filter =
        (VAProcFilterParameterBuffer *)obj_buf->buffer_store->buffer;
    float sharpening_intensity = filter->value;

    ThreadParameterSharpening thr_param;
    unsigned int thr_param_size = sizeof(ThreadParameterSharpening);
    unsigned int i;
    unsigned char *pos;
    if (vpp_gpe_ctx->is_first_frame) {
        vpp_gpe_ctx->sub_shader_sum = 3;
        struct i965_kernel *vpp_kernels = NULL;
        if (IS_HASWELL(i965->intel.device_info))
            vpp_kernels = gen75_vpp_sharpening_kernels;
        else if (IS_GEN8(i965->intel.device_info))
            vpp_kernels = gen8_vpp_sharpening_kernels;
        else /* keep vpp_kernels from being used uninitialized */
            return VA_STATUS_ERROR_UNIMPLEMENTED;

        vpp_gpe_ctx->gpe_load_kernels(ctx,
                                      &vpp_gpe_ctx->gpe_ctx,
                                      vpp_kernels,
                                      vpp_gpe_ctx->sub_shader_sum);
    }

    if (vpp_gpe_ctx->surface_tmp == VA_INVALID_ID) {
        va_status = i965_CreateSurfaces(ctx,
                                        vpp_gpe_ctx->in_frame_w,
                                        vpp_gpe_ctx->in_frame_h,
                                        VA_RT_FORMAT_YUV420,
                                        1,
                                        &vpp_gpe_ctx->surface_tmp);
        assert(va_status == VA_STATUS_SUCCESS);

        struct object_surface *obj_surf = SURFACE(vpp_gpe_ctx->surface_tmp);
        assert(obj_surf);

        if (obj_surf) {
            i965_check_alloc_surface_bo(ctx, obj_surf, 1, VA_FOURCC_NV12,
                                        SUBSAMPLE_YUV420);
            vpp_gpe_ctx->surface_tmp_object = obj_surf;
        }
    }
    assert(sharpening_intensity >= 0.0 && sharpening_intensity <= 1.0);
    thr_param.l_amount = (unsigned int)(sharpening_intensity * 128);
    thr_param.d_amount = (unsigned int)(sharpening_intensity * 128);

    thr_param.base.pic_width = vpp_gpe_ctx->in_frame_w;
    thr_param.base.pic_height = vpp_gpe_ctx->in_frame_h;
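
    /*
     * The sharpening filter is a three-pass unsharp mask: pass 0 blurs
     * the input horizontally into the output surface, pass 1 blurs that
     * result vertically into the temporary surface, and pass 2 combines
     * the blurred temporary with the original input to produce the
     * sharpened output.
     */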
    /* Step 1: horizontal blur process */
    vpp_gpe_ctx->forward_surf_sum = 0;
    vpp_gpe_ctx->backward_surf_sum = 0;

    vpp_gpe_ctx->thread_num = vpp_gpe_ctx->in_frame_h / 16;
    vpp_gpe_ctx->thread_param_size = thr_param_size;
    vpp_gpe_ctx->thread_param = (unsigned char *)malloc(vpp_gpe_ctx->thread_param_size
                                                        * vpp_gpe_ctx->thread_num);
    pos = vpp_gpe_ctx->thread_param;

    if (!pos) {
        return VA_STATUS_ERROR_ALLOCATION_FAILED;
    }

    /* One thread per 16-line horizontal strip */
    for (i = 0; i < vpp_gpe_ctx->thread_num; i++) {
        thr_param.base.v_pos = 16 * i;
        thr_param.base.h_pos = 0;
        memcpy(pos, &thr_param, thr_param_size);
        pos += thr_param_size;
    }

    vpp_gpe_ctx->sub_shader_index = 0;
    va_status = vpp_gpe_process(ctx, vpp_gpe_ctx);
    free(vpp_gpe_ctx->thread_param);
    /* Step 2: vertical blur process */
    vpp_gpe_ctx->surface_input_object[0] = vpp_gpe_ctx->surface_output_object;
    vpp_gpe_ctx->surface_output_object = vpp_gpe_ctx->surface_tmp_object;
    vpp_gpe_ctx->forward_surf_sum = 0;
    vpp_gpe_ctx->backward_surf_sum = 0;

    vpp_gpe_ctx->thread_num = vpp_gpe_ctx->in_frame_w / 16;
    vpp_gpe_ctx->thread_param_size = thr_param_size;
    vpp_gpe_ctx->thread_param = (unsigned char *)malloc(vpp_gpe_ctx->thread_param_size
                                                        * vpp_gpe_ctx->thread_num);
    pos = vpp_gpe_ctx->thread_param;

    if (!pos) {
        return VA_STATUS_ERROR_ALLOCATION_FAILED;
    }

    /* One thread per 16-column vertical strip */
    for (i = 0; i < vpp_gpe_ctx->thread_num; i++) {
        thr_param.base.v_pos = 0;
        thr_param.base.h_pos = 16 * i;
        memcpy(pos, &thr_param, thr_param_size);
        pos += thr_param_size;
    }

    vpp_gpe_ctx->sub_shader_index = 1;
    va_status = vpp_gpe_process(ctx, vpp_gpe_ctx);
    free(vpp_gpe_ctx->thread_param);
    /* Step 3: apply the blur to the original surface */
    vpp_gpe_ctx->surface_input_object[0] = origin_in_obj_surface;
    vpp_gpe_ctx->surface_input_object[1] = vpp_gpe_ctx->surface_tmp_object;
    vpp_gpe_ctx->surface_output_object = origin_out_obj_surface;
    vpp_gpe_ctx->forward_surf_sum = 1;
    vpp_gpe_ctx->backward_surf_sum = 0;

    vpp_gpe_ctx->thread_num = vpp_gpe_ctx->in_frame_h / 4;
    vpp_gpe_ctx->thread_param_size = thr_param_size;
    vpp_gpe_ctx->thread_param = (unsigned char *)malloc(vpp_gpe_ctx->thread_param_size
                                                        * vpp_gpe_ctx->thread_num);
    pos = vpp_gpe_ctx->thread_param;

    if (!pos) {
        return VA_STATUS_ERROR_ALLOCATION_FAILED;
    }

    /* One thread per 4-line strip */
    for (i = 0; i < vpp_gpe_ctx->thread_num; i++) {
        thr_param.base.v_pos = 4 * i;
        thr_param.base.h_pos = 0;
        memcpy(pos, &thr_param, thr_param_size);
        pos += thr_param_size;
    }

    vpp_gpe_ctx->sub_shader_index = 2;
    va_status = vpp_gpe_process(ctx, vpp_gpe_ctx);
    free(vpp_gpe_ctx->thread_param);

    return va_status;

error:
    return VA_STATUS_ERROR_INVALID_PARAMETER;
}
VAStatus vpp_gpe_process_picture(VADriverContextP ctx,
                                 struct vpp_gpe_context *vpp_gpe_ctx)
{
    VAStatus va_status = VA_STATUS_SUCCESS;
    struct i965_driver_data *i965 = i965_driver_data(ctx);
    VAProcPipelineParameterBuffer *pipe = vpp_gpe_ctx->pipeline_param;
    VAProcFilterParameterBuffer *filter = NULL;
    unsigned int i;
    struct object_surface *obj_surface = NULL;

    if (pipe->num_filters && !pipe->filters)
        goto error;

    /* Find the sharpening filter among the pipeline filter buffers */
    for (i = 0; i < pipe->num_filters; i++) {
        struct object_buffer *obj_buf = BUFFER(pipe->filters[i]);

        assert(obj_buf && obj_buf->buffer_store && obj_buf->buffer_store->buffer);

        if (!obj_buf ||
            !obj_buf->buffer_store ||
            !obj_buf->buffer_store->buffer)
            goto error;

        filter = (VAProcFilterParameterBuffer *)obj_buf->buffer_store->buffer;
        if (filter->type == VAProcFilterSharpening)
            break;
    }

    assert(pipe->num_forward_references + pipe->num_backward_references <= 4);
    vpp_gpe_ctx->surface_input_object[0] = vpp_gpe_ctx->surface_pipeline_input_object;

    vpp_gpe_ctx->forward_surf_sum = 0;
    vpp_gpe_ctx->backward_surf_sum = 0;

    for (i = 0; i < pipe->num_forward_references; i++) {
        obj_surface = SURFACE(pipe->forward_references[i]);

        assert(obj_surface);
        vpp_gpe_ctx->surface_input_object[i + 1] = obj_surface;
        vpp_gpe_ctx->forward_surf_sum++;
    }

    for (i = 0; i < pipe->num_backward_references; i++) {
        obj_surface = SURFACE(pipe->backward_references[i]);

        assert(obj_surface);
        vpp_gpe_ctx->surface_input_object[vpp_gpe_ctx->forward_surf_sum + 1 + i] = obj_surface;
        vpp_gpe_ctx->backward_surf_sum++;
    }

    obj_surface = vpp_gpe_ctx->surface_input_object[0];
    vpp_gpe_ctx->in_frame_w = obj_surface->orig_width;
    vpp_gpe_ctx->in_frame_h = obj_surface->orig_height;

    if (filter && filter->type == VAProcFilterSharpening)
        va_status = vpp_gpe_process_sharpening(ctx, vpp_gpe_ctx);
    else
        va_status = VA_STATUS_ERROR_ATTR_NOT_SUPPORTED;

    vpp_gpe_ctx->is_first_frame = 0;

    return va_status;

error:
    return VA_STATUS_ERROR_INVALID_PARAMETER;
}
void
vpp_gpe_context_destroy(VADriverContextP ctx,
                        struct vpp_gpe_context *vpp_gpe_ctx)
{
    dri_bo_unreference(vpp_gpe_ctx->vpp_batchbuffer.bo);
    vpp_gpe_ctx->vpp_batchbuffer.bo = NULL;

    dri_bo_unreference(vpp_gpe_ctx->vpp_kernel_return.bo);
    vpp_gpe_ctx->vpp_kernel_return.bo = NULL;

    vpp_gpe_ctx->gpe_context_destroy(&vpp_gpe_ctx->gpe_ctx);

    if (vpp_gpe_ctx->surface_tmp != VA_INVALID_ID) {
        assert(vpp_gpe_ctx->surface_tmp_object != NULL);
        i965_DestroySurfaces(ctx, &vpp_gpe_ctx->surface_tmp, 1);
        vpp_gpe_ctx->surface_tmp = VA_INVALID_ID;
        vpp_gpe_ctx->surface_tmp_object = NULL;
    }

    /* The batch buffer owns a bo of its own, so release it properly
     * rather than free()ing the wrapper alone. */
    intel_batchbuffer_free(vpp_gpe_ctx->batch);

    free(vpp_gpe_ctx);
}
struct vpp_gpe_context *
vpp_gpe_context_init(VADriverContextP ctx)
{
    struct i965_driver_data *i965 = i965_driver_data(ctx);
    struct vpp_gpe_context *vpp_gpe_ctx = calloc(1, sizeof(struct vpp_gpe_context));
    struct i965_gpe_context *gpe_ctx = &(vpp_gpe_ctx->gpe_ctx);

    assert(IS_HASWELL(i965->intel.device_info) ||
           IS_GEN8(i965->intel.device_info));

    vpp_gpe_ctx->surface_tmp = VA_INVALID_ID;
    vpp_gpe_ctx->surface_tmp_object = NULL;
    vpp_gpe_ctx->batch = intel_batchbuffer_new(&i965->intel, I915_EXEC_RENDER, 0);
    vpp_gpe_ctx->is_first_frame = 1;
    gpe_ctx->vfe_state.max_num_threads = 60 - 1;
    gpe_ctx->vfe_state.num_urb_entries = 16;
    gpe_ctx->vfe_state.gpgpu_mode = 0;
    gpe_ctx->vfe_state.urb_entry_size = 59 - 1;
    gpe_ctx->vfe_state.curbe_allocation_size = CURBE_ALLOCATION_SIZE - 1;
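
    /*
     * The "- 1" pattern above reflects the minus-one encoding that
     * MEDIA_VFE_STATE appears to expect for these fields (e.g.
     * max_num_threads = 60 - 1 requests 60 threads).
     */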
    if (IS_HASWELL(i965->intel.device_info)) {
        vpp_gpe_ctx->gpe_context_init = i965_gpe_context_init;
        vpp_gpe_ctx->gpe_context_destroy = i965_gpe_context_destroy;
        vpp_gpe_ctx->gpe_load_kernels = i965_gpe_load_kernels;
        gpe_ctx->surface_state_binding_table.length =
            (SURFACE_STATE_PADDED_SIZE_GEN7 + sizeof(unsigned int)) * MAX_MEDIA_SURFACES_GEN6;

        gpe_ctx->curbe.length = CURBE_TOTAL_DATA_LENGTH;
        gpe_ctx->idrt.max_entries = MAX_INTERFACE_DESC_GEN6;
        gpe_ctx->idrt.entry_size = sizeof(struct gen6_interface_descriptor_data);

    } else if (IS_GEN8(i965->intel.device_info)) {
        vpp_gpe_ctx->gpe_context_init = gen8_gpe_context_init;
        vpp_gpe_ctx->gpe_context_destroy = gen8_gpe_context_destroy;
        vpp_gpe_ctx->gpe_load_kernels = gen8_gpe_load_kernels;
        gpe_ctx->surface_state_binding_table.length =
            (SURFACE_STATE_PADDED_SIZE_GEN8 + sizeof(unsigned int)) * MAX_MEDIA_SURFACES_GEN6;

        gpe_ctx->curbe_size = CURBE_TOTAL_DATA_LENGTH;
        gpe_ctx->idrt_size = sizeof(struct gen8_interface_descriptor_data) * MAX_INTERFACE_DESC_GEN6;