/*
 * Copyright © 2011 Intel Corporation
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the
 * "Software"), to deal in the Software without restriction, including
 * without limitation the rights to use, copy, modify, merge, publish,
 * distribute, sub license, and/or sell copies of the Software, and to
 * permit persons to whom the Software is furnished to do so, subject to
 * the following conditions:
 *
 * The above copyright notice and this permission notice (including the
 * next paragraph) shall be included in all copies or substantial portions
 * of the Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
 * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
 * IN NO EVENT SHALL PRECISION INSIGHT AND/OR ITS SUPPLIERS BE LIABLE FOR
 * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
 * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
 * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
 *
 * Authors:
 *   Li Xiaowei <xiaowei.a.li@intel.com>
 */

#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <assert.h>

#include "intel_batchbuffer.h"
#include "intel_driver.h"

#include "i965_structs.h"
#include "i965_defines.h"
#include "i965_drv_video.h"
#include "gen75_vpp_gpe.h"

#define MAX_INTERFACE_DESC_GEN6      MAX_GPE_KERNELS
#define MAX_MEDIA_SURFACES_GEN6      34

#define SURFACE_STATE_OFFSET_GEN7(index)   (SURFACE_STATE_PADDED_SIZE_GEN7 * (index))
#define BINDING_TABLE_OFFSET_GEN7(index)   (SURFACE_STATE_OFFSET_GEN7(MAX_MEDIA_SURFACES_GEN6) + sizeof(unsigned int) * (index))

#define SURFACE_STATE_OFFSET_GEN8(index)   (SURFACE_STATE_PADDED_SIZE_GEN8 * (index))
#define BINDING_TABLE_OFFSET_GEN8(index)   (SURFACE_STATE_OFFSET_GEN8(MAX_MEDIA_SURFACES_GEN6) + sizeof(unsigned int) * (index))

#define CURBE_ALLOCATION_SIZE   37
#define CURBE_TOTAL_DATA_LENGTH (4 * 32)
#define CURBE_URB_ENTRY_LENGTH  4

/* Shaders information for sharpening */
static const unsigned int gen75_gpe_sharpening_h_blur[][4] = {
   #include "shaders/post_processing/gen75/sharpening_h_blur.g75b"
};
static const unsigned int gen75_gpe_sharpening_v_blur[][4] = {
   #include "shaders/post_processing/gen75/sharpening_v_blur.g75b"
};
static const unsigned int gen75_gpe_sharpening_unmask[][4] = {
   #include "shaders/post_processing/gen75/sharpening_unmask.g75b"
};
static struct i965_kernel gen75_vpp_sharpening_kernels[] = {
    {
        "vpp: sharpening(horizontal blur)",
        VPP_GPE_SHARPENING,
        gen75_gpe_sharpening_h_blur,
        sizeof(gen75_gpe_sharpening_h_blur),
        NULL
    },
    {
        "vpp: sharpening(vertical blur)",
        VPP_GPE_SHARPENING,
        gen75_gpe_sharpening_v_blur,
        sizeof(gen75_gpe_sharpening_v_blur),
        NULL
    },
    {
        "vpp: sharpening(unmask)",
        VPP_GPE_SHARPENING,
        gen75_gpe_sharpening_unmask,
        sizeof(gen75_gpe_sharpening_unmask),
        NULL
    },
};

/* sharpening kernels for Broadwell */
static const unsigned int gen8_gpe_sharpening_h_blur[][4] = {
   #include "shaders/post_processing/gen8/sharpening_h_blur.g8b"
};
static const unsigned int gen8_gpe_sharpening_v_blur[][4] = {
   #include "shaders/post_processing/gen8/sharpening_v_blur.g8b"
};
static const unsigned int gen8_gpe_sharpening_unmask[][4] = {
   #include "shaders/post_processing/gen8/sharpening_unmask.g8b"
};

static struct i965_kernel gen8_vpp_sharpening_kernels[] = {
    {
        "vpp: sharpening(horizontal blur)",
        VPP_GPE_SHARPENING,
        gen8_gpe_sharpening_h_blur,
        sizeof(gen8_gpe_sharpening_h_blur),
        NULL
    },
    {
        "vpp: sharpening(vertical blur)",
        VPP_GPE_SHARPENING,
        gen8_gpe_sharpening_v_blur,
        sizeof(gen8_gpe_sharpening_v_blur),
        NULL
    },
    {
        "vpp: sharpening(unmask)",
        VPP_GPE_SHARPENING,
        gen8_gpe_sharpening_unmask,
        sizeof(gen8_gpe_sharpening_unmask),
        NULL
    },
};

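/*
 * Bind every memory object the Haswell (gen75) media walk needs: each
 * NV12 input surface gets two binding table entries (luma + chroma),
 * followed by the NV12 output surface and the kernel return buffer.
 */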
static VAStatus
gen75_gpe_process_surfaces_setup(VADriverContextP ctx,
                   struct vpp_gpe_context *vpp_gpe_ctx)
{
    struct object_surface *obj_surface;
    unsigned int i = 0;
    unsigned char input_surface_sum = (1 + vpp_gpe_ctx->forward_surf_sum +
                                         vpp_gpe_ctx->backward_surf_sum) * 2;

    /* Binding input NV12 surfaces (Luma + Chroma) */
    for( i = 0; i < input_surface_sum; i += 2){
         obj_surface = vpp_gpe_ctx->surface_input_object[i/2];
         assert(obj_surface);
         gen7_gpe_media_rw_surface_setup(ctx,
                                         &vpp_gpe_ctx->gpe_ctx,
                                         obj_surface,
                                         BINDING_TABLE_OFFSET_GEN7(i),
                                         SURFACE_STATE_OFFSET_GEN7(i));

         gen75_gpe_media_chroma_surface_setup(ctx,
                                         &vpp_gpe_ctx->gpe_ctx,
                                         obj_surface,
                                         BINDING_TABLE_OFFSET_GEN7(i + 1),
                                         SURFACE_STATE_OFFSET_GEN7(i + 1));
    }

    /* Binding output NV12 surface (Luma + Chroma) */
    obj_surface = vpp_gpe_ctx->surface_output_object;
    assert(obj_surface);
    gen7_gpe_media_rw_surface_setup(ctx,
                                    &vpp_gpe_ctx->gpe_ctx,
                                    obj_surface,
                                    BINDING_TABLE_OFFSET_GEN7(input_surface_sum),
                                    SURFACE_STATE_OFFSET_GEN7(input_surface_sum));
    gen75_gpe_media_chroma_surface_setup(ctx,
                                    &vpp_gpe_ctx->gpe_ctx,
                                    obj_surface,
                                    BINDING_TABLE_OFFSET_GEN7(input_surface_sum + 1),
                                    SURFACE_STATE_OFFSET_GEN7(input_surface_sum + 1));
    /* Bind kernel return buffer surface */
    gen7_gpe_buffer_suface_setup(ctx,
                                 &vpp_gpe_ctx->gpe_ctx,
                                 &vpp_gpe_ctx->vpp_kernel_return,
                                 BINDING_TABLE_OFFSET_GEN7(input_surface_sum + 2),
                                 SURFACE_STATE_OFFSET_GEN7(input_surface_sum + 2));

    return VA_STATUS_SUCCESS;
}

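/*
 * Fill the interface descriptor remap table (IDRT): one
 * gen6_interface_descriptor_data entry per loaded sub-kernel, pointing
 * at the kernel start address and the shared binding table.
 */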
static VAStatus
gen75_gpe_process_interface_setup(VADriverContextP ctx,
                    struct vpp_gpe_context *vpp_gpe_ctx)
{
    struct gen6_interface_descriptor_data *desc;
    dri_bo *bo = vpp_gpe_ctx->gpe_ctx.idrt.bo;
    int i;

    dri_bo_map(bo, 1);
    assert(bo->virtual);
    desc = bo->virtual;

    /* Setup the descriptor table */
    for(i = 0; i < vpp_gpe_ctx->sub_shader_sum; i++){
        struct i965_kernel *kernel = &vpp_gpe_ctx->gpe_ctx.kernels[i];
        assert(sizeof(*desc) == 32);
        memset(desc, 0, sizeof(*desc));
        desc->desc0.kernel_start_pointer = (kernel->bo->offset >> 6);
        desc->desc2.sampler_count = 0; /* FIXME: */
        desc->desc2.sampler_state_pointer = 0;
        desc->desc3.binding_table_entry_count = 6; /* FIXME: */
        desc->desc3.binding_table_pointer = (BINDING_TABLE_OFFSET_GEN7(0) >> 5);
        desc->desc4.constant_urb_entry_read_offset = 0;
        desc->desc4.constant_urb_entry_read_length = 0;

        dri_bo_emit_reloc(bo,
                          I915_GEM_DOMAIN_INSTRUCTION, 0,
                          0,
                          i * sizeof(*desc) + offsetof(struct gen6_interface_descriptor_data, desc0),
                          kernel->bo);
        desc++;
    }

    dri_bo_unmap(bo);

    return VA_STATUS_SUCCESS;
}

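/*
 * Copy the caller-supplied kernel parameters into the CURBE buffer as
 * constant data for the EU threads. Not currently wired into the
 * prepare path (the constant setup call there is commented out).
 */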
static VAStatus
gen75_gpe_process_constant_fill(VADriverContextP ctx,
                   struct vpp_gpe_context *vpp_gpe_ctx)
{
    dri_bo_map(vpp_gpe_ctx->gpe_ctx.curbe.bo, 1);
    assert(vpp_gpe_ctx->gpe_ctx.curbe.bo->virtual);
    unsigned char* constant_buffer = vpp_gpe_ctx->gpe_ctx.curbe.bo->virtual;
    memcpy(constant_buffer, vpp_gpe_ctx->kernel_param,
                            vpp_gpe_ctx->kernel_param_size);
    dri_bo_unmap(vpp_gpe_ctx->gpe_ctx.curbe.bo);

    return VA_STATUS_SUCCESS;
}

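/*
 * Build the second-level batch buffer: one MEDIA_OBJECT command per
 * thread with the per-thread parameters appended as inline data,
 * terminated by MI_BATCH_BUFFER_END.
 */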
static VAStatus
gen75_gpe_process_parameters_fill(VADriverContextP ctx,
                           struct vpp_gpe_context *vpp_gpe_ctx)
{
    unsigned int *command_ptr;
    unsigned int i, size = vpp_gpe_ctx->thread_param_size;
    unsigned char* position = NULL;

    /* Thread inline data setting */
    dri_bo_map(vpp_gpe_ctx->vpp_batchbuffer.bo, 1);
    command_ptr = vpp_gpe_ctx->vpp_batchbuffer.bo->virtual;

    for(i = 0; i < vpp_gpe_ctx->thread_num; i++)
    {
         *command_ptr++ = (CMD_MEDIA_OBJECT | (size/sizeof(int) + 6 - 2));
         *command_ptr++ = vpp_gpe_ctx->sub_shader_index;
         *command_ptr++ = 0;
         *command_ptr++ = 0;
         *command_ptr++ = 0;
         *command_ptr++ = 0;

         /* copy thread inline data */
         position = (unsigned char*)(vpp_gpe_ctx->thread_param + size * i);
         memcpy(command_ptr, position, size);
         command_ptr += size/sizeof(int);
    }

    *command_ptr++ = 0;
    *command_ptr++ = MI_BATCH_BUFFER_END;

    dri_bo_unmap(vpp_gpe_ctx->vpp_batchbuffer.bo);

    return VA_STATUS_SUCCESS;
}

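/*
 * Emit the first-level batch: MI flush, media pipeline state, then
 * chain to the second-level batch built above via MI_BATCH_BUFFER_START.
 */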
static VAStatus
gen75_gpe_process_pipeline_setup(VADriverContextP ctx,
                   struct vpp_gpe_context *vpp_gpe_ctx)
{
    intel_batchbuffer_start_atomic(vpp_gpe_ctx->batch, 0x1000);
    intel_batchbuffer_emit_mi_flush(vpp_gpe_ctx->batch);

    gen6_gpe_pipeline_setup(ctx, &vpp_gpe_ctx->gpe_ctx, vpp_gpe_ctx->batch);

    gen75_gpe_process_parameters_fill(ctx, vpp_gpe_ctx);

    BEGIN_BATCH(vpp_gpe_ctx->batch, 2);
    OUT_BATCH(vpp_gpe_ctx->batch, MI_BATCH_BUFFER_START | (1 << 8));
    OUT_RELOC(vpp_gpe_ctx->batch,
              vpp_gpe_ctx->vpp_batchbuffer.bo,
              I915_GEM_DOMAIN_COMMAND, 0,
              0);
    ADVANCE_BATCH(vpp_gpe_ctx->batch);

    intel_batchbuffer_end_atomic(vpp_gpe_ctx->batch);

    return VA_STATUS_SUCCESS;
}

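/*
 * (Re)allocate the per-run buffer objects: the second-level batch
 * buffer sized for thread_num MEDIA_OBJECT commands plus the batch end,
 * and the kernel return buffer (16 bytes per thread), then let the GPE
 * context initialize its own state.
 */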
static VAStatus
gen75_gpe_process_init(VADriverContextP ctx,
                 struct vpp_gpe_context *vpp_gpe_ctx)
{
    struct i965_driver_data *i965 = i965_driver_data(ctx);
    dri_bo *bo;

    unsigned int batch_buf_size = vpp_gpe_ctx->thread_num *
                 (vpp_gpe_ctx->thread_param_size + 6 * sizeof(int)) + 16;

    vpp_gpe_ctx->vpp_kernel_return.num_blocks = vpp_gpe_ctx->thread_num;
    vpp_gpe_ctx->vpp_kernel_return.size_block = 16;
    vpp_gpe_ctx->vpp_kernel_return.pitch = 1;
    unsigned int kernel_return_size = vpp_gpe_ctx->vpp_kernel_return.num_blocks
           * vpp_gpe_ctx->vpp_kernel_return.size_block;

    dri_bo_unreference(vpp_gpe_ctx->vpp_batchbuffer.bo);
    bo = dri_bo_alloc(i965->intel.bufmgr,
                      "vpp batch buffer",
                       batch_buf_size, 0x1000);
    vpp_gpe_ctx->vpp_batchbuffer.bo = bo;
    dri_bo_reference(vpp_gpe_ctx->vpp_batchbuffer.bo);

    dri_bo_unreference(vpp_gpe_ctx->vpp_kernel_return.bo);
    bo = dri_bo_alloc(i965->intel.bufmgr,
                      "vpp kernel return buffer",
                       kernel_return_size, 0x1000);
    vpp_gpe_ctx->vpp_kernel_return.bo = bo;
    dri_bo_reference(vpp_gpe_ctx->vpp_kernel_return.bo);

    vpp_gpe_ctx->gpe_context_init(ctx, &vpp_gpe_ctx->gpe_ctx);

    return VA_STATUS_SUCCESS;
}

static VAStatus
gen75_gpe_process_prepare(VADriverContextP ctx,
                    struct vpp_gpe_context *vpp_gpe_ctx)
{
    /* Setup all the memory objects */
    gen75_gpe_process_surfaces_setup(ctx, vpp_gpe_ctx);
    gen75_gpe_process_interface_setup(ctx, vpp_gpe_ctx);
    //gen75_gpe_process_constant_setup(ctx, vpp_gpe_ctx);

    /* Program the media pipeline */
    gen75_gpe_process_pipeline_setup(ctx, vpp_gpe_ctx);

    return VA_STATUS_SUCCESS;
}

static VAStatus
gen75_gpe_process_run(VADriverContextP ctx,
                struct vpp_gpe_context *vpp_gpe_ctx)
{
    intel_batchbuffer_flush(vpp_gpe_ctx->batch);

    return VA_STATUS_SUCCESS;
}

static VAStatus
gen75_gpe_process(VADriverContextP ctx,
                  struct vpp_gpe_context * vpp_gpe_ctx)
{
    VAStatus va_status = VA_STATUS_SUCCESS;

    va_status = gen75_gpe_process_init(ctx, vpp_gpe_ctx);
    if (va_status != VA_STATUS_SUCCESS)
        return va_status;

    va_status = gen75_gpe_process_prepare(ctx, vpp_gpe_ctx);
    if (va_status != VA_STATUS_SUCCESS)
        return va_status;

    va_status = gen75_gpe_process_run(ctx, vpp_gpe_ctx);
    if (va_status != VA_STATUS_SUCCESS)
        return va_status;

    return VA_STATUS_SUCCESS;
}

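/*
 * The gen8 (Broadwell) path below mirrors the gen75 path, but uses the
 * gen8 surface state layout, keeps the interface descriptors and CURBE
 * in the combined dynamic state buffer, and appends a MEDIA_STATE_FLUSH
 * after each MEDIA_OBJECT command.
 */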
static VAStatus
gen8_gpe_process_surfaces_setup(VADriverContextP ctx,
                   struct vpp_gpe_context *vpp_gpe_ctx)
{
    struct object_surface *obj_surface;
    unsigned int i = 0;
    unsigned char input_surface_sum = (1 + vpp_gpe_ctx->forward_surf_sum +
                                         vpp_gpe_ctx->backward_surf_sum) * 2;

    /* Binding input NV12 surfaces (Luma + Chroma) */
    for( i = 0; i < input_surface_sum; i += 2){
         obj_surface = vpp_gpe_ctx->surface_input_object[i/2];
         assert(obj_surface);
         gen8_gpe_media_rw_surface_setup(ctx,
                                         &vpp_gpe_ctx->gpe_ctx,
                                         obj_surface,
                                         BINDING_TABLE_OFFSET_GEN8(i),
                                         SURFACE_STATE_OFFSET_GEN8(i));

         gen8_gpe_media_chroma_surface_setup(ctx,
                                         &vpp_gpe_ctx->gpe_ctx,
                                         obj_surface,
                                         BINDING_TABLE_OFFSET_GEN8(i + 1),
                                         SURFACE_STATE_OFFSET_GEN8(i + 1));
    }

    /* Binding output NV12 surface (Luma + Chroma) */
    obj_surface = vpp_gpe_ctx->surface_output_object;
    assert(obj_surface);
    gen8_gpe_media_rw_surface_setup(ctx,
                                    &vpp_gpe_ctx->gpe_ctx,
                                    obj_surface,
                                    BINDING_TABLE_OFFSET_GEN8(input_surface_sum),
                                    SURFACE_STATE_OFFSET_GEN8(input_surface_sum));
    gen8_gpe_media_chroma_surface_setup(ctx,
                                    &vpp_gpe_ctx->gpe_ctx,
                                    obj_surface,
                                    BINDING_TABLE_OFFSET_GEN8(input_surface_sum + 1),
                                    SURFACE_STATE_OFFSET_GEN8(input_surface_sum + 1));
    /* Bind kernel return buffer surface */
    gen7_gpe_buffer_suface_setup(ctx,
                                 &vpp_gpe_ctx->gpe_ctx,
                                 &vpp_gpe_ctx->vpp_kernel_return,
                                 BINDING_TABLE_OFFSET_GEN8(input_surface_sum + 2),
                                 SURFACE_STATE_OFFSET_GEN8(input_surface_sum + 2));

    return VA_STATUS_SUCCESS;
}

static VAStatus
gen8_gpe_process_interface_setup(VADriverContextP ctx,
                    struct vpp_gpe_context *vpp_gpe_ctx)
{
    struct gen8_interface_descriptor_data *desc;
    dri_bo *bo = vpp_gpe_ctx->gpe_ctx.dynamic_state.bo;
    int i;

    dri_bo_map(bo, 1);
    assert(bo->virtual);
    desc = (struct gen8_interface_descriptor_data *)(bo->virtual
                               + vpp_gpe_ctx->gpe_ctx.idrt_offset);

    /* Setup the descriptor table */
    for (i = 0; i < vpp_gpe_ctx->sub_shader_sum; i++){
        struct i965_kernel *kernel;
        kernel = &vpp_gpe_ctx->gpe_ctx.kernels[i];
        assert(sizeof(*desc) == 32);
        memset(desc, 0, sizeof(*desc));
        desc->desc0.kernel_start_pointer = kernel->kernel_offset >> 6;
        desc->desc3.sampler_count = 0; /* FIXME: */
        desc->desc3.sampler_state_pointer = 0;
        desc->desc4.binding_table_entry_count = 6; /* FIXME: */
        desc->desc4.binding_table_pointer = (BINDING_TABLE_OFFSET_GEN8(0) >> 5);
        desc->desc5.constant_urb_entry_read_offset = 0;
        desc->desc5.constant_urb_entry_read_length = 0;

        desc++;
    }

    dri_bo_unmap(bo);

    return VA_STATUS_SUCCESS;
}

static VAStatus
gen8_gpe_process_constant_fill(VADriverContextP ctx,
                   struct vpp_gpe_context *vpp_gpe_ctx)
{
    dri_bo_map(vpp_gpe_ctx->gpe_ctx.dynamic_state.bo, 1);
    assert(vpp_gpe_ctx->gpe_ctx.dynamic_state.bo->virtual);
    unsigned char* constant_buffer = vpp_gpe_ctx->gpe_ctx.dynamic_state.bo->virtual;
    memcpy(constant_buffer, vpp_gpe_ctx->kernel_param,
                            vpp_gpe_ctx->kernel_param_size);
    dri_bo_unmap(vpp_gpe_ctx->gpe_ctx.dynamic_state.bo);

    return VA_STATUS_SUCCESS;
}

static VAStatus
gen8_gpe_process_parameters_fill(VADriverContextP ctx,
                           struct vpp_gpe_context *vpp_gpe_ctx)
{
    unsigned int *command_ptr;
    unsigned int i, size = vpp_gpe_ctx->thread_param_size;
    unsigned char* position = NULL;

    /* Thread inline data setting */
    dri_bo_map(vpp_gpe_ctx->vpp_batchbuffer.bo, 1);
    command_ptr = vpp_gpe_ctx->vpp_batchbuffer.bo->virtual;

    for(i = 0; i < vpp_gpe_ctx->thread_num; i++)
    {
         *command_ptr++ = (CMD_MEDIA_OBJECT | (size/sizeof(int) + 6 - 2));
         *command_ptr++ = vpp_gpe_ctx->sub_shader_index;
         *command_ptr++ = 0;
         *command_ptr++ = 0;
         *command_ptr++ = 0;
         *command_ptr++ = 0;

         /* copy thread inline data */
         position = (unsigned char*)(vpp_gpe_ctx->thread_param + size * i);
         memcpy(command_ptr, position, size);
         command_ptr += size/sizeof(int);

         *command_ptr++ = CMD_MEDIA_STATE_FLUSH;
         *command_ptr++ = 0;
    }

    *command_ptr++ = 0;
    *command_ptr++ = MI_BATCH_BUFFER_END;

    dri_bo_unmap(vpp_gpe_ctx->vpp_batchbuffer.bo);

    return VA_STATUS_SUCCESS;
}

static VAStatus
gen8_gpe_process_pipeline_setup(VADriverContextP ctx,
                   struct vpp_gpe_context *vpp_gpe_ctx)
{
    intel_batchbuffer_start_atomic(vpp_gpe_ctx->batch, 0x1000);
    intel_batchbuffer_emit_mi_flush(vpp_gpe_ctx->batch);

    gen8_gpe_pipeline_setup(ctx, &vpp_gpe_ctx->gpe_ctx, vpp_gpe_ctx->batch);

    gen8_gpe_process_parameters_fill(ctx, vpp_gpe_ctx);

    BEGIN_BATCH(vpp_gpe_ctx->batch, 3);
    OUT_BATCH(vpp_gpe_ctx->batch, MI_BATCH_BUFFER_START | (1 << 8) | (1 << 0));
    OUT_RELOC(vpp_gpe_ctx->batch,
              vpp_gpe_ctx->vpp_batchbuffer.bo,
              I915_GEM_DOMAIN_COMMAND, 0,
              0);
    OUT_BATCH(vpp_gpe_ctx->batch, 0);

    ADVANCE_BATCH(vpp_gpe_ctx->batch);

    intel_batchbuffer_end_atomic(vpp_gpe_ctx->batch);

    return VA_STATUS_SUCCESS;
}

static VAStatus
gen8_gpe_process_init(VADriverContextP ctx,
                 struct vpp_gpe_context *vpp_gpe_ctx)
{
    struct i965_driver_data *i965 = i965_driver_data(ctx);
    dri_bo *bo;

    unsigned int batch_buf_size = vpp_gpe_ctx->thread_num *
                 (vpp_gpe_ctx->thread_param_size + 6 * sizeof(int)) + 16;

    vpp_gpe_ctx->vpp_kernel_return.num_blocks = vpp_gpe_ctx->thread_num;
    vpp_gpe_ctx->vpp_kernel_return.size_block = 16;
    vpp_gpe_ctx->vpp_kernel_return.pitch = 1;

    unsigned int kernel_return_size = vpp_gpe_ctx->vpp_kernel_return.num_blocks
           * vpp_gpe_ctx->vpp_kernel_return.size_block;

    dri_bo_unreference(vpp_gpe_ctx->vpp_batchbuffer.bo);
    bo = dri_bo_alloc(i965->intel.bufmgr,
                      "vpp batch buffer",
                       batch_buf_size, 0x1000);
    vpp_gpe_ctx->vpp_batchbuffer.bo = bo;
    dri_bo_reference(vpp_gpe_ctx->vpp_batchbuffer.bo);

    dri_bo_unreference(vpp_gpe_ctx->vpp_kernel_return.bo);
    bo = dri_bo_alloc(i965->intel.bufmgr,
                      "vpp kernel return buffer",
                       kernel_return_size, 0x1000);
    vpp_gpe_ctx->vpp_kernel_return.bo = bo;
    dri_bo_reference(vpp_gpe_ctx->vpp_kernel_return.bo);

    vpp_gpe_ctx->gpe_context_init(ctx, &vpp_gpe_ctx->gpe_ctx);

    return VA_STATUS_SUCCESS;
}

static VAStatus
gen8_gpe_process_prepare(VADriverContextP ctx,
                    struct vpp_gpe_context *vpp_gpe_ctx)
{
    /* Setup all the memory objects */
    gen8_gpe_process_surfaces_setup(ctx, vpp_gpe_ctx);
    gen8_gpe_process_interface_setup(ctx, vpp_gpe_ctx);
    //gen8_gpe_process_constant_setup(ctx, vpp_gpe_ctx);

    /* Program the media pipeline */
    gen8_gpe_process_pipeline_setup(ctx, vpp_gpe_ctx);

    return VA_STATUS_SUCCESS;
}

static VAStatus
gen8_gpe_process_run(VADriverContextP ctx,
                struct vpp_gpe_context *vpp_gpe_ctx)
{
    intel_batchbuffer_flush(vpp_gpe_ctx->batch);

    return VA_STATUS_SUCCESS;
}

static VAStatus
gen8_gpe_process(VADriverContextP ctx,
                  struct vpp_gpe_context * vpp_gpe_ctx)
{
    VAStatus va_status = VA_STATUS_SUCCESS;

    va_status = gen8_gpe_process_init(ctx, vpp_gpe_ctx);
    if (va_status != VA_STATUS_SUCCESS)
        return va_status;

    va_status = gen8_gpe_process_prepare(ctx, vpp_gpe_ctx);
    if (va_status != VA_STATUS_SUCCESS)
        return va_status;

    va_status = gen8_gpe_process_run(ctx, vpp_gpe_ctx);
    if (va_status != VA_STATUS_SUCCESS)
        return va_status;

    return VA_STATUS_SUCCESS;
}

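/* Dispatch to the platform-specific GPE path (Haswell or gen8/Broadwell). */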
static VAStatus
vpp_gpe_process(VADriverContextP ctx,
                  struct vpp_gpe_context * vpp_gpe_ctx)
{
    struct i965_driver_data *i965 = i965_driver_data(ctx);
    if (IS_HASWELL(i965->intel.device_info))
       return gen75_gpe_process(ctx, vpp_gpe_ctx);
    else if (IS_GEN8(i965->intel.device_info))
       return gen8_gpe_process(ctx, vpp_gpe_ctx);

    return VA_STATUS_ERROR_UNIMPLEMENTED;
}

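/*
 * Sharpening (unsharp-mask style) in three GPE passes:
 *   1. horizontal blur of the input into the output surface
 *      (16-row strips, one thread each),
 *   2. vertical blur of that result into a temporary surface
 *      (16-column strips),
 *   3. unmask pass that combines the blurred temporary with the
 *      original input and writes the sharpened output (4-row strips).
 */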
static VAStatus
vpp_gpe_process_sharpening(VADriverContextP ctx,
                             struct vpp_gpe_context * vpp_gpe_ctx)
{
     VAStatus va_status = VA_STATUS_SUCCESS;
     struct i965_driver_data *i965 = i965_driver_data(ctx);
     struct object_surface *origin_in_obj_surface = vpp_gpe_ctx->surface_input_object[0];
     struct object_surface *origin_out_obj_surface = vpp_gpe_ctx->surface_output_object;

     VAProcPipelineParameterBuffer* pipe = vpp_gpe_ctx->pipeline_param;
     VABufferID *filter_ids = (VABufferID*)pipe->filters;
     struct object_buffer *obj_buf = BUFFER(*(filter_ids + 0));

     assert(obj_buf && obj_buf->buffer_store && obj_buf->buffer_store->buffer);

     if (!obj_buf ||
         !obj_buf->buffer_store ||
         !obj_buf->buffer_store->buffer)
         goto error;

     VAProcFilterParameterBuffer* filter =
                  (VAProcFilterParameterBuffer*)obj_buf->buffer_store->buffer;
     float sharpening_intensity = filter->value;

     ThreadParameterSharpening thr_param;
     unsigned int thr_param_size = sizeof(ThreadParameterSharpening);
     unsigned int i;
     unsigned char * pos;

     if(vpp_gpe_ctx->is_first_frame){
         vpp_gpe_ctx->sub_shader_sum = 3;
         struct i965_kernel * vpp_kernels = NULL;
         if (IS_HASWELL(i965->intel.device_info))
             vpp_kernels = gen75_vpp_sharpening_kernels;
         else if (IS_GEN8(i965->intel.device_info))
             vpp_kernels = gen8_vpp_sharpening_kernels;

         vpp_gpe_ctx->gpe_load_kernels(ctx,
                               &vpp_gpe_ctx->gpe_ctx,
                               vpp_kernels,
                               vpp_gpe_ctx->sub_shader_sum);
     }

     if(vpp_gpe_ctx->surface_tmp == VA_INVALID_ID){
        va_status = i965_CreateSurfaces(ctx,
                                       vpp_gpe_ctx->in_frame_w,
                                       vpp_gpe_ctx->in_frame_h,
                                       VA_RT_FORMAT_YUV420,
                                       1,
                                       &vpp_gpe_ctx->surface_tmp);
        assert(va_status == VA_STATUS_SUCCESS);

        struct object_surface * obj_surf = SURFACE(vpp_gpe_ctx->surface_tmp);
        assert(obj_surf);

        if (obj_surf) {
            i965_check_alloc_surface_bo(ctx, obj_surf, 1, VA_FOURCC_NV12,
                                        SUBSAMPLE_YUV420);
            vpp_gpe_ctx->surface_tmp_object = obj_surf;
        }
    }

    assert(sharpening_intensity >= 0.0 && sharpening_intensity <= 1.0);
    thr_param.l_amount = (unsigned int)(sharpening_intensity * 128);
    thr_param.d_amount = (unsigned int)(sharpening_intensity * 128);

    thr_param.base.pic_width = vpp_gpe_ctx->in_frame_w;
    thr_param.base.pic_height = vpp_gpe_ctx->in_frame_h;

    /* Step 1: horizontal blur process */
    vpp_gpe_ctx->forward_surf_sum = 0;
    vpp_gpe_ctx->backward_surf_sum = 0;

    vpp_gpe_ctx->thread_num = vpp_gpe_ctx->in_frame_h/16;
    vpp_gpe_ctx->thread_param_size = thr_param_size;
    vpp_gpe_ctx->thread_param = (unsigned char*) malloc(vpp_gpe_ctx->thread_param_size
                                                       * vpp_gpe_ctx->thread_num);
    pos = vpp_gpe_ctx->thread_param;

    if (!pos) {
        return VA_STATUS_ERROR_ALLOCATION_FAILED;
    }

    for( i = 0 ; i < vpp_gpe_ctx->thread_num; i++){
        thr_param.base.v_pos = 16 * i;
        thr_param.base.h_pos = 0;
        memcpy(pos, &thr_param, thr_param_size);
        pos += thr_param_size;
    }

    vpp_gpe_ctx->sub_shader_index = 0;
    va_status = vpp_gpe_process(ctx, vpp_gpe_ctx);
    free(vpp_gpe_ctx->thread_param);

    /* Step 2: vertical blur process */
    vpp_gpe_ctx->surface_input_object[0] = vpp_gpe_ctx->surface_output_object;
    vpp_gpe_ctx->surface_output_object = vpp_gpe_ctx->surface_tmp_object;
    vpp_gpe_ctx->forward_surf_sum = 0;
    vpp_gpe_ctx->backward_surf_sum = 0;

    vpp_gpe_ctx->thread_num = vpp_gpe_ctx->in_frame_w/16;
    vpp_gpe_ctx->thread_param_size = thr_param_size;
    vpp_gpe_ctx->thread_param = (unsigned char*) malloc(vpp_gpe_ctx->thread_param_size
                                                       * vpp_gpe_ctx->thread_num);
    pos = vpp_gpe_ctx->thread_param;

    if (!pos) {
        return VA_STATUS_ERROR_ALLOCATION_FAILED;
    }

    for( i = 0 ; i < vpp_gpe_ctx->thread_num; i++){
        thr_param.base.v_pos = 0;
        thr_param.base.h_pos = 16 * i;
        memcpy(pos, &thr_param, thr_param_size);
        pos += thr_param_size;
    }

    vpp_gpe_ctx->sub_shader_index = 1;
    vpp_gpe_process(ctx, vpp_gpe_ctx);
    free(vpp_gpe_ctx->thread_param);

    /* Step 3: apply the blur to the original surface */
    vpp_gpe_ctx->surface_input_object[0]  = origin_in_obj_surface;
    vpp_gpe_ctx->surface_input_object[1]  = vpp_gpe_ctx->surface_tmp_object;
    vpp_gpe_ctx->surface_output_object    = origin_out_obj_surface;
    vpp_gpe_ctx->forward_surf_sum  = 1;
    vpp_gpe_ctx->backward_surf_sum = 0;

    vpp_gpe_ctx->thread_num = vpp_gpe_ctx->in_frame_h/4;
    vpp_gpe_ctx->thread_param_size = thr_param_size;
    vpp_gpe_ctx->thread_param = (unsigned char*) malloc(vpp_gpe_ctx->thread_param_size
                                                       * vpp_gpe_ctx->thread_num);
    pos = vpp_gpe_ctx->thread_param;

    if (!pos) {
        return VA_STATUS_ERROR_ALLOCATION_FAILED;
    }

    for( i = 0 ; i < vpp_gpe_ctx->thread_num; i++){
        thr_param.base.v_pos = 4 * i;
        thr_param.base.h_pos = 0;
        memcpy(pos, &thr_param, thr_param_size);
        pos += thr_param_size;
    }

    vpp_gpe_ctx->sub_shader_index = 2;
    va_status = vpp_gpe_process(ctx, vpp_gpe_ctx);
    free(vpp_gpe_ctx->thread_param);

    return va_status;

error:
    return VA_STATUS_ERROR_INVALID_PARAMETER;
}

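/*
 * Entry point called from the VPP pipeline: locate a sharpening filter
 * in the pipeline parameter buffer, collect the input/output and
 * reference surfaces, and run the sharpening passes. Other filter
 * types are not handled here.
 */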
VAStatus vpp_gpe_process_picture(VADriverContextP ctx,
                    struct vpp_gpe_context * vpp_gpe_ctx)
{
    VAStatus va_status = VA_STATUS_SUCCESS;
    struct i965_driver_data *i965 = i965_driver_data(ctx);
    VAProcPipelineParameterBuffer* pipe = vpp_gpe_ctx->pipeline_param;
    VAProcFilterParameterBuffer* filter = NULL;
    unsigned int i;
    struct object_surface *obj_surface = NULL;

    if (pipe->num_filters && !pipe->filters)
        goto error;

    for(i = 0; i < pipe->num_filters; i++){
        struct object_buffer *obj_buf = BUFFER(pipe->filters[i]);

        assert(obj_buf && obj_buf->buffer_store && obj_buf->buffer_store->buffer);

        if (!obj_buf ||
            !obj_buf->buffer_store ||
            !obj_buf->buffer_store->buffer)
            goto error;

        filter = (VAProcFilterParameterBuffer*)obj_buf->buffer_store->buffer;
        if(filter->type == VAProcFilterSharpening){
           break;
        }
    }

    assert(pipe->num_forward_references + pipe->num_backward_references <= 4);
    vpp_gpe_ctx->surface_input_object[0] = vpp_gpe_ctx->surface_pipeline_input_object;

    vpp_gpe_ctx->forward_surf_sum = 0;
    vpp_gpe_ctx->backward_surf_sum = 0;

    for(i = 0; i < pipe->num_forward_references; i++)
    {
        obj_surface = SURFACE(pipe->forward_references[i]);

        assert(obj_surface);
        vpp_gpe_ctx->surface_input_object[i + 1] = obj_surface;
        vpp_gpe_ctx->forward_surf_sum++;
    }

    for(i = 0; i < pipe->num_backward_references; i++)
    {
        obj_surface = SURFACE(pipe->backward_references[i]);

        assert(obj_surface);
        vpp_gpe_ctx->surface_input_object[vpp_gpe_ctx->forward_surf_sum + 1 + i] = obj_surface;
        vpp_gpe_ctx->backward_surf_sum++;
    }

    obj_surface = vpp_gpe_ctx->surface_input_object[0];
    vpp_gpe_ctx->in_frame_w = obj_surface->orig_width;
    vpp_gpe_ctx->in_frame_h = obj_surface->orig_height;

    if(filter && filter->type == VAProcFilterSharpening) {
       va_status = vpp_gpe_process_sharpening(ctx, vpp_gpe_ctx);
    } else {
       va_status = VA_STATUS_ERROR_ATTR_NOT_SUPPORTED;
    }

    vpp_gpe_ctx->is_first_frame = 0;

    return va_status;

error:
    return VA_STATUS_ERROR_INVALID_PARAMETER;
}

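/*
 * Release everything owned by the VPP GPE context: the second-level
 * batch buffer, the kernel return buffer, the underlying GPE context,
 * the temporary surface (if one was created), and the batch itself.
 */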
void
vpp_gpe_context_destroy(VADriverContextP ctx,
                               struct vpp_gpe_context *vpp_gpe_ctx)
{
    dri_bo_unreference(vpp_gpe_ctx->vpp_batchbuffer.bo);
    vpp_gpe_ctx->vpp_batchbuffer.bo = NULL;

    dri_bo_unreference(vpp_gpe_ctx->vpp_kernel_return.bo);
    vpp_gpe_ctx->vpp_kernel_return.bo = NULL;

    vpp_gpe_ctx->gpe_context_destroy(&vpp_gpe_ctx->gpe_ctx);

    if(vpp_gpe_ctx->surface_tmp != VA_INVALID_ID){
        assert(vpp_gpe_ctx->surface_tmp_object != NULL);
        i965_DestroySurfaces(ctx, &vpp_gpe_ctx->surface_tmp, 1);
        vpp_gpe_ctx->surface_tmp = VA_INVALID_ID;
        vpp_gpe_ctx->surface_tmp_object = NULL;
    }

    /* free the batch through its own destructor so its bo is released too */
    intel_batchbuffer_free(vpp_gpe_ctx->batch);

    free(vpp_gpe_ctx);
}

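/*
 * Allocate and set up a VPP GPE context. The Haswell path uses the
 * i965 GPE helpers with separate CURBE and IDRT buffers; the gen8 path
 * uses the gen8 helpers with a combined dynamic state buffer. Only
 * Haswell and gen8 hardware are supported (asserted below).
 */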
struct vpp_gpe_context *
vpp_gpe_context_init(VADriverContextP ctx)
{
    struct i965_driver_data *i965 = i965_driver_data(ctx);
    struct vpp_gpe_context  *vpp_gpe_ctx = calloc(1, sizeof(struct vpp_gpe_context));
    struct i965_gpe_context *gpe_ctx = &(vpp_gpe_ctx->gpe_ctx);

    assert(IS_HASWELL(i965->intel.device_info) ||
           IS_GEN8(i965->intel.device_info));

    vpp_gpe_ctx->surface_tmp = VA_INVALID_ID;
    vpp_gpe_ctx->surface_tmp_object = NULL;
    vpp_gpe_ctx->batch = intel_batchbuffer_new(&i965->intel, I915_EXEC_RENDER, 0);
    vpp_gpe_ctx->is_first_frame = 1;

    gpe_ctx->vfe_state.max_num_threads = 60 - 1;
    gpe_ctx->vfe_state.num_urb_entries = 16;
    gpe_ctx->vfe_state.gpgpu_mode = 0;
    gpe_ctx->vfe_state.urb_entry_size = 59 - 1;
    gpe_ctx->vfe_state.curbe_allocation_size = CURBE_ALLOCATION_SIZE - 1;

    if (IS_HASWELL(i965->intel.device_info)) {
        vpp_gpe_ctx->gpe_context_init     = i965_gpe_context_init;
        vpp_gpe_ctx->gpe_context_destroy  = i965_gpe_context_destroy;
        vpp_gpe_ctx->gpe_load_kernels     = i965_gpe_load_kernels;
        gpe_ctx->surface_state_binding_table.length =
               (SURFACE_STATE_PADDED_SIZE_GEN7 + sizeof(unsigned int)) * MAX_MEDIA_SURFACES_GEN6;

        gpe_ctx->curbe.length = CURBE_TOTAL_DATA_LENGTH;
        gpe_ctx->idrt.max_entries = MAX_INTERFACE_DESC_GEN6;
        gpe_ctx->idrt.entry_size = sizeof(struct gen6_interface_descriptor_data);

    } else if (IS_GEN8(i965->intel.device_info)) {
        vpp_gpe_ctx->gpe_context_init     = gen8_gpe_context_init;
        vpp_gpe_ctx->gpe_context_destroy  = gen8_gpe_context_destroy;
        vpp_gpe_ctx->gpe_load_kernels     = gen8_gpe_load_kernels;
        gpe_ctx->surface_state_binding_table.length =
               (SURFACE_STATE_PADDED_SIZE_GEN8 + sizeof(unsigned int)) * MAX_MEDIA_SURFACES_GEN6;

        gpe_ctx->curbe_size = CURBE_TOTAL_DATA_LENGTH;
        gpe_ctx->idrt_size  = sizeof(struct gen8_interface_descriptor_data) * MAX_INTERFACE_DESC_GEN6;

    }

    return vpp_gpe_ctx;
}