/*
 * Copyright © 2011 Intel Corporation
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the
 * "Software"), to deal in the Software without restriction, including
 * without limitation the rights to use, copy, modify, merge, publish,
 * distribute, sub license, and/or sell copies of the Software, and to
 * permit persons to whom the Software is furnished to do so, subject to
 * the following conditions:
 *
 * The above copyright notice and this permission notice (including the
 * next paragraph) shall be included in all copies or substantial portions
 * of the Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
 * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
 * IN NO EVENT SHALL PRECISION INSIGHT AND/OR ITS SUPPLIERS BE LIABLE FOR
 * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
 * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
 * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
 *
 * Authors:
 *   Li Xiaowei <xiaowei.a.li@intel.com>
 */
27
28 #include <stdio.h>
29 #include <stdlib.h>
30 #include <string.h>
31 #include <assert.h>
32
33 #include "intel_batchbuffer.h"
34 #include "intel_driver.h"
35
36 #include "i965_structs.h"
37 #include "i965_defines.h"
38 #include "i965_drv_video.h"
39 #include "gen75_vpp_gpe.h"
40
#define MAX_INTERFACE_DESC_GEN6      MAX_GPE_KERNELS
#define MAX_MEDIA_SURFACES_GEN6      34

#define SURFACE_STATE_OFFSET_GEN7(index)   (SURFACE_STATE_PADDED_SIZE_GEN7 * (index))
#define BINDING_TABLE_OFFSET_GEN7(index)   (SURFACE_STATE_OFFSET_GEN7(MAX_MEDIA_SURFACES_GEN6) + sizeof(unsigned int) * (index))

#define SURFACE_STATE_OFFSET_GEN8(index)   (SURFACE_STATE_PADDED_SIZE_GEN8 * (index))
#define BINDING_TABLE_OFFSET_GEN8(index)   (SURFACE_STATE_OFFSET_GEN8(MAX_MEDIA_SURFACES_GEN6) + sizeof(unsigned int) * (index))

#define CURBE_ALLOCATION_SIZE   37
#define CURBE_TOTAL_DATA_LENGTH (4 * 32)
#define CURBE_URB_ENTRY_LENGTH  4

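/*
 * Layout implied by the macros above: the surface-state buffer first holds
 * MAX_MEDIA_SURFACES_GEN6 (34) padded surface-state entries, immediately
 * followed by the binding table, an array of unsigned int slots.  So
 * BINDING_TABLE_OFFSET_GENx(i) addresses the i-th binding-table slot, which
 * is programmed with SURFACE_STATE_OFFSET_GENx(i) so the hardware can find
 * the i-th surface state.
 */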
extern VAStatus
i965_CreateSurfaces(VADriverContextP ctx,
                    int width,
                    int height,
                    int format,
                    int num_surfaces,
                    VASurfaceID *surfaces);

/* Sharpening kernels for Haswell */
static const unsigned int gen75_gpe_sharpening_h_blur[][4] = {
    #include "shaders/post_processing/gen75/sharpening_h_blur.g75b"
};
static const unsigned int gen75_gpe_sharpening_v_blur[][4] = {
    #include "shaders/post_processing/gen75/sharpening_v_blur.g75b"
};
static const unsigned int gen75_gpe_sharpening_unmask[][4] = {
    #include "shaders/post_processing/gen75/sharpening_unmask.g75b"
};
static struct i965_kernel gen75_vpp_sharpening_kernels[] = {
    {
        "vpp: sharpening(horizontal blur)",
        VPP_GPE_SHARPENING,
        gen75_gpe_sharpening_h_blur,
        sizeof(gen75_gpe_sharpening_h_blur),
        NULL
    },
    {
        "vpp: sharpening(vertical blur)",
        VPP_GPE_SHARPENING,
        gen75_gpe_sharpening_v_blur,
        sizeof(gen75_gpe_sharpening_v_blur),
        NULL
    },
    {
        "vpp: sharpening(unmask)",
        VPP_GPE_SHARPENING,
        gen75_gpe_sharpening_unmask,
        sizeof(gen75_gpe_sharpening_unmask),
        NULL
    },
};

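/*
 * These three kernels implement sharpening as unsharp masking; see
 * vpp_gpe_process_sharpening() below, which runs them in sequence:
 * a horizontal blur, a vertical blur, then an "unmask" pass that
 * combines the blurred copy with the original frame.
 */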
/* Sharpening kernels for Broadwell */
static const unsigned int gen8_gpe_sharpening_h_blur[][4] = {
    #include "shaders/post_processing/gen8/sharpening_h_blur.g8b"
};
static const unsigned int gen8_gpe_sharpening_v_blur[][4] = {
    #include "shaders/post_processing/gen8/sharpening_v_blur.g8b"
};
static const unsigned int gen8_gpe_sharpening_unmask[][4] = {
    #include "shaders/post_processing/gen8/sharpening_unmask.g8b"
};

static struct i965_kernel gen8_vpp_sharpening_kernels[] = {
    {
        "vpp: sharpening(horizontal blur)",
        VPP_GPE_SHARPENING,
        gen8_gpe_sharpening_h_blur,
        sizeof(gen8_gpe_sharpening_h_blur),
        NULL
    },
    {
        "vpp: sharpening(vertical blur)",
        VPP_GPE_SHARPENING,
        gen8_gpe_sharpening_v_blur,
        sizeof(gen8_gpe_sharpening_v_blur),
        NULL
    },
    {
        "vpp: sharpening(unmask)",
        VPP_GPE_SHARPENING,
        gen8_gpe_sharpening_unmask,
        sizeof(gen8_gpe_sharpening_unmask),
        NULL
    },
};

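/*
 * Bind all surfaces the kernels access.  Binding-table slots are assigned
 * in pairs (even index = luma plane, odd index = chroma plane of the same
 * NV12 surface): first the current input and any forward/backward
 * reference surfaces, then the output surface, and finally the kernel
 * return buffer.
 */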
static VAStatus
gen75_gpe_process_surfaces_setup(VADriverContextP ctx,
                   struct vpp_gpe_context *vpp_gpe_ctx)
{
    struct object_surface *obj_surface;
    unsigned int i = 0;
    unsigned char input_surface_sum = (1 + vpp_gpe_ctx->forward_surf_sum +
                                       vpp_gpe_ctx->backward_surf_sum) * 2;

    /* Bind the input NV12 surfaces (luma + chroma) */
    for (i = 0; i < input_surface_sum; i += 2) {
        obj_surface = vpp_gpe_ctx->surface_input_object[i / 2];
        assert(obj_surface);
        gen7_gpe_media_rw_surface_setup(ctx,
                                        &vpp_gpe_ctx->gpe_ctx,
                                        obj_surface,
                                        BINDING_TABLE_OFFSET_GEN7(i),
                                        SURFACE_STATE_OFFSET_GEN7(i));

        gen75_gpe_media_chroma_surface_setup(ctx,
                                             &vpp_gpe_ctx->gpe_ctx,
                                             obj_surface,
                                             BINDING_TABLE_OFFSET_GEN7(i + 1),
                                             SURFACE_STATE_OFFSET_GEN7(i + 1));
    }

    /* Bind the output NV12 surface (luma + chroma) */
    obj_surface = vpp_gpe_ctx->surface_output_object;
    assert(obj_surface);
    gen7_gpe_media_rw_surface_setup(ctx,
                                    &vpp_gpe_ctx->gpe_ctx,
                                    obj_surface,
                                    BINDING_TABLE_OFFSET_GEN7(input_surface_sum),
                                    SURFACE_STATE_OFFSET_GEN7(input_surface_sum));
    gen75_gpe_media_chroma_surface_setup(ctx,
                                         &vpp_gpe_ctx->gpe_ctx,
                                         obj_surface,
                                         BINDING_TABLE_OFFSET_GEN7(input_surface_sum + 1),
                                         SURFACE_STATE_OFFSET_GEN7(input_surface_sum + 1));
    /* Bind the kernel return buffer surface */
    gen7_gpe_buffer_suface_setup(ctx,
                                 &vpp_gpe_ctx->gpe_ctx,
                                 &vpp_gpe_ctx->vpp_kernel_return,
                                 BINDING_TABLE_OFFSET_GEN7(input_surface_sum + 2),
                                 SURFACE_STATE_OFFSET_GEN7(input_surface_sum + 2));

    return VA_STATUS_SUCCESS;
}

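/*
 * Write one 32-byte interface descriptor per loaded sub-kernel so that a
 * MEDIA_OBJECT command can select a shader by descriptor index (the
 * sub_shader_index written in gen75_gpe_process_parameters_fill()).
 * Kernel start pointers are 64-byte aligned, hence the ">> 6"; the
 * relocation keeps desc0 pointing at the kernel BO wherever it lands.
 */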
static VAStatus
gen75_gpe_process_interface_setup(VADriverContextP ctx,
                    struct vpp_gpe_context *vpp_gpe_ctx)
{
    struct gen6_interface_descriptor_data *desc;
    dri_bo *bo = vpp_gpe_ctx->gpe_ctx.idrt.bo;
    int i;

    dri_bo_map(bo, 1);
    assert(bo->virtual);
    desc = bo->virtual;

    /* Set up the descriptor table */
    for (i = 0; i < vpp_gpe_ctx->sub_shader_sum; i++) {
        struct i965_kernel *kernel = &vpp_gpe_ctx->gpe_ctx.kernels[i];
        assert(sizeof(*desc) == 32);
        memset(desc, 0, sizeof(*desc));
        desc->desc0.kernel_start_pointer = (kernel->bo->offset >> 6);
        desc->desc2.sampler_count = 0; /* FIXME: */
        desc->desc2.sampler_state_pointer = 0;
        desc->desc3.binding_table_entry_count = 6; /* FIXME: */
        desc->desc3.binding_table_pointer = (BINDING_TABLE_OFFSET_GEN7(0) >> 5);
        desc->desc4.constant_urb_entry_read_offset = 0;
        desc->desc4.constant_urb_entry_read_length = 0;

        dri_bo_emit_reloc(bo,
                          I915_GEM_DOMAIN_INSTRUCTION, 0,
                          0,
                          i * sizeof(*desc) + offsetof(struct gen6_interface_descriptor_data, desc0),
                          kernel->bo);
        desc++;
    }

    dri_bo_unmap(bo);

    return VA_STATUS_SUCCESS;
}

static VAStatus
gen75_gpe_process_constant_fill(VADriverContextP ctx,
                   struct vpp_gpe_context *vpp_gpe_ctx)
{
    dri_bo_map(vpp_gpe_ctx->gpe_ctx.curbe.bo, 1);
    assert(vpp_gpe_ctx->gpe_ctx.curbe.bo->virtual);
    unsigned char *constant_buffer = vpp_gpe_ctx->gpe_ctx.curbe.bo->virtual;
    memcpy(constant_buffer, vpp_gpe_ctx->kernel_param,
           vpp_gpe_ctx->kernel_param_size);
    dri_bo_unmap(vpp_gpe_ctx->gpe_ctx.curbe.bo);

    return VA_STATUS_SUCCESS;
}

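/*
 * Build the second-level batch buffer: one MEDIA_OBJECT command per thread,
 * each consisting of six header DWORDs (opcode/length, the
 * interface-descriptor index selecting the sub-kernel, and four zeroed
 * DWORDs) followed by that thread's inline data.  The command's length
 * field excludes its first two DWORDs, which is where
 * "size/sizeof(int) + 6 - 2" comes from.
 */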
static VAStatus
gen75_gpe_process_parameters_fill(VADriverContextP ctx,
                           struct vpp_gpe_context *vpp_gpe_ctx)
{
    unsigned int *command_ptr;
    unsigned int i, size = vpp_gpe_ctx->thread_param_size;
    unsigned char *position = NULL;

    /* Set up the per-thread inline data */
    dri_bo_map(vpp_gpe_ctx->vpp_batchbuffer.bo, 1);
    command_ptr = vpp_gpe_ctx->vpp_batchbuffer.bo->virtual;

    for (i = 0; i < vpp_gpe_ctx->thread_num; i++) {
        *command_ptr++ = (CMD_MEDIA_OBJECT | (size / sizeof(int) + 6 - 2));
        *command_ptr++ = vpp_gpe_ctx->sub_shader_index;
        *command_ptr++ = 0;
        *command_ptr++ = 0;
        *command_ptr++ = 0;
        *command_ptr++ = 0;

        /* copy thread inline data */
        position = (unsigned char *)(vpp_gpe_ctx->thread_param + size * i);
        memcpy(command_ptr, position, size);
        command_ptr += size / sizeof(int);
    }

    *command_ptr++ = 0;
    *command_ptr++ = MI_BATCH_BUFFER_END;

    dri_bo_unmap(vpp_gpe_ctx->vpp_batchbuffer.bo);

    return VA_STATUS_SUCCESS;
}

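/*
 * Program the media pipeline state into the primary batch, then chain to
 * the MEDIA_OBJECT batch built above.  Bit 8 of MI_BATCH_BUFFER_START
 * marks a second-level batch, so execution returns to the primary batch
 * after the chained buffer's MI_BATCH_BUFFER_END.
 */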
static VAStatus
gen75_gpe_process_pipeline_setup(VADriverContextP ctx,
                   struct vpp_gpe_context *vpp_gpe_ctx)
{
    intel_batchbuffer_start_atomic(vpp_gpe_ctx->batch, 0x1000);
    intel_batchbuffer_emit_mi_flush(vpp_gpe_ctx->batch);

    gen6_gpe_pipeline_setup(ctx, &vpp_gpe_ctx->gpe_ctx, vpp_gpe_ctx->batch);

    gen75_gpe_process_parameters_fill(ctx, vpp_gpe_ctx);

    BEGIN_BATCH(vpp_gpe_ctx->batch, 2);
    OUT_BATCH(vpp_gpe_ctx->batch, MI_BATCH_BUFFER_START | (1 << 8));
    OUT_RELOC(vpp_gpe_ctx->batch,
              vpp_gpe_ctx->vpp_batchbuffer.bo,
              I915_GEM_DOMAIN_COMMAND, 0,
              0);
    ADVANCE_BATCH(vpp_gpe_ctx->batch);

    intel_batchbuffer_end_atomic(vpp_gpe_ctx->batch);

    return VA_STATUS_SUCCESS;
}

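/*
 * (Re)allocate the per-run buffers.  The second-level batch needs, per
 * thread, six MEDIA_OBJECT header DWORDs plus the inline parameters, with
 * 16 spare bytes covering the trailing MI_NOOP/MI_BATCH_BUFFER_END pair;
 * the kernel return buffer holds one 16-byte block per thread.
 */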
static VAStatus
gen75_gpe_process_init(VADriverContextP ctx,
                 struct vpp_gpe_context *vpp_gpe_ctx)
{
    struct i965_driver_data *i965 = i965_driver_data(ctx);
    dri_bo *bo;

    unsigned int batch_buf_size = vpp_gpe_ctx->thread_num *
                 (vpp_gpe_ctx->thread_param_size + 6 * sizeof(int)) + 16;

    vpp_gpe_ctx->vpp_kernel_return.num_blocks = vpp_gpe_ctx->thread_num;
    vpp_gpe_ctx->vpp_kernel_return.size_block = 16;
    vpp_gpe_ctx->vpp_kernel_return.pitch = 1;
    unsigned int kernel_return_size = vpp_gpe_ctx->vpp_kernel_return.num_blocks
           * vpp_gpe_ctx->vpp_kernel_return.size_block;

    dri_bo_unreference(vpp_gpe_ctx->vpp_batchbuffer.bo);
    bo = dri_bo_alloc(i965->intel.bufmgr,
                      "vpp batch buffer",
                      batch_buf_size, 0x1000);
    vpp_gpe_ctx->vpp_batchbuffer.bo = bo;
    dri_bo_reference(vpp_gpe_ctx->vpp_batchbuffer.bo);

    dri_bo_unreference(vpp_gpe_ctx->vpp_kernel_return.bo);
    bo = dri_bo_alloc(i965->intel.bufmgr,
                      "vpp kernel return buffer",
                      kernel_return_size, 0x1000);
    vpp_gpe_ctx->vpp_kernel_return.bo = bo;
    dri_bo_reference(vpp_gpe_ctx->vpp_kernel_return.bo);

    vpp_gpe_ctx->gpe_context_init(ctx, &vpp_gpe_ctx->gpe_ctx);

    return VA_STATUS_SUCCESS;
}

static VAStatus
gen75_gpe_process_prepare(VADriverContextP ctx,
                    struct vpp_gpe_context *vpp_gpe_ctx)
{
    /* Set up all the memory objects */
    gen75_gpe_process_surfaces_setup(ctx, vpp_gpe_ctx);
    gen75_gpe_process_interface_setup(ctx, vpp_gpe_ctx);
    //gen75_gpe_process_constant_setup(ctx, vpp_gpe_ctx);

    /* Program the media pipeline */
    gen75_gpe_process_pipeline_setup(ctx, vpp_gpe_ctx);

    return VA_STATUS_SUCCESS;
}

static VAStatus
gen75_gpe_process_run(VADriverContextP ctx,
                struct vpp_gpe_context *vpp_gpe_ctx)
{
    intel_batchbuffer_flush(vpp_gpe_ctx->batch);

    return VA_STATUS_SUCCESS;
}

static VAStatus
gen75_gpe_process(VADriverContextP ctx,
                  struct vpp_gpe_context *vpp_gpe_ctx)
{
    VAStatus va_status = VA_STATUS_SUCCESS;

    va_status = gen75_gpe_process_init(ctx, vpp_gpe_ctx);
    if (va_status != VA_STATUS_SUCCESS)
        return va_status;

    va_status = gen75_gpe_process_prepare(ctx, vpp_gpe_ctx);
    if (va_status != VA_STATUS_SUCCESS)
        return va_status;

    va_status = gen75_gpe_process_run(ctx, vpp_gpe_ctx);
    if (va_status != VA_STATUS_SUCCESS)
        return va_status;

    return VA_STATUS_SUCCESS;
}

static VAStatus
gen8_gpe_process_surfaces_setup(VADriverContextP ctx,
                   struct vpp_gpe_context *vpp_gpe_ctx)
{
    struct object_surface *obj_surface;
    unsigned int i = 0;
    unsigned char input_surface_sum = (1 + vpp_gpe_ctx->forward_surf_sum +
                                       vpp_gpe_ctx->backward_surf_sum) * 2;

    /* Bind the input NV12 surfaces (luma + chroma) */
    for (i = 0; i < input_surface_sum; i += 2) {
        obj_surface = vpp_gpe_ctx->surface_input_object[i / 2];
        assert(obj_surface);
        gen8_gpe_media_rw_surface_setup(ctx,
                                        &vpp_gpe_ctx->gpe_ctx,
                                        obj_surface,
                                        BINDING_TABLE_OFFSET_GEN8(i),
                                        SURFACE_STATE_OFFSET_GEN8(i));

        gen8_gpe_media_chroma_surface_setup(ctx,
                                            &vpp_gpe_ctx->gpe_ctx,
                                            obj_surface,
                                            BINDING_TABLE_OFFSET_GEN8(i + 1),
                                            SURFACE_STATE_OFFSET_GEN8(i + 1));
    }

    /* Bind the output NV12 surface (luma + chroma) */
    obj_surface = vpp_gpe_ctx->surface_output_object;
    assert(obj_surface);
    gen8_gpe_media_rw_surface_setup(ctx,
                                    &vpp_gpe_ctx->gpe_ctx,
                                    obj_surface,
                                    BINDING_TABLE_OFFSET_GEN8(input_surface_sum),
                                    SURFACE_STATE_OFFSET_GEN8(input_surface_sum));
    gen8_gpe_media_chroma_surface_setup(ctx,
                                        &vpp_gpe_ctx->gpe_ctx,
                                        obj_surface,
                                        BINDING_TABLE_OFFSET_GEN8(input_surface_sum + 1),
                                        SURFACE_STATE_OFFSET_GEN8(input_surface_sum + 1));
    /* Bind the kernel return buffer surface */
    gen7_gpe_buffer_suface_setup(ctx,
                                 &vpp_gpe_ctx->gpe_ctx,
                                 &vpp_gpe_ctx->vpp_kernel_return,
                                 BINDING_TABLE_OFFSET_GEN8(input_surface_sum + 2),
                                 SURFACE_STATE_OFFSET_GEN8(input_surface_sum + 2));

    return VA_STATUS_SUCCESS;
}

static VAStatus
gen8_gpe_process_interface_setup(VADriverContextP ctx,
                    struct vpp_gpe_context *vpp_gpe_ctx)
{
    struct gen8_interface_descriptor_data *desc;
    dri_bo *bo = vpp_gpe_ctx->gpe_ctx.dynamic_state.bo;
    int i;

    dri_bo_map(bo, 1);
    assert(bo->virtual);
    desc = (struct gen8_interface_descriptor_data *)(bo->virtual
                               + vpp_gpe_ctx->gpe_ctx.idrt_offset);

    /* Set up the descriptor table */
    for (i = 0; i < vpp_gpe_ctx->sub_shader_sum; i++) {
        struct i965_kernel *kernel;
        kernel = &vpp_gpe_ctx->gpe_ctx.kernels[i];
        assert(sizeof(*desc) == 32);
        memset(desc, 0, sizeof(*desc));
        desc->desc0.kernel_start_pointer = kernel->kernel_offset >> 6;
        desc->desc3.sampler_count = 0; /* FIXME: */
        desc->desc3.sampler_state_pointer = 0;
        desc->desc4.binding_table_entry_count = 6; /* FIXME: */
        desc->desc4.binding_table_pointer = (BINDING_TABLE_OFFSET_GEN8(0) >> 5);
        desc->desc5.constant_urb_entry_read_offset = 0;
        desc->desc5.constant_urb_entry_read_length = 0;

        desc++;
    }

    dri_bo_unmap(bo);

    return VA_STATUS_SUCCESS;
}

static VAStatus
gen8_gpe_process_constant_fill(VADriverContextP ctx,
                   struct vpp_gpe_context *vpp_gpe_ctx)
{
    dri_bo_map(vpp_gpe_ctx->gpe_ctx.dynamic_state.bo, 1);
    assert(vpp_gpe_ctx->gpe_ctx.dynamic_state.bo->virtual);
    unsigned char *constant_buffer = vpp_gpe_ctx->gpe_ctx.dynamic_state.bo->virtual;
    memcpy(constant_buffer, vpp_gpe_ctx->kernel_param,
           vpp_gpe_ctx->kernel_param_size);
    dri_bo_unmap(vpp_gpe_ctx->gpe_ctx.dynamic_state.bo);

    return VA_STATUS_SUCCESS;
}

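/*
 * Gen8 variant of the second-level batch fill.  The layout matches the
 * gen75 version above except that each MEDIA_OBJECT is followed by a
 * two-DWORD CMD_MEDIA_STATE_FLUSH, which newer generations expect between
 * media objects.
 */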
static VAStatus
gen8_gpe_process_parameters_fill(VADriverContextP ctx,
                           struct vpp_gpe_context *vpp_gpe_ctx)
{
    unsigned int *command_ptr;
    unsigned int i, size = vpp_gpe_ctx->thread_param_size;
    unsigned char *position = NULL;

    /* Set up the per-thread inline data */
    dri_bo_map(vpp_gpe_ctx->vpp_batchbuffer.bo, 1);
    command_ptr = vpp_gpe_ctx->vpp_batchbuffer.bo->virtual;

    for (i = 0; i < vpp_gpe_ctx->thread_num; i++) {
        *command_ptr++ = (CMD_MEDIA_OBJECT | (size / sizeof(int) + 6 - 2));
        *command_ptr++ = vpp_gpe_ctx->sub_shader_index;
        *command_ptr++ = 0;
        *command_ptr++ = 0;
        *command_ptr++ = 0;
        *command_ptr++ = 0;

        /* copy thread inline data */
        position = (unsigned char *)(vpp_gpe_ctx->thread_param + size * i);
        memcpy(command_ptr, position, size);
        command_ptr += size / sizeof(int);

        *command_ptr++ = CMD_MEDIA_STATE_FLUSH;
        *command_ptr++ = 0;
    }

    *command_ptr++ = 0;
    *command_ptr++ = MI_BATCH_BUFFER_END;

    dri_bo_unmap(vpp_gpe_ctx->vpp_batchbuffer.bo);

    return VA_STATUS_SUCCESS;
}

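/*
 * Gen8 variant of the pipeline setup.  MI_BATCH_BUFFER_START is three
 * DWORDs here since gen8 uses wider batch addresses (the extra
 * OUT_BATCH(0) carries the upper address bits), and bit 0 appears to
 * select the PPGTT address space.
 */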
static VAStatus
gen8_gpe_process_pipeline_setup(VADriverContextP ctx,
                   struct vpp_gpe_context *vpp_gpe_ctx)
{
    intel_batchbuffer_start_atomic(vpp_gpe_ctx->batch, 0x1000);
    intel_batchbuffer_emit_mi_flush(vpp_gpe_ctx->batch);

    gen8_gpe_pipeline_setup(ctx, &vpp_gpe_ctx->gpe_ctx, vpp_gpe_ctx->batch);

    gen8_gpe_process_parameters_fill(ctx, vpp_gpe_ctx);

    BEGIN_BATCH(vpp_gpe_ctx->batch, 3);
    OUT_BATCH(vpp_gpe_ctx->batch, MI_BATCH_BUFFER_START | (1 << 8) | (1 << 0));
    OUT_RELOC(vpp_gpe_ctx->batch,
              vpp_gpe_ctx->vpp_batchbuffer.bo,
              I915_GEM_DOMAIN_COMMAND, 0,
              0);
    OUT_BATCH(vpp_gpe_ctx->batch, 0);

    ADVANCE_BATCH(vpp_gpe_ctx->batch);

    intel_batchbuffer_end_atomic(vpp_gpe_ctx->batch);

    return VA_STATUS_SUCCESS;
}

static VAStatus
gen8_gpe_process_init(VADriverContextP ctx,
                 struct vpp_gpe_context *vpp_gpe_ctx)
{
    struct i965_driver_data *i965 = i965_driver_data(ctx);
    dri_bo *bo;

    /* Per thread: 6 DWORDs of MEDIA_OBJECT header, the inline parameters,
     * and 2 DWORDs of MEDIA_STATE_FLUSH, so 8 DWORDs of overhead (not 6
     * as on gen75). */
    unsigned int batch_buf_size = vpp_gpe_ctx->thread_num *
                 (vpp_gpe_ctx->thread_param_size + 8 * sizeof(int)) + 16;

    vpp_gpe_ctx->vpp_kernel_return.num_blocks = vpp_gpe_ctx->thread_num;
    vpp_gpe_ctx->vpp_kernel_return.size_block = 16;
    vpp_gpe_ctx->vpp_kernel_return.pitch = 1;

    unsigned int kernel_return_size = vpp_gpe_ctx->vpp_kernel_return.num_blocks
           * vpp_gpe_ctx->vpp_kernel_return.size_block;

    dri_bo_unreference(vpp_gpe_ctx->vpp_batchbuffer.bo);
    bo = dri_bo_alloc(i965->intel.bufmgr,
                      "vpp batch buffer",
                      batch_buf_size, 0x1000);
    vpp_gpe_ctx->vpp_batchbuffer.bo = bo;
    dri_bo_reference(vpp_gpe_ctx->vpp_batchbuffer.bo);

    dri_bo_unreference(vpp_gpe_ctx->vpp_kernel_return.bo);
    bo = dri_bo_alloc(i965->intel.bufmgr,
                      "vpp kernel return buffer",
                      kernel_return_size, 0x1000);
    vpp_gpe_ctx->vpp_kernel_return.bo = bo;
    dri_bo_reference(vpp_gpe_ctx->vpp_kernel_return.bo);

    vpp_gpe_ctx->gpe_context_init(ctx, &vpp_gpe_ctx->gpe_ctx);

    return VA_STATUS_SUCCESS;
}

static VAStatus
gen8_gpe_process_prepare(VADriverContextP ctx,
                    struct vpp_gpe_context *vpp_gpe_ctx)
{
    /* Set up all the memory objects */
    gen8_gpe_process_surfaces_setup(ctx, vpp_gpe_ctx);
    gen8_gpe_process_interface_setup(ctx, vpp_gpe_ctx);
    //gen8_gpe_process_constant_setup(ctx, vpp_gpe_ctx);

    /* Program the media pipeline */
    gen8_gpe_process_pipeline_setup(ctx, vpp_gpe_ctx);

    return VA_STATUS_SUCCESS;
}

static VAStatus
gen8_gpe_process_run(VADriverContextP ctx,
                struct vpp_gpe_context *vpp_gpe_ctx)
{
    intel_batchbuffer_flush(vpp_gpe_ctx->batch);

    return VA_STATUS_SUCCESS;
}

static VAStatus
gen8_gpe_process(VADriverContextP ctx,
                  struct vpp_gpe_context *vpp_gpe_ctx)
{
    VAStatus va_status = VA_STATUS_SUCCESS;

    va_status = gen8_gpe_process_init(ctx, vpp_gpe_ctx);
    if (va_status != VA_STATUS_SUCCESS)
        return va_status;

    va_status = gen8_gpe_process_prepare(ctx, vpp_gpe_ctx);
    if (va_status != VA_STATUS_SUCCESS)
        return va_status;

    va_status = gen8_gpe_process_run(ctx, vpp_gpe_ctx);
    if (va_status != VA_STATUS_SUCCESS)
        return va_status;

    return VA_STATUS_SUCCESS;
}

static VAStatus
vpp_gpe_process(VADriverContextP ctx,
                struct vpp_gpe_context *vpp_gpe_ctx)
{
    struct i965_driver_data *i965 = i965_driver_data(ctx);

    if (IS_HASWELL(i965->intel.device_info))
        return gen75_gpe_process(ctx, vpp_gpe_ctx);
    else if (IS_GEN8(i965->intel.device_info))
        return gen8_gpe_process(ctx, vpp_gpe_ctx);

    return VA_STATUS_ERROR_UNIMPLEMENTED;
}

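/*
 * Sharpen a frame by unsharp masking in three GPE passes, re-pointing the
 * input/output surfaces between passes:
 *   1. horizontal blur of the input, one thread per 16-row band;
 *   2. vertical blur of that result into a temporary surface, one thread
 *      per 16-column band;
 *   3. an unmask pass combining the blurred temporary with the original
 *      input (passed as a "forward" surface), one thread per 4-row band.
 * The filter's [0.0, 1.0] intensity is scaled to a 0-128 amount for the
 * kernels.
 */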
static VAStatus
vpp_gpe_process_sharpening(VADriverContextP ctx,
                             struct vpp_gpe_context *vpp_gpe_ctx)
{
    VAStatus va_status = VA_STATUS_SUCCESS;
    struct i965_driver_data *i965 = i965_driver_data(ctx);
    struct object_surface *origin_in_obj_surface = vpp_gpe_ctx->surface_input_object[0];
    struct object_surface *origin_out_obj_surface = vpp_gpe_ctx->surface_output_object;

    VAProcPipelineParameterBuffer *pipe = vpp_gpe_ctx->pipeline_param;
    VABufferID *filter_ids = (VABufferID *)pipe->filters;
    struct object_buffer *obj_buf = BUFFER(filter_ids[0]);

    assert(obj_buf && obj_buf->buffer_store && obj_buf->buffer_store->buffer);

    if (!obj_buf ||
        !obj_buf->buffer_store ||
        !obj_buf->buffer_store->buffer)
        goto error;

    VAProcFilterParameterBuffer *filter =
                 (VAProcFilterParameterBuffer *)obj_buf->buffer_store->buffer;
    float sharpening_intensity = filter->value;

    ThreadParameterSharpening thr_param;
    unsigned int thr_param_size = sizeof(ThreadParameterSharpening);
    unsigned int i;
    unsigned char *pos;

    if (vpp_gpe_ctx->is_first_frame) {
        struct i965_kernel *vpp_kernels = NULL;

        vpp_gpe_ctx->sub_shader_sum = 3;

        if (IS_HASWELL(i965->intel.device_info))
            vpp_kernels = gen75_vpp_sharpening_kernels;
        else if (IS_GEN8(i965->intel.device_info))
            vpp_kernels = gen8_vpp_sharpening_kernels;

        /* Guard against an unsupported generation leaving vpp_kernels
         * uninitialized. */
        if (!vpp_kernels)
            return VA_STATUS_ERROR_UNIMPLEMENTED;

        vpp_gpe_ctx->gpe_load_kernels(ctx,
                              &vpp_gpe_ctx->gpe_ctx,
                              vpp_kernels,
                              vpp_gpe_ctx->sub_shader_sum);
    }

    if (vpp_gpe_ctx->surface_tmp == VA_INVALID_ID) {
        va_status = i965_CreateSurfaces(ctx,
                                        vpp_gpe_ctx->in_frame_w,
                                        vpp_gpe_ctx->in_frame_h,
                                        VA_RT_FORMAT_YUV420,
                                        1,
                                        &vpp_gpe_ctx->surface_tmp);
        assert(va_status == VA_STATUS_SUCCESS);

        struct object_surface *obj_surf = SURFACE(vpp_gpe_ctx->surface_tmp);
        assert(obj_surf);

        if (obj_surf) {
            i965_check_alloc_surface_bo(ctx, obj_surf, 1, VA_FOURCC_NV12,
                                        SUBSAMPLE_YUV420);
            vpp_gpe_ctx->surface_tmp_object = obj_surf;
        }
    }

    assert(sharpening_intensity >= 0.0 && sharpening_intensity <= 1.0);
    thr_param.l_amount = (unsigned int)(sharpening_intensity * 128);
    thr_param.d_amount = (unsigned int)(sharpening_intensity * 128);

    thr_param.base.pic_width = vpp_gpe_ctx->in_frame_w;
    thr_param.base.pic_height = vpp_gpe_ctx->in_frame_h;

    /* Step 1: horizontal blur process */
    vpp_gpe_ctx->forward_surf_sum = 0;
    vpp_gpe_ctx->backward_surf_sum = 0;

    vpp_gpe_ctx->thread_num = vpp_gpe_ctx->in_frame_h / 16;
    vpp_gpe_ctx->thread_param_size = thr_param_size;
    vpp_gpe_ctx->thread_param = (unsigned char *)malloc(vpp_gpe_ctx->thread_param_size
                                                        * vpp_gpe_ctx->thread_num);
    pos = vpp_gpe_ctx->thread_param;

    if (!pos) {
        return VA_STATUS_ERROR_ALLOCATION_FAILED;
    }

    for (i = 0; i < vpp_gpe_ctx->thread_num; i++) {
        thr_param.base.v_pos = 16 * i;
        thr_param.base.h_pos = 0;
        memcpy(pos, &thr_param, thr_param_size);
        pos += thr_param_size;
    }

    vpp_gpe_ctx->sub_shader_index = 0;
    va_status = vpp_gpe_process(ctx, vpp_gpe_ctx);
    free(vpp_gpe_ctx->thread_param);

    /* Step 2: vertical blur process */
    vpp_gpe_ctx->surface_input_object[0] = vpp_gpe_ctx->surface_output_object;
    vpp_gpe_ctx->surface_output_object = vpp_gpe_ctx->surface_tmp_object;
    vpp_gpe_ctx->forward_surf_sum = 0;
    vpp_gpe_ctx->backward_surf_sum = 0;

    vpp_gpe_ctx->thread_num = vpp_gpe_ctx->in_frame_w / 16;
    vpp_gpe_ctx->thread_param_size = thr_param_size;
    vpp_gpe_ctx->thread_param = (unsigned char *)malloc(vpp_gpe_ctx->thread_param_size
                                                        * vpp_gpe_ctx->thread_num);
    pos = vpp_gpe_ctx->thread_param;

    if (!pos) {
        return VA_STATUS_ERROR_ALLOCATION_FAILED;
    }

    for (i = 0; i < vpp_gpe_ctx->thread_num; i++) {
        thr_param.base.v_pos = 0;
        thr_param.base.h_pos = 16 * i;
        memcpy(pos, &thr_param, thr_param_size);
        pos += thr_param_size;
    }

    vpp_gpe_ctx->sub_shader_index = 1;
    va_status = vpp_gpe_process(ctx, vpp_gpe_ctx);
    free(vpp_gpe_ctx->thread_param);

    /* Step 3: combine the blurred result with the original surface */
    vpp_gpe_ctx->surface_input_object[0]  = origin_in_obj_surface;
    vpp_gpe_ctx->surface_input_object[1]  = vpp_gpe_ctx->surface_tmp_object;
    vpp_gpe_ctx->surface_output_object    = origin_out_obj_surface;
    vpp_gpe_ctx->forward_surf_sum  = 1;
    vpp_gpe_ctx->backward_surf_sum = 0;

    vpp_gpe_ctx->thread_num = vpp_gpe_ctx->in_frame_h / 4;
    vpp_gpe_ctx->thread_param_size = thr_param_size;
    vpp_gpe_ctx->thread_param = (unsigned char *)malloc(vpp_gpe_ctx->thread_param_size
                                                        * vpp_gpe_ctx->thread_num);
    pos = vpp_gpe_ctx->thread_param;

    if (!pos) {
        return VA_STATUS_ERROR_ALLOCATION_FAILED;
    }

    for (i = 0; i < vpp_gpe_ctx->thread_num; i++) {
        thr_param.base.v_pos = 4 * i;
        thr_param.base.h_pos = 0;
        memcpy(pos, &thr_param, thr_param_size);
        pos += thr_param_size;
    }

    vpp_gpe_ctx->sub_shader_index = 2;
    va_status = vpp_gpe_process(ctx, vpp_gpe_ctx);
    free(vpp_gpe_ctx->thread_param);

    return va_status;

error:
    return VA_STATUS_ERROR_INVALID_PARAMETER;
}

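/*
 * Public entry point for GPE-based post-processing: find a sharpening
 * filter among the pipeline's filter buffers, collect the input surface
 * plus forward/backward references, then run the sharpening passes.
 * Filters other than VAProcFilterSharpening are rejected here.
 */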
VAStatus vpp_gpe_process_picture(VADriverContextP ctx,
                    struct vpp_gpe_context *vpp_gpe_ctx)
{
    VAStatus va_status = VA_STATUS_SUCCESS;
    struct i965_driver_data *i965 = i965_driver_data(ctx);
    VAProcPipelineParameterBuffer *pipe = vpp_gpe_ctx->pipeline_param;
    VAProcFilterParameterBuffer *filter = NULL;
    unsigned int i;
    struct object_surface *obj_surface = NULL;

    if (pipe->num_filters && !pipe->filters)
        goto error;

    for (i = 0; i < pipe->num_filters; i++) {
        struct object_buffer *obj_buf = BUFFER(pipe->filters[i]);

        assert(obj_buf && obj_buf->buffer_store && obj_buf->buffer_store->buffer);

        if (!obj_buf ||
            !obj_buf->buffer_store ||
            !obj_buf->buffer_store->buffer)
            goto error;

        filter = (VAProcFilterParameterBuffer *)obj_buf->buffer_store->buffer;
        if (filter->type == VAProcFilterSharpening) {
            break;
        }
    }

    assert(pipe->num_forward_references + pipe->num_backward_references <= 4);
    vpp_gpe_ctx->surface_input_object[0] = vpp_gpe_ctx->surface_pipeline_input_object;

    vpp_gpe_ctx->forward_surf_sum = 0;
    vpp_gpe_ctx->backward_surf_sum = 0;

    for (i = 0; i < pipe->num_forward_references; i++) {
        obj_surface = SURFACE(pipe->forward_references[i]);

        assert(obj_surface);
        vpp_gpe_ctx->surface_input_object[i + 1] = obj_surface;
        vpp_gpe_ctx->forward_surf_sum++;
    }

    for (i = 0; i < pipe->num_backward_references; i++) {
        obj_surface = SURFACE(pipe->backward_references[i]);

        assert(obj_surface);
        vpp_gpe_ctx->surface_input_object[vpp_gpe_ctx->forward_surf_sum + 1 + i] = obj_surface;
        vpp_gpe_ctx->backward_surf_sum++;
    }

    obj_surface = vpp_gpe_ctx->surface_input_object[0];
    vpp_gpe_ctx->in_frame_w = obj_surface->orig_width;
    vpp_gpe_ctx->in_frame_h = obj_surface->orig_height;

    if (filter && filter->type == VAProcFilterSharpening) {
        va_status = vpp_gpe_process_sharpening(ctx, vpp_gpe_ctx);
    } else {
        va_status = VA_STATUS_ERROR_ATTR_NOT_SUPPORTED;
    }

    vpp_gpe_ctx->is_first_frame = 0;

    return va_status;

error:
    return VA_STATUS_ERROR_INVALID_PARAMETER;
}

void
vpp_gpe_context_destroy(VADriverContextP ctx,
                               struct vpp_gpe_context *vpp_gpe_ctx)
{
    dri_bo_unreference(vpp_gpe_ctx->vpp_batchbuffer.bo);
    vpp_gpe_ctx->vpp_batchbuffer.bo = NULL;

    dri_bo_unreference(vpp_gpe_ctx->vpp_kernel_return.bo);
    vpp_gpe_ctx->vpp_kernel_return.bo = NULL;

    vpp_gpe_ctx->gpe_context_destroy(&vpp_gpe_ctx->gpe_ctx);

    if (vpp_gpe_ctx->surface_tmp != VA_INVALID_ID) {
        assert(vpp_gpe_ctx->surface_tmp_object != NULL);
        i965_DestroySurfaces(ctx, &vpp_gpe_ctx->surface_tmp, 1);
        vpp_gpe_ctx->surface_tmp = VA_INVALID_ID;
        vpp_gpe_ctx->surface_tmp_object = NULL;
    }

    /* The batch was created with intel_batchbuffer_new(), so release it
     * with intel_batchbuffer_free() rather than a bare free(). */
    intel_batchbuffer_free(vpp_gpe_ctx->batch);

    free(vpp_gpe_ctx);
}

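/*
 * Allocate the VPP GPE context and select the per-generation hooks
 * (context init/destroy, kernel loading) along with the
 * surface-state/binding-table sizing.  The VFE fields written as "N - 1"
 * presumably follow the hardware convention of encoding counts minus one.
 */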
struct vpp_gpe_context *
vpp_gpe_context_init(VADriverContextP ctx)
{
    struct i965_driver_data *i965 = i965_driver_data(ctx);
    struct vpp_gpe_context  *vpp_gpe_ctx = calloc(1, sizeof(struct vpp_gpe_context));
    struct i965_gpe_context *gpe_ctx;

    assert(IS_HASWELL(i965->intel.device_info) ||
           IS_GEN8(i965->intel.device_info));

    if (!vpp_gpe_ctx)
        return NULL;

    gpe_ctx = &vpp_gpe_ctx->gpe_ctx;

    vpp_gpe_ctx->surface_tmp = VA_INVALID_ID;
    vpp_gpe_ctx->surface_tmp_object = NULL;
    vpp_gpe_ctx->batch = intel_batchbuffer_new(&i965->intel, I915_EXEC_RENDER, 0);
    vpp_gpe_ctx->is_first_frame = 1;

    gpe_ctx->vfe_state.max_num_threads = 60 - 1;
    gpe_ctx->vfe_state.num_urb_entries = 16;
    gpe_ctx->vfe_state.gpgpu_mode = 0;
    gpe_ctx->vfe_state.urb_entry_size = 59 - 1;
    gpe_ctx->vfe_state.curbe_allocation_size = CURBE_ALLOCATION_SIZE - 1;

    if (IS_HASWELL(i965->intel.device_info)) {
        vpp_gpe_ctx->gpe_context_init     = i965_gpe_context_init;
        vpp_gpe_ctx->gpe_context_destroy  = i965_gpe_context_destroy;
        vpp_gpe_ctx->gpe_load_kernels     = i965_gpe_load_kernels;
        gpe_ctx->surface_state_binding_table.length =
               (SURFACE_STATE_PADDED_SIZE_GEN7 + sizeof(unsigned int)) * MAX_MEDIA_SURFACES_GEN6;

        gpe_ctx->curbe.length = CURBE_TOTAL_DATA_LENGTH;
        gpe_ctx->idrt.max_entries = MAX_INTERFACE_DESC_GEN6;
        gpe_ctx->idrt.entry_size = sizeof(struct gen6_interface_descriptor_data);

    } else if (IS_GEN8(i965->intel.device_info)) {
        vpp_gpe_ctx->gpe_context_init     = gen8_gpe_context_init;
        vpp_gpe_ctx->gpe_context_destroy  = gen8_gpe_context_destroy;
        vpp_gpe_ctx->gpe_load_kernels     = gen8_gpe_load_kernels;
        gpe_ctx->surface_state_binding_table.length =
               (SURFACE_STATE_PADDED_SIZE_GEN8 + sizeof(unsigned int)) * MAX_MEDIA_SURFACES_GEN6;

        gpe_ctx->curbe_size = CURBE_TOTAL_DATA_LENGTH;
        gpe_ctx->idrt_size  = sizeof(struct gen8_interface_descriptor_data) * MAX_INTERFACE_DESC_GEN6;
    }

    return vpp_gpe_ctx;
}