/*
 * Copyright © 2011 Intel Corporation
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the
 * "Software"), to deal in the Software without restriction, including
 * without limitation the rights to use, copy, modify, merge, publish,
 * distribute, sub license, and/or sell copies of the Software, and to
 * permit persons to whom the Software is furnished to do so, subject to
 * the following conditions:
 *
 * The above copyright notice and this permission notice (including the
 * next paragraph) shall be included in all copies or substantial portions
 * of the Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
 * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
 * IN NO EVENT SHALL PRECISION INSIGHT AND/OR ITS SUPPLIERS BE LIABLE FOR
 * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
 * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
 * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
 *
 * Authors:
 *   Li Xiaowei <xiaowei.a.li@intel.com>
 */

#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <assert.h>

#include "intel_batchbuffer.h"
#include "intel_driver.h"

#include "i965_structs.h"
#include "i965_defines.h"
#include "i965_drv_video.h"
#include "gen75_vpp_gpe.h"

#define MAX_INTERFACE_DESC_GEN6      MAX_GPE_KERNELS
#define MAX_MEDIA_SURFACES_GEN6      34

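/*
 * Layout of the surface-state/binding-table buffer: the padded
 * surface-state entries are packed first, followed by one binding-table
 * DWORD per surface, hence the offsets below (with the per-gen padded
 * entry sizes).
 */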
#define SURFACE_STATE_OFFSET_GEN7(index)   (SURFACE_STATE_PADDED_SIZE_GEN7 * (index))
#define BINDING_TABLE_OFFSET_GEN7(index)   (SURFACE_STATE_OFFSET_GEN7(MAX_MEDIA_SURFACES_GEN6) + sizeof(unsigned int) * (index))

#define SURFACE_STATE_OFFSET_GEN8(index)   (SURFACE_STATE_PADDED_SIZE_GEN8 * (index))
#define BINDING_TABLE_OFFSET_GEN8(index)   (SURFACE_STATE_OFFSET_GEN8(MAX_MEDIA_SURFACES_GEN6) + sizeof(unsigned int) * (index))

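/*
 * CURBE sizing: per the MEDIA_VFE_STATE layout, the CURBE allocation size
 * and URB entry length are counted in 256-bit (32-byte) units; the
 * context init at the bottom of this file programs the allocation size as
 * CURBE_ALLOCATION_SIZE - 1.
 */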
#define CURBE_ALLOCATION_SIZE   37
#define CURBE_TOTAL_DATA_LENGTH (4 * 32)
#define CURBE_URB_ENTRY_LENGTH  4

extern VAStatus
i965_CreateSurfaces(VADriverContextP ctx,
                    int width,
                    int height,
                    int format,
                    int num_surfaces,
                    VASurfaceID *surfaces);

extern VAStatus
i965_DestroySurfaces(VADriverContextP ctx,
                     VASurfaceID *surface_list,
                     int num_surfaces);

/* Shader binaries for the sharpening filter */
static const unsigned int gen75_gpe_sharpening_h_blur[][4] = {
   #include "shaders/post_processing/gen75/sharpening_h_blur.g75b"
};
static const unsigned int gen75_gpe_sharpening_v_blur[][4] = {
   #include "shaders/post_processing/gen75/sharpening_v_blur.g75b"
};
static const unsigned int gen75_gpe_sharpening_unmask[][4] = {
   #include "shaders/post_processing/gen75/sharpening_unmask.g75b"
};
static struct i965_kernel gen75_vpp_sharpening_kernels[] = {
    {
        "vpp: sharpening(horizontal blur)",
        VPP_GPE_SHARPENING,
        gen75_gpe_sharpening_h_blur,
        sizeof(gen75_gpe_sharpening_h_blur),
        NULL
    },
    {
        "vpp: sharpening(vertical blur)",
        VPP_GPE_SHARPENING,
        gen75_gpe_sharpening_v_blur,
        sizeof(gen75_gpe_sharpening_v_blur),
        NULL
    },
    {
        "vpp: sharpening(unmask)",
        VPP_GPE_SHARPENING,
        gen75_gpe_sharpening_unmask,
        sizeof(gen75_gpe_sharpening_unmask),
        NULL
    },
};

/* Sharpening kernels for Broadwell */
static const unsigned int gen8_gpe_sharpening_h_blur[][4] = {
   #include "shaders/post_processing/gen8/sharpening_h_blur.g8b"
};
static const unsigned int gen8_gpe_sharpening_v_blur[][4] = {
   #include "shaders/post_processing/gen8/sharpening_v_blur.g8b"
};
static const unsigned int gen8_gpe_sharpening_unmask[][4] = {
   #include "shaders/post_processing/gen8/sharpening_unmask.g8b"
};

static struct i965_kernel gen8_vpp_sharpening_kernels[] = {
    {
        "vpp: sharpening(horizontal blur)",
        VPP_GPE_SHARPENING,
        gen8_gpe_sharpening_h_blur,
        sizeof(gen8_gpe_sharpening_h_blur),
        NULL
    },
    {
        "vpp: sharpening(vertical blur)",
        VPP_GPE_SHARPENING,
        gen8_gpe_sharpening_v_blur,
        sizeof(gen8_gpe_sharpening_v_blur),
        NULL
    },
    {
        "vpp: sharpening(unmask)",
        VPP_GPE_SHARPENING,
        gen8_gpe_sharpening_unmask,
        sizeof(gen8_gpe_sharpening_unmask),
        NULL
    },
};

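/*
 * Each NV12 surface occupies two consecutive binding-table slots: an even
 * slot for the luma (Y) plane and the following odd slot for the chroma
 * (UV) plane.  Inputs come first, then the output pair, then the kernel
 * return buffer.
 */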
static VAStatus
gen75_gpe_process_surfaces_setup(VADriverContextP ctx,
                   struct vpp_gpe_context *vpp_gpe_ctx)
{
    struct object_surface *obj_surface;
    unsigned int i = 0;
    unsigned char input_surface_sum = (1 + vpp_gpe_ctx->forward_surf_sum +
                                         vpp_gpe_ctx->backward_surf_sum) * 2;

    /* Bind the input NV12 surfaces (Luma + Chroma) */
    for (i = 0; i < input_surface_sum; i += 2) {
         obj_surface = vpp_gpe_ctx->surface_input_object[i/2];
         assert(obj_surface);
         gen7_gpe_media_rw_surface_setup(ctx,
                                         &vpp_gpe_ctx->gpe_ctx,
                                          obj_surface,
                                          BINDING_TABLE_OFFSET_GEN7(i),
                                          SURFACE_STATE_OFFSET_GEN7(i));

         gen75_gpe_media_chroma_surface_setup(ctx,
                                          &vpp_gpe_ctx->gpe_ctx,
                                          obj_surface,
                                          BINDING_TABLE_OFFSET_GEN7(i + 1),
                                          SURFACE_STATE_OFFSET_GEN7(i + 1));
    }

    /* Bind the output NV12 surface (Luma + Chroma) */
    obj_surface = vpp_gpe_ctx->surface_output_object;
    assert(obj_surface);
    gen7_gpe_media_rw_surface_setup(ctx,
                                    &vpp_gpe_ctx->gpe_ctx,
                                    obj_surface,
                                    BINDING_TABLE_OFFSET_GEN7(input_surface_sum),
                                    SURFACE_STATE_OFFSET_GEN7(input_surface_sum));
    gen75_gpe_media_chroma_surface_setup(ctx,
                                    &vpp_gpe_ctx->gpe_ctx,
                                    obj_surface,
                                    BINDING_TABLE_OFFSET_GEN7(input_surface_sum + 1),
                                    SURFACE_STATE_OFFSET_GEN7(input_surface_sum + 1));
    /* Bind the kernel return buffer surface */
    gen7_gpe_buffer_suface_setup(ctx,
                                  &vpp_gpe_ctx->gpe_ctx,
                                  &vpp_gpe_ctx->vpp_kernel_return,
                                  BINDING_TABLE_OFFSET_GEN7(input_surface_sum + 2),
                                  SURFACE_STATE_OFFSET_GEN7(input_surface_sum + 2));

    return VA_STATUS_SUCCESS;
}

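/*
 * Write one interface descriptor per loaded sub-kernel.  On Gen7 the
 * kernel start pointer holds a graphics address, so a relocation is
 * emitted to patch desc0 with the kernel BO's final GPU offset.
 */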
static VAStatus
gen75_gpe_process_interface_setup(VADriverContextP ctx,
                    struct vpp_gpe_context *vpp_gpe_ctx)
{
    struct gen6_interface_descriptor_data *desc;
    dri_bo *bo = vpp_gpe_ctx->gpe_ctx.idrt.bo;
    int i;

    dri_bo_map(bo, 1);
    assert(bo->virtual);
    desc = bo->virtual;

    /* Set up the descriptor table */
    for (i = 0; i < vpp_gpe_ctx->sub_shader_sum; i++) {
        struct i965_kernel *kernel = &vpp_gpe_ctx->gpe_ctx.kernels[i];
        assert(sizeof(*desc) == 32);
        memset(desc, 0, sizeof(*desc));
        desc->desc0.kernel_start_pointer = (kernel->bo->offset >> 6);
        desc->desc2.sampler_count = 0; /* FIXME: */
        desc->desc2.sampler_state_pointer = 0;
        desc->desc3.binding_table_entry_count = 6; /* FIXME: */
        desc->desc3.binding_table_pointer = (BINDING_TABLE_OFFSET_GEN7(0) >> 5);
        desc->desc4.constant_urb_entry_read_offset = 0;
        desc->desc4.constant_urb_entry_read_length = 0;

        dri_bo_emit_reloc(bo,
                          I915_GEM_DOMAIN_INSTRUCTION, 0,
                          0,
                          i * sizeof(*desc) + offsetof(struct gen6_interface_descriptor_data, desc0),
                          kernel->bo);
        desc++;
    }

    dri_bo_unmap(bo);

    return VA_STATUS_SUCCESS;
}

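/* Copy the per-kernel parameter block into the CURBE (constant URB) buffer. */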
static VAStatus
gen75_gpe_process_constant_fill(VADriverContextP ctx,
                   struct vpp_gpe_context *vpp_gpe_ctx)
{
    dri_bo_map(vpp_gpe_ctx->gpe_ctx.curbe.bo, 1);
    assert(vpp_gpe_ctx->gpe_ctx.curbe.bo->virtual);
    unsigned char *constant_buffer = vpp_gpe_ctx->gpe_ctx.curbe.bo->virtual;
    memcpy(constant_buffer, vpp_gpe_ctx->kernel_param,
                            vpp_gpe_ctx->kernel_param_size);
    dri_bo_unmap(vpp_gpe_ctx->gpe_ctx.curbe.bo);

    return VA_STATUS_SUCCESS;
}

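/*
 * Build the second-level batch: one MEDIA_OBJECT per thread.  The
 * command's DWord-length bits encode total DWORDs minus two, i.e. six
 * fixed header DWORDs plus size/sizeof(int) DWORDs of thread inline data.
 */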
static VAStatus
gen75_gpe_process_parameters_fill(VADriverContextP ctx,
                           struct vpp_gpe_context *vpp_gpe_ctx)
{
    unsigned int *command_ptr;
    unsigned int i, size = vpp_gpe_ctx->thread_param_size;
    unsigned char *position = NULL;

    /* Thread inline data setting */
    dri_bo_map(vpp_gpe_ctx->vpp_batchbuffer.bo, 1);
    command_ptr = vpp_gpe_ctx->vpp_batchbuffer.bo->virtual;

    for (i = 0; i < vpp_gpe_ctx->thread_num; i++)
    {
         *command_ptr++ = (CMD_MEDIA_OBJECT | (size/sizeof(int) + 6 - 2));
         *command_ptr++ = vpp_gpe_ctx->sub_shader_index;
         *command_ptr++ = 0;
         *command_ptr++ = 0;
         *command_ptr++ = 0;
         *command_ptr++ = 0;

         /* copy thread inline data */
         position = (unsigned char *)(vpp_gpe_ctx->thread_param + size * i);
         memcpy(command_ptr, position, size);
         command_ptr += size/sizeof(int);
    }

    *command_ptr++ = 0;
    *command_ptr++ = MI_BATCH_BUFFER_END;

    dri_bo_unmap(vpp_gpe_ctx->vpp_batchbuffer.bo);

    return VA_STATUS_SUCCESS;
}

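/*
 * First-level batch: flush, program the GPE pipeline state, then chain to
 * the second-level batch built above.  On Gen7 MI_BATCH_BUFFER_START is
 * the two-DWORD form with a 32-bit batch address.
 */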
static VAStatus
gen75_gpe_process_pipeline_setup(VADriverContextP ctx,
                   struct vpp_gpe_context *vpp_gpe_ctx)
{
    intel_batchbuffer_start_atomic(vpp_gpe_ctx->batch, 0x1000);
    intel_batchbuffer_emit_mi_flush(vpp_gpe_ctx->batch);

    gen6_gpe_pipeline_setup(ctx, &vpp_gpe_ctx->gpe_ctx, vpp_gpe_ctx->batch);

    gen75_gpe_process_parameters_fill(ctx, vpp_gpe_ctx);

    BEGIN_BATCH(vpp_gpe_ctx->batch, 2);
    OUT_BATCH(vpp_gpe_ctx->batch, MI_BATCH_BUFFER_START | (2 << 6));
    OUT_RELOC(vpp_gpe_ctx->batch,
              vpp_gpe_ctx->vpp_batchbuffer.bo,
              I915_GEM_DOMAIN_COMMAND, 0,
              0);
    ADVANCE_BATCH(vpp_gpe_ctx->batch);

    intel_batchbuffer_end_atomic(vpp_gpe_ctx->batch);

    return VA_STATUS_SUCCESS;
}

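/*
 * Size the second-level batch for thread_num MEDIA_OBJECT commands (six
 * header DWORDs plus the inline data each), with 16 spare bytes for the
 * trailing MI_NOOP / MI_BATCH_BUFFER_END pair.
 */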
static VAStatus
gen75_gpe_process_init(VADriverContextP ctx,
                 struct vpp_gpe_context *vpp_gpe_ctx)
{
    struct i965_driver_data *i965 = i965_driver_data(ctx);
    dri_bo *bo;

    unsigned int batch_buf_size = vpp_gpe_ctx->thread_num *
                 (vpp_gpe_ctx->thread_param_size + 6 * sizeof(int)) + 16;

    vpp_gpe_ctx->vpp_kernel_return.num_blocks = vpp_gpe_ctx->thread_num;
    vpp_gpe_ctx->vpp_kernel_return.size_block = 16;
    vpp_gpe_ctx->vpp_kernel_return.pitch = 1;
    unsigned int kernel_return_size = vpp_gpe_ctx->vpp_kernel_return.num_blocks
           * vpp_gpe_ctx->vpp_kernel_return.size_block;

    dri_bo_unreference(vpp_gpe_ctx->vpp_batchbuffer.bo);
    bo = dri_bo_alloc(i965->intel.bufmgr,
                      "vpp batch buffer",
                       batch_buf_size, 0x1000);
    vpp_gpe_ctx->vpp_batchbuffer.bo = bo;
    dri_bo_reference(vpp_gpe_ctx->vpp_batchbuffer.bo);

    dri_bo_unreference(vpp_gpe_ctx->vpp_kernel_return.bo);
    bo = dri_bo_alloc(i965->intel.bufmgr,
                      "vpp kernel return buffer",
                       kernel_return_size, 0x1000);
    vpp_gpe_ctx->vpp_kernel_return.bo = bo;
    dri_bo_reference(vpp_gpe_ctx->vpp_kernel_return.bo);

    vpp_gpe_ctx->gpe_context_init(ctx, &vpp_gpe_ctx->gpe_ctx);

    return VA_STATUS_SUCCESS;
}

static VAStatus
gen75_gpe_process_prepare(VADriverContextP ctx,
                    struct vpp_gpe_context *vpp_gpe_ctx)
{
    /* Set up all the memory objects */
    gen75_gpe_process_surfaces_setup(ctx, vpp_gpe_ctx);
    gen75_gpe_process_interface_setup(ctx, vpp_gpe_ctx);
    //gen75_gpe_process_constant_setup(ctx, vpp_gpe_ctx);

    /* Program the media pipeline */
    gen75_gpe_process_pipeline_setup(ctx, vpp_gpe_ctx);

    return VA_STATUS_SUCCESS;
}

static VAStatus
gen75_gpe_process_run(VADriverContextP ctx,
                struct vpp_gpe_context *vpp_gpe_ctx)
{
    intel_batchbuffer_flush(vpp_gpe_ctx->batch);

    return VA_STATUS_SUCCESS;
}

static VAStatus
gen75_gpe_process(VADriverContextP ctx,
                  struct vpp_gpe_context *vpp_gpe_ctx)
{
    VAStatus va_status = VA_STATUS_SUCCESS;

    va_status = gen75_gpe_process_init(ctx, vpp_gpe_ctx);
    if (va_status != VA_STATUS_SUCCESS)
        return va_status;

    va_status = gen75_gpe_process_prepare(ctx, vpp_gpe_ctx);
    if (va_status != VA_STATUS_SUCCESS)
        return va_status;

    va_status = gen75_gpe_process_run(ctx, vpp_gpe_ctx);
    if (va_status != VA_STATUS_SUCCESS)
        return va_status;

    return VA_STATUS_SUCCESS;
}

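/*
 * Gen8 path: same binding-table layout as the Gen7 version above, only
 * with the Gen8 padded surface-state stride.
 */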
static VAStatus
gen8_gpe_process_surfaces_setup(VADriverContextP ctx,
                   struct vpp_gpe_context *vpp_gpe_ctx)
{
    struct object_surface *obj_surface;
    unsigned int i = 0;
    unsigned char input_surface_sum = (1 + vpp_gpe_ctx->forward_surf_sum +
                                         vpp_gpe_ctx->backward_surf_sum) * 2;

    /* Bind the input NV12 surfaces (Luma + Chroma) */
    for (i = 0; i < input_surface_sum; i += 2) {
         obj_surface = vpp_gpe_ctx->surface_input_object[i/2];
         assert(obj_surface);
         gen8_gpe_media_rw_surface_setup(ctx,
                                         &vpp_gpe_ctx->gpe_ctx,
                                          obj_surface,
                                          BINDING_TABLE_OFFSET_GEN8(i),
                                          SURFACE_STATE_OFFSET_GEN8(i));

         gen8_gpe_media_chroma_surface_setup(ctx,
                                          &vpp_gpe_ctx->gpe_ctx,
                                          obj_surface,
                                          BINDING_TABLE_OFFSET_GEN8(i + 1),
                                          SURFACE_STATE_OFFSET_GEN8(i + 1));
    }

    /* Bind the output NV12 surface (Luma + Chroma) */
    obj_surface = vpp_gpe_ctx->surface_output_object;
    assert(obj_surface);
    gen8_gpe_media_rw_surface_setup(ctx,
                                    &vpp_gpe_ctx->gpe_ctx,
                                    obj_surface,
                                    BINDING_TABLE_OFFSET_GEN8(input_surface_sum),
                                    SURFACE_STATE_OFFSET_GEN8(input_surface_sum));
    gen8_gpe_media_chroma_surface_setup(ctx,
                                    &vpp_gpe_ctx->gpe_ctx,
                                    obj_surface,
                                    BINDING_TABLE_OFFSET_GEN8(input_surface_sum + 1),
                                    SURFACE_STATE_OFFSET_GEN8(input_surface_sum + 1));
    /* Bind the kernel return buffer surface */
    gen7_gpe_buffer_suface_setup(ctx,
                                  &vpp_gpe_ctx->gpe_ctx,
                                  &vpp_gpe_ctx->vpp_kernel_return,
                                  BINDING_TABLE_OFFSET_GEN8(input_surface_sum + 2),
                                  SURFACE_STATE_OFFSET_GEN8(input_surface_sum + 2));

    return VA_STATUS_SUCCESS;
}

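/*
 * On Gen8 the interface descriptors live inside the dynamic state buffer
 * at idrt_offset, and the kernel start pointer is an offset into that
 * same buffer (kernel_offset), so no relocation is needed here.
 */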
static VAStatus
gen8_gpe_process_interface_setup(VADriverContextP ctx,
                    struct vpp_gpe_context *vpp_gpe_ctx)
{
    struct gen8_interface_descriptor_data *desc;
    dri_bo *bo = vpp_gpe_ctx->gpe_ctx.dynamic_state.bo;
    int i;

    dri_bo_map(bo, 1);
    assert(bo->virtual);
    desc = (struct gen8_interface_descriptor_data *)(bo->virtual
                               + vpp_gpe_ctx->gpe_ctx.idrt_offset);

    /* Set up the descriptor table */
    for (i = 0; i < vpp_gpe_ctx->sub_shader_sum; i++) {
        struct i965_kernel *kernel;
        kernel = &vpp_gpe_ctx->gpe_ctx.kernels[i];
        assert(sizeof(*desc) == 32);
        memset(desc, 0, sizeof(*desc));
        desc->desc0.kernel_start_pointer = kernel->kernel_offset >> 6;
        desc->desc3.sampler_count = 0; /* FIXME: */
        desc->desc3.sampler_state_pointer = 0;
        desc->desc4.binding_table_entry_count = 6; /* FIXME: */
        desc->desc4.binding_table_pointer = (BINDING_TABLE_OFFSET_GEN8(0) >> 5);
        desc->desc5.constant_urb_entry_read_offset = 0;
        desc->desc5.constant_urb_entry_read_length = 0;

        desc++;
    }

    dri_bo_unmap(bo);

    return VA_STATUS_SUCCESS;
}

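/*
 * NOTE: nothing in this file calls gen8_gpe_process_constant_fill() (the
 * constant setup call in gen8_gpe_process_prepare() is commented out).
 * It also writes at offset 0 of the dynamic state buffer; if it is ever
 * wired up, the CURBE area's offset within that buffer would presumably
 * need to be added.
 */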
static VAStatus
gen8_gpe_process_constant_fill(VADriverContextP ctx,
                   struct vpp_gpe_context *vpp_gpe_ctx)
{
    dri_bo_map(vpp_gpe_ctx->gpe_ctx.dynamic_state.bo, 1);
    assert(vpp_gpe_ctx->gpe_ctx.dynamic_state.bo->virtual);
    unsigned char *constant_buffer = vpp_gpe_ctx->gpe_ctx.dynamic_state.bo->virtual;
    memcpy(constant_buffer, vpp_gpe_ctx->kernel_param,
                            vpp_gpe_ctx->kernel_param_size);
    dri_bo_unmap(vpp_gpe_ctx->gpe_ctx.dynamic_state.bo);

    return VA_STATUS_SUCCESS;
}

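/*
 * Same MEDIA_OBJECT layout as the Gen7 version, except that the Gen8 path
 * also emits a MEDIA_STATE_FLUSH after each MEDIA_OBJECT.
 */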
static VAStatus
gen8_gpe_process_parameters_fill(VADriverContextP ctx,
                           struct vpp_gpe_context *vpp_gpe_ctx)
{
    unsigned int *command_ptr;
    unsigned int i, size = vpp_gpe_ctx->thread_param_size;
    unsigned char *position = NULL;

    /* Thread inline data setting */
    dri_bo_map(vpp_gpe_ctx->vpp_batchbuffer.bo, 1);
    command_ptr = vpp_gpe_ctx->vpp_batchbuffer.bo->virtual;

    for (i = 0; i < vpp_gpe_ctx->thread_num; i++)
    {
         *command_ptr++ = (CMD_MEDIA_OBJECT | (size/sizeof(int) + 6 - 2));
         *command_ptr++ = vpp_gpe_ctx->sub_shader_index;
         *command_ptr++ = 0;
         *command_ptr++ = 0;
         *command_ptr++ = 0;
         *command_ptr++ = 0;

         /* copy thread inline data */
         position = (unsigned char *)(vpp_gpe_ctx->thread_param + size * i);
         memcpy(command_ptr, position, size);
         command_ptr += size/sizeof(int);

         *command_ptr++ = CMD_MEDIA_STATE_FLUSH;
         *command_ptr++ = 0;
    }

    *command_ptr++ = 0;
    *command_ptr++ = MI_BATCH_BUFFER_END;

    dri_bo_unmap(vpp_gpe_ctx->vpp_batchbuffer.bo);

    return VA_STATUS_SUCCESS;
}

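/*
 * On Gen8 MI_BATCH_BUFFER_START takes a 48-bit address, hence the
 * three-DWORD form below (header, address low, address high).
 */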
static VAStatus
gen8_gpe_process_pipeline_setup(VADriverContextP ctx,
                   struct vpp_gpe_context *vpp_gpe_ctx)
{
    intel_batchbuffer_start_atomic(vpp_gpe_ctx->batch, 0x1000);
    intel_batchbuffer_emit_mi_flush(vpp_gpe_ctx->batch);

    gen8_gpe_pipeline_setup(ctx, &vpp_gpe_ctx->gpe_ctx, vpp_gpe_ctx->batch);

    gen8_gpe_process_parameters_fill(ctx, vpp_gpe_ctx);

    BEGIN_BATCH(vpp_gpe_ctx->batch, 3);
    OUT_BATCH(vpp_gpe_ctx->batch, MI_BATCH_BUFFER_START | (1 << 8) | (1 << 0));
    OUT_RELOC(vpp_gpe_ctx->batch,
              vpp_gpe_ctx->vpp_batchbuffer.bo,
              I915_GEM_DOMAIN_COMMAND, 0,
              0);
    OUT_BATCH(vpp_gpe_ctx->batch, 0);

    ADVANCE_BATCH(vpp_gpe_ctx->batch);

    intel_batchbuffer_end_atomic(vpp_gpe_ctx->batch);

    return VA_STATUS_SUCCESS;
}

static VAStatus
gen8_gpe_process_init(VADriverContextP ctx,
                 struct vpp_gpe_context *vpp_gpe_ctx)
{
    struct i965_driver_data *i965 = i965_driver_data(ctx);
    dri_bo *bo;

    /* Unlike Gen7, each Gen8 MEDIA_OBJECT is followed by a two-DWORD
     * MEDIA_STATE_FLUSH, so budget eight header DWORDs per thread. */
    unsigned int batch_buf_size = vpp_gpe_ctx->thread_num *
                 (vpp_gpe_ctx->thread_param_size + 8 * sizeof(int)) + 16;

    vpp_gpe_ctx->vpp_kernel_return.num_blocks = vpp_gpe_ctx->thread_num;
    vpp_gpe_ctx->vpp_kernel_return.size_block = 16;
    vpp_gpe_ctx->vpp_kernel_return.pitch = 1;

    unsigned int kernel_return_size = vpp_gpe_ctx->vpp_kernel_return.num_blocks
           * vpp_gpe_ctx->vpp_kernel_return.size_block;

    dri_bo_unreference(vpp_gpe_ctx->vpp_batchbuffer.bo);
    bo = dri_bo_alloc(i965->intel.bufmgr,
                      "vpp batch buffer",
                       batch_buf_size, 0x1000);
    vpp_gpe_ctx->vpp_batchbuffer.bo = bo;
    dri_bo_reference(vpp_gpe_ctx->vpp_batchbuffer.bo);

    dri_bo_unreference(vpp_gpe_ctx->vpp_kernel_return.bo);
    bo = dri_bo_alloc(i965->intel.bufmgr,
                      "vpp kernel return buffer",
                       kernel_return_size, 0x1000);
    vpp_gpe_ctx->vpp_kernel_return.bo = bo;
    dri_bo_reference(vpp_gpe_ctx->vpp_kernel_return.bo);

    vpp_gpe_ctx->gpe_context_init(ctx, &vpp_gpe_ctx->gpe_ctx);

    return VA_STATUS_SUCCESS;
}

static VAStatus
gen8_gpe_process_prepare(VADriverContextP ctx,
                    struct vpp_gpe_context *vpp_gpe_ctx)
{
    /* Set up all the memory objects */
    gen8_gpe_process_surfaces_setup(ctx, vpp_gpe_ctx);
    gen8_gpe_process_interface_setup(ctx, vpp_gpe_ctx);
    //gen8_gpe_process_constant_setup(ctx, vpp_gpe_ctx);

    /* Program the media pipeline */
    gen8_gpe_process_pipeline_setup(ctx, vpp_gpe_ctx);

    return VA_STATUS_SUCCESS;
}

static VAStatus
gen8_gpe_process_run(VADriverContextP ctx,
                struct vpp_gpe_context *vpp_gpe_ctx)
{
    intel_batchbuffer_flush(vpp_gpe_ctx->batch);

    return VA_STATUS_SUCCESS;
}

static VAStatus
gen8_gpe_process(VADriverContextP ctx,
                  struct vpp_gpe_context *vpp_gpe_ctx)
{
    VAStatus va_status = VA_STATUS_SUCCESS;

    va_status = gen8_gpe_process_init(ctx, vpp_gpe_ctx);
    if (va_status != VA_STATUS_SUCCESS)
        return va_status;

    va_status = gen8_gpe_process_prepare(ctx, vpp_gpe_ctx);
    if (va_status != VA_STATUS_SUCCESS)
        return va_status;

    va_status = gen8_gpe_process_run(ctx, vpp_gpe_ctx);
    if (va_status != VA_STATUS_SUCCESS)
        return va_status;

    return VA_STATUS_SUCCESS;
}

static VAStatus
vpp_gpe_process(VADriverContextP ctx,
                  struct vpp_gpe_context *vpp_gpe_ctx)
{
    struct i965_driver_data *i965 = i965_driver_data(ctx);

    if (IS_HASWELL(i965->intel.device_id))
        return gen75_gpe_process(ctx, vpp_gpe_ctx);
    else if (IS_GEN8(i965->intel.device_id))
        return gen8_gpe_process(ctx, vpp_gpe_ctx);

    return VA_STATUS_ERROR_UNIMPLEMENTED;
}

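/*
 * Unsharp-mask sharpening in three GPE passes:
 *   1. horizontal blur of the input into the output surface,
 *   2. vertical blur of that result into a temporary NV12 surface,
 *   3. unmask: blend the blurred temporary with the original input into
 *      the original output.
 * Each pass rebuilds the per-thread inline data (one strip of the frame
 * per thread: 16 rows, 16 columns, or 4 rows respectively) and reruns
 * the media pipeline.
 */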
static VAStatus
vpp_gpe_process_sharpening(VADriverContextP ctx,
                             struct vpp_gpe_context *vpp_gpe_ctx)
{
     VAStatus va_status = VA_STATUS_SUCCESS;
     struct i965_driver_data *i965 = i965_driver_data(ctx);
     struct object_surface *origin_in_obj_surface = vpp_gpe_ctx->surface_input_object[0];
     struct object_surface *origin_out_obj_surface = vpp_gpe_ctx->surface_output_object;

     VAProcPipelineParameterBuffer *pipe = vpp_gpe_ctx->pipeline_param;
     VABufferID *filter_ids = (VABufferID *)pipe->filters;
     struct object_buffer *obj_buf = BUFFER(*(filter_ids + 0));

     assert(obj_buf && obj_buf->buffer_store && obj_buf->buffer_store->buffer);

     if (!obj_buf ||
         !obj_buf->buffer_store ||
         !obj_buf->buffer_store->buffer)
         goto error;

     VAProcFilterParameterBuffer *filter =
                  (VAProcFilterParameterBuffer *)obj_buf->buffer_store->buffer;
     float sharpening_intensity = filter->value;

     ThreadParameterSharpening thr_param;
     unsigned int thr_param_size = sizeof(ThreadParameterSharpening);
     unsigned int i;
     unsigned char *pos;

     if (vpp_gpe_ctx->is_first_frame) {
         vpp_gpe_ctx->sub_shader_sum = 3;
         struct i965_kernel *vpp_kernels = NULL;

         if (IS_HASWELL(i965->intel.device_id))
             vpp_kernels = gen75_vpp_sharpening_kernels;
         else if (IS_GEN8(i965->intel.device_id))
             vpp_kernels = gen8_vpp_sharpening_kernels;

         /* Guard against loading from an uninitialized pointer on an
          * unexpected platform. */
         if (!vpp_kernels)
             return VA_STATUS_ERROR_UNIMPLEMENTED;

         vpp_gpe_ctx->gpe_load_kernels(ctx,
                               &vpp_gpe_ctx->gpe_ctx,
                               vpp_kernels,
                               vpp_gpe_ctx->sub_shader_sum);
     }

     if (vpp_gpe_ctx->surface_tmp == VA_INVALID_ID) {
        va_status = i965_CreateSurfaces(ctx,
                                       vpp_gpe_ctx->in_frame_w,
                                       vpp_gpe_ctx->in_frame_h,
                                       VA_RT_FORMAT_YUV420,
                                       1,
                                       &vpp_gpe_ctx->surface_tmp);
        assert(va_status == VA_STATUS_SUCCESS);

        struct object_surface *obj_surf = SURFACE(vpp_gpe_ctx->surface_tmp);
        assert(obj_surf);

        if (obj_surf) {
            i965_check_alloc_surface_bo(ctx, obj_surf, 1, VA_FOURCC('N','V','1','2'),
                                        SUBSAMPLE_YUV420);
            vpp_gpe_ctx->surface_tmp_object = obj_surf;
        }
     }

    assert(sharpening_intensity >= 0.0 && sharpening_intensity <= 1.0);
    thr_param.l_amount = (unsigned int)(sharpening_intensity * 128);
    thr_param.d_amount = (unsigned int)(sharpening_intensity * 128);

    thr_param.base.pic_width = vpp_gpe_ctx->in_frame_w;
    thr_param.base.pic_height = vpp_gpe_ctx->in_frame_h;

    /* Step 1: horizontal blur process */
    vpp_gpe_ctx->forward_surf_sum = 0;
    vpp_gpe_ctx->backward_surf_sum = 0;

    vpp_gpe_ctx->thread_num = vpp_gpe_ctx->in_frame_h/16;
    vpp_gpe_ctx->thread_param_size = thr_param_size;
    vpp_gpe_ctx->thread_param = (unsigned char *)malloc(vpp_gpe_ctx->thread_param_size
                                                       * vpp_gpe_ctx->thread_num);
    pos = vpp_gpe_ctx->thread_param;

    if (!pos) {
        return VA_STATUS_ERROR_ALLOCATION_FAILED;
    }

    for (i = 0; i < vpp_gpe_ctx->thread_num; i++) {
        thr_param.base.v_pos = 16 * i;
        thr_param.base.h_pos = 0;
        memcpy(pos, &thr_param, thr_param_size);
        pos += thr_param_size;
    }

    vpp_gpe_ctx->sub_shader_index = 0;
    va_status = vpp_gpe_process(ctx, vpp_gpe_ctx);
    free(vpp_gpe_ctx->thread_param);

    /* Step 2: vertical blur process */
    vpp_gpe_ctx->surface_input_object[0] = vpp_gpe_ctx->surface_output_object;
    vpp_gpe_ctx->surface_output_object = vpp_gpe_ctx->surface_tmp_object;
    vpp_gpe_ctx->forward_surf_sum = 0;
    vpp_gpe_ctx->backward_surf_sum = 0;

    vpp_gpe_ctx->thread_num = vpp_gpe_ctx->in_frame_w/16;
    vpp_gpe_ctx->thread_param_size = thr_param_size;
    vpp_gpe_ctx->thread_param = (unsigned char *)malloc(vpp_gpe_ctx->thread_param_size
                                                       * vpp_gpe_ctx->thread_num);
    pos = vpp_gpe_ctx->thread_param;

    if (!pos) {
        return VA_STATUS_ERROR_ALLOCATION_FAILED;
    }

    for (i = 0; i < vpp_gpe_ctx->thread_num; i++) {
        thr_param.base.v_pos = 0;
        thr_param.base.h_pos = 16 * i;
        memcpy(pos, &thr_param, thr_param_size);
        pos += thr_param_size;
    }

    vpp_gpe_ctx->sub_shader_index = 1;
    vpp_gpe_process(ctx, vpp_gpe_ctx);
    free(vpp_gpe_ctx->thread_param);

    /* Step 3: apply the blur to the original surface */
    vpp_gpe_ctx->surface_input_object[0]  = origin_in_obj_surface;
    vpp_gpe_ctx->surface_input_object[1]  = vpp_gpe_ctx->surface_tmp_object;
    vpp_gpe_ctx->surface_output_object    = origin_out_obj_surface;
    vpp_gpe_ctx->forward_surf_sum  = 1;
    vpp_gpe_ctx->backward_surf_sum = 0;

    vpp_gpe_ctx->thread_num = vpp_gpe_ctx->in_frame_h/4;
    vpp_gpe_ctx->thread_param_size = thr_param_size;
    vpp_gpe_ctx->thread_param = (unsigned char *)malloc(vpp_gpe_ctx->thread_param_size
                                                       * vpp_gpe_ctx->thread_num);
    pos = vpp_gpe_ctx->thread_param;

    if (!pos) {
        return VA_STATUS_ERROR_ALLOCATION_FAILED;
    }

    for (i = 0; i < vpp_gpe_ctx->thread_num; i++) {
        thr_param.base.v_pos = 4 * i;
        thr_param.base.h_pos = 0;
        memcpy(pos, &thr_param, thr_param_size);
        pos += thr_param_size;
    }

    vpp_gpe_ctx->sub_shader_index = 2;
    va_status = vpp_gpe_process(ctx, vpp_gpe_ctx);
    free(vpp_gpe_ctx->thread_param);

    return va_status;

error:
    return VA_STATUS_ERROR_INVALID_PARAMETER;
}

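/*
 * Entry point from the VPP pipeline: collects the input surface plus any
 * forward/backward references, then dispatches the requested filter.
 * Only VAProcFilterSharpening is implemented on this path.
 */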
VAStatus vpp_gpe_process_picture(VADriverContextP ctx,
                    struct vpp_gpe_context *vpp_gpe_ctx)
{
    VAStatus va_status = VA_STATUS_SUCCESS;
    struct i965_driver_data *i965 = i965_driver_data(ctx);
    VAProcPipelineParameterBuffer *pipe = vpp_gpe_ctx->pipeline_param;
    VAProcFilterParameterBuffer *filter = NULL;
    unsigned int i;
    struct object_surface *obj_surface = NULL;

    if (pipe->num_filters && !pipe->filters)
        goto error;

    for (i = 0; i < pipe->num_filters; i++) {
        struct object_buffer *obj_buf = BUFFER(pipe->filters[i]);

        assert(obj_buf && obj_buf->buffer_store && obj_buf->buffer_store->buffer);

        if (!obj_buf ||
            !obj_buf->buffer_store ||
            !obj_buf->buffer_store->buffer)
            goto error;

        filter = (VAProcFilterParameterBuffer *)obj_buf->buffer_store->buffer;
        if (filter->type == VAProcFilterSharpening) {
           break;
        }
    }

    assert(pipe->num_forward_references + pipe->num_backward_references <= 4);
    vpp_gpe_ctx->surface_input_object[0] = vpp_gpe_ctx->surface_pipeline_input_object;

    vpp_gpe_ctx->forward_surf_sum = 0;
    vpp_gpe_ctx->backward_surf_sum = 0;

    for (i = 0; i < pipe->num_forward_references; i++)
    {
        obj_surface = SURFACE(pipe->forward_references[i]);

        assert(obj_surface);
        vpp_gpe_ctx->surface_input_object[i + 1] = obj_surface;
        vpp_gpe_ctx->forward_surf_sum++;
    }

    for (i = 0; i < pipe->num_backward_references; i++)
    {
        obj_surface = SURFACE(pipe->backward_references[i]);

        assert(obj_surface);
        vpp_gpe_ctx->surface_input_object[vpp_gpe_ctx->forward_surf_sum + 1 + i] = obj_surface;
        vpp_gpe_ctx->backward_surf_sum++;
    }

    obj_surface = vpp_gpe_ctx->surface_input_object[0];
    vpp_gpe_ctx->in_frame_w = obj_surface->orig_width;
    vpp_gpe_ctx->in_frame_h = obj_surface->orig_height;

    if (filter && filter->type == VAProcFilterSharpening) {
       va_status = vpp_gpe_process_sharpening(ctx, vpp_gpe_ctx);
    } else {
       va_status = VA_STATUS_ERROR_ATTR_NOT_SUPPORTED;
    }

    vpp_gpe_ctx->is_first_frame = 0;

    return va_status;

error:
    return VA_STATUS_ERROR_INVALID_PARAMETER;
}

void
vpp_gpe_context_destroy(VADriverContextP ctx,
                               struct vpp_gpe_context *vpp_gpe_ctx)
{
    dri_bo_unreference(vpp_gpe_ctx->vpp_batchbuffer.bo);
    vpp_gpe_ctx->vpp_batchbuffer.bo = NULL;

    dri_bo_unreference(vpp_gpe_ctx->vpp_kernel_return.bo);
    vpp_gpe_ctx->vpp_kernel_return.bo = NULL;

    vpp_gpe_ctx->gpe_context_destroy(&vpp_gpe_ctx->gpe_ctx);

    if (vpp_gpe_ctx->surface_tmp != VA_INVALID_ID) {
        assert(vpp_gpe_ctx->surface_tmp_object != NULL);
        i965_DestroySurfaces(ctx, &vpp_gpe_ctx->surface_tmp, 1);
        vpp_gpe_ctx->surface_tmp = VA_INVALID_ID;
        vpp_gpe_ctx->surface_tmp_object = NULL;
    }

    /* The batch was created with intel_batchbuffer_new(), so release it
     * with its matching destructor rather than a bare free(). */
    intel_batchbuffer_free(vpp_gpe_ctx->batch);

    free(vpp_gpe_ctx);
}

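/*
 * Note that several VFE fields below follow the hardware's "minus one"
 * convention (e.g. max_num_threads = 60 - 1 programs 60 threads).  A
 * minimal usage sketch, assuming the caller owns the returned context:
 *
 *     struct vpp_gpe_context *gpe = vpp_gpe_context_init(ctx);
 *     // ... set the surface objects and pipeline_param, then ...
 *     vpp_gpe_process_picture(ctx, gpe);
 *     vpp_gpe_context_destroy(ctx, gpe);
 */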
struct vpp_gpe_context *
vpp_gpe_context_init(VADriverContextP ctx)
{
    struct i965_driver_data *i965 = i965_driver_data(ctx);
    struct vpp_gpe_context *vpp_gpe_ctx = calloc(1, sizeof(struct vpp_gpe_context));
    struct i965_gpe_context *gpe_ctx;

    if (!vpp_gpe_ctx)
        return NULL;

    gpe_ctx = &(vpp_gpe_ctx->gpe_ctx);

    assert(IS_HASWELL(i965->intel.device_id) ||
           IS_GEN8(i965->intel.device_id));

    vpp_gpe_ctx->surface_tmp = VA_INVALID_ID;
    vpp_gpe_ctx->surface_tmp_object = NULL;
    vpp_gpe_ctx->batch = intel_batchbuffer_new(&i965->intel, I915_EXEC_RENDER, 0);
    vpp_gpe_ctx->is_first_frame = 1;

    gpe_ctx->vfe_state.max_num_threads = 60 - 1;
    gpe_ctx->vfe_state.num_urb_entries = 16;
    gpe_ctx->vfe_state.gpgpu_mode = 0;
    gpe_ctx->vfe_state.urb_entry_size = 59 - 1;
    gpe_ctx->vfe_state.curbe_allocation_size = CURBE_ALLOCATION_SIZE - 1;

    if (IS_HASWELL(i965->intel.device_id)) {
        vpp_gpe_ctx->gpe_context_init     = i965_gpe_context_init;
        vpp_gpe_ctx->gpe_context_destroy  = i965_gpe_context_destroy;
        vpp_gpe_ctx->gpe_load_kernels     = i965_gpe_load_kernels;
        gpe_ctx->surface_state_binding_table.length =
               (SURFACE_STATE_PADDED_SIZE_GEN7 + sizeof(unsigned int)) * MAX_MEDIA_SURFACES_GEN6;

        gpe_ctx->curbe.length = CURBE_TOTAL_DATA_LENGTH;
        gpe_ctx->idrt.max_entries = MAX_INTERFACE_DESC_GEN6;
        gpe_ctx->idrt.entry_size = sizeof(struct gen6_interface_descriptor_data);

    } else if (IS_GEN8(i965->intel.device_id)) {
        vpp_gpe_ctx->gpe_context_init     = gen8_gpe_context_init;
        vpp_gpe_ctx->gpe_context_destroy  = gen8_gpe_context_destroy;
        vpp_gpe_ctx->gpe_load_kernels     = gen8_gpe_load_kernels;
        gpe_ctx->surface_state_binding_table.length =
               (SURFACE_STATE_PADDED_SIZE_GEN8 + sizeof(unsigned int)) * MAX_MEDIA_SURFACES_GEN6;

        gpe_ctx->curbe_size = CURBE_TOTAL_DATA_LENGTH;
        gpe_ctx->idrt_size  = sizeof(struct gen8_interface_descriptor_data) * MAX_INTERFACE_DESC_GEN6;

    }

    return vpp_gpe_ctx;
}