/*
 * Copyright © 2011 Intel Corporation
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the
 * "Software"), to deal in the Software without restriction, including
 * without limitation the rights to use, copy, modify, merge, publish,
 * distribute, sub license, and/or sell copies of the Software, and to
 * permit persons to whom the Software is furnished to do so, subject to
 * the following conditions:
 *
 * The above copyright notice and this permission notice (including the
 * next paragraph) shall be included in all copies or substantial portions
 * of the Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
 * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
 * IN NO EVENT SHALL PRECISION INSIGHT AND/OR ITS SUPPLIERS BE LIABLE FOR
 * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
 * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
 * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
 *
 * Authors:
 *   Li Xiaowei <xiaowei.a.li@intel.com>
 */
27
28 #include <stdio.h>
29 #include <stdlib.h>
30 #include <string.h>
31 #include <assert.h>
32
33 #include "intel_batchbuffer.h"
34 #include "intel_driver.h"
35
36 #include "i965_structs.h"
37 #include "i965_defines.h"
38 #include "i965_drv_video.h"
39 #include "gen75_vpp_gpe.h"
40
#define MAX_INTERFACE_DESC_GEN6      MAX_GPE_KERNELS
#define MAX_MEDIA_SURFACES_GEN6      34

#define SURFACE_STATE_OFFSET_GEN7(index)   (SURFACE_STATE_PADDED_SIZE_GEN7 * (index))
#define BINDING_TABLE_OFFSET_GEN7(index)   (SURFACE_STATE_OFFSET_GEN7(MAX_MEDIA_SURFACES_GEN6) + sizeof(unsigned int) * (index))

#define SURFACE_STATE_OFFSET_GEN8(index)   (SURFACE_STATE_PADDED_SIZE_GEN8 * (index))
#define BINDING_TABLE_OFFSET_GEN8(index)   (SURFACE_STATE_OFFSET_GEN8(MAX_MEDIA_SURFACES_GEN6) + sizeof(unsigned int) * (index))

#define CURBE_ALLOCATION_SIZE   37
#define CURBE_TOTAL_DATA_LENGTH (4 * 32)
#define CURBE_URB_ENTRY_LENGTH  4

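/*
 * Layout implied by the macros above: the surface-state buffer first holds
 * MAX_MEDIA_SURFACES_GEN6 (34) padded surface-state entries, immediately
 * followed by the binding table, an array of unsigned int slots.  So
 * BINDING_TABLE_OFFSET_GENx(i) addresses the i-th binding-table slot, which
 * is programmed with SURFACE_STATE_OFFSET_GENx(i) so the hardware can find
 * the i-th surface state.
 */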
extern VAStatus
i965_CreateSurfaces(VADriverContextP ctx,
                    int width,
                    int height,
                    int format,
                    int num_surfaces,
                    VASurfaceID *surfaces);

/* Sharpening kernels for Haswell */
static const unsigned int gen75_gpe_sharpening_h_blur[][4] = {
    #include "shaders/post_processing/gen75/sharpening_h_blur.g75b"
};
static const unsigned int gen75_gpe_sharpening_v_blur[][4] = {
    #include "shaders/post_processing/gen75/sharpening_v_blur.g75b"
};
static const unsigned int gen75_gpe_sharpening_unmask[][4] = {
    #include "shaders/post_processing/gen75/sharpening_unmask.g75b"
};
static struct i965_kernel gen75_vpp_sharpening_kernels[] = {
    {
        "vpp: sharpening(horizontal blur)",
        VPP_GPE_SHARPENING,
        gen75_gpe_sharpening_h_blur,
        sizeof(gen75_gpe_sharpening_h_blur),
        NULL
    },
    {
        "vpp: sharpening(vertical blur)",
        VPP_GPE_SHARPENING,
        gen75_gpe_sharpening_v_blur,
        sizeof(gen75_gpe_sharpening_v_blur),
        NULL
    },
    {
        "vpp: sharpening(unmask)",
        VPP_GPE_SHARPENING,
        gen75_gpe_sharpening_unmask,
        sizeof(gen75_gpe_sharpening_unmask),
        NULL
    },
};

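/*
 * These three kernels implement sharpening as unsharp masking; see
 * vpp_gpe_process_sharpening() below, which runs them in sequence:
 * a horizontal blur, a vertical blur, then an "unmask" pass that
 * combines the blurred copy with the original frame.
 */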
/* Sharpening kernels for Broadwell */
static const unsigned int gen8_gpe_sharpening_h_blur[][4] = {
    #include "shaders/post_processing/gen8/sharpening_h_blur.g8b"
};
static const unsigned int gen8_gpe_sharpening_v_blur[][4] = {
    #include "shaders/post_processing/gen8/sharpening_v_blur.g8b"
};
static const unsigned int gen8_gpe_sharpening_unmask[][4] = {
    #include "shaders/post_processing/gen8/sharpening_unmask.g8b"
};

static struct i965_kernel gen8_vpp_sharpening_kernels[] = {
    {
        "vpp: sharpening(horizontal blur)",
        VPP_GPE_SHARPENING,
        gen8_gpe_sharpening_h_blur,
        sizeof(gen8_gpe_sharpening_h_blur),
        NULL
    },
    {
        "vpp: sharpening(vertical blur)",
        VPP_GPE_SHARPENING,
        gen8_gpe_sharpening_v_blur,
        sizeof(gen8_gpe_sharpening_v_blur),
        NULL
    },
    {
        "vpp: sharpening(unmask)",
        VPP_GPE_SHARPENING,
        gen8_gpe_sharpening_unmask,
        sizeof(gen8_gpe_sharpening_unmask),
        NULL
    },
};

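/*
 * Bind all surfaces the kernels access.  Binding-table slots are assigned
 * in pairs (even index = luma plane, odd index = chroma plane of the same
 * NV12 surface): first the current input and any forward/backward
 * reference surfaces, then the output surface, and finally the kernel
 * return buffer.
 */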
static VAStatus
gen75_gpe_process_surfaces_setup(VADriverContextP ctx,
                   struct vpp_gpe_context *vpp_gpe_ctx)
{
    struct object_surface *obj_surface;
    unsigned int i = 0;
    unsigned char input_surface_sum = (1 + vpp_gpe_ctx->forward_surf_sum +
                                       vpp_gpe_ctx->backward_surf_sum) * 2;

    /* Bind the input NV12 surfaces (luma + chroma) */
    for (i = 0; i < input_surface_sum; i += 2) {
        obj_surface = vpp_gpe_ctx->surface_input_object[i / 2];
        assert(obj_surface);
        gen7_gpe_media_rw_surface_setup(ctx,
                                        &vpp_gpe_ctx->gpe_ctx,
                                        obj_surface,
                                        BINDING_TABLE_OFFSET_GEN7(i),
                                        SURFACE_STATE_OFFSET_GEN7(i));

        gen75_gpe_media_chroma_surface_setup(ctx,
                                             &vpp_gpe_ctx->gpe_ctx,
                                             obj_surface,
                                             BINDING_TABLE_OFFSET_GEN7(i + 1),
                                             SURFACE_STATE_OFFSET_GEN7(i + 1));
    }

    /* Bind the output NV12 surface (luma + chroma) */
    obj_surface = vpp_gpe_ctx->surface_output_object;
    assert(obj_surface);
    gen7_gpe_media_rw_surface_setup(ctx,
                                    &vpp_gpe_ctx->gpe_ctx,
                                    obj_surface,
                                    BINDING_TABLE_OFFSET_GEN7(input_surface_sum),
                                    SURFACE_STATE_OFFSET_GEN7(input_surface_sum));
    gen75_gpe_media_chroma_surface_setup(ctx,
                                         &vpp_gpe_ctx->gpe_ctx,
                                         obj_surface,
                                         BINDING_TABLE_OFFSET_GEN7(input_surface_sum + 1),
                                         SURFACE_STATE_OFFSET_GEN7(input_surface_sum + 1));
    /* Bind the kernel return buffer surface */
    gen7_gpe_buffer_suface_setup(ctx,
                                 &vpp_gpe_ctx->gpe_ctx,
                                 &vpp_gpe_ctx->vpp_kernel_return,
                                 BINDING_TABLE_OFFSET_GEN7(input_surface_sum + 2),
                                 SURFACE_STATE_OFFSET_GEN7(input_surface_sum + 2));

    return VA_STATUS_SUCCESS;
}

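/*
 * Write one 32-byte interface descriptor per loaded sub-kernel so that a
 * MEDIA_OBJECT command can select a shader by descriptor index (the
 * sub_shader_index written in gen75_gpe_process_parameters_fill()).
 * Kernel start pointers are 64-byte aligned, hence the ">> 6"; the
 * relocation keeps desc0 pointing at the kernel BO wherever it lands.
 */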
static VAStatus
gen75_gpe_process_interface_setup(VADriverContextP ctx,
                    struct vpp_gpe_context *vpp_gpe_ctx)
{
    struct gen6_interface_descriptor_data *desc;
    dri_bo *bo = vpp_gpe_ctx->gpe_ctx.idrt.bo;
    int i;

    dri_bo_map(bo, 1);
    assert(bo->virtual);
    desc = bo->virtual;

    /* Set up the descriptor table */
    for (i = 0; i < vpp_gpe_ctx->sub_shader_sum; i++) {
        struct i965_kernel *kernel = &vpp_gpe_ctx->gpe_ctx.kernels[i];
        assert(sizeof(*desc) == 32);
        memset(desc, 0, sizeof(*desc));
        desc->desc0.kernel_start_pointer = (kernel->bo->offset >> 6);
        desc->desc2.sampler_count = 0; /* FIXME: */
        desc->desc2.sampler_state_pointer = 0;
        desc->desc3.binding_table_entry_count = 6; /* FIXME: */
        desc->desc3.binding_table_pointer = (BINDING_TABLE_OFFSET_GEN7(0) >> 5);
        desc->desc4.constant_urb_entry_read_offset = 0;
        desc->desc4.constant_urb_entry_read_length = 0;

        dri_bo_emit_reloc(bo,
                          I915_GEM_DOMAIN_INSTRUCTION, 0,
                          0,
                          i * sizeof(*desc) + offsetof(struct gen6_interface_descriptor_data, desc0),
                          kernel->bo);
        desc++;
    }

    dri_bo_unmap(bo);

    return VA_STATUS_SUCCESS;
}

static VAStatus
gen75_gpe_process_constant_fill(VADriverContextP ctx,
                   struct vpp_gpe_context *vpp_gpe_ctx)
{
    dri_bo_map(vpp_gpe_ctx->gpe_ctx.curbe.bo, 1);
    assert(vpp_gpe_ctx->gpe_ctx.curbe.bo->virtual);
    unsigned char *constant_buffer = vpp_gpe_ctx->gpe_ctx.curbe.bo->virtual;
    memcpy(constant_buffer, vpp_gpe_ctx->kernel_param,
           vpp_gpe_ctx->kernel_param_size);
    dri_bo_unmap(vpp_gpe_ctx->gpe_ctx.curbe.bo);

    return VA_STATUS_SUCCESS;
}

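/*
 * Build the second-level batch buffer: one MEDIA_OBJECT command per thread,
 * each consisting of six header DWORDs (opcode/length, the
 * interface-descriptor index selecting the sub-kernel, and four zeroed
 * DWORDs) followed by that thread's inline data.  The command's length
 * field excludes its first two DWORDs, which is where
 * "size/sizeof(int) + 6 - 2" comes from.
 */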
static VAStatus
gen75_gpe_process_parameters_fill(VADriverContextP ctx,
                           struct vpp_gpe_context *vpp_gpe_ctx)
{
    unsigned int *command_ptr;
    unsigned int i, size = vpp_gpe_ctx->thread_param_size;
    unsigned char *position = NULL;

    /* Set up the per-thread inline data */
    dri_bo_map(vpp_gpe_ctx->vpp_batchbuffer.bo, 1);
    command_ptr = vpp_gpe_ctx->vpp_batchbuffer.bo->virtual;

    for (i = 0; i < vpp_gpe_ctx->thread_num; i++) {
        *command_ptr++ = (CMD_MEDIA_OBJECT | (size / sizeof(int) + 6 - 2));
        *command_ptr++ = vpp_gpe_ctx->sub_shader_index;
        *command_ptr++ = 0;
        *command_ptr++ = 0;
        *command_ptr++ = 0;
        *command_ptr++ = 0;

        /* copy thread inline data */
        position = (unsigned char *)(vpp_gpe_ctx->thread_param + size * i);
        memcpy(command_ptr, position, size);
        command_ptr += size / sizeof(int);
    }

    *command_ptr++ = 0;
    *command_ptr++ = MI_BATCH_BUFFER_END;

    dri_bo_unmap(vpp_gpe_ctx->vpp_batchbuffer.bo);

    return VA_STATUS_SUCCESS;
}

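/*
 * Program the media pipeline state into the primary batch, then chain to
 * the MEDIA_OBJECT batch built above.  Bit 8 of MI_BATCH_BUFFER_START
 * marks a second-level batch, so execution returns to the primary batch
 * after the chained buffer's MI_BATCH_BUFFER_END.
 */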
static VAStatus
gen75_gpe_process_pipeline_setup(VADriverContextP ctx,
                   struct vpp_gpe_context *vpp_gpe_ctx)
{
    intel_batchbuffer_start_atomic(vpp_gpe_ctx->batch, 0x1000);
    intel_batchbuffer_emit_mi_flush(vpp_gpe_ctx->batch);

    gen6_gpe_pipeline_setup(ctx, &vpp_gpe_ctx->gpe_ctx, vpp_gpe_ctx->batch);

    gen75_gpe_process_parameters_fill(ctx, vpp_gpe_ctx);

    BEGIN_BATCH(vpp_gpe_ctx->batch, 2);
    OUT_BATCH(vpp_gpe_ctx->batch, MI_BATCH_BUFFER_START | (1 << 8));
    OUT_RELOC(vpp_gpe_ctx->batch,
              vpp_gpe_ctx->vpp_batchbuffer.bo,
              I915_GEM_DOMAIN_COMMAND, 0,
              0);
    ADVANCE_BATCH(vpp_gpe_ctx->batch);

    intel_batchbuffer_end_atomic(vpp_gpe_ctx->batch);

    return VA_STATUS_SUCCESS;
}

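/*
 * (Re)allocate the per-run buffers.  The second-level batch needs, per
 * thread, six MEDIA_OBJECT header DWORDs plus the inline parameters, with
 * 16 spare bytes covering the trailing MI_NOOP/MI_BATCH_BUFFER_END pair;
 * the kernel return buffer holds one 16-byte block per thread.
 */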
static VAStatus
gen75_gpe_process_init(VADriverContextP ctx,
                 struct vpp_gpe_context *vpp_gpe_ctx)
{
    struct i965_driver_data *i965 = i965_driver_data(ctx);
    dri_bo *bo;

    unsigned int batch_buf_size = vpp_gpe_ctx->thread_num *
                 (vpp_gpe_ctx->thread_param_size + 6 * sizeof(int)) + 16;

    vpp_gpe_ctx->vpp_kernel_return.num_blocks = vpp_gpe_ctx->thread_num;
    vpp_gpe_ctx->vpp_kernel_return.size_block = 16;
    vpp_gpe_ctx->vpp_kernel_return.pitch = 1;
    unsigned int kernel_return_size = vpp_gpe_ctx->vpp_kernel_return.num_blocks
           * vpp_gpe_ctx->vpp_kernel_return.size_block;

    dri_bo_unreference(vpp_gpe_ctx->vpp_batchbuffer.bo);
    bo = dri_bo_alloc(i965->intel.bufmgr,
                      "vpp batch buffer",
                      batch_buf_size, 0x1000);
    vpp_gpe_ctx->vpp_batchbuffer.bo = bo;
    dri_bo_reference(vpp_gpe_ctx->vpp_batchbuffer.bo);

    dri_bo_unreference(vpp_gpe_ctx->vpp_kernel_return.bo);
    bo = dri_bo_alloc(i965->intel.bufmgr,
                      "vpp kernel return buffer",
                      kernel_return_size, 0x1000);
    vpp_gpe_ctx->vpp_kernel_return.bo = bo;
    dri_bo_reference(vpp_gpe_ctx->vpp_kernel_return.bo);

    vpp_gpe_ctx->gpe_context_init(ctx, &vpp_gpe_ctx->gpe_ctx);

    return VA_STATUS_SUCCESS;
}

static VAStatus
gen75_gpe_process_prepare(VADriverContextP ctx,
                    struct vpp_gpe_context *vpp_gpe_ctx)
{
    /* Set up all the memory objects */
    gen75_gpe_process_surfaces_setup(ctx, vpp_gpe_ctx);
    gen75_gpe_process_interface_setup(ctx, vpp_gpe_ctx);
    //gen75_gpe_process_constant_setup(ctx, vpp_gpe_ctx);

    /* Program the media pipeline */
    gen75_gpe_process_pipeline_setup(ctx, vpp_gpe_ctx);

    return VA_STATUS_SUCCESS;
}

static VAStatus
gen75_gpe_process_run(VADriverContextP ctx,
                struct vpp_gpe_context *vpp_gpe_ctx)
{
    intel_batchbuffer_flush(vpp_gpe_ctx->batch);

    return VA_STATUS_SUCCESS;
}

static VAStatus
gen75_gpe_process(VADriverContextP ctx,
                  struct vpp_gpe_context *vpp_gpe_ctx)
{
    VAStatus va_status = VA_STATUS_SUCCESS;

    va_status = gen75_gpe_process_init(ctx, vpp_gpe_ctx);
    if (va_status != VA_STATUS_SUCCESS)
        return va_status;

    va_status = gen75_gpe_process_prepare(ctx, vpp_gpe_ctx);
    if (va_status != VA_STATUS_SUCCESS)
        return va_status;

    va_status = gen75_gpe_process_run(ctx, vpp_gpe_ctx);
    if (va_status != VA_STATUS_SUCCESS)
        return va_status;

    return VA_STATUS_SUCCESS;
}

static VAStatus
gen8_gpe_process_surfaces_setup(VADriverContextP ctx,
                   struct vpp_gpe_context *vpp_gpe_ctx)
{
    struct object_surface *obj_surface;
    unsigned int i = 0;
    unsigned char input_surface_sum = (1 + vpp_gpe_ctx->forward_surf_sum +
                                       vpp_gpe_ctx->backward_surf_sum) * 2;

    /* Bind the input NV12 surfaces (luma + chroma) */
    for (i = 0; i < input_surface_sum; i += 2) {
        obj_surface = vpp_gpe_ctx->surface_input_object[i / 2];
        assert(obj_surface);
        gen8_gpe_media_rw_surface_setup(ctx,
                                        &vpp_gpe_ctx->gpe_ctx,
                                        obj_surface,
                                        BINDING_TABLE_OFFSET_GEN8(i),
                                        SURFACE_STATE_OFFSET_GEN8(i));

        gen8_gpe_media_chroma_surface_setup(ctx,
                                            &vpp_gpe_ctx->gpe_ctx,
                                            obj_surface,
                                            BINDING_TABLE_OFFSET_GEN8(i + 1),
                                            SURFACE_STATE_OFFSET_GEN8(i + 1));
    }

    /* Bind the output NV12 surface (luma + chroma) */
    obj_surface = vpp_gpe_ctx->surface_output_object;
    assert(obj_surface);
    gen8_gpe_media_rw_surface_setup(ctx,
                                    &vpp_gpe_ctx->gpe_ctx,
                                    obj_surface,
                                    BINDING_TABLE_OFFSET_GEN8(input_surface_sum),
                                    SURFACE_STATE_OFFSET_GEN8(input_surface_sum));
    gen8_gpe_media_chroma_surface_setup(ctx,
                                        &vpp_gpe_ctx->gpe_ctx,
                                        obj_surface,
                                        BINDING_TABLE_OFFSET_GEN8(input_surface_sum + 1),
                                        SURFACE_STATE_OFFSET_GEN8(input_surface_sum + 1));
    /* Bind the kernel return buffer surface */
    gen7_gpe_buffer_suface_setup(ctx,
                                 &vpp_gpe_ctx->gpe_ctx,
                                 &vpp_gpe_ctx->vpp_kernel_return,
                                 BINDING_TABLE_OFFSET_GEN8(input_surface_sum + 2),
                                 SURFACE_STATE_OFFSET_GEN8(input_surface_sum + 2));

    return VA_STATUS_SUCCESS;
}

static VAStatus
gen8_gpe_process_interface_setup(VADriverContextP ctx,
                    struct vpp_gpe_context *vpp_gpe_ctx)
{
    struct gen8_interface_descriptor_data *desc;
    dri_bo *bo = vpp_gpe_ctx->gpe_ctx.dynamic_state.bo;
    int i;

    dri_bo_map(bo, 1);
    assert(bo->virtual);
    desc = (struct gen8_interface_descriptor_data *)(bo->virtual
                               + vpp_gpe_ctx->gpe_ctx.idrt_offset);

    /* Set up the descriptor table */
    for (i = 0; i < vpp_gpe_ctx->sub_shader_sum; i++) {
        struct i965_kernel *kernel;
        kernel = &vpp_gpe_ctx->gpe_ctx.kernels[i];
        assert(sizeof(*desc) == 32);
        memset(desc, 0, sizeof(*desc));
        desc->desc0.kernel_start_pointer = kernel->kernel_offset >> 6;
        desc->desc3.sampler_count = 0; /* FIXME: */
        desc->desc3.sampler_state_pointer = 0;
        desc->desc4.binding_table_entry_count = 6; /* FIXME: */
        desc->desc4.binding_table_pointer = (BINDING_TABLE_OFFSET_GEN8(0) >> 5);
        desc->desc5.constant_urb_entry_read_offset = 0;
        desc->desc5.constant_urb_entry_read_length = 0;

        desc++;
    }

    dri_bo_unmap(bo);

    return VA_STATUS_SUCCESS;
}

static VAStatus
gen8_gpe_process_constant_fill(VADriverContextP ctx,
                   struct vpp_gpe_context *vpp_gpe_ctx)
{
    dri_bo_map(vpp_gpe_ctx->gpe_ctx.dynamic_state.bo, 1);
    assert(vpp_gpe_ctx->gpe_ctx.dynamic_state.bo->virtual);
    unsigned char *constant_buffer = vpp_gpe_ctx->gpe_ctx.dynamic_state.bo->virtual;
    memcpy(constant_buffer, vpp_gpe_ctx->kernel_param,
           vpp_gpe_ctx->kernel_param_size);
    dri_bo_unmap(vpp_gpe_ctx->gpe_ctx.dynamic_state.bo);

    return VA_STATUS_SUCCESS;
}

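/*
 * Gen8 variant of the second-level batch fill.  The layout matches the
 * gen75 version above except that each MEDIA_OBJECT is followed by a
 * two-DWORD CMD_MEDIA_STATE_FLUSH, which newer generations expect between
 * media objects.
 */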
static VAStatus
gen8_gpe_process_parameters_fill(VADriverContextP ctx,
                           struct vpp_gpe_context *vpp_gpe_ctx)
{
    unsigned int *command_ptr;
    unsigned int i, size = vpp_gpe_ctx->thread_param_size;
    unsigned char *position = NULL;

    /* Set up the per-thread inline data */
    dri_bo_map(vpp_gpe_ctx->vpp_batchbuffer.bo, 1);
    command_ptr = vpp_gpe_ctx->vpp_batchbuffer.bo->virtual;

    for (i = 0; i < vpp_gpe_ctx->thread_num; i++) {
        *command_ptr++ = (CMD_MEDIA_OBJECT | (size / sizeof(int) + 6 - 2));
        *command_ptr++ = vpp_gpe_ctx->sub_shader_index;
        *command_ptr++ = 0;
        *command_ptr++ = 0;
        *command_ptr++ = 0;
        *command_ptr++ = 0;

        /* copy thread inline data */
        position = (unsigned char *)(vpp_gpe_ctx->thread_param + size * i);
        memcpy(command_ptr, position, size);
        command_ptr += size / sizeof(int);

        *command_ptr++ = CMD_MEDIA_STATE_FLUSH;
        *command_ptr++ = 0;
    }

    *command_ptr++ = 0;
    *command_ptr++ = MI_BATCH_BUFFER_END;

    dri_bo_unmap(vpp_gpe_ctx->vpp_batchbuffer.bo);

    return VA_STATUS_SUCCESS;
}

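/*
 * Gen8 variant of the pipeline setup.  MI_BATCH_BUFFER_START is three
 * DWORDs here since gen8 uses wider batch addresses (the extra
 * OUT_BATCH(0) carries the upper address bits), and bit 0 appears to
 * select the PPGTT address space.
 */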
static VAStatus
gen8_gpe_process_pipeline_setup(VADriverContextP ctx,
                   struct vpp_gpe_context *vpp_gpe_ctx)
{
    intel_batchbuffer_start_atomic(vpp_gpe_ctx->batch, 0x1000);
    intel_batchbuffer_emit_mi_flush(vpp_gpe_ctx->batch);

    gen8_gpe_pipeline_setup(ctx, &vpp_gpe_ctx->gpe_ctx, vpp_gpe_ctx->batch);

    gen8_gpe_process_parameters_fill(ctx, vpp_gpe_ctx);

    BEGIN_BATCH(vpp_gpe_ctx->batch, 3);
    OUT_BATCH(vpp_gpe_ctx->batch, MI_BATCH_BUFFER_START | (1 << 8) | (1 << 0));
    OUT_RELOC(vpp_gpe_ctx->batch,
              vpp_gpe_ctx->vpp_batchbuffer.bo,
              I915_GEM_DOMAIN_COMMAND, 0,
              0);
    OUT_BATCH(vpp_gpe_ctx->batch, 0);

    ADVANCE_BATCH(vpp_gpe_ctx->batch);

    intel_batchbuffer_end_atomic(vpp_gpe_ctx->batch);

    return VA_STATUS_SUCCESS;
}

static VAStatus
gen8_gpe_process_init(VADriverContextP ctx,
                 struct vpp_gpe_context *vpp_gpe_ctx)
{
    struct i965_driver_data *i965 = i965_driver_data(ctx);
    dri_bo *bo;

    /* Per thread: 6 DWORDs of MEDIA_OBJECT header, the inline parameters,
     * and 2 DWORDs of MEDIA_STATE_FLUSH, so 8 DWORDs of overhead (not 6
     * as on gen75). */
    unsigned int batch_buf_size = vpp_gpe_ctx->thread_num *
                 (vpp_gpe_ctx->thread_param_size + 8 * sizeof(int)) + 16;

    vpp_gpe_ctx->vpp_kernel_return.num_blocks = vpp_gpe_ctx->thread_num;
    vpp_gpe_ctx->vpp_kernel_return.size_block = 16;
    vpp_gpe_ctx->vpp_kernel_return.pitch = 1;

    unsigned int kernel_return_size = vpp_gpe_ctx->vpp_kernel_return.num_blocks
           * vpp_gpe_ctx->vpp_kernel_return.size_block;

    dri_bo_unreference(vpp_gpe_ctx->vpp_batchbuffer.bo);
    bo = dri_bo_alloc(i965->intel.bufmgr,
                      "vpp batch buffer",
                      batch_buf_size, 0x1000);
    vpp_gpe_ctx->vpp_batchbuffer.bo = bo;
    dri_bo_reference(vpp_gpe_ctx->vpp_batchbuffer.bo);

    dri_bo_unreference(vpp_gpe_ctx->vpp_kernel_return.bo);
    bo = dri_bo_alloc(i965->intel.bufmgr,
                      "vpp kernel return buffer",
                      kernel_return_size, 0x1000);
    vpp_gpe_ctx->vpp_kernel_return.bo = bo;
    dri_bo_reference(vpp_gpe_ctx->vpp_kernel_return.bo);

    vpp_gpe_ctx->gpe_context_init(ctx, &vpp_gpe_ctx->gpe_ctx);

    return VA_STATUS_SUCCESS;
}

static VAStatus
gen8_gpe_process_prepare(VADriverContextP ctx,
                    struct vpp_gpe_context *vpp_gpe_ctx)
{
    /* Set up all the memory objects */
    gen8_gpe_process_surfaces_setup(ctx, vpp_gpe_ctx);
    gen8_gpe_process_interface_setup(ctx, vpp_gpe_ctx);
    //gen8_gpe_process_constant_setup(ctx, vpp_gpe_ctx);

    /* Program the media pipeline */
    gen8_gpe_process_pipeline_setup(ctx, vpp_gpe_ctx);

    return VA_STATUS_SUCCESS;
}

static VAStatus
gen8_gpe_process_run(VADriverContextP ctx,
                struct vpp_gpe_context *vpp_gpe_ctx)
{
    intel_batchbuffer_flush(vpp_gpe_ctx->batch);

    return VA_STATUS_SUCCESS;
}

static VAStatus
gen8_gpe_process(VADriverContextP ctx,
                  struct vpp_gpe_context *vpp_gpe_ctx)
{
    VAStatus va_status = VA_STATUS_SUCCESS;

    va_status = gen8_gpe_process_init(ctx, vpp_gpe_ctx);
    if (va_status != VA_STATUS_SUCCESS)
        return va_status;

    va_status = gen8_gpe_process_prepare(ctx, vpp_gpe_ctx);
    if (va_status != VA_STATUS_SUCCESS)
        return va_status;

    va_status = gen8_gpe_process_run(ctx, vpp_gpe_ctx);
    if (va_status != VA_STATUS_SUCCESS)
        return va_status;

    return VA_STATUS_SUCCESS;
}

static VAStatus
vpp_gpe_process(VADriverContextP ctx,
                struct vpp_gpe_context *vpp_gpe_ctx)
{
    struct i965_driver_data *i965 = i965_driver_data(ctx);

    if (IS_HASWELL(i965->intel.device_info))
        return gen75_gpe_process(ctx, vpp_gpe_ctx);
    else if (IS_GEN8(i965->intel.device_info))
        return gen8_gpe_process(ctx, vpp_gpe_ctx);

    return VA_STATUS_ERROR_UNIMPLEMENTED;
}

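/*
 * Sharpen a frame by unsharp masking in three GPE passes, re-pointing the
 * input/output surfaces between passes:
 *   1. horizontal blur of the input, one thread per 16-row band;
 *   2. vertical blur of that result into a temporary surface, one thread
 *      per 16-column band;
 *   3. an unmask pass combining the blurred temporary with the original
 *      input (passed as a "forward" surface), one thread per 4-row band.
 * The filter's [0.0, 1.0] intensity is scaled to a 0-128 amount for the
 * kernels.
 */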
static VAStatus
vpp_gpe_process_sharpening(VADriverContextP ctx,
                             struct vpp_gpe_context *vpp_gpe_ctx)
{
    VAStatus va_status = VA_STATUS_SUCCESS;
    struct i965_driver_data *i965 = i965_driver_data(ctx);
    struct object_surface *origin_in_obj_surface = vpp_gpe_ctx->surface_input_object[0];
    struct object_surface *origin_out_obj_surface = vpp_gpe_ctx->surface_output_object;

    VAProcPipelineParameterBuffer *pipe = vpp_gpe_ctx->pipeline_param;
    VABufferID *filter_ids = (VABufferID *)pipe->filters;
    struct object_buffer *obj_buf = BUFFER(filter_ids[0]);

    assert(obj_buf && obj_buf->buffer_store && obj_buf->buffer_store->buffer);

    if (!obj_buf ||
        !obj_buf->buffer_store ||
        !obj_buf->buffer_store->buffer)
        goto error;

    VAProcFilterParameterBuffer *filter =
                 (VAProcFilterParameterBuffer *)obj_buf->buffer_store->buffer;
    float sharpening_intensity = filter->value;

    ThreadParameterSharpening thr_param;
    unsigned int thr_param_size = sizeof(ThreadParameterSharpening);
    unsigned int i;
    unsigned char *pos;

    if (vpp_gpe_ctx->is_first_frame) {
        struct i965_kernel *vpp_kernels = NULL;

        vpp_gpe_ctx->sub_shader_sum = 3;

        if (IS_HASWELL(i965->intel.device_info))
            vpp_kernels = gen75_vpp_sharpening_kernels;
        else if (IS_GEN8(i965->intel.device_info))
            vpp_kernels = gen8_vpp_sharpening_kernels;

        /* Guard against an unsupported generation leaving vpp_kernels
         * uninitialized. */
        if (!vpp_kernels)
            return VA_STATUS_ERROR_UNIMPLEMENTED;

        vpp_gpe_ctx->gpe_load_kernels(ctx,
                              &vpp_gpe_ctx->gpe_ctx,
                              vpp_kernels,
                              vpp_gpe_ctx->sub_shader_sum);
    }

    if (vpp_gpe_ctx->surface_tmp == VA_INVALID_ID) {
        va_status = i965_CreateSurfaces(ctx,
                                        vpp_gpe_ctx->in_frame_w,
                                        vpp_gpe_ctx->in_frame_h,
                                        VA_RT_FORMAT_YUV420,
                                        1,
                                        &vpp_gpe_ctx->surface_tmp);
        assert(va_status == VA_STATUS_SUCCESS);

        struct object_surface *obj_surf = SURFACE(vpp_gpe_ctx->surface_tmp);
        assert(obj_surf);

        if (obj_surf) {
            i965_check_alloc_surface_bo(ctx, obj_surf, 1, VA_FOURCC_NV12,
                                        SUBSAMPLE_YUV420);
            vpp_gpe_ctx->surface_tmp_object = obj_surf;
        }
    }

    assert(sharpening_intensity >= 0.0 && sharpening_intensity <= 1.0);
    thr_param.l_amount = (unsigned int)(sharpening_intensity * 128);
    thr_param.d_amount = (unsigned int)(sharpening_intensity * 128);

    thr_param.base.pic_width = vpp_gpe_ctx->in_frame_w;
    thr_param.base.pic_height = vpp_gpe_ctx->in_frame_h;

    /* Step 1: horizontal blur process */
    vpp_gpe_ctx->forward_surf_sum = 0;
    vpp_gpe_ctx->backward_surf_sum = 0;

    vpp_gpe_ctx->thread_num = vpp_gpe_ctx->in_frame_h / 16;
    vpp_gpe_ctx->thread_param_size = thr_param_size;
    vpp_gpe_ctx->thread_param = (unsigned char *)malloc(vpp_gpe_ctx->thread_param_size
                                                        * vpp_gpe_ctx->thread_num);
    pos = vpp_gpe_ctx->thread_param;

    if (!pos) {
        return VA_STATUS_ERROR_ALLOCATION_FAILED;
    }

    for (i = 0; i < vpp_gpe_ctx->thread_num; i++) {
        thr_param.base.v_pos = 16 * i;
        thr_param.base.h_pos = 0;
        memcpy(pos, &thr_param, thr_param_size);
        pos += thr_param_size;
    }

    vpp_gpe_ctx->sub_shader_index = 0;
    va_status = vpp_gpe_process(ctx, vpp_gpe_ctx);
    free(vpp_gpe_ctx->thread_param);

    /* Step 2: vertical blur process */
    vpp_gpe_ctx->surface_input_object[0] = vpp_gpe_ctx->surface_output_object;
    vpp_gpe_ctx->surface_output_object = vpp_gpe_ctx->surface_tmp_object;
    vpp_gpe_ctx->forward_surf_sum = 0;
    vpp_gpe_ctx->backward_surf_sum = 0;

    vpp_gpe_ctx->thread_num = vpp_gpe_ctx->in_frame_w / 16;
    vpp_gpe_ctx->thread_param_size = thr_param_size;
    vpp_gpe_ctx->thread_param = (unsigned char *)malloc(vpp_gpe_ctx->thread_param_size
                                                        * vpp_gpe_ctx->thread_num);
    pos = vpp_gpe_ctx->thread_param;

    if (!pos) {
        return VA_STATUS_ERROR_ALLOCATION_FAILED;
    }

    for (i = 0; i < vpp_gpe_ctx->thread_num; i++) {
        thr_param.base.v_pos = 0;
        thr_param.base.h_pos = 16 * i;
        memcpy(pos, &thr_param, thr_param_size);
        pos += thr_param_size;
    }

    vpp_gpe_ctx->sub_shader_index = 1;
    va_status = vpp_gpe_process(ctx, vpp_gpe_ctx);
    free(vpp_gpe_ctx->thread_param);

    /* Step 3: combine the blurred result with the original surface */
    vpp_gpe_ctx->surface_input_object[0]  = origin_in_obj_surface;
    vpp_gpe_ctx->surface_input_object[1]  = vpp_gpe_ctx->surface_tmp_object;
    vpp_gpe_ctx->surface_output_object    = origin_out_obj_surface;
    vpp_gpe_ctx->forward_surf_sum  = 1;
    vpp_gpe_ctx->backward_surf_sum = 0;

    vpp_gpe_ctx->thread_num = vpp_gpe_ctx->in_frame_h / 4;
    vpp_gpe_ctx->thread_param_size = thr_param_size;
    vpp_gpe_ctx->thread_param = (unsigned char *)malloc(vpp_gpe_ctx->thread_param_size
                                                        * vpp_gpe_ctx->thread_num);
    pos = vpp_gpe_ctx->thread_param;

    if (!pos) {
        return VA_STATUS_ERROR_ALLOCATION_FAILED;
    }

    for (i = 0; i < vpp_gpe_ctx->thread_num; i++) {
        thr_param.base.v_pos = 4 * i;
        thr_param.base.h_pos = 0;
        memcpy(pos, &thr_param, thr_param_size);
        pos += thr_param_size;
    }

    vpp_gpe_ctx->sub_shader_index = 2;
    va_status = vpp_gpe_process(ctx, vpp_gpe_ctx);
    free(vpp_gpe_ctx->thread_param);

    return va_status;

error:
    return VA_STATUS_ERROR_INVALID_PARAMETER;
}

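/*
 * Public entry point for GPE-based post-processing: find a sharpening
 * filter among the pipeline's filter buffers, collect the input surface
 * plus forward/backward references, then run the sharpening passes.
 * Filters other than VAProcFilterSharpening are rejected here.
 */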
VAStatus vpp_gpe_process_picture(VADriverContextP ctx,
                    struct vpp_gpe_context *vpp_gpe_ctx)
{
    VAStatus va_status = VA_STATUS_SUCCESS;
    struct i965_driver_data *i965 = i965_driver_data(ctx);
    VAProcPipelineParameterBuffer *pipe = vpp_gpe_ctx->pipeline_param;
    VAProcFilterParameterBuffer *filter = NULL;
    unsigned int i;
    struct object_surface *obj_surface = NULL;

    if (pipe->num_filters && !pipe->filters)
        goto error;

    for (i = 0; i < pipe->num_filters; i++) {
        struct object_buffer *obj_buf = BUFFER(pipe->filters[i]);

        assert(obj_buf && obj_buf->buffer_store && obj_buf->buffer_store->buffer);

        if (!obj_buf ||
            !obj_buf->buffer_store ||
            !obj_buf->buffer_store->buffer)
            goto error;

        filter = (VAProcFilterParameterBuffer *)obj_buf->buffer_store->buffer;
        if (filter->type == VAProcFilterSharpening) {
            break;
        }
    }

    assert(pipe->num_forward_references + pipe->num_backward_references <= 4);
    vpp_gpe_ctx->surface_input_object[0] = vpp_gpe_ctx->surface_pipeline_input_object;

    vpp_gpe_ctx->forward_surf_sum = 0;
    vpp_gpe_ctx->backward_surf_sum = 0;

    for (i = 0; i < pipe->num_forward_references; i++) {
        obj_surface = SURFACE(pipe->forward_references[i]);

        assert(obj_surface);
        vpp_gpe_ctx->surface_input_object[i + 1] = obj_surface;
        vpp_gpe_ctx->forward_surf_sum++;
    }

    for (i = 0; i < pipe->num_backward_references; i++) {
        obj_surface = SURFACE(pipe->backward_references[i]);

        assert(obj_surface);
        vpp_gpe_ctx->surface_input_object[vpp_gpe_ctx->forward_surf_sum + 1 + i] = obj_surface;
        vpp_gpe_ctx->backward_surf_sum++;
    }

    obj_surface = vpp_gpe_ctx->surface_input_object[0];
    vpp_gpe_ctx->in_frame_w = obj_surface->orig_width;
    vpp_gpe_ctx->in_frame_h = obj_surface->orig_height;

    if (filter && filter->type == VAProcFilterSharpening) {
        va_status = vpp_gpe_process_sharpening(ctx, vpp_gpe_ctx);
    } else {
        va_status = VA_STATUS_ERROR_ATTR_NOT_SUPPORTED;
    }

    vpp_gpe_ctx->is_first_frame = 0;

    return va_status;

error:
    return VA_STATUS_ERROR_INVALID_PARAMETER;
}

void
vpp_gpe_context_destroy(VADriverContextP ctx,
                               struct vpp_gpe_context *vpp_gpe_ctx)
{
    dri_bo_unreference(vpp_gpe_ctx->vpp_batchbuffer.bo);
    vpp_gpe_ctx->vpp_batchbuffer.bo = NULL;

    dri_bo_unreference(vpp_gpe_ctx->vpp_kernel_return.bo);
    vpp_gpe_ctx->vpp_kernel_return.bo = NULL;

    vpp_gpe_ctx->gpe_context_destroy(&vpp_gpe_ctx->gpe_ctx);

    if (vpp_gpe_ctx->surface_tmp != VA_INVALID_ID) {
        assert(vpp_gpe_ctx->surface_tmp_object != NULL);
        i965_DestroySurfaces(ctx, &vpp_gpe_ctx->surface_tmp, 1);
        vpp_gpe_ctx->surface_tmp = VA_INVALID_ID;
        vpp_gpe_ctx->surface_tmp_object = NULL;
    }

    /* The batch was created with intel_batchbuffer_new(), so release it
     * with intel_batchbuffer_free() rather than a bare free(). */
    intel_batchbuffer_free(vpp_gpe_ctx->batch);

    free(vpp_gpe_ctx);
}

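/*
 * Allocate the VPP GPE context and select the per-generation hooks
 * (context init/destroy, kernel loading) along with the
 * surface-state/binding-table sizing.  The VFE fields written as "N - 1"
 * presumably follow the hardware convention of encoding counts minus one.
 */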
struct vpp_gpe_context *
vpp_gpe_context_init(VADriverContextP ctx)
{
    struct i965_driver_data *i965 = i965_driver_data(ctx);
    struct vpp_gpe_context  *vpp_gpe_ctx = calloc(1, sizeof(struct vpp_gpe_context));
    struct i965_gpe_context *gpe_ctx;

    assert(IS_HASWELL(i965->intel.device_info) ||
           IS_GEN8(i965->intel.device_info));

    if (!vpp_gpe_ctx)
        return NULL;

    gpe_ctx = &vpp_gpe_ctx->gpe_ctx;

    vpp_gpe_ctx->surface_tmp = VA_INVALID_ID;
    vpp_gpe_ctx->surface_tmp_object = NULL;
    vpp_gpe_ctx->batch = intel_batchbuffer_new(&i965->intel, I915_EXEC_RENDER, 0);
    vpp_gpe_ctx->is_first_frame = 1;

    gpe_ctx->vfe_state.max_num_threads = 60 - 1;
    gpe_ctx->vfe_state.num_urb_entries = 16;
    gpe_ctx->vfe_state.gpgpu_mode = 0;
    gpe_ctx->vfe_state.urb_entry_size = 59 - 1;
    gpe_ctx->vfe_state.curbe_allocation_size = CURBE_ALLOCATION_SIZE - 1;

    if (IS_HASWELL(i965->intel.device_info)) {
        vpp_gpe_ctx->gpe_context_init     = i965_gpe_context_init;
        vpp_gpe_ctx->gpe_context_destroy  = i965_gpe_context_destroy;
        vpp_gpe_ctx->gpe_load_kernels     = i965_gpe_load_kernels;
        gpe_ctx->surface_state_binding_table.length =
               (SURFACE_STATE_PADDED_SIZE_GEN7 + sizeof(unsigned int)) * MAX_MEDIA_SURFACES_GEN6;

        gpe_ctx->curbe.length = CURBE_TOTAL_DATA_LENGTH;
        gpe_ctx->idrt.max_entries = MAX_INTERFACE_DESC_GEN6;
        gpe_ctx->idrt.entry_size = sizeof(struct gen6_interface_descriptor_data);

    } else if (IS_GEN8(i965->intel.device_info)) {
        vpp_gpe_ctx->gpe_context_init     = gen8_gpe_context_init;
        vpp_gpe_ctx->gpe_context_destroy  = gen8_gpe_context_destroy;
        vpp_gpe_ctx->gpe_load_kernels     = gen8_gpe_load_kernels;
        gpe_ctx->surface_state_binding_table.length =
               (SURFACE_STATE_PADDED_SIZE_GEN8 + sizeof(unsigned int)) * MAX_MEDIA_SURFACES_GEN6;

        gpe_ctx->curbe_size = CURBE_TOTAL_DATA_LENGTH;
        gpe_ctx->idrt_size  = sizeof(struct gen8_interface_descriptor_data) * MAX_INTERFACE_DESC_GEN6;
    }

    return vpp_gpe_ctx;
}