/*
 * Copyright © 2011 Intel Corporation
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the
 * "Software"), to deal in the Software without restriction, including
 * without limitation the rights to use, copy, modify, merge, publish,
 * distribute, sub license, and/or sell copies of the Software, and to
 * permit persons to whom the Software is furnished to do so, subject to
 * the following conditions:
 *
 * The above copyright notice and this permission notice (including the
 * next paragraph) shall be included in all copies or substantial portions
 * of the Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
 * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
 * IN NO EVENT SHALL PRECISION INSIGHT AND/OR ITS SUPPLIERS BE LIABLE FOR
 * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
 * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
 * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
 *
 * Authors:
 *   Li Xiaowei <xiaowei.a.li@intel.com>
 */

#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <assert.h>

#include "intel_batchbuffer.h"
#include "intel_driver.h"

#include "i965_structs.h"
#include "i965_defines.h"
#include "i965_drv_video.h"
#include "gen75_vpp_gpe.h"

#define MAX_INTERFACE_DESC_GEN6      MAX_GPE_KERNELS
#define MAX_MEDIA_SURFACES_GEN6      34

#define SURFACE_STATE_OFFSET_GEN7(index)   (SURFACE_STATE_PADDED_SIZE_GEN7 * (index))
#define BINDING_TABLE_OFFSET_GEN7(index)   (SURFACE_STATE_OFFSET_GEN7(MAX_MEDIA_SURFACES_GEN6) + sizeof(unsigned int) * (index))

#define SURFACE_STATE_OFFSET_GEN8(index)   (SURFACE_STATE_PADDED_SIZE_GEN8 * (index))
#define BINDING_TABLE_OFFSET_GEN8(index)   (SURFACE_STATE_OFFSET_GEN8(MAX_MEDIA_SURFACES_GEN6) + sizeof(unsigned int) * (index))

#define CURBE_ALLOCATION_SIZE   37
#define CURBE_TOTAL_DATA_LENGTH (4 * 32)
#define CURBE_URB_ENTRY_LENGTH  4

/* Shaders information for sharpening */
static const unsigned int gen75_gpe_sharpening_h_blur[][4] = {
   #include "shaders/post_processing/gen75/sharpening_h_blur.g75b"
};
static const unsigned int gen75_gpe_sharpening_v_blur[][4] = {
   #include "shaders/post_processing/gen75/sharpening_v_blur.g75b"
};
static const unsigned int gen75_gpe_sharpening_unmask[][4] = {
   #include "shaders/post_processing/gen75/sharpening_unmask.g75b"
};
static struct i965_kernel gen75_vpp_sharpening_kernels[] = {
    {
        "vpp: sharpening(horizontal blur)",
        VPP_GPE_SHARPENING,
        gen75_gpe_sharpening_h_blur,
        sizeof(gen75_gpe_sharpening_h_blur),
        NULL
    },
    {
        "vpp: sharpening(vertical blur)",
        VPP_GPE_SHARPENING,
        gen75_gpe_sharpening_v_blur,
        sizeof(gen75_gpe_sharpening_v_blur),
        NULL
    },
    {
        "vpp: sharpening(unmask)",
        VPP_GPE_SHARPENING,
        gen75_gpe_sharpening_unmask,
        sizeof(gen75_gpe_sharpening_unmask),
        NULL
    },
};

/* sharpening kernels for Broadwell */
static const unsigned int gen8_gpe_sharpening_h_blur[][4] = {
   #include "shaders/post_processing/gen8/sharpening_h_blur.g8b"
};
static const unsigned int gen8_gpe_sharpening_v_blur[][4] = {
   #include "shaders/post_processing/gen8/sharpening_v_blur.g8b"
};
static const unsigned int gen8_gpe_sharpening_unmask[][4] = {
   #include "shaders/post_processing/gen8/sharpening_unmask.g8b"
};

static struct i965_kernel gen8_vpp_sharpening_kernels[] = {
    {
        "vpp: sharpening(horizontal blur)",
        VPP_GPE_SHARPENING,
        gen8_gpe_sharpening_h_blur,
        sizeof(gen8_gpe_sharpening_h_blur),
        NULL
    },
    {
        "vpp: sharpening(vertical blur)",
        VPP_GPE_SHARPENING,
        gen8_gpe_sharpening_v_blur,
        sizeof(gen8_gpe_sharpening_v_blur),
        NULL
    },
    {
        "vpp: sharpening(unmask)",
        VPP_GPE_SHARPENING,
        gen8_gpe_sharpening_unmask,
        sizeof(gen8_gpe_sharpening_unmask),
        NULL
    },
};

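/*
 * Bind every memory object the Haswell (gen75) media walk needs: each
 * NV12 input surface gets two binding table entries (luma + chroma),
 * followed by the NV12 output surface and the kernel return buffer.
 */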
static VAStatus
gen75_gpe_process_surfaces_setup(VADriverContextP ctx,
                   struct vpp_gpe_context *vpp_gpe_ctx)
{
    struct object_surface *obj_surface;
    unsigned int i = 0;
    unsigned char input_surface_sum = (1 + vpp_gpe_ctx->forward_surf_sum +
                                         vpp_gpe_ctx->backward_surf_sum) * 2;

    /* Binding input NV12 surfaces (Luma + Chroma) */
    for( i = 0; i < input_surface_sum; i += 2){
         obj_surface = vpp_gpe_ctx->surface_input_object[i/2];
         assert(obj_surface);
         gen7_gpe_media_rw_surface_setup(ctx,
                                         &vpp_gpe_ctx->gpe_ctx,
                                         obj_surface,
                                         BINDING_TABLE_OFFSET_GEN7(i),
                                         SURFACE_STATE_OFFSET_GEN7(i));

         gen75_gpe_media_chroma_surface_setup(ctx,
                                         &vpp_gpe_ctx->gpe_ctx,
                                         obj_surface,
                                         BINDING_TABLE_OFFSET_GEN7(i + 1),
                                         SURFACE_STATE_OFFSET_GEN7(i + 1));
    }

    /* Binding output NV12 surface (Luma + Chroma) */
    obj_surface = vpp_gpe_ctx->surface_output_object;
    assert(obj_surface);
    gen7_gpe_media_rw_surface_setup(ctx,
                                    &vpp_gpe_ctx->gpe_ctx,
                                    obj_surface,
                                    BINDING_TABLE_OFFSET_GEN7(input_surface_sum),
                                    SURFACE_STATE_OFFSET_GEN7(input_surface_sum));
    gen75_gpe_media_chroma_surface_setup(ctx,
                                    &vpp_gpe_ctx->gpe_ctx,
                                    obj_surface,
                                    BINDING_TABLE_OFFSET_GEN7(input_surface_sum + 1),
                                    SURFACE_STATE_OFFSET_GEN7(input_surface_sum + 1));
    /* Bind kernel return buffer surface */
    gen7_gpe_buffer_suface_setup(ctx,
                                 &vpp_gpe_ctx->gpe_ctx,
                                 &vpp_gpe_ctx->vpp_kernel_return,
                                 BINDING_TABLE_OFFSET_GEN7(input_surface_sum + 2),
                                 SURFACE_STATE_OFFSET_GEN7(input_surface_sum + 2));

    return VA_STATUS_SUCCESS;
}

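/*
 * Fill the interface descriptor remap table (IDRT): one
 * gen6_interface_descriptor_data entry per loaded sub-kernel, pointing
 * at the kernel start address and the shared binding table.
 */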
static VAStatus
gen75_gpe_process_interface_setup(VADriverContextP ctx,
                    struct vpp_gpe_context *vpp_gpe_ctx)
{
    struct gen6_interface_descriptor_data *desc;
    dri_bo *bo = vpp_gpe_ctx->gpe_ctx.idrt.bo;
    int i;

    dri_bo_map(bo, 1);
    assert(bo->virtual);
    desc = bo->virtual;

    /* Setup the descriptor table */
    for(i = 0; i < vpp_gpe_ctx->sub_shader_sum; i++){
        struct i965_kernel *kernel = &vpp_gpe_ctx->gpe_ctx.kernels[i];
        assert(sizeof(*desc) == 32);
        memset(desc, 0, sizeof(*desc));
        desc->desc0.kernel_start_pointer = (kernel->bo->offset >> 6);
        desc->desc2.sampler_count = 0; /* FIXME: */
        desc->desc2.sampler_state_pointer = 0;
        desc->desc3.binding_table_entry_count = 6; /* FIXME: */
        desc->desc3.binding_table_pointer = (BINDING_TABLE_OFFSET_GEN7(0) >> 5);
        desc->desc4.constant_urb_entry_read_offset = 0;
        desc->desc4.constant_urb_entry_read_length = 0;

        dri_bo_emit_reloc(bo,
                          I915_GEM_DOMAIN_INSTRUCTION, 0,
                          0,
                          i * sizeof(*desc) + offsetof(struct gen6_interface_descriptor_data, desc0),
                          kernel->bo);
        desc++;
    }

    dri_bo_unmap(bo);

    return VA_STATUS_SUCCESS;
}

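/*
 * Copy the caller-supplied kernel parameters into the CURBE buffer as
 * constant data for the EU threads. Not currently wired into the
 * prepare path (the constant setup call there is commented out).
 */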
static VAStatus
gen75_gpe_process_constant_fill(VADriverContextP ctx,
                   struct vpp_gpe_context *vpp_gpe_ctx)
{
    dri_bo_map(vpp_gpe_ctx->gpe_ctx.curbe.bo, 1);
    assert(vpp_gpe_ctx->gpe_ctx.curbe.bo->virtual);
    unsigned char* constant_buffer = vpp_gpe_ctx->gpe_ctx.curbe.bo->virtual;
    memcpy(constant_buffer, vpp_gpe_ctx->kernel_param,
                            vpp_gpe_ctx->kernel_param_size);
    dri_bo_unmap(vpp_gpe_ctx->gpe_ctx.curbe.bo);

    return VA_STATUS_SUCCESS;
}

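/*
 * Build the second-level batch buffer: one MEDIA_OBJECT command per
 * thread with the per-thread parameters appended as inline data,
 * terminated by MI_BATCH_BUFFER_END.
 */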
static VAStatus
gen75_gpe_process_parameters_fill(VADriverContextP ctx,
                           struct vpp_gpe_context *vpp_gpe_ctx)
{
    unsigned int *command_ptr;
    unsigned int i, size = vpp_gpe_ctx->thread_param_size;
    unsigned char* position = NULL;

    /* Thread inline data setting */
    dri_bo_map(vpp_gpe_ctx->vpp_batchbuffer.bo, 1);
    command_ptr = vpp_gpe_ctx->vpp_batchbuffer.bo->virtual;

    for(i = 0; i < vpp_gpe_ctx->thread_num; i++)
    {
         *command_ptr++ = (CMD_MEDIA_OBJECT | (size/sizeof(int) + 6 - 2));
         *command_ptr++ = vpp_gpe_ctx->sub_shader_index;
         *command_ptr++ = 0;
         *command_ptr++ = 0;
         *command_ptr++ = 0;
         *command_ptr++ = 0;

         /* copy thread inline data */
         position = (unsigned char*)(vpp_gpe_ctx->thread_param + size * i);
         memcpy(command_ptr, position, size);
         command_ptr += size/sizeof(int);
    }

    *command_ptr++ = 0;
    *command_ptr++ = MI_BATCH_BUFFER_END;

    dri_bo_unmap(vpp_gpe_ctx->vpp_batchbuffer.bo);

    return VA_STATUS_SUCCESS;
}

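/*
 * Emit the first-level batch: MI flush, media pipeline state, then
 * chain to the second-level batch built above via MI_BATCH_BUFFER_START.
 */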
static VAStatus
gen75_gpe_process_pipeline_setup(VADriverContextP ctx,
                   struct vpp_gpe_context *vpp_gpe_ctx)
{
    intel_batchbuffer_start_atomic(vpp_gpe_ctx->batch, 0x1000);
    intel_batchbuffer_emit_mi_flush(vpp_gpe_ctx->batch);

    gen6_gpe_pipeline_setup(ctx, &vpp_gpe_ctx->gpe_ctx, vpp_gpe_ctx->batch);

    gen75_gpe_process_parameters_fill(ctx, vpp_gpe_ctx);

    BEGIN_BATCH(vpp_gpe_ctx->batch, 2);
    OUT_BATCH(vpp_gpe_ctx->batch, MI_BATCH_BUFFER_START | (1 << 8));
    OUT_RELOC(vpp_gpe_ctx->batch,
              vpp_gpe_ctx->vpp_batchbuffer.bo,
              I915_GEM_DOMAIN_COMMAND, 0,
              0);
    ADVANCE_BATCH(vpp_gpe_ctx->batch);

    intel_batchbuffer_end_atomic(vpp_gpe_ctx->batch);

    return VA_STATUS_SUCCESS;
}

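/*
 * (Re)allocate the per-run buffer objects: the second-level batch
 * buffer sized for thread_num MEDIA_OBJECT commands plus the batch end,
 * and the kernel return buffer (16 bytes per thread), then let the GPE
 * context initialize its own state.
 */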
static VAStatus
gen75_gpe_process_init(VADriverContextP ctx,
                 struct vpp_gpe_context *vpp_gpe_ctx)
{
    struct i965_driver_data *i965 = i965_driver_data(ctx);
    dri_bo *bo;

    unsigned int batch_buf_size = vpp_gpe_ctx->thread_num *
                 (vpp_gpe_ctx->thread_param_size + 6 * sizeof(int)) + 16;

    vpp_gpe_ctx->vpp_kernel_return.num_blocks = vpp_gpe_ctx->thread_num;
    vpp_gpe_ctx->vpp_kernel_return.size_block = 16;
    vpp_gpe_ctx->vpp_kernel_return.pitch = 1;
    unsigned int kernel_return_size = vpp_gpe_ctx->vpp_kernel_return.num_blocks
           * vpp_gpe_ctx->vpp_kernel_return.size_block;

    dri_bo_unreference(vpp_gpe_ctx->vpp_batchbuffer.bo);
    bo = dri_bo_alloc(i965->intel.bufmgr,
                      "vpp batch buffer",
                       batch_buf_size, 0x1000);
    vpp_gpe_ctx->vpp_batchbuffer.bo = bo;
    dri_bo_reference(vpp_gpe_ctx->vpp_batchbuffer.bo);

    dri_bo_unreference(vpp_gpe_ctx->vpp_kernel_return.bo);
    bo = dri_bo_alloc(i965->intel.bufmgr,
                      "vpp kernel return buffer",
                       kernel_return_size, 0x1000);
    vpp_gpe_ctx->vpp_kernel_return.bo = bo;
    dri_bo_reference(vpp_gpe_ctx->vpp_kernel_return.bo);

    vpp_gpe_ctx->gpe_context_init(ctx, &vpp_gpe_ctx->gpe_ctx);

    return VA_STATUS_SUCCESS;
}

static VAStatus
gen75_gpe_process_prepare(VADriverContextP ctx,
                    struct vpp_gpe_context *vpp_gpe_ctx)
{
    /* Setup all the memory objects */
    gen75_gpe_process_surfaces_setup(ctx, vpp_gpe_ctx);
    gen75_gpe_process_interface_setup(ctx, vpp_gpe_ctx);
    //gen75_gpe_process_constant_setup(ctx, vpp_gpe_ctx);

    /* Program the media pipeline */
    gen75_gpe_process_pipeline_setup(ctx, vpp_gpe_ctx);

    return VA_STATUS_SUCCESS;
}

static VAStatus
gen75_gpe_process_run(VADriverContextP ctx,
                struct vpp_gpe_context *vpp_gpe_ctx)
{
    intel_batchbuffer_flush(vpp_gpe_ctx->batch);

    return VA_STATUS_SUCCESS;
}

static VAStatus
gen75_gpe_process(VADriverContextP ctx,
                  struct vpp_gpe_context * vpp_gpe_ctx)
{
    VAStatus va_status = VA_STATUS_SUCCESS;

    va_status = gen75_gpe_process_init(ctx, vpp_gpe_ctx);
    if (va_status != VA_STATUS_SUCCESS)
        return va_status;

    va_status = gen75_gpe_process_prepare(ctx, vpp_gpe_ctx);
    if (va_status != VA_STATUS_SUCCESS)
        return va_status;

    va_status = gen75_gpe_process_run(ctx, vpp_gpe_ctx);
    if (va_status != VA_STATUS_SUCCESS)
        return va_status;

    return VA_STATUS_SUCCESS;
}

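/*
 * The gen8 (Broadwell) path below mirrors the gen75 path, but uses the
 * gen8 surface state layout, keeps the interface descriptors and CURBE
 * in the combined dynamic state buffer, and appends a MEDIA_STATE_FLUSH
 * after each MEDIA_OBJECT command.
 */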
static VAStatus
gen8_gpe_process_surfaces_setup(VADriverContextP ctx,
                   struct vpp_gpe_context *vpp_gpe_ctx)
{
    struct object_surface *obj_surface;
    unsigned int i = 0;
    unsigned char input_surface_sum = (1 + vpp_gpe_ctx->forward_surf_sum +
                                         vpp_gpe_ctx->backward_surf_sum) * 2;

    /* Binding input NV12 surfaces (Luma + Chroma) */
    for( i = 0; i < input_surface_sum; i += 2){
         obj_surface = vpp_gpe_ctx->surface_input_object[i/2];
         assert(obj_surface);
         gen8_gpe_media_rw_surface_setup(ctx,
                                         &vpp_gpe_ctx->gpe_ctx,
                                         obj_surface,
                                         BINDING_TABLE_OFFSET_GEN8(i),
                                         SURFACE_STATE_OFFSET_GEN8(i));

         gen8_gpe_media_chroma_surface_setup(ctx,
                                         &vpp_gpe_ctx->gpe_ctx,
                                         obj_surface,
                                         BINDING_TABLE_OFFSET_GEN8(i + 1),
                                         SURFACE_STATE_OFFSET_GEN8(i + 1));
    }

    /* Binding output NV12 surface (Luma + Chroma) */
    obj_surface = vpp_gpe_ctx->surface_output_object;
    assert(obj_surface);
    gen8_gpe_media_rw_surface_setup(ctx,
                                    &vpp_gpe_ctx->gpe_ctx,
                                    obj_surface,
                                    BINDING_TABLE_OFFSET_GEN8(input_surface_sum),
                                    SURFACE_STATE_OFFSET_GEN8(input_surface_sum));
    gen8_gpe_media_chroma_surface_setup(ctx,
                                    &vpp_gpe_ctx->gpe_ctx,
                                    obj_surface,
                                    BINDING_TABLE_OFFSET_GEN8(input_surface_sum + 1),
                                    SURFACE_STATE_OFFSET_GEN8(input_surface_sum + 1));
    /* Bind kernel return buffer surface */
    gen7_gpe_buffer_suface_setup(ctx,
                                 &vpp_gpe_ctx->gpe_ctx,
                                 &vpp_gpe_ctx->vpp_kernel_return,
                                 BINDING_TABLE_OFFSET_GEN8(input_surface_sum + 2),
                                 SURFACE_STATE_OFFSET_GEN8(input_surface_sum + 2));

    return VA_STATUS_SUCCESS;
}

static VAStatus
gen8_gpe_process_interface_setup(VADriverContextP ctx,
                    struct vpp_gpe_context *vpp_gpe_ctx)
{
    struct gen8_interface_descriptor_data *desc;
    dri_bo *bo = vpp_gpe_ctx->gpe_ctx.dynamic_state.bo;
    int i;

    dri_bo_map(bo, 1);
    assert(bo->virtual);
    desc = (struct gen8_interface_descriptor_data *)(bo->virtual
                               + vpp_gpe_ctx->gpe_ctx.idrt_offset);

    /* Setup the descriptor table */
    for (i = 0; i < vpp_gpe_ctx->sub_shader_sum; i++){
        struct i965_kernel *kernel;
        kernel = &vpp_gpe_ctx->gpe_ctx.kernels[i];
        assert(sizeof(*desc) == 32);
        memset(desc, 0, sizeof(*desc));
        desc->desc0.kernel_start_pointer = kernel->kernel_offset >> 6;
        desc->desc3.sampler_count = 0; /* FIXME: */
        desc->desc3.sampler_state_pointer = 0;
        desc->desc4.binding_table_entry_count = 6; /* FIXME: */
        desc->desc4.binding_table_pointer = (BINDING_TABLE_OFFSET_GEN8(0) >> 5);
        desc->desc5.constant_urb_entry_read_offset = 0;
        desc->desc5.constant_urb_entry_read_length = 0;

        desc++;
    }

    dri_bo_unmap(bo);

    return VA_STATUS_SUCCESS;
}

static VAStatus
gen8_gpe_process_constant_fill(VADriverContextP ctx,
                   struct vpp_gpe_context *vpp_gpe_ctx)
{
    dri_bo_map(vpp_gpe_ctx->gpe_ctx.dynamic_state.bo, 1);
    assert(vpp_gpe_ctx->gpe_ctx.dynamic_state.bo->virtual);
    unsigned char* constant_buffer = vpp_gpe_ctx->gpe_ctx.dynamic_state.bo->virtual;
    memcpy(constant_buffer, vpp_gpe_ctx->kernel_param,
                            vpp_gpe_ctx->kernel_param_size);
    dri_bo_unmap(vpp_gpe_ctx->gpe_ctx.dynamic_state.bo);

    return VA_STATUS_SUCCESS;
}

static VAStatus
gen8_gpe_process_parameters_fill(VADriverContextP ctx,
                           struct vpp_gpe_context *vpp_gpe_ctx)
{
    unsigned int *command_ptr;
    unsigned int i, size = vpp_gpe_ctx->thread_param_size;
    unsigned char* position = NULL;

    /* Thread inline data setting */
    dri_bo_map(vpp_gpe_ctx->vpp_batchbuffer.bo, 1);
    command_ptr = vpp_gpe_ctx->vpp_batchbuffer.bo->virtual;

    for(i = 0; i < vpp_gpe_ctx->thread_num; i++)
    {
         *command_ptr++ = (CMD_MEDIA_OBJECT | (size/sizeof(int) + 6 - 2));
         *command_ptr++ = vpp_gpe_ctx->sub_shader_index;
         *command_ptr++ = 0;
         *command_ptr++ = 0;
         *command_ptr++ = 0;
         *command_ptr++ = 0;

         /* copy thread inline data */
         position = (unsigned char*)(vpp_gpe_ctx->thread_param + size * i);
         memcpy(command_ptr, position, size);
         command_ptr += size/sizeof(int);

         *command_ptr++ = CMD_MEDIA_STATE_FLUSH;
         *command_ptr++ = 0;
    }

    *command_ptr++ = 0;
    *command_ptr++ = MI_BATCH_BUFFER_END;

    dri_bo_unmap(vpp_gpe_ctx->vpp_batchbuffer.bo);

    return VA_STATUS_SUCCESS;
}

static VAStatus
gen8_gpe_process_pipeline_setup(VADriverContextP ctx,
                   struct vpp_gpe_context *vpp_gpe_ctx)
{
    intel_batchbuffer_start_atomic(vpp_gpe_ctx->batch, 0x1000);
    intel_batchbuffer_emit_mi_flush(vpp_gpe_ctx->batch);

    gen8_gpe_pipeline_setup(ctx, &vpp_gpe_ctx->gpe_ctx, vpp_gpe_ctx->batch);

    gen8_gpe_process_parameters_fill(ctx, vpp_gpe_ctx);

    BEGIN_BATCH(vpp_gpe_ctx->batch, 3);
    OUT_BATCH(vpp_gpe_ctx->batch, MI_BATCH_BUFFER_START | (1 << 8) | (1 << 0));
    OUT_RELOC(vpp_gpe_ctx->batch,
              vpp_gpe_ctx->vpp_batchbuffer.bo,
              I915_GEM_DOMAIN_COMMAND, 0,
              0);
    OUT_BATCH(vpp_gpe_ctx->batch, 0);

    ADVANCE_BATCH(vpp_gpe_ctx->batch);

    intel_batchbuffer_end_atomic(vpp_gpe_ctx->batch);

    return VA_STATUS_SUCCESS;
}

static VAStatus
gen8_gpe_process_init(VADriverContextP ctx,
                 struct vpp_gpe_context *vpp_gpe_ctx)
{
    struct i965_driver_data *i965 = i965_driver_data(ctx);
    dri_bo *bo;

    unsigned int batch_buf_size = vpp_gpe_ctx->thread_num *
                 (vpp_gpe_ctx->thread_param_size + 6 * sizeof(int)) + 16;

    vpp_gpe_ctx->vpp_kernel_return.num_blocks = vpp_gpe_ctx->thread_num;
    vpp_gpe_ctx->vpp_kernel_return.size_block = 16;
    vpp_gpe_ctx->vpp_kernel_return.pitch = 1;

    unsigned int kernel_return_size = vpp_gpe_ctx->vpp_kernel_return.num_blocks
           * vpp_gpe_ctx->vpp_kernel_return.size_block;

    dri_bo_unreference(vpp_gpe_ctx->vpp_batchbuffer.bo);
    bo = dri_bo_alloc(i965->intel.bufmgr,
                      "vpp batch buffer",
                       batch_buf_size, 0x1000);
    vpp_gpe_ctx->vpp_batchbuffer.bo = bo;
    dri_bo_reference(vpp_gpe_ctx->vpp_batchbuffer.bo);

    dri_bo_unreference(vpp_gpe_ctx->vpp_kernel_return.bo);
    bo = dri_bo_alloc(i965->intel.bufmgr,
                      "vpp kernel return buffer",
                       kernel_return_size, 0x1000);
    vpp_gpe_ctx->vpp_kernel_return.bo = bo;
    dri_bo_reference(vpp_gpe_ctx->vpp_kernel_return.bo);

    vpp_gpe_ctx->gpe_context_init(ctx, &vpp_gpe_ctx->gpe_ctx);

    return VA_STATUS_SUCCESS;
}

static VAStatus
gen8_gpe_process_prepare(VADriverContextP ctx,
                    struct vpp_gpe_context *vpp_gpe_ctx)
{
    /* Setup all the memory objects */
    gen8_gpe_process_surfaces_setup(ctx, vpp_gpe_ctx);
    gen8_gpe_process_interface_setup(ctx, vpp_gpe_ctx);
    //gen8_gpe_process_constant_setup(ctx, vpp_gpe_ctx);

    /* Program the media pipeline */
    gen8_gpe_process_pipeline_setup(ctx, vpp_gpe_ctx);

    return VA_STATUS_SUCCESS;
}

static VAStatus
gen8_gpe_process_run(VADriverContextP ctx,
                struct vpp_gpe_context *vpp_gpe_ctx)
{
    intel_batchbuffer_flush(vpp_gpe_ctx->batch);

    return VA_STATUS_SUCCESS;
}

static VAStatus
gen8_gpe_process(VADriverContextP ctx,
                  struct vpp_gpe_context * vpp_gpe_ctx)
{
    VAStatus va_status = VA_STATUS_SUCCESS;

    va_status = gen8_gpe_process_init(ctx, vpp_gpe_ctx);
    if (va_status != VA_STATUS_SUCCESS)
        return va_status;

    va_status = gen8_gpe_process_prepare(ctx, vpp_gpe_ctx);
    if (va_status != VA_STATUS_SUCCESS)
        return va_status;

    va_status = gen8_gpe_process_run(ctx, vpp_gpe_ctx);
    if (va_status != VA_STATUS_SUCCESS)
        return va_status;

    return VA_STATUS_SUCCESS;
}

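/* Dispatch to the platform-specific GPE path (Haswell or gen8/Broadwell). */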
static VAStatus
vpp_gpe_process(VADriverContextP ctx,
                  struct vpp_gpe_context * vpp_gpe_ctx)
{
    struct i965_driver_data *i965 = i965_driver_data(ctx);
    if (IS_HASWELL(i965->intel.device_info))
       return gen75_gpe_process(ctx, vpp_gpe_ctx);
    else if (IS_GEN8(i965->intel.device_info))
       return gen8_gpe_process(ctx, vpp_gpe_ctx);

    return VA_STATUS_ERROR_UNIMPLEMENTED;
}

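/*
 * Sharpening (unsharp-mask style) in three GPE passes:
 *   1. horizontal blur of the input into the output surface
 *      (16-row strips, one thread each),
 *   2. vertical blur of that result into a temporary surface
 *      (16-column strips),
 *   3. unmask pass that combines the blurred temporary with the
 *      original input and writes the sharpened output (4-row strips).
 */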
static VAStatus
vpp_gpe_process_sharpening(VADriverContextP ctx,
                             struct vpp_gpe_context * vpp_gpe_ctx)
{
     VAStatus va_status = VA_STATUS_SUCCESS;
     struct i965_driver_data *i965 = i965_driver_data(ctx);
     struct object_surface *origin_in_obj_surface = vpp_gpe_ctx->surface_input_object[0];
     struct object_surface *origin_out_obj_surface = vpp_gpe_ctx->surface_output_object;

     VAProcPipelineParameterBuffer* pipe = vpp_gpe_ctx->pipeline_param;
     VABufferID *filter_ids = (VABufferID*)pipe->filters;
     struct object_buffer *obj_buf = BUFFER(*(filter_ids + 0));

     assert(obj_buf && obj_buf->buffer_store && obj_buf->buffer_store->buffer);

     if (!obj_buf ||
         !obj_buf->buffer_store ||
         !obj_buf->buffer_store->buffer)
         goto error;

     VAProcFilterParameterBuffer* filter =
                  (VAProcFilterParameterBuffer*)obj_buf->buffer_store->buffer;
     float sharpening_intensity = filter->value;

     ThreadParameterSharpening thr_param;
     unsigned int thr_param_size = sizeof(ThreadParameterSharpening);
     unsigned int i;
     unsigned char * pos;

     if(vpp_gpe_ctx->is_first_frame){
         vpp_gpe_ctx->sub_shader_sum = 3;
         struct i965_kernel * vpp_kernels = NULL;
         if (IS_HASWELL(i965->intel.device_info))
             vpp_kernels = gen75_vpp_sharpening_kernels;
         else if (IS_GEN8(i965->intel.device_info))
             vpp_kernels = gen8_vpp_sharpening_kernels;

         vpp_gpe_ctx->gpe_load_kernels(ctx,
                               &vpp_gpe_ctx->gpe_ctx,
                               vpp_kernels,
                               vpp_gpe_ctx->sub_shader_sum);
     }

     if(vpp_gpe_ctx->surface_tmp == VA_INVALID_ID){
        va_status = i965_CreateSurfaces(ctx,
                                       vpp_gpe_ctx->in_frame_w,
                                       vpp_gpe_ctx->in_frame_h,
                                       VA_RT_FORMAT_YUV420,
                                       1,
                                       &vpp_gpe_ctx->surface_tmp);
        assert(va_status == VA_STATUS_SUCCESS);

        struct object_surface * obj_surf = SURFACE(vpp_gpe_ctx->surface_tmp);
        assert(obj_surf);

        if (obj_surf) {
            i965_check_alloc_surface_bo(ctx, obj_surf, 1, VA_FOURCC_NV12,
                                        SUBSAMPLE_YUV420);
            vpp_gpe_ctx->surface_tmp_object = obj_surf;
        }
    }

    assert(sharpening_intensity >= 0.0 && sharpening_intensity <= 1.0);
    thr_param.l_amount = (unsigned int)(sharpening_intensity * 128);
    thr_param.d_amount = (unsigned int)(sharpening_intensity * 128);

    thr_param.base.pic_width = vpp_gpe_ctx->in_frame_w;
    thr_param.base.pic_height = vpp_gpe_ctx->in_frame_h;

    /* Step 1: horizontal blur process */
    vpp_gpe_ctx->forward_surf_sum = 0;
    vpp_gpe_ctx->backward_surf_sum = 0;

    vpp_gpe_ctx->thread_num = vpp_gpe_ctx->in_frame_h/16;
    vpp_gpe_ctx->thread_param_size = thr_param_size;
    vpp_gpe_ctx->thread_param = (unsigned char*) malloc(vpp_gpe_ctx->thread_param_size
                                                       * vpp_gpe_ctx->thread_num);
    pos = vpp_gpe_ctx->thread_param;

    if (!pos) {
        return VA_STATUS_ERROR_ALLOCATION_FAILED;
    }

    for( i = 0 ; i < vpp_gpe_ctx->thread_num; i++){
        thr_param.base.v_pos = 16 * i;
        thr_param.base.h_pos = 0;
        memcpy(pos, &thr_param, thr_param_size);
        pos += thr_param_size;
    }

    vpp_gpe_ctx->sub_shader_index = 0;
    va_status = vpp_gpe_process(ctx, vpp_gpe_ctx);
    free(vpp_gpe_ctx->thread_param);

    /* Step 2: vertical blur process */
    vpp_gpe_ctx->surface_input_object[0] = vpp_gpe_ctx->surface_output_object;
    vpp_gpe_ctx->surface_output_object = vpp_gpe_ctx->surface_tmp_object;
    vpp_gpe_ctx->forward_surf_sum = 0;
    vpp_gpe_ctx->backward_surf_sum = 0;

    vpp_gpe_ctx->thread_num = vpp_gpe_ctx->in_frame_w/16;
    vpp_gpe_ctx->thread_param_size = thr_param_size;
    vpp_gpe_ctx->thread_param = (unsigned char*) malloc(vpp_gpe_ctx->thread_param_size
                                                       * vpp_gpe_ctx->thread_num);
    pos = vpp_gpe_ctx->thread_param;

    if (!pos) {
        return VA_STATUS_ERROR_ALLOCATION_FAILED;
    }

    for( i = 0 ; i < vpp_gpe_ctx->thread_num; i++){
        thr_param.base.v_pos = 0;
        thr_param.base.h_pos = 16 * i;
        memcpy(pos, &thr_param, thr_param_size);
        pos += thr_param_size;
    }

    vpp_gpe_ctx->sub_shader_index = 1;
    vpp_gpe_process(ctx, vpp_gpe_ctx);
    free(vpp_gpe_ctx->thread_param);

    /* Step 3: apply the blur to the original surface */
    vpp_gpe_ctx->surface_input_object[0]  = origin_in_obj_surface;
    vpp_gpe_ctx->surface_input_object[1]  = vpp_gpe_ctx->surface_tmp_object;
    vpp_gpe_ctx->surface_output_object    = origin_out_obj_surface;
    vpp_gpe_ctx->forward_surf_sum  = 1;
    vpp_gpe_ctx->backward_surf_sum = 0;

    vpp_gpe_ctx->thread_num = vpp_gpe_ctx->in_frame_h/4;
    vpp_gpe_ctx->thread_param_size = thr_param_size;
    vpp_gpe_ctx->thread_param = (unsigned char*) malloc(vpp_gpe_ctx->thread_param_size
                                                       * vpp_gpe_ctx->thread_num);
    pos = vpp_gpe_ctx->thread_param;

    if (!pos) {
        return VA_STATUS_ERROR_ALLOCATION_FAILED;
    }

    for( i = 0 ; i < vpp_gpe_ctx->thread_num; i++){
        thr_param.base.v_pos = 4 * i;
        thr_param.base.h_pos = 0;
        memcpy(pos, &thr_param, thr_param_size);
        pos += thr_param_size;
    }

    vpp_gpe_ctx->sub_shader_index = 2;
    va_status = vpp_gpe_process(ctx, vpp_gpe_ctx);
    free(vpp_gpe_ctx->thread_param);

    return va_status;

error:
    return VA_STATUS_ERROR_INVALID_PARAMETER;
}

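/*
 * Entry point called from the VPP pipeline: locate a sharpening filter
 * in the pipeline parameter buffer, collect the input/output and
 * reference surfaces, and run the sharpening passes. Other filter
 * types are not handled here.
 */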
VAStatus vpp_gpe_process_picture(VADriverContextP ctx,
                    struct vpp_gpe_context * vpp_gpe_ctx)
{
    VAStatus va_status = VA_STATUS_SUCCESS;
    struct i965_driver_data *i965 = i965_driver_data(ctx);
    VAProcPipelineParameterBuffer* pipe = vpp_gpe_ctx->pipeline_param;
    VAProcFilterParameterBuffer* filter = NULL;
    unsigned int i;
    struct object_surface *obj_surface = NULL;

    if (pipe->num_filters && !pipe->filters)
        goto error;

    for(i = 0; i < pipe->num_filters; i++){
        struct object_buffer *obj_buf = BUFFER(pipe->filters[i]);

        assert(obj_buf && obj_buf->buffer_store && obj_buf->buffer_store->buffer);

        if (!obj_buf ||
            !obj_buf->buffer_store ||
            !obj_buf->buffer_store->buffer)
            goto error;

        filter = (VAProcFilterParameterBuffer*)obj_buf->buffer_store->buffer;
        if(filter->type == VAProcFilterSharpening){
           break;
        }
    }

    assert(pipe->num_forward_references + pipe->num_backward_references <= 4);
    vpp_gpe_ctx->surface_input_object[0] = vpp_gpe_ctx->surface_pipeline_input_object;

    vpp_gpe_ctx->forward_surf_sum = 0;
    vpp_gpe_ctx->backward_surf_sum = 0;

    for(i = 0; i < pipe->num_forward_references; i++)
    {
        obj_surface = SURFACE(pipe->forward_references[i]);

        assert(obj_surface);
        vpp_gpe_ctx->surface_input_object[i + 1] = obj_surface;
        vpp_gpe_ctx->forward_surf_sum++;
    }

    for(i = 0; i < pipe->num_backward_references; i++)
    {
        obj_surface = SURFACE(pipe->backward_references[i]);

        assert(obj_surface);
        vpp_gpe_ctx->surface_input_object[vpp_gpe_ctx->forward_surf_sum + 1 + i] = obj_surface;
        vpp_gpe_ctx->backward_surf_sum++;
    }

    obj_surface = vpp_gpe_ctx->surface_input_object[0];
    vpp_gpe_ctx->in_frame_w = obj_surface->orig_width;
    vpp_gpe_ctx->in_frame_h = obj_surface->orig_height;

    if(filter && filter->type == VAProcFilterSharpening) {
       va_status = vpp_gpe_process_sharpening(ctx, vpp_gpe_ctx);
    } else {
       va_status = VA_STATUS_ERROR_ATTR_NOT_SUPPORTED;
    }

    vpp_gpe_ctx->is_first_frame = 0;

    return va_status;

error:
    return VA_STATUS_ERROR_INVALID_PARAMETER;
}

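/*
 * Release everything owned by the VPP GPE context: the second-level
 * batch buffer, the kernel return buffer, the underlying GPE context,
 * the temporary surface (if one was created), and the batch itself.
 */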
void
vpp_gpe_context_destroy(VADriverContextP ctx,
                               struct vpp_gpe_context *vpp_gpe_ctx)
{
    dri_bo_unreference(vpp_gpe_ctx->vpp_batchbuffer.bo);
    vpp_gpe_ctx->vpp_batchbuffer.bo = NULL;

    dri_bo_unreference(vpp_gpe_ctx->vpp_kernel_return.bo);
    vpp_gpe_ctx->vpp_kernel_return.bo = NULL;

    vpp_gpe_ctx->gpe_context_destroy(&vpp_gpe_ctx->gpe_ctx);

    if(vpp_gpe_ctx->surface_tmp != VA_INVALID_ID){
        assert(vpp_gpe_ctx->surface_tmp_object != NULL);
        i965_DestroySurfaces(ctx, &vpp_gpe_ctx->surface_tmp, 1);
        vpp_gpe_ctx->surface_tmp = VA_INVALID_ID;
        vpp_gpe_ctx->surface_tmp_object = NULL;
    }

    /* free the batch through its own destructor so its bo is released too */
    intel_batchbuffer_free(vpp_gpe_ctx->batch);

    free(vpp_gpe_ctx);
}

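/*
 * Allocate and set up a VPP GPE context. The Haswell path uses the
 * i965 GPE helpers with separate CURBE and IDRT buffers; the gen8 path
 * uses the gen8 helpers with a combined dynamic state buffer. Only
 * Haswell and gen8 hardware are supported (asserted below).
 */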
struct vpp_gpe_context *
vpp_gpe_context_init(VADriverContextP ctx)
{
    struct i965_driver_data *i965 = i965_driver_data(ctx);
    struct vpp_gpe_context  *vpp_gpe_ctx = calloc(1, sizeof(struct vpp_gpe_context));
    struct i965_gpe_context *gpe_ctx = &(vpp_gpe_ctx->gpe_ctx);

    assert(IS_HASWELL(i965->intel.device_info) ||
           IS_GEN8(i965->intel.device_info));

    vpp_gpe_ctx->surface_tmp = VA_INVALID_ID;
    vpp_gpe_ctx->surface_tmp_object = NULL;
    vpp_gpe_ctx->batch = intel_batchbuffer_new(&i965->intel, I915_EXEC_RENDER, 0);
    vpp_gpe_ctx->is_first_frame = 1;

    gpe_ctx->vfe_state.max_num_threads = 60 - 1;
    gpe_ctx->vfe_state.num_urb_entries = 16;
    gpe_ctx->vfe_state.gpgpu_mode = 0;
    gpe_ctx->vfe_state.urb_entry_size = 59 - 1;
    gpe_ctx->vfe_state.curbe_allocation_size = CURBE_ALLOCATION_SIZE - 1;

    if (IS_HASWELL(i965->intel.device_info)) {
        vpp_gpe_ctx->gpe_context_init     = i965_gpe_context_init;
        vpp_gpe_ctx->gpe_context_destroy  = i965_gpe_context_destroy;
        vpp_gpe_ctx->gpe_load_kernels     = i965_gpe_load_kernels;
        gpe_ctx->surface_state_binding_table.length =
               (SURFACE_STATE_PADDED_SIZE_GEN7 + sizeof(unsigned int)) * MAX_MEDIA_SURFACES_GEN6;

        gpe_ctx->curbe.length = CURBE_TOTAL_DATA_LENGTH;
        gpe_ctx->idrt.max_entries = MAX_INTERFACE_DESC_GEN6;
        gpe_ctx->idrt.entry_size = sizeof(struct gen6_interface_descriptor_data);

    } else if (IS_GEN8(i965->intel.device_info)) {
        vpp_gpe_ctx->gpe_context_init     = gen8_gpe_context_init;
        vpp_gpe_ctx->gpe_context_destroy  = gen8_gpe_context_destroy;
        vpp_gpe_ctx->gpe_load_kernels     = gen8_gpe_load_kernels;
        gpe_ctx->surface_state_binding_table.length =
               (SURFACE_STATE_PADDED_SIZE_GEN8 + sizeof(unsigned int)) * MAX_MEDIA_SURFACES_GEN6;

        gpe_ctx->curbe_size = CURBE_TOTAL_DATA_LENGTH;
        gpe_ctx->idrt_size  = sizeof(struct gen8_interface_descriptor_data) * MAX_INTERFACE_DESC_GEN6;

    }

    return vpp_gpe_ctx;
}