/*
 * Copyright © 2011 Intel Corporation
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the
 * "Software"), to deal in the Software without restriction, including
 * without limitation the rights to use, copy, modify, merge, publish,
 * distribute, sub license, and/or sell copies of the Software, and to
 * permit persons to whom the Software is furnished to do so, subject to
 * the following conditions:
 *
 * The above copyright notice and this permission notice (including the
 * next paragraph) shall be included in all copies or substantial portions
 * of the Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
 * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
 * IN NO EVENT SHALL PRECISION INSIGHT AND/OR ITS SUPPLIERS BE LIABLE FOR
 * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
 * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
 * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
 *
 * Authors:
 *   Li Xiaowei <xiaowei.a.li@intel.com>
 */

#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <assert.h>

#include "intel_batchbuffer.h"
#include "intel_driver.h"

#include "i965_structs.h"
#include "i965_defines.h"
#include "i965_drv_video.h"
#include "gen75_vpp_gpe.h"

#define MAX_INTERFACE_DESC_GEN6      MAX_GPE_KERNELS
#define MAX_MEDIA_SURFACES_GEN6      34

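/*
 * Layout of the surface-state/binding-table buffer: the padded
 * surface-state entries are packed first, followed by one binding-table
 * DWORD per surface, hence the offsets below (with the per-gen padded
 * entry sizes).
 */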
#define SURFACE_STATE_OFFSET_GEN7(index)   (SURFACE_STATE_PADDED_SIZE_GEN7 * (index))
#define BINDING_TABLE_OFFSET_GEN7(index)   (SURFACE_STATE_OFFSET_GEN7(MAX_MEDIA_SURFACES_GEN6) + sizeof(unsigned int) * (index))

#define SURFACE_STATE_OFFSET_GEN8(index)   (SURFACE_STATE_PADDED_SIZE_GEN8 * (index))
#define BINDING_TABLE_OFFSET_GEN8(index)   (SURFACE_STATE_OFFSET_GEN8(MAX_MEDIA_SURFACES_GEN6) + sizeof(unsigned int) * (index))

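/*
 * CURBE sizing: per the MEDIA_VFE_STATE layout, the CURBE allocation size
 * and URB entry length are counted in 256-bit (32-byte) units; the
 * context init at the bottom of this file programs the allocation size as
 * CURBE_ALLOCATION_SIZE - 1.
 */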
#define CURBE_ALLOCATION_SIZE   37
#define CURBE_TOTAL_DATA_LENGTH (4 * 32)
#define CURBE_URB_ENTRY_LENGTH  4

extern VAStatus
i965_CreateSurfaces(VADriverContextP ctx,
                    int width,
                    int height,
                    int format,
                    int num_surfaces,
                    VASurfaceID *surfaces);

extern VAStatus
i965_DestroySurfaces(VADriverContextP ctx,
                     VASurfaceID *surface_list,
                     int num_surfaces);

/* Shader binaries for the sharpening filter */
static const unsigned int gen75_gpe_sharpening_h_blur[][4] = {
   #include "shaders/post_processing/gen75/sharpening_h_blur.g75b"
};
static const unsigned int gen75_gpe_sharpening_v_blur[][4] = {
   #include "shaders/post_processing/gen75/sharpening_v_blur.g75b"
};
static const unsigned int gen75_gpe_sharpening_unmask[][4] = {
   #include "shaders/post_processing/gen75/sharpening_unmask.g75b"
};
static struct i965_kernel gen75_vpp_sharpening_kernels[] = {
    {
        "vpp: sharpening(horizontal blur)",
        VPP_GPE_SHARPENING,
        gen75_gpe_sharpening_h_blur,
        sizeof(gen75_gpe_sharpening_h_blur),
        NULL
    },
    {
        "vpp: sharpening(vertical blur)",
        VPP_GPE_SHARPENING,
        gen75_gpe_sharpening_v_blur,
        sizeof(gen75_gpe_sharpening_v_blur),
        NULL
    },
    {
        "vpp: sharpening(unmask)",
        VPP_GPE_SHARPENING,
        gen75_gpe_sharpening_unmask,
        sizeof(gen75_gpe_sharpening_unmask),
        NULL
    },
};

/* Sharpening kernels for Broadwell */
static const unsigned int gen8_gpe_sharpening_h_blur[][4] = {
   #include "shaders/post_processing/gen8/sharpening_h_blur.g8b"
};
static const unsigned int gen8_gpe_sharpening_v_blur[][4] = {
   #include "shaders/post_processing/gen8/sharpening_v_blur.g8b"
};
static const unsigned int gen8_gpe_sharpening_unmask[][4] = {
   #include "shaders/post_processing/gen8/sharpening_unmask.g8b"
};

static struct i965_kernel gen8_vpp_sharpening_kernels[] = {
    {
        "vpp: sharpening(horizontal blur)",
        VPP_GPE_SHARPENING,
        gen8_gpe_sharpening_h_blur,
        sizeof(gen8_gpe_sharpening_h_blur),
        NULL
    },
    {
        "vpp: sharpening(vertical blur)",
        VPP_GPE_SHARPENING,
        gen8_gpe_sharpening_v_blur,
        sizeof(gen8_gpe_sharpening_v_blur),
        NULL
    },
    {
        "vpp: sharpening(unmask)",
        VPP_GPE_SHARPENING,
        gen8_gpe_sharpening_unmask,
        sizeof(gen8_gpe_sharpening_unmask),
        NULL
    },
};

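/*
 * Each NV12 surface occupies two consecutive binding-table slots: an even
 * slot for the luma (Y) plane and the following odd slot for the chroma
 * (UV) plane.  Inputs come first, then the output pair, then the kernel
 * return buffer.
 */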
static VAStatus
gen75_gpe_process_surfaces_setup(VADriverContextP ctx,
                   struct vpp_gpe_context *vpp_gpe_ctx)
{
    struct object_surface *obj_surface;
    unsigned int i = 0;
    unsigned char input_surface_sum = (1 + vpp_gpe_ctx->forward_surf_sum +
                                         vpp_gpe_ctx->backward_surf_sum) * 2;

    /* Bind the input NV12 surfaces (Luma + Chroma) */
    for (i = 0; i < input_surface_sum; i += 2) {
         obj_surface = vpp_gpe_ctx->surface_input_object[i/2];
         assert(obj_surface);
         gen7_gpe_media_rw_surface_setup(ctx,
                                         &vpp_gpe_ctx->gpe_ctx,
                                          obj_surface,
                                          BINDING_TABLE_OFFSET_GEN7(i),
                                          SURFACE_STATE_OFFSET_GEN7(i));

         gen75_gpe_media_chroma_surface_setup(ctx,
                                          &vpp_gpe_ctx->gpe_ctx,
                                          obj_surface,
                                          BINDING_TABLE_OFFSET_GEN7(i + 1),
                                          SURFACE_STATE_OFFSET_GEN7(i + 1));
    }

    /* Bind the output NV12 surface (Luma + Chroma) */
    obj_surface = vpp_gpe_ctx->surface_output_object;
    assert(obj_surface);
    gen7_gpe_media_rw_surface_setup(ctx,
                                    &vpp_gpe_ctx->gpe_ctx,
                                    obj_surface,
                                    BINDING_TABLE_OFFSET_GEN7(input_surface_sum),
                                    SURFACE_STATE_OFFSET_GEN7(input_surface_sum));
    gen75_gpe_media_chroma_surface_setup(ctx,
                                    &vpp_gpe_ctx->gpe_ctx,
                                    obj_surface,
                                    BINDING_TABLE_OFFSET_GEN7(input_surface_sum + 1),
                                    SURFACE_STATE_OFFSET_GEN7(input_surface_sum + 1));
    /* Bind the kernel return buffer surface */
    gen7_gpe_buffer_suface_setup(ctx,
                                  &vpp_gpe_ctx->gpe_ctx,
                                  &vpp_gpe_ctx->vpp_kernel_return,
                                  BINDING_TABLE_OFFSET_GEN7(input_surface_sum + 2),
                                  SURFACE_STATE_OFFSET_GEN7(input_surface_sum + 2));

    return VA_STATUS_SUCCESS;
}

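/*
 * Write one interface descriptor per loaded sub-kernel.  On Gen7 the
 * kernel start pointer holds a graphics address, so a relocation is
 * emitted to patch desc0 with the kernel BO's final GPU offset.
 */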
static VAStatus
gen75_gpe_process_interface_setup(VADriverContextP ctx,
                    struct vpp_gpe_context *vpp_gpe_ctx)
{
    struct gen6_interface_descriptor_data *desc;
    dri_bo *bo = vpp_gpe_ctx->gpe_ctx.idrt.bo;
    int i;

    dri_bo_map(bo, 1);
    assert(bo->virtual);
    desc = bo->virtual;

    /* Set up the descriptor table */
    for (i = 0; i < vpp_gpe_ctx->sub_shader_sum; i++) {
        struct i965_kernel *kernel = &vpp_gpe_ctx->gpe_ctx.kernels[i];
        assert(sizeof(*desc) == 32);
        memset(desc, 0, sizeof(*desc));
        desc->desc0.kernel_start_pointer = (kernel->bo->offset >> 6);
        desc->desc2.sampler_count = 0; /* FIXME: */
        desc->desc2.sampler_state_pointer = 0;
        desc->desc3.binding_table_entry_count = 6; /* FIXME: */
        desc->desc3.binding_table_pointer = (BINDING_TABLE_OFFSET_GEN7(0) >> 5);
        desc->desc4.constant_urb_entry_read_offset = 0;
        desc->desc4.constant_urb_entry_read_length = 0;

        dri_bo_emit_reloc(bo,
                          I915_GEM_DOMAIN_INSTRUCTION, 0,
                          0,
                          i * sizeof(*desc) + offsetof(struct gen6_interface_descriptor_data, desc0),
                          kernel->bo);
        desc++;
    }

    dri_bo_unmap(bo);

    return VA_STATUS_SUCCESS;
}

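/* Copy the per-kernel parameter block into the CURBE (constant URB) buffer. */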
static VAStatus
gen75_gpe_process_constant_fill(VADriverContextP ctx,
                   struct vpp_gpe_context *vpp_gpe_ctx)
{
    dri_bo_map(vpp_gpe_ctx->gpe_ctx.curbe.bo, 1);
    assert(vpp_gpe_ctx->gpe_ctx.curbe.bo->virtual);
    unsigned char *constant_buffer = vpp_gpe_ctx->gpe_ctx.curbe.bo->virtual;
    memcpy(constant_buffer, vpp_gpe_ctx->kernel_param,
                            vpp_gpe_ctx->kernel_param_size);
    dri_bo_unmap(vpp_gpe_ctx->gpe_ctx.curbe.bo);

    return VA_STATUS_SUCCESS;
}

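/*
 * Build the second-level batch: one MEDIA_OBJECT per thread.  The
 * command's DWord-length bits encode total DWORDs minus two, i.e. six
 * fixed header DWORDs plus size/sizeof(int) DWORDs of thread inline data.
 */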
static VAStatus
gen75_gpe_process_parameters_fill(VADriverContextP ctx,
                           struct vpp_gpe_context *vpp_gpe_ctx)
{
    unsigned int *command_ptr;
    unsigned int i, size = vpp_gpe_ctx->thread_param_size;
    unsigned char *position = NULL;

    /* Thread inline data setting */
    dri_bo_map(vpp_gpe_ctx->vpp_batchbuffer.bo, 1);
    command_ptr = vpp_gpe_ctx->vpp_batchbuffer.bo->virtual;

    for (i = 0; i < vpp_gpe_ctx->thread_num; i++)
    {
         *command_ptr++ = (CMD_MEDIA_OBJECT | (size/sizeof(int) + 6 - 2));
         *command_ptr++ = vpp_gpe_ctx->sub_shader_index;
         *command_ptr++ = 0;
         *command_ptr++ = 0;
         *command_ptr++ = 0;
         *command_ptr++ = 0;

         /* copy thread inline data */
         position = (unsigned char *)(vpp_gpe_ctx->thread_param + size * i);
         memcpy(command_ptr, position, size);
         command_ptr += size/sizeof(int);
    }

    *command_ptr++ = 0;
    *command_ptr++ = MI_BATCH_BUFFER_END;

    dri_bo_unmap(vpp_gpe_ctx->vpp_batchbuffer.bo);

    return VA_STATUS_SUCCESS;
}

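/*
 * First-level batch: flush, program the GPE pipeline state, then chain to
 * the second-level batch built above.  On Gen7 MI_BATCH_BUFFER_START is
 * the two-DWORD form with a 32-bit batch address.
 */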
static VAStatus
gen75_gpe_process_pipeline_setup(VADriverContextP ctx,
                   struct vpp_gpe_context *vpp_gpe_ctx)
{
    intel_batchbuffer_start_atomic(vpp_gpe_ctx->batch, 0x1000);
    intel_batchbuffer_emit_mi_flush(vpp_gpe_ctx->batch);

    gen6_gpe_pipeline_setup(ctx, &vpp_gpe_ctx->gpe_ctx, vpp_gpe_ctx->batch);

    gen75_gpe_process_parameters_fill(ctx, vpp_gpe_ctx);

    BEGIN_BATCH(vpp_gpe_ctx->batch, 2);
    OUT_BATCH(vpp_gpe_ctx->batch, MI_BATCH_BUFFER_START | (2 << 6));
    OUT_RELOC(vpp_gpe_ctx->batch,
              vpp_gpe_ctx->vpp_batchbuffer.bo,
              I915_GEM_DOMAIN_COMMAND, 0,
              0);
    ADVANCE_BATCH(vpp_gpe_ctx->batch);

    intel_batchbuffer_end_atomic(vpp_gpe_ctx->batch);

    return VA_STATUS_SUCCESS;
}

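/*
 * Size the second-level batch for thread_num MEDIA_OBJECT commands (six
 * header DWORDs plus the inline data each), with 16 spare bytes for the
 * trailing MI_NOOP / MI_BATCH_BUFFER_END pair.
 */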
static VAStatus
gen75_gpe_process_init(VADriverContextP ctx,
                 struct vpp_gpe_context *vpp_gpe_ctx)
{
    struct i965_driver_data *i965 = i965_driver_data(ctx);
    dri_bo *bo;

    unsigned int batch_buf_size = vpp_gpe_ctx->thread_num *
                 (vpp_gpe_ctx->thread_param_size + 6 * sizeof(int)) + 16;

    vpp_gpe_ctx->vpp_kernel_return.num_blocks = vpp_gpe_ctx->thread_num;
    vpp_gpe_ctx->vpp_kernel_return.size_block = 16;
    vpp_gpe_ctx->vpp_kernel_return.pitch = 1;
    unsigned int kernel_return_size = vpp_gpe_ctx->vpp_kernel_return.num_blocks
           * vpp_gpe_ctx->vpp_kernel_return.size_block;

    dri_bo_unreference(vpp_gpe_ctx->vpp_batchbuffer.bo);
    bo = dri_bo_alloc(i965->intel.bufmgr,
                      "vpp batch buffer",
                       batch_buf_size, 0x1000);
    vpp_gpe_ctx->vpp_batchbuffer.bo = bo;
    dri_bo_reference(vpp_gpe_ctx->vpp_batchbuffer.bo);

    dri_bo_unreference(vpp_gpe_ctx->vpp_kernel_return.bo);
    bo = dri_bo_alloc(i965->intel.bufmgr,
                      "vpp kernel return buffer",
                       kernel_return_size, 0x1000);
    vpp_gpe_ctx->vpp_kernel_return.bo = bo;
    dri_bo_reference(vpp_gpe_ctx->vpp_kernel_return.bo);

    vpp_gpe_ctx->gpe_context_init(ctx, &vpp_gpe_ctx->gpe_ctx);

    return VA_STATUS_SUCCESS;
}

static VAStatus
gen75_gpe_process_prepare(VADriverContextP ctx,
                    struct vpp_gpe_context *vpp_gpe_ctx)
{
    /* Set up all the memory objects */
    gen75_gpe_process_surfaces_setup(ctx, vpp_gpe_ctx);
    gen75_gpe_process_interface_setup(ctx, vpp_gpe_ctx);
    //gen75_gpe_process_constant_setup(ctx, vpp_gpe_ctx);

    /* Program the media pipeline */
    gen75_gpe_process_pipeline_setup(ctx, vpp_gpe_ctx);

    return VA_STATUS_SUCCESS;
}

static VAStatus
gen75_gpe_process_run(VADriverContextP ctx,
                struct vpp_gpe_context *vpp_gpe_ctx)
{
    intel_batchbuffer_flush(vpp_gpe_ctx->batch);

    return VA_STATUS_SUCCESS;
}

static VAStatus
gen75_gpe_process(VADriverContextP ctx,
                  struct vpp_gpe_context *vpp_gpe_ctx)
{
    VAStatus va_status = VA_STATUS_SUCCESS;

    va_status = gen75_gpe_process_init(ctx, vpp_gpe_ctx);
    if (va_status != VA_STATUS_SUCCESS)
        return va_status;

    va_status = gen75_gpe_process_prepare(ctx, vpp_gpe_ctx);
    if (va_status != VA_STATUS_SUCCESS)
        return va_status;

    va_status = gen75_gpe_process_run(ctx, vpp_gpe_ctx);
    if (va_status != VA_STATUS_SUCCESS)
        return va_status;

    return VA_STATUS_SUCCESS;
}

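/*
 * Gen8 path: same binding-table layout as the Gen7 version above, only
 * with the Gen8 padded surface-state stride.
 */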
static VAStatus
gen8_gpe_process_surfaces_setup(VADriverContextP ctx,
                   struct vpp_gpe_context *vpp_gpe_ctx)
{
    struct object_surface *obj_surface;
    unsigned int i = 0;
    unsigned char input_surface_sum = (1 + vpp_gpe_ctx->forward_surf_sum +
                                         vpp_gpe_ctx->backward_surf_sum) * 2;

    /* Bind the input NV12 surfaces (Luma + Chroma) */
    for (i = 0; i < input_surface_sum; i += 2) {
         obj_surface = vpp_gpe_ctx->surface_input_object[i/2];
         assert(obj_surface);
         gen8_gpe_media_rw_surface_setup(ctx,
                                         &vpp_gpe_ctx->gpe_ctx,
                                          obj_surface,
                                          BINDING_TABLE_OFFSET_GEN8(i),
                                          SURFACE_STATE_OFFSET_GEN8(i));

         gen8_gpe_media_chroma_surface_setup(ctx,
                                          &vpp_gpe_ctx->gpe_ctx,
                                          obj_surface,
                                          BINDING_TABLE_OFFSET_GEN8(i + 1),
                                          SURFACE_STATE_OFFSET_GEN8(i + 1));
    }

    /* Bind the output NV12 surface (Luma + Chroma) */
    obj_surface = vpp_gpe_ctx->surface_output_object;
    assert(obj_surface);
    gen8_gpe_media_rw_surface_setup(ctx,
                                    &vpp_gpe_ctx->gpe_ctx,
                                    obj_surface,
                                    BINDING_TABLE_OFFSET_GEN8(input_surface_sum),
                                    SURFACE_STATE_OFFSET_GEN8(input_surface_sum));
    gen8_gpe_media_chroma_surface_setup(ctx,
                                    &vpp_gpe_ctx->gpe_ctx,
                                    obj_surface,
                                    BINDING_TABLE_OFFSET_GEN8(input_surface_sum + 1),
                                    SURFACE_STATE_OFFSET_GEN8(input_surface_sum + 1));
    /* Bind the kernel return buffer surface */
    gen7_gpe_buffer_suface_setup(ctx,
                                  &vpp_gpe_ctx->gpe_ctx,
                                  &vpp_gpe_ctx->vpp_kernel_return,
                                  BINDING_TABLE_OFFSET_GEN8(input_surface_sum + 2),
                                  SURFACE_STATE_OFFSET_GEN8(input_surface_sum + 2));

    return VA_STATUS_SUCCESS;
}

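/*
 * On Gen8 the interface descriptors live inside the dynamic state buffer
 * at idrt_offset, and the kernel start pointer is an offset into that
 * same buffer (kernel_offset), so no relocation is needed here.
 */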
static VAStatus
gen8_gpe_process_interface_setup(VADriverContextP ctx,
                    struct vpp_gpe_context *vpp_gpe_ctx)
{
    struct gen8_interface_descriptor_data *desc;
    dri_bo *bo = vpp_gpe_ctx->gpe_ctx.dynamic_state.bo;
    int i;

    dri_bo_map(bo, 1);
    assert(bo->virtual);
    desc = (struct gen8_interface_descriptor_data *)(bo->virtual
                               + vpp_gpe_ctx->gpe_ctx.idrt_offset);

    /* Set up the descriptor table */
    for (i = 0; i < vpp_gpe_ctx->sub_shader_sum; i++) {
        struct i965_kernel *kernel;
        kernel = &vpp_gpe_ctx->gpe_ctx.kernels[i];
        assert(sizeof(*desc) == 32);
        memset(desc, 0, sizeof(*desc));
        desc->desc0.kernel_start_pointer = kernel->kernel_offset >> 6;
        desc->desc3.sampler_count = 0; /* FIXME: */
        desc->desc3.sampler_state_pointer = 0;
        desc->desc4.binding_table_entry_count = 6; /* FIXME: */
        desc->desc4.binding_table_pointer = (BINDING_TABLE_OFFSET_GEN8(0) >> 5);
        desc->desc5.constant_urb_entry_read_offset = 0;
        desc->desc5.constant_urb_entry_read_length = 0;

        desc++;
    }

    dri_bo_unmap(bo);

    return VA_STATUS_SUCCESS;
}

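/*
 * NOTE: nothing in this file calls gen8_gpe_process_constant_fill() (the
 * constant setup call in gen8_gpe_process_prepare() is commented out).
 * It also writes at offset 0 of the dynamic state buffer; if it is ever
 * wired up, the CURBE area's offset within that buffer would presumably
 * need to be added.
 */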
static VAStatus
gen8_gpe_process_constant_fill(VADriverContextP ctx,
                   struct vpp_gpe_context *vpp_gpe_ctx)
{
    dri_bo_map(vpp_gpe_ctx->gpe_ctx.dynamic_state.bo, 1);
    assert(vpp_gpe_ctx->gpe_ctx.dynamic_state.bo->virtual);
    unsigned char *constant_buffer = vpp_gpe_ctx->gpe_ctx.dynamic_state.bo->virtual;
    memcpy(constant_buffer, vpp_gpe_ctx->kernel_param,
                            vpp_gpe_ctx->kernel_param_size);
    dri_bo_unmap(vpp_gpe_ctx->gpe_ctx.dynamic_state.bo);

    return VA_STATUS_SUCCESS;
}

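/*
 * Same MEDIA_OBJECT layout as the Gen7 version, except that the Gen8 path
 * also emits a MEDIA_STATE_FLUSH after each MEDIA_OBJECT.
 */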
static VAStatus
gen8_gpe_process_parameters_fill(VADriverContextP ctx,
                           struct vpp_gpe_context *vpp_gpe_ctx)
{
    unsigned int *command_ptr;
    unsigned int i, size = vpp_gpe_ctx->thread_param_size;
    unsigned char *position = NULL;

    /* Thread inline data setting */
    dri_bo_map(vpp_gpe_ctx->vpp_batchbuffer.bo, 1);
    command_ptr = vpp_gpe_ctx->vpp_batchbuffer.bo->virtual;

    for (i = 0; i < vpp_gpe_ctx->thread_num; i++)
    {
         *command_ptr++ = (CMD_MEDIA_OBJECT | (size/sizeof(int) + 6 - 2));
         *command_ptr++ = vpp_gpe_ctx->sub_shader_index;
         *command_ptr++ = 0;
         *command_ptr++ = 0;
         *command_ptr++ = 0;
         *command_ptr++ = 0;

         /* copy thread inline data */
         position = (unsigned char *)(vpp_gpe_ctx->thread_param + size * i);
         memcpy(command_ptr, position, size);
         command_ptr += size/sizeof(int);

         *command_ptr++ = CMD_MEDIA_STATE_FLUSH;
         *command_ptr++ = 0;
    }

    *command_ptr++ = 0;
    *command_ptr++ = MI_BATCH_BUFFER_END;

    dri_bo_unmap(vpp_gpe_ctx->vpp_batchbuffer.bo);

    return VA_STATUS_SUCCESS;
}

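/*
 * On Gen8 MI_BATCH_BUFFER_START takes a 48-bit address, hence the
 * three-DWORD form below (header, address low, address high).
 */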
static VAStatus
gen8_gpe_process_pipeline_setup(VADriverContextP ctx,
                   struct vpp_gpe_context *vpp_gpe_ctx)
{
    intel_batchbuffer_start_atomic(vpp_gpe_ctx->batch, 0x1000);
    intel_batchbuffer_emit_mi_flush(vpp_gpe_ctx->batch);

    gen8_gpe_pipeline_setup(ctx, &vpp_gpe_ctx->gpe_ctx, vpp_gpe_ctx->batch);

    gen8_gpe_process_parameters_fill(ctx, vpp_gpe_ctx);

    BEGIN_BATCH(vpp_gpe_ctx->batch, 3);
    OUT_BATCH(vpp_gpe_ctx->batch, MI_BATCH_BUFFER_START | (1 << 8) | (1 << 0));
    OUT_RELOC(vpp_gpe_ctx->batch,
              vpp_gpe_ctx->vpp_batchbuffer.bo,
              I915_GEM_DOMAIN_COMMAND, 0,
              0);
    OUT_BATCH(vpp_gpe_ctx->batch, 0);

    ADVANCE_BATCH(vpp_gpe_ctx->batch);

    intel_batchbuffer_end_atomic(vpp_gpe_ctx->batch);

    return VA_STATUS_SUCCESS;
}

static VAStatus
gen8_gpe_process_init(VADriverContextP ctx,
                 struct vpp_gpe_context *vpp_gpe_ctx)
{
    struct i965_driver_data *i965 = i965_driver_data(ctx);
    dri_bo *bo;

    /* Unlike Gen7, each Gen8 MEDIA_OBJECT is followed by a two-DWORD
     * MEDIA_STATE_FLUSH, so budget eight header DWORDs per thread. */
    unsigned int batch_buf_size = vpp_gpe_ctx->thread_num *
                 (vpp_gpe_ctx->thread_param_size + 8 * sizeof(int)) + 16;

    vpp_gpe_ctx->vpp_kernel_return.num_blocks = vpp_gpe_ctx->thread_num;
    vpp_gpe_ctx->vpp_kernel_return.size_block = 16;
    vpp_gpe_ctx->vpp_kernel_return.pitch = 1;

    unsigned int kernel_return_size = vpp_gpe_ctx->vpp_kernel_return.num_blocks
           * vpp_gpe_ctx->vpp_kernel_return.size_block;

    dri_bo_unreference(vpp_gpe_ctx->vpp_batchbuffer.bo);
    bo = dri_bo_alloc(i965->intel.bufmgr,
                      "vpp batch buffer",
                       batch_buf_size, 0x1000);
    vpp_gpe_ctx->vpp_batchbuffer.bo = bo;
    dri_bo_reference(vpp_gpe_ctx->vpp_batchbuffer.bo);

    dri_bo_unreference(vpp_gpe_ctx->vpp_kernel_return.bo);
    bo = dri_bo_alloc(i965->intel.bufmgr,
                      "vpp kernel return buffer",
                       kernel_return_size, 0x1000);
    vpp_gpe_ctx->vpp_kernel_return.bo = bo;
    dri_bo_reference(vpp_gpe_ctx->vpp_kernel_return.bo);

    vpp_gpe_ctx->gpe_context_init(ctx, &vpp_gpe_ctx->gpe_ctx);

    return VA_STATUS_SUCCESS;
}

static VAStatus
gen8_gpe_process_prepare(VADriverContextP ctx,
                    struct vpp_gpe_context *vpp_gpe_ctx)
{
    /* Set up all the memory objects */
    gen8_gpe_process_surfaces_setup(ctx, vpp_gpe_ctx);
    gen8_gpe_process_interface_setup(ctx, vpp_gpe_ctx);
    //gen8_gpe_process_constant_setup(ctx, vpp_gpe_ctx);

    /* Program the media pipeline */
    gen8_gpe_process_pipeline_setup(ctx, vpp_gpe_ctx);

    return VA_STATUS_SUCCESS;
}

static VAStatus
gen8_gpe_process_run(VADriverContextP ctx,
                struct vpp_gpe_context *vpp_gpe_ctx)
{
    intel_batchbuffer_flush(vpp_gpe_ctx->batch);

    return VA_STATUS_SUCCESS;
}

static VAStatus
gen8_gpe_process(VADriverContextP ctx,
                  struct vpp_gpe_context *vpp_gpe_ctx)
{
    VAStatus va_status = VA_STATUS_SUCCESS;

    va_status = gen8_gpe_process_init(ctx, vpp_gpe_ctx);
    if (va_status != VA_STATUS_SUCCESS)
        return va_status;

    va_status = gen8_gpe_process_prepare(ctx, vpp_gpe_ctx);
    if (va_status != VA_STATUS_SUCCESS)
        return va_status;

    va_status = gen8_gpe_process_run(ctx, vpp_gpe_ctx);
    if (va_status != VA_STATUS_SUCCESS)
        return va_status;

    return VA_STATUS_SUCCESS;
}

static VAStatus
vpp_gpe_process(VADriverContextP ctx,
                  struct vpp_gpe_context *vpp_gpe_ctx)
{
    struct i965_driver_data *i965 = i965_driver_data(ctx);

    if (IS_HASWELL(i965->intel.device_id))
        return gen75_gpe_process(ctx, vpp_gpe_ctx);
    else if (IS_GEN8(i965->intel.device_id))
        return gen8_gpe_process(ctx, vpp_gpe_ctx);

    return VA_STATUS_ERROR_UNIMPLEMENTED;
}

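/*
 * Unsharp-mask sharpening in three GPE passes:
 *   1. horizontal blur of the input into the output surface,
 *   2. vertical blur of that result into a temporary NV12 surface,
 *   3. unmask: blend the blurred temporary with the original input into
 *      the original output.
 * Each pass rebuilds the per-thread inline data (one strip of the frame
 * per thread: 16 rows, 16 columns, or 4 rows respectively) and reruns
 * the media pipeline.
 */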
static VAStatus
vpp_gpe_process_sharpening(VADriverContextP ctx,
                             struct vpp_gpe_context *vpp_gpe_ctx)
{
     VAStatus va_status = VA_STATUS_SUCCESS;
     struct i965_driver_data *i965 = i965_driver_data(ctx);
     struct object_surface *origin_in_obj_surface = vpp_gpe_ctx->surface_input_object[0];
     struct object_surface *origin_out_obj_surface = vpp_gpe_ctx->surface_output_object;

     VAProcPipelineParameterBuffer *pipe = vpp_gpe_ctx->pipeline_param;
     VABufferID *filter_ids = (VABufferID *)pipe->filters;
     struct object_buffer *obj_buf = BUFFER(*(filter_ids + 0));

     assert(obj_buf && obj_buf->buffer_store && obj_buf->buffer_store->buffer);

     if (!obj_buf ||
         !obj_buf->buffer_store ||
         !obj_buf->buffer_store->buffer)
         goto error;

     VAProcFilterParameterBuffer *filter =
                  (VAProcFilterParameterBuffer *)obj_buf->buffer_store->buffer;
     float sharpening_intensity = filter->value;

     ThreadParameterSharpening thr_param;
     unsigned int thr_param_size = sizeof(ThreadParameterSharpening);
     unsigned int i;
     unsigned char *pos;

     if (vpp_gpe_ctx->is_first_frame) {
         vpp_gpe_ctx->sub_shader_sum = 3;
         struct i965_kernel *vpp_kernels = NULL;

         if (IS_HASWELL(i965->intel.device_id))
             vpp_kernels = gen75_vpp_sharpening_kernels;
         else if (IS_GEN8(i965->intel.device_id))
             vpp_kernels = gen8_vpp_sharpening_kernels;

         /* Guard against loading from an uninitialized pointer on an
          * unexpected platform. */
         if (!vpp_kernels)
             return VA_STATUS_ERROR_UNIMPLEMENTED;

         vpp_gpe_ctx->gpe_load_kernels(ctx,
                               &vpp_gpe_ctx->gpe_ctx,
                               vpp_kernels,
                               vpp_gpe_ctx->sub_shader_sum);
     }

     if (vpp_gpe_ctx->surface_tmp == VA_INVALID_ID) {
        va_status = i965_CreateSurfaces(ctx,
                                       vpp_gpe_ctx->in_frame_w,
                                       vpp_gpe_ctx->in_frame_h,
                                       VA_RT_FORMAT_YUV420,
                                       1,
                                       &vpp_gpe_ctx->surface_tmp);
        assert(va_status == VA_STATUS_SUCCESS);

        struct object_surface *obj_surf = SURFACE(vpp_gpe_ctx->surface_tmp);
        assert(obj_surf);

        if (obj_surf) {
            i965_check_alloc_surface_bo(ctx, obj_surf, 1, VA_FOURCC('N','V','1','2'),
                                        SUBSAMPLE_YUV420);
            vpp_gpe_ctx->surface_tmp_object = obj_surf;
        }
     }

    assert(sharpening_intensity >= 0.0 && sharpening_intensity <= 1.0);
    thr_param.l_amount = (unsigned int)(sharpening_intensity * 128);
    thr_param.d_amount = (unsigned int)(sharpening_intensity * 128);

    thr_param.base.pic_width = vpp_gpe_ctx->in_frame_w;
    thr_param.base.pic_height = vpp_gpe_ctx->in_frame_h;

    /* Step 1: horizontal blur process */
    vpp_gpe_ctx->forward_surf_sum = 0;
    vpp_gpe_ctx->backward_surf_sum = 0;

    vpp_gpe_ctx->thread_num = vpp_gpe_ctx->in_frame_h/16;
    vpp_gpe_ctx->thread_param_size = thr_param_size;
    vpp_gpe_ctx->thread_param = (unsigned char *)malloc(vpp_gpe_ctx->thread_param_size
                                                       * vpp_gpe_ctx->thread_num);
    pos = vpp_gpe_ctx->thread_param;

    if (!pos) {
        return VA_STATUS_ERROR_ALLOCATION_FAILED;
    }

    for (i = 0; i < vpp_gpe_ctx->thread_num; i++) {
        thr_param.base.v_pos = 16 * i;
        thr_param.base.h_pos = 0;
        memcpy(pos, &thr_param, thr_param_size);
        pos += thr_param_size;
    }

    vpp_gpe_ctx->sub_shader_index = 0;
    va_status = vpp_gpe_process(ctx, vpp_gpe_ctx);
    free(vpp_gpe_ctx->thread_param);

    /* Step 2: vertical blur process */
    vpp_gpe_ctx->surface_input_object[0] = vpp_gpe_ctx->surface_output_object;
    vpp_gpe_ctx->surface_output_object = vpp_gpe_ctx->surface_tmp_object;
    vpp_gpe_ctx->forward_surf_sum = 0;
    vpp_gpe_ctx->backward_surf_sum = 0;

    vpp_gpe_ctx->thread_num = vpp_gpe_ctx->in_frame_w/16;
    vpp_gpe_ctx->thread_param_size = thr_param_size;
    vpp_gpe_ctx->thread_param = (unsigned char *)malloc(vpp_gpe_ctx->thread_param_size
                                                       * vpp_gpe_ctx->thread_num);
    pos = vpp_gpe_ctx->thread_param;

    if (!pos) {
        return VA_STATUS_ERROR_ALLOCATION_FAILED;
    }

    for (i = 0; i < vpp_gpe_ctx->thread_num; i++) {
        thr_param.base.v_pos = 0;
        thr_param.base.h_pos = 16 * i;
        memcpy(pos, &thr_param, thr_param_size);
        pos += thr_param_size;
    }

    vpp_gpe_ctx->sub_shader_index = 1;
    vpp_gpe_process(ctx, vpp_gpe_ctx);
    free(vpp_gpe_ctx->thread_param);

    /* Step 3: apply the blur to the original surface */
    vpp_gpe_ctx->surface_input_object[0]  = origin_in_obj_surface;
    vpp_gpe_ctx->surface_input_object[1]  = vpp_gpe_ctx->surface_tmp_object;
    vpp_gpe_ctx->surface_output_object    = origin_out_obj_surface;
    vpp_gpe_ctx->forward_surf_sum  = 1;
    vpp_gpe_ctx->backward_surf_sum = 0;

    vpp_gpe_ctx->thread_num = vpp_gpe_ctx->in_frame_h/4;
    vpp_gpe_ctx->thread_param_size = thr_param_size;
    vpp_gpe_ctx->thread_param = (unsigned char *)malloc(vpp_gpe_ctx->thread_param_size
                                                       * vpp_gpe_ctx->thread_num);
    pos = vpp_gpe_ctx->thread_param;

    if (!pos) {
        return VA_STATUS_ERROR_ALLOCATION_FAILED;
    }

    for (i = 0; i < vpp_gpe_ctx->thread_num; i++) {
        thr_param.base.v_pos = 4 * i;
        thr_param.base.h_pos = 0;
        memcpy(pos, &thr_param, thr_param_size);
        pos += thr_param_size;
    }

    vpp_gpe_ctx->sub_shader_index = 2;
    va_status = vpp_gpe_process(ctx, vpp_gpe_ctx);
    free(vpp_gpe_ctx->thread_param);

    return va_status;

error:
    return VA_STATUS_ERROR_INVALID_PARAMETER;
}

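/*
 * Entry point from the VPP pipeline: collects the input surface plus any
 * forward/backward references, then dispatches the requested filter.
 * Only VAProcFilterSharpening is implemented on this path.
 */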
VAStatus vpp_gpe_process_picture(VADriverContextP ctx,
                    struct vpp_gpe_context *vpp_gpe_ctx)
{
    VAStatus va_status = VA_STATUS_SUCCESS;
    struct i965_driver_data *i965 = i965_driver_data(ctx);
    VAProcPipelineParameterBuffer *pipe = vpp_gpe_ctx->pipeline_param;
    VAProcFilterParameterBuffer *filter = NULL;
    unsigned int i;
    struct object_surface *obj_surface = NULL;

    if (pipe->num_filters && !pipe->filters)
        goto error;

    for (i = 0; i < pipe->num_filters; i++) {
        struct object_buffer *obj_buf = BUFFER(pipe->filters[i]);

        assert(obj_buf && obj_buf->buffer_store && obj_buf->buffer_store->buffer);

        if (!obj_buf ||
            !obj_buf->buffer_store ||
            !obj_buf->buffer_store->buffer)
            goto error;

        filter = (VAProcFilterParameterBuffer *)obj_buf->buffer_store->buffer;
        if (filter->type == VAProcFilterSharpening) {
           break;
        }
    }

    assert(pipe->num_forward_references + pipe->num_backward_references <= 4);
    vpp_gpe_ctx->surface_input_object[0] = vpp_gpe_ctx->surface_pipeline_input_object;

    vpp_gpe_ctx->forward_surf_sum = 0;
    vpp_gpe_ctx->backward_surf_sum = 0;

    for (i = 0; i < pipe->num_forward_references; i++)
    {
        obj_surface = SURFACE(pipe->forward_references[i]);

        assert(obj_surface);
        vpp_gpe_ctx->surface_input_object[i + 1] = obj_surface;
        vpp_gpe_ctx->forward_surf_sum++;
    }

    for (i = 0; i < pipe->num_backward_references; i++)
    {
        obj_surface = SURFACE(pipe->backward_references[i]);

        assert(obj_surface);
        vpp_gpe_ctx->surface_input_object[vpp_gpe_ctx->forward_surf_sum + 1 + i] = obj_surface;
        vpp_gpe_ctx->backward_surf_sum++;
    }

    obj_surface = vpp_gpe_ctx->surface_input_object[0];
    vpp_gpe_ctx->in_frame_w = obj_surface->orig_width;
    vpp_gpe_ctx->in_frame_h = obj_surface->orig_height;

    if (filter && filter->type == VAProcFilterSharpening) {
       va_status = vpp_gpe_process_sharpening(ctx, vpp_gpe_ctx);
    } else {
       va_status = VA_STATUS_ERROR_ATTR_NOT_SUPPORTED;
    }

    vpp_gpe_ctx->is_first_frame = 0;

    return va_status;

error:
    return VA_STATUS_ERROR_INVALID_PARAMETER;
}

void
vpp_gpe_context_destroy(VADriverContextP ctx,
                               struct vpp_gpe_context *vpp_gpe_ctx)
{
    dri_bo_unreference(vpp_gpe_ctx->vpp_batchbuffer.bo);
    vpp_gpe_ctx->vpp_batchbuffer.bo = NULL;

    dri_bo_unreference(vpp_gpe_ctx->vpp_kernel_return.bo);
    vpp_gpe_ctx->vpp_kernel_return.bo = NULL;

    vpp_gpe_ctx->gpe_context_destroy(&vpp_gpe_ctx->gpe_ctx);

    if (vpp_gpe_ctx->surface_tmp != VA_INVALID_ID) {
        assert(vpp_gpe_ctx->surface_tmp_object != NULL);
        i965_DestroySurfaces(ctx, &vpp_gpe_ctx->surface_tmp, 1);
        vpp_gpe_ctx->surface_tmp = VA_INVALID_ID;
        vpp_gpe_ctx->surface_tmp_object = NULL;
    }

    /* The batch was created with intel_batchbuffer_new(), so release it
     * with its matching destructor rather than a bare free(). */
    intel_batchbuffer_free(vpp_gpe_ctx->batch);

    free(vpp_gpe_ctx);
}

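/*
 * Note that several VFE fields below follow the hardware's "minus one"
 * convention (e.g. max_num_threads = 60 - 1 programs 60 threads).  A
 * minimal usage sketch, assuming the caller owns the returned context:
 *
 *     struct vpp_gpe_context *gpe = vpp_gpe_context_init(ctx);
 *     // ... set the surface objects and pipeline_param, then ...
 *     vpp_gpe_process_picture(ctx, gpe);
 *     vpp_gpe_context_destroy(ctx, gpe);
 */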
struct vpp_gpe_context *
vpp_gpe_context_init(VADriverContextP ctx)
{
    struct i965_driver_data *i965 = i965_driver_data(ctx);
    struct vpp_gpe_context *vpp_gpe_ctx = calloc(1, sizeof(struct vpp_gpe_context));
    struct i965_gpe_context *gpe_ctx;

    if (!vpp_gpe_ctx)
        return NULL;

    gpe_ctx = &(vpp_gpe_ctx->gpe_ctx);

    assert(IS_HASWELL(i965->intel.device_id) ||
           IS_GEN8(i965->intel.device_id));

    vpp_gpe_ctx->surface_tmp = VA_INVALID_ID;
    vpp_gpe_ctx->surface_tmp_object = NULL;
    vpp_gpe_ctx->batch = intel_batchbuffer_new(&i965->intel, I915_EXEC_RENDER, 0);
    vpp_gpe_ctx->is_first_frame = 1;

    gpe_ctx->vfe_state.max_num_threads = 60 - 1;
    gpe_ctx->vfe_state.num_urb_entries = 16;
    gpe_ctx->vfe_state.gpgpu_mode = 0;
    gpe_ctx->vfe_state.urb_entry_size = 59 - 1;
    gpe_ctx->vfe_state.curbe_allocation_size = CURBE_ALLOCATION_SIZE - 1;

    if (IS_HASWELL(i965->intel.device_id)) {
        vpp_gpe_ctx->gpe_context_init     = i965_gpe_context_init;
        vpp_gpe_ctx->gpe_context_destroy  = i965_gpe_context_destroy;
        vpp_gpe_ctx->gpe_load_kernels     = i965_gpe_load_kernels;
        gpe_ctx->surface_state_binding_table.length =
               (SURFACE_STATE_PADDED_SIZE_GEN7 + sizeof(unsigned int)) * MAX_MEDIA_SURFACES_GEN6;

        gpe_ctx->curbe.length = CURBE_TOTAL_DATA_LENGTH;
        gpe_ctx->idrt.max_entries = MAX_INTERFACE_DESC_GEN6;
        gpe_ctx->idrt.entry_size = sizeof(struct gen6_interface_descriptor_data);

    } else if (IS_GEN8(i965->intel.device_id)) {
        vpp_gpe_ctx->gpe_context_init     = gen8_gpe_context_init;
        vpp_gpe_ctx->gpe_context_destroy  = gen8_gpe_context_destroy;
        vpp_gpe_ctx->gpe_load_kernels     = gen8_gpe_load_kernels;
        gpe_ctx->surface_state_binding_table.length =
               (SURFACE_STATE_PADDED_SIZE_GEN8 + sizeof(unsigned int)) * MAX_MEDIA_SURFACES_GEN6;

        gpe_ctx->curbe_size = CURBE_TOTAL_DATA_LENGTH;
        gpe_ctx->idrt_size  = sizeof(struct gen8_interface_descriptor_data) * MAX_INTERFACE_DESC_GEN6;

    }

    return vpp_gpe_ctx;
}