X-Git-Url: http://review.tizen.org/git/?a=blobdiff_plain;f=src%2Fi965_render.c;h=15643f360f3faecebd80364345526fab77b75449;hb=e889cefebad2a2fa2230882b8070309f23054247;hp=fcf63ee2f88b2ceef4fd7cbb91812c5090924490;hpb=afea5457daba2813f7c6012db5b0e15e2a1f189c;p=platform%2Fupstream%2Flibva-intel-driver.git diff --git a/src/i965_render.c b/src/i965_render.c index fcf63ee..15643f3 100644 --- a/src/i965_render.c +++ b/src/i965_render.c @@ -35,6 +35,7 @@ #include #include #include +#include #include @@ -54,7 +55,7 @@ static const uint32_t sf_kernel_static[][4] = #include "shaders/render/exa_sf.g4b" }; -#define PS_KERNEL_NUM_GRF 32 +#define PS_KERNEL_NUM_GRF 48 #define PS_MAX_THREADS 32 #define I965_GRF_BLOCKS(nreg) ((nreg + 15) / 16 - 1) @@ -64,6 +65,7 @@ static const uint32_t ps_kernel_static[][4] = #include "shaders/render/exa_wm_xy.g4b" #include "shaders/render/exa_wm_src_affine.g4b" #include "shaders/render/exa_wm_src_sample_planar.g4b" +#include "shaders/render/exa_wm_yuv_color_balance.g4b" #include "shaders/render/exa_wm_yuv_rgb.g4b" #include "shaders/render/exa_wm_write.g4b" }; @@ -86,6 +88,7 @@ static const uint32_t ps_kernel_static_gen5[][4] = #include "shaders/render/exa_wm_xy.g4b.gen5" #include "shaders/render/exa_wm_src_affine.g4b.gen5" #include "shaders/render/exa_wm_src_sample_planar.g4b.gen5" +#include "shaders/render/exa_wm_yuv_color_balance.g4b.gen5" #include "shaders/render/exa_wm_yuv_rgb.g4b.gen5" #include "shaders/render/exa_wm_write.g4b.gen5" }; @@ -105,6 +108,7 @@ static const uint32_t sf_kernel_static_gen6[][4] = static const uint32_t ps_kernel_static_gen6[][4] = { #include "shaders/render/exa_wm_src_affine.g6b" #include "shaders/render/exa_wm_src_sample_planar.g6b" +#include "shaders/render/exa_wm_yuv_color_balance.g6b" #include "shaders/render/exa_wm_yuv_rgb.g6b" #include "shaders/render/exa_wm_write.g6b" }; @@ -123,6 +127,7 @@ static const uint32_t sf_kernel_static_gen7[][4] = static const uint32_t ps_kernel_static_gen7[][4] = { #include "shaders/render/exa_wm_src_affine.g7b" #include "shaders/render/exa_wm_src_sample_planar.g7b" +#include "shaders/render/exa_wm_yuv_color_balance.g7b" #include "shaders/render/exa_wm_yuv_rgb.g7b" #include "shaders/render/exa_wm_write.g7b" }; @@ -137,13 +142,14 @@ static const uint32_t ps_subpic_kernel_static_gen7[][4] = { static const uint32_t ps_kernel_static_gen7_haswell[][4] = { #include "shaders/render/exa_wm_src_affine.g7b" #include "shaders/render/exa_wm_src_sample_planar.g7b.haswell" +#include "shaders/render/exa_wm_yuv_color_balance.g7b.haswell" #include "shaders/render/exa_wm_yuv_rgb.g7b" #include "shaders/render/exa_wm_write.g7b" }; -#define SURFACE_STATE_PADDED_SIZE_I965 ALIGN(sizeof(struct i965_surface_state), 32) -#define SURFACE_STATE_PADDED_SIZE_GEN7 ALIGN(sizeof(struct gen7_surface_state), 32) -#define SURFACE_STATE_PADDED_SIZE MAX(SURFACE_STATE_PADDED_SIZE_I965, SURFACE_STATE_PADDED_SIZE_GEN7) + +#define SURFACE_STATE_PADDED_SIZE MAX(SURFACE_STATE_PADDED_SIZE_GEN6, SURFACE_STATE_PADDED_SIZE_GEN7) + #define SURFACE_STATE_OFFSET(index) (SURFACE_STATE_PADDED_SIZE * index) #define BINDING_TABLE_OFFSET SURFACE_STATE_OFFSET(MAX_RENDER_SURFACES) @@ -302,8 +308,26 @@ static struct i965_kernel render_kernels_gen7_haswell[] = { #define URB_SF_ENTRIES 1 #define URB_SF_ENTRY_SIZE 2 -#define URB_CS_ENTRIES 1 -#define URB_CS_ENTRY_SIZE 1 +#define URB_CS_ENTRIES 4 +#define URB_CS_ENTRY_SIZE 4 + +static float yuv_to_rgb_bt601[3][4] = { +{1.164, 0, 1.596, -0.06275,}, +{1.164, -0.392, -0.813, -0.50196,}, +{1.164, 2.017, 0, -0.50196,}, +}; + +static float yuv_to_rgb_bt709[3][4] = { +{1.164, 0, 1.793, -0.06275,}, +{1.164, -0.213, -0.533, -0.50196,}, +{1.164, 2.112, 0, -0.50196,}, +}; + +static float yuv_to_rgb_smpte_240[3][4] = { +{1.164, 0, 1.794, -0.06275,}, +{1.164, -0.258, -0.5425, -0.50196,}, +{1.164, 2.078, 0, -0.50196,}, +}; static void i965_render_vs_unit(VADriverContextP ctx) @@ -439,8 +463,8 @@ i965_subpic_render_wm_unit(VADriverContextP ctx) wm_state->thread2.scratch_space_base_pointer = 0; wm_state->thread2.per_thread_scratch_space = 0; /* 1024 bytes */ - wm_state->thread3.dispatch_grf_start_reg = 3; /* XXX */ - wm_state->thread3.const_urb_entry_read_length = 0; + wm_state->thread3.dispatch_grf_start_reg = 2; /* XXX */ + wm_state->thread3.const_urb_entry_read_length = 4; wm_state->thread3.const_urb_entry_read_offset = 0; wm_state->thread3.urb_entry_read_length = 1; /* XXX */ wm_state->thread3.urb_entry_read_offset = 0; /* XXX */ @@ -454,7 +478,7 @@ i965_subpic_render_wm_unit(VADriverContextP ctx) wm_state->wm4.sampler_count = (render_state->wm.sampler_count + 3) / 4; } - wm_state->wm5.max_threads = render_state->max_wm_threads - 1; + wm_state->wm5.max_threads = i965->intel.device_info->max_wm_threads - 1; wm_state->wm5.thread_dispatch_enable = 1; wm_state->wm5.enable_16_pix = 1; wm_state->wm5.enable_8_pix = 0; @@ -504,7 +528,7 @@ i965_render_wm_unit(VADriverContextP ctx) wm_state->thread2.per_thread_scratch_space = 0; /* 1024 bytes */ wm_state->thread3.dispatch_grf_start_reg = 2; /* XXX */ - wm_state->thread3.const_urb_entry_read_length = 1; + wm_state->thread3.const_urb_entry_read_length = 4; wm_state->thread3.const_urb_entry_read_offset = 0; wm_state->thread3.urb_entry_read_length = 1; /* XXX */ wm_state->thread3.urb_entry_read_offset = 0; /* XXX */ @@ -518,7 +542,7 @@ i965_render_wm_unit(VADriverContextP ctx) wm_state->wm4.sampler_count = (render_state->wm.sampler_count + 3) / 4; } - wm_state->wm5.max_threads = render_state->max_wm_threads - 1; + wm_state->wm5.max_threads = i965->intel.device_info->max_wm_threads - 1; wm_state->wm5.thread_dispatch_enable = 1; wm_state->wm5.enable_16_pix = 1; wm_state->wm5.enable_8_pix = 0; @@ -779,6 +803,7 @@ gen7_render_set_surface_state( gen7_render_set_surface_tiling(ss, tiling); } + static void i965_render_src_surface_state( VADriverContextP ctx, @@ -851,7 +876,7 @@ i965_render_src_surfaces_state( i965_render_src_surface_state(ctx, 1, region, 0, rw, rh, region_pitch, I965_SURFACEFORMAT_R8_UNORM, flags); /* Y */ i965_render_src_surface_state(ctx, 2, region, 0, rw, rh, region_pitch, I965_SURFACEFORMAT_R8_UNORM, flags); - if (obj_surface->fourcc == VA_FOURCC('N', 'V', '1', '2')) { + if (obj_surface->fourcc == VA_FOURCC_NV12) { i965_render_src_surface_state(ctx, 3, region, region_pitch * obj_surface->y_cb_offset, obj_surface->cb_cr_width, obj_surface->cb_cr_height, obj_surface->cb_cr_pitch, @@ -1050,29 +1075,62 @@ i965_render_upload_vertex( i965_fill_vertex_buffer(ctx, tex_coords, vid_coords); } +#define PI 3.1415926 + static void i965_render_upload_constants(VADriverContextP ctx, - struct object_surface *obj_surface) + struct object_surface *obj_surface, + unsigned int flags) { struct i965_driver_data *i965 = i965_driver_data(ctx); struct i965_render_state *render_state = &i965->render_state; unsigned short *constant_buffer; + float *color_balance_base; + float contrast = (float)i965->contrast_attrib->value / DEFAULT_CONTRAST; + float brightness = (float)i965->brightness_attrib->value / 255; /* YUV is float in the shader */ + float hue = (float)i965->hue_attrib->value / 180 * PI; + float saturation = (float)i965->saturation_attrib->value / DEFAULT_SATURATION; + float *yuv_to_rgb; + unsigned int color_flag; dri_bo_map(render_state->curbe.bo, 1); assert(render_state->curbe.bo->virtual); constant_buffer = render_state->curbe.bo->virtual; if (obj_surface->subsampling == SUBSAMPLE_YUV400) { - assert(obj_surface->fourcc == VA_FOURCC('I', 'M', 'C', '1') || - obj_surface->fourcc == VA_FOURCC('I', 'M', 'C', '3')); - *constant_buffer = 2; + assert(obj_surface->fourcc == VA_FOURCC_Y800); + + constant_buffer[0] = 2; } else { - if (obj_surface->fourcc == VA_FOURCC('N', 'V', '1', '2')) - *constant_buffer = 1; + if (obj_surface->fourcc == VA_FOURCC_NV12) + constant_buffer[0] = 1; else - *constant_buffer = 0; + constant_buffer[0] = 0; } + if (i965->contrast_attrib->value == DEFAULT_CONTRAST && + i965->brightness_attrib->value == DEFAULT_BRIGHTNESS && + i965->hue_attrib->value == DEFAULT_HUE && + i965->saturation_attrib->value == DEFAULT_SATURATION) + constant_buffer[1] = 1; /* skip color balance transformation */ + else + constant_buffer[1] = 0; + + color_balance_base = (float *)constant_buffer + 4; + *color_balance_base++ = contrast; + *color_balance_base++ = brightness; + *color_balance_base++ = cos(hue) * contrast * saturation; + *color_balance_base++ = sin(hue) * contrast * saturation; + + color_flag = flags & VA_SRC_COLOR_MASK; + yuv_to_rgb = (float *)constant_buffer + 8; + if (color_flag == VA_SRC_BT709) + memcpy(yuv_to_rgb, yuv_to_rgb_bt709, sizeof(yuv_to_rgb_bt709)); + else if (color_flag == VA_SRC_SMPTE_240) + memcpy(yuv_to_rgb, yuv_to_rgb_smpte_240, sizeof(yuv_to_rgb_smpte_240)); + else + memcpy(yuv_to_rgb, yuv_to_rgb_bt601, sizeof(yuv_to_rgb_bt601)); + dri_bo_unmap(render_state->curbe.bo); } @@ -1118,7 +1176,7 @@ i965_surface_render_state_setup( i965_render_cc_viewport(ctx); i965_render_cc_unit(ctx); i965_render_upload_vertex(ctx, obj_surface, src_rect, dst_rect); - i965_render_upload_constants(ctx, obj_surface); + i965_render_upload_constants(ctx, obj_surface, flags); } static void @@ -1479,7 +1537,8 @@ i965_clear_dest_region(VADriverContextP ctx) br13 |= pitch; if (IS_GEN6(i965->intel.device_id) || - IS_GEN7(i965->intel.device_id)) { + IS_GEN7(i965->intel.device_id) || + IS_GEN8(i965->intel.device_id)) { intel_batchbuffer_start_atomic_blt(batch, 24); BEGIN_BLT_BATCH(batch, 6); } else { @@ -1540,6 +1599,7 @@ i965_subpic_render_pipeline_setup(VADriverContextP ctx) i965_render_pipelined_pointers(ctx); i965_render_urb_layout(ctx); i965_render_cs_urb_layout(ctx); + i965_render_constant_buffer(ctx); i965_render_drawing_rectangle(ctx); i965_render_vertex_elements(ctx); i965_render_startup(ctx); @@ -1805,7 +1865,7 @@ gen6_render_setup_states( gen6_render_color_calc_state(ctx); gen6_render_blend_state(ctx); gen6_render_depth_stencil_state(ctx); - i965_render_upload_constants(ctx, obj_surface); + i965_render_upload_constants(ctx, obj_surface, flags); i965_render_upload_vertex(ctx, obj_surface, src_rect, dst_rect); } @@ -2044,7 +2104,7 @@ gen6_emit_wm_state(VADriverContextP ctx, int kernel) OUT_RELOC(batch, render_state->curbe.bo, I915_GEM_DOMAIN_INSTRUCTION, 0, - 0); + (URB_CS_ENTRY_SIZE-1)); OUT_BATCH(batch, 0); OUT_BATCH(batch, 0); OUT_BATCH(batch, 0); @@ -2057,7 +2117,7 @@ gen6_emit_wm_state(VADriverContextP ctx, int kernel) (5 << GEN6_3DSTATE_WM_BINDING_TABLE_ENTRY_COUNT_SHIFT)); OUT_BATCH(batch, 0); OUT_BATCH(batch, (6 << GEN6_3DSTATE_WM_DISPATCH_START_GRF_0_SHIFT)); /* DW4 */ - OUT_BATCH(batch, ((render_state->max_wm_threads - 1) << GEN6_3DSTATE_WM_MAX_THREADS_SHIFT) | + OUT_BATCH(batch, ((i965->intel.device_info->max_wm_threads - 1) << GEN6_3DSTATE_WM_MAX_THREADS_SHIFT) | GEN6_3DSTATE_WM_DISPATCH_ENABLE | GEN6_3DSTATE_WM_16_DISPATCH_ENABLE); OUT_BATCH(batch, (1 << GEN6_3DSTATE_WM_NUM_SF_OUTPUTS_SHIFT) | @@ -2307,6 +2367,11 @@ gen7_render_initialize(VADriverContextP ctx) render_state->cc.depth_stencil = bo; } +/* + * for GEN8 + */ +#define ALIGNMENT 64 + static void gen7_render_color_calc_state(VADriverContextP ctx) { @@ -2383,6 +2448,7 @@ gen7_render_sampler(VADriverContextP ctx) dri_bo_unmap(render_state->wm.sampler); } + static void gen7_render_setup_states( VADriverContextP ctx, @@ -2399,10 +2465,11 @@ gen7_render_setup_states( gen7_render_color_calc_state(ctx); gen7_render_blend_state(ctx); gen7_render_depth_stencil_state(ctx); - i965_render_upload_constants(ctx, obj_surface); + i965_render_upload_constants(ctx, obj_surface, flags); i965_render_upload_vertex(ctx, obj_surface, src_rect, dst_rect); } + static void gen7_emit_invarient_states(VADriverContextP ctx) { @@ -2810,7 +2877,7 @@ gen7_emit_wm_state(VADriverContextP ctx, int kernel) BEGIN_BATCH(batch, 7); OUT_BATCH(batch, GEN6_3DSTATE_CONSTANT_PS | (7 - 2)); - OUT_BATCH(batch, 1); + OUT_BATCH(batch, URB_CS_ENTRY_SIZE); OUT_BATCH(batch, 0); OUT_RELOC(batch, render_state->curbe.bo, @@ -2832,7 +2899,7 @@ gen7_emit_wm_state(VADriverContextP ctx, int kernel) (5 << GEN7_PS_BINDING_TABLE_ENTRY_COUNT_SHIFT)); OUT_BATCH(batch, 0); /* scratch space base offset */ OUT_BATCH(batch, - ((render_state->max_wm_threads - 1) << max_threads_shift) | num_samples | + ((i965->intel.device_info->max_wm_threads - 1) << max_threads_shift) | num_samples | GEN7_PS_PUSH_CONSTANT_ENABLE | GEN7_PS_ATTRIBUTE_ENABLE | GEN7_PS_16_DISPATCH_ENABLE); @@ -2930,6 +2997,7 @@ gen7_render_emit_states(VADriverContextP ctx, int kernel) intel_batchbuffer_end_atomic(batch); } + static void gen7_render_put_surface( VADriverContextP ctx, @@ -2949,6 +3017,7 @@ gen7_render_put_surface( intel_batchbuffer_flush(batch); } + static void gen7_subpicture_render_blend_state(VADriverContextP ctx) { @@ -3012,13 +3081,6 @@ gen7_render_put_subpicture( } -/* - * global functions - */ -VAStatus -i965_DestroySurfaces(VADriverContextP ctx, - VASurfaceID *surface_list, - int num_surfaces); void intel_render_put_surface( VADriverContextP ctx, @@ -3029,6 +3091,7 @@ intel_render_put_surface( ) { struct i965_driver_data *i965 = i965_driver_data(ctx); + struct i965_render_state *render_state = &i965->render_state; int has_done_scaling = 0; VASurfaceID out_surface_id = i965_post_processing(ctx, obj_surface, @@ -3049,12 +3112,7 @@ intel_render_put_surface( src_rect = dst_rect; } - if (IS_GEN7(i965->intel.device_id)) - gen7_render_put_surface(ctx, obj_surface, src_rect, dst_rect, flags); - else if (IS_GEN6(i965->intel.device_id)) - gen6_render_put_surface(ctx, obj_surface, src_rect, dst_rect, flags); - else - i965_render_put_surface(ctx, obj_surface, src_rect, dst_rect, flags); + render_state->render_put_surface(ctx, obj_surface, src_rect, dst_rect, flags); if (out_surface_id != VA_INVALID_ID) i965_DestroySurfaces(ctx, &out_surface_id, 1); @@ -3069,15 +3127,12 @@ intel_render_put_subpicture( ) { struct i965_driver_data *i965 = i965_driver_data(ctx); + struct i965_render_state *render_state = &i965->render_state; - if (IS_GEN7(i965->intel.device_id)) - gen7_render_put_subpicture(ctx, obj_surface, src_rect, dst_rect); - else if (IS_GEN6(i965->intel.device_id)) - gen6_render_put_subpicture(ctx, obj_surface, src_rect, dst_rect); - else - i965_render_put_subpicture(ctx, obj_surface, src_rect, dst_rect); + render_state->render_put_subpicture(ctx, obj_surface, src_rect, dst_rect); } + bool i965_render_init(VADriverContextP ctx) { @@ -3091,16 +3146,27 @@ i965_render_init(VADriverContextP ctx) assert(NUM_RENDER_KERNEL == (sizeof(render_kernels_gen6) / sizeof(render_kernels_gen6[0]))); - if (IS_GEN7(i965->intel.device_id)) + if (IS_GEN8(i965->intel.device_id)) { + return gen8_render_init(ctx); + } else if (IS_GEN7(i965->intel.device_id)) { memcpy(render_state->render_kernels, (IS_HASWELL(i965->intel.device_id) ? render_kernels_gen7_haswell : render_kernels_gen7), sizeof(render_state->render_kernels)); - else if (IS_GEN6(i965->intel.device_id)) + render_state->render_put_surface = gen7_render_put_surface; + render_state->render_put_subpicture = gen7_render_put_subpicture; + } else if (IS_GEN6(i965->intel.device_id)) { memcpy(render_state->render_kernels, render_kernels_gen6, sizeof(render_state->render_kernels)); - else if (IS_IRONLAKE(i965->intel.device_id)) + render_state->render_put_surface = gen6_render_put_surface; + render_state->render_put_subpicture = gen6_render_put_subpicture; + } else if (IS_IRONLAKE(i965->intel.device_id)) { memcpy(render_state->render_kernels, render_kernels_gen5, sizeof(render_state->render_kernels)); - else + render_state->render_put_surface = i965_render_put_surface; + render_state->render_put_subpicture = i965_render_put_subpicture; + } else { memcpy(render_state->render_kernels, render_kernels_gen4, sizeof(render_state->render_kernels)); + render_state->render_put_surface = i965_render_put_surface; + render_state->render_put_subpicture = i965_render_put_subpicture; + } for (i = 0; i < NUM_RENDER_KERNEL; i++) { struct i965_kernel *kernel = &render_state->render_kernels[i]; @@ -3121,27 +3187,6 @@ i965_render_init(VADriverContextP ctx) 4096, 64); assert(render_state->curbe.bo); - if (IS_IVB_GT1(i965->intel.device_id) || - IS_HSW_GT1(i965->intel.device_id)) { - render_state->max_wm_threads = 48; - } else if (IS_IVB_GT2(i965->intel.device_id) || - IS_HSW_GT2(i965->intel.device_id)) { - render_state->max_wm_threads = 172; - } else if (IS_HSW_GT3(i965->intel.device_id)) { - render_state->max_wm_threads = 172; - } else if (IS_SNB_GT1(i965->intel.device_id)) { - render_state->max_wm_threads = 40; - } else if (IS_SNB_GT2(i965->intel.device_id)) { - render_state->max_wm_threads = 80; - } else if (IS_IRONLAKE(i965->intel.device_id)) { - render_state->max_wm_threads = 72; /* 12 * 6 */ - } else if (IS_G4X(i965->intel.device_id)) { - render_state->max_wm_threads = 50; /* 12 * 5 */ - } else { - /* should never get here !!! */ - assert(0); - } - return true; } @@ -3152,6 +3197,11 @@ i965_render_terminate(VADriverContextP ctx) struct i965_driver_data *i965 = i965_driver_data(ctx); struct i965_render_state *render_state = &i965->render_state; + if (IS_GEN8(i965->intel.device_id)) { + gen8_render_terminate(ctx); + return; + } + dri_bo_unreference(render_state->curbe.bo); render_state->curbe.bo = NULL;