X-Git-Url: http://review.tizen.org/git/?a=blobdiff_plain;f=src%2Fi965_render.c;h=15643f360f3faecebd80364345526fab77b75449;hb=e889cefebad2a2fa2230882b8070309f23054247;hp=7a9ea135e8b667be87480d6cc08a65c872a9fe0a;hpb=6f73ef3314adca2becee8f16339b0ac9ae6e4d74;p=platform%2Fupstream%2Flibva-intel-driver.git diff --git a/src/i965_render.c b/src/i965_render.c index 7a9ea13..15643f3 100644 --- a/src/i965_render.c +++ b/src/i965_render.c @@ -35,6 +35,9 @@ #include #include #include +#include + +#include #include "intel_batchbuffer.h" #include "intel_driver.h" @@ -52,7 +55,7 @@ static const uint32_t sf_kernel_static[][4] = #include "shaders/render/exa_sf.g4b" }; -#define PS_KERNEL_NUM_GRF 32 +#define PS_KERNEL_NUM_GRF 48 #define PS_MAX_THREADS 32 #define I965_GRF_BLOCKS(nreg) ((nreg + 15) / 16 - 1) @@ -62,6 +65,7 @@ static const uint32_t ps_kernel_static[][4] = #include "shaders/render/exa_wm_xy.g4b" #include "shaders/render/exa_wm_src_affine.g4b" #include "shaders/render/exa_wm_src_sample_planar.g4b" +#include "shaders/render/exa_wm_yuv_color_balance.g4b" #include "shaders/render/exa_wm_yuv_rgb.g4b" #include "shaders/render/exa_wm_write.g4b" }; @@ -84,6 +88,7 @@ static const uint32_t ps_kernel_static_gen5[][4] = #include "shaders/render/exa_wm_xy.g4b.gen5" #include "shaders/render/exa_wm_src_affine.g4b.gen5" #include "shaders/render/exa_wm_src_sample_planar.g4b.gen5" +#include "shaders/render/exa_wm_yuv_color_balance.g4b.gen5" #include "shaders/render/exa_wm_yuv_rgb.g4b.gen5" #include "shaders/render/exa_wm_write.g4b.gen5" }; @@ -103,6 +108,7 @@ static const uint32_t sf_kernel_static_gen6[][4] = static const uint32_t ps_kernel_static_gen6[][4] = { #include "shaders/render/exa_wm_src_affine.g6b" #include "shaders/render/exa_wm_src_sample_planar.g6b" +#include "shaders/render/exa_wm_yuv_color_balance.g6b" #include "shaders/render/exa_wm_yuv_rgb.g6b" #include "shaders/render/exa_wm_write.g6b" }; @@ -121,6 +127,7 @@ static const uint32_t sf_kernel_static_gen7[][4] = static const uint32_t ps_kernel_static_gen7[][4] = { #include "shaders/render/exa_wm_src_affine.g7b" #include "shaders/render/exa_wm_src_sample_planar.g7b" +#include "shaders/render/exa_wm_yuv_color_balance.g7b" #include "shaders/render/exa_wm_yuv_rgb.g7b" #include "shaders/render/exa_wm_write.g7b" }; @@ -131,9 +138,18 @@ static const uint32_t ps_subpic_kernel_static_gen7[][4] = { #include "shaders/render/exa_wm_write.g7b" }; -#define SURFACE_STATE_PADDED_SIZE_I965 ALIGN(sizeof(struct i965_surface_state), 32) -#define SURFACE_STATE_PADDED_SIZE_GEN7 ALIGN(sizeof(struct gen7_surface_state), 32) -#define SURFACE_STATE_PADDED_SIZE MAX(SURFACE_STATE_PADDED_SIZE_I965, SURFACE_STATE_PADDED_SIZE_GEN7) +/* Programs for Haswell */ +static const uint32_t ps_kernel_static_gen7_haswell[][4] = { +#include "shaders/render/exa_wm_src_affine.g7b" +#include "shaders/render/exa_wm_src_sample_planar.g7b.haswell" +#include "shaders/render/exa_wm_yuv_color_balance.g7b.haswell" +#include "shaders/render/exa_wm_yuv_rgb.g7b" +#include "shaders/render/exa_wm_write.g7b" +}; + + +#define SURFACE_STATE_PADDED_SIZE MAX(SURFACE_STATE_PADDED_SIZE_GEN6, SURFACE_STATE_PADDED_SIZE_GEN7) + #define SURFACE_STATE_OFFSET(index) (SURFACE_STATE_PADDED_SIZE * index) #define BINDING_TABLE_OFFSET SURFACE_STATE_OFFSET(MAX_RENDER_SURFACES) @@ -255,6 +271,31 @@ static struct i965_kernel render_kernels_gen7[] = { } }; +static struct i965_kernel render_kernels_gen7_haswell[] = { + { + "SF", + SF_KERNEL, + sf_kernel_static_gen7, + sizeof(sf_kernel_static_gen7), + NULL + }, + { + "PS", + PS_KERNEL, + ps_kernel_static_gen7_haswell, + sizeof(ps_kernel_static_gen7_haswell), + NULL + }, + + { + "PS_SUBPIC", + PS_SUBPIC_KERNEL, + ps_subpic_kernel_static_gen7, + sizeof(ps_subpic_kernel_static_gen7), + NULL + } +}; + #define URB_VS_ENTRIES 8 #define URB_VS_ENTRY_SIZE 1 @@ -267,8 +308,26 @@ static struct i965_kernel render_kernels_gen7[] = { #define URB_SF_ENTRIES 1 #define URB_SF_ENTRY_SIZE 2 -#define URB_CS_ENTRIES 1 -#define URB_CS_ENTRY_SIZE 1 +#define URB_CS_ENTRIES 4 +#define URB_CS_ENTRY_SIZE 4 + +static float yuv_to_rgb_bt601[3][4] = { +{1.164, 0, 1.596, -0.06275,}, +{1.164, -0.392, -0.813, -0.50196,}, +{1.164, 2.017, 0, -0.50196,}, +}; + +static float yuv_to_rgb_bt709[3][4] = { +{1.164, 0, 1.793, -0.06275,}, +{1.164, -0.213, -0.533, -0.50196,}, +{1.164, 2.112, 0, -0.50196,}, +}; + +static float yuv_to_rgb_smpte_240[3][4] = { +{1.164, 0, 1.794, -0.06275,}, +{1.164, -0.258, -0.5425, -0.50196,}, +{1.164, 2.078, 0, -0.50196,}, +}; static void i965_render_vs_unit(VADriverContextP ctx) @@ -404,8 +463,8 @@ i965_subpic_render_wm_unit(VADriverContextP ctx) wm_state->thread2.scratch_space_base_pointer = 0; wm_state->thread2.per_thread_scratch_space = 0; /* 1024 bytes */ - wm_state->thread3.dispatch_grf_start_reg = 3; /* XXX */ - wm_state->thread3.const_urb_entry_read_length = 0; + wm_state->thread3.dispatch_grf_start_reg = 2; /* XXX */ + wm_state->thread3.const_urb_entry_read_length = 4; wm_state->thread3.const_urb_entry_read_offset = 0; wm_state->thread3.urb_entry_read_length = 1; /* XXX */ wm_state->thread3.urb_entry_read_offset = 0; /* XXX */ @@ -415,12 +474,11 @@ i965_subpic_render_wm_unit(VADriverContextP ctx) if (IS_IRONLAKE(i965->intel.device_id)) { wm_state->wm4.sampler_count = 0; /* hardware requirement */ - wm_state->wm5.max_threads = 12 * 6 - 1; } else { wm_state->wm4.sampler_count = (render_state->wm.sampler_count + 3) / 4; - wm_state->wm5.max_threads = 10 * 5 - 1; } + wm_state->wm5.max_threads = i965->intel.device_info->max_wm_threads - 1; wm_state->wm5.thread_dispatch_enable = 1; wm_state->wm5.enable_16_pix = 1; wm_state->wm5.enable_8_pix = 0; @@ -470,7 +528,7 @@ i965_render_wm_unit(VADriverContextP ctx) wm_state->thread2.per_thread_scratch_space = 0; /* 1024 bytes */ wm_state->thread3.dispatch_grf_start_reg = 2; /* XXX */ - wm_state->thread3.const_urb_entry_read_length = 1; + wm_state->thread3.const_urb_entry_read_length = 4; wm_state->thread3.const_urb_entry_read_offset = 0; wm_state->thread3.urb_entry_read_length = 1; /* XXX */ wm_state->thread3.urb_entry_read_offset = 0; /* XXX */ @@ -480,12 +538,11 @@ i965_render_wm_unit(VADriverContextP ctx) if (IS_IRONLAKE(i965->intel.device_id)) { wm_state->wm4.sampler_count = 0; /* hardware requirement */ - wm_state->wm5.max_threads = 12 * 6 - 1; } else { wm_state->wm4.sampler_count = (render_state->wm.sampler_count + 3) / 4; - wm_state->wm5.max_threads = 10 * 5 - 1; } + wm_state->wm5.max_threads = i965->intel.device_info->max_wm_threads - 1; wm_state->wm5.thread_dispatch_enable = 1; wm_state->wm5.enable_16_pix = 1; wm_state->wm5.enable_8_pix = 0; @@ -695,6 +752,16 @@ gen7_render_set_surface_tiling(struct gen7_surface_state *ss, uint32_t tiling) } } +/* Set "Shader Channel Select" */ +void +gen7_render_set_surface_scs(struct gen7_surface_state *ss) +{ + ss->ss7.shader_chanel_select_r = HSW_SCS_RED; + ss->ss7.shader_chanel_select_g = HSW_SCS_GREEN; + ss->ss7.shader_chanel_select_b = HSW_SCS_BLUE; + ss->ss7.shader_chanel_select_a = HSW_SCS_ALPHA; +} + static void gen7_render_set_surface_state( struct gen7_surface_state *ss, @@ -736,6 +803,7 @@ gen7_render_set_surface_state( gen7_render_set_surface_tiling(ss, tiling); } + static void i965_render_src_surface_state( VADriverContextP ctx, @@ -765,6 +833,8 @@ i965_render_src_surface_state( region, offset, w, h, pitch, format, flags); + if (IS_HASWELL(i965->intel.device_id)) + gen7_render_set_surface_scs(ss); dri_bo_emit_reloc(ss_bo, I915_GEM_DOMAIN_SAMPLER, 0, offset, @@ -790,19 +860,14 @@ i965_render_src_surface_state( static void i965_render_src_surfaces_state( VADriverContextP ctx, - VASurfaceID surface, + struct object_surface *obj_surface, unsigned int flags ) { - struct i965_driver_data *i965 = i965_driver_data(ctx); - struct object_surface *obj_surface; int region_pitch; int rw, rh; dri_bo *region; - obj_surface = SURFACE(surface); - assert(obj_surface); - region_pitch = obj_surface->width; rw = obj_surface->orig_width; rh = obj_surface->orig_height; @@ -811,7 +876,7 @@ i965_render_src_surfaces_state( i965_render_src_surface_state(ctx, 1, region, 0, rw, rh, region_pitch, I965_SURFACEFORMAT_R8_UNORM, flags); /* Y */ i965_render_src_surface_state(ctx, 2, region, 0, rw, rh, region_pitch, I965_SURFACEFORMAT_R8_UNORM, flags); - if (obj_surface->fourcc == VA_FOURCC('N', 'V', '1', '2')) { + if (obj_surface->fourcc == VA_FOURCC_NV12) { i965_render_src_surface_state(ctx, 3, region, region_pitch * obj_surface->y_cb_offset, obj_surface->cb_cr_width, obj_surface->cb_cr_height, obj_surface->cb_cr_pitch, @@ -842,20 +907,15 @@ i965_render_src_surfaces_state( static void i965_subpic_render_src_surfaces_state(VADriverContextP ctx, - VASurfaceID surface) + struct object_surface *obj_surface) { - struct i965_driver_data *i965 = i965_driver_data(ctx); - struct object_surface *obj_surface = SURFACE(surface); - int w, h; - dri_bo *region; dri_bo *subpic_region; - struct object_subpic *obj_subpic = SUBPIC(obj_surface->subpic); - struct object_image *obj_image = IMAGE(obj_subpic->image); + unsigned int index = obj_surface->subpic_render_idx; + struct object_subpic *obj_subpic = obj_surface->obj_subpic[index]; + struct object_image *obj_image = obj_subpic->obj_image; + assert(obj_surface); assert(obj_surface->bo); - w = obj_surface->width; - h = obj_surface->height; - region = obj_surface->bo; subpic_region = obj_image->bo; /*subpicture surface*/ i965_render_src_surface_state(ctx, 1, subpic_region, 0, obj_subpic->width, obj_subpic->height, obj_subpic->pitch, obj_subpic->format, 0); @@ -888,6 +948,8 @@ i965_render_dest_surface_state(VADriverContextP ctx, int index) dest_region->bo, 0, dest_region->width, dest_region->height, dest_region->pitch, format, 0); + if (IS_HASWELL(i965->intel.device_id)) + gen7_render_set_surface_scs(ss); dri_bo_emit_reloc(ss_bo, I915_GEM_DOMAIN_RENDER, I915_GEM_DOMAIN_RENDER, 0, @@ -951,12 +1013,11 @@ i965_fill_vertex_buffer( static void i965_subpic_render_upload_vertex(VADriverContextP ctx, - VASurfaceID surface, + struct object_surface *obj_surface, const VARectangle *output_rect) { - struct i965_driver_data *i965 = i965_driver_data(ctx); - struct object_surface *obj_surface = SURFACE(surface); - struct object_subpic *obj_subpic = SUBPIC(obj_surface->subpic); + unsigned int index = obj_surface->subpic_render_idx; + struct object_subpic *obj_subpic = obj_surface->obj_subpic[index]; float tex_coords[4], vid_coords[4]; VARectangle dst_rect; @@ -987,7 +1048,7 @@ i965_subpic_render_upload_vertex(VADriverContextP ctx, static void i965_render_upload_vertex( VADriverContextP ctx, - VASurfaceID surface, + struct object_surface *obj_surface, const VARectangle *src_rect, const VARectangle *dst_rect ) @@ -995,13 +1056,9 @@ i965_render_upload_vertex( struct i965_driver_data *i965 = i965_driver_data(ctx); struct i965_render_state *render_state = &i965->render_state; struct intel_region *dest_region = render_state->draw_region; - struct object_surface *obj_surface; float tex_coords[4], vid_coords[4]; int width, height; - obj_surface = SURFACE(surface); - assert(surface); - width = obj_surface->orig_width; height = obj_surface->orig_height; @@ -1018,37 +1075,93 @@ i965_render_upload_vertex( i965_fill_vertex_buffer(ctx, tex_coords, vid_coords); } +#define PI 3.1415926 + static void i965_render_upload_constants(VADriverContextP ctx, - VASurfaceID surface) + struct object_surface *obj_surface, + unsigned int flags) { struct i965_driver_data *i965 = i965_driver_data(ctx); struct i965_render_state *render_state = &i965->render_state; unsigned short *constant_buffer; - struct object_surface *obj_surface = SURFACE(surface); + float *color_balance_base; + float contrast = (float)i965->contrast_attrib->value / DEFAULT_CONTRAST; + float brightness = (float)i965->brightness_attrib->value / 255; /* YUV is float in the shader */ + float hue = (float)i965->hue_attrib->value / 180 * PI; + float saturation = (float)i965->saturation_attrib->value / DEFAULT_SATURATION; + float *yuv_to_rgb; + unsigned int color_flag; dri_bo_map(render_state->curbe.bo, 1); assert(render_state->curbe.bo->virtual); constant_buffer = render_state->curbe.bo->virtual; if (obj_surface->subsampling == SUBSAMPLE_YUV400) { - assert(obj_surface->fourcc == VA_FOURCC('I', 'M', 'C', '1') || - obj_surface->fourcc == VA_FOURCC('I', 'M', 'C', '3')); - *constant_buffer = 2; + assert(obj_surface->fourcc == VA_FOURCC_Y800); + + constant_buffer[0] = 2; } else { - if (obj_surface->fourcc == VA_FOURCC('N', 'V', '1', '2')) - *constant_buffer = 1; + if (obj_surface->fourcc == VA_FOURCC_NV12) + constant_buffer[0] = 1; else - *constant_buffer = 0; + constant_buffer[0] = 0; } + if (i965->contrast_attrib->value == DEFAULT_CONTRAST && + i965->brightness_attrib->value == DEFAULT_BRIGHTNESS && + i965->hue_attrib->value == DEFAULT_HUE && + i965->saturation_attrib->value == DEFAULT_SATURATION) + constant_buffer[1] = 1; /* skip color balance transformation */ + else + constant_buffer[1] = 0; + + color_balance_base = (float *)constant_buffer + 4; + *color_balance_base++ = contrast; + *color_balance_base++ = brightness; + *color_balance_base++ = cos(hue) * contrast * saturation; + *color_balance_base++ = sin(hue) * contrast * saturation; + + color_flag = flags & VA_SRC_COLOR_MASK; + yuv_to_rgb = (float *)constant_buffer + 8; + if (color_flag == VA_SRC_BT709) + memcpy(yuv_to_rgb, yuv_to_rgb_bt709, sizeof(yuv_to_rgb_bt709)); + else if (color_flag == VA_SRC_SMPTE_240) + memcpy(yuv_to_rgb, yuv_to_rgb_smpte_240, sizeof(yuv_to_rgb_smpte_240)); + else + memcpy(yuv_to_rgb, yuv_to_rgb_bt601, sizeof(yuv_to_rgb_bt601)); + dri_bo_unmap(render_state->curbe.bo); } static void +i965_subpic_render_upload_constants(VADriverContextP ctx, + struct object_surface *obj_surface) +{ + struct i965_driver_data *i965 = i965_driver_data(ctx); + struct i965_render_state *render_state = &i965->render_state; + float *constant_buffer; + float global_alpha = 1.0; + unsigned int index = obj_surface->subpic_render_idx; + struct object_subpic *obj_subpic = obj_surface->obj_subpic[index]; + + if (obj_subpic->flags & VA_SUBPICTURE_GLOBAL_ALPHA) { + global_alpha = obj_subpic->global_alpha; + } + + dri_bo_map(render_state->curbe.bo, 1); + + assert(render_state->curbe.bo->virtual); + constant_buffer = render_state->curbe.bo->virtual; + *constant_buffer = global_alpha; + + dri_bo_unmap(render_state->curbe.bo); +} + +static void i965_surface_render_state_setup( VADriverContextP ctx, - VASurfaceID surface, + struct object_surface *obj_surface, const VARectangle *src_rect, const VARectangle *dst_rect, unsigned int flags @@ -1057,19 +1170,19 @@ i965_surface_render_state_setup( i965_render_vs_unit(ctx); i965_render_sf_unit(ctx); i965_render_dest_surface_state(ctx, 0); - i965_render_src_surfaces_state(ctx, surface, flags); + i965_render_src_surfaces_state(ctx, obj_surface, flags); i965_render_sampler(ctx); i965_render_wm_unit(ctx); i965_render_cc_viewport(ctx); i965_render_cc_unit(ctx); - i965_render_upload_vertex(ctx, surface, src_rect, dst_rect); - i965_render_upload_constants(ctx, surface); + i965_render_upload_vertex(ctx, obj_surface, src_rect, dst_rect); + i965_render_upload_constants(ctx, obj_surface, flags); } static void i965_subpic_render_state_setup( VADriverContextP ctx, - VASurfaceID surface, + struct object_surface *obj_surface, const VARectangle *src_rect, const VARectangle *dst_rect ) @@ -1077,12 +1190,13 @@ i965_subpic_render_state_setup( i965_render_vs_unit(ctx); i965_render_sf_unit(ctx); i965_render_dest_surface_state(ctx, 0); - i965_subpic_render_src_surfaces_state(ctx, surface); + i965_subpic_render_src_surfaces_state(ctx, obj_surface); i965_render_sampler(ctx); i965_subpic_render_wm_unit(ctx); i965_render_cc_viewport(ctx); i965_subpic_render_cc_unit(ctx); - i965_subpic_render_upload_vertex(ctx, surface, dst_rect); + i965_subpic_render_upload_constants(ctx, obj_surface); + i965_subpic_render_upload_vertex(ctx, obj_surface, dst_rect); } @@ -1332,7 +1446,7 @@ i965_render_vertex_elements(VADriverContextP ctx) static void i965_render_upload_image_palette( VADriverContextP ctx, - VAImageID image_id, + struct object_image *obj_image, unsigned int alpha ) { @@ -1340,9 +1454,11 @@ i965_render_upload_image_palette( struct intel_batchbuffer *batch = i965->batch; unsigned int i; - struct object_image *obj_image = IMAGE(image_id); assert(obj_image); + if (!obj_image) + return; + if (obj_image->image.num_palette_entries == 0) return; @@ -1421,7 +1537,8 @@ i965_clear_dest_region(VADriverContextP ctx) br13 |= pitch; if (IS_GEN6(i965->intel.device_id) || - IS_GEN7(i965->intel.device_id)) { + IS_GEN7(i965->intel.device_id) || + IS_GEN8(i965->intel.device_id)) { intel_batchbuffer_start_atomic_blt(batch, 24); BEGIN_BLT_BATCH(batch, 6); } else { @@ -1482,6 +1599,7 @@ i965_subpic_render_pipeline_setup(VADriverContextP ctx) i965_render_pipelined_pointers(ctx); i965_render_urb_layout(ctx); i965_render_cs_urb_layout(ctx); + i965_render_constant_buffer(ctx); i965_render_drawing_rectangle(ctx); i965_render_vertex_elements(ctx); i965_render_startup(ctx); @@ -1572,7 +1690,7 @@ i965_render_initialize(VADriverContextP ctx) static void i965_render_put_surface( VADriverContextP ctx, - VASurfaceID surface, + struct object_surface *obj_surface, const VARectangle *src_rect, const VARectangle *dst_rect, unsigned int flags @@ -1582,7 +1700,7 @@ i965_render_put_surface( struct intel_batchbuffer *batch = i965->batch; i965_render_initialize(ctx); - i965_surface_render_state_setup(ctx, surface, src_rect, dst_rect, flags); + i965_surface_render_state_setup(ctx, obj_surface, src_rect, dst_rect, flags); i965_surface_render_pipeline_setup(ctx); intel_batchbuffer_flush(batch); } @@ -1590,22 +1708,22 @@ i965_render_put_surface( static void i965_render_put_subpicture( VADriverContextP ctx, - VASurfaceID surface, + struct object_surface *obj_surface, const VARectangle *src_rect, const VARectangle *dst_rect ) { struct i965_driver_data *i965 = i965_driver_data(ctx); struct intel_batchbuffer *batch = i965->batch; - struct object_surface *obj_surface = SURFACE(surface); - struct object_subpic *obj_subpic = SUBPIC(obj_surface->subpic); + unsigned int index = obj_surface->subpic_render_idx; + struct object_subpic *obj_subpic = obj_surface->obj_subpic[index]; assert(obj_subpic); i965_render_initialize(ctx); - i965_subpic_render_state_setup(ctx, surface, src_rect, dst_rect); + i965_subpic_render_state_setup(ctx, obj_surface, src_rect, dst_rect); i965_subpic_render_pipeline_setup(ctx); - i965_render_upload_image_palette(ctx, obj_subpic->image, 0xff); + i965_render_upload_image_palette(ctx, obj_subpic->obj_image, 0xff); intel_batchbuffer_flush(batch); } @@ -1734,21 +1852,21 @@ gen6_render_depth_stencil_state(VADriverContextP ctx) static void gen6_render_setup_states( VADriverContextP ctx, - VASurfaceID surface, + struct object_surface *obj_surface, const VARectangle *src_rect, const VARectangle *dst_rect, unsigned int flags ) { i965_render_dest_surface_state(ctx, 0); - i965_render_src_surfaces_state(ctx, surface, flags); + i965_render_src_surfaces_state(ctx, obj_surface, flags); i965_render_sampler(ctx); i965_render_cc_viewport(ctx); gen6_render_color_calc_state(ctx); gen6_render_blend_state(ctx); gen6_render_depth_stencil_state(ctx); - i965_render_upload_constants(ctx, surface); - i965_render_upload_vertex(ctx, surface, src_rect, dst_rect); + i965_render_upload_constants(ctx, obj_surface, flags); + i965_render_upload_vertex(ctx, obj_surface, src_rect, dst_rect); } static void @@ -1986,7 +2104,7 @@ gen6_emit_wm_state(VADriverContextP ctx, int kernel) OUT_RELOC(batch, render_state->curbe.bo, I915_GEM_DOMAIN_INSTRUCTION, 0, - 0); + (URB_CS_ENTRY_SIZE-1)); OUT_BATCH(batch, 0); OUT_BATCH(batch, 0); OUT_BATCH(batch, 0); @@ -1999,7 +2117,7 @@ gen6_emit_wm_state(VADriverContextP ctx, int kernel) (5 << GEN6_3DSTATE_WM_BINDING_TABLE_ENTRY_COUNT_SHIFT)); OUT_BATCH(batch, 0); OUT_BATCH(batch, (6 << GEN6_3DSTATE_WM_DISPATCH_START_GRF_0_SHIFT)); /* DW4 */ - OUT_BATCH(batch, ((40 - 1) << GEN6_3DSTATE_WM_MAX_THREADS_SHIFT) | + OUT_BATCH(batch, ((i965->intel.device_info->max_wm_threads - 1) << GEN6_3DSTATE_WM_MAX_THREADS_SHIFT) | GEN6_3DSTATE_WM_DISPATCH_ENABLE | GEN6_3DSTATE_WM_16_DISPATCH_ENABLE); OUT_BATCH(batch, (1 << GEN6_3DSTATE_WM_NUM_SF_OUTPUTS_SHIFT) | @@ -2097,7 +2215,7 @@ gen6_render_emit_states(VADriverContextP ctx, int kernel) static void gen6_render_put_surface( VADriverContextP ctx, - VASurfaceID surface, + struct object_surface *obj_surface, const VARectangle *src_rect, const VARectangle *dst_rect, unsigned int flags @@ -2107,7 +2225,7 @@ gen6_render_put_surface( struct intel_batchbuffer *batch = i965->batch; gen6_render_initialize(ctx); - gen6_render_setup_states(ctx, surface, src_rect, dst_rect, flags); + gen6_render_setup_states(ctx, obj_surface, src_rect, dst_rect, flags); i965_clear_dest_region(ctx); gen6_render_emit_states(ctx, PS_KERNEL); intel_batchbuffer_flush(batch); @@ -2138,39 +2256,40 @@ gen6_subpicture_render_blend_state(VADriverContextP ctx) static void gen6_subpicture_render_setup_states( VADriverContextP ctx, - VASurfaceID surface, + struct object_surface *obj_surface, const VARectangle *src_rect, const VARectangle *dst_rect ) { i965_render_dest_surface_state(ctx, 0); - i965_subpic_render_src_surfaces_state(ctx, surface); + i965_subpic_render_src_surfaces_state(ctx, obj_surface); i965_render_sampler(ctx); i965_render_cc_viewport(ctx); gen6_render_color_calc_state(ctx); gen6_subpicture_render_blend_state(ctx); gen6_render_depth_stencil_state(ctx); - i965_subpic_render_upload_vertex(ctx, surface, dst_rect); + i965_subpic_render_upload_constants(ctx, obj_surface); + i965_subpic_render_upload_vertex(ctx, obj_surface, dst_rect); } static void gen6_render_put_subpicture( VADriverContextP ctx, - VASurfaceID surface, + struct object_surface *obj_surface, const VARectangle *src_rect, const VARectangle *dst_rect ) { struct i965_driver_data *i965 = i965_driver_data(ctx); struct intel_batchbuffer *batch = i965->batch; - struct object_surface *obj_surface = SURFACE(surface); - struct object_subpic *obj_subpic = SUBPIC(obj_surface->subpic); + unsigned int index = obj_surface->subpic_render_idx; + struct object_subpic *obj_subpic = obj_surface->obj_subpic[index]; assert(obj_subpic); gen6_render_initialize(ctx); - gen6_subpicture_render_setup_states(ctx, surface, src_rect, dst_rect); + gen6_subpicture_render_setup_states(ctx, obj_surface, src_rect, dst_rect); gen6_render_emit_states(ctx, PS_SUBPIC_KERNEL); - i965_render_upload_image_palette(ctx, obj_subpic->image, 0xff); + i965_render_upload_image_palette(ctx, obj_subpic->obj_image, 0xff); intel_batchbuffer_flush(batch); } @@ -2248,6 +2367,11 @@ gen7_render_initialize(VADriverContextP ctx) render_state->cc.depth_stencil = bo; } +/* + * for GEN8 + */ +#define ALIGNMENT 64 + static void gen7_render_color_calc_state(VADriverContextP ctx) { @@ -2324,26 +2448,28 @@ gen7_render_sampler(VADriverContextP ctx) dri_bo_unmap(render_state->wm.sampler); } + static void gen7_render_setup_states( VADriverContextP ctx, - VASurfaceID surface, + struct object_surface *obj_surface, const VARectangle *src_rect, const VARectangle *dst_rect, unsigned int flags ) { i965_render_dest_surface_state(ctx, 0); - i965_render_src_surfaces_state(ctx, surface, flags); + i965_render_src_surfaces_state(ctx, obj_surface, flags); gen7_render_sampler(ctx); i965_render_cc_viewport(ctx); gen7_render_color_calc_state(ctx); gen7_render_blend_state(ctx); gen7_render_depth_stencil_state(ctx); - i965_render_upload_constants(ctx, surface); - i965_render_upload_vertex(ctx, surface, src_rect, dst_rect); + i965_render_upload_constants(ctx, obj_surface, flags); + i965_render_upload_vertex(ctx, obj_surface, src_rect, dst_rect); } + static void gen7_emit_invarient_states(VADriverContextP ctx) { @@ -2751,7 +2877,7 @@ gen7_emit_wm_state(VADriverContextP ctx, int kernel) BEGIN_BATCH(batch, 7); OUT_BATCH(batch, GEN6_3DSTATE_CONSTANT_PS | (7 - 2)); - OUT_BATCH(batch, 1); + OUT_BATCH(batch, URB_CS_ENTRY_SIZE); OUT_BATCH(batch, 0); OUT_RELOC(batch, render_state->curbe.bo, @@ -2773,7 +2899,7 @@ gen7_emit_wm_state(VADriverContextP ctx, int kernel) (5 << GEN7_PS_BINDING_TABLE_ENTRY_COUNT_SHIFT)); OUT_BATCH(batch, 0); /* scratch space base offset */ OUT_BATCH(batch, - ((86 - 1) << max_threads_shift) | num_samples | + ((i965->intel.device_info->max_wm_threads - 1) << max_threads_shift) | num_samples | GEN7_PS_PUSH_CONSTANT_ENABLE | GEN7_PS_ATTRIBUTE_ENABLE | GEN7_PS_16_DISPATCH_ENABLE); @@ -2871,10 +2997,11 @@ gen7_render_emit_states(VADriverContextP ctx, int kernel) intel_batchbuffer_end_atomic(batch); } + static void gen7_render_put_surface( VADriverContextP ctx, - VASurfaceID surface, + struct object_surface *obj_surface, const VARectangle *src_rect, const VARectangle *dst_rect, unsigned int flags @@ -2884,12 +3011,13 @@ gen7_render_put_surface( struct intel_batchbuffer *batch = i965->batch; gen7_render_initialize(ctx); - gen7_render_setup_states(ctx, surface, src_rect, dst_rect, flags); + gen7_render_setup_states(ctx, obj_surface, src_rect, dst_rect, flags); i965_clear_dest_region(ctx); gen7_render_emit_states(ctx, PS_KERNEL); intel_batchbuffer_flush(batch); } + static void gen7_subpicture_render_blend_state(VADriverContextP ctx) { @@ -2915,99 +3043,97 @@ gen7_subpicture_render_blend_state(VADriverContextP ctx) static void gen7_subpicture_render_setup_states( VADriverContextP ctx, - VASurfaceID surface, + struct object_surface *obj_surface, const VARectangle *src_rect, const VARectangle *dst_rect ) { i965_render_dest_surface_state(ctx, 0); - i965_subpic_render_src_surfaces_state(ctx, surface); + i965_subpic_render_src_surfaces_state(ctx, obj_surface); i965_render_sampler(ctx); i965_render_cc_viewport(ctx); gen7_render_color_calc_state(ctx); gen7_subpicture_render_blend_state(ctx); gen7_render_depth_stencil_state(ctx); - i965_subpic_render_upload_vertex(ctx, surface, dst_rect); + i965_subpic_render_upload_constants(ctx, obj_surface); + i965_subpic_render_upload_vertex(ctx, obj_surface, dst_rect); } static void gen7_render_put_subpicture( VADriverContextP ctx, - VASurfaceID surface, + struct object_surface *obj_surface, const VARectangle *src_rect, const VARectangle *dst_rect ) { struct i965_driver_data *i965 = i965_driver_data(ctx); struct intel_batchbuffer *batch = i965->batch; - struct object_surface *obj_surface = SURFACE(surface); - struct object_subpic *obj_subpic = SUBPIC(obj_surface->subpic); + unsigned int index = obj_surface->subpic_render_idx; + struct object_subpic *obj_subpic = obj_surface->obj_subpic[index]; assert(obj_subpic); gen7_render_initialize(ctx); - gen7_subpicture_render_setup_states(ctx, surface, src_rect, dst_rect); + gen7_subpicture_render_setup_states(ctx, obj_surface, src_rect, dst_rect); gen7_render_emit_states(ctx, PS_SUBPIC_KERNEL); - i965_render_upload_image_palette(ctx, obj_subpic->image, 0xff); + i965_render_upload_image_palette(ctx, obj_subpic->obj_image, 0xff); intel_batchbuffer_flush(batch); } -/* - * global functions - */ -VAStatus -i965_DestroySurfaces(VADriverContextP ctx, - VASurfaceID *surface_list, - int num_surfaces); void intel_render_put_surface( VADriverContextP ctx, - VASurfaceID surface, + struct object_surface *obj_surface, const VARectangle *src_rect, const VARectangle *dst_rect, unsigned int flags ) { struct i965_driver_data *i965 = i965_driver_data(ctx); + struct i965_render_state *render_state = &i965->render_state; int has_done_scaling = 0; - VASurfaceID in_surface_id = surface; - VASurfaceID out_surface_id = i965_post_processing(ctx, surface, src_rect, dst_rect, flags, &has_done_scaling); + VASurfaceID out_surface_id = i965_post_processing(ctx, + obj_surface, + src_rect, + dst_rect, + flags, + &has_done_scaling); assert((!has_done_scaling) || (out_surface_id != VA_INVALID_ID)); - if (out_surface_id != VA_INVALID_ID) - in_surface_id = out_surface_id; + if (out_surface_id != VA_INVALID_ID) { + struct object_surface *new_obj_surface = SURFACE(out_surface_id); + + if (new_obj_surface && new_obj_surface->bo) + obj_surface = new_obj_surface; - if (IS_GEN7(i965->intel.device_id)) - gen7_render_put_surface(ctx, in_surface_id, has_done_scaling ? dst_rect : src_rect, dst_rect, flags); - else if (IS_GEN6(i965->intel.device_id)) - gen6_render_put_surface(ctx, in_surface_id, has_done_scaling ? dst_rect : src_rect, dst_rect, flags); - else - i965_render_put_surface(ctx, in_surface_id, has_done_scaling ? dst_rect : src_rect, dst_rect, flags); + if (has_done_scaling) + src_rect = dst_rect; + } + + render_state->render_put_surface(ctx, obj_surface, src_rect, dst_rect, flags); - if (in_surface_id != surface) - i965_DestroySurfaces(ctx, &in_surface_id, 1); + if (out_surface_id != VA_INVALID_ID) + i965_DestroySurfaces(ctx, &out_surface_id, 1); } void intel_render_put_subpicture( VADriverContextP ctx, - VASurfaceID surface, + struct object_surface *obj_surface, const VARectangle *src_rect, const VARectangle *dst_rect ) { struct i965_driver_data *i965 = i965_driver_data(ctx); + struct i965_render_state *render_state = &i965->render_state; - if (IS_GEN7(i965->intel.device_id)) - gen7_render_put_subpicture(ctx, surface, src_rect, dst_rect); - else if (IS_GEN6(i965->intel.device_id)) - gen6_render_put_subpicture(ctx, surface, src_rect, dst_rect); - else - i965_render_put_subpicture(ctx, surface, src_rect, dst_rect); + render_state->render_put_subpicture(ctx, obj_surface, src_rect, dst_rect); } -Bool + +bool i965_render_init(VADriverContextP ctx) { struct i965_driver_data *i965 = i965_driver_data(ctx); @@ -3020,14 +3146,27 @@ i965_render_init(VADriverContextP ctx) assert(NUM_RENDER_KERNEL == (sizeof(render_kernels_gen6) / sizeof(render_kernels_gen6[0]))); - if (IS_GEN7(i965->intel.device_id)) - memcpy(render_state->render_kernels, render_kernels_gen7, sizeof(render_state->render_kernels)); - else if (IS_GEN6(i965->intel.device_id)) + if (IS_GEN8(i965->intel.device_id)) { + return gen8_render_init(ctx); + } else if (IS_GEN7(i965->intel.device_id)) { + memcpy(render_state->render_kernels, + (IS_HASWELL(i965->intel.device_id) ? render_kernels_gen7_haswell : render_kernels_gen7), + sizeof(render_state->render_kernels)); + render_state->render_put_surface = gen7_render_put_surface; + render_state->render_put_subpicture = gen7_render_put_subpicture; + } else if (IS_GEN6(i965->intel.device_id)) { memcpy(render_state->render_kernels, render_kernels_gen6, sizeof(render_state->render_kernels)); - else if (IS_IRONLAKE(i965->intel.device_id)) + render_state->render_put_surface = gen6_render_put_surface; + render_state->render_put_subpicture = gen6_render_put_subpicture; + } else if (IS_IRONLAKE(i965->intel.device_id)) { memcpy(render_state->render_kernels, render_kernels_gen5, sizeof(render_state->render_kernels)); - else + render_state->render_put_surface = i965_render_put_surface; + render_state->render_put_subpicture = i965_render_put_subpicture; + } else { memcpy(render_state->render_kernels, render_kernels_gen4, sizeof(render_state->render_kernels)); + render_state->render_put_surface = i965_render_put_surface; + render_state->render_put_subpicture = i965_render_put_subpicture; + } for (i = 0; i < NUM_RENDER_KERNEL; i++) { struct i965_kernel *kernel = &render_state->render_kernels[i]; @@ -3048,16 +3187,21 @@ i965_render_init(VADriverContextP ctx) 4096, 64); assert(render_state->curbe.bo); - return True; + return true; } -Bool +void i965_render_terminate(VADriverContextP ctx) { int i; struct i965_driver_data *i965 = i965_driver_data(ctx); struct i965_render_state *render_state = &i965->render_state; + if (IS_GEN8(i965->intel.device_id)) { + gen8_render_terminate(ctx); + return; + } + dri_bo_unreference(render_state->curbe.bo); render_state->curbe.bo = NULL; @@ -3093,7 +3237,5 @@ i965_render_terminate(VADriverContextP ctx) free(render_state->draw_region); render_state->draw_region = NULL; } - - return True; }