X-Git-Url: http://review.tizen.org/git/?a=blobdiff_plain;f=src%2Fi965_render.c;h=aed78c0769caae90af5cd0cf19316f79e09ae8dd;hb=69e26f5ca4cad4ac69d47fa0db50f037c197c079;hp=fada70b59a10e1f7403592bb992daf436da141a9;hpb=5c200b34cef770ab9fa9cc76196e7b780843718b;p=platform%2Fupstream%2Flibva-intel-driver.git diff --git a/src/i965_render.c b/src/i965_render.c index fada70b..aed78c0 100644 --- a/src/i965_render.c +++ b/src/i965_render.c @@ -35,8 +35,9 @@ #include #include #include +#include -#include +#include #include "intel_batchbuffer.h" #include "intel_driver.h" @@ -54,7 +55,7 @@ static const uint32_t sf_kernel_static[][4] = #include "shaders/render/exa_sf.g4b" }; -#define PS_KERNEL_NUM_GRF 32 +#define PS_KERNEL_NUM_GRF 48 #define PS_MAX_THREADS 32 #define I965_GRF_BLOCKS(nreg) ((nreg + 15) / 16 - 1) @@ -64,6 +65,7 @@ static const uint32_t ps_kernel_static[][4] = #include "shaders/render/exa_wm_xy.g4b" #include "shaders/render/exa_wm_src_affine.g4b" #include "shaders/render/exa_wm_src_sample_planar.g4b" +#include "shaders/render/exa_wm_yuv_color_balance.g4b" #include "shaders/render/exa_wm_yuv_rgb.g4b" #include "shaders/render/exa_wm_write.g4b" }; @@ -86,6 +88,7 @@ static const uint32_t ps_kernel_static_gen5[][4] = #include "shaders/render/exa_wm_xy.g4b.gen5" #include "shaders/render/exa_wm_src_affine.g4b.gen5" #include "shaders/render/exa_wm_src_sample_planar.g4b.gen5" +#include "shaders/render/exa_wm_yuv_color_balance.g4b.gen5" #include "shaders/render/exa_wm_yuv_rgb.g4b.gen5" #include "shaders/render/exa_wm_write.g4b.gen5" }; @@ -105,6 +108,7 @@ static const uint32_t sf_kernel_static_gen6[][4] = static const uint32_t ps_kernel_static_gen6[][4] = { #include "shaders/render/exa_wm_src_affine.g6b" #include "shaders/render/exa_wm_src_sample_planar.g6b" +#include "shaders/render/exa_wm_yuv_color_balance.g6b" #include "shaders/render/exa_wm_yuv_rgb.g6b" #include "shaders/render/exa_wm_write.g6b" }; @@ -123,6 +127,7 @@ static const uint32_t sf_kernel_static_gen7[][4] = static const uint32_t ps_kernel_static_gen7[][4] = { #include "shaders/render/exa_wm_src_affine.g7b" #include "shaders/render/exa_wm_src_sample_planar.g7b" +#include "shaders/render/exa_wm_yuv_color_balance.g7b" #include "shaders/render/exa_wm_yuv_rgb.g7b" #include "shaders/render/exa_wm_write.g7b" }; @@ -133,9 +138,18 @@ static const uint32_t ps_subpic_kernel_static_gen7[][4] = { #include "shaders/render/exa_wm_write.g7b" }; -#define SURFACE_STATE_PADDED_SIZE_I965 ALIGN(sizeof(struct i965_surface_state), 32) -#define SURFACE_STATE_PADDED_SIZE_GEN7 ALIGN(sizeof(struct gen7_surface_state), 32) -#define SURFACE_STATE_PADDED_SIZE MAX(SURFACE_STATE_PADDED_SIZE_I965, SURFACE_STATE_PADDED_SIZE_GEN7) +/* Programs for Haswell */ +static const uint32_t ps_kernel_static_gen7_haswell[][4] = { +#include "shaders/render/exa_wm_src_affine.g7b" +#include "shaders/render/exa_wm_src_sample_planar.g7b.haswell" +#include "shaders/render/exa_wm_yuv_color_balance.g7b.haswell" +#include "shaders/render/exa_wm_yuv_rgb.g7b" +#include "shaders/render/exa_wm_write.g7b" +}; + + +#define SURFACE_STATE_PADDED_SIZE MAX(SURFACE_STATE_PADDED_SIZE_GEN6, SURFACE_STATE_PADDED_SIZE_GEN7) + #define SURFACE_STATE_OFFSET(index) (SURFACE_STATE_PADDED_SIZE * index) #define BINDING_TABLE_OFFSET SURFACE_STATE_OFFSET(MAX_RENDER_SURFACES) @@ -257,6 +271,31 @@ static struct i965_kernel render_kernels_gen7[] = { } }; +static struct i965_kernel render_kernels_gen7_haswell[] = { + { + "SF", + SF_KERNEL, + sf_kernel_static_gen7, + sizeof(sf_kernel_static_gen7), + NULL + }, + { + "PS", + PS_KERNEL, + ps_kernel_static_gen7_haswell, + sizeof(ps_kernel_static_gen7_haswell), + NULL + }, + + { + "PS_SUBPIC", + PS_SUBPIC_KERNEL, + ps_subpic_kernel_static_gen7, + sizeof(ps_subpic_kernel_static_gen7), + NULL + } +}; + #define URB_VS_ENTRIES 8 #define URB_VS_ENTRY_SIZE 1 @@ -269,8 +308,26 @@ static struct i965_kernel render_kernels_gen7[] = { #define URB_SF_ENTRIES 1 #define URB_SF_ENTRY_SIZE 2 -#define URB_CS_ENTRIES 1 -#define URB_CS_ENTRY_SIZE 1 +#define URB_CS_ENTRIES 4 +#define URB_CS_ENTRY_SIZE 4 + +static float yuv_to_rgb_bt601[3][4] = { +{1.164, 0, 1.596, -0.06275,}, +{1.164, -0.392, -0.813, -0.50196,}, +{1.164, 2.017, 0, -0.50196,}, +}; + +static float yuv_to_rgb_bt709[3][4] = { +{1.164, 0, 1.793, -0.06275,}, +{1.164, -0.213, -0.533, -0.50196,}, +{1.164, 2.112, 0, -0.50196,}, +}; + +static float yuv_to_rgb_smpte_240[3][4] = { +{1.164, 0, 1.794, -0.06275,}, +{1.164, -0.258, -0.5425, -0.50196,}, +{1.164, 2.078, 0, -0.50196,}, +}; static void i965_render_vs_unit(VADriverContextP ctx) @@ -284,7 +341,7 @@ i965_render_vs_unit(VADriverContextP ctx) vs_state = render_state->vs.state->virtual; memset(vs_state, 0, sizeof(*vs_state)); - if (IS_IRONLAKE(i965->intel.device_id)) + if (IS_IRONLAKE(i965->intel.device_info)) vs_state->thread4.nr_urb_entries = URB_VS_ENTRIES >> 2; else vs_state->thread4.nr_urb_entries = URB_VS_ENTRIES; @@ -398,7 +455,7 @@ i965_subpic_render_wm_unit(VADriverContextP ctx) wm_state->thread1.single_program_flow = 1; /* XXX */ - if (IS_IRONLAKE(i965->intel.device_id)) + if (IS_IRONLAKE(i965->intel.device_info)) wm_state->thread1.binding_table_entry_count = 0; /* hardware requirement */ else wm_state->thread1.binding_table_entry_count = 7; @@ -406,8 +463,8 @@ i965_subpic_render_wm_unit(VADriverContextP ctx) wm_state->thread2.scratch_space_base_pointer = 0; wm_state->thread2.per_thread_scratch_space = 0; /* 1024 bytes */ - wm_state->thread3.dispatch_grf_start_reg = 3; /* XXX */ - wm_state->thread3.const_urb_entry_read_length = 0; + wm_state->thread3.dispatch_grf_start_reg = 2; /* XXX */ + wm_state->thread3.const_urb_entry_read_length = 4; wm_state->thread3.const_urb_entry_read_offset = 0; wm_state->thread3.urb_entry_read_length = 1; /* XXX */ wm_state->thread3.urb_entry_read_offset = 0; /* XXX */ @@ -415,14 +472,13 @@ i965_subpic_render_wm_unit(VADriverContextP ctx) wm_state->wm4.stats_enable = 0; wm_state->wm4.sampler_state_pointer = render_state->wm.sampler->offset >> 5; - if (IS_IRONLAKE(i965->intel.device_id)) { + if (IS_IRONLAKE(i965->intel.device_info)) { wm_state->wm4.sampler_count = 0; /* hardware requirement */ - wm_state->wm5.max_threads = 12 * 6 - 1; } else { wm_state->wm4.sampler_count = (render_state->wm.sampler_count + 3) / 4; - wm_state->wm5.max_threads = 10 * 5 - 1; } + wm_state->wm5.max_threads = i965->intel.device_info->max_wm_threads - 1; wm_state->wm5.thread_dispatch_enable = 1; wm_state->wm5.enable_16_pix = 1; wm_state->wm5.enable_8_pix = 0; @@ -463,7 +519,7 @@ i965_render_wm_unit(VADriverContextP ctx) wm_state->thread1.single_program_flow = 1; /* XXX */ - if (IS_IRONLAKE(i965->intel.device_id)) + if (IS_IRONLAKE(i965->intel.device_info)) wm_state->thread1.binding_table_entry_count = 0; /* hardware requirement */ else wm_state->thread1.binding_table_entry_count = 7; @@ -472,7 +528,7 @@ i965_render_wm_unit(VADriverContextP ctx) wm_state->thread2.per_thread_scratch_space = 0; /* 1024 bytes */ wm_state->thread3.dispatch_grf_start_reg = 2; /* XXX */ - wm_state->thread3.const_urb_entry_read_length = 1; + wm_state->thread3.const_urb_entry_read_length = 4; wm_state->thread3.const_urb_entry_read_offset = 0; wm_state->thread3.urb_entry_read_length = 1; /* XXX */ wm_state->thread3.urb_entry_read_offset = 0; /* XXX */ @@ -480,14 +536,13 @@ i965_render_wm_unit(VADriverContextP ctx) wm_state->wm4.stats_enable = 0; wm_state->wm4.sampler_state_pointer = render_state->wm.sampler->offset >> 5; - if (IS_IRONLAKE(i965->intel.device_id)) { + if (IS_IRONLAKE(i965->intel.device_info)) { wm_state->wm4.sampler_count = 0; /* hardware requirement */ - wm_state->wm5.max_threads = 12 * 6 - 1; } else { wm_state->wm4.sampler_count = (render_state->wm.sampler_count + 3) / 4; - wm_state->wm5.max_threads = 10 * 5 - 1; } + wm_state->wm5.max_threads = i965->intel.device_info->max_wm_threads - 1; wm_state->wm5.thread_dispatch_enable = 1; wm_state->wm5.enable_16_pix = 1; wm_state->wm5.enable_8_pix = 0; @@ -637,15 +692,32 @@ i965_render_set_surface_tiling(struct i965_surface_state *ss, unsigned int tilin } static void -i965_render_set_surface_state(struct i965_surface_state *ss, - dri_bo *bo, unsigned long offset, - int width, int height, - int pitch, int format) +i965_render_set_surface_state( + struct i965_surface_state *ss, + dri_bo *bo, + unsigned long offset, + unsigned int width, + unsigned int height, + unsigned int pitch, + unsigned int format, + unsigned int flags +) { unsigned int tiling; unsigned int swizzle; memset(ss, 0, sizeof(*ss)); + + switch (flags & (I965_PP_FLAG_TOP_FIELD|I965_PP_FLAG_BOTTOM_FIELD)) { + case I965_PP_FLAG_BOTTOM_FIELD: + ss->ss0.vert_line_stride_ofs = 1; + /* fall-through */ + case I965_PP_FLAG_TOP_FIELD: + ss->ss0.vert_line_stride = 1; + height /= 2; + break; + } + ss->ss0.surface_type = I965_SURFACE_2D; ss->ss0.surface_format = format; ss->ss0.color_blend = 1; @@ -680,17 +752,43 @@ gen7_render_set_surface_tiling(struct gen7_surface_state *ss, uint32_t tiling) } } +/* Set "Shader Channel Select" */ +void +gen7_render_set_surface_scs(struct gen7_surface_state *ss) +{ + ss->ss7.shader_chanel_select_r = HSW_SCS_RED; + ss->ss7.shader_chanel_select_g = HSW_SCS_GREEN; + ss->ss7.shader_chanel_select_b = HSW_SCS_BLUE; + ss->ss7.shader_chanel_select_a = HSW_SCS_ALPHA; +} + static void -gen7_render_set_surface_state(struct gen7_surface_state *ss, - dri_bo *bo, unsigned long offset, - int width, int height, - int pitch, int format) +gen7_render_set_surface_state( + struct gen7_surface_state *ss, + dri_bo *bo, + unsigned long offset, + int width, + int height, + int pitch, + int format, + unsigned int flags +) { unsigned int tiling; unsigned int swizzle; memset(ss, 0, sizeof(*ss)); + switch (flags & (I965_PP_FLAG_TOP_FIELD|I965_PP_FLAG_BOTTOM_FIELD)) { + case I965_PP_FLAG_BOTTOM_FIELD: + ss->ss0.vert_line_stride_ofs = 1; + /* fall-through */ + case I965_PP_FLAG_TOP_FIELD: + ss->ss0.vert_line_stride = 1; + height /= 2; + break; + } + ss->ss0.surface_type = I965_SURFACE_2D; ss->ss0.surface_format = format; @@ -705,13 +803,19 @@ gen7_render_set_surface_state(struct gen7_surface_state *ss, gen7_render_set_surface_tiling(ss, tiling); } + static void -i965_render_src_surface_state(VADriverContextP ctx, - int index, - dri_bo *region, - unsigned long offset, - int w, int h, - int pitch, int format) +i965_render_src_surface_state( + VADriverContextP ctx, + int index, + dri_bo *region, + unsigned long offset, + int w, + int h, + int pitch, + int format, + unsigned int flags +) { struct i965_driver_data *i965 = i965_driver_data(ctx); struct i965_render_state *render_state = &i965->render_state; @@ -724,11 +828,13 @@ i965_render_src_surface_state(VADriverContextP ctx, assert(ss_bo->virtual); ss = (char *)ss_bo->virtual + SURFACE_STATE_OFFSET(index); - if (IS_GEN7(i965->intel.device_id)) { + if (IS_GEN7(i965->intel.device_info)) { gen7_render_set_surface_state(ss, region, offset, w, h, - pitch, format); + pitch, format, flags); + if (IS_HASWELL(i965->intel.device_info)) + gen7_render_set_surface_scs(ss); dri_bo_emit_reloc(ss_bo, I915_GEM_DOMAIN_SAMPLER, 0, offset, @@ -738,7 +844,7 @@ i965_render_src_surface_state(VADriverContextP ctx, i965_render_set_surface_state(ss, region, offset, w, h, - pitch, format); + pitch, format, flags); dri_bo_emit_reloc(ss_bo, I915_GEM_DOMAIN_SAMPLER, 0, offset, @@ -752,75 +858,68 @@ i965_render_src_surface_state(VADriverContextP ctx, } static void -i965_render_src_surfaces_state(VADriverContextP ctx, - VASurfaceID surface) +i965_render_src_surfaces_state( + VADriverContextP ctx, + struct object_surface *obj_surface, + unsigned int flags +) { - struct i965_driver_data *i965 = i965_driver_data(ctx); - struct object_surface *obj_surface; int region_pitch; int rw, rh; dri_bo *region; - obj_surface = SURFACE(surface); - assert(obj_surface); - region_pitch = obj_surface->width; rw = obj_surface->orig_width; rh = obj_surface->orig_height; region = obj_surface->bo; - i965_render_src_surface_state(ctx, 1, region, 0, rw, rh, region_pitch, I965_SURFACEFORMAT_R8_UNORM); /* Y */ - i965_render_src_surface_state(ctx, 2, region, 0, rw, rh, region_pitch, I965_SURFACEFORMAT_R8_UNORM); + i965_render_src_surface_state(ctx, 1, region, 0, rw, rh, region_pitch, I965_SURFACEFORMAT_R8_UNORM, flags); /* Y */ + i965_render_src_surface_state(ctx, 2, region, 0, rw, rh, region_pitch, I965_SURFACEFORMAT_R8_UNORM, flags); - if (obj_surface->fourcc == VA_FOURCC('N', 'V', '1', '2')) { + if (obj_surface->fourcc == VA_FOURCC_NV12) { i965_render_src_surface_state(ctx, 3, region, region_pitch * obj_surface->y_cb_offset, obj_surface->cb_cr_width, obj_surface->cb_cr_height, obj_surface->cb_cr_pitch, - I965_SURFACEFORMAT_R8G8_UNORM); /* UV */ + I965_SURFACEFORMAT_R8G8_UNORM, flags); /* UV */ i965_render_src_surface_state(ctx, 4, region, region_pitch * obj_surface->y_cb_offset, obj_surface->cb_cr_width, obj_surface->cb_cr_height, obj_surface->cb_cr_pitch, - I965_SURFACEFORMAT_R8G8_UNORM); + I965_SURFACEFORMAT_R8G8_UNORM, flags); } else { i965_render_src_surface_state(ctx, 3, region, region_pitch * obj_surface->y_cb_offset, obj_surface->cb_cr_width, obj_surface->cb_cr_height, obj_surface->cb_cr_pitch, - I965_SURFACEFORMAT_R8_UNORM); /* U */ + I965_SURFACEFORMAT_R8_UNORM, flags); /* U */ i965_render_src_surface_state(ctx, 4, region, region_pitch * obj_surface->y_cb_offset, obj_surface->cb_cr_width, obj_surface->cb_cr_height, obj_surface->cb_cr_pitch, - I965_SURFACEFORMAT_R8_UNORM); + I965_SURFACEFORMAT_R8_UNORM, flags); i965_render_src_surface_state(ctx, 5, region, region_pitch * obj_surface->y_cr_offset, obj_surface->cb_cr_width, obj_surface->cb_cr_height, obj_surface->cb_cr_pitch, - I965_SURFACEFORMAT_R8_UNORM); /* V */ + I965_SURFACEFORMAT_R8_UNORM, flags); /* V */ i965_render_src_surface_state(ctx, 6, region, region_pitch * obj_surface->y_cr_offset, obj_surface->cb_cr_width, obj_surface->cb_cr_height, obj_surface->cb_cr_pitch, - I965_SURFACEFORMAT_R8_UNORM); + I965_SURFACEFORMAT_R8_UNORM, flags); } } static void i965_subpic_render_src_surfaces_state(VADriverContextP ctx, - VASurfaceID surface) + struct object_surface *obj_surface) { - struct i965_driver_data *i965 = i965_driver_data(ctx); - struct object_surface *obj_surface = SURFACE(surface); - int w, h; - dri_bo *region; dri_bo *subpic_region; - struct object_subpic *obj_subpic = SUBPIC(obj_surface->subpic); - struct object_image *obj_image = IMAGE(obj_subpic->image); + unsigned int index = obj_surface->subpic_render_idx; + struct object_subpic *obj_subpic = obj_surface->obj_subpic[index]; + struct object_image *obj_image = obj_subpic->obj_image; + assert(obj_surface); assert(obj_surface->bo); - w = obj_surface->width; - h = obj_surface->height; - region = obj_surface->bo; subpic_region = obj_image->bo; /*subpicture surface*/ - i965_render_src_surface_state(ctx, 1, subpic_region, 0, obj_subpic->width, obj_subpic->height, obj_subpic->pitch, obj_subpic->format); - i965_render_src_surface_state(ctx, 2, subpic_region, 0, obj_subpic->width, obj_subpic->height, obj_subpic->pitch, obj_subpic->format); + i965_render_src_surface_state(ctx, 1, subpic_region, 0, obj_subpic->width, obj_subpic->height, obj_subpic->pitch, obj_subpic->format, 0); + i965_render_src_surface_state(ctx, 2, subpic_region, 0, obj_subpic->width, obj_subpic->height, obj_subpic->pitch, obj_subpic->format, 0); } static void @@ -844,11 +943,13 @@ i965_render_dest_surface_state(VADriverContextP ctx, int index) assert(ss_bo->virtual); ss = (char *)ss_bo->virtual + SURFACE_STATE_OFFSET(index); - if (IS_GEN7(i965->intel.device_id)) { + if (IS_GEN7(i965->intel.device_info)) { gen7_render_set_surface_state(ss, dest_region->bo, 0, dest_region->width, dest_region->height, - dest_region->pitch, format); + dest_region->pitch, format, 0); + if (IS_HASWELL(i965->intel.device_info)) + gen7_render_set_surface_scs(ss); dri_bo_emit_reloc(ss_bo, I915_GEM_DOMAIN_RENDER, I915_GEM_DOMAIN_RENDER, 0, @@ -858,7 +959,7 @@ i965_render_dest_surface_state(VADriverContextP ctx, int index) i965_render_set_surface_state(ss, dest_region->bo, 0, dest_region->width, dest_region->height, - dest_region->pitch, format); + dest_region->pitch, format, 0); dri_bo_emit_reloc(ss_bo, I915_GEM_DOMAIN_RENDER, I915_GEM_DOMAIN_RENDER, 0, @@ -870,18 +971,55 @@ i965_render_dest_surface_state(VADriverContextP ctx, int index) dri_bo_unmap(ss_bo); } +static void +i965_fill_vertex_buffer( + VADriverContextP ctx, + float tex_coords[4], /* [(u1,v1);(u2,v2)] */ + float vid_coords[4] /* [(x1,y1);(x2,y2)] */ +) +{ + struct i965_driver_data * const i965 = i965_driver_data(ctx); + float vb[12]; + + enum { X1, Y1, X2, Y2 }; + + static const unsigned int g_rotation_indices[][6] = { + [VA_ROTATION_NONE] = { X2, Y2, X1, Y2, X1, Y1 }, + [VA_ROTATION_90] = { X2, Y1, X2, Y2, X1, Y2 }, + [VA_ROTATION_180] = { X1, Y1, X2, Y1, X2, Y2 }, + [VA_ROTATION_270] = { X1, Y2, X1, Y1, X2, Y1 }, + }; + + const unsigned int * const rotation_indices = + g_rotation_indices[i965->rotation_attrib->value]; + + vb[0] = tex_coords[rotation_indices[0]]; /* bottom-right corner */ + vb[1] = tex_coords[rotation_indices[1]]; + vb[2] = vid_coords[X2]; + vb[3] = vid_coords[Y2]; + + vb[4] = tex_coords[rotation_indices[2]]; /* bottom-left corner */ + vb[5] = tex_coords[rotation_indices[3]]; + vb[6] = vid_coords[X1]; + vb[7] = vid_coords[Y2]; + + vb[8] = tex_coords[rotation_indices[4]]; /* top-left corner */ + vb[9] = tex_coords[rotation_indices[5]]; + vb[10] = vid_coords[X1]; + vb[11] = vid_coords[Y1]; + + dri_bo_subdata(i965->render_state.vb.vertex_buffer, 0, sizeof(vb), vb); +} + static void i965_subpic_render_upload_vertex(VADriverContextP ctx, - VASurfaceID surface, + struct object_surface *obj_surface, const VARectangle *output_rect) { - struct i965_driver_data *i965 = i965_driver_data(ctx); - struct i965_render_state *render_state = &i965->render_state; - struct object_surface *obj_surface = SURFACE(surface); - struct object_subpic *obj_subpic = SUBPIC(obj_surface->subpic); + unsigned int index = obj_surface->subpic_render_idx; + struct object_subpic *obj_subpic = obj_surface->obj_subpic[index]; + float tex_coords[4], vid_coords[4]; VARectangle dst_rect; - float *vb, tx1, tx2, ty1, ty2, x1, x2, y1, y2; - int i = 0; if (obj_subpic->flags & VA_SUBPICTURE_DESTINATION_IS_SCREEN_COORD) dst_rect = obj_subpic->dst_rect; @@ -894,41 +1032,23 @@ i965_subpic_render_upload_vertex(VADriverContextP ctx, dst_rect.height = sy * obj_subpic->dst_rect.height; } - dri_bo_map(render_state->vb.vertex_buffer, 1); - assert(render_state->vb.vertex_buffer->virtual); - vb = render_state->vb.vertex_buffer->virtual; - - tx1 = (float)obj_subpic->src_rect.x / obj_subpic->width; - ty1 = (float)obj_subpic->src_rect.y / obj_subpic->height; - tx2 = (float)(obj_subpic->src_rect.x + obj_subpic->src_rect.width) / obj_subpic->width; - ty2 = (float)(obj_subpic->src_rect.y + obj_subpic->src_rect.height) / obj_subpic->height; - - x1 = (float)dst_rect.x; - y1 = (float)dst_rect.y; - x2 = (float)(dst_rect.x + dst_rect.width); - y2 = (float)(dst_rect.y + dst_rect.height); + tex_coords[0] = (float)obj_subpic->src_rect.x / obj_subpic->width; + tex_coords[1] = (float)obj_subpic->src_rect.y / obj_subpic->height; + tex_coords[2] = (float)(obj_subpic->src_rect.x + obj_subpic->src_rect.width) / obj_subpic->width; + tex_coords[3] = (float)(obj_subpic->src_rect.y + obj_subpic->src_rect.height) / obj_subpic->height; - vb[i++] = tx2; - vb[i++] = ty2; - vb[i++] = x2; - vb[i++] = y2; + vid_coords[0] = dst_rect.x; + vid_coords[1] = dst_rect.y; + vid_coords[2] = (float)(dst_rect.x + dst_rect.width); + vid_coords[3] = (float)(dst_rect.y + dst_rect.height); - vb[i++] = tx1; - vb[i++] = ty2; - vb[i++] = x1; - vb[i++] = y2; - - vb[i++] = tx1; - vb[i++] = ty1; - vb[i++] = x1; - vb[i++] = y1; - dri_bo_unmap(render_state->vb.vertex_buffer); + i965_fill_vertex_buffer(ctx, tex_coords, vid_coords); } static void i965_render_upload_vertex( VADriverContextP ctx, - VASurfaceID surface, + struct object_surface *obj_surface, const VARectangle *src_rect, const VARectangle *dst_rect ) @@ -936,99 +1056,133 @@ i965_render_upload_vertex( struct i965_driver_data *i965 = i965_driver_data(ctx); struct i965_render_state *render_state = &i965->render_state; struct intel_region *dest_region = render_state->draw_region; - struct object_surface *obj_surface; - float *vb; - - float u1, v1, u2, v2; - int i, width, height; - int box_x1 = dest_region->x + dst_rect->x; - int box_y1 = dest_region->y + dst_rect->y; - int box_x2 = box_x1 + dst_rect->width; - int box_y2 = box_y1 + dst_rect->height; - - obj_surface = SURFACE(surface); - assert(surface); - width = obj_surface->orig_width; - height = obj_surface->orig_height; - - u1 = (float)src_rect->x / width; - v1 = (float)src_rect->y / height; - u2 = (float)(src_rect->x + src_rect->width) / width; - v2 = (float)(src_rect->y + src_rect->height) / height; + float tex_coords[4], vid_coords[4]; + int width, height; - dri_bo_map(render_state->vb.vertex_buffer, 1); - assert(render_state->vb.vertex_buffer->virtual); - vb = render_state->vb.vertex_buffer->virtual; + width = obj_surface->orig_width; + height = obj_surface->orig_height; - i = 0; - vb[i++] = u2; - vb[i++] = v2; - vb[i++] = (float)box_x2; - vb[i++] = (float)box_y2; - - vb[i++] = u1; - vb[i++] = v2; - vb[i++] = (float)box_x1; - vb[i++] = (float)box_y2; + tex_coords[0] = (float)src_rect->x / width; + tex_coords[1] = (float)src_rect->y / height; + tex_coords[2] = (float)(src_rect->x + src_rect->width) / width; + tex_coords[3] = (float)(src_rect->y + src_rect->height) / height; - vb[i++] = u1; - vb[i++] = v1; - vb[i++] = (float)box_x1; - vb[i++] = (float)box_y1; + vid_coords[0] = dest_region->x + dst_rect->x; + vid_coords[1] = dest_region->y + dst_rect->y; + vid_coords[2] = vid_coords[0] + dst_rect->width; + vid_coords[3] = vid_coords[1] + dst_rect->height; - dri_bo_unmap(render_state->vb.vertex_buffer); + i965_fill_vertex_buffer(ctx, tex_coords, vid_coords); } +#define PI 3.1415926 + static void i965_render_upload_constants(VADriverContextP ctx, - VASurfaceID surface) + struct object_surface *obj_surface, + unsigned int flags) { struct i965_driver_data *i965 = i965_driver_data(ctx); struct i965_render_state *render_state = &i965->render_state; unsigned short *constant_buffer; - struct object_surface *obj_surface = SURFACE(surface); + float *color_balance_base; + float contrast = (float)i965->contrast_attrib->value / DEFAULT_CONTRAST; + float brightness = (float)i965->brightness_attrib->value / 255; /* YUV is float in the shader */ + float hue = (float)i965->hue_attrib->value / 180 * PI; + float saturation = (float)i965->saturation_attrib->value / DEFAULT_SATURATION; + float *yuv_to_rgb; + unsigned int color_flag; dri_bo_map(render_state->curbe.bo, 1); assert(render_state->curbe.bo->virtual); constant_buffer = render_state->curbe.bo->virtual; if (obj_surface->subsampling == SUBSAMPLE_YUV400) { - assert(obj_surface->fourcc == VA_FOURCC('I', 'M', 'C', '1') || - obj_surface->fourcc == VA_FOURCC('I', 'M', 'C', '3')); - *constant_buffer = 2; + assert(obj_surface->fourcc == VA_FOURCC_Y800); + + constant_buffer[0] = 2; } else { - if (obj_surface->fourcc == VA_FOURCC('N', 'V', '1', '2')) - *constant_buffer = 1; + if (obj_surface->fourcc == VA_FOURCC_NV12) + constant_buffer[0] = 1; else - *constant_buffer = 0; + constant_buffer[0] = 0; } + if (i965->contrast_attrib->value == DEFAULT_CONTRAST && + i965->brightness_attrib->value == DEFAULT_BRIGHTNESS && + i965->hue_attrib->value == DEFAULT_HUE && + i965->saturation_attrib->value == DEFAULT_SATURATION) + constant_buffer[1] = 1; /* skip color balance transformation */ + else + constant_buffer[1] = 0; + + color_balance_base = (float *)constant_buffer + 4; + *color_balance_base++ = contrast; + *color_balance_base++ = brightness; + *color_balance_base++ = cos(hue) * contrast * saturation; + *color_balance_base++ = sin(hue) * contrast * saturation; + + color_flag = flags & VA_SRC_COLOR_MASK; + yuv_to_rgb = (float *)constant_buffer + 8; + if (color_flag == VA_SRC_BT709) + memcpy(yuv_to_rgb, yuv_to_rgb_bt709, sizeof(yuv_to_rgb_bt709)); + else if (color_flag == VA_SRC_SMPTE_240) + memcpy(yuv_to_rgb, yuv_to_rgb_smpte_240, sizeof(yuv_to_rgb_smpte_240)); + else + memcpy(yuv_to_rgb, yuv_to_rgb_bt601, sizeof(yuv_to_rgb_bt601)); + dri_bo_unmap(render_state->curbe.bo); } static void +i965_subpic_render_upload_constants(VADriverContextP ctx, + struct object_surface *obj_surface) +{ + struct i965_driver_data *i965 = i965_driver_data(ctx); + struct i965_render_state *render_state = &i965->render_state; + float *constant_buffer; + float global_alpha = 1.0; + unsigned int index = obj_surface->subpic_render_idx; + struct object_subpic *obj_subpic = obj_surface->obj_subpic[index]; + + if (obj_subpic->flags & VA_SUBPICTURE_GLOBAL_ALPHA) { + global_alpha = obj_subpic->global_alpha; + } + + dri_bo_map(render_state->curbe.bo, 1); + + assert(render_state->curbe.bo->virtual); + constant_buffer = render_state->curbe.bo->virtual; + *constant_buffer = global_alpha; + + dri_bo_unmap(render_state->curbe.bo); +} + +static void i965_surface_render_state_setup( VADriverContextP ctx, - VASurfaceID surface, + struct object_surface *obj_surface, const VARectangle *src_rect, - const VARectangle *dst_rect + const VARectangle *dst_rect, + unsigned int flags ) { i965_render_vs_unit(ctx); i965_render_sf_unit(ctx); i965_render_dest_surface_state(ctx, 0); - i965_render_src_surfaces_state(ctx, surface); + i965_render_src_surfaces_state(ctx, obj_surface, flags); i965_render_sampler(ctx); i965_render_wm_unit(ctx); i965_render_cc_viewport(ctx); i965_render_cc_unit(ctx); - i965_render_upload_vertex(ctx, surface, src_rect, dst_rect); - i965_render_upload_constants(ctx, surface); + i965_render_upload_vertex(ctx, obj_surface, src_rect, dst_rect); + i965_render_upload_constants(ctx, obj_surface, flags); } + static void i965_subpic_render_state_setup( VADriverContextP ctx, - VASurfaceID surface, + struct object_surface *obj_surface, const VARectangle *src_rect, const VARectangle *dst_rect ) @@ -1036,12 +1190,13 @@ i965_subpic_render_state_setup( i965_render_vs_unit(ctx); i965_render_sf_unit(ctx); i965_render_dest_surface_state(ctx, 0); - i965_subpic_render_src_surfaces_state(ctx, surface); + i965_subpic_render_src_surfaces_state(ctx, obj_surface); i965_render_sampler(ctx); i965_subpic_render_wm_unit(ctx); i965_render_cc_viewport(ctx); i965_subpic_render_cc_unit(ctx); - i965_subpic_render_upload_vertex(ctx, surface, dst_rect); + i965_subpic_render_upload_constants(ctx, obj_surface); + i965_subpic_render_upload_vertex(ctx, obj_surface, dst_rect); } @@ -1075,7 +1230,7 @@ i965_render_state_base_address(VADriverContextP ctx) struct intel_batchbuffer *batch = i965->batch; struct i965_render_state *render_state = &i965->render_state; - if (IS_IRONLAKE(i965->intel.device_id)) { + if (IS_IRONLAKE(i965->intel.device_info)) { BEGIN_BATCH(batch, 8); OUT_BATCH(batch, CMD_STATE_BASE_ADDRESS | 6); OUT_BATCH(batch, 0 | BASE_ADDRESS_MODIFY); @@ -1239,7 +1394,7 @@ i965_render_vertex_elements(VADriverContextP ctx) struct i965_driver_data *i965 = i965_driver_data(ctx); struct intel_batchbuffer *batch = i965->batch; - if (IS_IRONLAKE(i965->intel.device_id)) { + if (IS_IRONLAKE(i965->intel.device_info)) { BEGIN_BATCH(batch, 5); OUT_BATCH(batch, CMD_VERTEX_ELEMENTS | 3); /* offset 0: X,Y -> {X, Y, 1.0, 1.0} */ @@ -1291,7 +1446,7 @@ i965_render_vertex_elements(VADriverContextP ctx) static void i965_render_upload_image_palette( VADriverContextP ctx, - VAImageID image_id, + struct object_image *obj_image, unsigned int alpha ) { @@ -1299,9 +1454,11 @@ i965_render_upload_image_palette( struct intel_batchbuffer *batch = i965->batch; unsigned int i; - struct object_image *obj_image = IMAGE(image_id); assert(obj_image); + if (!obj_image) + return; + if (obj_image->image.num_palette_entries == 0) return; @@ -1329,7 +1486,7 @@ i965_render_startup(VADriverContextP ctx) ((4 * 4) << VB0_BUFFER_PITCH_SHIFT)); OUT_RELOC(batch, render_state->vb.vertex_buffer, I915_GEM_DOMAIN_VERTEX, 0, 0); - if (IS_IRONLAKE(i965->intel.device_id)) + if (IS_IRONLAKE(i965->intel.device_info)) OUT_RELOC(batch, render_state->vb.vertex_buffer, I915_GEM_DOMAIN_VERTEX, 0, 12 * 4); else OUT_BATCH(batch, 3); @@ -1379,8 +1536,9 @@ i965_clear_dest_region(VADriverContextP ctx) br13 |= pitch; - if (IS_GEN6(i965->intel.device_id) || - IS_GEN7(i965->intel.device_id)) { + if (IS_GEN6(i965->intel.device_info) || + IS_GEN7(i965->intel.device_info) || + IS_GEN8(i965->intel.device_info)) { intel_batchbuffer_start_atomic_blt(batch, 24); BEGIN_BLT_BATCH(batch, 6); } else { @@ -1441,6 +1599,7 @@ i965_subpic_render_pipeline_setup(VADriverContextP ctx) i965_render_pipelined_pointers(ctx); i965_render_urb_layout(ctx); i965_render_cs_urb_layout(ctx); + i965_render_constant_buffer(ctx); i965_render_drawing_rectangle(ctx); i965_render_vertex_elements(ctx); i965_render_startup(ctx); @@ -1531,7 +1690,7 @@ i965_render_initialize(VADriverContextP ctx) static void i965_render_put_surface( VADriverContextP ctx, - VASurfaceID surface, + struct object_surface *obj_surface, const VARectangle *src_rect, const VARectangle *dst_rect, unsigned int flags @@ -1541,7 +1700,7 @@ i965_render_put_surface( struct intel_batchbuffer *batch = i965->batch; i965_render_initialize(ctx); - i965_surface_render_state_setup(ctx, surface, src_rect, dst_rect); + i965_surface_render_state_setup(ctx, obj_surface, src_rect, dst_rect, flags); i965_surface_render_pipeline_setup(ctx); intel_batchbuffer_flush(batch); } @@ -1549,22 +1708,22 @@ i965_render_put_surface( static void i965_render_put_subpicture( VADriverContextP ctx, - VASurfaceID surface, + struct object_surface *obj_surface, const VARectangle *src_rect, const VARectangle *dst_rect ) { struct i965_driver_data *i965 = i965_driver_data(ctx); struct intel_batchbuffer *batch = i965->batch; - struct object_surface *obj_surface = SURFACE(surface); - struct object_subpic *obj_subpic = SUBPIC(obj_surface->subpic); + unsigned int index = obj_surface->subpic_render_idx; + struct object_subpic *obj_subpic = obj_surface->obj_subpic[index]; assert(obj_subpic); i965_render_initialize(ctx); - i965_subpic_render_state_setup(ctx, surface, src_rect, dst_rect); + i965_subpic_render_state_setup(ctx, obj_surface, src_rect, dst_rect); i965_subpic_render_pipeline_setup(ctx); - i965_render_upload_image_palette(ctx, obj_subpic->image, 0xff); + i965_render_upload_image_palette(ctx, obj_subpic->obj_image, 0xff); intel_batchbuffer_flush(batch); } @@ -1693,20 +1852,21 @@ gen6_render_depth_stencil_state(VADriverContextP ctx) static void gen6_render_setup_states( VADriverContextP ctx, - VASurfaceID surface, + struct object_surface *obj_surface, const VARectangle *src_rect, - const VARectangle *dst_rect + const VARectangle *dst_rect, + unsigned int flags ) { i965_render_dest_surface_state(ctx, 0); - i965_render_src_surfaces_state(ctx, surface); + i965_render_src_surfaces_state(ctx, obj_surface, flags); i965_render_sampler(ctx); i965_render_cc_viewport(ctx); gen6_render_color_calc_state(ctx); gen6_render_blend_state(ctx); gen6_render_depth_stencil_state(ctx); - i965_render_upload_constants(ctx, surface); - i965_render_upload_vertex(ctx, surface, src_rect, dst_rect); + i965_render_upload_constants(ctx, obj_surface, flags); + i965_render_upload_vertex(ctx, obj_surface, src_rect, dst_rect); } static void @@ -1944,7 +2104,7 @@ gen6_emit_wm_state(VADriverContextP ctx, int kernel) OUT_RELOC(batch, render_state->curbe.bo, I915_GEM_DOMAIN_INSTRUCTION, 0, - 0); + (URB_CS_ENTRY_SIZE-1)); OUT_BATCH(batch, 0); OUT_BATCH(batch, 0); OUT_BATCH(batch, 0); @@ -1957,7 +2117,7 @@ gen6_emit_wm_state(VADriverContextP ctx, int kernel) (5 << GEN6_3DSTATE_WM_BINDING_TABLE_ENTRY_COUNT_SHIFT)); OUT_BATCH(batch, 0); OUT_BATCH(batch, (6 << GEN6_3DSTATE_WM_DISPATCH_START_GRF_0_SHIFT)); /* DW4 */ - OUT_BATCH(batch, ((40 - 1) << GEN6_3DSTATE_WM_MAX_THREADS_SHIFT) | + OUT_BATCH(batch, ((i965->intel.device_info->max_wm_threads - 1) << GEN6_3DSTATE_WM_MAX_THREADS_SHIFT) | GEN6_3DSTATE_WM_DISPATCH_ENABLE | GEN6_3DSTATE_WM_16_DISPATCH_ENABLE); OUT_BATCH(batch, (1 << GEN6_3DSTATE_WM_NUM_SF_OUTPUTS_SHIFT) | @@ -2055,7 +2215,7 @@ gen6_render_emit_states(VADriverContextP ctx, int kernel) static void gen6_render_put_surface( VADriverContextP ctx, - VASurfaceID surface, + struct object_surface *obj_surface, const VARectangle *src_rect, const VARectangle *dst_rect, unsigned int flags @@ -2065,7 +2225,7 @@ gen6_render_put_surface( struct intel_batchbuffer *batch = i965->batch; gen6_render_initialize(ctx); - gen6_render_setup_states(ctx, surface, src_rect, dst_rect); + gen6_render_setup_states(ctx, obj_surface, src_rect, dst_rect, flags); i965_clear_dest_region(ctx); gen6_render_emit_states(ctx, PS_KERNEL); intel_batchbuffer_flush(batch); @@ -2096,39 +2256,40 @@ gen6_subpicture_render_blend_state(VADriverContextP ctx) static void gen6_subpicture_render_setup_states( VADriverContextP ctx, - VASurfaceID surface, + struct object_surface *obj_surface, const VARectangle *src_rect, const VARectangle *dst_rect ) { i965_render_dest_surface_state(ctx, 0); - i965_subpic_render_src_surfaces_state(ctx, surface); + i965_subpic_render_src_surfaces_state(ctx, obj_surface); i965_render_sampler(ctx); i965_render_cc_viewport(ctx); gen6_render_color_calc_state(ctx); gen6_subpicture_render_blend_state(ctx); gen6_render_depth_stencil_state(ctx); - i965_subpic_render_upload_vertex(ctx, surface, dst_rect); + i965_subpic_render_upload_constants(ctx, obj_surface); + i965_subpic_render_upload_vertex(ctx, obj_surface, dst_rect); } static void gen6_render_put_subpicture( VADriverContextP ctx, - VASurfaceID surface, + struct object_surface *obj_surface, const VARectangle *src_rect, const VARectangle *dst_rect ) { struct i965_driver_data *i965 = i965_driver_data(ctx); struct intel_batchbuffer *batch = i965->batch; - struct object_surface *obj_surface = SURFACE(surface); - struct object_subpic *obj_subpic = SUBPIC(obj_surface->subpic); + unsigned int index = obj_surface->subpic_render_idx; + struct object_subpic *obj_subpic = obj_surface->obj_subpic[index]; assert(obj_subpic); gen6_render_initialize(ctx); - gen6_subpicture_render_setup_states(ctx, surface, src_rect, dst_rect); + gen6_subpicture_render_setup_states(ctx, obj_surface, src_rect, dst_rect); gen6_render_emit_states(ctx, PS_SUBPIC_KERNEL); - i965_render_upload_image_palette(ctx, obj_subpic->image, 0xff); + i965_render_upload_image_palette(ctx, obj_subpic->obj_image, 0xff); intel_batchbuffer_flush(batch); } @@ -2206,6 +2367,11 @@ gen7_render_initialize(VADriverContextP ctx) render_state->cc.depth_stencil = bo; } +/* + * for GEN8 + */ +#define ALIGNMENT 64 + static void gen7_render_color_calc_state(VADriverContextP ctx) { @@ -2282,25 +2448,28 @@ gen7_render_sampler(VADriverContextP ctx) dri_bo_unmap(render_state->wm.sampler); } + static void gen7_render_setup_states( VADriverContextP ctx, - VASurfaceID surface, + struct object_surface *obj_surface, const VARectangle *src_rect, - const VARectangle *dst_rect + const VARectangle *dst_rect, + unsigned int flags ) { i965_render_dest_surface_state(ctx, 0); - i965_render_src_surfaces_state(ctx, surface); + i965_render_src_surfaces_state(ctx, obj_surface, flags); gen7_render_sampler(ctx); i965_render_cc_viewport(ctx); gen7_render_color_calc_state(ctx); gen7_render_blend_state(ctx); gen7_render_depth_stencil_state(ctx); - i965_render_upload_constants(ctx, surface); - i965_render_upload_vertex(ctx, surface, src_rect, dst_rect); + i965_render_upload_constants(ctx, obj_surface, flags); + i965_render_upload_vertex(ctx, obj_surface, src_rect, dst_rect); } + static void gen7_emit_invarient_states(VADriverContextP ctx) { @@ -2382,6 +2551,10 @@ gen7_emit_urb(VADriverContextP ctx) { struct i965_driver_data *i965 = i965_driver_data(ctx); struct intel_batchbuffer *batch = i965->batch; + unsigned int num_urb_entries = 32; + + if (IS_HASWELL(i965->intel.device_info)) + num_urb_entries = 64; BEGIN_BATCH(batch, 2); OUT_BATCH(batch, GEN7_3DSTATE_PUSH_CONSTANT_ALLOC_PS | (2 - 2)); @@ -2391,7 +2564,7 @@ gen7_emit_urb(VADriverContextP ctx) BEGIN_BATCH(batch, 2); OUT_BATCH(batch, GEN7_3DSTATE_URB_VS | (2 - 2)); OUT_BATCH(batch, - (32 << GEN7_URB_ENTRY_NUMBER_SHIFT) | /* at least 32 */ + (num_urb_entries << GEN7_URB_ENTRY_NUMBER_SHIFT) | (2 - 1) << GEN7_URB_ENTRY_SIZE_SHIFT | (1 << GEN7_URB_STARTING_ADDRESS_SHIFT)); ADVANCE_BATCH(batch); @@ -2686,6 +2859,13 @@ gen7_emit_wm_state(VADriverContextP ctx, int kernel) struct i965_driver_data *i965 = i965_driver_data(ctx); struct intel_batchbuffer *batch = i965->batch; struct i965_render_state *render_state = &i965->render_state; + unsigned int max_threads_shift = GEN7_PS_MAX_THREADS_SHIFT_IVB; + unsigned int num_samples = 0; + + if (IS_HASWELL(i965->intel.device_info)) { + max_threads_shift = GEN7_PS_MAX_THREADS_SHIFT_HSW; + num_samples = 1 << GEN7_PS_SAMPLE_MASK_SHIFT_HSW; + } BEGIN_BATCH(batch, 3); OUT_BATCH(batch, GEN6_3DSTATE_WM | (3 - 2)); @@ -2697,7 +2877,7 @@ gen7_emit_wm_state(VADriverContextP ctx, int kernel) BEGIN_BATCH(batch, 7); OUT_BATCH(batch, GEN6_3DSTATE_CONSTANT_PS | (7 - 2)); - OUT_BATCH(batch, 1); + OUT_BATCH(batch, URB_CS_ENTRY_SIZE); OUT_BATCH(batch, 0); OUT_RELOC(batch, render_state->curbe.bo, @@ -2719,7 +2899,7 @@ gen7_emit_wm_state(VADriverContextP ctx, int kernel) (5 << GEN7_PS_BINDING_TABLE_ENTRY_COUNT_SHIFT)); OUT_BATCH(batch, 0); /* scratch space base offset */ OUT_BATCH(batch, - ((86 - 1) << GEN7_PS_MAX_THREADS_SHIFT) | + ((i965->intel.device_info->max_wm_threads - 1) << max_threads_shift) | num_samples | GEN7_PS_PUSH_CONSTANT_ENABLE | GEN7_PS_ATTRIBUTE_ENABLE | GEN7_PS_16_DISPATCH_ENABLE); @@ -2817,10 +2997,11 @@ gen7_render_emit_states(VADriverContextP ctx, int kernel) intel_batchbuffer_end_atomic(batch); } + static void gen7_render_put_surface( VADriverContextP ctx, - VASurfaceID surface, + struct object_surface *obj_surface, const VARectangle *src_rect, const VARectangle *dst_rect, unsigned int flags @@ -2830,12 +3011,13 @@ gen7_render_put_surface( struct intel_batchbuffer *batch = i965->batch; gen7_render_initialize(ctx); - gen7_render_setup_states(ctx, surface, src_rect, dst_rect); + gen7_render_setup_states(ctx, obj_surface, src_rect, dst_rect, flags); i965_clear_dest_region(ctx); gen7_render_emit_states(ctx, PS_KERNEL); intel_batchbuffer_flush(batch); } + static void gen7_subpicture_render_blend_state(VADriverContextP ctx) { @@ -2861,144 +3043,97 @@ gen7_subpicture_render_blend_state(VADriverContextP ctx) static void gen7_subpicture_render_setup_states( VADriverContextP ctx, - VASurfaceID surface, + struct object_surface *obj_surface, const VARectangle *src_rect, const VARectangle *dst_rect ) { i965_render_dest_surface_state(ctx, 0); - i965_subpic_render_src_surfaces_state(ctx, surface); + i965_subpic_render_src_surfaces_state(ctx, obj_surface); i965_render_sampler(ctx); i965_render_cc_viewport(ctx); gen7_render_color_calc_state(ctx); gen7_subpicture_render_blend_state(ctx); gen7_render_depth_stencil_state(ctx); - i965_subpic_render_upload_vertex(ctx, surface, dst_rect); + i965_subpic_render_upload_constants(ctx, obj_surface); + i965_subpic_render_upload_vertex(ctx, obj_surface, dst_rect); } static void gen7_render_put_subpicture( VADriverContextP ctx, - VASurfaceID surface, + struct object_surface *obj_surface, const VARectangle *src_rect, const VARectangle *dst_rect ) { struct i965_driver_data *i965 = i965_driver_data(ctx); struct intel_batchbuffer *batch = i965->batch; - struct object_surface *obj_surface = SURFACE(surface); - struct object_subpic *obj_subpic = SUBPIC(obj_surface->subpic); + unsigned int index = obj_surface->subpic_render_idx; + struct object_subpic *obj_subpic = obj_surface->obj_subpic[index]; assert(obj_subpic); gen7_render_initialize(ctx); - gen7_subpicture_render_setup_states(ctx, surface, src_rect, dst_rect); + gen7_subpicture_render_setup_states(ctx, obj_surface, src_rect, dst_rect); gen7_render_emit_states(ctx, PS_SUBPIC_KERNEL); - i965_render_upload_image_palette(ctx, obj_subpic->image, 0xff); + i965_render_upload_image_palette(ctx, obj_subpic->obj_image, 0xff); intel_batchbuffer_flush(batch); } -/* - * global functions - */ -VAStatus -i965_DestroySurfaces(VADriverContextP ctx, - VASurfaceID *surface_list, - int num_surfaces); void intel_render_put_surface( VADriverContextP ctx, - VASurfaceID surface, + struct object_surface *obj_surface, const VARectangle *src_rect, const VARectangle *dst_rect, unsigned int flags ) { struct i965_driver_data *i965 = i965_driver_data(ctx); + struct i965_render_state *render_state = &i965->render_state; int has_done_scaling = 0; - VASurfaceID in_surface_id = surface; - VASurfaceID out_surface_id = i965_post_processing(ctx, surface, src_rect, dst_rect, flags, &has_done_scaling); + VASurfaceID out_surface_id = i965_post_processing(ctx, + obj_surface, + src_rect, + dst_rect, + flags, + &has_done_scaling); assert((!has_done_scaling) || (out_surface_id != VA_INVALID_ID)); - if (out_surface_id != VA_INVALID_ID) - in_surface_id = out_surface_id; + if (out_surface_id != VA_INVALID_ID) { + struct object_surface *new_obj_surface = SURFACE(out_surface_id); + + if (new_obj_surface && new_obj_surface->bo) + obj_surface = new_obj_surface; - if (IS_GEN7(i965->intel.device_id)) - gen7_render_put_surface(ctx, in_surface_id, has_done_scaling ? dst_rect : src_rect, dst_rect, flags); - else if (IS_GEN6(i965->intel.device_id)) - gen6_render_put_surface(ctx, in_surface_id, has_done_scaling ? dst_rect : src_rect, dst_rect, flags); - else - i965_render_put_surface(ctx, in_surface_id, has_done_scaling ? dst_rect : src_rect, dst_rect, flags); + if (has_done_scaling) + src_rect = dst_rect; + } - if (in_surface_id != surface) - i965_DestroySurfaces(ctx, &in_surface_id, 1); + render_state->render_put_surface(ctx, obj_surface, src_rect, dst_rect, flags); + + if (out_surface_id != VA_INVALID_ID) + i965_DestroySurfaces(ctx, &out_surface_id, 1); } void intel_render_put_subpicture( VADriverContextP ctx, - VASurfaceID surface, + struct object_surface *obj_surface, const VARectangle *src_rect, const VARectangle *dst_rect ) { struct i965_driver_data *i965 = i965_driver_data(ctx); - - if (IS_GEN7(i965->intel.device_id)) - gen7_render_put_subpicture(ctx, surface, src_rect, dst_rect); - else if (IS_GEN6(i965->intel.device_id)) - gen6_render_put_subpicture(ctx, surface, src_rect, dst_rect); - else - i965_render_put_subpicture(ctx, surface, src_rect, dst_rect); -} - -Bool -i965_render_init(VADriverContextP ctx) -{ - struct i965_driver_data *i965 = i965_driver_data(ctx); struct i965_render_state *render_state = &i965->render_state; - int i; - - /* kernel */ - assert(NUM_RENDER_KERNEL == (sizeof(render_kernels_gen5) / - sizeof(render_kernels_gen5[0]))); - assert(NUM_RENDER_KERNEL == (sizeof(render_kernels_gen6) / - sizeof(render_kernels_gen6[0]))); - - if (IS_GEN7(i965->intel.device_id)) - memcpy(render_state->render_kernels, render_kernels_gen7, sizeof(render_state->render_kernels)); - else if (IS_GEN6(i965->intel.device_id)) - memcpy(render_state->render_kernels, render_kernels_gen6, sizeof(render_state->render_kernels)); - else if (IS_IRONLAKE(i965->intel.device_id)) - memcpy(render_state->render_kernels, render_kernels_gen5, sizeof(render_state->render_kernels)); - else - memcpy(render_state->render_kernels, render_kernels_gen4, sizeof(render_state->render_kernels)); - - for (i = 0; i < NUM_RENDER_KERNEL; i++) { - struct i965_kernel *kernel = &render_state->render_kernels[i]; - - if (!kernel->size) - continue; - - kernel->bo = dri_bo_alloc(i965->intel.bufmgr, - kernel->name, - kernel->size, 0x1000); - assert(kernel->bo); - dri_bo_subdata(kernel->bo, 0, kernel->size, kernel->bin); - } - - /* constant buffer */ - render_state->curbe.bo = dri_bo_alloc(i965->intel.bufmgr, - "constant buffer", - 4096, 64); - assert(render_state->curbe.bo); - return True; + render_state->render_put_subpicture(ctx, obj_surface, src_rect, dst_rect); } -Bool -i965_render_terminate(VADriverContextP ctx) +static void +genx_render_terminate(VADriverContextP ctx) { int i; struct i965_driver_data *i965 = i965_driver_data(ctx); @@ -3009,7 +3144,7 @@ i965_render_terminate(VADriverContextP ctx) for (i = 0; i < NUM_RENDER_KERNEL; i++) { struct i965_kernel *kernel = &render_state->render_kernels[i]; - + dri_bo_unreference(kernel->bo); kernel->bo = NULL; } @@ -3039,7 +3174,78 @@ i965_render_terminate(VADriverContextP ctx) free(render_state->draw_region); render_state->draw_region = NULL; } +} - return True; +bool +genx_render_init(VADriverContextP ctx) +{ + struct i965_driver_data *i965 = i965_driver_data(ctx); + struct i965_render_state *render_state = &i965->render_state; + int i; + + /* kernel */ + assert(NUM_RENDER_KERNEL == (sizeof(render_kernels_gen5) / + sizeof(render_kernels_gen5[0]))); + assert(NUM_RENDER_KERNEL == (sizeof(render_kernels_gen6) / + sizeof(render_kernels_gen6[0]))); + + if (IS_GEN7(i965->intel.device_info)) { + memcpy(render_state->render_kernels, + (IS_HASWELL(i965->intel.device_info) ? render_kernels_gen7_haswell : render_kernels_gen7), + sizeof(render_state->render_kernels)); + render_state->render_put_surface = gen7_render_put_surface; + render_state->render_put_subpicture = gen7_render_put_subpicture; + } else if (IS_GEN6(i965->intel.device_info)) { + memcpy(render_state->render_kernels, render_kernels_gen6, sizeof(render_state->render_kernels)); + render_state->render_put_surface = gen6_render_put_surface; + render_state->render_put_subpicture = gen6_render_put_subpicture; + } else if (IS_IRONLAKE(i965->intel.device_info)) { + memcpy(render_state->render_kernels, render_kernels_gen5, sizeof(render_state->render_kernels)); + render_state->render_put_surface = i965_render_put_surface; + render_state->render_put_subpicture = i965_render_put_subpicture; + } else { + memcpy(render_state->render_kernels, render_kernels_gen4, sizeof(render_state->render_kernels)); + render_state->render_put_surface = i965_render_put_surface; + render_state->render_put_subpicture = i965_render_put_subpicture; + } + + render_state->render_terminate = genx_render_terminate; + + for (i = 0; i < NUM_RENDER_KERNEL; i++) { + struct i965_kernel *kernel = &render_state->render_kernels[i]; + + if (!kernel->size) + continue; + + kernel->bo = dri_bo_alloc(i965->intel.bufmgr, + kernel->name, + kernel->size, 0x1000); + assert(kernel->bo); + dri_bo_subdata(kernel->bo, 0, kernel->size, kernel->bin); + } + + /* constant buffer */ + render_state->curbe.bo = dri_bo_alloc(i965->intel.bufmgr, + "constant buffer", + 4096, 64); + assert(render_state->curbe.bo); + + return true; +} + +bool +i965_render_init(VADriverContextP ctx) +{ + struct i965_driver_data *i965 = i965_driver_data(ctx); + + return i965->codec_info->render_init(ctx); } +void +i965_render_terminate(VADriverContextP ctx) +{ + struct i965_driver_data *i965 = i965_driver_data(ctx); + struct i965_render_state *render_state = &i965->render_state; + + render_state->render_terminate(ctx); +}