From: Zhao Halley Date: Thu, 2 Aug 2012 09:04:37 +0000 (+0300) Subject: work around hw limitation(dword alignment) of horizontal offset X-Git-Tag: staging-20130205~138 X-Git-Url: http://review.tizen.org/git/?a=commitdiff_plain;h=782452edee90b0cbf3c7fed4e2bbe6ff400fc3b2;p=platform%2Fupstream%2Flibva-intel-driver.git work around hw limitation(dword alignment) of horizontal offset on dst surface left edge (nv12 avs) --- diff --git a/src/i965_post_processing.c b/src/i965_post_processing.c index 587e4a1..68d4659 100755 --- a/src/i965_post_processing.c +++ b/src/i965_post_processing.c @@ -55,6 +55,10 @@ #define SURFACE_STATE_OFFSET(index) (SURFACE_STATE_PADDED_SIZE * index) #define BINDING_TABLE_OFFSET SURFACE_STATE_OFFSET(MAX_PP_SURFACES) +#define GPU_ASM_BLOCK_WIDTH 16 +#define GPU_ASM_BLOCK_HEIGHT 8 +#define GPU_ASM_X_OFFSET_ALIGNMENT 4 + static const uint32_t pp_null_gen5[][4] = { #include "shaders/post_processing/gen5_6/null.g4b.gen5" }; @@ -1688,23 +1692,36 @@ pp_load_save_set_block_parameter(struct i965_post_processing_context *pp_context return 0; } -static void calculate_boundary_block_mask(struct i965_post_processing_context *pp_context, int width, int height) +static void calculate_boundary_block_mask(struct i965_post_processing_context *pp_context, const VARectangle *dst_rect) { - #define BLOCK_WIDTH 16 - #define BLOCK_HEIGHT 8 - int i = 0; - if (width%BLOCK_WIDTH){ - pp_context->block_horizontal_mask = (1 << (width%BLOCK_WIDTH)) - 1; + int i; + /* x offset of dest surface must be dword aligned. + * so we have to extend dst surface on left edge, and mask out pixels not interested + */ + if (dst_rect->x%GPU_ASM_X_OFFSET_ALIGNMENT) { + pp_context->block_horizontal_mask_left = 0; + for (i=dst_rect->x%GPU_ASM_X_OFFSET_ALIGNMENT; iblock_horizontal_mask_left |= 1<block_horizontal_mask_left = 0xffff; + } + + int dst_width_adjust = dst_rect->width + dst_rect->x%GPU_ASM_X_OFFSET_ALIGNMENT; + if (dst_width_adjust%GPU_ASM_BLOCK_WIDTH){ + pp_context->block_horizontal_mask_right = (1 << (dst_width_adjust%GPU_ASM_BLOCK_WIDTH)) - 1; } else { - pp_context->block_horizontal_mask = 0xffff; + pp_context->block_horizontal_mask_right = 0xffff; } - if (height%BLOCK_HEIGHT){ - pp_context->block_vertical_mask = (1 << (height%BLOCK_HEIGHT)) - 1; + if (dst_rect->height%GPU_ASM_BLOCK_HEIGHT){ + pp_context->block_vertical_mask_bottom = (1 << (dst_rect->height%GPU_ASM_BLOCK_HEIGHT)) - 1; } else { - pp_context->block_vertical_mask = 0xff; + pp_context->block_vertical_mask_bottom = 0xff; } } @@ -2252,19 +2269,21 @@ pp_nv12_avs_initialize(VADriverContextP ctx, struct i965_post_processing_context pp_context->pp_y_steps = pp_avs_y_steps; pp_context->pp_set_block_parameter = pp_avs_set_block_parameter; - pp_avs_context->dest_x = dst_rect->x; + int dst_left_edge_extend = dst_rect->x%GPU_ASM_X_OFFSET_ALIGNMENT; + float src_left_edge_extend = (float)dst_left_edge_extend*src_rect->width/dst_rect->width; + pp_avs_context->dest_x = dst_rect->x - dst_left_edge_extend; pp_avs_context->dest_y = dst_rect->y; - pp_avs_context->dest_w = ALIGN(dst_rect->width, 16); + pp_avs_context->dest_w = ALIGN(dst_rect->width + dst_left_edge_extend, 16); pp_avs_context->dest_h = ALIGN(dst_rect->height, 8); - pp_avs_context->src_normalized_x = (float)src_rect->x / in_w; + pp_avs_context->src_normalized_x = (float)(src_rect->x - src_left_edge_extend)/ in_w; pp_avs_context->src_normalized_y = (float)src_rect->y / in_h; - pp_avs_context->src_w = src_rect->width; + pp_avs_context->src_w = src_rect->width + src_left_edge_extend; pp_avs_context->src_h = src_rect->height; pp_static_parameter->grf4.r4_2.avs.nlas = nlas; pp_static_parameter->grf1.r1_6.normalized_video_y_scaling_step = (float) src_rect->height / in_h / dst_rect->height; - pp_inline_parameter->grf5.normalized_video_x_scaling_step = (float) src_rect->width / in_w / dst_rect->width; + pp_inline_parameter->grf5.normalized_video_x_scaling_step = (float) (src_rect->width + src_left_edge_extend)/ in_w / (dst_rect->width + dst_left_edge_extend); pp_inline_parameter->grf5.block_count_x = 1; /* M x 1 */ pp_inline_parameter->grf5.number_blocks = pp_avs_context->dest_h / 8; pp_inline_parameter->grf6.video_step_delta = 0.0; @@ -3505,8 +3524,6 @@ ironlake_pp_initialize( else va_status = VA_STATUS_ERROR_UNIMPLEMENTED; - calculate_boundary_block_mask(pp_context, dst_rect->width, dst_rect->height); - return va_status; } @@ -3643,6 +3660,8 @@ gen6_pp_initialize( else va_status = VA_STATUS_ERROR_UNIMPLEMENTED; + calculate_boundary_block_mask(pp_context, dst_rect); + return va_status; } @@ -3819,27 +3838,36 @@ static void update_block_mask_parameter(struct i965_post_processing_context *pp_ struct pp_inline_parameter *pp_inline_parameter = pp_context->pp_inline_parameter; pp_inline_parameter->grf5.block_vertical_mask = 0xff; - pp_inline_parameter->grf5.block_horizontal_mask = 0xffff; - pp_inline_parameter->grf6.block_vertical_mask = pp_context->block_vertical_mask; - pp_inline_parameter->grf6.block_horizontal_mask = pp_context->block_horizontal_mask; + pp_inline_parameter->grf6.block_vertical_mask_bottom = pp_context->block_vertical_mask_bottom; + // for the first block, it always on the left edge. the second block will reload horizontal_mask from grf6.block_horizontal_mask_middle + pp_inline_parameter->grf5.block_horizontal_mask = pp_context->block_horizontal_mask_left; + pp_inline_parameter->grf6.block_horizontal_mask_middle = 0xffff; + pp_inline_parameter->grf6.block_horizontal_mask_right = pp_context->block_horizontal_mask_right; /* 1 x N */ if (x_steps == 1) { if (y == y_steps-1) { - pp_inline_parameter->grf5.block_vertical_mask = pp_context->block_vertical_mask; + pp_inline_parameter->grf5.block_vertical_mask = pp_context->block_vertical_mask_bottom; } else { - pp_inline_parameter->grf6.block_vertical_mask = 0xff; + pp_inline_parameter->grf6.block_vertical_mask_bottom = 0xff; } } /* M x 1 */ if (y_steps == 1) { - if (x == x_steps-1) { - pp_inline_parameter->grf5.block_horizontal_mask = pp_context->block_horizontal_mask; + if (x == 0) { // all blocks in this group are on the left edge + pp_inline_parameter->grf6.block_horizontal_mask_middle = pp_context->block_horizontal_mask_left; + pp_inline_parameter->grf6.block_horizontal_mask_right = pp_context->block_horizontal_mask_left; + } + else if (x == x_steps-1) { + pp_inline_parameter->grf5.block_horizontal_mask = pp_context->block_horizontal_mask_right; + pp_inline_parameter->grf6.block_horizontal_mask_middle = pp_context->block_horizontal_mask_right; } else { - pp_inline_parameter->grf6.block_horizontal_mask = 0xffff; + pp_inline_parameter->grf5.block_horizontal_mask = 0xffff; + pp_inline_parameter->grf6.block_horizontal_mask_middle = 0xffff; + pp_inline_parameter->grf6.block_horizontal_mask_right = 0xffff; } } diff --git a/src/i965_post_processing.h b/src/i965_post_processing.h index 1afffc5..ecfbae1 100755 --- a/src/i965_post_processing.h +++ b/src/i965_post_processing.h @@ -313,13 +313,17 @@ struct pp_inline_parameter /* AVS r6.0 */ float video_step_delta; - /* r6.1 */ - unsigned int block_horizontal_mask:16; - unsigned int block_vertical_mask:8; - unsigned int pad:8; + /* r6.1 */ // sizeof(int) == 4? + unsigned int block_horizontal_mask_right:16; + unsigned int block_vertical_mask_bottom:8; + unsigned int pad1:8; - /* r6.2-r6.7 */ - unsigned int padx[6]; + /* r6.2 */ + unsigned int block_horizontal_mask_middle:16; + unsigned int pad2:16; + + /* r6.3-r6.7 */ + unsigned int padx[5]; } grf6; }; @@ -476,8 +480,9 @@ struct i965_post_processing_context struct intel_batchbuffer *batch; - unsigned int block_horizontal_mask:16; - unsigned int block_vertical_mask:8; + unsigned int block_horizontal_mask_left:16; + unsigned int block_horizontal_mask_right:16; + unsigned int block_vertical_mask_bottom:8; }; struct i965_proc_context