From e9c2a677a04e911529966cccf71eb3a9ae59e6c3 Mon Sep 17 00:00:00 2001 From: Gwenole Beauchesne Date: Fri, 9 May 2014 18:52:00 +0200 Subject: [PATCH] decoder: h264: optimize support for grayscale surfaces. Optimize support for grayscale surfaces in two aspects: (i) space by only allocating the luminance component ; (ii) speed by avoiding initialization of the (now inexistent) chrominance planes. Keep backward compatibility with older codec layers that only supported YUV 4:2:0 and not grayscale formats properly. v2: fix check for extra H.264 chroma formats [Haihao] Signed-off-by: Gwenole Beauchesne --- src/gen6_mfd.c | 8 ++++++-- src/gen75_mfd.c | 6 +++++- src/gen7_mfd.c | 6 +++++- src/gen8_mfd.c | 6 +++++- src/i965_decoder_utils.c | 23 +++++++++++++++++++---- src/i965_device_info.c | 9 +++++++++ src/i965_drv_video.c | 13 +++++++++++++ src/i965_drv_video.h | 9 +++++++++ 8 files changed, 71 insertions(+), 9 deletions(-) diff --git a/src/gen6_mfd.c b/src/gen6_mfd.c index 2092f69..f925d98 100755 --- a/src/gen6_mfd.c +++ b/src/gen6_mfd.c @@ -130,7 +130,11 @@ gen6_mfd_surface_state(VADriverContextP ctx, { struct intel_batchbuffer *batch = gen6_mfd_context->base.batch; struct object_surface *obj_surface = decode_state->render_object; - + unsigned int surface_format; + + surface_format = obj_surface->fourcc == VA_FOURCC_Y800 ? + MFX_SURFACE_MONOCHROME : MFX_SURFACE_PLANAR_420_8; + BEGIN_BCS_BATCH(batch, 6); OUT_BCS_BATCH(batch, MFX_SURFACE_STATE | (6 - 2)); OUT_BCS_BATCH(batch, 0); @@ -138,7 +142,7 @@ gen6_mfd_surface_state(VADriverContextP ctx, ((obj_surface->orig_height - 1) << 19) | ((obj_surface->orig_width - 1) << 6)); OUT_BCS_BATCH(batch, - (MFX_SURFACE_PLANAR_420_8 << 28) | /* 420 planar YUV surface */ + (surface_format << 28) | /* 420 planar YUV surface */ (1 << 27) | /* must be 1 for interleave U/V, hardware requirement */ (0 << 22) | /* surface object control state, FIXME??? */ ((obj_surface->width - 1) << 3) | /* pitch */ diff --git a/src/gen75_mfd.c b/src/gen75_mfd.c index 5b023cf..895b194 100644 --- a/src/gen75_mfd.c +++ b/src/gen75_mfd.c @@ -137,12 +137,16 @@ gen75_mfd_surface_state(VADriverContextP ctx, struct object_surface *obj_surface = decode_state->render_object; unsigned int y_cb_offset; unsigned int y_cr_offset; + unsigned int surface_format; assert(obj_surface); y_cb_offset = obj_surface->y_cb_offset; y_cr_offset = obj_surface->y_cr_offset; + surface_format = obj_surface->fourcc == VA_FOURCC_Y800 ? + MFX_SURFACE_MONOCHROME : MFX_SURFACE_PLANAR_420_8; + BEGIN_BCS_BATCH(batch, 6); OUT_BCS_BATCH(batch, MFX_SURFACE_STATE | (6 - 2)); OUT_BCS_BATCH(batch, 0); @@ -150,7 +154,7 @@ gen75_mfd_surface_state(VADriverContextP ctx, ((obj_surface->orig_height - 1) << 18) | ((obj_surface->orig_width - 1) << 4)); OUT_BCS_BATCH(batch, - (MFX_SURFACE_PLANAR_420_8 << 28) | /* 420 planar YUV surface */ + (surface_format << 28) | /* 420 planar YUV surface */ ((standard_select != MFX_FORMAT_JPEG) << 27) | /* interleave chroma, set to 0 for JPEG */ (0 << 22) | /* surface object control state, ignored */ ((obj_surface->width - 1) << 3) | /* pitch */ diff --git a/src/gen7_mfd.c b/src/gen7_mfd.c index 06eb743..f9114e7 100755 --- a/src/gen7_mfd.c +++ b/src/gen7_mfd.c @@ -135,12 +135,16 @@ gen7_mfd_surface_state(VADriverContextP ctx, struct object_surface *obj_surface = decode_state->render_object; unsigned int y_cb_offset; unsigned int y_cr_offset; + unsigned int surface_format; assert(obj_surface); y_cb_offset = obj_surface->y_cb_offset; y_cr_offset = obj_surface->y_cr_offset; + surface_format = obj_surface->fourcc == VA_FOURCC_Y800 ? + MFX_SURFACE_MONOCHROME : MFX_SURFACE_PLANAR_420_8; + BEGIN_BCS_BATCH(batch, 6); OUT_BCS_BATCH(batch, MFX_SURFACE_STATE | (6 - 2)); OUT_BCS_BATCH(batch, 0); @@ -148,7 +152,7 @@ gen7_mfd_surface_state(VADriverContextP ctx, ((obj_surface->orig_height - 1) << 18) | ((obj_surface->orig_width - 1) << 4)); OUT_BCS_BATCH(batch, - (MFX_SURFACE_PLANAR_420_8 << 28) | /* 420 planar YUV surface */ + (surface_format << 28) | /* 420 planar YUV surface */ ((standard_select != MFX_FORMAT_JPEG) << 27) | /* interleave chroma, set to 0 for JPEG */ (0 << 22) | /* surface object control state, ignored */ ((obj_surface->width - 1) << 3) | /* pitch */ diff --git a/src/gen8_mfd.c b/src/gen8_mfd.c index e3e71fb..10495d8 100644 --- a/src/gen8_mfd.c +++ b/src/gen8_mfd.c @@ -145,12 +145,16 @@ gen8_mfd_surface_state(VADriverContextP ctx, struct object_surface *obj_surface = decode_state->render_object; unsigned int y_cb_offset; unsigned int y_cr_offset; + unsigned int surface_format; assert(obj_surface); y_cb_offset = obj_surface->y_cb_offset; y_cr_offset = obj_surface->y_cr_offset; + surface_format = obj_surface->fourcc == VA_FOURCC_Y800 ? + MFX_SURFACE_MONOCHROME : MFX_SURFACE_PLANAR_420_8; + BEGIN_BCS_BATCH(batch, 6); OUT_BCS_BATCH(batch, MFX_SURFACE_STATE | (6 - 2)); OUT_BCS_BATCH(batch, 0); @@ -158,7 +162,7 @@ gen8_mfd_surface_state(VADriverContextP ctx, ((obj_surface->orig_height - 1) << 18) | ((obj_surface->orig_width - 1) << 4)); OUT_BCS_BATCH(batch, - (MFX_SURFACE_PLANAR_420_8 << 28) | /* 420 planar YUV surface */ + (surface_format << 28) | /* 420 planar YUV surface */ ((standard_select != MFX_FORMAT_JPEG) << 27) | /* interleave chroma, set to 0 for JPEG */ (0 << 22) | /* surface object control state, ignored */ ((obj_surface->width - 1) << 3) | /* pitch */ diff --git a/src/i965_decoder_utils.c b/src/i965_decoder_utils.c index 18704fe..9a5092e 100644 --- a/src/i965_decoder_utils.c +++ b/src/i965_decoder_utils.c @@ -185,25 +185,40 @@ avc_ensure_surface_bo( ) { VAStatus va_status; - uint32_t hw_fourcc, fourcc, subsample; + uint32_t hw_fourcc, fourcc, subsample, chroma_format; /* Validate chroma format */ switch (pic_param->seq_fields.bits.chroma_format_idc) { case 0: // Grayscale fourcc = VA_FOURCC_Y800; subsample = SUBSAMPLE_YUV400; + chroma_format = VA_RT_FORMAT_YUV400; break; case 1: // YUV 4:2:0 fourcc = VA_FOURCC_NV12; subsample = SUBSAMPLE_YUV420; + chroma_format = VA_RT_FORMAT_YUV420; break; default: return VA_STATUS_ERROR_UNSUPPORTED_RT_FORMAT; } - /* XXX: always allocate NV12 (YUV 4:2:0) surfaces for now */ - hw_fourcc = VA_FOURCC_NV12; - subsample = SUBSAMPLE_YUV420; + /* Determine the HW surface format, bound to VA config needs */ + if ((decode_state->base.chroma_formats & chroma_format) == chroma_format) + hw_fourcc = fourcc; + else { + hw_fourcc = 0; + switch (fourcc) { + case VA_FOURCC_Y800: // Implement with an NV12 surface + if (decode_state->base.chroma_formats & VA_RT_FORMAT_YUV420) { + hw_fourcc = VA_FOURCC_NV12; + subsample = SUBSAMPLE_YUV420; + } + break; + } + } + if (!hw_fourcc) + return VA_STATUS_ERROR_UNSUPPORTED_RT_FORMAT; /* (Re-)allocate the underlying surface buffer store, if necessary */ if (!obj_surface->bo || obj_surface->fourcc != hw_fourcc) { diff --git a/src/i965_device_info.c b/src/i965_device_info.c index 1d5d6aa..4fad7a4 100644 --- a/src/i965_device_info.c +++ b/src/i965_device_info.c @@ -27,6 +27,10 @@ #include #include "i965_drv_video.h" +/* Extra set of chroma formats supported for H.264 decoding (beyond YUV 4:2:0) */ +#define EXTRA_H264_DEC_CHROMA_FORMATS \ + (VA_RT_FORMAT_YUV400) + /* Extra set of chroma formats supported for JPEG decoding (beyond YUV 4:2:0) */ #define EXTRA_JPEG_DEC_CHROMA_FORMATS \ (VA_RT_FORMAT_YUV400 | VA_RT_FORMAT_YUV411 | VA_RT_FORMAT_YUV422 | \ @@ -90,6 +94,8 @@ static const struct hw_codec_info snb_hw_codec_info = { .min_linear_wpitch = 16, .min_linear_hpitch = 16, + .h264_dec_chroma_formats = EXTRA_H264_DEC_CHROMA_FORMATS, + .has_mpeg2_decoding = 1, .has_h264_decoding = 1, .has_h264_encoding = 1, @@ -120,6 +126,7 @@ static const struct hw_codec_info ivb_hw_codec_info = { .min_linear_wpitch = 64, .min_linear_hpitch = 16, + .h264_dec_chroma_formats = EXTRA_H264_DEC_CHROMA_FORMATS, .jpeg_dec_chroma_formats = EXTRA_JPEG_DEC_CHROMA_FORMATS, .has_mpeg2_decoding = 1, @@ -156,6 +163,7 @@ static const struct hw_codec_info hsw_hw_codec_info = { .min_linear_wpitch = 64, .min_linear_hpitch = 16, + .h264_dec_chroma_formats = EXTRA_H264_DEC_CHROMA_FORMATS, .jpeg_dec_chroma_formats = EXTRA_JPEG_DEC_CHROMA_FORMATS, .has_mpeg2_decoding = 1, @@ -196,6 +204,7 @@ static const struct hw_codec_info bdw_hw_codec_info = { .min_linear_wpitch = 64, .min_linear_hpitch = 16, + .h264_dec_chroma_formats = EXTRA_H264_DEC_CHROMA_FORMATS, .jpeg_dec_chroma_formats = EXTRA_JPEG_DEC_CHROMA_FORMATS, .has_mpeg2_decoding = 1, diff --git a/src/i965_drv_video.c b/src/i965_drv_video.c index 4690b62..c7da398 100755 --- a/src/i965_drv_video.c +++ b/src/i965_drv_video.c @@ -446,6 +446,13 @@ i965_get_default_chroma_formats(VADriverContextP ctx, VAProfile profile, uint32_t chroma_formats = VA_RT_FORMAT_YUV420; switch (profile) { + case VAProfileH264ConstrainedBaseline: + case VAProfileH264Main: + case VAProfileH264High: + if (HAS_H264_DECODING(i965) && entrypoint == VAEntrypointVLD) + chroma_formats |= i965->codec_info->h264_dec_chroma_formats; + break; + case VAProfileJPEGBaseline: if (HAS_JPEG_DECODING(i965) && entrypoint == VAEntrypointVLD) chroma_formats |= i965->codec_info->jpeg_dec_chroma_formats; @@ -1532,6 +1539,7 @@ i965_CreateContext(VADriverContextP ctx, struct i965_render_state *render_state = &i965->render_state; struct object_config *obj_config = CONFIG(config_id); struct object_context *obj_context = NULL; + VAConfigAttrib *attrib; VAStatus vaStatus = VA_STATUS_SUCCESS; int contextID; int i; @@ -1625,6 +1633,11 @@ i965_CreateContext(VADriverContextP ctx, } } + attrib = i965_lookup_config_attribute(obj_config, VAConfigAttribRTFormat); + if (!attrib) + return VA_STATUS_ERROR_INVALID_CONFIG; + obj_context->codec_state.base.chroma_formats = attrib->value; + /* Error recovery */ if (VA_STATUS_SUCCESS != vaStatus) { i965_destroy_context(&i965->context_heap, (struct object_base *)obj_context); diff --git a/src/i965_drv_video.h b/src/i965_drv_video.h index a09e071..e8bbf87 100644 --- a/src/i965_drv_video.h +++ b/src/i965_drv_video.h @@ -101,8 +101,13 @@ struct object_config #define NUM_SLICES 10 +struct codec_state_base { + uint32_t chroma_formats; +}; + struct decode_state { + struct codec_state_base base; struct buffer_store *pic_param; struct buffer_store **slice_params; struct buffer_store *iq_matrix; @@ -122,6 +127,7 @@ struct decode_state struct encode_state { + struct codec_state_base base; struct buffer_store *seq_param; struct buffer_store *pic_param; struct buffer_store *pic_control; @@ -152,6 +158,7 @@ struct encode_state struct proc_state { + struct codec_state_base base; struct buffer_store *pipeline_param; VASurfaceID current_render_target; @@ -163,6 +170,7 @@ struct proc_state union codec_state { + struct codec_state_base base; struct decode_state decode; struct encode_state encode; struct proc_state proc; @@ -294,6 +302,7 @@ struct hw_codec_info int min_linear_wpitch; int min_linear_hpitch; + unsigned int h264_dec_chroma_formats; unsigned int jpeg_dec_chroma_formats; unsigned int has_mpeg2_decoding:1; -- 2.7.4