From 1eeca88691ad0fd232f110f3a389ebb494c0a6dc Mon Sep 17 00:00:00 2001 From: Jason Garrett-Glaser Date: Fri, 11 Mar 2011 13:43:10 -0800 Subject: [PATCH] VP8: optimize VP8Context struct ordering Shaves at least 3KB off code size on x86, should improve cache utilization. This would probably be useful to do for other decoders/encoders as well. --- libavcodec/vp8.c | 2 +- libavcodec/vp8.h | 156 +++++++++++++++++++++++++++---------------------------- 2 files changed, 79 insertions(+), 79 deletions(-) diff --git a/libavcodec/vp8.c b/libavcodec/vp8.c index de077c8..42f401d 100644 --- a/libavcodec/vp8.c +++ b/libavcodec/vp8.c @@ -474,7 +474,7 @@ void decode_mvs(VP8Context *s, VP8Macroblock *mb, int mb_x, int mb_y) enum { EDGE_TOP, EDGE_LEFT, EDGE_TOPLEFT }; int idx = CNT_ZERO; int cur_sign_bias = s->sign_bias[mb->ref_frame]; - int *sign_bias = s->sign_bias; + int8_t *sign_bias = s->sign_bias; VP56mv near_mv[4]; uint8_t cnt[4] = { 0 }; VP56RangeCoder *c = &s->c; diff --git a/libavcodec/vp8.h b/libavcodec/vp8.h index b94d453..2db056f 100644 --- a/libavcodec/vp8.h +++ b/libavcodec/vp8.h @@ -85,83 +85,24 @@ typedef struct { typedef struct { AVCodecContext *avctx; - DSPContext dsp; - VP8DSPContext vp8dsp; - H264PredContext hpc; - vp8_mc_func put_pixels_tab[3][3][3]; - AVFrame frames[4]; AVFrame *framep[4]; uint8_t *edge_emu_buffer; - VP56RangeCoder c; ///< header context, includes mb modes and motion vectors - int profile; - int mb_width; /* number of horizontal MB */ - int mb_height; /* number of vertical MB */ + uint16_t mb_width; /* number of horizontal MB */ + uint16_t mb_height; /* number of vertical MB */ int linesize; int uvlinesize; - int keyframe; - int invisible; - int update_last; ///< update VP56_FRAME_PREVIOUS with the current one - int update_golden; ///< VP56_FRAME_NONE if not updated, or which frame to copy if so - int update_altref; - int deblock_filter; - - /** - * If this flag is not set, all the probability updates - * are discarded after this frame is decoded. - */ - int update_probabilities; - - /** - * All coefficients are contained in separate arith coding contexts. - * There can be 1, 2, 4, or 8 of these after the header context. - */ - int num_coeff_partitions; - VP56RangeCoder coeff_partition[8]; - - VP8Macroblock *macroblocks; - VP8Macroblock *macroblocks_base; - VP8FilterStrength *filter_strength; - - uint8_t *intra4x4_pred_mode_top; - uint8_t intra4x4_pred_mode_left[4]; - uint8_t *segmentation_map; - - /** - * Cache of the top row needed for intra prediction - * 16 for luma, 8 for each chroma plane - */ - uint8_t (*top_border)[16+8+8]; - - /** - * For coeff decode, we need to know whether the above block had non-zero - * coefficients. This means for each macroblock, we need data for 4 luma - * blocks, 2 u blocks, 2 v blocks, and the luma dc block, for a total of 9 - * per macroblock. We keep the last row in top_nnz. - */ - uint8_t (*top_nnz)[9]; - DECLARE_ALIGNED(8, uint8_t, left_nnz)[9]; - - /** - * This is the index plus one of the last non-zero coeff - * for each of the blocks in the current macroblock. - * So, 0 -> no coeffs - * 1 -> dc-only (special transform) - * 2+-> full transform - */ - DECLARE_ALIGNED(16, uint8_t, non_zero_count_cache)[6][4]; - DECLARE_ALIGNED(16, DCTELEM, block)[6][4][16]; - DECLARE_ALIGNED(16, DCTELEM, block_dc)[16]; - uint8_t intra4x4_pred_mode_mb[16]; - - int chroma_pred_mode; ///< 8x8c pred mode of the current macroblock - int segment; ///< segment of the current macroblock + uint8_t keyframe; + uint8_t deblock_filter; + uint8_t mbskip_enabled; + uint8_t segment; ///< segment of the current macroblock + uint8_t chroma_pred_mode; ///< 8x8c pred mode of the current macroblock + uint8_t profile; VP56mv mv_min; VP56mv mv_max; - int mbskip_enabled; - int sign_bias[4]; ///< one state [0, 1] per ref frame type + int8_t sign_bias[4]; ///< one state [0, 1] per ref frame type int ref_count[3]; /** @@ -170,13 +111,26 @@ typedef struct { * a frame, since the values persist between interframes. */ struct { - int enabled; - int absolute_vals; - int update_map; + uint8_t enabled; + uint8_t absolute_vals; + uint8_t update_map; int8_t base_quant[4]; int8_t filter_level[4]; ///< base loop filter level } segmentation; + struct { + uint8_t simple; + uint8_t level; + uint8_t sharpness; + } filter; + + VP8Macroblock *macroblocks; + VP8FilterStrength *filter_strength; + + uint8_t *intra4x4_pred_mode_top; + uint8_t intra4x4_pred_mode_left[4]; + uint8_t *segmentation_map; + /** * Macroblocks can have one of 4 different quants in a frame when * segmentation is enabled. @@ -190,13 +144,7 @@ typedef struct { } qmat[4]; struct { - int simple; - int level; - int sharpness; - } filter; - - struct { - int enabled; ///< whether each mb can have a different strength based on mode/ref + uint8_t enabled; ///< whether each mb can have a different strength based on mode/ref /** * filter strength adjustment for the following macroblock modes: @@ -220,6 +168,34 @@ typedef struct { } lf_delta; /** + * Cache of the top row needed for intra prediction + * 16 for luma, 8 for each chroma plane + */ + uint8_t (*top_border)[16+8+8]; + + /** + * For coeff decode, we need to know whether the above block had non-zero + * coefficients. This means for each macroblock, we need data for 4 luma + * blocks, 2 u blocks, 2 v blocks, and the luma dc block, for a total of 9 + * per macroblock. We keep the last row in top_nnz. + */ + uint8_t (*top_nnz)[9]; + DECLARE_ALIGNED(8, uint8_t, left_nnz)[9]; + + /** + * This is the index plus one of the last non-zero coeff + * for each of the blocks in the current macroblock. + * So, 0 -> no coeffs + * 1 -> dc-only (special transform) + * 2+-> full transform + */ + DECLARE_ALIGNED(16, uint8_t, non_zero_count_cache)[6][4]; + VP56RangeCoder c; ///< header context, includes mb modes and motion vectors + DECLARE_ALIGNED(16, DCTELEM, block)[6][4][16]; + DECLARE_ALIGNED(16, DCTELEM, block_dc)[16]; + uint8_t intra4x4_pred_mode_mb[16]; + + /** * These are all of the updatable probabilities for binary decisions. * They are only implictly reset on keyframes, making it quite likely * for an interframe to desync if a prior frame's header was corrupt @@ -236,6 +212,30 @@ typedef struct { uint8_t token[4][16][3][NUM_DCT_TOKENS-1]; uint8_t mvc[2][19]; } prob[2]; + + VP8Macroblock *macroblocks_base; + int invisible; + int update_last; ///< update VP56_FRAME_PREVIOUS with the current one + int update_golden; ///< VP56_FRAME_NONE if not updated, or which frame to copy if so + int update_altref; + + /** + * If this flag is not set, all the probability updates + * are discarded after this frame is decoded. + */ + int update_probabilities; + + /** + * All coefficients are contained in separate arith coding contexts. + * There can be 1, 2, 4, or 8 of these after the header context. + */ + int num_coeff_partitions; + VP56RangeCoder coeff_partition[8]; + DSPContext dsp; + VP8DSPContext vp8dsp; + H264PredContext hpc; + vp8_mc_func put_pixels_tab[3][3][3]; + AVFrame frames[4]; } VP8Context; #endif -- 2.7.4