Reduced the size of MB_MODE_INFO
author Scott LaVarnway <slavarnway@google.com>
Thu, 2 Sep 2010 20:17:52 +0000 (16:17 -0400)
committer John Koleszar <jkoleszar@google.com>
Fri, 3 Sep 2010 20:43:23 +0000 (16:43 -0400)
Moved partition_bmi and partition_count out of MB_MODE_INFO and
placed into MACROBLOCK.  Also reduced the size of other members
of the MB_MODE_INFO struct.  For 1080p, the memory was reduced
by 1,209,516 bytes.  The decoder performance appeared to improve
by 3% for the clip used.
Note:  The main goal for this change is to improve the decoder
performance.  The encoder will be revisited at a later date for
further structure cleanup.

Change-Id: I4733621292ee9cc3fffa4046cb3fd4d99bd14613

vp8/common/blockd.h
vp8/decoder/decodemv.c
vp8/decoder/threading.c
vp8/encoder/bitstream.c
vp8/encoder/block.h
vp8/encoder/encodeframe.c
vp8/encoder/ethreading.c
vp8/encoder/firstpass.c
vp8/encoder/onyx_if.c
vp8/encoder/pickinter.c
vp8/encoder/rdopt.c

index b286e2e..1cc5a1f 100644 (file)
@@ -168,14 +168,15 @@ typedef struct
         int as_int;
         MV  as_mv;
     } mv;
-    int partitioning;
-    int partition_count;
-    int mb_skip_coeff;                                //does this mb has coefficients at all, 1=no coefficients, 0=need decode tokens
-    int dc_diff;
-    unsigned char   segment_id;                  // Which set of segmentation parameters should be used for this MB
-    int force_no_skip;
-    int need_to_clamp_mvs;
-    B_MODE_INFO partition_bmi[16];
+
+    char partitioning;
+    unsigned char mb_skip_coeff;                                //does this mb has coefficients at all, 1=no coefficients, 0=need decode tokens
+    unsigned char dc_diff;
+    unsigned char need_to_clamp_mvs;
+
+    unsigned char segment_id;                  // Which set of segmentation parameters should be used for this MB
+
+    unsigned char force_no_skip; //encoder only
 } MB_MODE_INFO;
 
 
@@ -227,8 +228,6 @@ typedef struct
     YV12_BUFFER_CONFIG dst;
 
     MODE_INFO *mode_info_context;
-    MODE_INFO *mode_info;
-
     int mode_info_stride;
 
     FRAME_TYPE frame_type;
index 4f24b44..d141267 100644 (file)
@@ -219,8 +219,8 @@ void vp8_decode_mode_mvs(VP8D_COMP *pbi)
 
                     do  /* for each subset j */
                     {
-                        B_MODE_INFO *const bmi = mbmi->partition_bmi + j;
-                        MV *const mv = & bmi->mv.as_mv;
+                        B_MODE_INFO bmi;
+                        MV *const mv = & bmi.mv.as_mv;
 
                         int k = -1;  /* first block in subset j */
                         int mv_contz;
@@ -237,7 +237,7 @@ void vp8_decode_mode_mvs(VP8D_COMP *pbi)
 
                         mv_contz = vp8_mv_cont(&(vp8_left_bmi(mi, k)->mv.as_mv), &(vp8_above_bmi(mi, k, mis)->mv.as_mv));
 
-                        switch (bmi->mode = (B_PREDICTION_MODE) sub_mv_ref(bc, vp8_sub_mv_ref_prob2 [mv_contz])) //pc->fc.sub_mv_ref_prob))
+                        switch (bmi.mode = (B_PREDICTION_MODE) sub_mv_ref(bc, vp8_sub_mv_ref_prob2 [mv_contz])) //pc->fc.sub_mv_ref_prob))
                         {
                         case NEW4X4:
                             read_mv(bc, mv, (const MV_CONTEXT *) mvc);
@@ -285,7 +285,7 @@ void vp8_decode_mode_mvs(VP8D_COMP *pbi)
                            refer back to us via "left" or "above". */
                         do
                             if (j == L[k])
-                                mi->bmi[k] = *bmi;
+                                mi->bmi[k] = bmi;
 
                         while (++k < 16);
                     }
index 02edba2..93acd36 100644 (file)
@@ -52,7 +52,6 @@ void vp8_setup_decoding_thread_data(VP8D_COMP *pbi, MACROBLOCKD *xd, MB_ROW_DEC
         mbd->subpixel_predict8x8     = xd->subpixel_predict8x8;
         mbd->subpixel_predict16x16   = xd->subpixel_predict16x16;
 
-        mbd->mode_info        = pc->mi - 1;
         mbd->mode_info_context = pc->mi   + pc->mode_info_stride * (i + 1);
         mbd->mode_info_stride  = pc->mode_info_stride;
 
@@ -105,7 +104,6 @@ void vp8_setup_loop_filter_thread_data(VP8D_COMP *pbi, MACROBLOCKD *xd, MB_ROW_D
         //mbd->subpixel_predict8x8     = xd->subpixel_predict8x8;
         //mbd->subpixel_predict16x16   = xd->subpixel_predict16x16;
 
-        mbd->mode_info        = pc->mi - 1;
         mbd->mode_info_context = pc->mi   + pc->mode_info_stride * (i + 1);
         mbd->mode_info_stride  = pc->mode_info_stride;
 
index 2162984..c706395 100644 (file)
@@ -872,6 +872,8 @@ static void pack_inter_mode_mvs(VP8_COMP *const cpi)
     int prob_skip_false = 0;
     ms = pc->mi - 1;
 
+    cpi->mb.partition_info = cpi->mb.pi;
+
     // Calculate the probabilities to be used to code the reference frame based on actual useage this frame
     if (!(cpi->prob_intra_coded = rf_intra * 255 / (rf_intra + rf_inter)))
         cpi->prob_intra_coded = 1;
@@ -1020,7 +1022,7 @@ static void pack_inter_mode_mvs(VP8_COMP *const cpi)
 
                     do
                     {
-                        const B_MODE_INFO *const b = mi->partition_bmi + j;
+                        const B_MODE_INFO *const b = cpi->mb.partition_info->bmi + j;
                         const int *const  L = vp8_mbsplits [mi->partitioning];
                         int k = -1;  /* first block in subset j */
                         int mv_contz;
@@ -1042,7 +1044,7 @@ static void pack_inter_mode_mvs(VP8_COMP *const cpi)
                             write_mv(w, &b->mv.as_mv, &best_mv, (const MV_CONTEXT *) mvc);
                         }
                     }
-                    while (++j < mi->partition_count);
+                    while (++j < cpi->mb.partition_info->count);
                 }
                 break;
                 default:
@@ -1051,9 +1053,11 @@ static void pack_inter_mode_mvs(VP8_COMP *const cpi)
             }
 
             ++m;
+            cpi->mb.partition_info++;
         }
 
         ++m;  /* skip L prediction border */
+        cpi->mb.partition_info++;
     }
 }
 
index c914a32..be2b816 100644 (file)
@@ -52,6 +52,12 @@ typedef struct
 
 typedef struct
 {
+    int count;
+    B_MODE_INFO bmi[16];
+} PARTITION_INFO;
+
+typedef struct
+{
     DECLARE_ALIGNED(16, short, src_diff[400]);       // 16x16 Y 8x8 U 8x8 V 4x4 2nd Y
     DECLARE_ALIGNED(16, short, coeff[400]);     // 16x16 Y 8x8 U 8x8 V 4x4 2nd Y
 
@@ -61,6 +67,9 @@ typedef struct
     YV12_BUFFER_CONFIG src;
 
     MACROBLOCKD e_mbd;
+    PARTITION_INFO *partition_info; /* work pointer */
+    PARTITION_INFO *pi;   /* Corresponds to upper left visible macroblock */
+    PARTITION_INFO *pip;  /* Base of allocated array */
 
     search_site *ss;
     int ss_count;
index 1e0e1ab..96f36ee 100644 (file)
@@ -458,7 +458,7 @@ void encode_mb_row(VP8_COMP *cpi,
 
                 for (b = 0; b < xd->mbmi.partition_count; b++)
                 {
-                    inter_b_modes[xd->mbmi.partition_bmi[b].mode] ++;
+                    inter_b_modes[x->partition->bmi[b].mode] ++;
                 }
             }
 
@@ -511,6 +511,7 @@ void encode_mb_row(VP8_COMP *cpi,
 
         // skip to next mb
         xd->mode_info_context++;
+        x->partition_info++;
 
         xd->above_context++;
         cpi->current_mb_col_main = mb_col;
@@ -525,6 +526,7 @@ void encode_mb_row(VP8_COMP *cpi,
 
     // this is to account for the border
     xd->mode_info_context++;
+    x->partition_info++;
 }
 
 
@@ -594,7 +596,7 @@ void vp8_encode_frame(VP8_COMP *cpi)
 
     totalrate = 0;
 
-    xd->mode_info = cm->mi - 1;
+    x->partition_info = x->pi;
 
     xd->mode_info_context = cm->mi;
     xd->mode_info_stride = cm->mode_info_stride;
@@ -730,6 +732,7 @@ void vp8_encode_frame(VP8_COMP *cpi)
                 x->src.v_buffer +=  8 * x->src.uv_stride * (cpi->encoding_thread_count + 1) - 8 * cm->mb_cols;
 
                 xd->mode_info_context += xd->mode_info_stride * cpi->encoding_thread_count;
+                x->partition_info  += xd->mode_info_stride * cpi->encoding_thread_count;
 
                 if (mb_row < cm->mb_rows - 1)
                     //WaitForSingleObject(cpi->h_event_main, INFINITE);
index b677fde..04093ff 100644 (file)
@@ -147,7 +147,7 @@ THREAD_FUNCTION thread_encoding_proc(void *p_data)
 
                                 for (b = 0; b < xd->mbmi.partition_count; b++)
                                 {
-                                    inter_b_modes[xd->mbmi.partition_bmi[b].mode] ++;
+                                    inter_b_modes[x->partition->bmi[b].mode] ++;
                                 }
                             }
 
@@ -179,6 +179,7 @@ THREAD_FUNCTION thread_encoding_proc(void *p_data)
 
                         // skip to next mb
                         xd->mode_info_context++;
+                        x->partition_info++;
 
                         xd->above_context++;
 
@@ -195,12 +196,14 @@ THREAD_FUNCTION thread_encoding_proc(void *p_data)
 
                     // this is to account for the border
                     xd->mode_info_context++;
+                    x->partition_info++;
 
                     x->src.y_buffer += 16 * x->src.y_stride * (cpi->encoding_thread_count + 1) - 16 * cm->mb_cols;
                     x->src.u_buffer +=  8 * x->src.uv_stride * (cpi->encoding_thread_count + 1) - 8 * cm->mb_cols;
                     x->src.v_buffer +=  8 * x->src.uv_stride * (cpi->encoding_thread_count + 1) - 8 * cm->mb_cols;
 
                     xd->mode_info_context += xd->mode_info_stride * cpi->encoding_thread_count;
+                    x->partition_info += xd->mode_info_stride * cpi->encoding_thread_count;
 
                     if (ithread == (cpi->encoding_thread_count - 1) || mb_row == cm->mb_rows - 1)
                     {
@@ -364,7 +367,8 @@ void vp8cx_init_mbrthread_data(VP8_COMP *cpi,
         vpx_memset(mbr_ei[i].segment_counts, 0, sizeof(mbr_ei[i].segment_counts));
         mbr_ei[i].totalrate = 0;
 
-        mbd->mode_info        = cm->mi - 1;
+        mb->partition_info = x->pi + x->e_mbd.mode_info_stride * (i + 1);
+
         mbd->mode_info_context = cm->mi   + x->e_mbd.mode_info_stride * (i + 1);
         mbd->mode_info_stride  = cm->mode_info_stride;
 
index a58b9b7..fea4e3d 100644 (file)
@@ -565,6 +565,8 @@ void vp8_first_pass(VP8_COMP *cpi)
     xd->pre = *lst_yv12;
     xd->dst = *new_yv12;
 
+    x->partition_info = x->pi;
+
     xd->mode_info_context = cm->mi;
 
     vp8_build_block_offsets(x);
index fe83ae9..201de33 100644 (file)
@@ -29,6 +29,8 @@
 #include "swapyv12buffer.h"
 #include "threading.h"
 #include "vpx_ports/vpx_timer.h"
+#include "vpxerrors.h"
+
 #include <math.h>
 #include <stdio.h>
 #include <limits.h>
@@ -230,6 +232,11 @@ void vp8_dealloc_compressor_data(VP8_COMP *cpi)
 
     cpi->gf_active_flags = 0;
 
+    if(cpi->mb.pip)
+        vpx_free(cpi->mb.pip);
+
+    cpi->mb.pip = 0;
+
 }
 
 static void enable_segmentation(VP8_PTR ptr)
@@ -1221,6 +1228,20 @@ static void alloc_raw_frame_buffers(VP8_COMP *cpi)
 
     cpi->source_buffer_count = 0;
 }
+
+static int vp8_alloc_partition_data(VP8_COMP *cpi)
+{
+    cpi->mb.pip = vpx_calloc((cpi->common.mb_cols + 1) *
+                                (cpi->common.mb_rows + 1),
+                                sizeof(PARTITION_INFO));
+    if(!cpi->mb.pip)
+        return ALLOC_FAILURE;
+
+    cpi->mb.pi = cpi->mb.pip + cpi->common.mode_info_stride + 1;
+
+    return 0;
+}
+
 void vp8_alloc_compressor_data(VP8_COMP *cpi)
 {
     VP8_COMMON *cm = & cpi->common;
@@ -1232,6 +1253,11 @@ void vp8_alloc_compressor_data(VP8_COMP *cpi)
         vpx_internal_error(&cpi->common.error, VPX_CODEC_MEM_ERROR,
                            "Failed to allocate frame buffers");
 
+    if (vp8_alloc_partition_data(cpi))
+        vpx_internal_error(&cpi->common.error, VPX_CODEC_MEM_ERROR,
+                           "Failed to allocate partition data");
+
+
     if ((width & 0xf) != 0)
         width += 16 - (width & 0xf);
 
index 9da715e..2be412c 100644 (file)
@@ -430,6 +430,7 @@ int vp8_pick_inter_mode(VP8_COMP *cpi, MACROBLOCK *x, int recon_yoffset, int rec
     MACROBLOCKD *xd = &x->e_mbd;
     B_MODE_INFO best_bmodes[16];
     MB_MODE_INFO best_mbmode;
+    PARTITION_INFO best_partition;
     MV best_ref_mv1;
     MV mode_mv[MB_MODE_COUNT];
     MB_PREDICTION_MODE this_mode;
@@ -832,6 +833,7 @@ int vp8_pick_inter_mode(VP8_COMP *cpi, MACROBLOCK *x, int recon_yoffset, int rec
             *returndistortion = distortion2;
             best_rd = this_rd;
             vpx_memcpy(&best_mbmode, &x->e_mbd.mode_info_context->mbmi, sizeof(MB_MODE_INFO));
+            vpx_memcpy(&best_partition, x->partition_info, sizeof(PARTITION_INFO));
 
             if (this_mode == B_PRED || this_mode == SPLITMV)
                 for (i = 0; i < 16; i++)
@@ -906,6 +908,7 @@ int vp8_pick_inter_mode(VP8_COMP *cpi, MACROBLOCK *x, int recon_yoffset, int rec
         best_mbmode.dc_diff = 0;
 
         vpx_memcpy(&x->e_mbd.mode_info_context->mbmi, &best_mbmode, sizeof(MB_MODE_INFO));
+        vpx_memcpy(x->partition_info, &best_partition, sizeof(PARTITION_INFO));
 
         for (i = 0; i < 16; i++)
         {
@@ -920,6 +923,7 @@ int vp8_pick_inter_mode(VP8_COMP *cpi, MACROBLOCK *x, int recon_yoffset, int rec
 
     // macroblock modes
     vpx_memcpy(&x->e_mbd.mode_info_context->mbmi, &best_mbmode, sizeof(MB_MODE_INFO));
+    vpx_memcpy(x->partition_info, &best_partition, sizeof(PARTITION_INFO));
 
     if (x->e_mbd.mode_info_context->mbmi.mode == B_PRED || x->e_mbd.mode_info_context->mbmi.mode == SPLITMV)
         for (i = 0; i < 16; i++)
index c5dd59a..75a71d7 100644 (file)
@@ -1439,9 +1439,9 @@ static int vp8_rd_pick_best_mbsegmentation(VP8_COMP *cpi, MACROBLOCK *x, MV *bes
     // save partitions
     labels = vp8_mbsplits[best_seg];
     x->e_mbd.mode_info_context->mbmi.partitioning = best_seg;
-    x->e_mbd.mode_info_context->mbmi.partition_count = vp8_count_labels(labels);
+    x->partition_info->count = vp8_count_labels(labels);
 
-    for (i = 0; i < x->e_mbd.mode_info_context->mbmi.partition_count; i++)
+    for (i = 0; i < x->partition_info->count; i++)
     {
         int j;
 
@@ -1451,8 +1451,8 @@ static int vp8_rd_pick_best_mbsegmentation(VP8_COMP *cpi, MACROBLOCK *x, MV *bes
                 break;
         }
 
-        x->e_mbd.mode_info_context->mbmi.partition_bmi[i].mode = x->e_mbd.block[j].bmi.mode;
-        x->e_mbd.mode_info_context->mbmi.partition_bmi[i].mv.as_mv = x->e_mbd.block[j].bmi.mv.as_mv;
+        x->partition_info->bmi[i].mode = x->e_mbd.block[j].bmi.mode;
+        x->partition_info->bmi[i].mv.as_mv = x->e_mbd.block[j].bmi.mv.as_mv;
     }
 
     return best_segment_rd;
@@ -1466,6 +1466,7 @@ int vp8_rd_pick_inter_mode(VP8_COMP *cpi, MACROBLOCK *x, int recon_yoffset, int
     MACROBLOCKD *xd = &x->e_mbd;
     B_MODE_INFO best_bmodes[16];
     MB_MODE_INFO best_mbmode;
+    PARTITION_INFO best_partition;
     MV best_ref_mv;
     MV mode_mv[MB_MODE_COUNT];
     MB_PREDICTION_MODE this_mode;
@@ -1787,19 +1788,19 @@ int vp8_rd_pick_inter_mode(VP8_COMP *cpi, MACROBLOCK *x, int recon_yoffset, int
             }
 
             // trap cases where the 8x8s can be promoted to 8x16s or 16x8s
-            if (0)//x->e_mbd.mbmi.partition_count == 4)
+            if (0)//x->partition_info->count == 4)
             {
 
-                if (x->e_mbd.mode_info_context->mbmi.partition_bmi[0].mv.as_int == x->e_mbd.mode_info_context->mbmi.partition_bmi[1].mv.as_int
-                    && x->e_mbd.mode_info_context->mbmi.partition_bmi[2].mv.as_int == x->e_mbd.mode_info_context->mbmi.partition_bmi[3].mv.as_int)
+                if (x->partition_info->bmi[0].mv.as_int == x->partition_info->bmi[1].mv.as_int
+                    && x->partition_info->bmi[2].mv.as_int == x->partition_info->bmi[3].mv.as_int)
                 {
                     const int *labels = vp8_mbsplits[2];
                     x->e_mbd.mode_info_context->mbmi.partitioning = 0;
                     rate -= vp8_cost_token(vp8_mbsplit_tree, vp8_mbsplit_probs, vp8_mbsplit_encodings + 2);
                     rate += vp8_cost_token(vp8_mbsplit_tree, vp8_mbsplit_probs, vp8_mbsplit_encodings);
-                    //rate -=  x->inter_bmode_costs[  x->e_mbd.mbmi.partition_bmi[1]];
-                    //rate -=  x->inter_bmode_costs[  x->e_mbd.mbmi.partition_bmi[3]];
-                    x->e_mbd.mode_info_context->mbmi.partition_bmi[1] = x->e_mbd.mode_info_context->mbmi.partition_bmi[2];
+                    //rate -=  x->inter_bmode_costs[  x->partition_info->bmi[1]];
+                    //rate -=  x->inter_bmode_costs[  x->partition_info->bmi[3]];
+                    x->partition_info->bmi[1] = x->partition_info->bmi[2];
                 }
             }
 
@@ -2143,6 +2144,7 @@ int vp8_rd_pick_inter_mode(VP8_COMP *cpi, MACROBLOCK *x, int recon_yoffset, int
             *returndistortion = distortion2;
             best_rd = this_rd;
             vpx_memcpy(&best_mbmode, &x->e_mbd.mode_info_context->mbmi, sizeof(MB_MODE_INFO));
+            vpx_memcpy(&best_partition, x->partition_info, sizeof(PARTITION_INFO));
 
             for (i = 0; i < 16; i++)
             {
@@ -2224,6 +2226,7 @@ int vp8_rd_pick_inter_mode(VP8_COMP *cpi, MACROBLOCK *x, int recon_yoffset, int
         best_mbmode.dc_diff = 0;
 
         vpx_memcpy(&x->e_mbd.mode_info_context->mbmi, &best_mbmode, sizeof(MB_MODE_INFO));
+        vpx_memcpy(x->partition_info, &best_partition, sizeof(PARTITION_INFO));
 
         for (i = 0; i < 16; i++)
         {
@@ -2238,6 +2241,7 @@ int vp8_rd_pick_inter_mode(VP8_COMP *cpi, MACROBLOCK *x, int recon_yoffset, int
 
     // macroblock modes
     vpx_memcpy(&x->e_mbd.mode_info_context->mbmi, &best_mbmode, sizeof(MB_MODE_INFO));
+    vpx_memcpy(x->partition_info, &best_partition, sizeof(PARTITION_INFO));
 
     for (i = 0; i < 16; i++)
     {