vp8/encoder/encodeframe.c

   1 /*
   2  *  Copyright (c) 2010 The WebM project authors. All Rights Reserved.
   3  *
   4  *  Use of this source code is governed by a BSD-style license
   5  *  that can be found in the LICENSE file in the root of the source
   6  *  tree. An additional intellectual property rights grant can be found
   7  *  in the file PATENTS.  All contributing project authors may
   8  *  be found in the AUTHORS file in the root of the source tree.
   9  */
  10
  11
  12 #include "vpx_ports/config.h"
  13 #include "encodemb.h"
  14 #include "encodemv.h"
  15 #include "common.h"
  16 #include "onyx_int.h"
  17 #include "extend.h"
  18 #include "entropymode.h"
  19 #include "quant_common.h"
  20 #include "segmentation.h"
  21 #include "setupintrarecon.h"
  22 #include "encodeintra.h"
  23 #include "reconinter.h"
  24 #include "rdopt.h"
  25 #include "pickinter.h"
  26 #include "findnearmv.h"
  27 #include "reconintra.h"
  28 #include <stdio.h>
  29 #include <limits.h>
  30 #include "subpixel.h"
  31 #include "vpx_ports/vpx_timer.h"
  32
  33 #if CONFIG_RUNTIME_CPU_DETECT
  34 #define RTCD(x)     &cpi->common.rtcd.x
  35 #define IF_RTCD(x)  (x)
  36 #else
  37 #define RTCD(x)     NULL
  38 #define IF_RTCD(x)  NULL
  39 #endif
  40 extern void vp8_stuff_mb(VP8_COMP *cpi, MACROBLOCKD *x, TOKENEXTRA **t) ;
  41
  42 extern void vp8cx_initialize_me_consts(VP8_COMP *cpi, int QIndex);
  43 extern void vp8_auto_select_speed(VP8_COMP *cpi);
  44 extern void vp8cx_init_mbrthread_data(VP8_COMP *cpi,
  45                                       MACROBLOCK *x,
  46                                       MB_ROW_COMP *mbr_ei,
  47                                       int mb_row,
  48                                       int count);
  49 void vp8_build_block_offsets(MACROBLOCK *x);
  50 void vp8_setup_block_ptrs(MACROBLOCK *x);
  51 int vp8cx_encode_inter_macroblock(VP8_COMP *cpi, MACROBLOCK *x, TOKENEXTRA **t, int recon_yoffset, int recon_uvoffset);
  52 int vp8cx_encode_intra_macro_block(VP8_COMP *cpi, MACROBLOCK *x, TOKENEXTRA **t);
  53
  54 #ifdef MODE_STATS
  55 unsigned int inter_y_modes[10] = {0, 0, 0, 0, 0, 0, 0, 0, 0, 0};
  56 unsigned int inter_uv_modes[4] = {0, 0, 0, 0};
  57 unsigned int inter_b_modes[15]  = {0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0};
  58 unsigned int y_modes[5]   = {0, 0, 0, 0, 0};
  59 unsigned int uv_modes[4]  = {0, 0, 0, 0};
  60 unsigned int b_modes[14]  = {0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0};
  61 #endif
  62
  63 static const int qrounding_factors[129] =
  64 {
  65     48, 48, 48, 48, 48, 48, 48, 48,
  66     48, 48, 48, 48, 48, 48, 48, 48,
  67     48, 48, 48, 48, 48, 48, 48, 48,
  68     48, 48, 48, 48, 48, 48, 48, 48,
  69     48, 48, 48, 48, 48, 48, 48, 48,
  70     48, 48, 48, 48, 48, 48, 48, 48,
  71     48, 48, 48, 48, 48, 48, 48, 48,
  72     48, 48, 48, 48, 48, 48, 48, 48,
  73     48, 48, 48, 48, 48, 48, 48, 48,
  74     48, 48, 48, 48, 48, 48, 48, 48,
  75     48, 48, 48, 48, 48, 48, 48, 48,
  76     48, 48, 48, 48, 48, 48, 48, 48,
  77     48, 48, 48, 48, 48, 48, 48, 48,
  78     48, 48, 48, 48, 48, 48, 48, 48,
  79     48, 48, 48, 48, 48, 48, 48, 48,
  80     48, 48, 48, 48, 48, 48, 48, 48,
  81     48
  82 };
  83
  84 static const int qzbin_factors[129] =
  85 {
  86     84, 84, 84, 84, 84, 84, 84, 84,
  87     84, 84, 84, 84, 84, 84, 84, 84,
  88     84, 84, 84, 84, 84, 84, 84, 84,
  89     84, 84, 84, 84, 84, 84, 84, 84,
  90     84, 84, 84, 84, 84, 84, 84, 84,
  91     84, 84, 84, 84, 84, 84, 84, 84,
  92     80, 80, 80, 80, 80, 80, 80, 80,
  93     80, 80, 80, 80, 80, 80, 80, 80,
  94     80, 80, 80, 80, 80, 80, 80, 80,
  95     80, 80, 80, 80, 80, 80, 80, 80,
  96     80, 80, 80, 80, 80, 80, 80, 80,
  97     80, 80, 80, 80, 80, 80, 80, 80,
  98     80, 80, 80, 80, 80, 80, 80, 80,
  99     80, 80, 80, 80, 80, 80, 80, 80,
 100     80, 80, 80, 80, 80, 80, 80, 80,
 101     80, 80, 80, 80, 80, 80, 80, 80,
 102     80
 103 };
 104
 105 static const int qrounding_factors_y2[129] =
 106 {
 107     48, 48, 48, 48, 48, 48, 48, 48,
 108     48, 48, 48, 48, 48, 48, 48, 48,
 109     48, 48, 48, 48, 48, 48, 48, 48,
 110     48, 48, 48, 48, 48, 48, 48, 48,
 111     48, 48, 48, 48, 48, 48, 48, 48,
 112     48, 48, 48, 48, 48, 48, 48, 48,
 113     48, 48, 48, 48, 48, 48, 48, 48,
 114     48, 48, 48, 48, 48, 48, 48, 48,
 115     48, 48, 48, 48, 48, 48, 48, 48,
 116     48, 48, 48, 48, 48, 48, 48, 48,
 117     48, 48, 48, 48, 48, 48, 48, 48,
 118     48, 48, 48, 48, 48, 48, 48, 48,
 119     48, 48, 48, 48, 48, 48, 48, 48,
 120     48, 48, 48, 48, 48, 48, 48, 48,
 121     48, 48, 48, 48, 48, 48, 48, 48,
 122     48, 48, 48, 48, 48, 48, 48, 48,
 123     48
 124 };
 125
 126 static const int qzbin_factors_y2[129] =
 127 {
 128     84, 84, 84, 84, 84, 84, 84, 84,
 129     84, 84, 84, 84, 84, 84, 84, 84,
 130     84, 84, 84, 84, 84, 84, 84, 84,
 131     84, 84, 84, 84, 84, 84, 84, 84,
 132     84, 84, 84, 84, 84, 84, 84, 84,
 133     84, 84, 84, 84, 84, 84, 84, 84,
 134     80, 80, 80, 80, 80, 80, 80, 80,
 135     80, 80, 80, 80, 80, 80, 80, 80,
 136     80, 80, 80, 80, 80, 80, 80, 80,
 137     80, 80, 80, 80, 80, 80, 80, 80,
 138     80, 80, 80, 80, 80, 80, 80, 80,
 139     80, 80, 80, 80, 80, 80, 80, 80,
 140     80, 80, 80, 80, 80, 80, 80, 80,
 141     80, 80, 80, 80, 80, 80, 80, 80,
 142     80, 80, 80, 80, 80, 80, 80, 80,
 143     80, 80, 80, 80, 80, 80, 80, 80,
 144     80
 145 };
 146
 147 #define EXACT_QUANT
 148 #ifdef EXACT_QUANT
 149 static void vp8cx_invert_quant(int improved_quant, short *quant,
 150                                short *shift, short d)
 151 {
 152     if(improved_quant)
 153     {
 154         unsigned t;
 155         int l;
 156         t = d;
 157         for(l = 0; t > 1; l++)
 158             t>>=1;
 159         t = 1 + (1<<(16+l))/d;
 160         *quant = (short)(t - (1<<16));
 161         *shift = l;
 162     }
 163     else
 164     {
 165         *quant = (1 << 16) / d;
 166         *shift = 0;
 167     }
 168 }
 169
 170 void vp8cx_init_quantizer(VP8_COMP *cpi)
 171 {
 172     int i;
 173     int quant_val;
 174     int Q;
 175
 176     int zbin_boost[16] = {0, 0, 8, 10, 12, 14, 16, 20, 24, 28, 32, 36, 40, 44, 44, 44};
 177
 178     for (Q = 0; Q < QINDEX_RANGE; Q++)
 179     {
 180         // dc values
 181         quant_val = vp8_dc_quant(Q, cpi->common.y1dc_delta_q);
 182         cpi->Y1quant_fast[Q][0] = (1 << 16) / quant_val;
 183         vp8cx_invert_quant(cpi->sf.improved_quant, cpi->Y1quant[Q] + 0,
 184                            cpi->Y1quant_shift[Q] + 0, quant_val);
 185         cpi->Y1zbin[Q][0] = ((qzbin_factors[Q] * quant_val) + 64) >> 7;
 186         cpi->Y1round[Q][0] = (qrounding_factors[Q] * quant_val) >> 7;
 187         cpi->common.Y1dequant[Q][0] = quant_val;
 188         cpi->zrun_zbin_boost_y1[Q][0] = (quant_val * zbin_boost[0]) >> 7;
 189
 190         quant_val = vp8_dc2quant(Q, cpi->common.y2dc_delta_q);
 191         cpi->Y2quant_fast[Q][0] = (1 << 16) / quant_val;
 192         vp8cx_invert_quant(cpi->sf.improved_quant, cpi->Y2quant[Q] + 0,
 193                            cpi->Y2quant_shift[Q] + 0, quant_val);
 194         cpi->Y2zbin[Q][0] = ((qzbin_factors_y2[Q] * quant_val) + 64) >> 7;
 195         cpi->Y2round[Q][0] = (qrounding_factors_y2[Q] * quant_val) >> 7;
 196         cpi->common.Y2dequant[Q][0] = quant_val;
 197         cpi->zrun_zbin_boost_y2[Q][0] = (quant_val * zbin_boost[0]) >> 7;
 198
 199         quant_val = vp8_dc_uv_quant(Q, cpi->common.uvdc_delta_q);
 200         cpi->UVquant_fast[Q][0] = (1 << 16) / quant_val;
 201         vp8cx_invert_quant(cpi->sf.improved_quant, cpi->UVquant[Q] + 0,
 202                            cpi->UVquant_shift[Q] + 0, quant_val);
 203         cpi->UVzbin[Q][0] = ((qzbin_factors[Q] * quant_val) + 64) >> 7;;
 204         cpi->UVround[Q][0] = (qrounding_factors[Q] * quant_val) >> 7;
 205         cpi->common.UVdequant[Q][0] = quant_val;
 206         cpi->zrun_zbin_boost_uv[Q][0] = (quant_val * zbin_boost[0]) >> 7;
 207
 208         // all the ac values = ;
 209         for (i = 1; i < 16; i++)
 210         {
 211             int rc = vp8_default_zig_zag1d[i];
 212
 213             quant_val = vp8_ac_yquant(Q);
 214             cpi->Y1quant_fast[Q][rc] = (1 << 16) / quant_val;
 215             vp8cx_invert_quant(cpi->sf.improved_quant, cpi->Y1quant[Q] + rc,
 216                                cpi->Y1quant_shift[Q] + rc, quant_val);
 217             cpi->Y1zbin[Q][rc] = ((qzbin_factors[Q] * quant_val) + 64) >> 7;
 218             cpi->Y1round[Q][rc] = (qrounding_factors[Q] * quant_val) >> 7;
 219             cpi->common.Y1dequant[Q][rc] = quant_val;
 220             cpi->zrun_zbin_boost_y1[Q][i] = (quant_val * zbin_boost[i]) >> 7;
 221
 222             quant_val = vp8_ac2quant(Q, cpi->common.y2ac_delta_q);
 223             cpi->Y2quant_fast[Q][rc] = (1 << 16) / quant_val;
 224             vp8cx_invert_quant(cpi->sf.improved_quant, cpi->Y2quant[Q] + rc,
 225                                cpi->Y2quant_shift[Q] + rc, quant_val);
 226             cpi->Y2zbin[Q][rc] = ((qzbin_factors_y2[Q] * quant_val) + 64) >> 7;
 227             cpi->Y2round[Q][rc] = (qrounding_factors_y2[Q] * quant_val) >> 7;
 228             cpi->common.Y2dequant[Q][rc] = quant_val;
 229             cpi->zrun_zbin_boost_y2[Q][i] = (quant_val * zbin_boost[i]) >> 7;
 230
 231             quant_val = vp8_ac_uv_quant(Q, cpi->common.uvac_delta_q);
 232             cpi->UVquant_fast[Q][rc] = (1 << 16) / quant_val;
 233             vp8cx_invert_quant(cpi->sf.improved_quant, cpi->UVquant[Q] + rc,
 234                                cpi->UVquant_shift[Q] + rc, quant_val);
 235             cpi->UVzbin[Q][rc] = ((qzbin_factors[Q] * quant_val) + 64) >> 7;
 236             cpi->UVround[Q][rc] = (qrounding_factors[Q] * quant_val) >> 7;
 237             cpi->common.UVdequant[Q][rc] = quant_val;
 238             cpi->zrun_zbin_boost_uv[Q][i] = (quant_val * zbin_boost[i]) >> 7;
 239         }
 240     }
 241 }
 242 #else
 243 void vp8cx_init_quantizer(VP8_COMP *cpi)
 244 {
 245     int i;
 246     int quant_val;
 247     int Q;
 248
 249     int zbin_boost[16] = {0, 0, 8, 10, 12, 14, 16, 20, 24, 28, 32, 36, 40, 44, 44, 44};
 250
 251     for (Q = 0; Q < QINDEX_RANGE; Q++)
 252     {
 253         // dc values
 254         quant_val = vp8_dc_quant(Q, cpi->common.y1dc_delta_q);
 255         cpi->Y1quant[Q][0] = (1 << 16) / quant_val;
 256         cpi->Y1zbin[Q][0] = ((qzbin_factors[Q] * quant_val) + 64) >> 7;
 257         cpi->Y1round[Q][0] = (qrounding_factors[Q] * quant_val) >> 7;
 258         cpi->common.Y1dequant[Q][0] = quant_val;
 259         cpi->zrun_zbin_boost_y1[Q][0] = (quant_val * zbin_boost[0]) >> 7;
 260
 261         quant_val = vp8_dc2quant(Q, cpi->common.y2dc_delta_q);
 262         cpi->Y2quant[Q][0] = (1 << 16) / quant_val;
 263         cpi->Y2zbin[Q][0] = ((qzbin_factors_y2[Q] * quant_val) + 64) >> 7;
 264         cpi->Y2round[Q][0] = (qrounding_factors_y2[Q] * quant_val) >> 7;
 265         cpi->common.Y2dequant[Q][0] = quant_val;
 266         cpi->zrun_zbin_boost_y2[Q][0] = (quant_val * zbin_boost[0]) >> 7;
 267
 268         quant_val = vp8_dc_uv_quant(Q, cpi->common.uvdc_delta_q);
 269         cpi->UVquant[Q][0] = (1 << 16) / quant_val;
 270         cpi->UVzbin[Q][0] = ((qzbin_factors[Q] * quant_val) + 64) >> 7;;
 271         cpi->UVround[Q][0] = (qrounding_factors[Q] * quant_val) >> 7;
 272         cpi->common.UVdequant[Q][0] = quant_val;
 273         cpi->zrun_zbin_boost_uv[Q][0] = (quant_val * zbin_boost[0]) >> 7;
 274
 275         // all the ac values = ;
 276         for (i = 1; i < 16; i++)
 277         {
 278             int rc = vp8_default_zig_zag1d[i];
 279
 280             quant_val = vp8_ac_yquant(Q);
 281             cpi->Y1quant[Q][rc] = (1 << 16) / quant_val;
 282             cpi->Y1zbin[Q][rc] = ((qzbin_factors[Q] * quant_val) + 64) >> 7;
 283             cpi->Y1round[Q][rc] = (qrounding_factors[Q] * quant_val) >> 7;
 284             cpi->common.Y1dequant[Q][rc] = quant_val;
 285             cpi->zrun_zbin_boost_y1[Q][i] = (quant_val * zbin_boost[i]) >> 7;
 286
 287             quant_val = vp8_ac2quant(Q, cpi->common.y2ac_delta_q);
 288             cpi->Y2quant[Q][rc] = (1 << 16) / quant_val;
 289             cpi->Y2zbin[Q][rc] = ((qzbin_factors_y2[Q] * quant_val) + 64) >> 7;
 290             cpi->Y2round[Q][rc] = (qrounding_factors_y2[Q] * quant_val) >> 7;
 291             cpi->common.Y2dequant[Q][rc] = quant_val;
 292             cpi->zrun_zbin_boost_y2[Q][i] = (quant_val * zbin_boost[i]) >> 7;
 293
 294             quant_val = vp8_ac_uv_quant(Q, cpi->common.uvac_delta_q);
 295             cpi->UVquant[Q][rc] = (1 << 16) / quant_val;
 296             cpi->UVzbin[Q][rc] = ((qzbin_factors[Q] * quant_val) + 64) >> 7;
 297             cpi->UVround[Q][rc] = (qrounding_factors[Q] * quant_val) >> 7;
 298             cpi->common.UVdequant[Q][rc] = quant_val;
 299             cpi->zrun_zbin_boost_uv[Q][i] = (quant_val * zbin_boost[i]) >> 7;
 300         }
 301     }
 302 }
 303 #endif
 304 void vp8cx_mb_init_quantizer(VP8_COMP *cpi, MACROBLOCK *x)
 305 {
 306     int i;
 307     int QIndex;
 308     MACROBLOCKD *xd = &x->e_mbd;
 309     int zbin_extra;
 310
 311     // Select the baseline MB Q index.
 312     if (xd->segmentation_enabled)
 313     {
 314         // Abs Value
 315         if (xd->mb_segement_abs_delta == SEGMENT_ABSDATA)
 316
 317             QIndex = xd->segment_feature_data[MB_LVL_ALT_Q][xd->mode_info_context->mbmi.segment_id];
 318         // Delta Value
 319         else
 320         {
 321             QIndex = cpi->common.base_qindex + xd->segment_feature_data[MB_LVL_ALT_Q][xd->mode_info_context->mbmi.segment_id];
 322             QIndex = (QIndex >= 0) ? ((QIndex <= MAXQ) ? QIndex : MAXQ) : 0;    // Clamp to valid range
 323         }
 324     }
 325     else
 326         QIndex = cpi->common.base_qindex;
 327
 328     // Y
 329     zbin_extra = (cpi->common.Y1dequant[QIndex][1] * (cpi->zbin_over_quant + cpi->zbin_mode_boost)) >> 7;
 330
 331     for (i = 0; i < 16; i++)
 332     {
 333         x->block[i].quant = cpi->Y1quant[QIndex];
 334         x->block[i].quant_fast = cpi->Y1quant_fast[QIndex];
 335         x->block[i].quant_shift = cpi->Y1quant_shift[QIndex];
 336         x->block[i].zbin = cpi->Y1zbin[QIndex];
 337         x->block[i].round = cpi->Y1round[QIndex];
 338         x->e_mbd.block[i].dequant = cpi->common.Y1dequant[QIndex];
 339         x->block[i].zrun_zbin_boost = cpi->zrun_zbin_boost_y1[QIndex];
 340         x->block[i].zbin_extra = (short)zbin_extra;
 341     }
 342
 343     // UV
 344     zbin_extra = (cpi->common.UVdequant[QIndex][1] * (cpi->zbin_over_quant + cpi->zbin_mode_boost)) >> 7;
 345
 346     for (i = 16; i < 24; i++)
 347     {
 348         x->block[i].quant = cpi->UVquant[QIndex];
 349         x->block[i].quant_fast = cpi->UVquant_fast[QIndex];
 350         x->block[i].quant_shift = cpi->UVquant_shift[QIndex];
 351         x->block[i].zbin = cpi->UVzbin[QIndex];
 352         x->block[i].round = cpi->UVround[QIndex];
 353         x->e_mbd.block[i].dequant = cpi->common.UVdequant[QIndex];
 354         x->block[i].zrun_zbin_boost = cpi->zrun_zbin_boost_uv[QIndex];
 355         x->block[i].zbin_extra = (short)zbin_extra;
 356     }
 357
 358     // Y2
 359     zbin_extra = (cpi->common.Y2dequant[QIndex][1] * ((cpi->zbin_over_quant / 2) + cpi->zbin_mode_boost)) >> 7;
 360     x->block[24].quant_fast = cpi->Y2quant_fast[QIndex];
 361     x->block[24].quant = cpi->Y2quant[QIndex];
 362     x->block[24].quant_shift = cpi->Y2quant_shift[QIndex];
 363     x->block[24].zbin = cpi->Y2zbin[QIndex];
 364     x->block[24].round = cpi->Y2round[QIndex];
 365     x->e_mbd.block[24].dequant = cpi->common.Y2dequant[QIndex];
 366     x->block[24].zrun_zbin_boost = cpi->zrun_zbin_boost_y2[QIndex];
 367     x->block[24].zbin_extra = (short)zbin_extra;
 368 }
 369
 370 void vp8cx_frame_init_quantizer(VP8_COMP *cpi)
 371 {
 372     // Clear Zbin mode boost for default case
 373     cpi->zbin_mode_boost = 0;
 374
 375     // vp8cx_init_quantizer() is first called in vp8_create_compressor(). A check is added here so that vp8cx_init_quantizer() is only called
 376     // when these values are not all zero.
 377     if (cpi->common.y1dc_delta_q | cpi->common.y2dc_delta_q | cpi->common.uvdc_delta_q | cpi->common.y2ac_delta_q | cpi->common.uvac_delta_q)
 378     {
 379         vp8cx_init_quantizer(cpi);
 380     }
 381
 382     // MB level quantizer setup
 383     vp8cx_mb_init_quantizer(cpi, &cpi->mb);
 384 }
 385
 386
 387 /* activity_avg must be positive, or flat regions could get a zero weight
 388  *  (infinite lambda), which confounds analysis.
 389  * This also avoids the need for divide by zero checks in
 390  *  vp8_activity_masking().
 391  */
 392 #define VP8_ACTIVITY_AVG_MIN (64)
 393
 394 /* This is used as a reference when computing the source variance for the
 395  *  purposes of activity masking.
 396  * Eventually this should be replaced by custom no-reference routines,
 397  *  which will be faster.
 398  */
 399 static const unsigned char VP8_VAR_OFFS[16]=
 400 {
 401     128,128,128,128,128,128,128,128,128,128,128,128,128,128,128,128
 402 };
 403
 404 unsigned int vp8_activity_masking(VP8_COMP *cpi, MACROBLOCK *x)
 405 {
 406     unsigned int act;
 407     unsigned int sse;
 408     int sum;
 409     unsigned int a;
 410     unsigned int b;
 411     unsigned int d;
 412     /* TODO: This could also be done over smaller areas (8x8), but that would
 413      *  require extensive changes elsewhere, as lambda is assumed to be fixed
 414      *  over an entire MB in most of the code.
 415      * Another option is to compute four 8x8 variances, and pick a single
 416      *  lambda using a non-linear combination (e.g., the smallest, or second
 417      *  smallest, etc.).
 418      */
 419     VARIANCE_INVOKE(&cpi->rtcd.variance, get16x16var)(x->src.y_buffer,
 420      x->src.y_stride, VP8_VAR_OFFS, 0, &sse, &sum);
 421     /* This requires a full 32 bits of precision. */
 422     act = (sse<<8) - sum*sum;
 423     /* Drop 4 to give us some headroom to work with. */
 424     act = (act + 8) >> 4;
 425     /* If the region is flat, lower the activity some more. */
 426     if (act < 8<<12)
 427         act = act < 5<<12 ? act : 5<<12;
 428     /* TODO: For non-flat regions, edge regions should receive less masking
 429      *  than textured regions, but identifying edge regions quickly and
 430      *  reliably enough is still a subject of experimentation.
 431      * This will be most noticable near edges with a complex shape (e.g.,
 432      *  text), but the 4x4 transform size should make this less of a problem
 433      *  than it would be for an 8x8 transform.
 434      */
 435     /* Apply the masking to the RD multiplier. */
 436     a = act + 4*cpi->activity_avg;
 437     b = 4*act + cpi->activity_avg;
 438     x->rdmult = (unsigned int)(((INT64)x->rdmult*b + (a>>1))/a);
 439     return act;
 440 }
 441
 442
 443
 444 static
 445 void encode_mb_row(VP8_COMP *cpi,
 446                    VP8_COMMON *cm,
 447                    int mb_row,
 448                    MACROBLOCK  *x,
 449                    MACROBLOCKD *xd,
 450                    TOKENEXTRA **tp,
 451                    int *segment_counts,
 452                    int *totalrate)
 453 {
 454     INT64 activity_sum = 0;
 455     int i;
 456     int recon_yoffset, recon_uvoffset;
 457     int mb_col;
 458     int ref_fb_idx = cm->lst_fb_idx;
 459     int dst_fb_idx = cm->new_fb_idx;
 460     int recon_y_stride = cm->yv12_fb[ref_fb_idx].y_stride;
 461     int recon_uv_stride = cm->yv12_fb[ref_fb_idx].uv_stride;
 462     int seg_map_index = (mb_row * cpi->common.mb_cols);
 463
 464
 465     // reset above block coeffs
 466     xd->above_context = cm->above_context;
 467
 468     xd->up_available = (mb_row != 0);
 469     recon_yoffset = (mb_row * recon_y_stride * 16);
 470     recon_uvoffset = (mb_row * recon_uv_stride * 8);
 471
 472     cpi->tplist[mb_row].start = *tp;
 473     //printf("Main mb_row = %d\n", mb_row);
 474
 475     // Distance of Mb to the top & bottom edges, specified in 1/8th pel
 476     // units as they are always compared to values that are in 1/8th pel units
 477     xd->mb_to_top_edge = -((mb_row * 16) << 3);
 478     xd->mb_to_bottom_edge = ((cm->mb_rows - 1 - mb_row) * 16) << 3;
 479
 480     // Set up limit values for vertical motion vector components
 481     // to prevent them extending beyond the UMV borders
 482     x->mv_row_min = -((mb_row * 16) + (VP8BORDERINPIXELS - 16));
 483     x->mv_row_max = ((cm->mb_rows - 1 - mb_row) * 16)
 484                         + (VP8BORDERINPIXELS - 16);
 485
 486     // for each macroblock col in image
 487     for (mb_col = 0; mb_col < cm->mb_cols; mb_col++)
 488     {
 489         // Distance of Mb to the left & right edges, specified in
 490         // 1/8th pel units as they are always compared to values
 491         // that are in 1/8th pel units
 492         xd->mb_to_left_edge = -((mb_col * 16) << 3);
 493         xd->mb_to_right_edge = ((cm->mb_cols - 1 - mb_col) * 16) << 3;
 494
 495         // Set up limit values for horizontal motion vector components
 496         // to prevent them extending beyond the UMV borders
 497         x->mv_col_min = -((mb_col * 16) + (VP8BORDERINPIXELS - 16));
 498         x->mv_col_max = ((cm->mb_cols - 1 - mb_col) * 16)
 499                             + (VP8BORDERINPIXELS - 16);
 500
 501         xd->dst.y_buffer = cm->yv12_fb[dst_fb_idx].y_buffer + recon_yoffset;
 502         xd->dst.u_buffer = cm->yv12_fb[dst_fb_idx].u_buffer + recon_uvoffset;
 503         xd->dst.v_buffer = cm->yv12_fb[dst_fb_idx].v_buffer + recon_uvoffset;
 504         xd->left_available = (mb_col != 0);
 505
 506         x->rddiv = cpi->RDDIV;
 507         x->rdmult = cpi->RDMULT;
 508
 509         if(cpi->oxcf.tuning == VP8_TUNE_SSIM)
 510             activity_sum += vp8_activity_masking(cpi, x);
 511
 512         // Is segmentation enabled
 513         // MB level adjutment to quantizer
 514         if (xd->segmentation_enabled)
 515         {
 516             // Code to set segment id in xd->mbmi.segment_id for current MB (with range checking)
 517             if (cpi->segmentation_map[seg_map_index+mb_col] <= 3)
 518                 xd->mode_info_context->mbmi.segment_id = cpi->segmentation_map[seg_map_index+mb_col];
 519             else
 520                 xd->mode_info_context->mbmi.segment_id = 0;
 521
 522             vp8cx_mb_init_quantizer(cpi, x);
 523         }
 524         else
 525             xd->mode_info_context->mbmi.segment_id = 0;         // Set to Segment 0 by default
 526
 527         x->active_ptr = cpi->active_map + seg_map_index + mb_col;
 528
 529         if (cm->frame_type == KEY_FRAME)
 530         {
 531             *totalrate += vp8cx_encode_intra_macro_block(cpi, x, tp);
 532 #ifdef MODE_STATS
 533             y_modes[xd->mbmi.mode] ++;
 534 #endif
 535         }
 536         else
 537         {
 538             *totalrate += vp8cx_encode_inter_macroblock(cpi, x, tp, recon_yoffset, recon_uvoffset);
 539
 540 #ifdef MODE_STATS
 541             inter_y_modes[xd->mbmi.mode] ++;
 542
 543             if (xd->mbmi.mode == SPLITMV)
 544             {
 545                 int b;
 546
 547                 for (b = 0; b < xd->mbmi.partition_count; b++)
 548                 {
 549                     inter_b_modes[x->partition->bmi[b].mode] ++;
 550                 }
 551             }
 552
 553 #endif
 554
 555             // Count of last ref frame 0,0 useage
 556             if ((xd->mode_info_context->mbmi.mode == ZEROMV) && (xd->mode_info_context->mbmi.ref_frame == LAST_FRAME))
 557                 cpi->inter_zz_count ++;
 558
 559             // Special case code for cyclic refresh
 560             // If cyclic update enabled then copy xd->mbmi.segment_id; (which may have been updated based on mode
 561             // during vp8cx_encode_inter_macroblock()) back into the global sgmentation map
 562             if (cpi->cyclic_refresh_mode_enabled && xd->segmentation_enabled)
 563             {
 564                 cpi->segmentation_map[seg_map_index+mb_col] = xd->mode_info_context->mbmi.segment_id;
 565
 566                 // If the block has been refreshed mark it as clean (the magnitude of the -ve influences how long it will be before we consider another refresh):
 567                 // Else if it was coded (last frame 0,0) and has not already been refreshed then mark it as a candidate for cleanup next time (marked 0)
 568                 // else mark it as dirty (1).
 569                 if (xd->mode_info_context->mbmi.segment_id)
 570                     cpi->cyclic_refresh_map[seg_map_index+mb_col] = -1;
 571                 else if ((xd->mode_info_context->mbmi.mode == ZEROMV) && (xd->mode_info_context->mbmi.ref_frame == LAST_FRAME))
 572                 {
 573                     if (cpi->cyclic_refresh_map[seg_map_index+mb_col] == 1)
 574                         cpi->cyclic_refresh_map[seg_map_index+mb_col] = 0;
 575                 }
 576                 else
 577                     cpi->cyclic_refresh_map[seg_map_index+mb_col] = 1;
 578
 579             }
 580         }
 581
 582         cpi->tplist[mb_row].stop = *tp;
 583
 584         x->gf_active_ptr++;      // Increment pointer into gf useage flags structure for next mb
 585
 586         for (i = 0; i < 16; i++)
 587             vpx_memcpy(&xd->mode_info_context->bmi[i], &xd->block[i].bmi, sizeof(xd->block[i].bmi));
 588
 589         // adjust to the next column of macroblocks
 590         x->src.y_buffer += 16;
 591         x->src.u_buffer += 8;
 592         x->src.v_buffer += 8;
 593
 594         recon_yoffset += 16;
 595         recon_uvoffset += 8;
 596
 597         // Keep track of segment useage
 598         segment_counts[xd->mode_info_context->mbmi.segment_id] ++;
 599
 600         // skip to next mb
 601         xd->mode_info_context++;
 602         x->partition_info++;
 603
 604         xd->above_context++;
 605         cpi->current_mb_col_main = mb_col;
 606     }
 607
 608     //extend the recon for intra prediction
 609     vp8_extend_mb_row(
 610         &cm->yv12_fb[dst_fb_idx],
 611         xd->dst.y_buffer + 16,
 612         xd->dst.u_buffer + 8,
 613         xd->dst.v_buffer + 8);
 614
 615     // this is to account for the border
 616     xd->mode_info_context++;
 617     x->partition_info++;
 618     x->activity_sum += activity_sum;
 619 }
 620
 621
 622
 623
 624
 625 void vp8_encode_frame(VP8_COMP *cpi)
 626 {
 627     int mb_row;
 628     MACROBLOCK *const x = & cpi->mb;
 629     VP8_COMMON *const cm = & cpi->common;
 630     MACROBLOCKD *const xd = & x->e_mbd;
 631
 632     int i;
 633     TOKENEXTRA *tp = cpi->tok;
 634     int segment_counts[MAX_MB_SEGMENTS];
 635     int totalrate;
 636
 637     // Functions setup for all frame types so we can use MC in AltRef
 638     if (cm->mcomp_filter_type == SIXTAP)
 639     {
 640         xd->subpixel_predict        = SUBPIX_INVOKE(
 641                                         &cpi->common.rtcd.subpix, sixtap4x4);
 642         xd->subpixel_predict8x4     = SUBPIX_INVOKE(
 643                                         &cpi->common.rtcd.subpix, sixtap8x4);
 644         xd->subpixel_predict8x8     = SUBPIX_INVOKE(
 645                                         &cpi->common.rtcd.subpix, sixtap8x8);
 646         xd->subpixel_predict16x16   = SUBPIX_INVOKE(
 647                                         &cpi->common.rtcd.subpix, sixtap16x16);
 648     }
 649     else
 650     {
 651         xd->subpixel_predict        = SUBPIX_INVOKE(
 652                                         &cpi->common.rtcd.subpix, bilinear4x4);
 653         xd->subpixel_predict8x4     = SUBPIX_INVOKE(
 654                                         &cpi->common.rtcd.subpix, bilinear8x4);
 655         xd->subpixel_predict8x8     = SUBPIX_INVOKE(
 656                                         &cpi->common.rtcd.subpix, bilinear8x8);
 657         xd->subpixel_predict16x16   = SUBPIX_INVOKE(
 658                                       &cpi->common.rtcd.subpix, bilinear16x16);
 659     }
 660
 661     x->gf_active_ptr = (signed char *)cpi->gf_active_flags;     // Point to base of GF active flags data structure
 662
 663     x->vector_range = 32;
 664
 665     // Count of MBs using the alternate Q if any
 666     cpi->alt_qcount = 0;
 667
 668     // Reset frame count of inter 0,0 motion vector useage.
 669     cpi->inter_zz_count = 0;
 670
 671     vpx_memset(segment_counts, 0, sizeof(segment_counts));
 672
 673     cpi->prediction_error = 0;
 674     cpi->intra_error = 0;
 675     cpi->skip_true_count = 0;
 676     cpi->skip_false_count = 0;
 677
 678 #if 0
 679     // Experimental code
 680     cpi->frame_distortion = 0;
 681     cpi->last_mb_distortion = 0;
 682 #endif
 683
 684     totalrate = 0;
 685
 686     x->partition_info = x->pi;
 687
 688     xd->mode_info_context = cm->mi;
 689     xd->mode_info_stride = cm->mode_info_stride;
 690
 691     xd->frame_type = cm->frame_type;
 692
 693     xd->frames_since_golden = cm->frames_since_golden;
 694     xd->frames_till_alt_ref_frame = cm->frames_till_alt_ref_frame;
 695     vp8_zero(cpi->MVcount);
 696     // vp8_zero( Contexts)
 697     vp8_zero(cpi->coef_counts);
 698
 699     // reset intra mode contexts
 700     if (cm->frame_type == KEY_FRAME)
 701         vp8_init_mbmode_probs(cm);
 702
 703
 704     vp8cx_frame_init_quantizer(cpi);
 705
 706     if (cpi->compressor_speed == 2)
 707     {
 708         if (cpi->oxcf.cpu_used < 0)
 709             cpi->Speed = -(cpi->oxcf.cpu_used);
 710         else
 711             vp8_auto_select_speed(cpi);
 712     }
 713
 714     vp8_initialize_rd_consts(cpi, vp8_dc_quant(cm->base_qindex, cm->y1dc_delta_q));
 715     //vp8_initialize_rd_consts( cpi, vp8_dc_quant(cpi->avg_frame_qindex, cm->y1dc_delta_q) );
 716     vp8cx_initialize_me_consts(cpi, cm->base_qindex);
 717     //vp8cx_initialize_me_consts( cpi, cpi->avg_frame_qindex);
 718
 719     // Copy data over into macro block data sturctures.
 720
 721     x->src = * cpi->Source;
 722     xd->pre = cm->yv12_fb[cm->lst_fb_idx];
 723     xd->dst = cm->yv12_fb[cm->new_fb_idx];
 724
 725     // set up frame new frame for intra coded blocks
 726
 727     vp8_setup_intra_recon(&cm->yv12_fb[cm->new_fb_idx]);
 728
 729     vp8_build_block_offsets(x);
 730
 731     vp8_setup_block_dptrs(&x->e_mbd);
 732
 733     vp8_setup_block_ptrs(x);
 734
 735     x->activity_sum = 0;
 736
 737 #if 0
 738     // Experimental rd code
 739     // 2 Pass - Possibly set Rdmult based on last frame distortion + this frame target bits or other metrics
 740     // such as cpi->rate_correction_factor that indicate relative complexity.
 741     /*if ( cpi->pass == 2 && (cpi->last_frame_distortion > 0) && (cpi->target_bits_per_mb > 0) )
 742     {
 743         //x->rdmult = ((cpi->last_frame_distortion * 256)/cpi->common.MBs)/ cpi->target_bits_per_mb;
 744         x->rdmult = (int)(cpi->RDMULT * cpi->rate_correction_factor);
 745     }
 746     else
 747         x->rdmult = cpi->RDMULT; */
 748     //x->rdmult = (int)(cpi->RDMULT * pow( (cpi->rate_correction_factor * 2.0), 0.75 ));
 749 #endif
 750
 751     xd->mode_info_context->mbmi.mode = DC_PRED;
 752     xd->mode_info_context->mbmi.uv_mode = DC_PRED;
 753
 754     xd->left_context = &cm->left_context;
 755
 756     vp8_zero(cpi->count_mb_ref_frame_usage)
 757     vp8_zero(cpi->ymode_count)
 758     vp8_zero(cpi->uv_mode_count)
 759
 760     x->mvc = cm->fc.mvc;
 761
 762     vpx_memset(cm->above_context, 0, sizeof(ENTROPY_CONTEXT_PLANES) * cm->mb_cols);
 763
 764     {
 765         struct vpx_usec_timer  emr_timer;
 766         vpx_usec_timer_start(&emr_timer);
 767
 768         if (!cpi->b_multi_threaded)
 769         {
 770             // for each macroblock row in image
 771             for (mb_row = 0; mb_row < cm->mb_rows; mb_row++)
 772             {
 773
 774                 vp8_zero(cm->left_context)
 775
 776                 encode_mb_row(cpi, cm, mb_row, x, xd, &tp, segment_counts, &totalrate);
 777
 778                 // adjust to the next row of mbs
 779                 x->src.y_buffer += 16 * x->src.y_stride - 16 * cm->mb_cols;
 780                 x->src.u_buffer += 8 * x->src.uv_stride - 8 * cm->mb_cols;
 781                 x->src.v_buffer += 8 * x->src.uv_stride - 8 * cm->mb_cols;
 782             }
 783
 784             cpi->tok_count = tp - cpi->tok;
 785
 786         }
 787         else
 788         {
 789 #if CONFIG_MULTITHREAD
 790             int i;
 791
 792             vp8cx_init_mbrthread_data(cpi, x, cpi->mb_row_ei, 1,  cpi->encoding_thread_count);
 793
 794             for (mb_row = 0; mb_row < cm->mb_rows; mb_row += (cpi->encoding_thread_count + 1))
 795             {
 796                 cpi->current_mb_col_main = -1;
 797
 798                 for (i = 0; i < cpi->encoding_thread_count; i++)
 799                 {
 800                     if ((mb_row + i + 1) >= cm->mb_rows)
 801                         break;
 802
 803                     cpi->mb_row_ei[i].mb_row = mb_row + i + 1;
 804                     cpi->mb_row_ei[i].tp  = cpi->tok + (mb_row + i + 1) * (cm->mb_cols * 16 * 24);
 805                     cpi->mb_row_ei[i].current_mb_col = -1;
 806                     //SetEvent(cpi->h_event_mbrencoding[i]);
 807                     sem_post(&cpi->h_event_mbrencoding[i]);
 808                 }
 809
 810                 vp8_zero(cm->left_context)
 811
 812                 tp = cpi->tok + mb_row * (cm->mb_cols * 16 * 24);
 813
 814                 encode_mb_row(cpi, cm, mb_row, x, xd, &tp, segment_counts, &totalrate);
 815
 816                 // adjust to the next row of mbs
 817                 x->src.y_buffer += 16 * x->src.y_stride * (cpi->encoding_thread_count + 1) - 16 * cm->mb_cols;
 818                 x->src.u_buffer +=  8 * x->src.uv_stride * (cpi->encoding_thread_count + 1) - 8 * cm->mb_cols;
 819                 x->src.v_buffer +=  8 * x->src.uv_stride * (cpi->encoding_thread_count + 1) - 8 * cm->mb_cols;
 820
 821                 xd->mode_info_context += xd->mode_info_stride * cpi->encoding_thread_count;
 822                 x->partition_info  += xd->mode_info_stride * cpi->encoding_thread_count;
 823
 824                 if (mb_row < cm->mb_rows - 1)
 825                     //WaitForSingleObject(cpi->h_event_main, INFINITE);
 826                     sem_wait(&cpi->h_event_main);
 827             }
 828
 829             /*
 830             for( ;mb_row<cm->mb_rows; mb_row ++)
 831             {
 832             vp8_zero( cm->left_context)
 833
 834             tp = cpi->tok + mb_row * (cm->mb_cols * 16 * 24);
 835
 836             encode_mb_row(cpi, cm, mb_row, x, xd, &tp, segment_counts, &totalrate);
 837             // adjust to the next row of mbs
 838             x->src.y_buffer += 16 * x->src.y_stride - 16 * cm->mb_cols;
 839             x->src.u_buffer +=  8 * x->src.uv_stride - 8 * cm->mb_cols;
 840             x->src.v_buffer +=  8 * x->src.uv_stride - 8 * cm->mb_cols;
 841
 842             }
 843             */
 844             cpi->tok_count = 0;
 845
 846             for (mb_row = 0; mb_row < cm->mb_rows; mb_row ++)
 847             {
 848                 cpi->tok_count += cpi->tplist[mb_row].stop - cpi->tplist[mb_row].start;
 849             }
 850
 851             if (xd->segmentation_enabled)
 852             {
 853
 854                 int i, j;
 855
 856                 if (xd->segmentation_enabled)
 857                 {
 858
 859                     for (i = 0; i < cpi->encoding_thread_count; i++)
 860                     {
 861                         for (j = 0; j < 4; j++)
 862                             segment_counts[j] += cpi->mb_row_ei[i].segment_counts[j];
 863                     }
 864                 }
 865
 866             }
 867
 868             for (i = 0; i < cpi->encoding_thread_count; i++)
 869             {
 870                 totalrate += cpi->mb_row_ei[i].totalrate;
 871             }
 872
 873             for (i = 0; i < cpi->encoding_thread_count; i++)
 874             {
 875                 x->activity_sum += cpi->mb_row_ei[i].mb.activity_sum;
 876             }
 877
 878 #endif
 879
 880         }
 881
 882         vpx_usec_timer_mark(&emr_timer);
 883         cpi->time_encode_mb_row += vpx_usec_timer_elapsed(&emr_timer);
 884
 885     }
 886
 887
 888     // Work out the segment probabilites if segmentation is enabled
 889     if (xd->segmentation_enabled)
 890     {
 891         int tot_count;
 892         int i;
 893
 894         // Set to defaults
 895         vpx_memset(xd->mb_segment_tree_probs, 255 , sizeof(xd->mb_segment_tree_probs));
 896
 897         tot_count = segment_counts[0] + segment_counts[1] + segment_counts[2] + segment_counts[3];
 898
 899         if (tot_count)
 900         {
 901             xd->mb_segment_tree_probs[0] = ((segment_counts[0] + segment_counts[1]) * 255) / tot_count;
 902
 903             tot_count = segment_counts[0] + segment_counts[1];
 904
 905             if (tot_count > 0)
 906             {
 907                 xd->mb_segment_tree_probs[1] = (segment_counts[0] * 255) / tot_count;
 908             }
 909
 910             tot_count = segment_counts[2] + segment_counts[3];
 911
 912             if (tot_count > 0)
 913                 xd->mb_segment_tree_probs[2] = (segment_counts[2] * 255) / tot_count;
 914
 915             // Zero probabilities not allowed
 916             for (i = 0; i < MB_FEATURE_TREE_PROBS; i ++)
 917             {
 918                 if (xd->mb_segment_tree_probs[i] == 0)
 919                     xd->mb_segment_tree_probs[i] = 1;
 920             }
 921         }
 922     }
 923
 924     // 256 rate units to the bit
 925     cpi->projected_frame_size = totalrate >> 8;   // projected_frame_size in units of BYTES
 926
 927     // Make a note of the percentage MBs coded Intra.
 928     if (cm->frame_type == KEY_FRAME)
 929     {
 930         cpi->this_frame_percent_intra = 100;
 931     }
 932     else
 933     {
 934         int tot_modes;
 935
 936         tot_modes = cpi->count_mb_ref_frame_usage[INTRA_FRAME]
 937                     + cpi->count_mb_ref_frame_usage[LAST_FRAME]
 938                     + cpi->count_mb_ref_frame_usage[GOLDEN_FRAME]
 939                     + cpi->count_mb_ref_frame_usage[ALTREF_FRAME];
 940
 941         if (tot_modes)
 942             cpi->this_frame_percent_intra = cpi->count_mb_ref_frame_usage[INTRA_FRAME] * 100 / tot_modes;
 943
 944     }
 945
 946 #if 0
 947     {
 948         int cnt = 0;
 949         int flag[2] = {0, 0};
 950
 951         for (cnt = 0; cnt < MVPcount; cnt++)
 952         {
 953             if (cm->fc.pre_mvc[0][cnt] != cm->fc.mvc[0][cnt])
 954             {
 955                 flag[0] = 1;
 956                 vpx_memcpy(cm->fc.pre_mvc[0], cm->fc.mvc[0], MVPcount);
 957                 break;
 958             }
 959         }
 960
 961         for (cnt = 0; cnt < MVPcount; cnt++)
 962         {
 963             if (cm->fc.pre_mvc[1][cnt] != cm->fc.mvc[1][cnt])
 964             {
 965                 flag[1] = 1;
 966                 vpx_memcpy(cm->fc.pre_mvc[1], cm->fc.mvc[1], MVPcount);
 967                 break;
 968             }
 969         }
 970
 971         if (flag[0] || flag[1])
 972             vp8_build_component_cost_table(cpi->mb.mvcost, cpi->mb.mvsadcost, (const MV_CONTEXT *) cm->fc.mvc, flag);
 973     }
 974 #endif
 975
 976     // Adjust the projected reference frame useage probability numbers to reflect
 977     // what we have just seen. This may be usefull when we make multiple itterations
 978     // of the recode loop rather than continuing to use values from the previous frame.
 979     if ((cm->frame_type != KEY_FRAME) && !cm->refresh_alt_ref_frame && !cm->refresh_golden_frame)
 980     {
 981         const int *const rfct = cpi->count_mb_ref_frame_usage;
 982         const int rf_intra = rfct[INTRA_FRAME];
 983         const int rf_inter = rfct[LAST_FRAME] + rfct[GOLDEN_FRAME] + rfct[ALTREF_FRAME];
 984
 985         if ((rf_intra + rf_inter) > 0)
 986         {
 987             cpi->prob_intra_coded = (rf_intra * 255) / (rf_intra + rf_inter);
 988
 989             if (cpi->prob_intra_coded < 1)
 990                 cpi->prob_intra_coded = 1;
 991
 992             if ((cm->frames_since_golden > 0) || cpi->source_alt_ref_active)
 993             {
 994                 cpi->prob_last_coded = rf_inter ? (rfct[LAST_FRAME] * 255) / rf_inter : 128;
 995
 996                 if (cpi->prob_last_coded < 1)
 997                     cpi->prob_last_coded = 1;
 998
 999                 cpi->prob_gf_coded = (rfct[GOLDEN_FRAME] + rfct[ALTREF_FRAME])
1000                                      ? (rfct[GOLDEN_FRAME] * 255) / (rfct[GOLDEN_FRAME] + rfct[ALTREF_FRAME]) : 128;
1001
1002                 if (cpi->prob_gf_coded < 1)
1003                     cpi->prob_gf_coded = 1;
1004             }
1005         }
1006     }
1007
1008 #if 0
1009     // Keep record of the total distortion this time around for future use
1010     cpi->last_frame_distortion = cpi->frame_distortion;
1011 #endif
1012
1013     /* Update the average activity for the next frame.
1014      * This is feed-forward for now; it could also be saved in two-pass, or
1015      *  done during lookahead when that is eventually added.
1016      */
1017     cpi->activity_avg = (unsigned int )(x->activity_sum/cpi->common.MBs);
1018     if (cpi->activity_avg < VP8_ACTIVITY_AVG_MIN)
1019         cpi->activity_avg = VP8_ACTIVITY_AVG_MIN;
1020
1021 }
1022 void vp8_setup_block_ptrs(MACROBLOCK *x)
1023 {
1024     int r, c;
1025     int i;
1026
1027     for (r = 0; r < 4; r++)
1028     {
1029         for (c = 0; c < 4; c++)
1030         {
1031             x->block[r*4+c].src_diff = x->src_diff + r * 4 * 16 + c * 4;
1032         }
1033     }
1034
1035     for (r = 0; r < 2; r++)
1036     {
1037         for (c = 0; c < 2; c++)
1038         {
1039             x->block[16 + r*2+c].src_diff = x->src_diff + 256 + r * 4 * 8 + c * 4;
1040         }
1041     }
1042
1043
1044     for (r = 0; r < 2; r++)
1045     {
1046         for (c = 0; c < 2; c++)
1047         {
1048             x->block[20 + r*2+c].src_diff = x->src_diff + 320 + r * 4 * 8 + c * 4;
1049         }
1050     }
1051
1052     x->block[24].src_diff = x->src_diff + 384;
1053
1054
1055     for (i = 0; i < 25; i++)
1056     {
1057         x->block[i].coeff = x->coeff + i * 16;
1058     }
1059 }
1060
1061 void vp8_build_block_offsets(MACROBLOCK *x)
1062 {
1063     int block = 0;
1064     int br, bc;
1065
1066     vp8_build_block_doffsets(&x->e_mbd);
1067
1068     // y blocks
1069     for (br = 0; br < 4; br++)
1070     {
1071         for (bc = 0; bc < 4; bc++)
1072         {
1073             BLOCK *this_block = &x->block[block];
1074             this_block->base_src = &x->src.y_buffer;
1075             this_block->src_stride = x->src.y_stride;
1076             this_block->src = 4 * br * this_block->src_stride + 4 * bc;
1077             ++block;
1078         }
1079     }
1080
1081     // u blocks
1082     for (br = 0; br < 2; br++)
1083     {
1084         for (bc = 0; bc < 2; bc++)
1085         {
1086             BLOCK *this_block = &x->block[block];
1087             this_block->base_src = &x->src.u_buffer;
1088             this_block->src_stride = x->src.uv_stride;
1089             this_block->src = 4 * br * this_block->src_stride + 4 * bc;
1090             ++block;
1091         }
1092     }
1093
1094     // v blocks
1095     for (br = 0; br < 2; br++)
1096     {
1097         for (bc = 0; bc < 2; bc++)
1098         {
1099             BLOCK *this_block = &x->block[block];
1100             this_block->base_src = &x->src.v_buffer;
1101             this_block->src_stride = x->src.uv_stride;
1102             this_block->src = 4 * br * this_block->src_stride + 4 * bc;
1103             ++block;
1104         }
1105     }
1106 }
1107
1108 static void sum_intra_stats(VP8_COMP *cpi, MACROBLOCK *x)
1109 {
1110     const MACROBLOCKD *xd = & x->e_mbd;
1111     const MB_PREDICTION_MODE m = xd->mode_info_context->mbmi.mode;
1112     const MB_PREDICTION_MODE uvm = xd->mode_info_context->mbmi.uv_mode;
1113
1114 #ifdef MODE_STATS
1115     const int is_key = cpi->common.frame_type == KEY_FRAME;
1116
1117     ++ (is_key ? uv_modes : inter_uv_modes)[uvm];
1118
1119     if (m == B_PRED)
1120     {
1121         unsigned int *const bct = is_key ? b_modes : inter_b_modes;
1122
1123         int b = 0;
1124
1125         do
1126         {
1127             ++ bct[xd->block[b].bmi.mode];
1128         }
1129         while (++b < 16);
1130     }
1131
1132 #endif
1133
1134     ++cpi->ymode_count[m];
1135     ++cpi->uv_mode_count[uvm];
1136
1137 }
1138 int vp8cx_encode_intra_macro_block(VP8_COMP *cpi, MACROBLOCK *x, TOKENEXTRA **t)
1139 {
1140     int Error4x4, Error16x16, error_uv;
1141     B_PREDICTION_MODE intra_bmodes[16];
1142     int rate4x4, rate16x16, rateuv;
1143     int dist4x4, dist16x16, distuv;
1144     int rate = 0;
1145     int rate4x4_tokenonly = 0;
1146     int rate16x16_tokenonly = 0;
1147     int rateuv_tokenonly = 0;
1148     int i;
1149
1150     x->e_mbd.mode_info_context->mbmi.ref_frame = INTRA_FRAME;
1151
1152 #if !(CONFIG_REALTIME_ONLY)
1153
1154     if (cpi->sf.RD || cpi->compressor_speed != 2)
1155     {
1156         Error4x4 = vp8_rd_pick_intra4x4mby_modes(cpi, x, &rate4x4, &rate4x4_tokenonly, &dist4x4);
1157
1158         //save the b modes for possible later use
1159         for (i = 0; i < 16; i++)
1160             intra_bmodes[i] = x->e_mbd.block[i].bmi.mode;
1161
1162         Error16x16 = vp8_rd_pick_intra16x16mby_mode(cpi, x, &rate16x16, &rate16x16_tokenonly, &dist16x16);
1163
1164         error_uv = vp8_rd_pick_intra_mbuv_mode(cpi, x, &rateuv, &rateuv_tokenonly, &distuv);
1165
1166         vp8_encode_intra16x16mbuv(IF_RTCD(&cpi->rtcd), x);
1167         rate += rateuv;
1168
1169         if (Error4x4 < Error16x16)
1170         {
1171             rate += rate4x4;
1172             x->e_mbd.mode_info_context->mbmi.mode = B_PRED;
1173
1174             // get back the intra block modes
1175             for (i = 0; i < 16; i++)
1176                 x->e_mbd.block[i].bmi.mode = intra_bmodes[i];
1177
1178             vp8_encode_intra4x4mby(IF_RTCD(&cpi->rtcd), x);
1179             cpi->prediction_error += Error4x4 ;
1180 #if 0
1181             // Experimental RD code
1182             cpi->frame_distortion += dist4x4;
1183 #endif
1184         }
1185         else
1186         {
1187             vp8_encode_intra16x16mby(IF_RTCD(&cpi->rtcd), x);
1188             rate += rate16x16;
1189
1190 #if 0
1191             // Experimental RD code
1192             cpi->prediction_error += Error16x16;
1193             cpi->frame_distortion += dist16x16;
1194 #endif
1195         }
1196
1197         sum_intra_stats(cpi, x);
1198
1199         vp8_tokenize_mb(cpi, &x->e_mbd, t);
1200     }
1201     else
1202 #endif
1203     {
1204
1205         int rate2, distortion2;
1206         MB_PREDICTION_MODE mode, best_mode = DC_PRED;
1207         int this_rd;
1208         Error16x16 = INT_MAX;
1209
1210         for (mode = DC_PRED; mode <= TM_PRED; mode ++)
1211         {
1212             x->e_mbd.mode_info_context->mbmi.mode = mode;
1213             vp8_build_intra_predictors_mby_ptr(&x->e_mbd);
1214             distortion2 = VARIANCE_INVOKE(&cpi->rtcd.variance, get16x16prederror)(x->src.y_buffer, x->src.y_stride, x->e_mbd.predictor, 16, 0x7fffffff);
1215             rate2  = x->mbmode_cost[x->e_mbd.frame_type][mode];
1216             this_rd = RD_ESTIMATE(x->rdmult, x->rddiv, rate2, distortion2);
1217
1218             if (Error16x16 > this_rd)
1219             {
1220                 Error16x16 = this_rd;
1221                 best_mode = mode;
1222             }
1223         }
1224
1225         vp8_pick_intra4x4mby_modes(IF_RTCD(&cpi->rtcd), x, &rate2, &distortion2);
1226
1227         if (distortion2 == INT_MAX)
1228             Error4x4 = INT_MAX;
1229         else
1230             Error4x4 = RD_ESTIMATE(x->rdmult, x->rddiv, rate2, distortion2);
1231
1232         if (Error4x4 < Error16x16)
1233         {
1234             x->e_mbd.mode_info_context->mbmi.mode = B_PRED;
1235             vp8_encode_intra4x4mby(IF_RTCD(&cpi->rtcd), x);
1236             cpi->prediction_error += Error4x4;
1237         }
1238         else
1239         {
1240             x->e_mbd.mode_info_context->mbmi.mode = best_mode;
1241             vp8_encode_intra16x16mby(IF_RTCD(&cpi->rtcd), x);
1242             cpi->prediction_error += Error16x16;
1243         }
1244
1245         vp8_pick_intra_mbuv_mode(x);
1246         vp8_encode_intra16x16mbuv(IF_RTCD(&cpi->rtcd), x);
1247         sum_intra_stats(cpi, x);
1248         vp8_tokenize_mb(cpi, &x->e_mbd, t);
1249     }
1250
1251     return rate;
1252 }
1253 #ifdef SPEEDSTATS
1254 extern int cnt_pm;
1255 #endif
1256
1257 extern void vp8_fix_contexts(MACROBLOCKD *x);
1258
1259 int vp8cx_encode_inter_macroblock
1260 (
1261     VP8_COMP *cpi, MACROBLOCK *x, TOKENEXTRA **t,
1262     int recon_yoffset, int recon_uvoffset
1263 )
1264 {
1265     MACROBLOCKD *const xd = &x->e_mbd;
1266     int inter_error;
1267     int intra_error = 0;
1268     int rate;
1269     int distortion;
1270
1271     x->skip = 0;
1272
1273     if (xd->segmentation_enabled)
1274         x->encode_breakout = cpi->segment_encode_breakout[xd->mode_info_context->mbmi.segment_id];
1275     else
1276         x->encode_breakout = cpi->oxcf.encode_breakout;
1277
1278 #if !(CONFIG_REALTIME_ONLY)
1279
1280     if (cpi->sf.RD)
1281     {
1282         /* Are we using the fast quantizer for the mode selection? */
1283         if(cpi->sf.use_fastquant_for_pick)
1284             cpi->mb.quantize_b      = QUANTIZE_INVOKE(&cpi->rtcd.quantize, fastquantb);
1285
1286         inter_error = vp8_rd_pick_inter_mode(cpi, x, recon_yoffset, recon_uvoffset, &rate, &distortion, &intra_error);
1287
1288         /* switch back to the regular quantizer for the encode */
1289         if (cpi->sf.improved_quant)
1290         {
1291             cpi->mb.quantize_b    = QUANTIZE_INVOKE(&cpi->rtcd.quantize, quantb);
1292         }
1293
1294     }
1295     else
1296 #endif
1297         inter_error = vp8_pick_inter_mode(cpi, x, recon_yoffset, recon_uvoffset, &rate, &distortion, &intra_error);
1298
1299
1300     cpi->prediction_error += inter_error;
1301     cpi->intra_error += intra_error;
1302
1303 #if 0
1304     // Experimental RD code
1305     cpi->frame_distortion += distortion;
1306     cpi->last_mb_distortion = distortion;
1307 #endif
1308
1309     // MB level adjutment to quantizer setup
1310     if (xd->segmentation_enabled || cpi->zbin_mode_boost_enabled)
1311     {
1312         // If cyclic update enabled
1313         if (cpi->cyclic_refresh_mode_enabled)
1314         {
1315             // Clear segment_id back to 0 if not coded (last frame 0,0)
1316             if ((xd->mode_info_context->mbmi.segment_id == 1) &&
1317                 ((xd->mode_info_context->mbmi.ref_frame != LAST_FRAME) || (xd->mode_info_context->mbmi.mode != ZEROMV)))
1318             {
1319                 xd->mode_info_context->mbmi.segment_id = 0;
1320             }
1321         }
1322
1323         // Experimental code. Special case for gf and arf zeromv modes. Increase zbin size to supress noise
1324         if (cpi->zbin_mode_boost_enabled)
1325         {
1326             if ( xd->mode_info_context->mbmi.ref_frame == INTRA_FRAME )
1327                  cpi->zbin_mode_boost = 0;
1328             else
1329             {
1330                 if (xd->mode_info_context->mbmi.mode == ZEROMV)
1331                 {
1332                     if (xd->mode_info_context->mbmi.ref_frame != LAST_FRAME)
1333                         cpi->zbin_mode_boost = GF_ZEROMV_ZBIN_BOOST;
1334                     else
1335                         cpi->zbin_mode_boost = LF_ZEROMV_ZBIN_BOOST;
1336                 }
1337                 else if (xd->mode_info_context->mbmi.mode == SPLITMV)
1338                     cpi->zbin_mode_boost = 0;
1339                 else
1340                     cpi->zbin_mode_boost = MV_ZBIN_BOOST;
1341             }
1342         }
1343         else
1344             cpi->zbin_mode_boost = 0;
1345
1346         vp8cx_mb_init_quantizer(cpi,  x);
1347     }
1348
1349     cpi->count_mb_ref_frame_usage[xd->mode_info_context->mbmi.ref_frame] ++;
1350
1351     if (xd->mode_info_context->mbmi.ref_frame == INTRA_FRAME)
1352     {
1353         vp8_encode_intra16x16mbuv(IF_RTCD(&cpi->rtcd), x);
1354
1355         if (xd->mode_info_context->mbmi.mode == B_PRED)
1356         {
1357             vp8_encode_intra4x4mby(IF_RTCD(&cpi->rtcd), x);
1358         }
1359         else
1360         {
1361             vp8_encode_intra16x16mby(IF_RTCD(&cpi->rtcd), x);
1362         }
1363
1364         sum_intra_stats(cpi, x);
1365     }
1366     else
1367     {
1368         MV best_ref_mv;
1369         MV nearest, nearby;
1370         int mdcounts[4];
1371         int ref_fb_idx;
1372
1373         vp8_find_near_mvs(xd, xd->mode_info_context,
1374                           &nearest, &nearby, &best_ref_mv, mdcounts, xd->mode_info_context->mbmi.ref_frame, cpi->common.ref_frame_sign_bias);
1375
1376         vp8_build_uvmvs(xd, cpi->common.full_pixel);
1377
1378         if (xd->mode_info_context->mbmi.ref_frame == LAST_FRAME)
1379             ref_fb_idx = cpi->common.lst_fb_idx;
1380         else if (xd->mode_info_context->mbmi.ref_frame == GOLDEN_FRAME)
1381             ref_fb_idx = cpi->common.gld_fb_idx;
1382         else
1383             ref_fb_idx = cpi->common.alt_fb_idx;
1384
1385         xd->pre.y_buffer = cpi->common.yv12_fb[ref_fb_idx].y_buffer + recon_yoffset;
1386         xd->pre.u_buffer = cpi->common.yv12_fb[ref_fb_idx].u_buffer + recon_uvoffset;
1387         xd->pre.v_buffer = cpi->common.yv12_fb[ref_fb_idx].v_buffer + recon_uvoffset;
1388
1389         if (xd->mode_info_context->mbmi.mode == SPLITMV)
1390         {
1391             int i;
1392
1393             for (i = 0; i < 16; i++)
1394             {
1395                 if (xd->block[i].bmi.mode == NEW4X4)
1396                 {
1397                     cpi->MVcount[0][mv_max+((xd->block[i].bmi.mv.as_mv.row - best_ref_mv.row) >> 1)]++;
1398                     cpi->MVcount[1][mv_max+((xd->block[i].bmi.mv.as_mv.col - best_ref_mv.col) >> 1)]++;
1399                 }
1400             }
1401         }
1402         else if (xd->mode_info_context->mbmi.mode == NEWMV)
1403         {
1404             cpi->MVcount[0][mv_max+((xd->block[0].bmi.mv.as_mv.row - best_ref_mv.row) >> 1)]++;
1405             cpi->MVcount[1][mv_max+((xd->block[0].bmi.mv.as_mv.col - best_ref_mv.col) >> 1)]++;
1406         }
1407
1408         if (!x->skip && !x->e_mbd.mode_info_context->mbmi.force_no_skip)
1409         {
1410             vp8_encode_inter16x16(IF_RTCD(&cpi->rtcd), x);
1411
1412             // Clear mb_skip_coeff if mb_no_coeff_skip is not set
1413             if (!cpi->common.mb_no_coeff_skip)
1414                 xd->mode_info_context->mbmi.mb_skip_coeff = 0;
1415
1416         }
1417         else
1418             vp8_stuff_inter16x16(x);
1419     }
1420
1421     if (!x->skip)
1422         vp8_tokenize_mb(cpi, xd, t);
1423     else
1424     {
1425         if (cpi->common.mb_no_coeff_skip)
1426         {
1427             if (xd->mode_info_context->mbmi.mode != B_PRED && xd->mode_info_context->mbmi.mode != SPLITMV)
1428                 xd->mode_info_context->mbmi.dc_diff = 0;
1429             else
1430                 xd->mode_info_context->mbmi.dc_diff = 1;
1431
1432             xd->mode_info_context->mbmi.mb_skip_coeff = 1;
1433             cpi->skip_true_count ++;
1434             vp8_fix_contexts(xd);
1435         }
1436         else
1437         {
1438             vp8_stuff_mb(cpi, xd, t);
1439             xd->mode_info_context->mbmi.mb_skip_coeff = 0;
1440             cpi->skip_false_count ++;
1441         }
1442     }
1443
1444     return rate;
1445 }