vp8/encoder/encodeframe.c

   1 /*
   2  *  Copyright (c) 2010 The WebM project authors. All Rights Reserved.
   3  *
   4  *  Use of this source code is governed by a BSD-style license
   5  *  that can be found in the LICENSE file in the root of the source
   6  *  tree. An additional intellectual property rights grant can be found
   7  *  in the file PATENTS.  All contributing project authors may
   8  *  be found in the AUTHORS file in the root of the source tree.
   9  */
  10
  11
  12 #include "vpx_ports/config.h"
  13 #include "encodemb.h"
  14 #include "encodemv.h"
  15 #include "common.h"
  16 #include "onyx_int.h"
  17 #include "extend.h"
  18 #include "entropymode.h"
  19 #include "quant_common.h"
  20 #include "segmentation.h"
  21 #include "setupintrarecon.h"
  22 #include "encodeintra.h"
  23 #include "reconinter.h"
  24 #include "rdopt.h"
  25 #include "pickinter.h"
  26 #include "findnearmv.h"
  27 #include "reconintra.h"
  28 #include <stdio.h>
  29 #include <limits.h>
  30 #include "subpixel.h"
  31 #include "vpx_ports/vpx_timer.h"
  32
/* Run-time CPU detection (RTCD) helper macros.
 * When CONFIG_RUNTIME_CPU_DETECT is non-zero, RTCD(x) expands to the address
 * of member x of cpi->common.rtcd (so a variable named cpi must be in scope
 * where the macro is used) and IF_RTCD(x) passes its argument through.
 * Otherwise both macros expand to NULL.
 */
#if CONFIG_RUNTIME_CPU_DETECT
#define RTCD(x)     &cpi->common.rtcd.x
#define IF_RTCD(x)  (x)
#else
#define RTCD(x)     NULL
#define IF_RTCD(x)  NULL
#endif
/* Prototypes for routines whose definitions are not visible in this part of
 * the file. */
extern void vp8_stuff_mb(VP8_COMP *cpi, MACROBLOCKD *x, TOKENEXTRA **t) ;

extern void vp8cx_initialize_me_consts(VP8_COMP *cpi, int QIndex);
extern void vp8_auto_select_speed(VP8_COMP *cpi);
extern void vp8cx_init_mbrthread_data(VP8_COMP *cpi,
                                      MACROBLOCK *x,
                                      MB_ROW_COMP *mbr_ei,
                                      int mb_row,
                                      int count);
void vp8_build_block_offsets(MACROBLOCK *x);
void vp8_setup_block_ptrs(MACROBLOCK *x);
/* Per-macroblock encode entry points; encode_mb_row() adds their return
 * values to the running rate total. */
int vp8cx_encode_inter_macroblock(VP8_COMP *cpi, MACROBLOCK *x, TOKENEXTRA **t, int recon_yoffset, int recon_uvoffset);
int vp8cx_encode_intra_macro_block(VP8_COMP *cpi, MACROBLOCK *x, TOKENEXTRA **t);
  53
  54 #ifdef MODE_STATS
  55 unsigned int inter_y_modes[10] = {0, 0, 0, 0, 0, 0, 0, 0, 0, 0};
  56 unsigned int inter_uv_modes[4] = {0, 0, 0, 0};
  57 unsigned int inter_b_modes[15]  = {0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0};
  58 unsigned int y_modes[5]   = {0, 0, 0, 0, 0};
  59 unsigned int uv_modes[4]  = {0, 0, 0, 0};
  60 unsigned int b_modes[14]  = {0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0};
  61 #endif
  62
  63 static const int qrounding_factors[129] =
  64 {
  65     48, 48, 48, 48, 48, 48, 48, 48,
  66     48, 48, 48, 48, 48, 48, 48, 48,
  67     48, 48, 48, 48, 48, 48, 48, 48,
  68     48, 48, 48, 48, 48, 48, 48, 48,
  69     48, 48, 48, 48, 48, 48, 48, 48,
  70     48, 48, 48, 48, 48, 48, 48, 48,
  71     48, 48, 48, 48, 48, 48, 48, 48,
  72     48, 48, 48, 48, 48, 48, 48, 48,
  73     48, 48, 48, 48, 48, 48, 48, 48,
  74     48, 48, 48, 48, 48, 48, 48, 48,
  75     48, 48, 48, 48, 48, 48, 48, 48,
  76     48, 48, 48, 48, 48, 48, 48, 48,
  77     48, 48, 48, 48, 48, 48, 48, 48,
  78     48, 48, 48, 48, 48, 48, 48, 48,
  79     48, 48, 48, 48, 48, 48, 48, 48,
  80     48, 48, 48, 48, 48, 48, 48, 48,
  81     48
  82 };
  83
  84 static const int qzbin_factors[129] =
  85 {
  86     84, 84, 84, 84, 84, 84, 84, 84,
  87     84, 84, 84, 84, 84, 84, 84, 84,
  88     84, 84, 84, 84, 84, 84, 84, 84,
  89     84, 84, 84, 84, 84, 84, 84, 84,
  90     84, 84, 84, 84, 84, 84, 84, 84,
  91     84, 84, 84, 84, 84, 84, 84, 84,
  92     80, 80, 80, 80, 80, 80, 80, 80,
  93     80, 80, 80, 80, 80, 80, 80, 80,
  94     80, 80, 80, 80, 80, 80, 80, 80,
  95     80, 80, 80, 80, 80, 80, 80, 80,
  96     80, 80, 80, 80, 80, 80, 80, 80,
  97     80, 80, 80, 80, 80, 80, 80, 80,
  98     80, 80, 80, 80, 80, 80, 80, 80,
  99     80, 80, 80, 80, 80, 80, 80, 80,
 100     80, 80, 80, 80, 80, 80, 80, 80,
 101     80, 80, 80, 80, 80, 80, 80, 80,
 102     80
 103 };
 104
 105 static const int qrounding_factors_y2[129] =
 106 {
 107     48, 48, 48, 48, 48, 48, 48, 48,
 108     48, 48, 48, 48, 48, 48, 48, 48,
 109     48, 48, 48, 48, 48, 48, 48, 48,
 110     48, 48, 48, 48, 48, 48, 48, 48,
 111     48, 48, 48, 48, 48, 48, 48, 48,
 112     48, 48, 48, 48, 48, 48, 48, 48,
 113     48, 48, 48, 48, 48, 48, 48, 48,
 114     48, 48, 48, 48, 48, 48, 48, 48,
 115     48, 48, 48, 48, 48, 48, 48, 48,
 116     48, 48, 48, 48, 48, 48, 48, 48,
 117     48, 48, 48, 48, 48, 48, 48, 48,
 118     48, 48, 48, 48, 48, 48, 48, 48,
 119     48, 48, 48, 48, 48, 48, 48, 48,
 120     48, 48, 48, 48, 48, 48, 48, 48,
 121     48, 48, 48, 48, 48, 48, 48, 48,
 122     48, 48, 48, 48, 48, 48, 48, 48,
 123     48
 124 };
 125
 126 static const int qzbin_factors_y2[129] =
 127 {
 128     84, 84, 84, 84, 84, 84, 84, 84,
 129     84, 84, 84, 84, 84, 84, 84, 84,
 130     84, 84, 84, 84, 84, 84, 84, 84,
 131     84, 84, 84, 84, 84, 84, 84, 84,
 132     84, 84, 84, 84, 84, 84, 84, 84,
 133     84, 84, 84, 84, 84, 84, 84, 84,
 134     80, 80, 80, 80, 80, 80, 80, 80,
 135     80, 80, 80, 80, 80, 80, 80, 80,
 136     80, 80, 80, 80, 80, 80, 80, 80,
 137     80, 80, 80, 80, 80, 80, 80, 80,
 138     80, 80, 80, 80, 80, 80, 80, 80,
 139     80, 80, 80, 80, 80, 80, 80, 80,
 140     80, 80, 80, 80, 80, 80, 80, 80,
 141     80, 80, 80, 80, 80, 80, 80, 80,
 142     80, 80, 80, 80, 80, 80, 80, 80,
 143     80, 80, 80, 80, 80, 80, 80, 80,
 144     80
 145 };
 146
 147 #define EXACT_QUANT
 148 #ifdef EXACT_QUANT
 149 static void vp8cx_invert_quant(int improved_quant, short *quant,
 150                                short *shift, short d)
 151 {
 152     if(improved_quant)
 153     {
 154         unsigned t;
 155         int l;
 156         t = d;
 157         for(l = 0; t > 1; l++)
 158             t>>=1;
 159         t = 1 + (1<<(16+l))/d;
 160         *quant = (short)(t - (1<<16));
 161         *shift = l;
 162     }
 163     else
 164     {
 165         *quant = (1 << 16) / d;
 166         *shift = 0;
 167     }
 168 }
 169
 170 void vp8cx_init_quantizer(VP8_COMP *cpi)
 171 {
 172     int i;
 173     int quant_val;
 174     int Q;
 175
 176     int zbin_boost[16] = {0, 0, 8, 10, 12, 14, 16, 20, 24, 28, 32, 36, 40, 44, 44, 44};
 177
 178     for (Q = 0; Q < QINDEX_RANGE; Q++)
 179     {
 180         // dc values
 181         quant_val = vp8_dc_quant(Q, cpi->common.y1dc_delta_q);
 182         cpi->Y1quant_fast[Q][0] = (1 << 16) / quant_val;
 183         vp8cx_invert_quant(cpi->sf.improved_quant, cpi->Y1quant[Q] + 0,
 184                            cpi->Y1quant_shift[Q] + 0, quant_val);
 185         cpi->Y1zbin[Q][0] = ((qzbin_factors[Q] * quant_val) + 64) >> 7;
 186         cpi->Y1round[Q][0] = (qrounding_factors[Q] * quant_val) >> 7;
 187         cpi->common.Y1dequant[Q][0] = quant_val;
 188         cpi->zrun_zbin_boost_y1[Q][0] = (quant_val * zbin_boost[0]) >> 7;
 189
 190         quant_val = vp8_dc2quant(Q, cpi->common.y2dc_delta_q);
 191         cpi->Y2quant_fast[Q][0] = (1 << 16) / quant_val;
 192         vp8cx_invert_quant(cpi->sf.improved_quant, cpi->Y2quant[Q] + 0,
 193                            cpi->Y2quant_shift[Q] + 0, quant_val);
 194         cpi->Y2zbin[Q][0] = ((qzbin_factors_y2[Q] * quant_val) + 64) >> 7;
 195         cpi->Y2round[Q][0] = (qrounding_factors_y2[Q] * quant_val) >> 7;
 196         cpi->common.Y2dequant[Q][0] = quant_val;
 197         cpi->zrun_zbin_boost_y2[Q][0] = (quant_val * zbin_boost[0]) >> 7;
 198
 199         quant_val = vp8_dc_uv_quant(Q, cpi->common.uvdc_delta_q);
 200         cpi->UVquant_fast[Q][0] = (1 << 16) / quant_val;
 201         vp8cx_invert_quant(cpi->sf.improved_quant, cpi->UVquant[Q] + 0,
 202                            cpi->UVquant_shift[Q] + 0, quant_val);
 203         cpi->UVzbin[Q][0] = ((qzbin_factors[Q] * quant_val) + 64) >> 7;;
 204         cpi->UVround[Q][0] = (qrounding_factors[Q] * quant_val) >> 7;
 205         cpi->common.UVdequant[Q][0] = quant_val;
 206         cpi->zrun_zbin_boost_uv[Q][0] = (quant_val * zbin_boost[0]) >> 7;
 207
 208         // all the ac values = ;
 209         for (i = 1; i < 16; i++)
 210         {
 211             int rc = vp8_default_zig_zag1d[i];
 212
 213             quant_val = vp8_ac_yquant(Q);
 214             cpi->Y1quant_fast[Q][rc] = (1 << 16) / quant_val;
 215             vp8cx_invert_quant(cpi->sf.improved_quant, cpi->Y1quant[Q] + rc,
 216                                cpi->Y1quant_shift[Q] + rc, quant_val);
 217             cpi->Y1zbin[Q][rc] = ((qzbin_factors[Q] * quant_val) + 64) >> 7;
 218             cpi->Y1round[Q][rc] = (qrounding_factors[Q] * quant_val) >> 7;
 219             cpi->common.Y1dequant[Q][rc] = quant_val;
 220             cpi->zrun_zbin_boost_y1[Q][i] = (quant_val * zbin_boost[i]) >> 7;
 221
 222             quant_val = vp8_ac2quant(Q, cpi->common.y2ac_delta_q);
 223             cpi->Y2quant_fast[Q][rc] = (1 << 16) / quant_val;
 224             vp8cx_invert_quant(cpi->sf.improved_quant, cpi->Y2quant[Q] + rc,
 225                                cpi->Y2quant_shift[Q] + rc, quant_val);
 226             cpi->Y2zbin[Q][rc] = ((qzbin_factors_y2[Q] * quant_val) + 64) >> 7;
 227             cpi->Y2round[Q][rc] = (qrounding_factors_y2[Q] * quant_val) >> 7;
 228             cpi->common.Y2dequant[Q][rc] = quant_val;
 229             cpi->zrun_zbin_boost_y2[Q][i] = (quant_val * zbin_boost[i]) >> 7;
 230
 231             quant_val = vp8_ac_uv_quant(Q, cpi->common.uvac_delta_q);
 232             cpi->UVquant_fast[Q][rc] = (1 << 16) / quant_val;
 233             vp8cx_invert_quant(cpi->sf.improved_quant, cpi->UVquant[Q] + rc,
 234                                cpi->UVquant_shift[Q] + rc, quant_val);
 235             cpi->UVzbin[Q][rc] = ((qzbin_factors[Q] * quant_val) + 64) >> 7;
 236             cpi->UVround[Q][rc] = (qrounding_factors[Q] * quant_val) >> 7;
 237             cpi->common.UVdequant[Q][rc] = quant_val;
 238             cpi->zrun_zbin_boost_uv[Q][i] = (quant_val * zbin_boost[i]) >> 7;
 239         }
 240     }
 241 }
 242 #else
 243 void vp8cx_init_quantizer(VP8_COMP *cpi)
 244 {
 245     int i;
 246     int quant_val;
 247     int Q;
 248
 249     int zbin_boost[16] = {0, 0, 8, 10, 12, 14, 16, 20, 24, 28, 32, 36, 40, 44, 44, 44};
 250
 251     for (Q = 0; Q < QINDEX_RANGE; Q++)
 252     {
 253         // dc values
 254         quant_val = vp8_dc_quant(Q, cpi->common.y1dc_delta_q);
 255         cpi->Y1quant[Q][0] = (1 << 16) / quant_val;
 256         cpi->Y1zbin[Q][0] = ((qzbin_factors[Q] * quant_val) + 64) >> 7;
 257         cpi->Y1round[Q][0] = (qrounding_factors[Q] * quant_val) >> 7;
 258         cpi->common.Y1dequant[Q][0] = quant_val;
 259         cpi->zrun_zbin_boost_y1[Q][0] = (quant_val * zbin_boost[0]) >> 7;
 260
 261         quant_val = vp8_dc2quant(Q, cpi->common.y2dc_delta_q);
 262         cpi->Y2quant[Q][0] = (1 << 16) / quant_val;
 263         cpi->Y2zbin[Q][0] = ((qzbin_factors_y2[Q] * quant_val) + 64) >> 7;
 264         cpi->Y2round[Q][0] = (qrounding_factors_y2[Q] * quant_val) >> 7;
 265         cpi->common.Y2dequant[Q][0] = quant_val;
 266         cpi->zrun_zbin_boost_y2[Q][0] = (quant_val * zbin_boost[0]) >> 7;
 267
 268         quant_val = vp8_dc_uv_quant(Q, cpi->common.uvdc_delta_q);
 269         cpi->UVquant[Q][0] = (1 << 16) / quant_val;
 270         cpi->UVzbin[Q][0] = ((qzbin_factors[Q] * quant_val) + 64) >> 7;;
 271         cpi->UVround[Q][0] = (qrounding_factors[Q] * quant_val) >> 7;
 272         cpi->common.UVdequant[Q][0] = quant_val;
 273         cpi->zrun_zbin_boost_uv[Q][0] = (quant_val * zbin_boost[0]) >> 7;
 274
 275         // all the ac values = ;
 276         for (i = 1; i < 16; i++)
 277         {
 278             int rc = vp8_default_zig_zag1d[i];
 279
 280             quant_val = vp8_ac_yquant(Q);
 281             cpi->Y1quant[Q][rc] = (1 << 16) / quant_val;
 282             cpi->Y1zbin[Q][rc] = ((qzbin_factors[Q] * quant_val) + 64) >> 7;
 283             cpi->Y1round[Q][rc] = (qrounding_factors[Q] * quant_val) >> 7;
 284             cpi->common.Y1dequant[Q][rc] = quant_val;
 285             cpi->zrun_zbin_boost_y1[Q][i] = (quant_val * zbin_boost[i]) >> 7;
 286
 287             quant_val = vp8_ac2quant(Q, cpi->common.y2ac_delta_q);
 288             cpi->Y2quant[Q][rc] = (1 << 16) / quant_val;
 289             cpi->Y2zbin[Q][rc] = ((qzbin_factors_y2[Q] * quant_val) + 64) >> 7;
 290             cpi->Y2round[Q][rc] = (qrounding_factors_y2[Q] * quant_val) >> 7;
 291             cpi->common.Y2dequant[Q][rc] = quant_val;
 292             cpi->zrun_zbin_boost_y2[Q][i] = (quant_val * zbin_boost[i]) >> 7;
 293
 294             quant_val = vp8_ac_uv_quant(Q, cpi->common.uvac_delta_q);
 295             cpi->UVquant[Q][rc] = (1 << 16) / quant_val;
 296             cpi->UVzbin[Q][rc] = ((qzbin_factors[Q] * quant_val) + 64) >> 7;
 297             cpi->UVround[Q][rc] = (qrounding_factors[Q] * quant_val) >> 7;
 298             cpi->common.UVdequant[Q][rc] = quant_val;
 299             cpi->zrun_zbin_boost_uv[Q][i] = (quant_val * zbin_boost[i]) >> 7;
 300         }
 301     }
 302 }
 303 #endif
 304 void vp8cx_mb_init_quantizer(VP8_COMP *cpi, MACROBLOCK *x)
 305 {
 306     int i;
 307     int QIndex;
 308     MACROBLOCKD *xd = &x->e_mbd;
 309     int zbin_extra;
 310
 311     // Select the baseline MB Q index.
 312     if (xd->segmentation_enabled)
 313     {
 314         // Abs Value
 315         if (xd->mb_segement_abs_delta == SEGMENT_ABSDATA)
 316
 317             QIndex = xd->segment_feature_data[MB_LVL_ALT_Q][xd->mode_info_context->mbmi.segment_id];
 318         // Delta Value
 319         else
 320         {
 321             QIndex = cpi->common.base_qindex + xd->segment_feature_data[MB_LVL_ALT_Q][xd->mode_info_context->mbmi.segment_id];
 322             QIndex = (QIndex >= 0) ? ((QIndex <= MAXQ) ? QIndex : MAXQ) : 0;    // Clamp to valid range
 323         }
 324     }
 325     else
 326         QIndex = cpi->common.base_qindex;
 327
 328     // Y
 329     zbin_extra = (cpi->common.Y1dequant[QIndex][1] * (cpi->zbin_over_quant + cpi->zbin_mode_boost)) >> 7;
 330
 331     for (i = 0; i < 16; i++)
 332     {
 333         x->block[i].quant = cpi->Y1quant[QIndex];
 334         x->block[i].quant_fast = cpi->Y1quant_fast[QIndex];
 335         x->block[i].quant_shift = cpi->Y1quant_shift[QIndex];
 336         x->block[i].zbin = cpi->Y1zbin[QIndex];
 337         x->block[i].round = cpi->Y1round[QIndex];
 338         x->e_mbd.block[i].dequant = cpi->common.Y1dequant[QIndex];
 339         x->block[i].zrun_zbin_boost = cpi->zrun_zbin_boost_y1[QIndex];
 340         x->block[i].zbin_extra = (short)zbin_extra;
 341     }
 342
 343     // UV
 344     zbin_extra = (cpi->common.UVdequant[QIndex][1] * (cpi->zbin_over_quant + cpi->zbin_mode_boost)) >> 7;
 345
 346     for (i = 16; i < 24; i++)
 347     {
 348         x->block[i].quant = cpi->UVquant[QIndex];
 349         x->block[i].quant_fast = cpi->UVquant_fast[QIndex];
 350         x->block[i].quant_shift = cpi->UVquant_shift[QIndex];
 351         x->block[i].zbin = cpi->UVzbin[QIndex];
 352         x->block[i].round = cpi->UVround[QIndex];
 353         x->e_mbd.block[i].dequant = cpi->common.UVdequant[QIndex];
 354         x->block[i].zrun_zbin_boost = cpi->zrun_zbin_boost_uv[QIndex];
 355         x->block[i].zbin_extra = (short)zbin_extra;
 356     }
 357
 358     // Y2
 359     zbin_extra = (cpi->common.Y2dequant[QIndex][1] * ((cpi->zbin_over_quant / 2) + cpi->zbin_mode_boost)) >> 7;
 360     x->block[24].quant_fast = cpi->Y2quant_fast[QIndex];
 361     x->block[24].quant = cpi->Y2quant[QIndex];
 362     x->block[24].quant_shift = cpi->Y2quant_shift[QIndex];
 363     x->block[24].zbin = cpi->Y2zbin[QIndex];
 364     x->block[24].round = cpi->Y2round[QIndex];
 365     x->e_mbd.block[24].dequant = cpi->common.Y2dequant[QIndex];
 366     x->block[24].zrun_zbin_boost = cpi->zrun_zbin_boost_y2[QIndex];
 367     x->block[24].zbin_extra = (short)zbin_extra;
 368
 369     /* save this macroblock QIndex for vp8_update_zbin_extra() */
 370     x->q_index = QIndex;
 371 }
 372 void vp8_update_zbin_extra(VP8_COMP *cpi, MACROBLOCK *x)
 373 {
 374     int i;
 375     int QIndex = x->q_index;
 376     int zbin_extra;
 377
 378     // Y
 379     zbin_extra = (cpi->common.Y1dequant[QIndex][1] * (cpi->zbin_over_quant + cpi->zbin_mode_boost)) >> 7;
 380     for (i = 0; i < 16; i++)
 381     {
 382         x->block[i].zbin_extra = (short)zbin_extra;
 383     }
 384
 385     // UV
 386     zbin_extra = (cpi->common.UVdequant[QIndex][1] * (cpi->zbin_over_quant + cpi->zbin_mode_boost)) >> 7;
 387     for (i = 16; i < 24; i++)
 388     {
 389         x->block[i].zbin_extra = (short)zbin_extra;
 390     }
 391
 392     // Y2
 393     zbin_extra = (cpi->common.Y2dequant[QIndex][1] * ((cpi->zbin_over_quant / 2) + cpi->zbin_mode_boost)) >> 7;
 394     x->block[24].zbin_extra = (short)zbin_extra;
 395 }
 396
 397 void vp8cx_frame_init_quantizer(VP8_COMP *cpi)
 398 {
 399     // Clear Zbin mode boost for default case
 400     cpi->zbin_mode_boost = 0;
 401
 402     // vp8cx_init_quantizer() is first called in vp8_create_compressor(). A check is added here so that vp8cx_init_quantizer() is only called
 403     // when these values are not all zero.
 404     if (cpi->common.y1dc_delta_q | cpi->common.y2dc_delta_q | cpi->common.uvdc_delta_q | cpi->common.y2ac_delta_q | cpi->common.uvac_delta_q)
 405     {
 406         vp8cx_init_quantizer(cpi);
 407     }
 408
 409     // MB level quantizer setup
 410     vp8cx_mb_init_quantizer(cpi, &cpi->mb);
 411 }
 412
 413
 414 /* activity_avg must be positive, or flat regions could get a zero weight
 415  *  (infinite lambda), which confounds analysis.
 416  * This also avoids the need for divide by zero checks in
 417  *  vp8_activity_masking().
 418  */
 419 #define VP8_ACTIVITY_AVG_MIN (64)
 420
 421 /* This is used as a reference when computing the source variance for the
 422  *  purposes of activity masking.
 423  * Eventually this should be replaced by custom no-reference routines,
 424  *  which will be faster.
 425  */
 426 static const unsigned char VP8_VAR_OFFS[16]=
 427 {
 428     128,128,128,128,128,128,128,128,128,128,128,128,128,128,128,128
 429 };
 430
 431 unsigned int vp8_activity_masking(VP8_COMP *cpi, MACROBLOCK *x)
 432 {
 433     unsigned int act;
 434     unsigned int sse;
 435     int sum;
 436     unsigned int a;
 437     unsigned int b;
 438     /* TODO: This could also be done over smaller areas (8x8), but that would
 439      *  require extensive changes elsewhere, as lambda is assumed to be fixed
 440      *  over an entire MB in most of the code.
 441      * Another option is to compute four 8x8 variances, and pick a single
 442      *  lambda using a non-linear combination (e.g., the smallest, or second
 443      *  smallest, etc.).
 444      */
 445     VARIANCE_INVOKE(&cpi->rtcd.variance, get16x16var)(x->src.y_buffer,
 446      x->src.y_stride, VP8_VAR_OFFS, 0, &sse, &sum);
 447     /* This requires a full 32 bits of precision. */
 448     act = (sse<<8) - sum*sum;
 449     /* Drop 4 to give us some headroom to work with. */
 450     act = (act + 8) >> 4;
 451     /* If the region is flat, lower the activity some more. */
 452     if (act < 8<<12)
 453         act = act < 5<<12 ? act : 5<<12;
 454     /* TODO: For non-flat regions, edge regions should receive less masking
 455      *  than textured regions, but identifying edge regions quickly and
 456      *  reliably enough is still a subject of experimentation.
 457      * This will be most noticable near edges with a complex shape (e.g.,
 458      *  text), but the 4x4 transform size should make this less of a problem
 459      *  than it would be for an 8x8 transform.
 460      */
 461     /* Apply the masking to the RD multiplier. */
 462     a = act + 4*cpi->activity_avg;
 463     b = 4*act + cpi->activity_avg;
 464     x->rdmult = (unsigned int)(((INT64)x->rdmult*b + (a>>1))/a);
 465     return act;
 466 }
 467
 468
 469
/* Encode one row of macroblocks.
 *
 *  cpi            - encoder instance
 *  cm             - common state (frame buffers, dimensions, frame type)
 *  mb_row         - index of the row to encode
 *  x, xd          - encoder / decoder-side macroblock state; their source,
 *                   mode-info, partition-info, above-context and GF-active
 *                   pointers are advanced as the row is processed
 *  tp             - token write pointer handed to the per-MB encoders; its
 *                   value before and after the row is recorded in
 *                   cpi->tplist[mb_row]
 *  segment_counts - per-segment MB counters, incremented for each MB
 *  totalrate      - running sum of the per-MB encode return values
 *
 * On return the reconstructed row has been border-extended with
 * vp8_extend_mb_row() and the row's activity total has been added to
 * x->activity_sum.
 */
static
void encode_mb_row(VP8_COMP *cpi,
                   VP8_COMMON *cm,
                   int mb_row,
                   MACROBLOCK  *x,
                   MACROBLOCKD *xd,
                   TOKENEXTRA **tp,
                   int *segment_counts,
                   int *totalrate)
{
    INT64 activity_sum = 0;
    int i;
    int recon_yoffset, recon_uvoffset;
    int mb_col;
    int ref_fb_idx = cm->lst_fb_idx;
    int dst_fb_idx = cm->new_fb_idx;
    int recon_y_stride = cm->yv12_fb[ref_fb_idx].y_stride;
    int recon_uv_stride = cm->yv12_fb[ref_fb_idx].uv_stride;
    // Index of this row's first MB in the per-MB maps
    // (segmentation_map, active_map, cyclic_refresh_map)
    int seg_map_index = (mb_row * cpi->common.mb_cols);


    // reset above block coeffs
    xd->above_context = cm->above_context;

    xd->up_available = (mb_row != 0);
    // Offsets of this row's first MB inside the frame buffers
    // (16 luma rows / 8 chroma rows per MB row)
    recon_yoffset = (mb_row * recon_y_stride * 16);
    recon_uvoffset = (mb_row * recon_uv_stride * 8);

    cpi->tplist[mb_row].start = *tp;
    //printf("Main mb_row = %d\n", mb_row);

    // Distance of Mb to the top & bottom edges, specified in 1/8th pel
    // units as they are always compared to values that are in 1/8th pel units
    xd->mb_to_top_edge = -((mb_row * 16) << 3);
    xd->mb_to_bottom_edge = ((cm->mb_rows - 1 - mb_row) * 16) << 3;

    // Set up limit values for vertical motion vector components
    // to prevent them extending beyond the UMV borders
    x->mv_row_min = -((mb_row * 16) + (VP8BORDERINPIXELS - 16));
    x->mv_row_max = ((cm->mb_rows - 1 - mb_row) * 16)
                        + (VP8BORDERINPIXELS - 16);

    // for each macroblock col in image
    for (mb_col = 0; mb_col < cm->mb_cols; mb_col++)
    {
        // Distance of Mb to the left & right edges, specified in
        // 1/8th pel units as they are always compared to values
        // that are in 1/8th pel units
        xd->mb_to_left_edge = -((mb_col * 16) << 3);
        xd->mb_to_right_edge = ((cm->mb_cols - 1 - mb_col) * 16) << 3;

        // Set up limit values for horizontal motion vector components
        // to prevent them extending beyond the UMV borders
        x->mv_col_min = -((mb_col * 16) + (VP8BORDERINPIXELS - 16));
        x->mv_col_max = ((cm->mb_cols - 1 - mb_col) * 16)
                            + (VP8BORDERINPIXELS - 16);

        // Point the destination at this MB inside the new frame buffer
        xd->dst.y_buffer = cm->yv12_fb[dst_fb_idx].y_buffer + recon_yoffset;
        xd->dst.u_buffer = cm->yv12_fb[dst_fb_idx].u_buffer + recon_uvoffset;
        xd->dst.v_buffer = cm->yv12_fb[dst_fb_idx].v_buffer + recon_uvoffset;
        xd->left_available = (mb_col != 0);

        // Start every MB from the frame-level RD constants;
        // vp8_activity_masking() may then rescale rdmult.
        x->rddiv = cpi->RDDIV;
        x->rdmult = cpi->RDMULT;

        if(cpi->oxcf.tuning == VP8_TUNE_SSIM)
            activity_sum += vp8_activity_masking(cpi, x);

        // Is segmentation enabled
        // MB level adjustment to quantizer
        if (xd->segmentation_enabled)
        {
            // Set the segment id for the current MB from the segmentation map;
            // map values above 3 fall back to segment 0
            if (cpi->segmentation_map[seg_map_index+mb_col] <= 3)
                xd->mode_info_context->mbmi.segment_id = cpi->segmentation_map[seg_map_index+mb_col];
            else
                xd->mode_info_context->mbmi.segment_id = 0;

            vp8cx_mb_init_quantizer(cpi, x);
        }
        else
            xd->mode_info_context->mbmi.segment_id = 0;         // Set to Segment 0 by default

        x->active_ptr = cpi->active_map + seg_map_index + mb_col;

        if (cm->frame_type == KEY_FRAME)
        {
            *totalrate += vp8cx_encode_intra_macro_block(cpi, x, tp);
#ifdef MODE_STATS
            // NOTE(review): the rest of this function reads the mode via
            // xd->mode_info_context->mbmi; confirm xd->mbmi is still valid.
            y_modes[xd->mbmi.mode] ++;
#endif
        }
        else
        {
            *totalrate += vp8cx_encode_inter_macroblock(cpi, x, tp, recon_yoffset, recon_uvoffset);

#ifdef MODE_STATS
            // NOTE(review): as above, xd->mbmi and x->partition differ from
            // the xd->mode_info_context / x->partition_info members used
            // elsewhere in this function; confirm before enabling MODE_STATS.
            inter_y_modes[xd->mbmi.mode] ++;

            if (xd->mbmi.mode == SPLITMV)
            {
                int b;

                for (b = 0; b < xd->mbmi.partition_count; b++)
                {
                    inter_b_modes[x->partition->bmi[b].mode] ++;
                }
            }

#endif

            // Count of last ref frame 0,0 usage
            if ((xd->mode_info_context->mbmi.mode == ZEROMV) && (xd->mode_info_context->mbmi.ref_frame == LAST_FRAME))
                cpi->inter_zz_count ++;

            // Special case code for cyclic refresh
            // If cyclic update enabled then copy the MB's segment_id (which may have been updated based on mode
            // during vp8cx_encode_inter_macroblock()) back into the global segmentation map
            if (cpi->cyclic_refresh_mode_enabled && xd->segmentation_enabled)
            {
                cpi->segmentation_map[seg_map_index+mb_col] = xd->mode_info_context->mbmi.segment_id;

                // If the block has been refreshed mark it as clean (the magnitude of the -ve influences how long it will be before we consider another refresh):
                // Else if it was coded (last frame 0,0) and has not already been refreshed then mark it as a candidate for cleanup next time (marked 0)
                // else mark it as dirty (1).
                if (xd->mode_info_context->mbmi.segment_id)
                    cpi->cyclic_refresh_map[seg_map_index+mb_col] = -1;
                else if ((xd->mode_info_context->mbmi.mode == ZEROMV) && (xd->mode_info_context->mbmi.ref_frame == LAST_FRAME))
                {
                    if (cpi->cyclic_refresh_map[seg_map_index+mb_col] == 1)
                        cpi->cyclic_refresh_map[seg_map_index+mb_col] = 0;
                }
                else
                    cpi->cyclic_refresh_map[seg_map_index+mb_col] = 1;

            }
        }

        // Updated after every MB, so it ends up holding the token position
        // after the last MB of the row
        cpi->tplist[mb_row].stop = *tp;

        x->gf_active_ptr++;      // Increment pointer into gf usage flags structure for next mb

        // Copy the 16 per-block mode infos into the frame's mode info array
        for (i = 0; i < 16; i++)
            vpx_memcpy(&xd->mode_info_context->bmi[i], &xd->block[i].bmi, sizeof(xd->block[i].bmi));

        // adjust to the next column of macroblocks
        x->src.y_buffer += 16;
        x->src.u_buffer += 8;
        x->src.v_buffer += 8;

        recon_yoffset += 16;
        recon_uvoffset += 8;

        // Keep track of segment usage
        segment_counts[xd->mode_info_context->mbmi.segment_id] ++;

        // skip to next mb
        xd->mode_info_context++;
        x->partition_info++;

        xd->above_context++;
        // Publish the column just completed on this row
        cpi->current_mb_col_main = mb_col;
    }

    //extend the recon for intra prediction
    vp8_extend_mb_row(
        &cm->yv12_fb[dst_fb_idx],
        xd->dst.y_buffer + 16,
        xd->dst.u_buffer + 8,
        xd->dst.v_buffer + 8);

    // this is to account for the border
    xd->mode_info_context++;
    x->partition_info++;
    x->activity_sum += activity_sum;
}
 646
 647
 648
 649
 650
 651 void vp8_encode_frame(VP8_COMP *cpi)
 652 {
 653     int mb_row;
 654     MACROBLOCK *const x = & cpi->mb;
 655     VP8_COMMON *const cm = & cpi->common;
 656     MACROBLOCKD *const xd = & x->e_mbd;
 657
 658     TOKENEXTRA *tp = cpi->tok;
 659     int segment_counts[MAX_MB_SEGMENTS];
 660     int totalrate;
 661
 662     // Functions setup for all frame types so we can use MC in AltRef
 663     if (cm->mcomp_filter_type == SIXTAP)
 664     {
 665         xd->subpixel_predict        = SUBPIX_INVOKE(
 666                                         &cpi->common.rtcd.subpix, sixtap4x4);
 667         xd->subpixel_predict8x4     = SUBPIX_INVOKE(
 668                                         &cpi->common.rtcd.subpix, sixtap8x4);
 669         xd->subpixel_predict8x8     = SUBPIX_INVOKE(
 670                                         &cpi->common.rtcd.subpix, sixtap8x8);
 671         xd->subpixel_predict16x16   = SUBPIX_INVOKE(
 672                                         &cpi->common.rtcd.subpix, sixtap16x16);
 673     }
 674     else
 675     {
 676         xd->subpixel_predict        = SUBPIX_INVOKE(
 677                                         &cpi->common.rtcd.subpix, bilinear4x4);
 678         xd->subpixel_predict8x4     = SUBPIX_INVOKE(
 679                                         &cpi->common.rtcd.subpix, bilinear8x4);
 680         xd->subpixel_predict8x8     = SUBPIX_INVOKE(
 681                                         &cpi->common.rtcd.subpix, bilinear8x8);
 682         xd->subpixel_predict16x16   = SUBPIX_INVOKE(
 683                                       &cpi->common.rtcd.subpix, bilinear16x16);
 684     }
 685
 686     x->gf_active_ptr = (signed char *)cpi->gf_active_flags;     // Point to base of GF active flags data structure
 687
 688     x->vector_range = 32;
 689
 690     // Count of MBs using the alternate Q if any
 691     cpi->alt_qcount = 0;
 692
 693     // Reset frame count of inter 0,0 motion vector useage.
 694     cpi->inter_zz_count = 0;
 695
 696     vpx_memset(segment_counts, 0, sizeof(segment_counts));
 697
 698     cpi->prediction_error = 0;
 699     cpi->intra_error = 0;
 700     cpi->skip_true_count = 0;
 701     cpi->skip_false_count = 0;
 702
 703 #if 0
 704     // Experimental code
 705     cpi->frame_distortion = 0;
 706     cpi->last_mb_distortion = 0;
 707 #endif
 708
 709     totalrate = 0;
 710
 711     x->partition_info = x->pi;
 712
 713     xd->mode_info_context = cm->mi;
 714     xd->mode_info_stride = cm->mode_info_stride;
 715
 716     xd->frame_type = cm->frame_type;
 717
 718     xd->frames_since_golden = cm->frames_since_golden;
 719     xd->frames_till_alt_ref_frame = cm->frames_till_alt_ref_frame;
 720     vp8_zero(cpi->MVcount);
 721     // vp8_zero( Contexts)
 722     vp8_zero(cpi->coef_counts);
 723
 724     // reset intra mode contexts
 725     if (cm->frame_type == KEY_FRAME)
 726         vp8_init_mbmode_probs(cm);
 727
 728
 729     vp8cx_frame_init_quantizer(cpi);
 730
 731     if (cpi->compressor_speed == 2)
 732     {
 733         if (cpi->oxcf.cpu_used < 0)
 734             cpi->Speed = -(cpi->oxcf.cpu_used);
 735         else
 736             vp8_auto_select_speed(cpi);
 737     }
 738
 739     vp8_initialize_rd_consts(cpi, vp8_dc_quant(cm->base_qindex, cm->y1dc_delta_q));
 740     vp8cx_initialize_me_consts(cpi, cm->base_qindex);
 741
 742     // Copy data over into macro block data sturctures.
 743
 744     x->src = * cpi->Source;
 745     xd->pre = cm->yv12_fb[cm->lst_fb_idx];
 746     xd->dst = cm->yv12_fb[cm->new_fb_idx];
 747
 748     // set up frame new frame for intra coded blocks
 749
 750     vp8_setup_intra_recon(&cm->yv12_fb[cm->new_fb_idx]);
 751
 752     vp8_build_block_offsets(x);
 753
 754     vp8_setup_block_dptrs(&x->e_mbd);
 755
 756     vp8_setup_block_ptrs(x);
 757
 758     x->activity_sum = 0;
 759
 760     xd->mode_info_context->mbmi.mode = DC_PRED;
 761     xd->mode_info_context->mbmi.uv_mode = DC_PRED;
 762
 763     xd->left_context = &cm->left_context;
 764
 765     vp8_zero(cpi->count_mb_ref_frame_usage)
 766     vp8_zero(cpi->ymode_count)
 767     vp8_zero(cpi->uv_mode_count)
 768
 769     x->mvc = cm->fc.mvc;
 770
 771     vpx_memset(cm->above_context, 0, sizeof(ENTROPY_CONTEXT_PLANES) * cm->mb_cols);
 772
 773     {
 774         struct vpx_usec_timer  emr_timer;
 775         vpx_usec_timer_start(&emr_timer);
 776
 777         if (!cpi->b_multi_threaded)
 778         {
 779             // for each macroblock row in image
 780             for (mb_row = 0; mb_row < cm->mb_rows; mb_row++)
 781             {
 782
 783                 vp8_zero(cm->left_context)
 784
 785                 encode_mb_row(cpi, cm, mb_row, x, xd, &tp, segment_counts, &totalrate);
 786
 787                 // adjust to the next row of mbs
 788                 x->src.y_buffer += 16 * x->src.y_stride - 16 * cm->mb_cols;
 789                 x->src.u_buffer += 8 * x->src.uv_stride - 8 * cm->mb_cols;
 790                 x->src.v_buffer += 8 * x->src.uv_stride - 8 * cm->mb_cols;
 791             }
 792
 793             cpi->tok_count = tp - cpi->tok;
 794
 795         }
 796         else
 797         {
 798 #if CONFIG_MULTITHREAD
 799             int i;
 800
 801             vp8cx_init_mbrthread_data(cpi, x, cpi->mb_row_ei, 1,  cpi->encoding_thread_count);
 802
 803             for (mb_row = 0; mb_row < cm->mb_rows; mb_row += (cpi->encoding_thread_count + 1))
 804             {
 805                 cpi->current_mb_col_main = -1;
 806
 807                 for (i = 0; i < cpi->encoding_thread_count; i++)
 808                 {
 809                     if ((mb_row + i + 1) >= cm->mb_rows)
 810                         break;
 811
 812                     cpi->mb_row_ei[i].mb_row = mb_row + i + 1;
 813                     cpi->mb_row_ei[i].tp  = cpi->tok + (mb_row + i + 1) * (cm->mb_cols * 16 * 24);
 814                     cpi->mb_row_ei[i].current_mb_col = -1;
 815                     //SetEvent(cpi->h_event_mbrencoding[i]);
 816                     sem_post(&cpi->h_event_mbrencoding[i]);
 817                 }
 818
 819                 vp8_zero(cm->left_context)
 820
 821                 tp = cpi->tok + mb_row * (cm->mb_cols * 16 * 24);
 822
 823                 encode_mb_row(cpi, cm, mb_row, x, xd, &tp, segment_counts, &totalrate);
 824
 825                 // adjust to the next row of mbs
 826                 x->src.y_buffer += 16 * x->src.y_stride * (cpi->encoding_thread_count + 1) - 16 * cm->mb_cols;
 827                 x->src.u_buffer +=  8 * x->src.uv_stride * (cpi->encoding_thread_count + 1) - 8 * cm->mb_cols;
 828                 x->src.v_buffer +=  8 * x->src.uv_stride * (cpi->encoding_thread_count + 1) - 8 * cm->mb_cols;
 829
 830                 xd->mode_info_context += xd->mode_info_stride * cpi->encoding_thread_count;
 831                 x->partition_info  += xd->mode_info_stride * cpi->encoding_thread_count;
 832
 833                 if (mb_row < cm->mb_rows - 1)
 834                     //WaitForSingleObject(cpi->h_event_main, INFINITE);
 835                     sem_wait(&cpi->h_event_main);
 836             }
 837
 838             /*
 839             for( ;mb_row<cm->mb_rows; mb_row ++)
 840             {
 841             vp8_zero( cm->left_context)
 842
 843             tp = cpi->tok + mb_row * (cm->mb_cols * 16 * 24);
 844
 845             encode_mb_row(cpi, cm, mb_row, x, xd, &tp, segment_counts, &totalrate);
 846             // adjust to the next row of mbs
 847             x->src.y_buffer += 16 * x->src.y_stride - 16 * cm->mb_cols;
 848             x->src.u_buffer +=  8 * x->src.uv_stride - 8 * cm->mb_cols;
 849             x->src.v_buffer +=  8 * x->src.uv_stride - 8 * cm->mb_cols;
 850
 851             }
 852             */
 853             cpi->tok_count = 0;
 854
 855             for (mb_row = 0; mb_row < cm->mb_rows; mb_row ++)
 856             {
 857                 cpi->tok_count += cpi->tplist[mb_row].stop - cpi->tplist[mb_row].start;
 858             }
 859
 860             if (xd->segmentation_enabled)
 861             {
 862
 863                 int i, j;
 864
 865                 if (xd->segmentation_enabled)
 866                 {
 867
 868                     for (i = 0; i < cpi->encoding_thread_count; i++)
 869                     {
 870                         for (j = 0; j < 4; j++)
 871                             segment_counts[j] += cpi->mb_row_ei[i].segment_counts[j];
 872                     }
 873                 }
 874
 875             }
 876
 877             for (i = 0; i < cpi->encoding_thread_count; i++)
 878             {
 879                 totalrate += cpi->mb_row_ei[i].totalrate;
 880             }
 881
 882             for (i = 0; i < cpi->encoding_thread_count; i++)
 883             {
 884                 x->activity_sum += cpi->mb_row_ei[i].mb.activity_sum;
 885             }
 886
 887 #endif
 888
 889         }
 890
 891         vpx_usec_timer_mark(&emr_timer);
 892         cpi->time_encode_mb_row += vpx_usec_timer_elapsed(&emr_timer);
 893
 894     }
 895
 896
 897     // Work out the segment probabilites if segmentation is enabled
 898     if (xd->segmentation_enabled)
 899     {
 900         int tot_count;
 901         int i;
 902
 903         // Set to defaults
 904         vpx_memset(xd->mb_segment_tree_probs, 255 , sizeof(xd->mb_segment_tree_probs));
 905
 906         tot_count = segment_counts[0] + segment_counts[1] + segment_counts[2] + segment_counts[3];
 907
 908         if (tot_count)
 909         {
 910             xd->mb_segment_tree_probs[0] = ((segment_counts[0] + segment_counts[1]) * 255) / tot_count;
 911
 912             tot_count = segment_counts[0] + segment_counts[1];
 913
 914             if (tot_count > 0)
 915             {
 916                 xd->mb_segment_tree_probs[1] = (segment_counts[0] * 255) / tot_count;
 917             }
 918
 919             tot_count = segment_counts[2] + segment_counts[3];
 920
 921             if (tot_count > 0)
 922                 xd->mb_segment_tree_probs[2] = (segment_counts[2] * 255) / tot_count;
 923
 924             // Zero probabilities not allowed
 925             for (i = 0; i < MB_FEATURE_TREE_PROBS; i ++)
 926             {
 927                 if (xd->mb_segment_tree_probs[i] == 0)
 928                     xd->mb_segment_tree_probs[i] = 1;
 929             }
 930         }
 931     }
 932
 933     // 256 rate units to the bit
 934     cpi->projected_frame_size = totalrate >> 8;   // projected_frame_size in units of BYTES
 935
 936     // Make a note of the percentage MBs coded Intra.
 937     if (cm->frame_type == KEY_FRAME)
 938     {
 939         cpi->this_frame_percent_intra = 100;
 940     }
 941     else
 942     {
 943         int tot_modes;
 944
 945         tot_modes = cpi->count_mb_ref_frame_usage[INTRA_FRAME]
 946                     + cpi->count_mb_ref_frame_usage[LAST_FRAME]
 947                     + cpi->count_mb_ref_frame_usage[GOLDEN_FRAME]
 948                     + cpi->count_mb_ref_frame_usage[ALTREF_FRAME];
 949
 950         if (tot_modes)
 951             cpi->this_frame_percent_intra = cpi->count_mb_ref_frame_usage[INTRA_FRAME] * 100 / tot_modes;
 952
 953     }
 954
 955 #if 0
 956     {
 957         int cnt = 0;
 958         int flag[2] = {0, 0};
 959
 960         for (cnt = 0; cnt < MVPcount; cnt++)
 961         {
 962             if (cm->fc.pre_mvc[0][cnt] != cm->fc.mvc[0][cnt])
 963             {
 964                 flag[0] = 1;
 965                 vpx_memcpy(cm->fc.pre_mvc[0], cm->fc.mvc[0], MVPcount);
 966                 break;
 967             }
 968         }
 969
 970         for (cnt = 0; cnt < MVPcount; cnt++)
 971         {
 972             if (cm->fc.pre_mvc[1][cnt] != cm->fc.mvc[1][cnt])
 973             {
 974                 flag[1] = 1;
 975                 vpx_memcpy(cm->fc.pre_mvc[1], cm->fc.mvc[1], MVPcount);
 976                 break;
 977             }
 978         }
 979
 980         if (flag[0] || flag[1])
 981             vp8_build_component_cost_table(cpi->mb.mvcost, cpi->mb.mvsadcost, (const MV_CONTEXT *) cm->fc.mvc, flag);
 982     }
 983 #endif
 984
 985     // Adjust the projected reference frame useage probability numbers to reflect
 986     // what we have just seen. This may be usefull when we make multiple itterations
 987     // of the recode loop rather than continuing to use values from the previous frame.
 988     if ((cm->frame_type != KEY_FRAME) && !cm->refresh_alt_ref_frame && !cm->refresh_golden_frame)
 989     {
 990         const int *const rfct = cpi->count_mb_ref_frame_usage;
 991         const int rf_intra = rfct[INTRA_FRAME];
 992         const int rf_inter = rfct[LAST_FRAME] + rfct[GOLDEN_FRAME] + rfct[ALTREF_FRAME];
 993
 994         if ((rf_intra + rf_inter) > 0)
 995         {
 996             cpi->prob_intra_coded = (rf_intra * 255) / (rf_intra + rf_inter);
 997
 998             if (cpi->prob_intra_coded < 1)
 999                 cpi->prob_intra_coded = 1;
1000
1001             if ((cm->frames_since_golden > 0) || cpi->source_alt_ref_active)
1002             {
1003                 cpi->prob_last_coded = rf_inter ? (rfct[LAST_FRAME] * 255) / rf_inter : 128;
1004
1005                 if (cpi->prob_last_coded < 1)
1006                     cpi->prob_last_coded = 1;
1007
1008                 cpi->prob_gf_coded = (rfct[GOLDEN_FRAME] + rfct[ALTREF_FRAME])
1009                                      ? (rfct[GOLDEN_FRAME] * 255) / (rfct[GOLDEN_FRAME] + rfct[ALTREF_FRAME]) : 128;
1010
1011                 if (cpi->prob_gf_coded < 1)
1012                     cpi->prob_gf_coded = 1;
1013             }
1014         }
1015     }
1016
1017 #if 0
1018     // Keep record of the total distortion this time around for future use
1019     cpi->last_frame_distortion = cpi->frame_distortion;
1020 #endif
1021
1022     /* Update the average activity for the next frame.
1023      * This is feed-forward for now; it could also be saved in two-pass, or
1024      *  done during lookahead when that is eventually added.
1025      */
1026     cpi->activity_avg = (unsigned int )(x->activity_sum/cpi->common.MBs);
1027     if (cpi->activity_avg < VP8_ACTIVITY_AVG_MIN)
1028         cpi->activity_avg = VP8_ACTIVITY_AVG_MIN;
1029
1030 }
1031 void vp8_setup_block_ptrs(MACROBLOCK *x)
1032 {
1033     int r, c;
1034     int i;
1035
1036     for (r = 0; r < 4; r++)
1037     {
1038         for (c = 0; c < 4; c++)
1039         {
1040             x->block[r*4+c].src_diff = x->src_diff + r * 4 * 16 + c * 4;
1041         }
1042     }
1043
1044     for (r = 0; r < 2; r++)
1045     {
1046         for (c = 0; c < 2; c++)
1047         {
1048             x->block[16 + r*2+c].src_diff = x->src_diff + 256 + r * 4 * 8 + c * 4;
1049         }
1050     }
1051
1052
1053     for (r = 0; r < 2; r++)
1054     {
1055         for (c = 0; c < 2; c++)
1056         {
1057             x->block[20 + r*2+c].src_diff = x->src_diff + 320 + r * 4 * 8 + c * 4;
1058         }
1059     }
1060
1061     x->block[24].src_diff = x->src_diff + 384;
1062
1063
1064     for (i = 0; i < 25; i++)
1065     {
1066         x->block[i].coeff = x->coeff + i * 16;
1067     }
1068 }
1069
1070 void vp8_build_block_offsets(MACROBLOCK *x)
1071 {
1072     int block = 0;
1073     int br, bc;
1074
1075     vp8_build_block_doffsets(&x->e_mbd);
1076
1077     // y blocks
1078     for (br = 0; br < 4; br++)
1079     {
1080         for (bc = 0; bc < 4; bc++)
1081         {
1082             BLOCK *this_block = &x->block[block];
1083             this_block->base_src = &x->src.y_buffer;
1084             this_block->src_stride = x->src.y_stride;
1085             this_block->src = 4 * br * this_block->src_stride + 4 * bc;
1086             ++block;
1087         }
1088     }
1089
1090     // u blocks
1091     for (br = 0; br < 2; br++)
1092     {
1093         for (bc = 0; bc < 2; bc++)
1094         {
1095             BLOCK *this_block = &x->block[block];
1096             this_block->base_src = &x->src.u_buffer;
1097             this_block->src_stride = x->src.uv_stride;
1098             this_block->src = 4 * br * this_block->src_stride + 4 * bc;
1099             ++block;
1100         }
1101     }
1102
1103     // v blocks
1104     for (br = 0; br < 2; br++)
1105     {
1106         for (bc = 0; bc < 2; bc++)
1107         {
1108             BLOCK *this_block = &x->block[block];
1109             this_block->base_src = &x->src.v_buffer;
1110             this_block->src_stride = x->src.uv_stride;
1111             this_block->src = 4 * br * this_block->src_stride + 4 * bc;
1112             ++block;
1113         }
1114     }
1115 }
1116
1117 static void sum_intra_stats(VP8_COMP *cpi, MACROBLOCK *x)
1118 {
1119     const MACROBLOCKD *xd = & x->e_mbd;
1120     const MB_PREDICTION_MODE m = xd->mode_info_context->mbmi.mode;
1121     const MB_PREDICTION_MODE uvm = xd->mode_info_context->mbmi.uv_mode;
1122
1123 #ifdef MODE_STATS
1124     const int is_key = cpi->common.frame_type == KEY_FRAME;
1125
1126     ++ (is_key ? uv_modes : inter_uv_modes)[uvm];
1127
1128     if (m == B_PRED)
1129     {
1130         unsigned int *const bct = is_key ? b_modes : inter_b_modes;
1131
1132         int b = 0;
1133
1134         do
1135         {
1136             ++ bct[xd->block[b].bmi.mode];
1137         }
1138         while (++b < 16);
1139     }
1140
1141 #endif
1142
1143     ++cpi->ymode_count[m];
1144     ++cpi->uv_mode_count[uvm];
1145
1146 }
1147 int vp8cx_encode_intra_macro_block(VP8_COMP *cpi, MACROBLOCK *x, TOKENEXTRA **t)
1148 {
1149     int Error4x4, Error16x16, error_uv;
1150     B_PREDICTION_MODE intra_bmodes[16];
1151     int rate4x4, rate16x16, rateuv;
1152     int dist4x4, dist16x16, distuv;
1153     int rate = 0;
1154     int rate4x4_tokenonly = 0;
1155     int rate16x16_tokenonly = 0;
1156     int rateuv_tokenonly = 0;
1157     int i;
1158
1159     x->e_mbd.mode_info_context->mbmi.ref_frame = INTRA_FRAME;
1160
1161 #if !(CONFIG_REALTIME_ONLY)
1162
1163     if (cpi->sf.RD || cpi->compressor_speed != 2)
1164     {
1165         Error4x4 = vp8_rd_pick_intra4x4mby_modes(cpi, x, &rate4x4, &rate4x4_tokenonly, &dist4x4);
1166
1167         //save the b modes for possible later use
1168         for (i = 0; i < 16; i++)
1169             intra_bmodes[i] = x->e_mbd.block[i].bmi.mode;
1170
1171         Error16x16 = vp8_rd_pick_intra16x16mby_mode(cpi, x, &rate16x16, &rate16x16_tokenonly, &dist16x16);
1172
1173         error_uv = vp8_rd_pick_intra_mbuv_mode(cpi, x, &rateuv, &rateuv_tokenonly, &distuv);
1174
1175         vp8_encode_intra16x16mbuv(IF_RTCD(&cpi->rtcd), x);
1176         rate += rateuv;
1177
1178         if (Error4x4 < Error16x16)
1179         {
1180             rate += rate4x4;
1181             x->e_mbd.mode_info_context->mbmi.mode = B_PRED;
1182
1183             // get back the intra block modes
1184             for (i = 0; i < 16; i++)
1185                 x->e_mbd.block[i].bmi.mode = intra_bmodes[i];
1186
1187             vp8_encode_intra4x4mby(IF_RTCD(&cpi->rtcd), x);
1188             cpi->prediction_error += Error4x4 ;
1189 #if 0
1190             // Experimental RD code
1191             cpi->frame_distortion += dist4x4;
1192 #endif
1193         }
1194         else
1195         {
1196             vp8_encode_intra16x16mby(IF_RTCD(&cpi->rtcd), x);
1197             rate += rate16x16;
1198
1199 #if 0
1200             // Experimental RD code
1201             cpi->prediction_error += Error16x16;
1202             cpi->frame_distortion += dist16x16;
1203 #endif
1204         }
1205
1206         sum_intra_stats(cpi, x);
1207
1208         vp8_tokenize_mb(cpi, &x->e_mbd, t);
1209     }
1210     else
1211 #endif
1212     {
1213
1214         int rate2, distortion2;
1215         MB_PREDICTION_MODE mode, best_mode = DC_PRED;
1216         int this_rd;
1217         Error16x16 = INT_MAX;
1218
1219         for (mode = DC_PRED; mode <= TM_PRED; mode ++)
1220         {
1221             x->e_mbd.mode_info_context->mbmi.mode = mode;
1222             vp8_build_intra_predictors_mby_ptr(&x->e_mbd);
1223             distortion2 = VARIANCE_INVOKE(&cpi->rtcd.variance, get16x16prederror)(x->src.y_buffer, x->src.y_stride, x->e_mbd.predictor, 16, 0x7fffffff);
1224             rate2  = x->mbmode_cost[x->e_mbd.frame_type][mode];
1225             this_rd = RD_ESTIMATE(x->rdmult, x->rddiv, rate2, distortion2);
1226
1227             if (Error16x16 > this_rd)
1228             {
1229                 Error16x16 = this_rd;
1230                 best_mode = mode;
1231             }
1232         }
1233
1234         vp8_pick_intra4x4mby_modes(IF_RTCD(&cpi->rtcd), x, &rate2, &distortion2);
1235
1236         if (distortion2 == INT_MAX)
1237             Error4x4 = INT_MAX;
1238         else
1239             Error4x4 = RD_ESTIMATE(x->rdmult, x->rddiv, rate2, distortion2);
1240
1241         if (Error4x4 < Error16x16)
1242         {
1243             x->e_mbd.mode_info_context->mbmi.mode = B_PRED;
1244             vp8_encode_intra4x4mby(IF_RTCD(&cpi->rtcd), x);
1245             cpi->prediction_error += Error4x4;
1246         }
1247         else
1248         {
1249             x->e_mbd.mode_info_context->mbmi.mode = best_mode;
1250             vp8_encode_intra16x16mby(IF_RTCD(&cpi->rtcd), x);
1251             cpi->prediction_error += Error16x16;
1252         }
1253
1254         vp8_pick_intra_mbuv_mode(x);
1255         vp8_encode_intra16x16mbuv(IF_RTCD(&cpi->rtcd), x);
1256         sum_intra_stats(cpi, x);
1257         vp8_tokenize_mb(cpi, &x->e_mbd, t);
1258     }
1259
1260     return rate;
1261 }
1262 #ifdef SPEEDSTATS
1263 extern int cnt_pm;
1264 #endif
1265
1266 extern void vp8_fix_contexts(MACROBLOCKD *x);
1267
1268 int vp8cx_encode_inter_macroblock
1269 (
1270     VP8_COMP *cpi, MACROBLOCK *x, TOKENEXTRA **t,
1271     int recon_yoffset, int recon_uvoffset
1272 )
1273 {
1274     MACROBLOCKD *const xd = &x->e_mbd;
1275     int inter_error;
1276     int intra_error = 0;
1277     int rate;
1278     int distortion;
1279
1280     x->skip = 0;
1281
1282     if (xd->segmentation_enabled)
1283         x->encode_breakout = cpi->segment_encode_breakout[xd->mode_info_context->mbmi.segment_id];
1284     else
1285         x->encode_breakout = cpi->oxcf.encode_breakout;
1286
1287 #if !(CONFIG_REALTIME_ONLY)
1288
1289     if (cpi->sf.RD)
1290     {
1291         int zbin_mode_boost_enabled = cpi->zbin_mode_boost_enabled;
1292
1293         /* Are we using the fast quantizer for the mode selection? */
1294         if(cpi->sf.use_fastquant_for_pick)
1295         {
1296             cpi->mb.quantize_b      = QUANTIZE_INVOKE(&cpi->rtcd.quantize, fastquantb);
1297
1298             /* the fast quantizer does not use zbin_extra, so
1299              * do not recalculate */
1300             cpi->zbin_mode_boost_enabled = 0;
1301         }
1302         inter_error = vp8_rd_pick_inter_mode(cpi, x, recon_yoffset, recon_uvoffset, &rate, &distortion, &intra_error);
1303
1304         /* switch back to the regular quantizer for the encode */
1305         if (cpi->sf.improved_quant)
1306         {
1307             cpi->mb.quantize_b    = QUANTIZE_INVOKE(&cpi->rtcd.quantize, quantb);
1308         }
1309
1310         /* restore cpi->zbin_mode_boost_enabled */
1311         cpi->zbin_mode_boost_enabled = zbin_mode_boost_enabled;
1312
1313     }
1314     else
1315 #endif
1316         inter_error = vp8_pick_inter_mode(cpi, x, recon_yoffset, recon_uvoffset, &rate, &distortion, &intra_error);
1317
1318
1319     cpi->prediction_error += inter_error;
1320     cpi->intra_error += intra_error;
1321
1322 #if 0
1323     // Experimental RD code
1324     cpi->frame_distortion += distortion;
1325     cpi->last_mb_distortion = distortion;
1326 #endif
1327
1328     // MB level adjutment to quantizer setup
1329     if (xd->segmentation_enabled)
1330     {
1331         // If cyclic update enabled
1332         if (cpi->cyclic_refresh_mode_enabled)
1333         {
1334             // Clear segment_id back to 0 if not coded (last frame 0,0)
1335             if ((xd->mode_info_context->mbmi.segment_id == 1) &&
1336                 ((xd->mode_info_context->mbmi.ref_frame != LAST_FRAME) || (xd->mode_info_context->mbmi.mode != ZEROMV)))
1337             {
1338                 xd->mode_info_context->mbmi.segment_id = 0;
1339
1340                 /* segment_id changed, so update */
1341                 vp8cx_mb_init_quantizer(cpi, x);
1342             }
1343         }
1344     }
1345
1346     {
1347         // Experimental code. Special case for gf and arf zeromv modes. Increase zbin size to supress noise
1348         if (cpi->zbin_mode_boost_enabled)
1349         {
1350             if ( xd->mode_info_context->mbmi.ref_frame == INTRA_FRAME )
1351                  cpi->zbin_mode_boost = 0;
1352             else
1353             {
1354                 if (xd->mode_info_context->mbmi.mode == ZEROMV)
1355                 {
1356                     if (xd->mode_info_context->mbmi.ref_frame != LAST_FRAME)
1357                         cpi->zbin_mode_boost = GF_ZEROMV_ZBIN_BOOST;
1358                     else
1359                         cpi->zbin_mode_boost = LF_ZEROMV_ZBIN_BOOST;
1360                 }
1361                 else if (xd->mode_info_context->mbmi.mode == SPLITMV)
1362                     cpi->zbin_mode_boost = 0;
1363                 else
1364                     cpi->zbin_mode_boost = MV_ZBIN_BOOST;
1365             }
1366         }
1367         else
1368             cpi->zbin_mode_boost = 0;
1369
1370         vp8_update_zbin_extra(cpi, x);
1371     }
1372
1373     cpi->count_mb_ref_frame_usage[xd->mode_info_context->mbmi.ref_frame] ++;
1374
1375     if (xd->mode_info_context->mbmi.ref_frame == INTRA_FRAME)
1376     {
1377         vp8_encode_intra16x16mbuv(IF_RTCD(&cpi->rtcd), x);
1378
1379         if (xd->mode_info_context->mbmi.mode == B_PRED)
1380         {
1381             vp8_encode_intra4x4mby(IF_RTCD(&cpi->rtcd), x);
1382         }
1383         else
1384         {
1385             vp8_encode_intra16x16mby(IF_RTCD(&cpi->rtcd), x);
1386         }
1387
1388         sum_intra_stats(cpi, x);
1389     }
1390     else
1391     {
1392         MV best_ref_mv;
1393         MV nearest, nearby;
1394         int mdcounts[4];
1395         int ref_fb_idx;
1396
1397         vp8_find_near_mvs(xd, xd->mode_info_context,
1398                           &nearest, &nearby, &best_ref_mv, mdcounts, xd->mode_info_context->mbmi.ref_frame, cpi->common.ref_frame_sign_bias);
1399
1400         vp8_build_uvmvs(xd, cpi->common.full_pixel);
1401
1402         if (xd->mode_info_context->mbmi.ref_frame == LAST_FRAME)
1403             ref_fb_idx = cpi->common.lst_fb_idx;
1404         else if (xd->mode_info_context->mbmi.ref_frame == GOLDEN_FRAME)
1405             ref_fb_idx = cpi->common.gld_fb_idx;
1406         else
1407             ref_fb_idx = cpi->common.alt_fb_idx;
1408
1409         xd->pre.y_buffer = cpi->common.yv12_fb[ref_fb_idx].y_buffer + recon_yoffset;
1410         xd->pre.u_buffer = cpi->common.yv12_fb[ref_fb_idx].u_buffer + recon_uvoffset;
1411         xd->pre.v_buffer = cpi->common.yv12_fb[ref_fb_idx].v_buffer + recon_uvoffset;
1412
1413         if (xd->mode_info_context->mbmi.mode == SPLITMV)
1414         {
1415             int i;
1416
1417             for (i = 0; i < 16; i++)
1418             {
1419                 if (xd->block[i].bmi.mode == NEW4X4)
1420                 {
1421                     cpi->MVcount[0][mv_max+((xd->block[i].bmi.mv.as_mv.row - best_ref_mv.row) >> 1)]++;
1422                     cpi->MVcount[1][mv_max+((xd->block[i].bmi.mv.as_mv.col - best_ref_mv.col) >> 1)]++;
1423                 }
1424             }
1425         }
1426         else if (xd->mode_info_context->mbmi.mode == NEWMV)
1427         {
1428             cpi->MVcount[0][mv_max+((xd->block[0].bmi.mv.as_mv.row - best_ref_mv.row) >> 1)]++;
1429             cpi->MVcount[1][mv_max+((xd->block[0].bmi.mv.as_mv.col - best_ref_mv.col) >> 1)]++;
1430         }
1431
1432         if (!x->skip && !x->e_mbd.mode_info_context->mbmi.force_no_skip)
1433         {
1434             vp8_encode_inter16x16(IF_RTCD(&cpi->rtcd), x);
1435
1436             // Clear mb_skip_coeff if mb_no_coeff_skip is not set
1437             if (!cpi->common.mb_no_coeff_skip)
1438                 xd->mode_info_context->mbmi.mb_skip_coeff = 0;
1439
1440         }
1441         else
1442             vp8_stuff_inter16x16(x);
1443     }
1444
1445     if (!x->skip)
1446         vp8_tokenize_mb(cpi, xd, t);
1447     else
1448     {
1449         if (cpi->common.mb_no_coeff_skip)
1450         {
1451             if (xd->mode_info_context->mbmi.mode != B_PRED && xd->mode_info_context->mbmi.mode != SPLITMV)
1452                 xd->mode_info_context->mbmi.dc_diff = 0;
1453             else
1454                 xd->mode_info_context->mbmi.dc_diff = 1;
1455
1456             xd->mode_info_context->mbmi.mb_skip_coeff = 1;
1457             cpi->skip_true_count ++;
1458             vp8_fix_contexts(xd);
1459         }
1460         else
1461         {
1462             vp8_stuff_mb(cpi, xd, t);
1463             xd->mode_info_context->mbmi.mb_skip_coeff = 0;
1464             cpi->skip_false_count ++;
1465         }
1466     }
1467
1468     return rate;
1469 }