vp8/encoder/encodeframe.c

   1 /*
   2  *  Copyright (c) 2010 The WebM project authors. All Rights Reserved.
   3  *
   4  *  Use of this source code is governed by a BSD-style license
   5  *  that can be found in the LICENSE file in the root of the source
   6  *  tree. An additional intellectual property rights grant can be found
   7  *  in the file PATENTS.  All contributing project authors may
   8  *  be found in the AUTHORS file in the root of the source tree.
   9  */
  10
  11
  12 #include "vpx_ports/config.h"
  13 #include "encodemb.h"
  14 #include "encodemv.h"
  15 #include "common.h"
  16 #include "onyx_int.h"
  17 #include "extend.h"
  18 #include "entropymode.h"
  19 #include "quant_common.h"
  20 #include "segmentation.h"
  21 #include "setupintrarecon.h"
  22 #include "encodeintra.h"
  23 #include "reconinter.h"
  24 #include "rdopt.h"
  25 #include "pickinter.h"
  26 #include "findnearmv.h"
  27 #include "reconintra.h"
  28 #include <stdio.h>
  29 #include <limits.h>
  30 #include "subpixel.h"
  31 #include "vpx_ports/vpx_timer.h"
  32
  33 #if CONFIG_RUNTIME_CPU_DETECT
  34 #define RTCD(x)     &cpi->common.rtcd.x
  35 #define IF_RTCD(x)  (x)
  36 #else
  37 #define RTCD(x)     NULL
  38 #define IF_RTCD(x)  NULL
  39 #endif
  40 extern void vp8_stuff_mb(VP8_COMP *cpi, MACROBLOCKD *x, TOKENEXTRA **t) ;
  41
  42 extern void vp8cx_initialize_me_consts(VP8_COMP *cpi, int QIndex);
  43 extern void vp8_auto_select_speed(VP8_COMP *cpi);
  44 extern void vp8cx_init_mbrthread_data(VP8_COMP *cpi,
  45                                       MACROBLOCK *x,
  46                                       MB_ROW_COMP *mbr_ei,
  47                                       int mb_row,
  48                                       int count);
  49 void vp8_build_block_offsets(MACROBLOCK *x);
  50 void vp8_setup_block_ptrs(MACROBLOCK *x);
  51 int vp8cx_encode_inter_macroblock(VP8_COMP *cpi, MACROBLOCK *x, TOKENEXTRA **t, int recon_yoffset, int recon_uvoffset);
  52 int vp8cx_encode_intra_macro_block(VP8_COMP *cpi, MACROBLOCK *x, TOKENEXTRA **t);
  53
  54 #ifdef MODE_STATS
  55 unsigned int inter_y_modes[10] = {0, 0, 0, 0, 0, 0, 0, 0, 0, 0};
  56 unsigned int inter_uv_modes[4] = {0, 0, 0, 0};
  57 unsigned int inter_b_modes[15]  = {0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0};
  58 unsigned int y_modes[5]   = {0, 0, 0, 0, 0};
  59 unsigned int uv_modes[4]  = {0, 0, 0, 0};
  60 unsigned int b_modes[14]  = {0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0};
  61 #endif
  62
  63 static const int qrounding_factors[129] =
  64 {
  65     48, 48, 48, 48, 48, 48, 48, 48,
  66     48, 48, 48, 48, 48, 48, 48, 48,
  67     48, 48, 48, 48, 48, 48, 48, 48,
  68     48, 48, 48, 48, 48, 48, 48, 48,
  69     48, 48, 48, 48, 48, 48, 48, 48,
  70     48, 48, 48, 48, 48, 48, 48, 48,
  71     48, 48, 48, 48, 48, 48, 48, 48,
  72     48, 48, 48, 48, 48, 48, 48, 48,
  73     48, 48, 48, 48, 48, 48, 48, 48,
  74     48, 48, 48, 48, 48, 48, 48, 48,
  75     48, 48, 48, 48, 48, 48, 48, 48,
  76     48, 48, 48, 48, 48, 48, 48, 48,
  77     48, 48, 48, 48, 48, 48, 48, 48,
  78     48, 48, 48, 48, 48, 48, 48, 48,
  79     48, 48, 48, 48, 48, 48, 48, 48,
  80     48, 48, 48, 48, 48, 48, 48, 48,
  81     48
  82 };
  83
  84 static const int qzbin_factors[129] =
  85 {
  86     84, 84, 84, 84, 84, 84, 84, 84,
  87     84, 84, 84, 84, 84, 84, 84, 84,
  88     84, 84, 84, 84, 84, 84, 84, 84,
  89     84, 84, 84, 84, 84, 84, 84, 84,
  90     84, 84, 84, 84, 84, 84, 84, 84,
  91     84, 84, 84, 84, 84, 84, 84, 84,
  92     80, 80, 80, 80, 80, 80, 80, 80,
  93     80, 80, 80, 80, 80, 80, 80, 80,
  94     80, 80, 80, 80, 80, 80, 80, 80,
  95     80, 80, 80, 80, 80, 80, 80, 80,
  96     80, 80, 80, 80, 80, 80, 80, 80,
  97     80, 80, 80, 80, 80, 80, 80, 80,
  98     80, 80, 80, 80, 80, 80, 80, 80,
  99     80, 80, 80, 80, 80, 80, 80, 80,
 100     80, 80, 80, 80, 80, 80, 80, 80,
 101     80, 80, 80, 80, 80, 80, 80, 80,
 102     80
 103 };
 104
 105 static const int qrounding_factors_y2[129] =
 106 {
 107     48, 48, 48, 48, 48, 48, 48, 48,
 108     48, 48, 48, 48, 48, 48, 48, 48,
 109     48, 48, 48, 48, 48, 48, 48, 48,
 110     48, 48, 48, 48, 48, 48, 48, 48,
 111     48, 48, 48, 48, 48, 48, 48, 48,
 112     48, 48, 48, 48, 48, 48, 48, 48,
 113     48, 48, 48, 48, 48, 48, 48, 48,
 114     48, 48, 48, 48, 48, 48, 48, 48,
 115     48, 48, 48, 48, 48, 48, 48, 48,
 116     48, 48, 48, 48, 48, 48, 48, 48,
 117     48, 48, 48, 48, 48, 48, 48, 48,
 118     48, 48, 48, 48, 48, 48, 48, 48,
 119     48, 48, 48, 48, 48, 48, 48, 48,
 120     48, 48, 48, 48, 48, 48, 48, 48,
 121     48, 48, 48, 48, 48, 48, 48, 48,
 122     48, 48, 48, 48, 48, 48, 48, 48,
 123     48
 124 };
 125
 126 static const int qzbin_factors_y2[129] =
 127 {
 128     84, 84, 84, 84, 84, 84, 84, 84,
 129     84, 84, 84, 84, 84, 84, 84, 84,
 130     84, 84, 84, 84, 84, 84, 84, 84,
 131     84, 84, 84, 84, 84, 84, 84, 84,
 132     84, 84, 84, 84, 84, 84, 84, 84,
 133     84, 84, 84, 84, 84, 84, 84, 84,
 134     80, 80, 80, 80, 80, 80, 80, 80,
 135     80, 80, 80, 80, 80, 80, 80, 80,
 136     80, 80, 80, 80, 80, 80, 80, 80,
 137     80, 80, 80, 80, 80, 80, 80, 80,
 138     80, 80, 80, 80, 80, 80, 80, 80,
 139     80, 80, 80, 80, 80, 80, 80, 80,
 140     80, 80, 80, 80, 80, 80, 80, 80,
 141     80, 80, 80, 80, 80, 80, 80, 80,
 142     80, 80, 80, 80, 80, 80, 80, 80,
 143     80, 80, 80, 80, 80, 80, 80, 80,
 144     80
 145 };
 146
 147 #define EXACT_QUANT
 148 #ifdef EXACT_QUANT
 149 static void vp8cx_invert_quant(int improved_quant, short *quant,
 150                                short *shift, short d)
 151 {
 152     if(improved_quant)
 153     {
 154         unsigned t;
 155         int l;
 156         t = d;
 157         for(l = 0; t > 1; l++)
 158             t>>=1;
 159         t = 1 + (1<<(16+l))/d;
 160         *quant = (short)(t - (1<<16));
 161         *shift = l;
 162     }
 163     else
 164     {
 165         *quant = (1 << 16) / d;
 166         *shift = 0;
 167     }
 168 }
 169
 170 void vp8cx_init_quantizer(VP8_COMP *cpi)
 171 {
 172     int i;
 173     int quant_val;
 174     int Q;
 175
 176     int zbin_boost[16] = {0, 0, 8, 10, 12, 14, 16, 20, 24, 28, 32, 36, 40, 44, 44, 44};
 177
 178     for (Q = 0; Q < QINDEX_RANGE; Q++)
 179     {
 180         // dc values
 181         quant_val = vp8_dc_quant(Q, cpi->common.y1dc_delta_q);
 182         cpi->Y1quant_fast[Q][0] = (1 << 16) / quant_val;
 183         vp8cx_invert_quant(cpi->sf.improved_quant, cpi->Y1quant[Q] + 0,
 184                            cpi->Y1quant_shift[Q] + 0, quant_val);
 185         cpi->Y1zbin[Q][0] = ((qzbin_factors[Q] * quant_val) + 64) >> 7;
 186         cpi->Y1round[Q][0] = (qrounding_factors[Q] * quant_val) >> 7;
 187         cpi->common.Y1dequant[Q][0] = quant_val;
 188         cpi->zrun_zbin_boost_y1[Q][0] = (quant_val * zbin_boost[0]) >> 7;
 189
 190         quant_val = vp8_dc2quant(Q, cpi->common.y2dc_delta_q);
 191         cpi->Y2quant_fast[Q][0] = (1 << 16) / quant_val;
 192         vp8cx_invert_quant(cpi->sf.improved_quant, cpi->Y2quant[Q] + 0,
 193                            cpi->Y2quant_shift[Q] + 0, quant_val);
 194         cpi->Y2zbin[Q][0] = ((qzbin_factors_y2[Q] * quant_val) + 64) >> 7;
 195         cpi->Y2round[Q][0] = (qrounding_factors_y2[Q] * quant_val) >> 7;
 196         cpi->common.Y2dequant[Q][0] = quant_val;
 197         cpi->zrun_zbin_boost_y2[Q][0] = (quant_val * zbin_boost[0]) >> 7;
 198
 199         quant_val = vp8_dc_uv_quant(Q, cpi->common.uvdc_delta_q);
 200         cpi->UVquant_fast[Q][0] = (1 << 16) / quant_val;
 201         vp8cx_invert_quant(cpi->sf.improved_quant, cpi->UVquant[Q] + 0,
 202                            cpi->UVquant_shift[Q] + 0, quant_val);
 203         cpi->UVzbin[Q][0] = ((qzbin_factors[Q] * quant_val) + 64) >> 7;;
 204         cpi->UVround[Q][0] = (qrounding_factors[Q] * quant_val) >> 7;
 205         cpi->common.UVdequant[Q][0] = quant_val;
 206         cpi->zrun_zbin_boost_uv[Q][0] = (quant_val * zbin_boost[0]) >> 7;
 207
 208         // all the ac values = ;
 209         for (i = 1; i < 16; i++)
 210         {
 211             int rc = vp8_default_zig_zag1d[i];
 212
 213             quant_val = vp8_ac_yquant(Q);
 214             cpi->Y1quant_fast[Q][rc] = (1 << 16) / quant_val;
 215             vp8cx_invert_quant(cpi->sf.improved_quant, cpi->Y1quant[Q] + rc,
 216                                cpi->Y1quant_shift[Q] + rc, quant_val);
 217             cpi->Y1zbin[Q][rc] = ((qzbin_factors[Q] * quant_val) + 64) >> 7;
 218             cpi->Y1round[Q][rc] = (qrounding_factors[Q] * quant_val) >> 7;
 219             cpi->common.Y1dequant[Q][rc] = quant_val;
 220             cpi->zrun_zbin_boost_y1[Q][i] = (quant_val * zbin_boost[i]) >> 7;
 221
 222             quant_val = vp8_ac2quant(Q, cpi->common.y2ac_delta_q);
 223             cpi->Y2quant_fast[Q][rc] = (1 << 16) / quant_val;
 224             vp8cx_invert_quant(cpi->sf.improved_quant, cpi->Y2quant[Q] + rc,
 225                                cpi->Y2quant_shift[Q] + rc, quant_val);
 226             cpi->Y2zbin[Q][rc] = ((qzbin_factors_y2[Q] * quant_val) + 64) >> 7;
 227             cpi->Y2round[Q][rc] = (qrounding_factors_y2[Q] * quant_val) >> 7;
 228             cpi->common.Y2dequant[Q][rc] = quant_val;
 229             cpi->zrun_zbin_boost_y2[Q][i] = (quant_val * zbin_boost[i]) >> 7;
 230
 231             quant_val = vp8_ac_uv_quant(Q, cpi->common.uvac_delta_q);
 232             cpi->UVquant_fast[Q][rc] = (1 << 16) / quant_val;
 233             vp8cx_invert_quant(cpi->sf.improved_quant, cpi->UVquant[Q] + rc,
 234                                cpi->UVquant_shift[Q] + rc, quant_val);
 235             cpi->UVzbin[Q][rc] = ((qzbin_factors[Q] * quant_val) + 64) >> 7;
 236             cpi->UVround[Q][rc] = (qrounding_factors[Q] * quant_val) >> 7;
 237             cpi->common.UVdequant[Q][rc] = quant_val;
 238             cpi->zrun_zbin_boost_uv[Q][i] = (quant_val * zbin_boost[i]) >> 7;
 239         }
 240     }
 241 }
 242 #else
 243 void vp8cx_init_quantizer(VP8_COMP *cpi)
 244 {
 245     int i;
 246     int quant_val;
 247     int Q;
 248
 249     int zbin_boost[16] = {0, 0, 8, 10, 12, 14, 16, 20, 24, 28, 32, 36, 40, 44, 44, 44};
 250
 251     for (Q = 0; Q < QINDEX_RANGE; Q++)
 252     {
 253         // dc values
 254         quant_val = vp8_dc_quant(Q, cpi->common.y1dc_delta_q);
 255         cpi->Y1quant[Q][0] = (1 << 16) / quant_val;
 256         cpi->Y1zbin[Q][0] = ((qzbin_factors[Q] * quant_val) + 64) >> 7;
 257         cpi->Y1round[Q][0] = (qrounding_factors[Q] * quant_val) >> 7;
 258         cpi->common.Y1dequant[Q][0] = quant_val;
 259         cpi->zrun_zbin_boost_y1[Q][0] = (quant_val * zbin_boost[0]) >> 7;
 260
 261         quant_val = vp8_dc2quant(Q, cpi->common.y2dc_delta_q);
 262         cpi->Y2quant[Q][0] = (1 << 16) / quant_val;
 263         cpi->Y2zbin[Q][0] = ((qzbin_factors_y2[Q] * quant_val) + 64) >> 7;
 264         cpi->Y2round[Q][0] = (qrounding_factors_y2[Q] * quant_val) >> 7;
 265         cpi->common.Y2dequant[Q][0] = quant_val;
 266         cpi->zrun_zbin_boost_y2[Q][0] = (quant_val * zbin_boost[0]) >> 7;
 267
 268         quant_val = vp8_dc_uv_quant(Q, cpi->common.uvdc_delta_q);
 269         cpi->UVquant[Q][0] = (1 << 16) / quant_val;
 270         cpi->UVzbin[Q][0] = ((qzbin_factors[Q] * quant_val) + 64) >> 7;;
 271         cpi->UVround[Q][0] = (qrounding_factors[Q] * quant_val) >> 7;
 272         cpi->common.UVdequant[Q][0] = quant_val;
 273         cpi->zrun_zbin_boost_uv[Q][0] = (quant_val * zbin_boost[0]) >> 7;
 274
 275         // all the ac values = ;
 276         for (i = 1; i < 16; i++)
 277         {
 278             int rc = vp8_default_zig_zag1d[i];
 279
 280             quant_val = vp8_ac_yquant(Q);
 281             cpi->Y1quant[Q][rc] = (1 << 16) / quant_val;
 282             cpi->Y1zbin[Q][rc] = ((qzbin_factors[Q] * quant_val) + 64) >> 7;
 283             cpi->Y1round[Q][rc] = (qrounding_factors[Q] * quant_val) >> 7;
 284             cpi->common.Y1dequant[Q][rc] = quant_val;
 285             cpi->zrun_zbin_boost_y1[Q][i] = (quant_val * zbin_boost[i]) >> 7;
 286
 287             quant_val = vp8_ac2quant(Q, cpi->common.y2ac_delta_q);
 288             cpi->Y2quant[Q][rc] = (1 << 16) / quant_val;
 289             cpi->Y2zbin[Q][rc] = ((qzbin_factors_y2[Q] * quant_val) + 64) >> 7;
 290             cpi->Y2round[Q][rc] = (qrounding_factors_y2[Q] * quant_val) >> 7;
 291             cpi->common.Y2dequant[Q][rc] = quant_val;
 292             cpi->zrun_zbin_boost_y2[Q][i] = (quant_val * zbin_boost[i]) >> 7;
 293
 294             quant_val = vp8_ac_uv_quant(Q, cpi->common.uvac_delta_q);
 295             cpi->UVquant[Q][rc] = (1 << 16) / quant_val;
 296             cpi->UVzbin[Q][rc] = ((qzbin_factors[Q] * quant_val) + 64) >> 7;
 297             cpi->UVround[Q][rc] = (qrounding_factors[Q] * quant_val) >> 7;
 298             cpi->common.UVdequant[Q][rc] = quant_val;
 299             cpi->zrun_zbin_boost_uv[Q][i] = (quant_val * zbin_boost[i]) >> 7;
 300         }
 301     }
 302 }
 303 #endif
 304 void vp8cx_mb_init_quantizer(VP8_COMP *cpi, MACROBLOCK *x)
 305 {
 306     int i;
 307     int QIndex;
 308     MACROBLOCKD *xd = &x->e_mbd;
 309     int zbin_extra;
 310
 311     // Select the baseline MB Q index.
 312     if (xd->segmentation_enabled)
 313     {
 314         // Abs Value
 315         if (xd->mb_segement_abs_delta == SEGMENT_ABSDATA)
 316
 317             QIndex = xd->segment_feature_data[MB_LVL_ALT_Q][xd->mode_info_context->mbmi.segment_id];
 318         // Delta Value
 319         else
 320         {
 321             QIndex = cpi->common.base_qindex + xd->segment_feature_data[MB_LVL_ALT_Q][xd->mode_info_context->mbmi.segment_id];
 322             QIndex = (QIndex >= 0) ? ((QIndex <= MAXQ) ? QIndex : MAXQ) : 0;    // Clamp to valid range
 323         }
 324     }
 325     else
 326         QIndex = cpi->common.base_qindex;
 327
 328     // Y
 329     zbin_extra = (cpi->common.Y1dequant[QIndex][1] * (cpi->zbin_over_quant + cpi->zbin_mode_boost)) >> 7;
 330
 331     for (i = 0; i < 16; i++)
 332     {
 333         x->block[i].quant = cpi->Y1quant[QIndex];
 334         x->block[i].quant_fast = cpi->Y1quant_fast[QIndex];
 335         x->block[i].quant_shift = cpi->Y1quant_shift[QIndex];
 336         x->block[i].zbin = cpi->Y1zbin[QIndex];
 337         x->block[i].round = cpi->Y1round[QIndex];
 338         x->e_mbd.block[i].dequant = cpi->common.Y1dequant[QIndex];
 339         x->block[i].zrun_zbin_boost = cpi->zrun_zbin_boost_y1[QIndex];
 340         x->block[i].zbin_extra = (short)zbin_extra;
 341     }
 342
 343     // UV
 344     zbin_extra = (cpi->common.UVdequant[QIndex][1] * (cpi->zbin_over_quant + cpi->zbin_mode_boost)) >> 7;
 345
 346     for (i = 16; i < 24; i++)
 347     {
 348         x->block[i].quant = cpi->UVquant[QIndex];
 349         x->block[i].quant_fast = cpi->UVquant_fast[QIndex];
 350         x->block[i].quant_shift = cpi->UVquant_shift[QIndex];
 351         x->block[i].zbin = cpi->UVzbin[QIndex];
 352         x->block[i].round = cpi->UVround[QIndex];
 353         x->e_mbd.block[i].dequant = cpi->common.UVdequant[QIndex];
 354         x->block[i].zrun_zbin_boost = cpi->zrun_zbin_boost_uv[QIndex];
 355         x->block[i].zbin_extra = (short)zbin_extra;
 356     }
 357
 358     // Y2
 359     zbin_extra = (cpi->common.Y2dequant[QIndex][1] * ((cpi->zbin_over_quant / 2) + cpi->zbin_mode_boost)) >> 7;
 360     x->block[24].quant_fast = cpi->Y2quant_fast[QIndex];
 361     x->block[24].quant = cpi->Y2quant[QIndex];
 362     x->block[24].quant_shift = cpi->Y2quant_shift[QIndex];
 363     x->block[24].zbin = cpi->Y2zbin[QIndex];
 364     x->block[24].round = cpi->Y2round[QIndex];
 365     x->e_mbd.block[24].dequant = cpi->common.Y2dequant[QIndex];
 366     x->block[24].zrun_zbin_boost = cpi->zrun_zbin_boost_y2[QIndex];
 367     x->block[24].zbin_extra = (short)zbin_extra;
 368 }
 369
 370 void vp8cx_frame_init_quantizer(VP8_COMP *cpi)
 371 {
 372     // Clear Zbin mode boost for default case
 373     cpi->zbin_mode_boost = 0;
 374
 375     // vp8cx_init_quantizer() is first called in vp8_create_compressor(). A check is added here so that vp8cx_init_quantizer() is only called
 376     // when these values are not all zero.
 377     if (cpi->common.y1dc_delta_q | cpi->common.y2dc_delta_q | cpi->common.uvdc_delta_q | cpi->common.y2ac_delta_q | cpi->common.uvac_delta_q)
 378     {
 379         vp8cx_init_quantizer(cpi);
 380     }
 381
 382     // MB level quantizer setup
 383     vp8cx_mb_init_quantizer(cpi, &cpi->mb);
 384 }
 385
 386
 387 /* activity_avg must be positive, or flat regions could get a zero weight
 388  *  (infinite lambda), which confounds analysis.
 389  * This also avoids the need for divide by zero checks in
 390  *  vp8_activity_masking().
 391  */
 392 #define VP8_ACTIVITY_AVG_MIN (64)
 393
 394 /* This is used as a reference when computing the source variance for the
 395  *  purposes of activity masking.
 396  * Eventually this should be replaced by custom no-reference routines,
 397  *  which will be faster.
 398  */
 399 static const unsigned char VP8_VAR_OFFS[16]=
 400 {
 401     128,128,128,128,128,128,128,128,128,128,128,128,128,128,128,128
 402 };
 403
 404 unsigned int vp8_activity_masking(VP8_COMP *cpi, MACROBLOCK *x)
 405 {
 406     unsigned int act;
 407     unsigned int sse;
 408     int sum;
 409     unsigned int a;
 410     unsigned int b;
 411     unsigned int d;
 412     /* TODO: This could also be done over smaller areas (8x8), but that would
 413      *  require extensive changes elsewhere, as lambda is assumed to be fixed
 414      *  over an entire MB in most of the code.
 415      * Another option is to compute four 8x8 variances, and pick a single
 416      *  lambda using a non-linear combination (e.g., the smallest, or second
 417      *  smallest, etc.).
 418      */
 419     VARIANCE_INVOKE(&cpi->rtcd.variance, get16x16var)(x->src.y_buffer,
 420      x->src.y_stride, VP8_VAR_OFFS, 0, &sse, &sum);
 421     /* This requires a full 32 bits of precision. */
 422     act = (sse<<8) - sum*sum;
 423     /* Drop 4 to give us some headroom to work with. */
 424     act = (act + 8) >> 4;
 425     /* If the region is flat, lower the activity some more. */
 426     if (act < 8<<12)
 427         act = act < 5<<12 ? act : 5<<12;
 428     /* TODO: For non-flat regions, edge regions should receive less masking
 429      *  than textured regions, but identifying edge regions quickly and
 430      *  reliably enough is still a subject of experimentation.
 431      * This will be most noticable near edges with a complex shape (e.g.,
 432      *  text), but the 4x4 transform size should make this less of a problem
 433      *  than it would be for an 8x8 transform.
 434      */
 435     /* Apply the masking to the RD multiplier. */
 436     a = act + 4*cpi->activity_avg;
 437     b = 4*act + cpi->activity_avg;
 438     x->rdmult = (unsigned int)(((INT64)x->rdmult*b + (a>>1))/a);
 439     return act;
 440 }
 441
 442
 443
 444 static
 445 void encode_mb_row(VP8_COMP *cpi,
 446                    VP8_COMMON *cm,
 447                    int mb_row,
 448                    MACROBLOCK  *x,
 449                    MACROBLOCKD *xd,
 450                    TOKENEXTRA **tp,
 451                    int *segment_counts,
 452                    int *totalrate)
 453 {
 454     INT64 activity_sum = 0;
 455     int i;
 456     int recon_yoffset, recon_uvoffset;
 457     int mb_col;
 458     int ref_fb_idx = cm->lst_fb_idx;
 459     int dst_fb_idx = cm->new_fb_idx;
 460     int recon_y_stride = cm->yv12_fb[ref_fb_idx].y_stride;
 461     int recon_uv_stride = cm->yv12_fb[ref_fb_idx].uv_stride;
 462     int seg_map_index = (mb_row * cpi->common.mb_cols);
 463
 464
 465     // reset above block coeffs
 466     xd->above_context = cm->above_context;
 467
 468     xd->up_available = (mb_row != 0);
 469     recon_yoffset = (mb_row * recon_y_stride * 16);
 470     recon_uvoffset = (mb_row * recon_uv_stride * 8);
 471
 472     cpi->tplist[mb_row].start = *tp;
 473     //printf("Main mb_row = %d\n", mb_row);
 474
 475     // Distance of Mb to the top & bottom edges, specified in 1/8th pel
 476     // units as they are always compared to values that are in 1/8th pel units
 477     xd->mb_to_top_edge = -((mb_row * 16) << 3);
 478     xd->mb_to_bottom_edge = ((cm->mb_rows - 1 - mb_row) * 16) << 3;
 479
 480     // Set up limit values for vertical motion vector components
 481     // to prevent them extending beyond the UMV borders
 482     x->mv_row_min = -((mb_row * 16) + (VP8BORDERINPIXELS - 16));
 483     x->mv_row_max = ((cm->mb_rows - 1 - mb_row) * 16)
 484                         + (VP8BORDERINPIXELS - 16);
 485
 486     // for each macroblock col in image
 487     for (mb_col = 0; mb_col < cm->mb_cols; mb_col++)
 488     {
 489         // Distance of Mb to the left & right edges, specified in
 490         // 1/8th pel units as they are always compared to values
 491         // that are in 1/8th pel units
 492         xd->mb_to_left_edge = -((mb_col * 16) << 3);
 493         xd->mb_to_right_edge = ((cm->mb_cols - 1 - mb_col) * 16) << 3;
 494
 495         // Set up limit values for horizontal motion vector components
 496         // to prevent them extending beyond the UMV borders
 497         x->mv_col_min = -((mb_col * 16) + (VP8BORDERINPIXELS - 16));
 498         x->mv_col_max = ((cm->mb_cols - 1 - mb_col) * 16)
 499                             + (VP8BORDERINPIXELS - 16);
 500
 501         xd->dst.y_buffer = cm->yv12_fb[dst_fb_idx].y_buffer + recon_yoffset;
 502         xd->dst.u_buffer = cm->yv12_fb[dst_fb_idx].u_buffer + recon_uvoffset;
 503         xd->dst.v_buffer = cm->yv12_fb[dst_fb_idx].v_buffer + recon_uvoffset;
 504         xd->left_available = (mb_col != 0);
 505
 506         x->rddiv = cpi->RDDIV;
 507         x->rdmult = cpi->RDMULT;
 508
 509         if(cpi->oxcf.tuning == VP8_TUNE_SSIM)
 510             activity_sum += vp8_activity_masking(cpi, x);
 511
 512         // Is segmentation enabled
 513         // MB level adjutment to quantizer
 514         if (xd->segmentation_enabled)
 515         {
 516             // Code to set segment id in xd->mbmi.segment_id for current MB (with range checking)
 517             if (cpi->segmentation_map[seg_map_index+mb_col] <= 3)
 518                 xd->mode_info_context->mbmi.segment_id = cpi->segmentation_map[seg_map_index+mb_col];
 519             else
 520                 xd->mode_info_context->mbmi.segment_id = 0;
 521
 522             vp8cx_mb_init_quantizer(cpi, x);
 523         }
 524         else
 525             xd->mode_info_context->mbmi.segment_id = 0;         // Set to Segment 0 by default
 526
 527         x->active_ptr = cpi->active_map + seg_map_index + mb_col;
 528
 529         if (cm->frame_type == KEY_FRAME)
 530         {
 531             *totalrate += vp8cx_encode_intra_macro_block(cpi, x, tp);
 532 #ifdef MODE_STATS
 533             y_modes[xd->mbmi.mode] ++;
 534 #endif
 535         }
 536         else
 537         {
 538             *totalrate += vp8cx_encode_inter_macroblock(cpi, x, tp, recon_yoffset, recon_uvoffset);
 539
 540 #ifdef MODE_STATS
 541             inter_y_modes[xd->mbmi.mode] ++;
 542
 543             if (xd->mbmi.mode == SPLITMV)
 544             {
 545                 int b;
 546
 547                 for (b = 0; b < xd->mbmi.partition_count; b++)
 548                 {
 549                     inter_b_modes[x->partition->bmi[b].mode] ++;
 550                 }
 551             }
 552
 553 #endif
 554
 555             // Count of last ref frame 0,0 useage
 556             if ((xd->mode_info_context->mbmi.mode == ZEROMV) && (xd->mode_info_context->mbmi.ref_frame == LAST_FRAME))
 557                 cpi->inter_zz_count ++;
 558
 559             // Special case code for cyclic refresh
 560             // If cyclic update enabled then copy xd->mbmi.segment_id; (which may have been updated based on mode
 561             // during vp8cx_encode_inter_macroblock()) back into the global sgmentation map
 562             if (cpi->cyclic_refresh_mode_enabled && xd->segmentation_enabled)
 563             {
 564                 cpi->segmentation_map[seg_map_index+mb_col] = xd->mode_info_context->mbmi.segment_id;
 565
 566                 // If the block has been refreshed mark it as clean (the magnitude of the -ve influences how long it will be before we consider another refresh):
 567                 // Else if it was coded (last frame 0,0) and has not already been refreshed then mark it as a candidate for cleanup next time (marked 0)
 568                 // else mark it as dirty (1).
 569                 if (xd->mode_info_context->mbmi.segment_id)
 570                     cpi->cyclic_refresh_map[seg_map_index+mb_col] = -1;
 571                 else if ((xd->mode_info_context->mbmi.mode == ZEROMV) && (xd->mode_info_context->mbmi.ref_frame == LAST_FRAME))
 572                 {
 573                     if (cpi->cyclic_refresh_map[seg_map_index+mb_col] == 1)
 574                         cpi->cyclic_refresh_map[seg_map_index+mb_col] = 0;
 575                 }
 576                 else
 577                     cpi->cyclic_refresh_map[seg_map_index+mb_col] = 1;
 578
 579             }
 580         }
 581
 582         cpi->tplist[mb_row].stop = *tp;
 583
 584         x->gf_active_ptr++;      // Increment pointer into gf useage flags structure for next mb
 585
 586         for (i = 0; i < 16; i++)
 587             vpx_memcpy(&xd->mode_info_context->bmi[i], &xd->block[i].bmi, sizeof(xd->block[i].bmi));
 588
 589         // adjust to the next column of macroblocks
 590         x->src.y_buffer += 16;
 591         x->src.u_buffer += 8;
 592         x->src.v_buffer += 8;
 593
 594         recon_yoffset += 16;
 595         recon_uvoffset += 8;
 596
 597         // Keep track of segment useage
 598         segment_counts[xd->mode_info_context->mbmi.segment_id] ++;
 599
 600         // skip to next mb
 601         xd->mode_info_context++;
 602         x->partition_info++;
 603
 604         xd->above_context++;
 605         cpi->current_mb_col_main = mb_col;
 606     }
 607
 608     //extend the recon for intra prediction
 609     vp8_extend_mb_row(
 610         &cm->yv12_fb[dst_fb_idx],
 611         xd->dst.y_buffer + 16,
 612         xd->dst.u_buffer + 8,
 613         xd->dst.v_buffer + 8);
 614
 615     // this is to account for the border
 616     xd->mode_info_context++;
 617     x->partition_info++;
 618     x->activity_sum += activity_sum;
 619 }
 620
 621
 622
 623
 624
 625 void vp8_encode_frame(VP8_COMP *cpi)
 626 {
 627     int mb_row;
 628     MACROBLOCK *const x = & cpi->mb;
 629     VP8_COMMON *const cm = & cpi->common;
 630     MACROBLOCKD *const xd = & x->e_mbd;
 631
 632     int i;
 633     TOKENEXTRA *tp = cpi->tok;
 634     int segment_counts[MAX_MB_SEGMENTS];
 635     int totalrate;
 636
 637     // Functions setup for all frame types so we can use MC in AltRef
 638     if (cm->mcomp_filter_type == SIXTAP)
 639     {
 640         xd->subpixel_predict        = SUBPIX_INVOKE(
 641                                         &cpi->common.rtcd.subpix, sixtap4x4);
 642         xd->subpixel_predict8x4     = SUBPIX_INVOKE(
 643                                         &cpi->common.rtcd.subpix, sixtap8x4);
 644         xd->subpixel_predict8x8     = SUBPIX_INVOKE(
 645                                         &cpi->common.rtcd.subpix, sixtap8x8);
 646         xd->subpixel_predict16x16   = SUBPIX_INVOKE(
 647                                         &cpi->common.rtcd.subpix, sixtap16x16);
 648     }
 649     else
 650     {
 651         xd->subpixel_predict        = SUBPIX_INVOKE(
 652                                         &cpi->common.rtcd.subpix, bilinear4x4);
 653         xd->subpixel_predict8x4     = SUBPIX_INVOKE(
 654                                         &cpi->common.rtcd.subpix, bilinear8x4);
 655         xd->subpixel_predict8x8     = SUBPIX_INVOKE(
 656                                         &cpi->common.rtcd.subpix, bilinear8x8);
 657         xd->subpixel_predict16x16   = SUBPIX_INVOKE(
 658                                       &cpi->common.rtcd.subpix, bilinear16x16);
 659     }
 660
 661     x->gf_active_ptr = (signed char *)cpi->gf_active_flags;     // Point to base of GF active flags data structure
 662
 663     x->vector_range = 32;
 664
 665     // Count of MBs using the alternate Q if any
 666     cpi->alt_qcount = 0;
 667
 668     // Reset frame count of inter 0,0 motion vector useage.
 669     cpi->inter_zz_count = 0;
 670
 671     vpx_memset(segment_counts, 0, sizeof(segment_counts));
 672
 673     cpi->prediction_error = 0;
 674     cpi->intra_error = 0;
 675     cpi->skip_true_count = 0;
 676     cpi->skip_false_count = 0;
 677
 678 #if 0
 679     // Experimental code
 680     cpi->frame_distortion = 0;
 681     cpi->last_mb_distortion = 0;
 682 #endif
 683
 684     totalrate = 0;
 685
 686     x->partition_info = x->pi;
 687
 688     xd->mode_info_context = cm->mi;
 689     xd->mode_info_stride = cm->mode_info_stride;
 690
 691     xd->frame_type = cm->frame_type;
 692
 693     xd->frames_since_golden = cm->frames_since_golden;
 694     xd->frames_till_alt_ref_frame = cm->frames_till_alt_ref_frame;
 695     vp8_zero(cpi->MVcount);
 696     // vp8_zero( Contexts)
 697     vp8_zero(cpi->coef_counts);
 698
 699     // reset intra mode contexts
 700     if (cm->frame_type == KEY_FRAME)
 701         vp8_init_mbmode_probs(cm);
 702
 703
 704     vp8cx_frame_init_quantizer(cpi);
 705
 706     if (cpi->compressor_speed == 2)
 707     {
 708         if (cpi->oxcf.cpu_used < 0)
 709             cpi->Speed = -(cpi->oxcf.cpu_used);
 710         else
 711             vp8_auto_select_speed(cpi);
 712     }
 713
 714     vp8_initialize_rd_consts(cpi, vp8_dc_quant(cm->base_qindex, cm->y1dc_delta_q));
 715     //vp8_initialize_rd_consts( cpi, vp8_dc_quant(cpi->avg_frame_qindex, cm->y1dc_delta_q) );
 716     vp8cx_initialize_me_consts(cpi, cm->base_qindex);
 717     //vp8cx_initialize_me_consts( cpi, cpi->avg_frame_qindex);
 718
 719     // Copy data over into macro block data sturctures.
 720
 721     x->src = * cpi->Source;
 722     xd->pre = cm->yv12_fb[cm->lst_fb_idx];
 723     xd->dst = cm->yv12_fb[cm->new_fb_idx];
 724
 725     // set up frame new frame for intra coded blocks
 726
 727     vp8_setup_intra_recon(&cm->yv12_fb[cm->new_fb_idx]);
 728
 729     vp8_build_block_offsets(x);
 730
 731     vp8_setup_block_dptrs(&x->e_mbd);
 732
 733     vp8_setup_block_ptrs(x);
 734
 735     x->activity_sum = 0;
 736
 737     xd->mode_info_context->mbmi.mode = DC_PRED;
 738     xd->mode_info_context->mbmi.uv_mode = DC_PRED;
 739
 740     xd->left_context = &cm->left_context;
 741
 742     vp8_zero(cpi->count_mb_ref_frame_usage)
 743     vp8_zero(cpi->ymode_count)
 744     vp8_zero(cpi->uv_mode_count)
 745
 746     x->mvc = cm->fc.mvc;
 747
 748     vpx_memset(cm->above_context, 0, sizeof(ENTROPY_CONTEXT_PLANES) * cm->mb_cols);
 749
 750     {
 751         struct vpx_usec_timer  emr_timer;
 752         vpx_usec_timer_start(&emr_timer);
 753
 754         if (!cpi->b_multi_threaded)
 755         {
 756             // for each macroblock row in image
 757             for (mb_row = 0; mb_row < cm->mb_rows; mb_row++)
 758             {
 759
 760                 vp8_zero(cm->left_context)
 761
 762                 encode_mb_row(cpi, cm, mb_row, x, xd, &tp, segment_counts, &totalrate);
 763
 764                 // adjust to the next row of mbs
 765                 x->src.y_buffer += 16 * x->src.y_stride - 16 * cm->mb_cols;
 766                 x->src.u_buffer += 8 * x->src.uv_stride - 8 * cm->mb_cols;
 767                 x->src.v_buffer += 8 * x->src.uv_stride - 8 * cm->mb_cols;
 768             }
 769
 770             cpi->tok_count = tp - cpi->tok;
 771
 772         }
 773         else
 774         {
 775 #if CONFIG_MULTITHREAD
 776             int i;
 777
 778             vp8cx_init_mbrthread_data(cpi, x, cpi->mb_row_ei, 1,  cpi->encoding_thread_count);
 779
 780             for (mb_row = 0; mb_row < cm->mb_rows; mb_row += (cpi->encoding_thread_count + 1))
 781             {
 782                 cpi->current_mb_col_main = -1;
 783
 784                 for (i = 0; i < cpi->encoding_thread_count; i++)
 785                 {
 786                     if ((mb_row + i + 1) >= cm->mb_rows)
 787                         break;
 788
 789                     cpi->mb_row_ei[i].mb_row = mb_row + i + 1;
 790                     cpi->mb_row_ei[i].tp  = cpi->tok + (mb_row + i + 1) * (cm->mb_cols * 16 * 24);
 791                     cpi->mb_row_ei[i].current_mb_col = -1;
 792                     //SetEvent(cpi->h_event_mbrencoding[i]);
 793                     sem_post(&cpi->h_event_mbrencoding[i]);
 794                 }
 795
 796                 vp8_zero(cm->left_context)
 797
 798                 tp = cpi->tok + mb_row * (cm->mb_cols * 16 * 24);
 799
 800                 encode_mb_row(cpi, cm, mb_row, x, xd, &tp, segment_counts, &totalrate);
 801
 802                 // adjust to the next row of mbs
 803                 x->src.y_buffer += 16 * x->src.y_stride * (cpi->encoding_thread_count + 1) - 16 * cm->mb_cols;
 804                 x->src.u_buffer +=  8 * x->src.uv_stride * (cpi->encoding_thread_count + 1) - 8 * cm->mb_cols;
 805                 x->src.v_buffer +=  8 * x->src.uv_stride * (cpi->encoding_thread_count + 1) - 8 * cm->mb_cols;
 806
 807                 xd->mode_info_context += xd->mode_info_stride * cpi->encoding_thread_count;
 808                 x->partition_info  += xd->mode_info_stride * cpi->encoding_thread_count;
 809
 810                 if (mb_row < cm->mb_rows - 1)
 811                     //WaitForSingleObject(cpi->h_event_main, INFINITE);
 812                     sem_wait(&cpi->h_event_main);
 813             }
 814
 815             /*
 816             for( ;mb_row<cm->mb_rows; mb_row ++)
 817             {
 818             vp8_zero( cm->left_context)
 819
 820             tp = cpi->tok + mb_row * (cm->mb_cols * 16 * 24);
 821
 822             encode_mb_row(cpi, cm, mb_row, x, xd, &tp, segment_counts, &totalrate);
 823             // adjust to the next row of mbs
 824             x->src.y_buffer += 16 * x->src.y_stride - 16 * cm->mb_cols;
 825             x->src.u_buffer +=  8 * x->src.uv_stride - 8 * cm->mb_cols;
 826             x->src.v_buffer +=  8 * x->src.uv_stride - 8 * cm->mb_cols;
 827
 828             }
 829             */
 830             cpi->tok_count = 0;
 831
 832             for (mb_row = 0; mb_row < cm->mb_rows; mb_row ++)
 833             {
 834                 cpi->tok_count += cpi->tplist[mb_row].stop - cpi->tplist[mb_row].start;
 835             }
 836
 837             if (xd->segmentation_enabled)
 838             {
 839
 840                 int i, j;
 841
 842                 if (xd->segmentation_enabled)
 843                 {
 844
 845                     for (i = 0; i < cpi->encoding_thread_count; i++)
 846                     {
 847                         for (j = 0; j < 4; j++)
 848                             segment_counts[j] += cpi->mb_row_ei[i].segment_counts[j];
 849                     }
 850                 }
 851
 852             }
 853
 854             for (i = 0; i < cpi->encoding_thread_count; i++)
 855             {
 856                 totalrate += cpi->mb_row_ei[i].totalrate;
 857             }
 858
 859             for (i = 0; i < cpi->encoding_thread_count; i++)
 860             {
 861                 x->activity_sum += cpi->mb_row_ei[i].mb.activity_sum;
 862             }
 863
 864 #endif
 865
 866         }
 867
 868         vpx_usec_timer_mark(&emr_timer);
 869         cpi->time_encode_mb_row += vpx_usec_timer_elapsed(&emr_timer);
 870
 871     }
 872
 873
 874     // Work out the segment probabilites if segmentation is enabled
 875     if (xd->segmentation_enabled)
 876     {
 877         int tot_count;
 878         int i;
 879
 880         // Set to defaults
 881         vpx_memset(xd->mb_segment_tree_probs, 255 , sizeof(xd->mb_segment_tree_probs));
 882
 883         tot_count = segment_counts[0] + segment_counts[1] + segment_counts[2] + segment_counts[3];
 884
 885         if (tot_count)
 886         {
 887             xd->mb_segment_tree_probs[0] = ((segment_counts[0] + segment_counts[1]) * 255) / tot_count;
 888
 889             tot_count = segment_counts[0] + segment_counts[1];
 890
 891             if (tot_count > 0)
 892             {
 893                 xd->mb_segment_tree_probs[1] = (segment_counts[0] * 255) / tot_count;
 894             }
 895
 896             tot_count = segment_counts[2] + segment_counts[3];
 897
 898             if (tot_count > 0)
 899                 xd->mb_segment_tree_probs[2] = (segment_counts[2] * 255) / tot_count;
 900
 901             // Zero probabilities not allowed
 902             for (i = 0; i < MB_FEATURE_TREE_PROBS; i ++)
 903             {
 904                 if (xd->mb_segment_tree_probs[i] == 0)
 905                     xd->mb_segment_tree_probs[i] = 1;
 906             }
 907         }
 908     }
 909
 910     // 256 rate units to the bit
 911     cpi->projected_frame_size = totalrate >> 8;   // projected_frame_size in units of BYTES
 912
 913     // Make a note of the percentage MBs coded Intra.
 914     if (cm->frame_type == KEY_FRAME)
 915     {
 916         cpi->this_frame_percent_intra = 100;
 917     }
 918     else
 919     {
 920         int tot_modes;
 921
 922         tot_modes = cpi->count_mb_ref_frame_usage[INTRA_FRAME]
 923                     + cpi->count_mb_ref_frame_usage[LAST_FRAME]
 924                     + cpi->count_mb_ref_frame_usage[GOLDEN_FRAME]
 925                     + cpi->count_mb_ref_frame_usage[ALTREF_FRAME];
 926
 927         if (tot_modes)
 928             cpi->this_frame_percent_intra = cpi->count_mb_ref_frame_usage[INTRA_FRAME] * 100 / tot_modes;
 929
 930     }
 931
 932 #if 0
 933     {
 934         int cnt = 0;
 935         int flag[2] = {0, 0};
 936
 937         for (cnt = 0; cnt < MVPcount; cnt++)
 938         {
 939             if (cm->fc.pre_mvc[0][cnt] != cm->fc.mvc[0][cnt])
 940             {
 941                 flag[0] = 1;
 942                 vpx_memcpy(cm->fc.pre_mvc[0], cm->fc.mvc[0], MVPcount);
 943                 break;
 944             }
 945         }
 946
 947         for (cnt = 0; cnt < MVPcount; cnt++)
 948         {
 949             if (cm->fc.pre_mvc[1][cnt] != cm->fc.mvc[1][cnt])
 950             {
 951                 flag[1] = 1;
 952                 vpx_memcpy(cm->fc.pre_mvc[1], cm->fc.mvc[1], MVPcount);
 953                 break;
 954             }
 955         }
 956
 957         if (flag[0] || flag[1])
 958             vp8_build_component_cost_table(cpi->mb.mvcost, cpi->mb.mvsadcost, (const MV_CONTEXT *) cm->fc.mvc, flag);
 959     }
 960 #endif
 961
 962     // Adjust the projected reference frame useage probability numbers to reflect
 963     // what we have just seen. This may be usefull when we make multiple itterations
 964     // of the recode loop rather than continuing to use values from the previous frame.
 965     if ((cm->frame_type != KEY_FRAME) && !cm->refresh_alt_ref_frame && !cm->refresh_golden_frame)
 966     {
 967         const int *const rfct = cpi->count_mb_ref_frame_usage;
 968         const int rf_intra = rfct[INTRA_FRAME];
 969         const int rf_inter = rfct[LAST_FRAME] + rfct[GOLDEN_FRAME] + rfct[ALTREF_FRAME];
 970
 971         if ((rf_intra + rf_inter) > 0)
 972         {
 973             cpi->prob_intra_coded = (rf_intra * 255) / (rf_intra + rf_inter);
 974
 975             if (cpi->prob_intra_coded < 1)
 976                 cpi->prob_intra_coded = 1;
 977
 978             if ((cm->frames_since_golden > 0) || cpi->source_alt_ref_active)
 979             {
 980                 cpi->prob_last_coded = rf_inter ? (rfct[LAST_FRAME] * 255) / rf_inter : 128;
 981
 982                 if (cpi->prob_last_coded < 1)
 983                     cpi->prob_last_coded = 1;
 984
 985                 cpi->prob_gf_coded = (rfct[GOLDEN_FRAME] + rfct[ALTREF_FRAME])
 986                                      ? (rfct[GOLDEN_FRAME] * 255) / (rfct[GOLDEN_FRAME] + rfct[ALTREF_FRAME]) : 128;
 987
 988                 if (cpi->prob_gf_coded < 1)
 989                     cpi->prob_gf_coded = 1;
 990             }
 991         }
 992     }
 993
 994 #if 0
 995     // Keep record of the total distortion this time around for future use
 996     cpi->last_frame_distortion = cpi->frame_distortion;
 997 #endif
 998
 999     /* Update the average activity for the next frame.
1000      * This is feed-forward for now; it could also be saved in two-pass, or
1001      *  done during lookahead when that is eventually added.
1002      */
1003     cpi->activity_avg = (unsigned int )(x->activity_sum/cpi->common.MBs);
1004     if (cpi->activity_avg < VP8_ACTIVITY_AVG_MIN)
1005         cpi->activity_avg = VP8_ACTIVITY_AVG_MIN;
1006
1007 }
1008 void vp8_setup_block_ptrs(MACROBLOCK *x)
1009 {
1010     int r, c;
1011     int i;
1012
1013     for (r = 0; r < 4; r++)
1014     {
1015         for (c = 0; c < 4; c++)
1016         {
1017             x->block[r*4+c].src_diff = x->src_diff + r * 4 * 16 + c * 4;
1018         }
1019     }
1020
1021     for (r = 0; r < 2; r++)
1022     {
1023         for (c = 0; c < 2; c++)
1024         {
1025             x->block[16 + r*2+c].src_diff = x->src_diff + 256 + r * 4 * 8 + c * 4;
1026         }
1027     }
1028
1029
1030     for (r = 0; r < 2; r++)
1031     {
1032         for (c = 0; c < 2; c++)
1033         {
1034             x->block[20 + r*2+c].src_diff = x->src_diff + 320 + r * 4 * 8 + c * 4;
1035         }
1036     }
1037
1038     x->block[24].src_diff = x->src_diff + 384;
1039
1040
1041     for (i = 0; i < 25; i++)
1042     {
1043         x->block[i].coeff = x->coeff + i * 16;
1044     }
1045 }
1046
1047 void vp8_build_block_offsets(MACROBLOCK *x)
1048 {
1049     int block = 0;
1050     int br, bc;
1051
1052     vp8_build_block_doffsets(&x->e_mbd);
1053
1054     // y blocks
1055     for (br = 0; br < 4; br++)
1056     {
1057         for (bc = 0; bc < 4; bc++)
1058         {
1059             BLOCK *this_block = &x->block[block];
1060             this_block->base_src = &x->src.y_buffer;
1061             this_block->src_stride = x->src.y_stride;
1062             this_block->src = 4 * br * this_block->src_stride + 4 * bc;
1063             ++block;
1064         }
1065     }
1066
1067     // u blocks
1068     for (br = 0; br < 2; br++)
1069     {
1070         for (bc = 0; bc < 2; bc++)
1071         {
1072             BLOCK *this_block = &x->block[block];
1073             this_block->base_src = &x->src.u_buffer;
1074             this_block->src_stride = x->src.uv_stride;
1075             this_block->src = 4 * br * this_block->src_stride + 4 * bc;
1076             ++block;
1077         }
1078     }
1079
1080     // v blocks
1081     for (br = 0; br < 2; br++)
1082     {
1083         for (bc = 0; bc < 2; bc++)
1084         {
1085             BLOCK *this_block = &x->block[block];
1086             this_block->base_src = &x->src.v_buffer;
1087             this_block->src_stride = x->src.uv_stride;
1088             this_block->src = 4 * br * this_block->src_stride + 4 * bc;
1089             ++block;
1090         }
1091     }
1092 }
1093
1094 static void sum_intra_stats(VP8_COMP *cpi, MACROBLOCK *x)
1095 {
1096     const MACROBLOCKD *xd = & x->e_mbd;
1097     const MB_PREDICTION_MODE m = xd->mode_info_context->mbmi.mode;
1098     const MB_PREDICTION_MODE uvm = xd->mode_info_context->mbmi.uv_mode;
1099
1100 #ifdef MODE_STATS
1101     const int is_key = cpi->common.frame_type == KEY_FRAME;
1102
1103     ++ (is_key ? uv_modes : inter_uv_modes)[uvm];
1104
1105     if (m == B_PRED)
1106     {
1107         unsigned int *const bct = is_key ? b_modes : inter_b_modes;
1108
1109         int b = 0;
1110
1111         do
1112         {
1113             ++ bct[xd->block[b].bmi.mode];
1114         }
1115         while (++b < 16);
1116     }
1117
1118 #endif
1119
1120     ++cpi->ymode_count[m];
1121     ++cpi->uv_mode_count[uvm];
1122
1123 }
1124 int vp8cx_encode_intra_macro_block(VP8_COMP *cpi, MACROBLOCK *x, TOKENEXTRA **t)
1125 {
1126     int Error4x4, Error16x16, error_uv;
1127     B_PREDICTION_MODE intra_bmodes[16];
1128     int rate4x4, rate16x16, rateuv;
1129     int dist4x4, dist16x16, distuv;
1130     int rate = 0;
1131     int rate4x4_tokenonly = 0;
1132     int rate16x16_tokenonly = 0;
1133     int rateuv_tokenonly = 0;
1134     int i;
1135
1136     x->e_mbd.mode_info_context->mbmi.ref_frame = INTRA_FRAME;
1137
1138 #if !(CONFIG_REALTIME_ONLY)
1139
1140     if (cpi->sf.RD || cpi->compressor_speed != 2)
1141     {
1142         Error4x4 = vp8_rd_pick_intra4x4mby_modes(cpi, x, &rate4x4, &rate4x4_tokenonly, &dist4x4);
1143
1144         //save the b modes for possible later use
1145         for (i = 0; i < 16; i++)
1146             intra_bmodes[i] = x->e_mbd.block[i].bmi.mode;
1147
1148         Error16x16 = vp8_rd_pick_intra16x16mby_mode(cpi, x, &rate16x16, &rate16x16_tokenonly, &dist16x16);
1149
1150         error_uv = vp8_rd_pick_intra_mbuv_mode(cpi, x, &rateuv, &rateuv_tokenonly, &distuv);
1151
1152         vp8_encode_intra16x16mbuv(IF_RTCD(&cpi->rtcd), x);
1153         rate += rateuv;
1154
1155         if (Error4x4 < Error16x16)
1156         {
1157             rate += rate4x4;
1158             x->e_mbd.mode_info_context->mbmi.mode = B_PRED;
1159
1160             // get back the intra block modes
1161             for (i = 0; i < 16; i++)
1162                 x->e_mbd.block[i].bmi.mode = intra_bmodes[i];
1163
1164             vp8_encode_intra4x4mby(IF_RTCD(&cpi->rtcd), x);
1165             cpi->prediction_error += Error4x4 ;
1166 #if 0
1167             // Experimental RD code
1168             cpi->frame_distortion += dist4x4;
1169 #endif
1170         }
1171         else
1172         {
1173             vp8_encode_intra16x16mby(IF_RTCD(&cpi->rtcd), x);
1174             rate += rate16x16;
1175
1176 #if 0
1177             // Experimental RD code
1178             cpi->prediction_error += Error16x16;
1179             cpi->frame_distortion += dist16x16;
1180 #endif
1181         }
1182
1183         sum_intra_stats(cpi, x);
1184
1185         vp8_tokenize_mb(cpi, &x->e_mbd, t);
1186     }
1187     else
1188 #endif
1189     {
1190
1191         int rate2, distortion2;
1192         MB_PREDICTION_MODE mode, best_mode = DC_PRED;
1193         int this_rd;
1194         Error16x16 = INT_MAX;
1195
1196         for (mode = DC_PRED; mode <= TM_PRED; mode ++)
1197         {
1198             x->e_mbd.mode_info_context->mbmi.mode = mode;
1199             vp8_build_intra_predictors_mby_ptr(&x->e_mbd);
1200             distortion2 = VARIANCE_INVOKE(&cpi->rtcd.variance, get16x16prederror)(x->src.y_buffer, x->src.y_stride, x->e_mbd.predictor, 16, 0x7fffffff);
1201             rate2  = x->mbmode_cost[x->e_mbd.frame_type][mode];
1202             this_rd = RD_ESTIMATE(x->rdmult, x->rddiv, rate2, distortion2);
1203
1204             if (Error16x16 > this_rd)
1205             {
1206                 Error16x16 = this_rd;
1207                 best_mode = mode;
1208             }
1209         }
1210
1211         vp8_pick_intra4x4mby_modes(IF_RTCD(&cpi->rtcd), x, &rate2, &distortion2);
1212
1213         if (distortion2 == INT_MAX)
1214             Error4x4 = INT_MAX;
1215         else
1216             Error4x4 = RD_ESTIMATE(x->rdmult, x->rddiv, rate2, distortion2);
1217
1218         if (Error4x4 < Error16x16)
1219         {
1220             x->e_mbd.mode_info_context->mbmi.mode = B_PRED;
1221             vp8_encode_intra4x4mby(IF_RTCD(&cpi->rtcd), x);
1222             cpi->prediction_error += Error4x4;
1223         }
1224         else
1225         {
1226             x->e_mbd.mode_info_context->mbmi.mode = best_mode;
1227             vp8_encode_intra16x16mby(IF_RTCD(&cpi->rtcd), x);
1228             cpi->prediction_error += Error16x16;
1229         }
1230
1231         vp8_pick_intra_mbuv_mode(x);
1232         vp8_encode_intra16x16mbuv(IF_RTCD(&cpi->rtcd), x);
1233         sum_intra_stats(cpi, x);
1234         vp8_tokenize_mb(cpi, &x->e_mbd, t);
1235     }
1236
1237     return rate;
1238 }
1239 #ifdef SPEEDSTATS
1240 extern int cnt_pm;
1241 #endif
1242
1243 extern void vp8_fix_contexts(MACROBLOCKD *x);
1244
1245 int vp8cx_encode_inter_macroblock
1246 (
1247     VP8_COMP *cpi, MACROBLOCK *x, TOKENEXTRA **t,
1248     int recon_yoffset, int recon_uvoffset
1249 )
1250 {
1251     MACROBLOCKD *const xd = &x->e_mbd;
1252     int inter_error;
1253     int intra_error = 0;
1254     int rate;
1255     int distortion;
1256
1257     x->skip = 0;
1258
1259     if (xd->segmentation_enabled)
1260         x->encode_breakout = cpi->segment_encode_breakout[xd->mode_info_context->mbmi.segment_id];
1261     else
1262         x->encode_breakout = cpi->oxcf.encode_breakout;
1263
1264 #if !(CONFIG_REALTIME_ONLY)
1265
1266     if (cpi->sf.RD)
1267     {
1268         /* Are we using the fast quantizer for the mode selection? */
1269         if(cpi->sf.use_fastquant_for_pick)
1270             cpi->mb.quantize_b      = QUANTIZE_INVOKE(&cpi->rtcd.quantize, fastquantb);
1271
1272         inter_error = vp8_rd_pick_inter_mode(cpi, x, recon_yoffset, recon_uvoffset, &rate, &distortion, &intra_error);
1273
1274         /* switch back to the regular quantizer for the encode */
1275         if (cpi->sf.improved_quant)
1276         {
1277             cpi->mb.quantize_b    = QUANTIZE_INVOKE(&cpi->rtcd.quantize, quantb);
1278         }
1279
1280     }
1281     else
1282 #endif
1283         inter_error = vp8_pick_inter_mode(cpi, x, recon_yoffset, recon_uvoffset, &rate, &distortion, &intra_error);
1284
1285
1286     cpi->prediction_error += inter_error;
1287     cpi->intra_error += intra_error;
1288
1289 #if 0
1290     // Experimental RD code
1291     cpi->frame_distortion += distortion;
1292     cpi->last_mb_distortion = distortion;
1293 #endif
1294
1295     // MB level adjutment to quantizer setup
1296     if (xd->segmentation_enabled || cpi->zbin_mode_boost_enabled)
1297     {
1298         // If cyclic update enabled
1299         if (cpi->cyclic_refresh_mode_enabled)
1300         {
1301             // Clear segment_id back to 0 if not coded (last frame 0,0)
1302             if ((xd->mode_info_context->mbmi.segment_id == 1) &&
1303                 ((xd->mode_info_context->mbmi.ref_frame != LAST_FRAME) || (xd->mode_info_context->mbmi.mode != ZEROMV)))
1304             {
1305                 xd->mode_info_context->mbmi.segment_id = 0;
1306             }
1307         }
1308
1309         // Experimental code. Special case for gf and arf zeromv modes. Increase zbin size to supress noise
1310         if (cpi->zbin_mode_boost_enabled)
1311         {
1312             if ( xd->mode_info_context->mbmi.ref_frame == INTRA_FRAME )
1313                  cpi->zbin_mode_boost = 0;
1314             else
1315             {
1316                 if (xd->mode_info_context->mbmi.mode == ZEROMV)
1317                 {
1318                     if (xd->mode_info_context->mbmi.ref_frame != LAST_FRAME)
1319                         cpi->zbin_mode_boost = GF_ZEROMV_ZBIN_BOOST;
1320                     else
1321                         cpi->zbin_mode_boost = LF_ZEROMV_ZBIN_BOOST;
1322                 }
1323                 else if (xd->mode_info_context->mbmi.mode == SPLITMV)
1324                     cpi->zbin_mode_boost = 0;
1325                 else
1326                     cpi->zbin_mode_boost = MV_ZBIN_BOOST;
1327             }
1328         }
1329         else
1330             cpi->zbin_mode_boost = 0;
1331
1332         vp8cx_mb_init_quantizer(cpi,  x);
1333     }
1334
1335     cpi->count_mb_ref_frame_usage[xd->mode_info_context->mbmi.ref_frame] ++;
1336
1337     if (xd->mode_info_context->mbmi.ref_frame == INTRA_FRAME)
1338     {
1339         vp8_encode_intra16x16mbuv(IF_RTCD(&cpi->rtcd), x);
1340
1341         if (xd->mode_info_context->mbmi.mode == B_PRED)
1342         {
1343             vp8_encode_intra4x4mby(IF_RTCD(&cpi->rtcd), x);
1344         }
1345         else
1346         {
1347             vp8_encode_intra16x16mby(IF_RTCD(&cpi->rtcd), x);
1348         }
1349
1350         sum_intra_stats(cpi, x);
1351     }
1352     else
1353     {
1354         MV best_ref_mv;
1355         MV nearest, nearby;
1356         int mdcounts[4];
1357         int ref_fb_idx;
1358
1359         vp8_find_near_mvs(xd, xd->mode_info_context,
1360                           &nearest, &nearby, &best_ref_mv, mdcounts, xd->mode_info_context->mbmi.ref_frame, cpi->common.ref_frame_sign_bias);
1361
1362         vp8_build_uvmvs(xd, cpi->common.full_pixel);
1363
1364         if (xd->mode_info_context->mbmi.ref_frame == LAST_FRAME)
1365             ref_fb_idx = cpi->common.lst_fb_idx;
1366         else if (xd->mode_info_context->mbmi.ref_frame == GOLDEN_FRAME)
1367             ref_fb_idx = cpi->common.gld_fb_idx;
1368         else
1369             ref_fb_idx = cpi->common.alt_fb_idx;
1370
1371         xd->pre.y_buffer = cpi->common.yv12_fb[ref_fb_idx].y_buffer + recon_yoffset;
1372         xd->pre.u_buffer = cpi->common.yv12_fb[ref_fb_idx].u_buffer + recon_uvoffset;
1373         xd->pre.v_buffer = cpi->common.yv12_fb[ref_fb_idx].v_buffer + recon_uvoffset;
1374
1375         if (xd->mode_info_context->mbmi.mode == SPLITMV)
1376         {
1377             int i;
1378
1379             for (i = 0; i < 16; i++)
1380             {
1381                 if (xd->block[i].bmi.mode == NEW4X4)
1382                 {
1383                     cpi->MVcount[0][mv_max+((xd->block[i].bmi.mv.as_mv.row - best_ref_mv.row) >> 1)]++;
1384                     cpi->MVcount[1][mv_max+((xd->block[i].bmi.mv.as_mv.col - best_ref_mv.col) >> 1)]++;
1385                 }
1386             }
1387         }
1388         else if (xd->mode_info_context->mbmi.mode == NEWMV)
1389         {
1390             cpi->MVcount[0][mv_max+((xd->block[0].bmi.mv.as_mv.row - best_ref_mv.row) >> 1)]++;
1391             cpi->MVcount[1][mv_max+((xd->block[0].bmi.mv.as_mv.col - best_ref_mv.col) >> 1)]++;
1392         }
1393
1394         if (!x->skip && !x->e_mbd.mode_info_context->mbmi.force_no_skip)
1395         {
1396             vp8_encode_inter16x16(IF_RTCD(&cpi->rtcd), x);
1397
1398             // Clear mb_skip_coeff if mb_no_coeff_skip is not set
1399             if (!cpi->common.mb_no_coeff_skip)
1400                 xd->mode_info_context->mbmi.mb_skip_coeff = 0;
1401
1402         }
1403         else
1404             vp8_stuff_inter16x16(x);
1405     }
1406
1407     if (!x->skip)
1408         vp8_tokenize_mb(cpi, xd, t);
1409     else
1410     {
1411         if (cpi->common.mb_no_coeff_skip)
1412         {
1413             if (xd->mode_info_context->mbmi.mode != B_PRED && xd->mode_info_context->mbmi.mode != SPLITMV)
1414                 xd->mode_info_context->mbmi.dc_diff = 0;
1415             else
1416                 xd->mode_info_context->mbmi.dc_diff = 1;
1417
1418             xd->mode_info_context->mbmi.mb_skip_coeff = 1;
1419             cpi->skip_true_count ++;
1420             vp8_fix_contexts(xd);
1421         }
1422         else
1423         {
1424             vp8_stuff_mb(cpi, xd, t);
1425             xd->mode_info_context->mbmi.mb_skip_coeff = 0;
1426             cpi->skip_false_count ++;
1427         }
1428     }
1429
1430     return rate;
1431 }