vp8/encoder/encodeframe.c

   1 /*
   2  *  Copyright (c) 2010 The WebM project authors. All Rights Reserved.
   3  *
   4  *  Use of this source code is governed by a BSD-style license
   5  *  that can be found in the LICENSE file in the root of the source
   6  *  tree. An additional intellectual property rights grant can be found
   7  *  in the file PATENTS.  All contributing project authors may
   8  *  be found in the AUTHORS file in the root of the source tree.
   9  */
  10
  11
  12 #include "vpx_ports/config.h"
  13 #include "encodemb.h"
  14 #include "encodemv.h"
  15 #include "common.h"
  16 #include "onyx_int.h"
  17 #include "extend.h"
  18 #include "entropymode.h"
  19 #include "quant_common.h"
  20 #include "segmentation.h"
  21 #include "setupintrarecon.h"
  22 #include "encodeintra.h"
  23 #include "reconinter.h"
  24 #include "rdopt.h"
  25 #include "pickinter.h"
  26 #include "findnearmv.h"
  27 #include "reconintra.h"
  28 #include <stdio.h>
  29 #include <limits.h>
  30 #include "subpixel.h"
  31 #include "vpx_ports/vpx_timer.h"
  32
/* Helpers for referring to the runtime CPU detection (RTCD) function tables.
 * When CONFIG_RUNTIME_CPU_DETECT is non-zero, RTCD(x) expands to the address
 * of member x of cpi->common.rtcd (so a variable named cpi must be in scope
 * where the macro is used) and IF_RTCD(x) passes its argument through.
 * Otherwise both macros expand to NULL.
 */
#if CONFIG_RUNTIME_CPU_DETECT
#define RTCD(x)     &cpi->common.rtcd.x
#define IF_RTCD(x)  (x)
#else
#define RTCD(x)     NULL
#define IF_RTCD(x)  NULL
#endif
/* Prototypes for functions that are declared here directly rather than
 * through a header.  None of them is defined in the portion of the file
 * preceding this point.
 */
extern void vp8_stuff_mb(VP8_COMP *cpi, MACROBLOCKD *x, TOKENEXTRA **t) ;

extern void vp8cx_initialize_me_consts(VP8_COMP *cpi, int QIndex);
extern void vp8_auto_select_speed(VP8_COMP *cpi);
extern void vp8cx_init_mbrthread_data(VP8_COMP *cpi,
                                      MACROBLOCK *x,
                                      MB_ROW_COMP *mbr_ei,
                                      int mb_row,
                                      int count);
void vp8_build_block_offsets(MACROBLOCK *x);
void vp8_setup_block_ptrs(MACROBLOCK *x);
/* Per-macroblock encoders; encode_mb_row() adds their return value to the
 * frame's running rate total.
 */
int vp8cx_encode_inter_macroblock(VP8_COMP *cpi, MACROBLOCK *x, TOKENEXTRA **t, int recon_yoffset, int recon_uvoffset);
int vp8cx_encode_intra_macro_block(VP8_COMP *cpi, MACROBLOCK *x, TOKENEXTRA **t);
  53
/* Prediction mode usage counters, compiled in only when MODE_STATS is
 * defined.  encode_mb_row() increments y_modes for key-frame macroblocks and
 * inter_y_modes / inter_b_modes for macroblocks of other frame types.
 */
#ifdef MODE_STATS
unsigned int inter_y_modes[10] = {0, 0, 0, 0, 0, 0, 0, 0, 0, 0};
unsigned int inter_uv_modes[4] = {0, 0, 0, 0};
unsigned int inter_b_modes[15]  = {0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0};
unsigned int y_modes[5]   = {0, 0, 0, 0, 0};
unsigned int uv_modes[4]  = {0, 0, 0, 0};
unsigned int b_modes[14]  = {0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0};
#endif
  62
/* Rounding factor for each quantizer index, in units of 1/128:
 * vp8cx_init_quantizer() derives the Y1 and UV rounding values as
 * (qrounding_factors[Q] * quant_val) >> 7.  All 129 entries are 48.
 */
static const int qrounding_factors[129] =
{
    48, 48, 48, 48, 48, 48, 48, 48,
    48, 48, 48, 48, 48, 48, 48, 48,
    48, 48, 48, 48, 48, 48, 48, 48,
    48, 48, 48, 48, 48, 48, 48, 48,
    48, 48, 48, 48, 48, 48, 48, 48,
    48, 48, 48, 48, 48, 48, 48, 48,
    48, 48, 48, 48, 48, 48, 48, 48,
    48, 48, 48, 48, 48, 48, 48, 48,
    48, 48, 48, 48, 48, 48, 48, 48,
    48, 48, 48, 48, 48, 48, 48, 48,
    48, 48, 48, 48, 48, 48, 48, 48,
    48, 48, 48, 48, 48, 48, 48, 48,
    48, 48, 48, 48, 48, 48, 48, 48,
    48, 48, 48, 48, 48, 48, 48, 48,
    48, 48, 48, 48, 48, 48, 48, 48,
    48, 48, 48, 48, 48, 48, 48, 48,
    48
};
  83
/* Zero-bin factor for each quantizer index, in units of 1/128:
 * vp8cx_init_quantizer() derives the Y1 and UV zbin values as
 * ((qzbin_factors[Q] * quant_val) + 64) >> 7.  The first 48 entries are 84
 * and the remaining 81 entries are 80.
 */
static const int qzbin_factors[129] =
{
    84, 84, 84, 84, 84, 84, 84, 84,
    84, 84, 84, 84, 84, 84, 84, 84,
    84, 84, 84, 84, 84, 84, 84, 84,
    84, 84, 84, 84, 84, 84, 84, 84,
    84, 84, 84, 84, 84, 84, 84, 84,
    84, 84, 84, 84, 84, 84, 84, 84,
    80, 80, 80, 80, 80, 80, 80, 80,
    80, 80, 80, 80, 80, 80, 80, 80,
    80, 80, 80, 80, 80, 80, 80, 80,
    80, 80, 80, 80, 80, 80, 80, 80,
    80, 80, 80, 80, 80, 80, 80, 80,
    80, 80, 80, 80, 80, 80, 80, 80,
    80, 80, 80, 80, 80, 80, 80, 80,
    80, 80, 80, 80, 80, 80, 80, 80,
    80, 80, 80, 80, 80, 80, 80, 80,
    80, 80, 80, 80, 80, 80, 80, 80,
    80
};
 104
/* Rounding factor for each quantizer index used for the Y2 tables, in units
 * of 1/128: vp8cx_init_quantizer() derives the Y2 rounding values as
 * (qrounding_factors_y2[Q] * quant_val) >> 7.  All 129 entries are 48.
 */
static const int qrounding_factors_y2[129] =
{
    48, 48, 48, 48, 48, 48, 48, 48,
    48, 48, 48, 48, 48, 48, 48, 48,
    48, 48, 48, 48, 48, 48, 48, 48,
    48, 48, 48, 48, 48, 48, 48, 48,
    48, 48, 48, 48, 48, 48, 48, 48,
    48, 48, 48, 48, 48, 48, 48, 48,
    48, 48, 48, 48, 48, 48, 48, 48,
    48, 48, 48, 48, 48, 48, 48, 48,
    48, 48, 48, 48, 48, 48, 48, 48,
    48, 48, 48, 48, 48, 48, 48, 48,
    48, 48, 48, 48, 48, 48, 48, 48,
    48, 48, 48, 48, 48, 48, 48, 48,
    48, 48, 48, 48, 48, 48, 48, 48,
    48, 48, 48, 48, 48, 48, 48, 48,
    48, 48, 48, 48, 48, 48, 48, 48,
    48, 48, 48, 48, 48, 48, 48, 48,
    48
};
 125
/* Zero-bin factor for each quantizer index used for the Y2 tables, in units
 * of 1/128: vp8cx_init_quantizer() derives the Y2 zbin values as
 * ((qzbin_factors_y2[Q] * quant_val) + 64) >> 7.  The first 48 entries are
 * 84 and the remaining 81 entries are 80.
 */
static const int qzbin_factors_y2[129] =
{
    84, 84, 84, 84, 84, 84, 84, 84,
    84, 84, 84, 84, 84, 84, 84, 84,
    84, 84, 84, 84, 84, 84, 84, 84,
    84, 84, 84, 84, 84, 84, 84, 84,
    84, 84, 84, 84, 84, 84, 84, 84,
    84, 84, 84, 84, 84, 84, 84, 84,
    80, 80, 80, 80, 80, 80, 80, 80,
    80, 80, 80, 80, 80, 80, 80, 80,
    80, 80, 80, 80, 80, 80, 80, 80,
    80, 80, 80, 80, 80, 80, 80, 80,
    80, 80, 80, 80, 80, 80, 80, 80,
    80, 80, 80, 80, 80, 80, 80, 80,
    80, 80, 80, 80, 80, 80, 80, 80,
    80, 80, 80, 80, 80, 80, 80, 80,
    80, 80, 80, 80, 80, 80, 80, 80,
    80, 80, 80, 80, 80, 80, 80, 80,
    80
};
 146
/* Selects which vp8cx_init_quantizer() is compiled: when defined, the
 * variant that fills quant and quant_shift through vp8cx_invert_quant();
 * otherwise the variant that stores (1 << 16) / quant_val directly.
 */
#define EXACT_QUANT
 148 #ifdef EXACT_QUANT
 149 static void vp8cx_invert_quant(int improved_quant, short *quant,
 150                                short *shift, short d)
 151 {
 152     if(improved_quant)
 153     {
 154         unsigned t;
 155         int l;
 156         t = d;
 157         for(l = 0; t > 1; l++)
 158             t>>=1;
 159         t = 1 + (1<<(16+l))/d;
 160         *quant = (short)(t - (1<<16));
 161         *shift = l;
 162     }
 163     else
 164     {
 165         *quant = (1 << 16) / d;
 166         *shift = 0;
 167     }
 168 }
 169
 170 void vp8cx_init_quantizer(VP8_COMP *cpi)
 171 {
 172     int i;
 173     int quant_val;
 174     int Q;
 175
 176     int zbin_boost[16] = {0, 0, 8, 10, 12, 14, 16, 20, 24, 28, 32, 36, 40, 44, 44, 44};
 177
 178     for (Q = 0; Q < QINDEX_RANGE; Q++)
 179     {
 180         // dc values
 181         quant_val = vp8_dc_quant(Q, cpi->common.y1dc_delta_q);
 182         vp8cx_invert_quant(cpi->sf.improved_quant, cpi->Y1quant[Q] + 0,
 183                            cpi->Y1quant_shift[Q] + 0, quant_val);
 184         cpi->Y1zbin[Q][0] = ((qzbin_factors[Q] * quant_val) + 64) >> 7;
 185         cpi->Y1round[Q][0] = (qrounding_factors[Q] * quant_val) >> 7;
 186         cpi->common.Y1dequant[Q][0] = quant_val;
 187         cpi->zrun_zbin_boost_y1[Q][0] = (quant_val * zbin_boost[0]) >> 7;
 188
 189         quant_val = vp8_dc2quant(Q, cpi->common.y2dc_delta_q);
 190         vp8cx_invert_quant(cpi->sf.improved_quant, cpi->Y2quant[Q] + 0,
 191                            cpi->Y2quant_shift[Q] + 0, quant_val);
 192         cpi->Y2zbin[Q][0] = ((qzbin_factors_y2[Q] * quant_val) + 64) >> 7;
 193         cpi->Y2round[Q][0] = (qrounding_factors_y2[Q] * quant_val) >> 7;
 194         cpi->common.Y2dequant[Q][0] = quant_val;
 195         cpi->zrun_zbin_boost_y2[Q][0] = (quant_val * zbin_boost[0]) >> 7;
 196
 197         quant_val = vp8_dc_uv_quant(Q, cpi->common.uvdc_delta_q);
 198         vp8cx_invert_quant(cpi->sf.improved_quant, cpi->UVquant[Q] + 0,
 199                            cpi->UVquant_shift[Q] + 0, quant_val);
 200         cpi->UVzbin[Q][0] = ((qzbin_factors[Q] * quant_val) + 64) >> 7;;
 201         cpi->UVround[Q][0] = (qrounding_factors[Q] * quant_val) >> 7;
 202         cpi->common.UVdequant[Q][0] = quant_val;
 203         cpi->zrun_zbin_boost_uv[Q][0] = (quant_val * zbin_boost[0]) >> 7;
 204
 205         // all the ac values = ;
 206         for (i = 1; i < 16; i++)
 207         {
 208             int rc = vp8_default_zig_zag1d[i];
 209
 210             quant_val = vp8_ac_yquant(Q);
 211             vp8cx_invert_quant(cpi->sf.improved_quant, cpi->Y1quant[Q] + rc,
 212                                cpi->Y1quant_shift[Q] + rc, quant_val);
 213             cpi->Y1zbin[Q][rc] = ((qzbin_factors[Q] * quant_val) + 64) >> 7;
 214             cpi->Y1round[Q][rc] = (qrounding_factors[Q] * quant_val) >> 7;
 215             cpi->common.Y1dequant[Q][rc] = quant_val;
 216             cpi->zrun_zbin_boost_y1[Q][i] = (quant_val * zbin_boost[i]) >> 7;
 217
 218             quant_val = vp8_ac2quant(Q, cpi->common.y2ac_delta_q);
 219             vp8cx_invert_quant(cpi->sf.improved_quant, cpi->Y2quant[Q] + rc,
 220                                cpi->Y2quant_shift[Q] + rc, quant_val);
 221             cpi->Y2zbin[Q][rc] = ((qzbin_factors_y2[Q] * quant_val) + 64) >> 7;
 222             cpi->Y2round[Q][rc] = (qrounding_factors_y2[Q] * quant_val) >> 7;
 223             cpi->common.Y2dequant[Q][rc] = quant_val;
 224             cpi->zrun_zbin_boost_y2[Q][i] = (quant_val * zbin_boost[i]) >> 7;
 225
 226             quant_val = vp8_ac_uv_quant(Q, cpi->common.uvac_delta_q);
 227             vp8cx_invert_quant(cpi->sf.improved_quant, cpi->UVquant[Q] + rc,
 228                                cpi->UVquant_shift[Q] + rc, quant_val);
 229             cpi->UVzbin[Q][rc] = ((qzbin_factors[Q] * quant_val) + 64) >> 7;
 230             cpi->UVround[Q][rc] = (qrounding_factors[Q] * quant_val) >> 7;
 231             cpi->common.UVdequant[Q][rc] = quant_val;
 232             cpi->zrun_zbin_boost_uv[Q][i] = (quant_val * zbin_boost[i]) >> 7;
 233         }
 234     }
 235 }
 236 #else
 237 void vp8cx_init_quantizer(VP8_COMP *cpi)
 238 {
 239     int i;
 240     int quant_val;
 241     int Q;
 242
 243     int zbin_boost[16] = {0, 0, 8, 10, 12, 14, 16, 20, 24, 28, 32, 36, 40, 44, 44, 44};
 244
 245     for (Q = 0; Q < QINDEX_RANGE; Q++)
 246     {
 247         // dc values
 248         quant_val = vp8_dc_quant(Q, cpi->common.y1dc_delta_q);
 249         cpi->Y1quant[Q][0] = (1 << 16) / quant_val;
 250         cpi->Y1zbin[Q][0] = ((qzbin_factors[Q] * quant_val) + 64) >> 7;
 251         cpi->Y1round[Q][0] = (qrounding_factors[Q] * quant_val) >> 7;
 252         cpi->common.Y1dequant[Q][0] = quant_val;
 253         cpi->zrun_zbin_boost_y1[Q][0] = (quant_val * zbin_boost[0]) >> 7;
 254
 255         quant_val = vp8_dc2quant(Q, cpi->common.y2dc_delta_q);
 256         cpi->Y2quant[Q][0] = (1 << 16) / quant_val;
 257         cpi->Y2zbin[Q][0] = ((qzbin_factors_y2[Q] * quant_val) + 64) >> 7;
 258         cpi->Y2round[Q][0] = (qrounding_factors_y2[Q] * quant_val) >> 7;
 259         cpi->common.Y2dequant[Q][0] = quant_val;
 260         cpi->zrun_zbin_boost_y2[Q][0] = (quant_val * zbin_boost[0]) >> 7;
 261
 262         quant_val = vp8_dc_uv_quant(Q, cpi->common.uvdc_delta_q);
 263         cpi->UVquant[Q][0] = (1 << 16) / quant_val;
 264         cpi->UVzbin[Q][0] = ((qzbin_factors[Q] * quant_val) + 64) >> 7;;
 265         cpi->UVround[Q][0] = (qrounding_factors[Q] * quant_val) >> 7;
 266         cpi->common.UVdequant[Q][0] = quant_val;
 267         cpi->zrun_zbin_boost_uv[Q][0] = (quant_val * zbin_boost[0]) >> 7;
 268
 269         // all the ac values = ;
 270         for (i = 1; i < 16; i++)
 271         {
 272             int rc = vp8_default_zig_zag1d[i];
 273
 274             quant_val = vp8_ac_yquant(Q);
 275             cpi->Y1quant[Q][rc] = (1 << 16) / quant_val;
 276             cpi->Y1zbin[Q][rc] = ((qzbin_factors[Q] * quant_val) + 64) >> 7;
 277             cpi->Y1round[Q][rc] = (qrounding_factors[Q] * quant_val) >> 7;
 278             cpi->common.Y1dequant[Q][rc] = quant_val;
 279             cpi->zrun_zbin_boost_y1[Q][i] = (quant_val * zbin_boost[i]) >> 7;
 280
 281             quant_val = vp8_ac2quant(Q, cpi->common.y2ac_delta_q);
 282             cpi->Y2quant[Q][rc] = (1 << 16) / quant_val;
 283             cpi->Y2zbin[Q][rc] = ((qzbin_factors_y2[Q] * quant_val) + 64) >> 7;
 284             cpi->Y2round[Q][rc] = (qrounding_factors_y2[Q] * quant_val) >> 7;
 285             cpi->common.Y2dequant[Q][rc] = quant_val;
 286             cpi->zrun_zbin_boost_y2[Q][i] = (quant_val * zbin_boost[i]) >> 7;
 287
 288             quant_val = vp8_ac_uv_quant(Q, cpi->common.uvac_delta_q);
 289             cpi->UVquant[Q][rc] = (1 << 16) / quant_val;
 290             cpi->UVzbin[Q][rc] = ((qzbin_factors[Q] * quant_val) + 64) >> 7;
 291             cpi->UVround[Q][rc] = (qrounding_factors[Q] * quant_val) >> 7;
 292             cpi->common.UVdequant[Q][rc] = quant_val;
 293             cpi->zrun_zbin_boost_uv[Q][i] = (quant_val * zbin_boost[i]) >> 7;
 294         }
 295     }
 296 }
 297 #endif
 298 void vp8cx_mb_init_quantizer(VP8_COMP *cpi, MACROBLOCK *x)
 299 {
 300     int i;
 301     int QIndex;
 302     MACROBLOCKD *xd = &x->e_mbd;
 303     int zbin_extra;
 304
 305     // Select the baseline MB Q index.
 306     if (xd->segmentation_enabled)
 307     {
 308         // Abs Value
 309         if (xd->mb_segement_abs_delta == SEGMENT_ABSDATA)
 310
 311             QIndex = xd->segment_feature_data[MB_LVL_ALT_Q][xd->mode_info_context->mbmi.segment_id];
 312         // Delta Value
 313         else
 314         {
 315             QIndex = cpi->common.base_qindex + xd->segment_feature_data[MB_LVL_ALT_Q][xd->mode_info_context->mbmi.segment_id];
 316             QIndex = (QIndex >= 0) ? ((QIndex <= MAXQ) ? QIndex : MAXQ) : 0;    // Clamp to valid range
 317         }
 318     }
 319     else
 320         QIndex = cpi->common.base_qindex;
 321
 322     // Y
 323     zbin_extra = (cpi->common.Y1dequant[QIndex][1] * (cpi->zbin_over_quant + cpi->zbin_mode_boost)) >> 7;
 324
 325     for (i = 0; i < 16; i++)
 326     {
 327         x->block[i].quant = cpi->Y1quant[QIndex];
 328         x->block[i].quant_shift = cpi->Y1quant_shift[QIndex];
 329         x->block[i].zbin = cpi->Y1zbin[QIndex];
 330         x->block[i].round = cpi->Y1round[QIndex];
 331         x->e_mbd.block[i].dequant = cpi->common.Y1dequant[QIndex];
 332         x->block[i].zrun_zbin_boost = cpi->zrun_zbin_boost_y1[QIndex];
 333         x->block[i].zbin_extra = (short)zbin_extra;
 334     }
 335
 336     // UV
 337     zbin_extra = (cpi->common.UVdequant[QIndex][1] * (cpi->zbin_over_quant + cpi->zbin_mode_boost)) >> 7;
 338
 339     for (i = 16; i < 24; i++)
 340     {
 341         x->block[i].quant = cpi->UVquant[QIndex];
 342         x->block[i].quant_shift = cpi->UVquant_shift[QIndex];
 343         x->block[i].zbin = cpi->UVzbin[QIndex];
 344         x->block[i].round = cpi->UVround[QIndex];
 345         x->e_mbd.block[i].dequant = cpi->common.UVdequant[QIndex];
 346         x->block[i].zrun_zbin_boost = cpi->zrun_zbin_boost_uv[QIndex];
 347         x->block[i].zbin_extra = (short)zbin_extra;
 348     }
 349
 350     // Y2
 351     zbin_extra = (cpi->common.Y2dequant[QIndex][1] * ((cpi->zbin_over_quant / 2) + cpi->zbin_mode_boost)) >> 7;
 352     x->block[24].quant = cpi->Y2quant[QIndex];
 353     x->block[24].quant_shift = cpi->Y2quant_shift[QIndex];
 354     x->block[24].zbin = cpi->Y2zbin[QIndex];
 355     x->block[24].round = cpi->Y2round[QIndex];
 356     x->e_mbd.block[24].dequant = cpi->common.Y2dequant[QIndex];
 357     x->block[24].zrun_zbin_boost = cpi->zrun_zbin_boost_y2[QIndex];
 358     x->block[24].zbin_extra = (short)zbin_extra;
 359 }
 360
 361 void vp8cx_frame_init_quantizer(VP8_COMP *cpi)
 362 {
 363     // Clear Zbin mode boost for default case
 364     cpi->zbin_mode_boost = 0;
 365
 366     // vp8cx_init_quantizer() is first called in vp8_create_compressor(). A check is added here so that vp8cx_init_quantizer() is only called
 367     // when these values are not all zero.
 368     if (cpi->common.y1dc_delta_q | cpi->common.y2dc_delta_q | cpi->common.uvdc_delta_q | cpi->common.y2ac_delta_q | cpi->common.uvac_delta_q)
 369     {
 370         vp8cx_init_quantizer(cpi);
 371     }
 372
 373     // MB level quantizer setup
 374     vp8cx_mb_init_quantizer(cpi, &cpi->mb);
 375 }
 376
 377
/* Lower bound for the activity average.
 * activity_avg must be positive, or flat regions could get a zero weight
 *  (infinite lambda), which confounds analysis.
 * This also avoids the need for divide by zero checks in
 *  vp8_activity_masking(), which divides by act + 4 * cpi->activity_avg.
 * NOTE(review): the macro is not referenced in the part of the file up to
 *  vp8_encode_frame(); presumably it is applied where cpi->activity_avg is
 *  updated -- confirm.
 */
#define VP8_ACTIVITY_AVG_MIN (64)
 384
/* This is used as a reference when computing the source variance for the
 *  purposes of activity masking: vp8_activity_masking() passes it to the
 *  16x16 variance routine with a stride of 0, so the same 16 values of 128
 *  serve as every reference row.
 * Eventually this should be replaced by custom no-reference routines,
 *  which will be faster.
 */
static const unsigned char VP8_VAR_OFFS[16]=
{
    128,128,128,128,128,128,128,128,128,128,128,128,128,128,128,128
};
 394
 395 unsigned int vp8_activity_masking(VP8_COMP *cpi, MACROBLOCK *x)
 396 {
 397     unsigned int act;
 398     unsigned int sse;
 399     int sum;
 400     unsigned int a;
 401     unsigned int b;
 402     unsigned int d;
 403     /* TODO: This could also be done over smaller areas (8x8), but that would
 404      *  require extensive changes elsewhere, as lambda is assumed to be fixed
 405      *  over an entire MB in most of the code.
 406      * Another option is to compute four 8x8 variances, and pick a single
 407      *  lambda using a non-linear combination (e.g., the smallest, or second
 408      *  smallest, etc.).
 409      */
 410     VARIANCE_INVOKE(&cpi->rtcd.variance, get16x16var)(x->src.y_buffer,
 411      x->src.y_stride, VP8_VAR_OFFS, 0, &sse, &sum);
 412     /* This requires a full 32 bits of precision. */
 413     act = (sse<<8) - sum*sum;
 414     /* Drop 4 to give us some headroom to work with. */
 415     act = (act + 8) >> 4;
 416     /* If the region is flat, lower the activity some more. */
 417     if (act < 8<<12)
 418         act = act < 5<<12 ? act : 5<<12;
 419     /* TODO: For non-flat regions, edge regions should receive less masking
 420      *  than textured regions, but identifying edge regions quickly and
 421      *  reliably enough is still a subject of experimentation.
 422      * This will be most noticable near edges with a complex shape (e.g.,
 423      *  text), but the 4x4 transform size should make this less of a problem
 424      *  than it would be for an 8x8 transform.
 425      */
 426     /* Apply the masking to the RD multiplier. */
 427     a = act + 4*cpi->activity_avg;
 428     b = 4*act + cpi->activity_avg;
 429     x->rdmult = (unsigned int)(((INT64)x->rdmult*b + (a>>1))/a);
 430     return act;
 431 }
 432
 433
 434
/* Encode one row of macroblocks.
 *
 *  cpi             encoder instance.
 *  cm              common codec state (frame buffers, dimensions, contexts).
 *  mb_row          index of the macroblock row to encode.
 *  x               encoder-side macroblock context; its source buffer
 *                  pointers are advanced by one macroblock per column.
 *  xd              decoder-side macroblock context belonging to x.
 *  tp              in/out token write pointer; the values on entry and after
 *                  the row are recorded in cpi->tplist[mb_row].
 *  segment_counts  per-segment usage counters, incremented once for each
 *                  macroblock.
 *  totalrate       accumulator to which the value returned by each
 *                  macroblock encode call is added.
 *
 * Besides encoding, the function advances xd->mode_info_context,
 * x->partition_info, xd->above_context and x->gf_active_ptr past the row
 * (the first two by one extra entry for the border), extends the
 * reconstructed row via vp8_extend_mb_row(), and adds the row's activity
 * total to x->activity_sum.  It does not rewind x->src to the start of the
 * next row; that is left to the caller.
 *
 * NOTE(review): the MODE_STATS-only code below reads xd->mbmi and
 * x->partition, whereas the rest of the function uses
 * xd->mode_info_context->mbmi and x->partition_info -- confirm those
 * members still exist before enabling MODE_STATS.
 */
static
void encode_mb_row(VP8_COMP *cpi,
                   VP8_COMMON *cm,
                   int mb_row,
                   MACROBLOCK  *x,
                   MACROBLOCKD *xd,
                   TOKENEXTRA **tp,
                   int *segment_counts,
                   int *totalrate)
{
    INT64 activity_sum = 0;
    int i;
    int recon_yoffset, recon_uvoffset;
    int mb_col;
    int ref_fb_idx = cm->lst_fb_idx;
    int dst_fb_idx = cm->new_fb_idx;
    int recon_y_stride = cm->yv12_fb[ref_fb_idx].y_stride;
    int recon_uv_stride = cm->yv12_fb[ref_fb_idx].uv_stride;
    // Index of this row's first macroblock in the per-macroblock maps
    int seg_map_index = (mb_row * cpi->common.mb_cols);


    // reset above block coeffs
    xd->above_context = cm->above_context;

    xd->up_available = (mb_row != 0);
    // Offsets of this row's first macroblock within the luma / chroma planes
    recon_yoffset = (mb_row * recon_y_stride * 16);
    recon_uvoffset = (mb_row * recon_uv_stride * 8);

    // Remember where this row's tokens begin
    cpi->tplist[mb_row].start = *tp;
    //printf("Main mb_row = %d\n", mb_row);

    // Distance of Mb to the top & bottom edges, specified in 1/8th pel
    // units as they are always compared to values that are in 1/8th pel units
    xd->mb_to_top_edge = -((mb_row * 16) << 3);
    xd->mb_to_bottom_edge = ((cm->mb_rows - 1 - mb_row) * 16) << 3;

    // Set up limit values for vertical motion vector components
    // to prevent them extending beyond the UMV borders
    x->mv_row_min = -((mb_row * 16) + (VP8BORDERINPIXELS - 16));
    x->mv_row_max = ((cm->mb_rows - 1 - mb_row) * 16)
                        + (VP8BORDERINPIXELS - 16);

    // for each macroblock col in image
    for (mb_col = 0; mb_col < cm->mb_cols; mb_col++)
    {
        // Distance of Mb to the left & right edges, specified in
        // 1/8th pel units as they are always compared to values
        // that are in 1/8th pel units
        xd->mb_to_left_edge = -((mb_col * 16) << 3);
        xd->mb_to_right_edge = ((cm->mb_cols - 1 - mb_col) * 16) << 3;

        // Set up limit values for horizontal motion vector components
        // to prevent them extending beyond the UMV borders
        x->mv_col_min = -((mb_col * 16) + (VP8BORDERINPIXELS - 16));
        x->mv_col_max = ((cm->mb_cols - 1 - mb_col) * 16)
                            + (VP8BORDERINPIXELS - 16);

        // Point the destination at this macroblock in the new frame buffer
        xd->dst.y_buffer = cm->yv12_fb[dst_fb_idx].y_buffer + recon_yoffset;
        xd->dst.u_buffer = cm->yv12_fb[dst_fb_idx].u_buffer + recon_uvoffset;
        xd->dst.v_buffer = cm->yv12_fb[dst_fb_idx].v_buffer + recon_uvoffset;
        xd->left_available = (mb_col != 0);

        // Start from the frame level RD constants; vp8_activity_masking()
        // then rescales x->rdmult for this macroblock
        x->rddiv = cpi->RDDIV;
        x->rdmult = cpi->RDMULT;

        activity_sum += vp8_activity_masking(cpi, x);

        // Is segmentation enabled
        // MB level adjustment to quantizer
        if (xd->segmentation_enabled)
        {
            // Code to set segment id in xd->mode_info_context->mbmi.segment_id
            // for current MB (with range checking: ids above 3 become 0)
            if (cpi->segmentation_map[seg_map_index+mb_col] <= 3)
                xd->mode_info_context->mbmi.segment_id = cpi->segmentation_map[seg_map_index+mb_col];
            else
                xd->mode_info_context->mbmi.segment_id = 0;

            vp8cx_mb_init_quantizer(cpi, x);
        }
        else
            xd->mode_info_context->mbmi.segment_id = 0;         // Set to Segment 0 by default

        x->active_ptr = cpi->active_map + seg_map_index + mb_col;

        if (cm->frame_type == KEY_FRAME)
        {
            *totalrate += vp8cx_encode_intra_macro_block(cpi, x, tp);
#ifdef MODE_STATS
            y_modes[xd->mbmi.mode] ++;
#endif
        }
        else
        {
            *totalrate += vp8cx_encode_inter_macroblock(cpi, x, tp, recon_yoffset, recon_uvoffset);

#ifdef MODE_STATS
            inter_y_modes[xd->mbmi.mode] ++;

            if (xd->mbmi.mode == SPLITMV)
            {
                int b;

                for (b = 0; b < xd->mbmi.partition_count; b++)
                {
                    inter_b_modes[x->partition->bmi[b].mode] ++;
                }
            }

#endif

            // Count of last ref frame 0,0 usage
            if ((xd->mode_info_context->mbmi.mode == ZEROMV) && (xd->mode_info_context->mbmi.ref_frame == LAST_FRAME))
                cpi->inter_zz_count ++;

            // Special case code for cyclic refresh
            // If cyclic update enabled then copy the macroblock's segment_id (which may have been updated based on mode
            // during vp8cx_encode_inter_macroblock()) back into the global segmentation map
            if (cpi->cyclic_refresh_mode_enabled && xd->segmentation_enabled)
            {
                cpi->segmentation_map[seg_map_index+mb_col] = xd->mode_info_context->mbmi.segment_id;

                // If the block has been refreshed mark it as clean (the magnitude of the -ve influences how long it will be before we consider another refresh):
                // Else if it was coded (last frame 0,0) and has not already been refreshed then mark it as a candidate for cleanup next time (marked 0)
                // else mark it as dirty (1).
                if (xd->mode_info_context->mbmi.segment_id)
                    cpi->cyclic_refresh_map[seg_map_index+mb_col] = -1;
                else if ((xd->mode_info_context->mbmi.mode == ZEROMV) && (xd->mode_info_context->mbmi.ref_frame == LAST_FRAME))
                {
                    if (cpi->cyclic_refresh_map[seg_map_index+mb_col] == 1)
                        cpi->cyclic_refresh_map[seg_map_index+mb_col] = 0;
                }
                else
                    cpi->cyclic_refresh_map[seg_map_index+mb_col] = 1;

            }
        }

        // Updated after every macroblock, so on exit it marks the end of the row's tokens
        cpi->tplist[mb_row].stop = *tp;

        x->gf_active_ptr++;      // Increment pointer into gf usage flags structure for next mb

        // Copy the per-block mode info of the 16 luma blocks into the mode info array
        for (i = 0; i < 16; i++)
            vpx_memcpy(&xd->mode_info_context->bmi[i], &xd->block[i].bmi, sizeof(xd->block[i].bmi));

        // adjust to the next column of macroblocks
        x->src.y_buffer += 16;
        x->src.u_buffer += 8;
        x->src.v_buffer += 8;

        recon_yoffset += 16;
        recon_uvoffset += 8;

        // Keep track of segment usage
        segment_counts[xd->mode_info_context->mbmi.segment_id] ++;

        // skip to next mb
        xd->mode_info_context++;
        x->partition_info++;

        xd->above_context++;
        // Record the column just completed by this (main) encoding thread
        cpi->current_mb_col_main = mb_col;
    }

    //extend the recon for intra prediction
    vp8_extend_mb_row(
        &cm->yv12_fb[dst_fb_idx],
        xd->dst.y_buffer + 16,
        xd->dst.u_buffer + 8,
        xd->dst.v_buffer + 8);

    // this is to account for the border
    xd->mode_info_context++;
    x->partition_info++;
    x->activity_sum += activity_sum;
}
 610
 611
 612
 613
 614
 615 void vp8_encode_frame(VP8_COMP *cpi)
 616 {
 617     int mb_row;
 618     MACROBLOCK *const x = & cpi->mb;
 619     VP8_COMMON *const cm = & cpi->common;
 620     MACROBLOCKD *const xd = & x->e_mbd;
 621
 622     int i;
 623     TOKENEXTRA *tp = cpi->tok;
 624     int segment_counts[MAX_MB_SEGMENTS];
 625     int totalrate;
 626
 627     // Functions setup for all frame types so we can use MC in AltRef
 628     if (cm->mcomp_filter_type == SIXTAP)
 629     {
 630         xd->subpixel_predict        = SUBPIX_INVOKE(
 631                                         &cpi->common.rtcd.subpix, sixtap4x4);
 632         xd->subpixel_predict8x4     = SUBPIX_INVOKE(
 633                                         &cpi->common.rtcd.subpix, sixtap8x4);
 634         xd->subpixel_predict8x8     = SUBPIX_INVOKE(
 635                                         &cpi->common.rtcd.subpix, sixtap8x8);
 636         xd->subpixel_predict16x16   = SUBPIX_INVOKE(
 637                                         &cpi->common.rtcd.subpix, sixtap16x16);
 638     }
 639     else
 640     {
 641         xd->subpixel_predict        = SUBPIX_INVOKE(
 642                                         &cpi->common.rtcd.subpix, bilinear4x4);
 643         xd->subpixel_predict8x4     = SUBPIX_INVOKE(
 644                                         &cpi->common.rtcd.subpix, bilinear8x4);
 645         xd->subpixel_predict8x8     = SUBPIX_INVOKE(
 646                                         &cpi->common.rtcd.subpix, bilinear8x8);
 647         xd->subpixel_predict16x16   = SUBPIX_INVOKE(
 648                                       &cpi->common.rtcd.subpix, bilinear16x16);
 649     }
 650
 651     x->gf_active_ptr = (signed char *)cpi->gf_active_flags;     // Point to base of GF active flags data structure
 652
 653     x->vector_range = 32;
 654
 655     // Count of MBs using the alternate Q if any
 656     cpi->alt_qcount = 0;
 657
 658     // Reset frame count of inter 0,0 motion vector useage.
 659     cpi->inter_zz_count = 0;
 660
 661     vpx_memset(segment_counts, 0, sizeof(segment_counts));
 662
 663     cpi->prediction_error = 0;
 664     cpi->intra_error = 0;
 665     cpi->skip_true_count = 0;
 666     cpi->skip_false_count = 0;
 667
 668 #if 0
 669     // Experimental code
 670     cpi->frame_distortion = 0;
 671     cpi->last_mb_distortion = 0;
 672 #endif
 673
 674     totalrate = 0;
 675
 676     x->partition_info = x->pi;
 677
 678     xd->mode_info_context = cm->mi;
 679     xd->mode_info_stride = cm->mode_info_stride;
 680
 681     xd->frame_type = cm->frame_type;
 682
 683     xd->frames_since_golden = cm->frames_since_golden;
 684     xd->frames_till_alt_ref_frame = cm->frames_till_alt_ref_frame;
 685     vp8_zero(cpi->MVcount);
 686     // vp8_zero( Contexts)
 687     vp8_zero(cpi->coef_counts);
 688
 689     // reset intra mode contexts
 690     if (cm->frame_type == KEY_FRAME)
 691         vp8_init_mbmode_probs(cm);
 692
 693
 694     vp8cx_frame_init_quantizer(cpi);
 695
 696     if (cpi->compressor_speed == 2)
 697     {
 698         if (cpi->oxcf.cpu_used < 0)
 699             cpi->Speed = -(cpi->oxcf.cpu_used);
 700         else
 701             vp8_auto_select_speed(cpi);
 702     }
 703
 704     vp8_initialize_rd_consts(cpi, vp8_dc_quant(cm->base_qindex, cm->y1dc_delta_q));
 705     //vp8_initialize_rd_consts( cpi, vp8_dc_quant(cpi->avg_frame_qindex, cm->y1dc_delta_q) );
 706     vp8cx_initialize_me_consts(cpi, cm->base_qindex);
 707     //vp8cx_initialize_me_consts( cpi, cpi->avg_frame_qindex);
 708
 709     // Copy data over into macro block data sturctures.
 710
 711     x->src = * cpi->Source;
 712     xd->pre = cm->yv12_fb[cm->lst_fb_idx];
 713     xd->dst = cm->yv12_fb[cm->new_fb_idx];
 714
 715     // set up frame new frame for intra coded blocks
 716
 717     vp8_setup_intra_recon(&cm->yv12_fb[cm->new_fb_idx]);
 718
 719     vp8_build_block_offsets(x);
 720
 721     vp8_setup_block_dptrs(&x->e_mbd);
 722
 723     vp8_setup_block_ptrs(x);
 724
 725     x->activity_sum = 0;
 726
 727 #if 0
 728     // Experimental rd code
 729     // 2 Pass - Possibly set Rdmult based on last frame distortion + this frame target bits or other metrics
 730     // such as cpi->rate_correction_factor that indicate relative complexity.
 731     /*if ( cpi->pass == 2 && (cpi->last_frame_distortion > 0) && (cpi->target_bits_per_mb > 0) )
 732     {
 733         //x->rdmult = ((cpi->last_frame_distortion * 256)/cpi->common.MBs)/ cpi->target_bits_per_mb;
 734         x->rdmult = (int)(cpi->RDMULT * cpi->rate_correction_factor);
 735     }
 736     else
 737         x->rdmult = cpi->RDMULT; */
 738     //x->rdmult = (int)(cpi->RDMULT * pow( (cpi->rate_correction_factor * 2.0), 0.75 ));
 739 #endif
 740
 741     xd->mode_info_context->mbmi.mode = DC_PRED;
 742     xd->mode_info_context->mbmi.uv_mode = DC_PRED;
 743
 744     xd->left_context = &cm->left_context;
 745
 746     vp8_zero(cpi->count_mb_ref_frame_usage)
 747     vp8_zero(cpi->ymode_count)
 748     vp8_zero(cpi->uv_mode_count)
 749
 750     x->mvc = cm->fc.mvc;
 751
 752     vpx_memset(cm->above_context, 0, sizeof(ENTROPY_CONTEXT_PLANES) * cm->mb_cols);
 753
 754     {
 755         struct vpx_usec_timer  emr_timer;
 756         vpx_usec_timer_start(&emr_timer);
 757
 758         if (!cpi->b_multi_threaded)
 759         {
 760             // for each macroblock row in image
 761             for (mb_row = 0; mb_row < cm->mb_rows; mb_row++)
 762             {
 763
 764                 vp8_zero(cm->left_context)
 765
 766                 encode_mb_row(cpi, cm, mb_row, x, xd, &tp, segment_counts, &totalrate);
 767
 768                 // adjust to the next row of mbs
 769                 x->src.y_buffer += 16 * x->src.y_stride - 16 * cm->mb_cols;
 770                 x->src.u_buffer += 8 * x->src.uv_stride - 8 * cm->mb_cols;
 771                 x->src.v_buffer += 8 * x->src.uv_stride - 8 * cm->mb_cols;
 772             }
 773
 774             cpi->tok_count = tp - cpi->tok;
 775
 776         }
 777         else
 778         {
 779 #if CONFIG_MULTITHREAD
 780             int i;
 781
 782             vp8cx_init_mbrthread_data(cpi, x, cpi->mb_row_ei, 1,  cpi->encoding_thread_count);
 783
 784             for (mb_row = 0; mb_row < cm->mb_rows; mb_row += (cpi->encoding_thread_count + 1))
 785             {
 786                 cpi->current_mb_col_main = -1;
 787
 788                 for (i = 0; i < cpi->encoding_thread_count; i++)
 789                 {
 790                     if ((mb_row + i + 1) >= cm->mb_rows)
 791                         break;
 792
 793                     cpi->mb_row_ei[i].mb_row = mb_row + i + 1;
 794                     cpi->mb_row_ei[i].tp  = cpi->tok + (mb_row + i + 1) * (cm->mb_cols * 16 * 24);
 795                     cpi->mb_row_ei[i].current_mb_col = -1;
 796                     //SetEvent(cpi->h_event_mbrencoding[i]);
 797                     sem_post(&cpi->h_event_mbrencoding[i]);
 798                 }
 799
 800                 vp8_zero(cm->left_context)
 801
 802                 tp = cpi->tok + mb_row * (cm->mb_cols * 16 * 24);
 803
 804                 encode_mb_row(cpi, cm, mb_row, x, xd, &tp, segment_counts, &totalrate);
 805
 806                 // adjust to the next row of mbs
 807                 x->src.y_buffer += 16 * x->src.y_stride * (cpi->encoding_thread_count + 1) - 16 * cm->mb_cols;
 808                 x->src.u_buffer +=  8 * x->src.uv_stride * (cpi->encoding_thread_count + 1) - 8 * cm->mb_cols;
 809                 x->src.v_buffer +=  8 * x->src.uv_stride * (cpi->encoding_thread_count + 1) - 8 * cm->mb_cols;
 810
 811                 xd->mode_info_context += xd->mode_info_stride * cpi->encoding_thread_count;
 812                 x->partition_info  += xd->mode_info_stride * cpi->encoding_thread_count;
 813
 814                 if (mb_row < cm->mb_rows - 1)
 815                     //WaitForSingleObject(cpi->h_event_main, INFINITE);
 816                     sem_wait(&cpi->h_event_main);
 817             }
 818
 819             /*
 820             for( ;mb_row<cm->mb_rows; mb_row ++)
 821             {
 822             vp8_zero( cm->left_context)
 823
 824             tp = cpi->tok + mb_row * (cm->mb_cols * 16 * 24);
 825
 826             encode_mb_row(cpi, cm, mb_row, x, xd, &tp, segment_counts, &totalrate);
 827             // adjust to the next row of mbs
 828             x->src.y_buffer += 16 * x->src.y_stride - 16 * cm->mb_cols;
 829             x->src.u_buffer +=  8 * x->src.uv_stride - 8 * cm->mb_cols;
 830             x->src.v_buffer +=  8 * x->src.uv_stride - 8 * cm->mb_cols;
 831
 832             }
 833             */
 834             cpi->tok_count = 0;
 835
 836             for (mb_row = 0; mb_row < cm->mb_rows; mb_row ++)
 837             {
 838                 cpi->tok_count += cpi->tplist[mb_row].stop - cpi->tplist[mb_row].start;
 839             }
 840
 841             if (xd->segmentation_enabled)
 842             {
 843
 844                 int i, j;
 845
 846                 if (xd->segmentation_enabled)
 847                 {
 848
 849                     for (i = 0; i < cpi->encoding_thread_count; i++)
 850                     {
 851                         for (j = 0; j < 4; j++)
 852                             segment_counts[j] += cpi->mb_row_ei[i].segment_counts[j];
 853                     }
 854                 }
 855
 856             }
 857
 858             for (i = 0; i < cpi->encoding_thread_count; i++)
 859             {
 860                 totalrate += cpi->mb_row_ei[i].totalrate;
 861             }
 862
 863             for (i = 0; i < cpi->encoding_thread_count; i++)
 864             {
 865                 x->activity_sum += cpi->mb_row_ei[i].mb.activity_sum;
 866             }
 867
 868 #endif
 869
 870         }
 871
 872         vpx_usec_timer_mark(&emr_timer);
 873         cpi->time_encode_mb_row += vpx_usec_timer_elapsed(&emr_timer);
 874
 875     }
 876
 877
 878     // Work out the segment probabilites if segmentation is enabled
 879     if (xd->segmentation_enabled)
 880     {
 881         int tot_count;
 882         int i;
 883
 884         // Set to defaults
 885         vpx_memset(xd->mb_segment_tree_probs, 255 , sizeof(xd->mb_segment_tree_probs));
 886
 887         tot_count = segment_counts[0] + segment_counts[1] + segment_counts[2] + segment_counts[3];
 888
 889         if (tot_count)
 890         {
 891             xd->mb_segment_tree_probs[0] = ((segment_counts[0] + segment_counts[1]) * 255) / tot_count;
 892
 893             tot_count = segment_counts[0] + segment_counts[1];
 894
 895             if (tot_count > 0)
 896             {
 897                 xd->mb_segment_tree_probs[1] = (segment_counts[0] * 255) / tot_count;
 898             }
 899
 900             tot_count = segment_counts[2] + segment_counts[3];
 901
 902             if (tot_count > 0)
 903                 xd->mb_segment_tree_probs[2] = (segment_counts[2] * 255) / tot_count;
 904
 905             // Zero probabilities not allowed
 906             for (i = 0; i < MB_FEATURE_TREE_PROBS; i ++)
 907             {
 908                 if (xd->mb_segment_tree_probs[i] == 0)
 909                     xd->mb_segment_tree_probs[i] = 1;
 910             }
 911         }
 912     }
 913
 914     // 256 rate units to the bit
 915     cpi->projected_frame_size = totalrate >> 8;   // projected_frame_size in units of BYTES
 916
 917     // Make a note of the percentage MBs coded Intra.
 918     if (cm->frame_type == KEY_FRAME)
 919     {
 920         cpi->this_frame_percent_intra = 100;
 921     }
 922     else
 923     {
 924         int tot_modes;
 925
 926         tot_modes = cpi->count_mb_ref_frame_usage[INTRA_FRAME]
 927                     + cpi->count_mb_ref_frame_usage[LAST_FRAME]
 928                     + cpi->count_mb_ref_frame_usage[GOLDEN_FRAME]
 929                     + cpi->count_mb_ref_frame_usage[ALTREF_FRAME];
 930
 931         if (tot_modes)
 932             cpi->this_frame_percent_intra = cpi->count_mb_ref_frame_usage[INTRA_FRAME] * 100 / tot_modes;
 933
 934     }
 935
 936 #if 0
 937     {
 938         int cnt = 0;
 939         int flag[2] = {0, 0};
 940
 941         for (cnt = 0; cnt < MVPcount; cnt++)
 942         {
 943             if (cm->fc.pre_mvc[0][cnt] != cm->fc.mvc[0][cnt])
 944             {
 945                 flag[0] = 1;
 946                 vpx_memcpy(cm->fc.pre_mvc[0], cm->fc.mvc[0], MVPcount);
 947                 break;
 948             }
 949         }
 950
 951         for (cnt = 0; cnt < MVPcount; cnt++)
 952         {
 953             if (cm->fc.pre_mvc[1][cnt] != cm->fc.mvc[1][cnt])
 954             {
 955                 flag[1] = 1;
 956                 vpx_memcpy(cm->fc.pre_mvc[1], cm->fc.mvc[1], MVPcount);
 957                 break;
 958             }
 959         }
 960
 961         if (flag[0] || flag[1])
 962             vp8_build_component_cost_table(cpi->mb.mvcost, cpi->mb.mvsadcost, (const MV_CONTEXT *) cm->fc.mvc, flag);
 963     }
 964 #endif
 965
 966     // Adjust the projected reference frame useage probability numbers to reflect
 967     // what we have just seen. This may be usefull when we make multiple itterations
 968     // of the recode loop rather than continuing to use values from the previous frame.
 969     if ((cm->frame_type != KEY_FRAME) && !cm->refresh_alt_ref_frame && !cm->refresh_golden_frame)
 970     {
 971         const int *const rfct = cpi->count_mb_ref_frame_usage;
 972         const int rf_intra = rfct[INTRA_FRAME];
 973         const int rf_inter = rfct[LAST_FRAME] + rfct[GOLDEN_FRAME] + rfct[ALTREF_FRAME];
 974
 975         if ((rf_intra + rf_inter) > 0)
 976         {
 977             cpi->prob_intra_coded = (rf_intra * 255) / (rf_intra + rf_inter);
 978
 979             if (cpi->prob_intra_coded < 1)
 980                 cpi->prob_intra_coded = 1;
 981
 982             if ((cm->frames_since_golden > 0) || cpi->source_alt_ref_active)
 983             {
 984                 cpi->prob_last_coded = rf_inter ? (rfct[LAST_FRAME] * 255) / rf_inter : 128;
 985
 986                 if (cpi->prob_last_coded < 1)
 987                     cpi->prob_last_coded = 1;
 988
 989                 cpi->prob_gf_coded = (rfct[GOLDEN_FRAME] + rfct[ALTREF_FRAME])
 990                                      ? (rfct[GOLDEN_FRAME] * 255) / (rfct[GOLDEN_FRAME] + rfct[ALTREF_FRAME]) : 128;
 991
 992                 if (cpi->prob_gf_coded < 1)
 993                     cpi->prob_gf_coded = 1;
 994             }
 995         }
 996     }
 997
 998 #if 0
 999     // Keep record of the total distortion this time around for future use
1000     cpi->last_frame_distortion = cpi->frame_distortion;
1001 #endif
1002
1003     /* Update the average activity for the next frame.
1004      * This is feed-forward for now; it could also be saved in two-pass, or
1005      *  done during lookahead when that is eventually added.
1006      */
1007     cpi->activity_avg = (unsigned int )(x->activity_sum/cpi->common.MBs);
1008     if (cpi->activity_avg < VP8_ACTIVITY_AVG_MIN)
1009         cpi->activity_avg = VP8_ACTIVITY_AVG_MIN;
1010
1011 }
1012 void vp8_setup_block_ptrs(MACROBLOCK *x)
1013 {
1014     int r, c;
1015     int i;
1016
1017     for (r = 0; r < 4; r++)
1018     {
1019         for (c = 0; c < 4; c++)
1020         {
1021             x->block[r*4+c].src_diff = x->src_diff + r * 4 * 16 + c * 4;
1022         }
1023     }
1024
1025     for (r = 0; r < 2; r++)
1026     {
1027         for (c = 0; c < 2; c++)
1028         {
1029             x->block[16 + r*2+c].src_diff = x->src_diff + 256 + r * 4 * 8 + c * 4;
1030         }
1031     }
1032
1033
1034     for (r = 0; r < 2; r++)
1035     {
1036         for (c = 0; c < 2; c++)
1037         {
1038             x->block[20 + r*2+c].src_diff = x->src_diff + 320 + r * 4 * 8 + c * 4;
1039         }
1040     }
1041
1042     x->block[24].src_diff = x->src_diff + 384;
1043
1044
1045     for (i = 0; i < 25; i++)
1046     {
1047         x->block[i].coeff = x->coeff + i * 16;
1048     }
1049 }
1050
1051 void vp8_build_block_offsets(MACROBLOCK *x)
1052 {
1053     int block = 0;
1054     int br, bc;
1055
1056     vp8_build_block_doffsets(&x->e_mbd);
1057
1058     // y blocks
1059     for (br = 0; br < 4; br++)
1060     {
1061         for (bc = 0; bc < 4; bc++)
1062         {
1063             BLOCK *this_block = &x->block[block];
1064             this_block->base_src = &x->src.y_buffer;
1065             this_block->src_stride = x->src.y_stride;
1066             this_block->src = 4 * br * this_block->src_stride + 4 * bc;
1067             ++block;
1068         }
1069     }
1070
1071     // u blocks
1072     for (br = 0; br < 2; br++)
1073     {
1074         for (bc = 0; bc < 2; bc++)
1075         {
1076             BLOCK *this_block = &x->block[block];
1077             this_block->base_src = &x->src.u_buffer;
1078             this_block->src_stride = x->src.uv_stride;
1079             this_block->src = 4 * br * this_block->src_stride + 4 * bc;
1080             ++block;
1081         }
1082     }
1083
1084     // v blocks
1085     for (br = 0; br < 2; br++)
1086     {
1087         for (bc = 0; bc < 2; bc++)
1088         {
1089             BLOCK *this_block = &x->block[block];
1090             this_block->base_src = &x->src.v_buffer;
1091             this_block->src_stride = x->src.uv_stride;
1092             this_block->src = 4 * br * this_block->src_stride + 4 * bc;
1093             ++block;
1094         }
1095     }
1096 }
1097
1098 static void sum_intra_stats(VP8_COMP *cpi, MACROBLOCK *x)
1099 {
1100     const MACROBLOCKD *xd = & x->e_mbd;
1101     const MB_PREDICTION_MODE m = xd->mode_info_context->mbmi.mode;
1102     const MB_PREDICTION_MODE uvm = xd->mode_info_context->mbmi.uv_mode;
1103
1104 #ifdef MODE_STATS
1105     const int is_key = cpi->common.frame_type == KEY_FRAME;
1106
1107     ++ (is_key ? uv_modes : inter_uv_modes)[uvm];
1108
1109     if (m == B_PRED)
1110     {
1111         unsigned int *const bct = is_key ? b_modes : inter_b_modes;
1112
1113         int b = 0;
1114
1115         do
1116         {
1117             ++ bct[xd->block[b].bmi.mode];
1118         }
1119         while (++b < 16);
1120     }
1121
1122 #endif
1123
1124     ++cpi->ymode_count[m];
1125     ++cpi->uv_mode_count[uvm];
1126
1127 }
1128 int vp8cx_encode_intra_macro_block(VP8_COMP *cpi, MACROBLOCK *x, TOKENEXTRA **t)
1129 {
1130     int Error4x4, Error16x16, error_uv;
1131     B_PREDICTION_MODE intra_bmodes[16];
1132     int rate4x4, rate16x16, rateuv;
1133     int dist4x4, dist16x16, distuv;
1134     int rate = 0;
1135     int rate4x4_tokenonly = 0;
1136     int rate16x16_tokenonly = 0;
1137     int rateuv_tokenonly = 0;
1138     int i;
1139
1140     x->e_mbd.mode_info_context->mbmi.ref_frame = INTRA_FRAME;
1141
1142 #if !(CONFIG_REALTIME_ONLY)
1143
1144     if (cpi->sf.RD || cpi->compressor_speed != 2)
1145     {
1146         Error4x4 = vp8_rd_pick_intra4x4mby_modes(cpi, x, &rate4x4, &rate4x4_tokenonly, &dist4x4);
1147
1148         //save the b modes for possible later use
1149         for (i = 0; i < 16; i++)
1150             intra_bmodes[i] = x->e_mbd.block[i].bmi.mode;
1151
1152         Error16x16 = vp8_rd_pick_intra16x16mby_mode(cpi, x, &rate16x16, &rate16x16_tokenonly, &dist16x16);
1153
1154         error_uv = vp8_rd_pick_intra_mbuv_mode(cpi, x, &rateuv, &rateuv_tokenonly, &distuv);
1155
1156         vp8_encode_intra16x16mbuv(IF_RTCD(&cpi->rtcd), x);
1157         rate += rateuv;
1158
1159         if (Error4x4 < Error16x16)
1160         {
1161             rate += rate4x4;
1162             x->e_mbd.mode_info_context->mbmi.mode = B_PRED;
1163
1164             // get back the intra block modes
1165             for (i = 0; i < 16; i++)
1166                 x->e_mbd.block[i].bmi.mode = intra_bmodes[i];
1167
1168             vp8_encode_intra4x4mby(IF_RTCD(&cpi->rtcd), x);
1169             cpi->prediction_error += Error4x4 ;
1170 #if 0
1171             // Experimental RD code
1172             cpi->frame_distortion += dist4x4;
1173 #endif
1174         }
1175         else
1176         {
1177             vp8_encode_intra16x16mby(IF_RTCD(&cpi->rtcd), x);
1178             rate += rate16x16;
1179
1180 #if 0
1181             // Experimental RD code
1182             cpi->prediction_error += Error16x16;
1183             cpi->frame_distortion += dist16x16;
1184 #endif
1185         }
1186
1187         sum_intra_stats(cpi, x);
1188
1189         vp8_tokenize_mb(cpi, &x->e_mbd, t);
1190     }
1191     else
1192 #endif
1193     {
1194
1195         int rate2, distortion2;
1196         MB_PREDICTION_MODE mode, best_mode = DC_PRED;
1197         int this_rd;
1198         Error16x16 = INT_MAX;
1199
1200         for (mode = DC_PRED; mode <= TM_PRED; mode ++)
1201         {
1202             x->e_mbd.mode_info_context->mbmi.mode = mode;
1203             vp8_build_intra_predictors_mby_ptr(&x->e_mbd);
1204             distortion2 = VARIANCE_INVOKE(&cpi->rtcd.variance, get16x16prederror)(x->src.y_buffer, x->src.y_stride, x->e_mbd.predictor, 16, 0x7fffffff);
1205             rate2  = x->mbmode_cost[x->e_mbd.frame_type][mode];
1206             this_rd = RD_ESTIMATE(x->rdmult, x->rddiv, rate2, distortion2);
1207
1208             if (Error16x16 > this_rd)
1209             {
1210                 Error16x16 = this_rd;
1211                 best_mode = mode;
1212             }
1213         }
1214
1215         vp8_pick_intra4x4mby_modes(IF_RTCD(&cpi->rtcd), x, &rate2, &distortion2);
1216
1217         if (distortion2 == INT_MAX)
1218             Error4x4 = INT_MAX;
1219         else
1220             Error4x4 = RD_ESTIMATE(x->rdmult, x->rddiv, rate2, distortion2);
1221
1222         if (Error4x4 < Error16x16)
1223         {
1224             x->e_mbd.mode_info_context->mbmi.mode = B_PRED;
1225             vp8_encode_intra4x4mby(IF_RTCD(&cpi->rtcd), x);
1226             cpi->prediction_error += Error4x4;
1227         }
1228         else
1229         {
1230             x->e_mbd.mode_info_context->mbmi.mode = best_mode;
1231             vp8_encode_intra16x16mby(IF_RTCD(&cpi->rtcd), x);
1232             cpi->prediction_error += Error16x16;
1233         }
1234
1235         vp8_pick_intra_mbuv_mode(x);
1236         vp8_encode_intra16x16mbuv(IF_RTCD(&cpi->rtcd), x);
1237         sum_intra_stats(cpi, x);
1238         vp8_tokenize_mb(cpi, &x->e_mbd, t);
1239     }
1240
1241     return rate;
1242 }
1243 #ifdef SPEEDSTATS
1244 extern int cnt_pm;
1245 #endif
1246
1247 extern void vp8_fix_contexts(MACROBLOCKD *x);
1248
1249 int vp8cx_encode_inter_macroblock
1250 (
1251     VP8_COMP *cpi, MACROBLOCK *x, TOKENEXTRA **t,
1252     int recon_yoffset, int recon_uvoffset
1253 )
1254 {
1255     MACROBLOCKD *const xd = &x->e_mbd;
1256     int inter_error;
1257     int intra_error = 0;
1258     int rate;
1259     int distortion;
1260
1261     x->skip = 0;
1262
1263     if (xd->segmentation_enabled)
1264         x->encode_breakout = cpi->segment_encode_breakout[xd->mode_info_context->mbmi.segment_id];
1265     else
1266         x->encode_breakout = cpi->oxcf.encode_breakout;
1267
1268 #if !(CONFIG_REALTIME_ONLY)
1269
1270     if (cpi->sf.RD)
1271     {
1272         inter_error = vp8_rd_pick_inter_mode(cpi, x, recon_yoffset, recon_uvoffset, &rate, &distortion, &intra_error);
1273     }
1274     else
1275 #endif
1276         inter_error = vp8_pick_inter_mode(cpi, x, recon_yoffset, recon_uvoffset, &rate, &distortion, &intra_error);
1277
1278
1279     cpi->prediction_error += inter_error;
1280     cpi->intra_error += intra_error;
1281
1282 #if 0
1283     // Experimental RD code
1284     cpi->frame_distortion += distortion;
1285     cpi->last_mb_distortion = distortion;
1286 #endif
1287
1288     // MB level adjutment to quantizer setup
1289     if (xd->segmentation_enabled || cpi->zbin_mode_boost_enabled)
1290     {
1291         // If cyclic update enabled
1292         if (cpi->cyclic_refresh_mode_enabled)
1293         {
1294             // Clear segment_id back to 0 if not coded (last frame 0,0)
1295             if ((xd->mode_info_context->mbmi.segment_id == 1) &&
1296                 ((xd->mode_info_context->mbmi.ref_frame != LAST_FRAME) || (xd->mode_info_context->mbmi.mode != ZEROMV)))
1297             {
1298                 xd->mode_info_context->mbmi.segment_id = 0;
1299             }
1300         }
1301
1302         // Experimental code. Special case for gf and arf zeromv modes. Increase zbin size to supress noise
1303         if (cpi->zbin_mode_boost_enabled)
1304         {
1305             if ( xd->mode_info_context->mbmi.ref_frame == INTRA_FRAME )
1306                  cpi->zbin_mode_boost = 0;
1307             else
1308             {
1309                 if (xd->mode_info_context->mbmi.mode == ZEROMV)
1310                 {
1311                     if (xd->mode_info_context->mbmi.ref_frame != LAST_FRAME)
1312                         cpi->zbin_mode_boost = GF_ZEROMV_ZBIN_BOOST;
1313                     else
1314                         cpi->zbin_mode_boost = LF_ZEROMV_ZBIN_BOOST;
1315                 }
1316                 else if (xd->mode_info_context->mbmi.mode == SPLITMV)
1317                     cpi->zbin_mode_boost = 0;
1318                 else
1319                     cpi->zbin_mode_boost = MV_ZBIN_BOOST;
1320             }
1321         }
1322         else
1323             cpi->zbin_mode_boost = 0;
1324
1325         vp8cx_mb_init_quantizer(cpi,  x);
1326     }
1327
1328     cpi->count_mb_ref_frame_usage[xd->mode_info_context->mbmi.ref_frame] ++;
1329
1330     if (xd->mode_info_context->mbmi.ref_frame == INTRA_FRAME)
1331     {
1332         vp8_encode_intra16x16mbuv(IF_RTCD(&cpi->rtcd), x);
1333
1334         if (xd->mode_info_context->mbmi.mode == B_PRED)
1335         {
1336             vp8_encode_intra4x4mby(IF_RTCD(&cpi->rtcd), x);
1337         }
1338         else
1339         {
1340             vp8_encode_intra16x16mby(IF_RTCD(&cpi->rtcd), x);
1341         }
1342
1343         sum_intra_stats(cpi, x);
1344     }
1345     else
1346     {
1347         MV best_ref_mv;
1348         MV nearest, nearby;
1349         int mdcounts[4];
1350         int ref_fb_idx;
1351
1352         vp8_find_near_mvs(xd, xd->mode_info_context,
1353                           &nearest, &nearby, &best_ref_mv, mdcounts, xd->mode_info_context->mbmi.ref_frame, cpi->common.ref_frame_sign_bias);
1354
1355         vp8_build_uvmvs(xd, cpi->common.full_pixel);
1356
1357         if (xd->mode_info_context->mbmi.ref_frame == LAST_FRAME)
1358             ref_fb_idx = cpi->common.lst_fb_idx;
1359         else if (xd->mode_info_context->mbmi.ref_frame == GOLDEN_FRAME)
1360             ref_fb_idx = cpi->common.gld_fb_idx;
1361         else
1362             ref_fb_idx = cpi->common.alt_fb_idx;
1363
1364         xd->pre.y_buffer = cpi->common.yv12_fb[ref_fb_idx].y_buffer + recon_yoffset;
1365         xd->pre.u_buffer = cpi->common.yv12_fb[ref_fb_idx].u_buffer + recon_uvoffset;
1366         xd->pre.v_buffer = cpi->common.yv12_fb[ref_fb_idx].v_buffer + recon_uvoffset;
1367
1368         if (xd->mode_info_context->mbmi.mode == SPLITMV)
1369         {
1370             int i;
1371
1372             for (i = 0; i < 16; i++)
1373             {
1374                 if (xd->block[i].bmi.mode == NEW4X4)
1375                 {
1376                     cpi->MVcount[0][mv_max+((xd->block[i].bmi.mv.as_mv.row - best_ref_mv.row) >> 1)]++;
1377                     cpi->MVcount[1][mv_max+((xd->block[i].bmi.mv.as_mv.col - best_ref_mv.col) >> 1)]++;
1378                 }
1379             }
1380         }
1381         else if (xd->mode_info_context->mbmi.mode == NEWMV)
1382         {
1383             cpi->MVcount[0][mv_max+((xd->block[0].bmi.mv.as_mv.row - best_ref_mv.row) >> 1)]++;
1384             cpi->MVcount[1][mv_max+((xd->block[0].bmi.mv.as_mv.col - best_ref_mv.col) >> 1)]++;
1385         }
1386
1387         if (!x->skip && !x->e_mbd.mode_info_context->mbmi.force_no_skip)
1388         {
1389             vp8_encode_inter16x16(IF_RTCD(&cpi->rtcd), x);
1390
1391             // Clear mb_skip_coeff if mb_no_coeff_skip is not set
1392             if (!cpi->common.mb_no_coeff_skip)
1393                 xd->mode_info_context->mbmi.mb_skip_coeff = 0;
1394
1395         }
1396         else
1397             vp8_stuff_inter16x16(x);
1398     }
1399
1400     if (!x->skip)
1401         vp8_tokenize_mb(cpi, xd, t);
1402     else
1403     {
1404         if (cpi->common.mb_no_coeff_skip)
1405         {
1406             if (xd->mode_info_context->mbmi.mode != B_PRED && xd->mode_info_context->mbmi.mode != SPLITMV)
1407                 xd->mode_info_context->mbmi.dc_diff = 0;
1408             else
1409                 xd->mode_info_context->mbmi.dc_diff = 1;
1410
1411             xd->mode_info_context->mbmi.mb_skip_coeff = 1;
1412             cpi->skip_true_count ++;
1413             vp8_fix_contexts(xd);
1414         }
1415         else
1416         {
1417             vp8_stuff_mb(cpi, xd, t);
1418             xd->mode_info_context->mbmi.mb_skip_coeff = 0;
1419             cpi->skip_false_count ++;
1420         }
1421     }
1422
1423     return rate;
1424 }