vp8/encoder/temporal_filter.c

   1 /*
   2  *  Copyright (c) 2010 The WebM project authors. All Rights Reserved.
   3  *
   4  *  Use of this source code is governed by a BSD-style license
   5  *  that can be found in the LICENSE file in the root of the source
   6  *  tree. An additional intellectual property rights grant can be found
   7  *  in the file PATENTS.  All contributing project authors may
   8  *  be found in the AUTHORS file in the root of the source tree.
   9  */
  10
  11
  12 #include "vp8/common/onyxc_int.h"
  13 #include "onyx_int.h"
  14 #include "vp8/common/systemdependent.h"
  15 #include "quantize.h"
  16 #include "vp8/common/alloccommon.h"
  17 #include "mcomp.h"
  18 #include "firstpass.h"
  19 #include "psnr.h"
  20 #include "vpx_scale/vpxscale.h"
  21 #include "vp8/common/extend.h"
  22 #include "ratectrl.h"
  23 #include "vp8/common/quant_common.h"
  24 #include "segmentation.h"
  25 #include "vpx_scale/yv12extend.h"
  26 #include "vpx_mem/vpx_mem.h"
  27 #include "vp8/common/swapyv12buffer.h"
  28 #include "vp8/common/threading.h"
  29 #include "vpx_ports/vpx_timer.h"
  30
  31 #include <math.h>
  32 #include <limits.h>
  33
  34 #define ALT_REF_MC_ENABLED 1    // dis/enable MC in AltRef filtering
  35 #define ALT_REF_SUBPEL_ENABLED 1 // dis/enable subpel in MC AltRef filtering
  36
  37 #if VP8_TEMPORAL_ALT_REF
  38
  39 static void vp8_temporal_filter_predictors_mb_c
  40 (
  41     MACROBLOCKD *x,
  42     unsigned char *y_mb_ptr,
  43     unsigned char *u_mb_ptr,
  44     unsigned char *v_mb_ptr,
  45     int stride,
  46     int mv_row,
  47     int mv_col,
  48     unsigned char *pred
  49 )
  50 {
  51     int offset;
  52     unsigned char *yptr, *uptr, *vptr;
  53
  54     // Y
  55     yptr = y_mb_ptr + (mv_row >> 3) * stride + (mv_col >> 3);
  56
  57     if ((mv_row | mv_col) & 7)
  58     {
  59         x->subpixel_predict16x16(yptr, stride,
  60                                     mv_col & 7, mv_row & 7, &pred[0], 16);
  61     }
  62     else
  63     {
  64         vp8_copy_mem16x16(yptr, stride, &pred[0], 16);
  65     }
  66
  67     // U & V
  68     mv_row >>= 1;
  69     mv_col >>= 1;
  70     stride = (stride + 1) >> 1;
  71     offset = (mv_row >> 3) * stride + (mv_col >> 3);
  72     uptr = u_mb_ptr + offset;
  73     vptr = v_mb_ptr + offset;
  74
  75     if ((mv_row | mv_col) & 7)
  76     {
  77         x->subpixel_predict8x8(uptr, stride,
  78                             mv_col & 7, mv_row & 7, &pred[256], 8);
  79         x->subpixel_predict8x8(vptr, stride,
  80                             mv_col & 7, mv_row & 7, &pred[320], 8);
  81     }
  82     else
  83     {
  84         vp8_copy_mem8x8(uptr, stride, &pred[256], 8);
  85         vp8_copy_mem8x8(vptr, stride, &pred[320], 8);
  86     }
  87 }
  88 void vp8_temporal_filter_apply_c
  89 (
  90     unsigned char *frame1,
  91     unsigned int stride,
  92     unsigned char *frame2,
  93     unsigned int block_size,
  94     int strength,
  95     int filter_weight,
  96     unsigned int *accumulator,
  97     unsigned short *count
  98 )
  99 {
 100     unsigned int i, j, k;
 101     int modifier;
 102     int byte = 0;
 103
 104     for (i = 0,k = 0; i < block_size; i++)
 105     {
 106         for (j = 0; j < block_size; j++, k++)
 107         {
 108
 109             int src_byte = frame1[byte];
 110             int pixel_value = *frame2++;
 111
 112             modifier   = src_byte - pixel_value;
 113             // This is an integer approximation of:
 114             // float coeff = (3.0 * modifer * modifier) / pow(2, strength);
 115             // modifier =  (int)roundf(coeff > 16 ? 0 : 16-coeff);
 116             modifier  *= modifier;
 117             modifier  *= 3;
 118             modifier  += 1 << (strength - 1);
 119             modifier >>= strength;
 120
 121             if (modifier > 16)
 122                 modifier = 16;
 123
 124             modifier = 16 - modifier;
 125             modifier *= filter_weight;
 126
 127             count[k] += modifier;
 128             accumulator[k] += modifier * pixel_value;
 129
 130             byte++;
 131         }
 132
 133         byte += stride - block_size;
 134     }
 135 }
 136
 137 #if ALT_REF_MC_ENABLED
 138 static int dummy_cost[2*mv_max+1];
 139
 140 static int vp8_temporal_filter_find_matching_mb_c
 141 (
 142     VP8_COMP *cpi,
 143     YV12_BUFFER_CONFIG *arf_frame,
 144     YV12_BUFFER_CONFIG *frame_ptr,
 145     int mb_offset,
 146     int error_thresh
 147 )
 148 {
 149     MACROBLOCK *x = &cpi->mb;
 150     int step_param;
 151     int further_steps;
 152     int sadpb = x->sadperbit16;
 153     int bestsme = INT_MAX;
 154
 155     BLOCK *b = &x->block[0];
 156     BLOCKD *d = &x->e_mbd.block[0];
 157     int_mv best_ref_mv1;
 158     int_mv best_ref_mv1_full; /* full-pixel value of best_ref_mv1 */
 159
 160     int *mvcost[2]    = { &dummy_cost[mv_max+1], &dummy_cost[mv_max+1] };
 161     int *mvsadcost[2] = { &dummy_cost[mv_max+1], &dummy_cost[mv_max+1] };
 162
 163     // Save input state
 164     unsigned char **base_src = b->base_src;
 165     int src = b->src;
 166     int src_stride = b->src_stride;
 167     unsigned char *base_pre = x->e_mbd.pre.y_buffer;
 168     int pre = d->offset;
 169     int pre_stride = x->e_mbd.pre.y_stride;
 170
 171     best_ref_mv1.as_int = 0;
 172     best_ref_mv1_full.as_mv.col = best_ref_mv1.as_mv.col >>3;
 173     best_ref_mv1_full.as_mv.row = best_ref_mv1.as_mv.row >>3;
 174
 175     // Setup frame pointers
 176     b->base_src = &arf_frame->y_buffer;
 177     b->src_stride = arf_frame->y_stride;
 178     b->src = mb_offset;
 179
 180     x->e_mbd.pre.y_buffer = frame_ptr->y_buffer;
 181     x->e_mbd.pre.y_stride = frame_ptr->y_stride;
 182     d->offset = mb_offset;
 183
 184     // Further step/diamond searches as necessary
 185     if (cpi->Speed < 8)
 186     {
 187         step_param = cpi->sf.first_step +
 188                     (cpi->Speed > 5);
 189         further_steps =
 190             (cpi->sf.max_step_search_steps - 1)-step_param;
 191     }
 192     else
 193     {
 194         step_param = cpi->sf.first_step + 2;
 195         further_steps = 0;
 196     }
 197
 198     /*cpi->sf.search_method == HEX*/
 199     // TODO Check that the 16x16 vf & sdf are selected here
 200     bestsme = vp8_hex_search(x, b, d,
 201         &best_ref_mv1_full, &d->bmi.mv,
 202         step_param,
 203         sadpb,
 204         &cpi->fn_ptr[BLOCK_16X16],
 205         mvsadcost, mvcost, &best_ref_mv1);
 206
 207 #if ALT_REF_SUBPEL_ENABLED
 208     // Try sub-pixel MC?
 209     //if (bestsme > error_thresh && bestsme < INT_MAX)
 210     {
 211         int distortion;
 212         unsigned int sse;
 213         bestsme = cpi->find_fractional_mv_step(x, b, d,
 214                     &d->bmi.mv, &best_ref_mv1,
 215                     x->errorperbit, &cpi->fn_ptr[BLOCK_16X16],
 216                     mvcost, &distortion, &sse);
 217     }
 218 #endif
 219
 220     // Save input state
 221     b->base_src = base_src;
 222     b->src = src;
 223     b->src_stride = src_stride;
 224     x->e_mbd.pre.y_buffer = base_pre;
 225     d->offset = pre;
 226     x->e_mbd.pre.y_stride = pre_stride;
 227
 228     return bestsme;
 229 }
 230 #endif
 231
 232 static void vp8_temporal_filter_iterate_c
 233 (
 234     VP8_COMP *cpi,
 235     int frame_count,
 236     int alt_ref_index,
 237     int strength
 238 )
 239 {
 240     int byte;
 241     int frame;
 242     int mb_col, mb_row;
 243     unsigned int filter_weight;
 244     int mb_cols = cpi->common.mb_cols;
 245     int mb_rows = cpi->common.mb_rows;
 246     int mb_y_offset = 0;
 247     int mb_uv_offset = 0;
 248     DECLARE_ALIGNED_ARRAY(16, unsigned int, accumulator, 16*16 + 8*8 + 8*8);
 249     DECLARE_ALIGNED_ARRAY(16, unsigned short, count, 16*16 + 8*8 + 8*8);
 250     MACROBLOCKD *mbd = &cpi->mb.e_mbd;
 251     YV12_BUFFER_CONFIG *f = cpi->frames[alt_ref_index];
 252     unsigned char *dst1, *dst2;
 253     DECLARE_ALIGNED_ARRAY(16, unsigned char,  predictor, 16*16 + 8*8 + 8*8);
 254
 255     // Save input state
 256     unsigned char *y_buffer = mbd->pre.y_buffer;
 257     unsigned char *u_buffer = mbd->pre.u_buffer;
 258     unsigned char *v_buffer = mbd->pre.v_buffer;
 259
 260     for (mb_row = 0; mb_row < mb_rows; mb_row++)
 261     {
 262 #if ALT_REF_MC_ENABLED
 263         // Source frames are extended to 16 pixels.  This is different than
 264         //  L/A/G reference frames that have a border of 32 (VP8BORDERINPIXELS)
 265         // A 6 tap filter is used for motion search.  This requires 2 pixels
 266         //  before and 3 pixels after.  So the largest Y mv on a border would
 267         //  then be 16 - 3.  The UV blocks are half the size of the Y and
 268         //  therefore only extended by 8.  The largest mv that a UV block
 269         //  can support is 8 - 3.  A UV mv is half of a Y mv.
 270         //  (16 - 3) >> 1 == 6 which is greater than 8 - 3.
 271         // To keep the mv in play for both Y and UV planes the max that it
 272         //  can be on a border is therefore 16 - 5.
 273         cpi->mb.mv_row_min = -((mb_row * 16) + (16 - 5));
 274         cpi->mb.mv_row_max = ((cpi->common.mb_rows - 1 - mb_row) * 16)
 275                                 + (16 - 5);
 276 #endif
 277
 278         for (mb_col = 0; mb_col < mb_cols; mb_col++)
 279         {
 280             int i, j, k;
 281             int stride;
 282
 283             vpx_memset(accumulator, 0, 384*sizeof(unsigned int));
 284             vpx_memset(count, 0, 384*sizeof(unsigned short));
 285
 286 #if ALT_REF_MC_ENABLED
 287             cpi->mb.mv_col_min = -((mb_col * 16) + (16 - 5));
 288             cpi->mb.mv_col_max = ((cpi->common.mb_cols - 1 - mb_col) * 16)
 289                                     + (16 - 5);
 290 #endif
 291
 292             for (frame = 0; frame < frame_count; frame++)
 293             {
 294                 int err = 0;
 295
 296                 if (cpi->frames[frame] == NULL)
 297                     continue;
 298
 299                 mbd->block[0].bmi.mv.as_mv.row = 0;
 300                 mbd->block[0].bmi.mv.as_mv.col = 0;
 301
 302 #if ALT_REF_MC_ENABLED
 303 #define THRESH_LOW   10000
 304 #define THRESH_HIGH  20000
 305
 306                 // Find best match in this frame by MC
 307                 err = vp8_temporal_filter_find_matching_mb_c
 308                       (cpi,
 309                        cpi->frames[alt_ref_index],
 310                        cpi->frames[frame],
 311                        mb_y_offset,
 312                        THRESH_LOW);
 313
 314 #endif
 315                 // Assign higher weight to matching MB if it's error
 316                 // score is lower. If not applying MC default behavior
 317                 // is to weight all MBs equal.
 318                 filter_weight = err<THRESH_LOW
 319                                   ? 2 : err<THRESH_HIGH ? 1 : 0;
 320
 321                 if (filter_weight != 0)
 322                 {
 323                     // Construct the predictors
 324                     vp8_temporal_filter_predictors_mb_c
 325                         (mbd,
 326                          cpi->frames[frame]->y_buffer + mb_y_offset,
 327                          cpi->frames[frame]->u_buffer + mb_uv_offset,
 328                          cpi->frames[frame]->v_buffer + mb_uv_offset,
 329                          cpi->frames[frame]->y_stride,
 330                          mbd->block[0].bmi.mv.as_mv.row,
 331                          mbd->block[0].bmi.mv.as_mv.col,
 332                          predictor);
 333
 334                     // Apply the filter (YUV)
 335                     vp8_temporal_filter_apply
 336                         (f->y_buffer + mb_y_offset,
 337                          f->y_stride,
 338                          predictor,
 339                          16,
 340                          strength,
 341                          filter_weight,
 342                          accumulator,
 343                          count);
 344
 345                     vp8_temporal_filter_apply
 346                         (f->u_buffer + mb_uv_offset,
 347                          f->uv_stride,
 348                          predictor + 256,
 349                          8,
 350                          strength,
 351                          filter_weight,
 352                          accumulator + 256,
 353                          count + 256);
 354
 355                     vp8_temporal_filter_apply
 356                         (f->v_buffer + mb_uv_offset,
 357                          f->uv_stride,
 358                          predictor + 320,
 359                          8,
 360                          strength,
 361                          filter_weight,
 362                          accumulator + 320,
 363                          count + 320);
 364                 }
 365             }
 366
 367             // Normalize filter output to produce AltRef frame
 368             dst1 = cpi->alt_ref_buffer.y_buffer;
 369             stride = cpi->alt_ref_buffer.y_stride;
 370             byte = mb_y_offset;
 371             for (i = 0,k = 0; i < 16; i++)
 372             {
 373                 for (j = 0; j < 16; j++, k++)
 374                 {
 375                     unsigned int pval = accumulator[k] + (count[k] >> 1);
 376                     pval *= cpi->fixed_divide[count[k]];
 377                     pval >>= 19;
 378
 379                     dst1[byte] = (unsigned char)pval;
 380
 381                     // move to next pixel
 382                     byte++;
 383                 }
 384
 385                 byte += stride - 16;
 386             }
 387
 388             dst1 = cpi->alt_ref_buffer.u_buffer;
 389             dst2 = cpi->alt_ref_buffer.v_buffer;
 390             stride = cpi->alt_ref_buffer.uv_stride;
 391             byte = mb_uv_offset;
 392             for (i = 0,k = 256; i < 8; i++)
 393             {
 394                 for (j = 0; j < 8; j++, k++)
 395                 {
 396                     int m=k+64;
 397
 398                     // U
 399                     unsigned int pval = accumulator[k] + (count[k] >> 1);
 400                     pval *= cpi->fixed_divide[count[k]];
 401                     pval >>= 19;
 402                     dst1[byte] = (unsigned char)pval;
 403
 404                     // V
 405                     pval = accumulator[m] + (count[m] >> 1);
 406                     pval *= cpi->fixed_divide[count[m]];
 407                     pval >>= 19;
 408                     dst2[byte] = (unsigned char)pval;
 409
 410                     // move to next pixel
 411                     byte++;
 412                 }
 413
 414                 byte += stride - 8;
 415             }
 416
 417             mb_y_offset += 16;
 418             mb_uv_offset += 8;
 419         }
 420
 421         mb_y_offset += 16*(f->y_stride-mb_cols);
 422         mb_uv_offset += 8*(f->uv_stride-mb_cols);
 423     }
 424
 425     // Restore input state
 426     mbd->pre.y_buffer = y_buffer;
 427     mbd->pre.u_buffer = u_buffer;
 428     mbd->pre.v_buffer = v_buffer;
 429 }
 430
 431 void vp8_temporal_filter_prepare_c
 432 (
 433     VP8_COMP *cpi,
 434     int distance
 435 )
 436 {
 437     int frame = 0;
 438
 439     int num_frames_backward = 0;
 440     int num_frames_forward = 0;
 441     int frames_to_blur_backward = 0;
 442     int frames_to_blur_forward = 0;
 443     int frames_to_blur = 0;
 444     int start_frame = 0;
 445
 446     int strength = cpi->oxcf.arnr_strength;
 447
 448     int blur_type = cpi->oxcf.arnr_type;
 449
 450     int max_frames = cpi->active_arnr_frames;
 451
 452     num_frames_backward = distance;
 453     num_frames_forward = vp8_lookahead_depth(cpi->lookahead)
 454                          - (num_frames_backward + 1);
 455
 456     switch (blur_type)
 457     {
 458     case 1:
 459         /////////////////////////////////////////
 460         // Backward Blur
 461
 462         frames_to_blur_backward = num_frames_backward;
 463
 464         if (frames_to_blur_backward >= max_frames)
 465             frames_to_blur_backward = max_frames - 1;
 466
 467         frames_to_blur = frames_to_blur_backward + 1;
 468         break;
 469
 470     case 2:
 471         /////////////////////////////////////////
 472         // Forward Blur
 473
 474         frames_to_blur_forward = num_frames_forward;
 475
 476         if (frames_to_blur_forward >= max_frames)
 477             frames_to_blur_forward = max_frames - 1;
 478
 479         frames_to_blur = frames_to_blur_forward + 1;
 480         break;
 481
 482     case 3:
 483     default:
 484         /////////////////////////////////////////
 485         // Center Blur
 486         frames_to_blur_forward = num_frames_forward;
 487         frames_to_blur_backward = num_frames_backward;
 488
 489         if (frames_to_blur_forward > frames_to_blur_backward)
 490             frames_to_blur_forward = frames_to_blur_backward;
 491
 492         if (frames_to_blur_backward > frames_to_blur_forward)
 493             frames_to_blur_backward = frames_to_blur_forward;
 494
 495         // When max_frames is even we have 1 more frame backward than forward
 496         if (frames_to_blur_forward > (max_frames - 1) / 2)
 497             frames_to_blur_forward = ((max_frames - 1) / 2);
 498
 499         if (frames_to_blur_backward > (max_frames / 2))
 500             frames_to_blur_backward = (max_frames / 2);
 501
 502         frames_to_blur = frames_to_blur_backward + frames_to_blur_forward + 1;
 503         break;
 504     }
 505
 506     start_frame = distance + frames_to_blur_forward;
 507
 508 #ifdef DEBUGFWG
 509     // DEBUG FWG
 510     printf("max:%d FBCK:%d FFWD:%d ftb:%d ftbbck:%d ftbfwd:%d sei:%d lasei:%d start:%d"
 511            , max_frames
 512            , num_frames_backward
 513            , num_frames_forward
 514            , frames_to_blur
 515            , frames_to_blur_backward
 516            , frames_to_blur_forward
 517            , cpi->source_encode_index
 518            , cpi->last_alt_ref_sei
 519            , start_frame);
 520 #endif
 521
 522     // Setup frame pointers, NULL indicates frame not included in filter
 523     vpx_memset(cpi->frames, 0, max_frames*sizeof(YV12_BUFFER_CONFIG *));
 524     for (frame = 0; frame < frames_to_blur; frame++)
 525     {
 526         int which_buffer =  start_frame - frame;
 527         struct lookahead_entry* buf = vp8_lookahead_peek(cpi->lookahead,
 528                                                          which_buffer,
 529                                                          PEEK_FORWARD);
 530         cpi->frames[frames_to_blur-1-frame] = &buf->img;
 531     }
 532
 533     vp8_temporal_filter_iterate_c (
 534         cpi,
 535         frames_to_blur,
 536         frames_to_blur_backward,
 537         strength );
 538 }
 539 #endif