Add half-pixel variance RTCD functions

author John Koleszar <jkoleszar@google.com>

Tue, 26 Oct 2010 19:34:16 +0000 (15:34 -0400)

committer John Koleszar <jkoleszar@google.com>

Wed, 27 Oct 2010 03:00:56 +0000 (20:00 -0700)
author John Koleszar <jkoleszar@google.com>
Tue, 26 Oct 2010 19:34:16 +0000 (15:34 -0400)
committer John Koleszar <jkoleszar@google.com>
Wed, 27 Oct 2010 03:00:56 +0000 (20:00 -0700)
diff --git a/vp8/encoder/arm/mcomp_arm.c b/vp8/encoder/arm/mcomp_arm.c

deleted file mode 100644 (file)

index 27146e2..0000000
--- a/vp8/encoder/arm/mcomp_arm.c
+++ /dev/null
@@ -1,615 +0,0 @@
-/*
- *  Copyright (c) 2010 The WebM project authors. All Rights Reserved.
- *
- *  Use of this source code is governed by a BSD-style license
- *  that can be found in the LICENSE file in the root of the source
- *  tree. An additional intellectual property rights grant can be found
- *  in the file PATENTS.  All contributing project authors may
- *  be found in the AUTHORS file in the root of the source tree.
- */
-
-
-#include "mcomp.h"
-#include "vpx_mem/vpx_mem.h"
-
-#include <stdio.h>
-#include <limits.h>
-#include <math.h>
-
-#ifdef ENTROPY_STATS
-static int mv_ref_ct [31] [4] [2];
-static int mv_mode_cts [4] [2];
-#endif
-
-extern unsigned int vp8_sub_pixel_variance16x16s_neon
-(
-    unsigned char  *src_ptr,
-    int  src_pixels_per_line,
-    int  xoffset,
-    int  yoffset,
-    unsigned char *dst_ptr,
-    int dst_pixels_per_line,
-    unsigned int *sse
-);
-extern unsigned int vp8_sub_pixel_variance16x16s_4_0_neon
-(
-    unsigned char  *src_ptr,
-    int  src_pixels_per_line,
-    unsigned char *dst_ptr,
-    int dst_pixels_per_line,
-    unsigned int *sse
-);
-extern unsigned int vp8_sub_pixel_variance16x16s_0_4_neon
-(
-    unsigned char  *src_ptr,
-    int  src_pixels_per_line,
-    unsigned char *dst_ptr,
-    int dst_pixels_per_line,
-    unsigned int *sse
-);
-extern unsigned int vp8_sub_pixel_variance16x16s_4_4_neon
-(
-    unsigned char  *src_ptr,
-    int  src_pixels_per_line,
-    unsigned char *dst_ptr,
-    int dst_pixels_per_line,
-    unsigned int *sse
-);
-
-
-int vp8_find_best_sub_pixel_step(MACROBLOCK *x, BLOCK *b, BLOCKD *d, MV *bestmv, MV *ref_mv, int error_per_bit, vp8_subpixvariance_fn_t svf, vp8_variance_fn_t vf, int *mvcost[2])
-{
-    int bestmse = INT_MAX;
-    MV startmv;
-    //MV this_mv;
-    MV this_mv;
-    unsigned char *y = *(d->base_pre) + d->pre + (bestmv->row) * d->pre_stride + bestmv->col;
-    unsigned char *z = (*(b->base_src) + b->src);
-    int left, right, up, down, diag;
-    unsigned int sse;
-    int whichdir ;
-
-
-    // Trap uncodable vectors
-    if ((abs((bestmv->col << 3) - ref_mv->col) > MAX_FULL_PEL_VAL) || (abs((bestmv->row << 3) - ref_mv->row) > MAX_FULL_PEL_VAL))
-    {
-        bestmv->row <<= 3;
-        bestmv->col <<= 3;
-        return INT_MAX;
-    }
-
-    // central mv
-    bestmv->row <<= 3;
-    bestmv->col <<= 3;
-    startmv = *bestmv;
-
-    // calculate central point error
-    bestmse = vf(y, d->pre_stride, z, b->src_stride, &sse);
-    bestmse += vp8_mv_err_cost(bestmv, ref_mv, mvcost, error_per_bit);
-
-    // go left then right and check error
-    this_mv.row = startmv.row;
-    this_mv.col = ((startmv.col - 8) | 4);
-    left = vp8_sub_pixel_variance16x16s_4_0_neon(y - 1, d->pre_stride, z, b->src_stride, &sse);
-    left += vp8_mv_err_cost(&this_mv, ref_mv, mvcost, error_per_bit);
-
-    if (left < bestmse)
-    {
-        *bestmv = this_mv;
-        bestmse = left;
-    }
-
-    this_mv.col += 8;
-    right = vp8_sub_pixel_variance16x16s_4_0_neon(y, d->pre_stride, z, b->src_stride, &sse);
-    right += vp8_mv_err_cost(&this_mv, ref_mv, mvcost, error_per_bit);
-
-    if (right < bestmse)
-    {
-        *bestmv = this_mv;
-        bestmse = right;
-    }
-
-    // go up then down and check error
-    this_mv.col = startmv.col;
-    this_mv.row = ((startmv.row - 8) | 4);
-    up = vp8_sub_pixel_variance16x16s_0_4_neon(y - d->pre_stride, d->pre_stride, z, b->src_stride, &sse);
-    up += vp8_mv_err_cost(&this_mv, ref_mv, mvcost, error_per_bit);
-
-    if (up < bestmse)
-    {
-        *bestmv = this_mv;
-        bestmse = up;
-    }
-
-    this_mv.row += 8;
-    down = vp8_sub_pixel_variance16x16s_0_4_neon(y, d->pre_stride, z, b->src_stride, &sse);
-    down += vp8_mv_err_cost(&this_mv, ref_mv, mvcost, error_per_bit);
-
-    if (down < bestmse)
-    {
-        *bestmv = this_mv;
-        bestmse = down;
-    }
-
-
-    // now check 1 more diagonal
-    whichdir = (left < right ? 0 : 1) + (up < down ? 0 : 2);
-    //for(whichdir =0;whichdir<4;whichdir++)
-    //{
-    this_mv = startmv;
-
-    switch (whichdir)
-    {
-    case 0:
-        this_mv.col = (this_mv.col - 8) | 4;
-        this_mv.row = (this_mv.row - 8) | 4;
-        diag = vp8_sub_pixel_variance16x16s_4_4_neon(y - 1 - d->pre_stride, d->pre_stride, z, b->src_stride, &sse);
-        break;
-    case 1:
-        this_mv.col += 4;
-        this_mv.row = (this_mv.row - 8) | 4;
-        diag = vp8_sub_pixel_variance16x16s_4_4_neon(y - d->pre_stride, d->pre_stride, z, b->src_stride, &sse);
-        break;
-    case 2:
-        this_mv.col = (this_mv.col - 8) | 4;
-        this_mv.row += 4;
-        diag = vp8_sub_pixel_variance16x16s_4_4_neon(y - 1, d->pre_stride, z, b->src_stride, &sse);
-        break;
-    case 3:
-        this_mv.col += 4;
-        this_mv.row += 4;
-        diag = vp8_sub_pixel_variance16x16s_4_4_neon(y, d->pre_stride, z, b->src_stride, &sse);
-        break;
-    }
-
-    diag += vp8_mv_err_cost(&this_mv, ref_mv, mvcost, error_per_bit);
-
-    if (diag < bestmse)
-    {
-        *bestmv = this_mv;
-        bestmse = diag;
-    }
-
-//  }
-
-
-    // time to check quarter pels.
-    if (bestmv->row < startmv.row)
-        y -= d->pre_stride;
-
-    if (bestmv->col < startmv.col)
-        y--;
-
-    startmv = *bestmv;
-
-
-
-    // go left then right and check error
-    this_mv.row = startmv.row;
-
-    if (startmv.col & 7)
-    {
-        this_mv.col = startmv.col - 2;
-        left = svf(y, d->pre_stride, this_mv.col & 7, this_mv.row & 7, z, b->src_stride, &sse);
-    }
-    else
-    {
-        this_mv.col = (startmv.col - 8) | 6;
-        left = svf(y - 1, d->pre_stride, 6, this_mv.row & 7, z, b->src_stride, &sse);
-    }
-
-    left += vp8_mv_err_cost(&this_mv, ref_mv, mvcost, error_per_bit);
-
-    if (left < bestmse)
-    {
-        *bestmv = this_mv;
-        bestmse = left;
-    }
-
-    this_mv.col += 4;
-    right = svf(y, d->pre_stride, this_mv.col & 7, this_mv.row & 7, z, b->src_stride, &sse);
-    right += vp8_mv_err_cost(&this_mv, ref_mv, mvcost, error_per_bit);
-
-    if (right < bestmse)
-    {
-        *bestmv = this_mv;
-        bestmse = right;
-    }
-
-    // go up then down and check error
-    this_mv.col = startmv.col;
-
-    if (startmv.row & 7)
-    {
-        this_mv.row = startmv.row - 2;
-        up = svf(y, d->pre_stride, this_mv.col & 7, this_mv.row & 7, z, b->src_stride, &sse);
-    }
-    else
-    {
-        this_mv.row = (startmv.row - 8) | 6;
-        up = svf(y - d->pre_stride, d->pre_stride, this_mv.col & 7, 6, z, b->src_stride, &sse);
-    }
-
-    up += vp8_mv_err_cost(&this_mv, ref_mv, mvcost, error_per_bit);
-
-    if (up < bestmse)
-    {
-        *bestmv = this_mv;
-        bestmse = up;
-    }
-
-    this_mv.row += 4;
-    down = svf(y, d->pre_stride, this_mv.col & 7, this_mv.row & 7, z, b->src_stride, &sse);
-    down += vp8_mv_err_cost(&this_mv, ref_mv, mvcost, error_per_bit);
-
-    if (down < bestmse)
-    {
-        *bestmv = this_mv;
-        bestmse = down;
-    }
-
-
-    // now check 1 more diagonal
-    whichdir = (left < right ? 0 : 1) + (up < down ? 0 : 2);
-
-//  for(whichdir=0;whichdir<4;whichdir++)
-//  {
-    this_mv = startmv;
-
-    switch (whichdir)
-    {
-    case 0:
-
-        if (startmv.row & 7)
-        {
-            this_mv.row -= 2;
-
-            if (startmv.col & 7)
-            {
-                this_mv.col -= 2;
-                diag = svf(y, d->pre_stride, this_mv.col & 7, this_mv.row & 7, z, b->src_stride, &sse);
-            }
-            else
-            {
-                this_mv.col = (startmv.col - 8) | 6;
-                diag = svf(y - 1, d->pre_stride, 6, this_mv.row & 7, z, b->src_stride, &sse);;
-            }
-        }
-        else
-        {
-            this_mv.row = (startmv.row - 8) | 6;
-
-            if (startmv.col & 7)
-            {
-                this_mv.col -= 2;
-                diag = svf(y - d->pre_stride, d->pre_stride, this_mv.col & 7, 6, z, b->src_stride, &sse);
-            }
-            else
-            {
-                this_mv.col = (startmv.col - 8) | 6;
-                diag = svf(y - d->pre_stride - 1, d->pre_stride, 6, 6, z, b->src_stride, &sse);
-            }
-        }
-
-        break;
-    case 1:
-        this_mv.col += 2;
-
-        if (startmv.row & 7)
-        {
-            this_mv.row -= 2;
-            diag = svf(y, d->pre_stride, this_mv.col & 7, this_mv.row & 7, z, b->src_stride, &sse);
-        }
-        else
-        {
-            this_mv.row = (startmv.row - 8) | 6;
-            diag = svf(y - d->pre_stride, d->pre_stride, this_mv.col & 7, 6, z, b->src_stride, &sse);
-        }
-
-        break;
-    case 2:
-        this_mv.row += 2;
-
-        if (startmv.col & 7)
-        {
-            this_mv.col -= 2;
-            diag = svf(y, d->pre_stride, this_mv.col & 7, this_mv.row & 7, z, b->src_stride, &sse);
-        }
-        else
-        {
-            this_mv.col = (startmv.col - 8) | 6;
-            diag = svf(y - 1, d->pre_stride, 6, this_mv.row & 7, z, b->src_stride, &sse);;
-        }
-
-        break;
-    case 3:
-        this_mv.col += 2;
-        this_mv.row += 2;
-        diag = svf(y, d->pre_stride,  this_mv.col & 7, this_mv.row & 7, z, b->src_stride, &sse);
-        break;
-    }
-
-    diag += vp8_mv_err_cost(&this_mv, ref_mv, mvcost, error_per_bit);
-
-    if (diag < bestmse)
-    {
-        *bestmv = this_mv;
-        bestmse = diag;
-    }
-
-//  }
-
-    return bestmse;
-}
-
-int vp8_find_best_half_pixel_step(MACROBLOCK *mb, BLOCK *b, BLOCKD *d, MV *bestmv, MV *ref_mv, int error_per_bit, vp8_subpixvariance_fn_t svf, vp8_variance_fn_t vf, int *mvcost[2])
-{
-    int bestmse = INT_MAX;
-    MV startmv;
-    //MV this_mv;
-    MV this_mv;
-    unsigned char *y = *(d->base_pre) + d->pre + (bestmv->row) * d->pre_stride + bestmv->col;
-    unsigned char *z = (*(b->base_src) + b->src);
-    int left, right, up, down, diag;
-    unsigned int sse;
-
-    // Trap uncodable vectors
-    if ((abs((bestmv->col << 3) - ref_mv->col) > MAX_FULL_PEL_VAL) || (abs((bestmv->row << 3) - ref_mv->row) > MAX_FULL_PEL_VAL))
-    {
-        bestmv->row <<= 3;
-        bestmv->col <<= 3;
-        return INT_MAX;
-    }
-
-    // central mv
-    bestmv->row <<= 3;
-    bestmv->col <<= 3;
-    startmv = *bestmv;
-
-    // calculate central point error
-    bestmse = vf(y, d->pre_stride, z, b->src_stride, &sse);
-    bestmse += vp8_mv_err_cost(bestmv, ref_mv, mvcost, error_per_bit);
-
-    // go left then right and check error
-    this_mv.row = startmv.row;
-    this_mv.col = ((startmv.col - 8) | 4);
-    left = vp8_sub_pixel_variance16x16s_4_0_neon(y - 1, d->pre_stride, z, b->src_stride, &sse);
-    left += vp8_mv_err_cost(&this_mv, ref_mv, mvcost, error_per_bit);
-
-    if (left < bestmse)
-    {
-        *bestmv = this_mv;
-        bestmse = left;
-    }
-
-    this_mv.col += 8;
-    right = vp8_sub_pixel_variance16x16s_4_0_neon(y, d->pre_stride, z, b->src_stride, &sse);
-    right += vp8_mv_err_cost(&this_mv, ref_mv, mvcost, error_per_bit);
-
-    if (right < bestmse)
-    {
-        *bestmv = this_mv;
-        bestmse = right;
-    }
-
-    // go up then down and check error
-    this_mv.col = startmv.col;
-    this_mv.row = ((startmv.row - 8) | 4);
-    up = vp8_sub_pixel_variance16x16s_0_4_neon(y - d->pre_stride, d->pre_stride, z, b->src_stride, &sse);
-    up += vp8_mv_err_cost(&this_mv, ref_mv, mvcost, error_per_bit);
-
-    if (up < bestmse)
-    {
-        *bestmv = this_mv;
-        bestmse = up;
-    }
-
-    this_mv.row += 8;
-    down = vp8_sub_pixel_variance16x16s_0_4_neon(y, d->pre_stride, z, b->src_stride, &sse);
-    down += vp8_mv_err_cost(&this_mv, ref_mv, mvcost, error_per_bit);
-
-    if (down < bestmse)
-    {
-        *bestmv = this_mv;
-        bestmse = down;
-    }
-
-    // somewhat strangely not doing all the diagonals for half pel is slower than doing them.
-#if 0
-    // now check 1 more diagonal -
-    whichdir = (left < right ? 0 : 1) + (up < down ? 0 : 2);
-    this_mv = startmv;
-
-    switch (whichdir)
-    {
-    case 0:
-        this_mv.col = (this_mv.col - 8) | 4;
-        this_mv.row = (this_mv.row - 8) | 4;
-        diag = svf(y - 1 - d->pre_stride, d->pre_stride, 4, 4, z, b->src_stride, &sse);
-        break;
-    case 1:
-        this_mv.col += 4;
-        this_mv.row = (this_mv.row - 8) | 4;
-        diag = svf(y - d->pre_stride, d->pre_stride, 4, 4, z, b->src_stride, &sse);
-        break;
-    case 2:
-        this_mv.col = (this_mv.col - 8) | 4;
-        this_mv.row += 4;
-        diag = svf(y - 1, d->pre_stride, 4, 4, z, b->src_stride, &sse);
-        break;
-    case 3:
-        this_mv.col += 4;
-        this_mv.row += 4;
-        diag = svf(y, d->pre_stride, 4, 4, z, b->src_stride, &sse);
-        break;
-    }
-
-    diag += vp8_mv_err_cost(&this_mv, ref_mv, mvcost, error_per_bit);
-
-    if (diag < bestmse)
-    {
-        *bestmv = this_mv;
-        bestmse = diag;
-    }
-
-#else
-    this_mv.col = (this_mv.col - 8) | 4;
-    this_mv.row = (this_mv.row - 8) | 4;
-    diag = vp8_sub_pixel_variance16x16s_4_4_neon(y - 1 - d->pre_stride, d->pre_stride, z, b->src_stride, &sse);
-    diag += vp8_mv_err_cost(&this_mv, ref_mv, mvcost, error_per_bit);
-
-    if (diag < bestmse)
-    {
-        *bestmv = this_mv;
-        bestmse = diag;
-    }
-
-    this_mv.col += 8;
-    diag = vp8_sub_pixel_variance16x16s_4_4_neon(y - d->pre_stride, d->pre_stride, z, b->src_stride, &sse);
-    diag += vp8_mv_err_cost(&this_mv, ref_mv, mvcost, error_per_bit);
-
-    if (diag < bestmse)
-    {
-        *bestmv = this_mv;
-        bestmse = diag;
-    }
-
-    this_mv.col = (this_mv.col - 8) | 4;
-    this_mv.row = startmv.row + 4;
-    diag = vp8_sub_pixel_variance16x16s_4_4_neon(y - 1, d->pre_stride, z, b->src_stride, &sse);
-    diag += vp8_mv_err_cost(&this_mv, ref_mv, mvcost, error_per_bit);
-
-    if (diag < bestmse)
-    {
-        *bestmv = this_mv;
-        bestmse = diag;
-    }
-
-    this_mv.col += 8;
-    diag = vp8_sub_pixel_variance16x16s_4_4_neon(y, d->pre_stride, z, b->src_stride, &sse);
-    diag += vp8_mv_err_cost(&this_mv, ref_mv, mvcost, error_per_bit);
-
-    if (diag < bestmse)
-    {
-        *bestmv = this_mv;
-        bestmse = diag;
-    }
-
-#endif
-    return bestmse;
-}
-
-
-#ifdef ENTROPY_STATS
-void print_mode_context(void)
-{
-    FILE *f = fopen("modecont.c", "w");
-    int i, j;
-
-    fprintf(f, "#include \"entropy.h\"\n");
-    fprintf(f, "const int vp8_mode_contexts[6][4] =\n");
-    fprintf(f, "{\n");
-
-    for (j = 0; j < 6; j++)
-    {
-        fprintf(f, "  { // %d \n", j);
-        fprintf(f, "    ");
-
-        for (i = 0; i < 4; i++)
-        {
-            int overal_prob;
-            int this_prob;
-            int count; // = mv_ref_ct[j][i][0]+mv_ref_ct[j][i][1];
-
-            // Overall probs
-            count = mv_mode_cts[i][0] + mv_mode_cts[i][1];
-
-            if (count)
-                overal_prob = 256 * mv_mode_cts[i][0] / count;
-            else
-                overal_prob = 128;
-
-            if (overal_prob == 0)
-                overal_prob = 1;
-
-            // context probs
-            count = mv_ref_ct[j][i][0] + mv_ref_ct[j][i][1];
-
-            if (count)
-                this_prob = 256 * mv_ref_ct[j][i][0] / count;
-            else
-                this_prob = 128;
-
-            if (this_prob == 0)
-                this_prob = 1;
-
-            fprintf(f, "%5d, ", this_prob);
-            //fprintf(f,"%5d, %5d, %8d,", this_prob, overal_prob, (this_prob << 10)/overal_prob);
-            //fprintf(f,"%8d, ", (this_prob << 10)/overal_prob);
-        }
-
-        fprintf(f, "  },\n");
-    }
-
-    fprintf(f, "};\n");
-    fclose(f);
-}
-
-/* MV ref count ENTROPY_STATS stats code */
-#ifdef ENTROPY_STATS
-void init_mv_ref_counts()
-{
-    vpx_memset(mv_ref_ct, 0, sizeof(mv_ref_ct));
-    vpx_memset(mv_mode_cts, 0, sizeof(mv_mode_cts));
-}
-
-void accum_mv_refs(MB_PREDICTION_MODE m, const int ct[4])
-{
-    if (m == ZEROMV)
-    {
-        ++mv_ref_ct [ct[0]] [0] [0];
-        ++mv_mode_cts[0][0];
-    }
-    else
-    {
-        ++mv_ref_ct [ct[0]] [0] [1];
-        ++mv_mode_cts[0][1];
-
-        if (m == NEARESTMV)
-        {
-            ++mv_ref_ct [ct[1]] [1] [0];
-            ++mv_mode_cts[1][0];
-        }
-        else
-        {
-            ++mv_ref_ct [ct[1]] [1] [1];
-            ++mv_mode_cts[1][1];
-
-            if (m == NEARMV)
-            {
-                ++mv_ref_ct [ct[2]] [2] [0];
-                ++mv_mode_cts[2][0];
-            }
-            else
-            {
-                ++mv_ref_ct [ct[2]] [2] [1];
-                ++mv_mode_cts[2][1];
-
-                if (m == NEWMV)
-                {
-                    ++mv_ref_ct [ct[3]] [3] [0];
-                    ++mv_mode_cts[3][0];
-                }
-                else
-                {
-                    ++mv_ref_ct [ct[3]] [3] [1];
-                    ++mv_mode_cts[3][1];
-                }
-            }
-        }
-    }
-}
-
-#endif/* END MV ref count ENTROPY_STATS stats code */
-
-#endif
diff --git a/vp8/encoder/arm/neon/vp8_subpixelvariance16x16s_neon.asm b/vp8/encoder/arm/neon/vp8_subpixelvariance16x16s_neon.asm

index 1c1441c..0a2b71c 100644 (file)
--- a/vp8/encoder/arm/neon/vp8_subpixelvariance16x16s_neon.asm
+++ b/vp8/encoder/arm/neon/vp8_subpixelvariance16x16s_neon.asm
@@ -9,9 +9,9 @@
  ;
  
  
-    EXPORT  |vp8_sub_pixel_variance16x16s_4_0_neon|
-    EXPORT  |vp8_sub_pixel_variance16x16s_0_4_neon|
-    EXPORT  |vp8_sub_pixel_variance16x16s_4_4_neon|
+    EXPORT  |vp8_variance_halfpixvar16x16_h_neon|
+    EXPORT  |vp8_variance_halfpixvar16x16_v_neon|
+    EXPORT  |vp8_variance_halfpixvar16x16_hv_neon|
      EXPORT  |vp8_sub_pixel_variance16x16s_neon|
      ARM
      REQUIRE8
@@ -20,7 +20,7 @@
      AREA ||.text||, CODE, READONLY, ALIGN=2
  
  ;================================================
-;unsigned int vp8_sub_pixel_variance16x16s_4_0_neon
+;unsigned int vp8_variance_halfpixvar16x16_h_neon
  ;(
  ;    unsigned char  *src_ptr, r0
  ;    int  src_pixels_per_line,  r1
@@ -29,7 +29,7 @@
  ;    unsigned int *sse
  ;);
  ;================================================
-|vp8_sub_pixel_variance16x16s_4_0_neon| PROC
+|vp8_variance_halfpixvar16x16_h_neon| PROC
      push            {lr}
  
      mov             r12, #4                  ;loop counter
@@ -120,7 +120,7 @@ vp8_filt_fpo16x16s_4_0_loop_neon
      ENDP
  
  ;================================================
-;unsigned int vp8_sub_pixel_variance16x16s_0_4_neon
+;unsigned int vp8_variance_halfpixvar16x16_v_neon
  ;(
  ;    unsigned char  *src_ptr, r0
  ;    int  src_pixels_per_line,  r1
@@ -129,7 +129,7 @@ vp8_filt_fpo16x16s_4_0_loop_neon
  ;    unsigned int *sse
  ;);
  ;================================================
-|vp8_sub_pixel_variance16x16s_0_4_neon| PROC
+|vp8_variance_halfpixvar16x16_v_neon| PROC
      push            {lr}
  
      mov             r12, #4                     ;loop counter
@@ -216,7 +216,7 @@ vp8_filt_spo16x16s_0_4_loop_neon
      ENDP
  
  ;================================================
-;unsigned int vp8_sub_pixel_variance16x16s_4_4_neon
+;unsigned int vp8_variance_halfpixvar16x16_hv_neon
  ;(
  ;    unsigned char  *src_ptr, r0
  ;    int  src_pixels_per_line,  r1
@@ -225,7 +225,7 @@ vp8_filt_spo16x16s_0_4_loop_neon
  ;    unsigned int *sse
  ;);
  ;================================================
-|vp8_sub_pixel_variance16x16s_4_4_neon| PROC
+|vp8_variance_halfpixvar16x16_hv_neon| PROC
      push            {lr}
  
      vld1.u8         {d0, d1, d2, d3}, [r0], r1      ;load src data
diff --git a/vp8/encoder/arm/variance_arm.h b/vp8/encoder/arm/variance_arm.h

index fb9dd5a..0e5f62f 100644 (file)
--- a/vp8/encoder/arm/variance_arm.h
+++ b/vp8/encoder/arm/variance_arm.h
@@ -30,6 +30,9 @@ extern prototype_subpixvariance(vp8_sub_pixel_variance8x8_neon);
  //extern prototype_subpixvariance(vp8_sub_pixel_variance8x16_c);
  //extern prototype_subpixvariance(vp8_sub_pixel_variance16x8_c);
  extern prototype_subpixvariance(vp8_sub_pixel_variance16x16_neon);
+extern prototype_variance(vp8_variance_halfpixvar16x16_h_neon);
+extern prototype_variance(vp8_variance_halfpixvar16x16_v_neon);
+extern prototype_variance(vp8_variance_halfpixvar16x16_hv_neon);
  
  //extern prototype_getmbss(vp8_get_mb_ss_c);
  extern prototype_variance(vp8_mse16x16_neon);
@@ -84,6 +87,15 @@ extern prototype_sad(vp8_get4x4sse_cs_neon);
  #undef  vp8_variance_subpixvar16x16
  #define vp8_variance_subpixvar16x16 vp8_sub_pixel_variance16x16_neon
  
+#undef  vp8_variance_halfpixvar16x16_h
+#define vp8_variance_halfpixvar16x16_h vp8_variance_halfpixvar16x16_h_neon
+
+#undef  vp8_variance_halfpixvar16x16_v
+#define vp8_variance_halfpixvar16x16_v vp8_variance_halfpixvar16x16_v_neon
+
+#undef  vp8_variance_halfpixvar16x16_hv
+#define vp8_variance_halfpixvar16x16_hv vp8_variance_halfpixvar16x16_hv_neon
+
  //#undef  vp8_variance_getmbss
  //#define vp8_variance_getmbss vp8_get_mb_ss_c
  
diff --git a/vp8/encoder/firstpass.c b/vp8/encoder/firstpass.c

index 607c3d2..691aee0 100644 (file)
--- a/vp8/encoder/firstpass.c
+++ b/vp8/encoder/firstpass.c
@@ -462,12 +462,11 @@ void vp8_first_pass_motion_search(VP8_COMP *cpi, MACROBLOCK *x, MV *ref_mv, MV *
      int step_param = 3;                                       //3;          // Dont search over full range for first pass
      int further_steps = (MAX_MVSEARCH_STEPS - 1) - step_param; //3;
      int n;
-    vp8_variance_fn_ptr_t v_fn_ptr;
+    vp8_variance_fn_ptr_t v_fn_ptr = cpi->fn_ptr[BLOCK_16X16];
      int new_mv_mode_penalty = 256;
  
+    // override the default variance function to use MSE
      v_fn_ptr.vf    = VARIANCE_INVOKE(IF_RTCD(&cpi->rtcd.variance), mse16x16);
-    v_fn_ptr.sdf   = cpi->fn_ptr.sdf;
-    v_fn_ptr.sdx4df = cpi->fn_ptr.sdx4df;
  
      // Set up pointers for this macro block recon buffer
      xd->pre.y_buffer = recon_buffer->y_buffer + recon_yoffset;
diff --git a/vp8/encoder/mcomp.c b/vp8/encoder/mcomp.c

index c5cae9c..8cc63f8 100644 (file)
--- a/vp8/encoder/mcomp.c
+++ b/vp8/encoder/mcomp.c
@@ -186,7 +186,7 @@ void vp8_init3smotion_compensation(MACROBLOCK *x, int stride)
  #define MVC(r,c) (((mvcost[0][(r)-rr] + mvcost[1][(c) - rc]) * error_per_bit + 128 )>>8 ) // estimated cost of a motion vector (r,c)
  #define PRE(r,c) (*(d->base_pre) + d->pre + ((r)>>2) * d->pre_stride + ((c)>>2)) // pointer to predictor base of a motionvector
  #define SP(x) (((x)&3)<<1) // convert motion vector component to offset for svf calc
-#define DIST(r,c) svf( PRE(r,c), d->pre_stride, SP(c),SP(r), z,b->src_stride,&sse) // returns subpixel variance error function.
+#define DIST(r,c) vfp->svf( PRE(r,c), d->pre_stride, SP(c),SP(r), z,b->src_stride,&sse) // returns subpixel variance error function.
  #define IFMVCV(r,c,s,e) if ( c >= minc && c <= maxc && r >= minr && r <= maxr) s else e;
  #define ERR(r,c) (MVC(r,c)+DIST(r,c)) // returns distortion + motion vector cost
  #define CHECK_BETTER(v,r,c) IFMVCV(r,c,{if((v = ERR(r,c)) < besterr) { besterr = v; br=r; bc=c; }}, v=INT_MAX;)// checks if (r,c) has better score than previous best
@@ -195,7 +195,7 @@ void vp8_init3smotion_compensation(MACROBLOCK *x, int stride)
  
  //#define CHECK_BETTER(v,r,c) if((v = ERR(r,c)) < besterr) { besterr = v; br=r; bc=c; }
  
-int vp8_find_best_sub_pixel_step_iteratively(MACROBLOCK *x, BLOCK *b, BLOCKD *d, MV *bestmv, MV *ref_mv, int error_per_bit, vp8_subpixvariance_fn_t svf, vp8_variance_fn_t vf, int *mvcost[2])
+int vp8_find_best_sub_pixel_step_iteratively(MACROBLOCK *x, BLOCK *b, BLOCKD *d, MV *bestmv, MV *ref_mv, int error_per_bit, const vp8_variance_fn_ptr_t *vfp, int *mvcost[2])
  {
      unsigned char *y = *(d->base_pre) + d->pre + (bestmv->row) * d->pre_stride + bestmv->col;
      unsigned char *z = (*(b->base_src) + b->src);
@@ -220,7 +220,7 @@ int vp8_find_best_sub_pixel_step_iteratively(MACROBLOCK *x, BLOCK *b, BLOCKD *d,
      bestmv->col <<= 3;
  
      // calculate central point error
-    besterr = vf(y, d->pre_stride, z, b->src_stride, &sse);
+    besterr = vfp->vf(y, d->pre_stride, z, b->src_stride, &sse);
      besterr += vp8_mv_err_cost(bestmv, ref_mv, mvcost, error_per_bit);
  
      // TODO: Each subsequent iteration checks at least one point in common with the last iteration could be 2 ( if diag selected)
@@ -309,7 +309,7 @@ int vp8_find_best_sub_pixel_step_iteratively(MACROBLOCK *x, BLOCK *b, BLOCKD *d,
  #undef CHECK_BETTER
  #undef MIN
  #undef MAX
-int vp8_find_best_sub_pixel_step(MACROBLOCK *x, BLOCK *b, BLOCKD *d, MV *bestmv, MV *ref_mv, int error_per_bit, vp8_subpixvariance_fn_t svf, vp8_variance_fn_t vf, int *mvcost[2])
+int vp8_find_best_sub_pixel_step(MACROBLOCK *x, BLOCK *b, BLOCKD *d, MV *bestmv, MV *ref_mv, int error_per_bit, const vp8_variance_fn_ptr_t *vfp, int *mvcost[2])
  {
      int bestmse = INT_MAX;
      MV startmv;
@@ -336,13 +336,13 @@ int vp8_find_best_sub_pixel_step(MACROBLOCK *x, BLOCK *b, BLOCKD *d, MV *bestmv,
      startmv = *bestmv;
  
      // calculate central point error
-    bestmse = vf(y, d->pre_stride, z, b->src_stride, &sse);
+    bestmse = vfp->vf(y, d->pre_stride, z, b->src_stride, &sse);
      bestmse += vp8_mv_err_cost(bestmv, ref_mv, mvcost, error_per_bit);
  
      // go left then right and check error
      this_mv.row = startmv.row;
      this_mv.col = ((startmv.col - 8) | 4);
-    left = svf(y - 1, d->pre_stride, 4, 0, z, b->src_stride, &sse);
+    left = vfp->svf_halfpix_h(y - 1, d->pre_stride, z, b->src_stride, &sse);
      left += vp8_mv_err_cost(&this_mv, ref_mv, mvcost, error_per_bit);
  
      if (left < bestmse)
@@ -352,7 +352,7 @@ int vp8_find_best_sub_pixel_step(MACROBLOCK *x, BLOCK *b, BLOCKD *d, MV *bestmv,
      }
  
      this_mv.col += 8;
-    right = svf(y, d->pre_stride, 4, 0, z, b->src_stride, &sse);
+    right = vfp->svf_halfpix_h(y, d->pre_stride, z, b->src_stride, &sse);
      right += vp8_mv_err_cost(&this_mv, ref_mv, mvcost, error_per_bit);
  
      if (right < bestmse)
@@ -364,7 +364,7 @@ int vp8_find_best_sub_pixel_step(MACROBLOCK *x, BLOCK *b, BLOCKD *d, MV *bestmv,
      // go up then down and check error
      this_mv.col = startmv.col;
      this_mv.row = ((startmv.row - 8) | 4);
-    up = svf(y - d->pre_stride, d->pre_stride, 0, 4, z, b->src_stride, &sse);
+    up = vfp->svf_halfpix_v(y - d->pre_stride, d->pre_stride, z, b->src_stride, &sse);
      up += vp8_mv_err_cost(&this_mv, ref_mv, mvcost, error_per_bit);
  
      if (up < bestmse)
@@ -374,7 +374,7 @@ int vp8_find_best_sub_pixel_step(MACROBLOCK *x, BLOCK *b, BLOCKD *d, MV *bestmv,
      }
  
      this_mv.row += 8;
-    down = svf(y, d->pre_stride, 0, 4, z, b->src_stride, &sse);
+    down = vfp->svf_halfpix_v(y, d->pre_stride, z, b->src_stride, &sse);
      down += vp8_mv_err_cost(&this_mv, ref_mv, mvcost, error_per_bit);
  
      if (down < bestmse)
@@ -386,10 +386,6 @@ int vp8_find_best_sub_pixel_step(MACROBLOCK *x, BLOCK *b, BLOCKD *d, MV *bestmv,
  
      // now check 1 more diagonal
      whichdir = (left < right ? 0 : 1) + (up < down ? 0 : 2);
-    // whichdir must be 0-4. Therefore, one of the cases below
-    // must run through. However, because there is no default
-    // and diag is not set elsewhere, we get a compile warning
-    diag = 0;
      //for(whichdir =0;whichdir<4;whichdir++)
      //{
      this_mv = startmv;
@@ -399,22 +395,22 @@ int vp8_find_best_sub_pixel_step(MACROBLOCK *x, BLOCK *b, BLOCKD *d, MV *bestmv,
      case 0:
          this_mv.col = (this_mv.col - 8) | 4;
          this_mv.row = (this_mv.row - 8) | 4;
-        diag = svf(y - 1 - d->pre_stride, d->pre_stride, 4, 4, z, b->src_stride, &sse);
+        diag = vfp->svf_halfpix_hv(y - 1 - d->pre_stride, d->pre_stride, z, b->src_stride, &sse);
          break;
      case 1:
          this_mv.col += 4;
          this_mv.row = (this_mv.row - 8) | 4;
-        diag = svf(y - d->pre_stride, d->pre_stride, 4, 4, z, b->src_stride, &sse);
+        diag = vfp->svf_halfpix_hv(y - d->pre_stride, d->pre_stride, z, b->src_stride, &sse);
          break;
      case 2:
          this_mv.col = (this_mv.col - 8) | 4;
          this_mv.row += 4;
-        diag = svf(y - 1, d->pre_stride, 4, 4, z, b->src_stride, &sse);
+        diag = vfp->svf_halfpix_hv(y - 1, d->pre_stride, z, b->src_stride, &sse);
          break;
      case 3:
          this_mv.col += 4;
          this_mv.row += 4;
-        diag = svf(y, d->pre_stride, 4, 4, z, b->src_stride, &sse);
+        diag = vfp->svf_halfpix_hv(y, d->pre_stride, z, b->src_stride, &sse);
          break;
      }
  
@@ -446,12 +442,12 @@ int vp8_find_best_sub_pixel_step(MACROBLOCK *x, BLOCK *b, BLOCKD *d, MV *bestmv,
      if (startmv.col & 7)
      {
          this_mv.col = startmv.col - 2;
-        left = svf(y, d->pre_stride, this_mv.col & 7, this_mv.row & 7, z, b->src_stride, &sse);
+        left = vfp->svf(y, d->pre_stride, this_mv.col & 7, this_mv.row & 7, z, b->src_stride, &sse);
      }
      else
      {
          this_mv.col = (startmv.col - 8) | 6;
-        left = svf(y - 1, d->pre_stride, 6, this_mv.row & 7, z, b->src_stride, &sse);
+        left = vfp->svf(y - 1, d->pre_stride, 6, this_mv.row & 7, z, b->src_stride, &sse);
      }
  
      left += vp8_mv_err_cost(&this_mv, ref_mv, mvcost, error_per_bit);
@@ -463,7 +459,7 @@ int vp8_find_best_sub_pixel_step(MACROBLOCK *x, BLOCK *b, BLOCKD *d, MV *bestmv,
      }
  
      this_mv.col += 4;
-    right = svf(y, d->pre_stride, this_mv.col & 7, this_mv.row & 7, z, b->src_stride, &sse);
+    right = vfp->svf(y, d->pre_stride, this_mv.col & 7, this_mv.row & 7, z, b->src_stride, &sse);
      right += vp8_mv_err_cost(&this_mv, ref_mv, mvcost, error_per_bit);
  
      if (right < bestmse)
@@ -478,12 +474,12 @@ int vp8_find_best_sub_pixel_step(MACROBLOCK *x, BLOCK *b, BLOCKD *d, MV *bestmv,
      if (startmv.row & 7)
      {
          this_mv.row = startmv.row - 2;
-        up = svf(y, d->pre_stride, this_mv.col & 7, this_mv.row & 7, z, b->src_stride, &sse);
+        up = vfp->svf(y, d->pre_stride, this_mv.col & 7, this_mv.row & 7, z, b->src_stride, &sse);
      }
      else
      {
          this_mv.row = (startmv.row - 8) | 6;
-        up = svf(y - d->pre_stride, d->pre_stride, this_mv.col & 7, 6, z, b->src_stride, &sse);
+        up = vfp->svf(y - d->pre_stride, d->pre_stride, this_mv.col & 7, 6, z, b->src_stride, &sse);
      }
  
      up += vp8_mv_err_cost(&this_mv, ref_mv, mvcost, error_per_bit);
@@ -495,7 +491,7 @@ int vp8_find_best_sub_pixel_step(MACROBLOCK *x, BLOCK *b, BLOCKD *d, MV *bestmv,
      }
  
      this_mv.row += 4;
-    down = svf(y, d->pre_stride, this_mv.col & 7, this_mv.row & 7, z, b->src_stride, &sse);
+    down = vfp->svf(y, d->pre_stride, this_mv.col & 7, this_mv.row & 7, z, b->src_stride, &sse);
      down += vp8_mv_err_cost(&this_mv, ref_mv, mvcost, error_per_bit);
  
      if (down < bestmse)
@@ -523,12 +519,12 @@ int vp8_find_best_sub_pixel_step(MACROBLOCK *x, BLOCK *b, BLOCKD *d, MV *bestmv,
              if (startmv.col & 7)
              {
                  this_mv.col -= 2;
-                diag = svf(y, d->pre_stride, this_mv.col & 7, this_mv.row & 7, z, b->src_stride, &sse);
+                diag = vfp->svf(y, d->pre_stride, this_mv.col & 7, this_mv.row & 7, z, b->src_stride, &sse);
              }
              else
              {
                  this_mv.col = (startmv.col - 8) | 6;
-                diag = svf(y - 1, d->pre_stride, 6, this_mv.row & 7, z, b->src_stride, &sse);;
+                diag = vfp->svf(y - 1, d->pre_stride, 6, this_mv.row & 7, z, b->src_stride, &sse);;
              }
          }
          else
@@ -538,12 +534,12 @@ int vp8_find_best_sub_pixel_step(MACROBLOCK *x, BLOCK *b, BLOCKD *d, MV *bestmv,
              if (startmv.col & 7)
              {
                  this_mv.col -= 2;
-                diag = svf(y - d->pre_stride, d->pre_stride, this_mv.col & 7, 6, z, b->src_stride, &sse);
+                diag = vfp->svf(y - d->pre_stride, d->pre_stride, this_mv.col & 7, 6, z, b->src_stride, &sse);
              }
              else
              {
                  this_mv.col = (startmv.col - 8) | 6;
-                diag = svf(y - d->pre_stride - 1, d->pre_stride, 6, 6, z, b->src_stride, &sse);
+                diag = vfp->svf(y - d->pre_stride - 1, d->pre_stride, 6, 6, z, b->src_stride, &sse);
              }
          }
  
@@ -554,12 +550,12 @@ int vp8_find_best_sub_pixel_step(MACROBLOCK *x, BLOCK *b, BLOCKD *d, MV *bestmv,
          if (startmv.row & 7)
          {
              this_mv.row -= 2;
-            diag = svf(y, d->pre_stride, this_mv.col & 7, this_mv.row & 7, z, b->src_stride, &sse);
+            diag = vfp->svf(y, d->pre_stride, this_mv.col & 7, this_mv.row & 7, z, b->src_stride, &sse);
          }
          else
          {
              this_mv.row = (startmv.row - 8) | 6;
-            diag = svf(y - d->pre_stride, d->pre_stride, this_mv.col & 7, 6, z, b->src_stride, &sse);
+            diag = vfp->svf(y - d->pre_stride, d->pre_stride, this_mv.col & 7, 6, z, b->src_stride, &sse);
          }
  
          break;
@@ -569,19 +565,19 @@ int vp8_find_best_sub_pixel_step(MACROBLOCK *x, BLOCK *b, BLOCKD *d, MV *bestmv,
          if (startmv.col & 7)
          {
              this_mv.col -= 2;
-            diag = svf(y, d->pre_stride, this_mv.col & 7, this_mv.row & 7, z, b->src_stride, &sse);
+            diag = vfp->svf(y, d->pre_stride, this_mv.col & 7, this_mv.row & 7, z, b->src_stride, &sse);
          }
          else
          {
              this_mv.col = (startmv.col - 8) | 6;
-            diag = svf(y - 1, d->pre_stride, 6, this_mv.row & 7, z, b->src_stride, &sse);;
+            diag = vfp->svf(y - 1, d->pre_stride, 6, this_mv.row & 7, z, b->src_stride, &sse);;
          }
  
          break;
      case 3:
          this_mv.col += 2;
          this_mv.row += 2;
-        diag = svf(y, d->pre_stride,  this_mv.col & 7, this_mv.row & 7, z, b->src_stride, &sse);
+        diag = vfp->svf(y, d->pre_stride,  this_mv.col & 7, this_mv.row & 7, z, b->src_stride, &sse);
          break;
      }
  
@@ -598,7 +594,7 @@ int vp8_find_best_sub_pixel_step(MACROBLOCK *x, BLOCK *b, BLOCKD *d, MV *bestmv,
      return bestmse;
  }
  
-int vp8_find_best_half_pixel_step(MACROBLOCK *mb, BLOCK *b, BLOCKD *d, MV *bestmv, MV *ref_mv, int error_per_bit, vp8_subpixvariance_fn_t svf, vp8_variance_fn_t vf, int *mvcost[2])
+int vp8_find_best_half_pixel_step(MACROBLOCK *mb, BLOCK *b, BLOCKD *d, MV *bestmv, MV *ref_mv, int error_per_bit, const vp8_variance_fn_ptr_t *vfp, int *mvcost[2])
  {
      int bestmse = INT_MAX;
      MV startmv;
@@ -623,13 +619,13 @@ int vp8_find_best_half_pixel_step(MACROBLOCK *mb, BLOCK *b, BLOCKD *d, MV *bestm
      startmv = *bestmv;
  
      // calculate central point error
-    bestmse = vf(y, d->pre_stride, z, b->src_stride, &sse);
+    bestmse = vfp->vf(y, d->pre_stride, z, b->src_stride, &sse);
      bestmse += vp8_mv_err_cost(bestmv, ref_mv, mvcost, error_per_bit);
  
      // go left then right and check error
      this_mv.row = startmv.row;
      this_mv.col = ((startmv.col - 8) | 4);
-    left = svf(y - 1, d->pre_stride, 4, 0, z, b->src_stride, &sse);
+    left = vfp->svf_halfpix_h(y - 1, d->pre_stride, z, b->src_stride, &sse);
      left += vp8_mv_err_cost(&this_mv, ref_mv, mvcost, error_per_bit);
  
      if (left < bestmse)
@@ -639,7 +635,7 @@ int vp8_find_best_half_pixel_step(MACROBLOCK *mb, BLOCK *b, BLOCKD *d, MV *bestm
      }
  
      this_mv.col += 8;
-    right = svf(y, d->pre_stride, 4, 0, z, b->src_stride, &sse);
+    right = vfp->svf_halfpix_h(y, d->pre_stride, z, b->src_stride, &sse);
      right += vp8_mv_err_cost(&this_mv, ref_mv, mvcost, error_per_bit);
  
      if (right < bestmse)
@@ -651,7 +647,7 @@ int vp8_find_best_half_pixel_step(MACROBLOCK *mb, BLOCK *b, BLOCKD *d, MV *bestm
      // go up then down and check error
      this_mv.col = startmv.col;
      this_mv.row = ((startmv.row - 8) | 4);
-    up = svf(y - d->pre_stride, d->pre_stride, 0, 4, z, b->src_stride, &sse);
+    up = vfp->svf_halfpix_v(y - d->pre_stride, d->pre_stride, z, b->src_stride, &sse);
      up += vp8_mv_err_cost(&this_mv, ref_mv, mvcost, error_per_bit);
  
      if (up < bestmse)
@@ -661,7 +657,7 @@ int vp8_find_best_half_pixel_step(MACROBLOCK *mb, BLOCK *b, BLOCKD *d, MV *bestm
      }
  
      this_mv.row += 8;
-    down = svf(y, d->pre_stride, 0, 4, z, b->src_stride, &sse);
+    down = vfp->svf_halfpix_v(y, d->pre_stride, z, b->src_stride, &sse);
      down += vp8_mv_err_cost(&this_mv, ref_mv, mvcost, error_per_bit);
  
      if (down < bestmse)
@@ -681,22 +677,22 @@ int vp8_find_best_half_pixel_step(MACROBLOCK *mb, BLOCK *b, BLOCKD *d, MV *bestm
      case 0:
          this_mv.col = (this_mv.col - 8) | 4;
          this_mv.row = (this_mv.row - 8) | 4;
-        diag = svf(y - 1 - d->pre_stride, d->pre_stride, 4, 4, z, b->src_stride, &sse);
+        diag = vfp->svf(y - 1 - d->pre_stride, d->pre_stride, 4, 4, z, b->src_stride, &sse);
          break;
      case 1:
          this_mv.col += 4;
          this_mv.row = (this_mv.row - 8) | 4;
-        diag = svf(y - d->pre_stride, d->pre_stride, 4, 4, z, b->src_stride, &sse);
+        diag = vfp->svf(y - d->pre_stride, d->pre_stride, 4, 4, z, b->src_stride, &sse);
          break;
      case 2:
          this_mv.col = (this_mv.col - 8) | 4;
          this_mv.row += 4;
-        diag = svf(y - 1, d->pre_stride, 4, 4, z, b->src_stride, &sse);
+        diag = vfp->svf(y - 1, d->pre_stride, 4, 4, z, b->src_stride, &sse);
          break;
      case 3:
          this_mv.col += 4;
          this_mv.row += 4;
-        diag = svf(y, d->pre_stride, 4, 4, z, b->src_stride, &sse);
+        diag = vfp->svf(y, d->pre_stride, 4, 4, z, b->src_stride, &sse);
          break;
      }
  
@@ -711,7 +707,7 @@ int vp8_find_best_half_pixel_step(MACROBLOCK *mb, BLOCK *b, BLOCKD *d, MV *bestm
  #else
      this_mv.col = (this_mv.col - 8) | 4;
      this_mv.row = (this_mv.row - 8) | 4;
-    diag = svf(y - 1 - d->pre_stride, d->pre_stride, 4, 4, z, b->src_stride, &sse);
+    diag = vfp->svf_halfpix_hv(y - 1 - d->pre_stride, d->pre_stride, z, b->src_stride, &sse);
      diag += vp8_mv_err_cost(&this_mv, ref_mv, mvcost, error_per_bit);
  
      if (diag < bestmse)
@@ -721,7 +717,7 @@ int vp8_find_best_half_pixel_step(MACROBLOCK *mb, BLOCK *b, BLOCKD *d, MV *bestm
      }
  
      this_mv.col += 8;
-    diag = svf(y - d->pre_stride, d->pre_stride, 4, 4, z, b->src_stride, &sse);
+    diag = vfp->svf_halfpix_hv(y - d->pre_stride, d->pre_stride, z, b->src_stride, &sse);
      diag += vp8_mv_err_cost(&this_mv, ref_mv, mvcost, error_per_bit);
  
      if (diag < bestmse)
@@ -732,7 +728,7 @@ int vp8_find_best_half_pixel_step(MACROBLOCK *mb, BLOCK *b, BLOCKD *d, MV *bestm
  
      this_mv.col = (this_mv.col - 8) | 4;
      this_mv.row = startmv.row + 4;
-    diag = svf(y - 1, d->pre_stride, 4, 4, z, b->src_stride, &sse);
+    diag = vfp->svf_halfpix_hv(y - 1, d->pre_stride, z, b->src_stride, &sse);
      diag += vp8_mv_err_cost(&this_mv, ref_mv, mvcost, error_per_bit);
  
      if (diag < bestmse)
@@ -742,7 +738,7 @@ int vp8_find_best_half_pixel_step(MACROBLOCK *mb, BLOCK *b, BLOCKD *d, MV *bestm
      }
  
      this_mv.col += 8;
-    diag = svf(y, d->pre_stride, 4, 4, z, b->src_stride, &sse);
+    diag = vfp->svf_halfpix_hv(y, d->pre_stride, z, b->src_stride, &sse);
      diag += vp8_mv_err_cost(&this_mv, ref_mv, mvcost, error_per_bit);
  
      if (diag < bestmse)
@@ -758,7 +754,7 @@ int vp8_find_best_half_pixel_step(MACROBLOCK *mb, BLOCK *b, BLOCKD *d, MV *bestm
  
  #define MVC(r,c) (((mvsadcost[0][((r)<<2)-rr] + mvsadcost[1][((c)<<2) - rc]) * error_per_bit + 128 )>>8 ) // estimated cost of a motion vector (r,c)
  #define PRE(r,c) (*(d->base_pre) + d->pre + (r) * d->pre_stride + (c)) // pointer to predictor base of a motionvector
-#define DIST(r,c,v) sf( src,src_stride,PRE(r,c),d->pre_stride, v) // returns sad error score.
+#define DIST(r,c,v) vfp->sdf( src,src_stride,PRE(r,c),d->pre_stride, v) // returns sad error score.
  #define ERR(r,c,v) (MVC(r,c)+DIST(r,c,v)) // returns distortion + motion vector cost
  #define CHECK_BETTER(v,r,c) if ((v = ERR(r,c,besterr)) < besterr) { besterr = v; br=r; bc=c; } // checks if (r,c) has better score than previous best
  static const MV next_chkpts[6][3] =
@@ -780,8 +776,7 @@ int vp8_hex_search
      int search_param,
      int error_per_bit,
      int *num00,
-    vp8_variance_fn_t vf,
-    vp8_sad_fn_t      sf,
+    const vp8_variance_fn_ptr_t *vfp,
      int *mvsadcost[2],
      int *mvcost[2]
  )
@@ -896,7 +891,7 @@ cal_neighbors:
      best_mv->row = br;
      best_mv->col = bc;
  
-    return vf(src, src_stride, PRE(br, bc), d->pre_stride, &thiserr) + MVC(br, bc) ;
+    return vfp->vf(src, src_stride, PRE(br, bc), d->pre_stride, &thiserr) + MVC(br, bc) ;
  }
  #undef MVC
  #undef PRE
diff --git a/vp8/encoder/mcomp.h b/vp8/encoder/mcomp.h

index 7cc9242..181e958 100644 (file)
--- a/vp8/encoder/mcomp.h
+++ b/vp8/encoder/mcomp.h
@@ -42,14 +42,15 @@ extern int vp8_hex_search
      int search_param,
      int error_per_bit,
      int *num00,
-    vp8_variance_fn_t vf,
-    vp8_sad_fn_t sf,
+    const vp8_variance_fn_ptr_t *vf,
      int *mvsadcost[2],
      int *mvcost[2]
  
  );
  
-typedef int (fractional_mv_step_fp)(MACROBLOCK *x, BLOCK *b, BLOCKD *d, MV *bestmv, MV *ref_mv, int error_per_bit, vp8_subpixvariance_fn_t svf, vp8_variance_fn_t vf, int *mvcost[2]);
+typedef int (fractional_mv_step_fp)
+    (MACROBLOCK *x, BLOCK *b, BLOCKD *d, MV *bestmv, MV *ref_mv,
+     int error_per_bit, const vp8_variance_fn_ptr_t *vfp, int *mvcost[2]);
  extern fractional_mv_step_fp vp8_find_best_sub_pixel_step_iteratively;
  extern fractional_mv_step_fp vp8_find_best_sub_pixel_step;
  extern fractional_mv_step_fp vp8_find_best_half_pixel_step;
diff --git a/vp8/encoder/onyx_if.c b/vp8/encoder/onyx_if.c

index bd41b2c..7a78b29 100644 (file)
--- a/vp8/encoder/onyx_if.c
+++ b/vp8/encoder/onyx_if.c
@@ -2334,11 +2334,50 @@ VP8_PTR vp8_create_compressor(VP8_CONFIG *oxcf)
  
      vp8cx_create_encoder_threads(cpi);
  
-    cpi->fn_ptr.sdf   = VARIANCE_INVOKE(&cpi->rtcd.variance, sad16x16);
-    cpi->fn_ptr.vf    = VARIANCE_INVOKE(&cpi->rtcd.variance, var16x16);
-    cpi->fn_ptr.svf   = VARIANCE_INVOKE(&cpi->rtcd.variance, subpixvar16x16);
-    cpi->fn_ptr.sdx3f = VARIANCE_INVOKE(&cpi->rtcd.variance, sad16x16x3);
-    cpi->fn_ptr.sdx4df = VARIANCE_INVOKE(&cpi->rtcd.variance, sad16x16x4d);
+    cpi->fn_ptr[BLOCK_16X16].sdf            = VARIANCE_INVOKE(&cpi->rtcd.variance, sad16x16);
+    cpi->fn_ptr[BLOCK_16X16].vf             = VARIANCE_INVOKE(&cpi->rtcd.variance, var16x16);
+    cpi->fn_ptr[BLOCK_16X16].svf            = VARIANCE_INVOKE(&cpi->rtcd.variance, subpixvar16x16);
+    cpi->fn_ptr[BLOCK_16X16].svf_halfpix_h  = VARIANCE_INVOKE(&cpi->rtcd.variance, halfpixvar16x16_h);
+    cpi->fn_ptr[BLOCK_16X16].svf_halfpix_v  = VARIANCE_INVOKE(&cpi->rtcd.variance, halfpixvar16x16_v);
+    cpi->fn_ptr[BLOCK_16X16].svf_halfpix_hv = VARIANCE_INVOKE(&cpi->rtcd.variance, halfpixvar16x16_hv);
+    cpi->fn_ptr[BLOCK_16X16].sdx3f          = VARIANCE_INVOKE(&cpi->rtcd.variance, sad16x16x3);
+    cpi->fn_ptr[BLOCK_16X16].sdx4df         = VARIANCE_INVOKE(&cpi->rtcd.variance, sad16x16x4d);
+
+    cpi->fn_ptr[BLOCK_16X8].sdf            = VARIANCE_INVOKE(&cpi->rtcd.variance, sad16x8);
+    cpi->fn_ptr[BLOCK_16X8].vf             = VARIANCE_INVOKE(&cpi->rtcd.variance, var16x8);
+    cpi->fn_ptr[BLOCK_16X8].svf            = VARIANCE_INVOKE(&cpi->rtcd.variance, subpixvar16x8);
+    cpi->fn_ptr[BLOCK_16X8].svf_halfpix_h  = NULL;
+    cpi->fn_ptr[BLOCK_16X8].svf_halfpix_v  = NULL;
+    cpi->fn_ptr[BLOCK_16X8].svf_halfpix_hv = NULL;
+    cpi->fn_ptr[BLOCK_16X8].sdx3f          = VARIANCE_INVOKE(&cpi->rtcd.variance, sad16x8x3);
+    cpi->fn_ptr[BLOCK_16X8].sdx4df         = VARIANCE_INVOKE(&cpi->rtcd.variance, sad16x8x4d);
+
+    cpi->fn_ptr[BLOCK_8X16].sdf            = VARIANCE_INVOKE(&cpi->rtcd.variance, sad8x16);
+    cpi->fn_ptr[BLOCK_8X16].vf             = VARIANCE_INVOKE(&cpi->rtcd.variance, var8x16);
+    cpi->fn_ptr[BLOCK_8X16].svf            = VARIANCE_INVOKE(&cpi->rtcd.variance, subpixvar8x16);
+    cpi->fn_ptr[BLOCK_8X16].svf_halfpix_h  = NULL;
+    cpi->fn_ptr[BLOCK_8X16].svf_halfpix_v  = NULL;
+    cpi->fn_ptr[BLOCK_8X16].svf_halfpix_hv = NULL;
+    cpi->fn_ptr[BLOCK_8X16].sdx3f          = VARIANCE_INVOKE(&cpi->rtcd.variance, sad8x16x3);
+    cpi->fn_ptr[BLOCK_8X16].sdx4df         = VARIANCE_INVOKE(&cpi->rtcd.variance, sad8x16x4d);
+
+    cpi->fn_ptr[BLOCK_8X8].sdf            = VARIANCE_INVOKE(&cpi->rtcd.variance, sad8x8);
+    cpi->fn_ptr[BLOCK_8X8].vf             = VARIANCE_INVOKE(&cpi->rtcd.variance, var8x8);
+    cpi->fn_ptr[BLOCK_8X8].svf            = VARIANCE_INVOKE(&cpi->rtcd.variance, subpixvar8x8);
+    cpi->fn_ptr[BLOCK_8X8].svf_halfpix_h  = NULL;
+    cpi->fn_ptr[BLOCK_8X8].svf_halfpix_v  = NULL;
+    cpi->fn_ptr[BLOCK_8X8].svf_halfpix_hv = NULL;
+    cpi->fn_ptr[BLOCK_8X8].sdx3f          = VARIANCE_INVOKE(&cpi->rtcd.variance, sad8x8x3);
+    cpi->fn_ptr[BLOCK_8X8].sdx4df         = VARIANCE_INVOKE(&cpi->rtcd.variance, sad8x8x4d);
+
+    cpi->fn_ptr[BLOCK_4X4].sdf            = VARIANCE_INVOKE(&cpi->rtcd.variance, sad4x4);
+    cpi->fn_ptr[BLOCK_4X4].vf             = VARIANCE_INVOKE(&cpi->rtcd.variance, var4x4);
+    cpi->fn_ptr[BLOCK_4X4].svf            = VARIANCE_INVOKE(&cpi->rtcd.variance, subpixvar4x4);
+    cpi->fn_ptr[BLOCK_4X4].svf_halfpix_h  = NULL;
+    cpi->fn_ptr[BLOCK_4X4].svf_halfpix_v  = NULL;
+    cpi->fn_ptr[BLOCK_4X4].svf_halfpix_hv = NULL;
+    cpi->fn_ptr[BLOCK_4X4].sdx3f          = VARIANCE_INVOKE(&cpi->rtcd.variance, sad4x4x3);
+    cpi->fn_ptr[BLOCK_4X4].sdx4df         = VARIANCE_INVOKE(&cpi->rtcd.variance, sad4x4x4d);
  
  #if !(CONFIG_REALTIME_ONLY)
      cpi->full_search_sad = SEARCH_INVOKE(&cpi->rtcd.search, full_search);
diff --git a/vp8/encoder/onyx_int.h b/vp8/encoder/onyx_int.h

index eb516e9..81e32f0 100644 (file)
--- a/vp8/encoder/onyx_int.h
+++ b/vp8/encoder/onyx_int.h
@@ -229,6 +229,16 @@ typedef struct VP8_ENCODER_RTCD
      vp8_search_rtcd_vtable_t    search;
  } VP8_ENCODER_RTCD;
  
+enum
+{
+    BLOCK_16X8,
+    BLOCK_8X16,
+    BLOCK_8X8,
+    BLOCK_4X4,
+    BLOCK_16X16,
+    BLOCK_MAX_SEGMENTS
+};
+
  typedef struct
  {
  
@@ -591,7 +601,7 @@ typedef struct
      fractional_mv_step_fp *find_fractional_mv_step;
      vp8_full_search_fn_t full_search_sad;
      vp8_diamond_search_fn_t diamond_search_sad;
-    vp8_variance_fn_ptr_t fn_ptr;
+    vp8_variance_fn_ptr_t fn_ptr[BLOCK_MAX_SEGMENTS];
      unsigned int time_receive_data;
      unsigned int time_compress_data;
      unsigned int time_pick_lpf;
diff --git a/vp8/encoder/pickinter.c b/vp8/encoder/pickinter.c

index 8821b3a..2f7dd9c 100644 (file)
--- a/vp8/encoder/pickinter.c
+++ b/vp8/encoder/pickinter.c
@@ -50,14 +50,13 @@ extern int vp8_cost_mv_ref(MB_PREDICTION_MODE m, const int near_mv_ref_ct[4]);
  extern void vp8_set_mbmode_and_mvs(MACROBLOCK *x, MB_PREDICTION_MODE mb, MV *mv);
  
  
-int vp8_skip_fractional_mv_step(MACROBLOCK *mb, BLOCK *b, BLOCKD *d, MV *bestmv, MV *ref_mv, int error_per_bit, vp8_subpixvariance_fn_t svf, vp8_variance_fn_t vf, int *mvcost[2])
+int vp8_skip_fractional_mv_step(MACROBLOCK *mb, BLOCK *b, BLOCKD *d, MV *bestmv, MV *ref_mv, int error_per_bit, const vp8_variance_fn_ptr_t *vfp, int *mvcost[2])
  {
      (void) b;
      (void) d;
      (void) ref_mv;
      (void) error_per_bit;
-    (void) svf;
-    (void) vf;
+    (void) vfp;
      (void) mvcost;
      bestmv->row <<= 3;
      bestmv->col <<= 3;
@@ -65,7 +64,7 @@ int vp8_skip_fractional_mv_step(MACROBLOCK *mb, BLOCK *b, BLOCKD *d, MV *bestmv,
  }
  
  
-static int get_inter_mbpred_error(MACROBLOCK *mb, vp8_subpixvariance_fn_t svf, vp8_variance_fn_t vf, unsigned int *sse)
+static int get_inter_mbpred_error(MACROBLOCK *mb, const vp8_variance_fn_ptr_t *vfp, unsigned int *sse)
  {
  
      BLOCK *b = &mb->block[0];
@@ -81,11 +80,11 @@ static int get_inter_mbpred_error(MACROBLOCK *mb, vp8_subpixvariance_fn_t svf, v
  
      if (xoffset | yoffset)
      {
-        return svf(in_what, in_what_stride, xoffset, yoffset, what, what_stride, sse);
+        return vfp->svf(in_what, in_what_stride, xoffset, yoffset, what, what_stride, sse);
      }
      else
      {
-        return vf(what, what_stride, in_what, in_what_stride, sse);
+        return vfp->vf(what, what_stride, in_what, in_what_stride, sse);
      }
  
  }
@@ -719,13 +718,13 @@ int vp8_pick_inter_mode(VP8_COMP *cpi, MACROBLOCK *x, int recon_yoffset, int rec
  
              if (cpi->sf.search_method == HEX)
              {
-                bestsme = vp8_hex_search(x, b, d, &best_ref_mv1, &d->bmi.mv.as_mv, step_param, sadpb/*x->errorperbit*/, &num00, cpi->fn_ptr.vf, cpi->fn_ptr.sdf, x->mvsadcost, x->mvcost);
+                bestsme = vp8_hex_search(x, b, d, &best_ref_mv1, &d->bmi.mv.as_mv, step_param, sadpb/*x->errorperbit*/, &num00, &cpi->fn_ptr[BLOCK_16X16], x->mvsadcost, x->mvcost);
                  mode_mv[NEWMV].row = d->bmi.mv.as_mv.row;
                  mode_mv[NEWMV].col = d->bmi.mv.as_mv.col;
              }
              else
              {
-                bestsme = cpi->diamond_search_sad(x, b, d, &best_ref_mv1, &d->bmi.mv.as_mv, step_param, sadpb / 2/*x->errorperbit*/, &num00, &cpi->fn_ptr, x->mvsadcost, x->mvcost); //sadpb < 9
+                bestsme = cpi->diamond_search_sad(x, b, d, &best_ref_mv1, &d->bmi.mv.as_mv, step_param, sadpb / 2/*x->errorperbit*/, &num00, &cpi->fn_ptr[BLOCK_16X16], x->mvsadcost, x->mvcost); //sadpb < 9
                  mode_mv[NEWMV].row = d->bmi.mv.as_mv.row;
                  mode_mv[NEWMV].col = d->bmi.mv.as_mv.col;
  
@@ -744,7 +743,7 @@ int vp8_pick_inter_mode(VP8_COMP *cpi, MACROBLOCK *x, int recon_yoffset, int rec
                          num00--;
                      else
                      {
-                        thissme = cpi->diamond_search_sad(x, b, d, &best_ref_mv1, &d->bmi.mv.as_mv, step_param + n, sadpb / 4/*x->errorperbit*/, &num00, &cpi->fn_ptr, x->mvsadcost, x->mvcost); //sadpb = 9
+                        thissme = cpi->diamond_search_sad(x, b, d, &best_ref_mv1, &d->bmi.mv.as_mv, step_param + n, sadpb / 4/*x->errorperbit*/, &num00, &cpi->fn_ptr[BLOCK_16X16], x->mvsadcost, x->mvcost); //sadpb = 9
  
                          if (thissme < bestsme)
                          {
@@ -765,7 +764,7 @@ int vp8_pick_inter_mode(VP8_COMP *cpi, MACROBLOCK *x, int recon_yoffset, int rec
          }
  
          if (bestsme < INT_MAX)
-            cpi->find_fractional_mv_step(x, b, d, &d->bmi.mv.as_mv, &best_ref_mv1, x->errorperbit, cpi->fn_ptr.svf, cpi->fn_ptr.vf, cpi->mb.mvcost);
+            cpi->find_fractional_mv_step(x, b, d, &d->bmi.mv.as_mv, &best_ref_mv1, x->errorperbit, &cpi->fn_ptr[BLOCK_16X16], cpi->mb.mvcost);
  
          mode_mv[NEWMV].row = d->bmi.mv.as_mv.row;
          mode_mv[NEWMV].col = d->bmi.mv.as_mv.col;
@@ -795,7 +794,7 @@ int vp8_pick_inter_mode(VP8_COMP *cpi, MACROBLOCK *x, int recon_yoffset, int rec
              x->e_mbd.block[0].bmi.mode = this_mode;
              x->e_mbd.block[0].bmi.mv.as_int = x->e_mbd.mode_info_context->mbmi.mv.as_int;
  
-            distortion2 = get_inter_mbpred_error(x, cpi->fn_ptr.svf, cpi->fn_ptr.vf, (unsigned int *)(&sse));
+            distortion2 = get_inter_mbpred_error(x, &cpi->fn_ptr[BLOCK_16X16], (unsigned int *)(&sse));
  
              this_rd = RD_ESTIMATE(x->rdmult, x->rddiv, rate2, distortion2);
  
diff --git a/vp8/encoder/rdopt.c b/vp8/encoder/rdopt.c

index dbef85b..8f406b9 100644 (file)
--- a/vp8/encoder/rdopt.c
+++ b/vp8/encoder/rdopt.c
@@ -1130,6 +1130,8 @@ static int vp8_rd_pick_best_mbsegmentation(VP8_COMP *cpi, MACROBLOCK *x, MV *bes
      int bsd = 0;
      int bestsegmentyrate = 0;
  
+    static const int segmentation_to_sseshift[4] = {3, 3, 2, 0};
+
      // FIX TO Rd error outrange bug PGW 9 june 2004
      B_PREDICTION_MODE bmodes[16] = {ZERO4X4, ZERO4X4, ZERO4X4, ZERO4X4,
                                      ZERO4X4, ZERO4X4, ZERO4X4, ZERO4X4,
@@ -1151,10 +1153,10 @@ static int vp8_rd_pick_best_mbsegmentation(VP8_COMP *cpi, MACROBLOCK *x, MV *bes
          int rate = 0;
          int sbr = 0;
          int sbd = 0;
-        int UNINITIALIZED_IS_SAFE(sseshift);
+        int sseshift;
          int segmentyrate = 0;
  
-        vp8_variance_fn_ptr_t v_fn_ptr;
+        vp8_variance_fn_ptr_t *v_fn_ptr;
  
          ENTROPY_CONTEXT_PLANES t_above, t_left;
          ENTROPY_CONTEXT *ta;
@@ -1174,42 +1176,8 @@ static int vp8_rd_pick_best_mbsegmentation(VP8_COMP *cpi, MACROBLOCK *x, MV *bes
          br = 0;
          bd = 0;
  
-        switch (segmentation)
-        {
-        case 0:
-            v_fn_ptr.vf    = VARIANCE_INVOKE(&cpi->rtcd.variance, var16x8);
-            v_fn_ptr.svf   = VARIANCE_INVOKE(&cpi->rtcd.variance, subpixvar16x8);
-            v_fn_ptr.sdf   = VARIANCE_INVOKE(&cpi->rtcd.variance, sad16x8);
-            v_fn_ptr.sdx3f = VARIANCE_INVOKE(&cpi->rtcd.variance, sad16x8x3);
-            v_fn_ptr.sdx4df = VARIANCE_INVOKE(&cpi->rtcd.variance, sad16x8x4d);
-            sseshift = 3;
-            break;
-        case 1:
-            v_fn_ptr.vf    = VARIANCE_INVOKE(&cpi->rtcd.variance, var8x16);
-            v_fn_ptr.svf   = VARIANCE_INVOKE(&cpi->rtcd.variance, subpixvar8x16);
-            v_fn_ptr.sdf   = VARIANCE_INVOKE(&cpi->rtcd.variance, sad8x16);
-            v_fn_ptr.sdx3f = VARIANCE_INVOKE(&cpi->rtcd.variance, sad8x16x3);
-            v_fn_ptr.sdx4df = VARIANCE_INVOKE(&cpi->rtcd.variance, sad8x16x4d);
-            sseshift = 3;
-            break;
-        case 2:
-            v_fn_ptr.vf    = VARIANCE_INVOKE(&cpi->rtcd.variance, var8x8);
-            v_fn_ptr.svf   = VARIANCE_INVOKE(&cpi->rtcd.variance, subpixvar8x8);
-            v_fn_ptr.sdf   = VARIANCE_INVOKE(&cpi->rtcd.variance, sad8x8);
-            v_fn_ptr.sdx3f = VARIANCE_INVOKE(&cpi->rtcd.variance, sad8x8x3);
-            v_fn_ptr.sdx4df = VARIANCE_INVOKE(&cpi->rtcd.variance, sad8x8x4d);
-            sseshift = 2;
-            break;
-        case 3:
-            v_fn_ptr.vf    = VARIANCE_INVOKE(&cpi->rtcd.variance, var4x4);
-            v_fn_ptr.svf   = VARIANCE_INVOKE(&cpi->rtcd.variance, subpixvar4x4);
-            v_fn_ptr.sdf   = VARIANCE_INVOKE(&cpi->rtcd.variance, sad4x4);
-            v_fn_ptr.sdx3f = VARIANCE_INVOKE(&cpi->rtcd.variance, sad4x4x3);
-            v_fn_ptr.sdx4df = VARIANCE_INVOKE(&cpi->rtcd.variance, sad4x4x4d);
-            sseshift = 0;
-            break;
-        }
-
+        v_fn_ptr = &cpi->fn_ptr[segmentation];
+        sseshift = segmentation_to_sseshift[segmentation];
          labels = vp8_mbsplits[segmentation];
          label_count = vp8_count_labels(labels);
  
@@ -1281,10 +1249,10 @@ static int vp8_rd_pick_best_mbsegmentation(VP8_COMP *cpi, MACROBLOCK *x, MV *bes
                          int sadpb = x->sadperbit4;
  
                          if (cpi->sf.search_method == HEX)
-                            bestsme = vp8_hex_search(x, c, e, best_ref_mv, &mode_mv[NEW4X4], step_param, sadpb/*x->errorperbit*/, &num00, v_fn_ptr.vf, v_fn_ptr.sdf, x->mvsadcost, mvcost);
+                            bestsme = vp8_hex_search(x, c, e, best_ref_mv, &mode_mv[NEW4X4], step_param, sadpb/*x->errorperbit*/, &num00, v_fn_ptr, x->mvsadcost, mvcost);
                          else
                          {
-                            bestsme = cpi->diamond_search_sad(x, c, e, best_ref_mv, &mode_mv[NEW4X4], step_param, sadpb / 2/*x->errorperbit*/, &num00, &v_fn_ptr, x->mvsadcost, mvcost);
+                            bestsme = cpi->diamond_search_sad(x, c, e, best_ref_mv, &mode_mv[NEW4X4], step_param, sadpb / 2/*x->errorperbit*/, &num00, v_fn_ptr, x->mvsadcost, mvcost);
  
                              n = num00;
                              num00 = 0;
@@ -1297,7 +1265,7 @@ static int vp8_rd_pick_best_mbsegmentation(VP8_COMP *cpi, MACROBLOCK *x, MV *bes
                                      num00--;
                                  else
                                  {
-                                    thissme = cpi->diamond_search_sad(x, c, e, best_ref_mv, &temp_mv, step_param + n, sadpb / 2/*x->errorperbit*/, &num00, &v_fn_ptr, x->mvsadcost, mvcost);
+                                    thissme = cpi->diamond_search_sad(x, c, e, best_ref_mv, &temp_mv, step_param + n, sadpb / 2/*x->errorperbit*/, &num00, v_fn_ptr, x->mvsadcost, mvcost);
  
                                      if (thissme < bestsme)
                                      {
@@ -1312,7 +1280,7 @@ static int vp8_rd_pick_best_mbsegmentation(VP8_COMP *cpi, MACROBLOCK *x, MV *bes
                          // Should we do a full search (best quality only)
                          if ((compressor_speed == 0) && (bestsme >> sseshift) > 4000)
                          {
-                            thissme = cpi->full_search_sad(x, c, e, best_ref_mv, sadpb / 4, 16, &v_fn_ptr, x->mvcost, x->mvsadcost);
+                            thissme = cpi->full_search_sad(x, c, e, best_ref_mv, sadpb / 4, 16, v_fn_ptr, x->mvcost, x->mvsadcost);
  
                              if (thissme < bestsme)
                              {
@@ -1330,9 +1298,9 @@ static int vp8_rd_pick_best_mbsegmentation(VP8_COMP *cpi, MACROBLOCK *x, MV *bes
                      if (bestsme < INT_MAX)
                      {
                          if (!fullpixel)
-                            cpi->find_fractional_mv_step(x, c, e, &mode_mv[NEW4X4], best_ref_mv, x->errorperbit / 2, v_fn_ptr.svf, v_fn_ptr.vf, mvcost);
+                            cpi->find_fractional_mv_step(x, c, e, &mode_mv[NEW4X4], best_ref_mv, x->errorperbit / 2, v_fn_ptr, mvcost);
                          else
-                            vp8_skip_fractional_mv_step(x, c, e, &mode_mv[NEW4X4], best_ref_mv, x->errorperbit, v_fn_ptr.svf, v_fn_ptr.vf, mvcost);
+                            vp8_skip_fractional_mv_step(x, c, e, &mode_mv[NEW4X4], best_ref_mv, x->errorperbit, v_fn_ptr, mvcost);
                      }
                  }
  
@@ -1852,13 +1820,13 @@ int vp8_rd_pick_inter_mode(VP8_COMP *cpi, MACROBLOCK *x, int recon_yoffset, int
  
                      if (cpi->sf.search_method == HEX)
                      {
-                        bestsme = vp8_hex_search(x, b, d, &best_ref_mv, &d->bmi.mv.as_mv, step_param, sadpb/*x->errorperbit*/, &num00, cpi->fn_ptr.vf, cpi->fn_ptr.sdf, x->mvsadcost, x->mvcost);
+                        bestsme = vp8_hex_search(x, b, d, &best_ref_mv, &d->bmi.mv.as_mv, step_param, sadpb/*x->errorperbit*/, &num00, &cpi->fn_ptr[BLOCK_16X16], x->mvsadcost, x->mvcost);
                          mode_mv[NEWMV].row = d->bmi.mv.as_mv.row;
                          mode_mv[NEWMV].col = d->bmi.mv.as_mv.col;
                      }
                      else
                      {
-                        bestsme = cpi->diamond_search_sad(x, b, d, &best_ref_mv, &d->bmi.mv.as_mv, step_param, sadpb / 2/*x->errorperbit*/, &num00, &cpi->fn_ptr, x->mvsadcost, x->mvcost); //sadpb < 9
+                        bestsme = cpi->diamond_search_sad(x, b, d, &best_ref_mv, &d->bmi.mv.as_mv, step_param, sadpb / 2/*x->errorperbit*/, &num00, &cpi->fn_ptr[BLOCK_16X16], x->mvsadcost, x->mvcost); //sadpb < 9
                          mode_mv[NEWMV].row = d->bmi.mv.as_mv.row;
                          mode_mv[NEWMV].col = d->bmi.mv.as_mv.col;
  
@@ -1877,7 +1845,7 @@ int vp8_rd_pick_inter_mode(VP8_COMP *cpi, MACROBLOCK *x, int recon_yoffset, int
                                  num00--;
                              else
                              {
-                                thissme = cpi->diamond_search_sad(x, b, d, &best_ref_mv, &d->bmi.mv.as_mv, step_param + n, sadpb / 4/*x->errorperbit*/, &num00, &cpi->fn_ptr, x->mvsadcost, x->mvcost); //sadpb = 9
+                                thissme = cpi->diamond_search_sad(x, b, d, &best_ref_mv, &d->bmi.mv.as_mv, step_param + n, sadpb / 4/*x->errorperbit*/, &num00, &cpi->fn_ptr[BLOCK_16X16], x->mvsadcost, x->mvcost); //sadpb = 9
  
                                  if (thissme < bestsme)
                                  {
@@ -1914,7 +1882,7 @@ int vp8_rd_pick_inter_mode(VP8_COMP *cpi, MACROBLOCK *x, int recon_yoffset, int
                      search_range = (search_range > cpi->sf.max_fs_radius) ? cpi->sf.max_fs_radius : search_range;
                      {
                          int sadpb = x->sadperbit16 >> 2;
-                        thissme = cpi->full_search_sad(x, b, d, &best_ref_mv, sadpb, search_range, &cpi->fn_ptr, x->mvcost, x->mvsadcost);
+                        thissme = cpi->full_search_sad(x, b, d, &best_ref_mv, sadpb, search_range, &cpi->fn_ptr[BLOCK_16X16], x->mvcost, x->mvsadcost);
                      }
  
                      // Barrier threshold to initiating full search
@@ -1939,7 +1907,7 @@ int vp8_rd_pick_inter_mode(VP8_COMP *cpi, MACROBLOCK *x, int recon_yoffset, int
  
                  if (bestsme < INT_MAX)
                      // cpi->find_fractional_mv_step(x,b,d,&d->bmi.mv.as_mv,&best_ref_mv,x->errorperbit/2,cpi->fn_ptr.svf,cpi->fn_ptr.vf,x->mvcost);  // normal mvc=11
-                    cpi->find_fractional_mv_step(x, b, d, &d->bmi.mv.as_mv, &best_ref_mv, x->errorperbit / 4, cpi->fn_ptr.svf, cpi->fn_ptr.vf, x->mvcost);
+                    cpi->find_fractional_mv_step(x, b, d, &d->bmi.mv.as_mv, &best_ref_mv, x->errorperbit / 4, &cpi->fn_ptr[BLOCK_16X16], x->mvcost);
  
                  mode_mv[NEWMV].row = d->bmi.mv.as_mv.row;
                  mode_mv[NEWMV].col = d->bmi.mv.as_mv.col;
diff --git a/vp8/encoder/temporal_filter.c b/vp8/encoder/temporal_filter.c

index 630afdb..fd5dd7e 100644 (file)
--- a/vp8/encoder/temporal_filter.c
+++ b/vp8/encoder/temporal_filter.c
@@ -234,7 +234,7 @@ static int find_matching_mb
              &best_ref_mv1, &d->bmi.mv.as_mv,
              step_param,
              sadpb/*x->errorperbit*/,
-            &num00, cpi->fn_ptr.vf, cpi->fn_ptr.sdf,
+            &num00, &cpi->fn_ptr[BLOCK_16X16],
              mvsadcost, mvcost);
      }
      else
@@ -245,7 +245,7 @@ static int find_matching_mb
              &best_ref_mv1, &d->bmi.mv.as_mv,
              step_param,
              sadpb / 2/*x->errorperbit*/,
-            &num00, &cpi->fn_ptr,
+            &num00, &cpi->fn_ptr[BLOCK_16X16],
              mvsadcost, mvcost); //sadpb < 9
  
          // Further step/diamond searches as necessary
@@ -267,7 +267,7 @@ static int find_matching_mb
                      &best_ref_mv1, &d->bmi.mv.as_mv,
                      step_param + n,
                      sadpb / 4/*x->errorperbit*/,
-                    &num00, &cpi->fn_ptr,
+                    &num00, &cpi->fn_ptr[BLOCK_16X16],
                      mvsadcost, mvcost); //sadpb = 9
  
                  if (thissme < bestsme)
@@ -291,8 +291,8 @@ static int find_matching_mb
      {
          bestsme = cpi->find_fractional_mv_step(x, b, d,
                      &d->bmi.mv.as_mv, &best_ref_mv1,
-                    x->errorperbit, cpi->fn_ptr.svf,
-                    cpi->fn_ptr.vf, cpi->mb.mvcost);
+                    x->errorperbit, &cpi->fn_ptr[BLOCK_16X16],
+                    cpi->mb.mvcost);
      }
  #endif
  
diff --git a/vp8/encoder/variance.h b/vp8/encoder/variance.h

index 3c9ae98..f60038f 100644 (file)
--- a/vp8/encoder/variance.h
+++ b/vp8/encoder/variance.h
@@ -219,6 +219,21 @@ extern prototype_subpixvariance(vp8_variance_subpixvar16x8);
  #endif
  extern prototype_subpixvariance(vp8_variance_subpixvar16x16);
  
+#ifndef vp8_variance_halfpixvar16x16_h
+#define vp8_variance_halfpixvar16x16_h vp8_half_pixel_variance16x16_c
+#endif
+extern prototype_variance(vp8_variance_halfpixvar16x16_h);
+
+#ifndef vp8_variance_halfpixvar16x16_v
+#define vp8_variance_halfpixvar16x16_v vp8_half_pixel_variance16x16_c
+#endif
+extern prototype_variance(vp8_variance_halfpixvar16x16_v);
+
+#ifndef vp8_variance_halfpixvar16x16_hv
+#define vp8_variance_halfpixvar16x16_hv vp8_half_pixel_variance16x16_c
+#endif
+extern prototype_variance(vp8_variance_halfpixvar16x16_hv);
+
  #ifndef vp8_variance_subpixmse16x16
  #define vp8_variance_subpixmse16x16 vp8_sub_pixel_mse16x16_c
  #endif
@@ -283,6 +298,9 @@ typedef struct
      vp8_subpixvariance_fn_t  subpixvar8x16;
      vp8_subpixvariance_fn_t  subpixvar16x8;
      vp8_subpixvariance_fn_t  subpixvar16x16;
+    vp8_variance_fn_t        halfpixvar16x16_h;
+    vp8_variance_fn_t        halfpixvar16x16_v;
+    vp8_variance_fn_t        halfpixvar16x16_hv;
      vp8_subpixvariance_fn_t  subpixmse16x16;
  
      vp8_getmbss_fn_t         getmbss;
@@ -309,11 +327,14 @@ typedef struct
  
  typedef struct
  {
-    vp8_sad_fn_t  sdf;
-    vp8_sad_multi_fn_t sdx3f;
-    vp8_sad_multi_d_fn_t sdx4df;
-    vp8_variance_fn_t vf;
+    vp8_sad_fn_t            sdf;
+    vp8_variance_fn_t       vf;
      vp8_subpixvariance_fn_t svf;
+    vp8_variance_fn_t       svf_halfpix_h;
+    vp8_variance_fn_t       svf_halfpix_v;
+    vp8_variance_fn_t       svf_halfpix_hv;
+    vp8_sad_multi_fn_t      sdx3f;
+    vp8_sad_multi_d_fn_t    sdx4df;
  } vp8_variance_fn_ptr_t;
  
  #if CONFIG_RUNTIME_CPU_DETECT
@@ -322,7 +343,4 @@ typedef struct
  #define VARIANCE_INVOKE(ctx,fn) vp8_variance_##fn
  #endif
  
-/* TODO: Determine if this USEBILINEAR flag is necessary. */
-#define USEBILINEAR
-
  #endif
diff --git a/vp8/encoder/variance_c.c b/vp8/encoder/variance_c.c

index 1774143..48d5bb5 100644 (file)
--- a/vp8/encoder/variance_c.c
+++ b/vp8/encoder/variance_c.c
@@ -24,7 +24,6 @@ const int vp8_six_tap[8][6] =
  };
  
  
-#ifdef USEBILINEAR
  const int VP8_FILTER_WEIGHT = 128;
  const int VP8_FILTER_SHIFT  =   7;
  const int vp8_bilinear_taps[8][2] =
@@ -461,6 +460,19 @@ unsigned int vp8_sub_pixel_variance16x16_c
      return vp8_variance16x16_c(temp2, 16, dst_ptr, dst_pixels_per_line, sse);
  }
  
+
+unsigned int vp8_half_pixel_variance16x16_c(
+    const unsigned char *src_ptr,
+    int  source_stride,
+    const unsigned char *ref_ptr,
+    int  recon_stride,
+    unsigned int *sse)
+{
+    return vp8_sub_pixel_variance16x16_c(src_ptr, source_stride, 4, 4,
+                                         ref_ptr, recon_stride, sse);
+}
+
+
  unsigned int vp8_sub_pixel_mse16x16_c
  (
      const unsigned char  *src_ptr,
@@ -525,4 +537,3 @@ unsigned int vp8_sub_pixel_variance8x16_c
  
      return vp8_variance8x16_c(temp2, 8, dst_ptr, dst_pixels_per_line, sse);
  }
-#endif
diff --git a/vp8/vp8cx_arm.mk b/vp8/vp8cx_arm.mk

index d126faf..da27e08 100644 (file)
--- a/vp8/vp8cx_arm.mk
+++ b/vp8/vp8cx_arm.mk
@@ -19,7 +19,6 @@ VP8_CX_SRCS-$(HAVE_ARMV7)  += encoder/arm/encodemb_arm.c
  VP8_CX_SRCS-$(HAVE_ARMV7)  += encoder/arm/quantize_arm.c
  VP8_CX_SRCS-$(HAVE_ARMV7)  += encoder/arm/picklpf_arm.c
  VP8_CX_SRCS-$(HAVE_ARMV5TE) += encoder/arm/boolhuff_arm.c
-VP8_CX_SRCS-$(HAVE_ARMV7)  += encoder/arm/mcomp_arm.c
  
  VP8_CX_SRCS_REMOVE-$(HAVE_ARMV5TE)  += encoder/boolhuff.c
author	John Koleszar <jkoleszar@google.com>
	Tue, 26 Oct 2010 19:34:16 +0000 (15:34 -0400)
committer	John Koleszar <jkoleszar@google.com>
	Wed, 27 Oct 2010 03:00:56 +0000 (20:00 -0700)
vp8/encoder/arm/mcomp_arm.c	[deleted file]	patch \| blob \| history
vp8/encoder/arm/neon/vp8_subpixelvariance16x16s_neon.asm		patch \| blob \| history
vp8/encoder/arm/variance_arm.h		patch \| blob \| history
vp8/encoder/firstpass.c		patch \| blob \| history
vp8/encoder/mcomp.c		patch \| blob \| history
vp8/encoder/mcomp.h		patch \| blob \| history
vp8/encoder/onyx_if.c		patch \| blob \| history
vp8/encoder/onyx_int.h		patch \| blob \| history
vp8/encoder/pickinter.c		patch \| blob \| history
vp8/encoder/rdopt.c		patch \| blob \| history
vp8/encoder/temporal_filter.c		patch \| blob \| history
vp8/encoder/variance.h		patch \| blob \| history
vp8/encoder/variance_c.c		patch \| blob \| history
vp8/vp8cx_arm.mk		patch \| blob \| history