From 00f9eb659034cb5cef0bf063c1b72c72c6333f36 Mon Sep 17 00:00:00 2001 From: Deb Mukherjee Date: Thu, 26 Jul 2012 13:42:07 -0700 Subject: [PATCH] New motion vector entropy coding Adds a new experiment with redesigned/refactored motion vector entropy coding. The patch also takes a first step towards separating the integer and fractional pel components of a MV. However the fractional pel encoding still depends on the integer pel part and so they are not fully independent. Further experiments are in progress to see how much they can be decoupled without affecting performance. All components including entropy coding/decoding, costing for MV search, forward updates and backward updates to probability tables, have been implemented. Results so far: derf: +0.19% std-hd: +0.28% yt: +0.80% hd: +1.15% Patch: Simplifies the fractional pel models: derf: +0.284% std-hd: +0.289% yt: +0.849% hd: +1.254% Patch: Some changes in the models, rebased. derf: +0.330% std-hd: +0.306% yt: +0.816% hd: +1.225% Change-Id: I646b3c48f3587f4cc909639b78c3798da6402678 --- configure | 1 + vp8/common/entropymv.c | 470 ++++++++++++++++++++++++++++++++++++- vp8/common/entropymv.h | 119 +++++++++- vp8/common/onyxc_int.h | 9 + vp8/common/reconinter.c | 6 +- vp8/common/treecoder.c | 12 + vp8/common/treecoder.h | 1 + vp8/decoder/decodemv.c | 196 +++++++++++++++- vp8/decoder/decodframe.c | 16 +- vp8/encoder/bitstream.c | 67 +++++- vp8/encoder/block.h | 16 +- vp8/encoder/encodeframe.c | 7 +- vp8/encoder/encodemv.c | 533 +++++++++++++++++++++++++++++++++++++++++- vp8/encoder/encodemv.h | 28 ++- vp8/encoder/firstpass.c | 9 +- vp8/encoder/mbgraph.c | 12 +- vp8/encoder/mcomp.c | 359 ++++++++++++++++------------ vp8/encoder/mcomp.h | 30 ++- vp8/encoder/onyx_if.c | 84 ++++++- vp8/encoder/onyx_int.h | 11 + vp8/encoder/ratectrl.c | 31 ++- vp8/encoder/rdopt.c | 103 ++++++-- vp8/encoder/temporal_filter.c | 6 +- 23 files changed, 1891 insertions(+), 235 deletions(-) diff --git a/configure b/configure index 2180ae3..168da07 100755 --- a/configure +++ b/configure @@ -228,6 +228,7 @@ EXPERIMENT_LIST=" newbestrefmv new_mvref hybridtransform16x16 + newmventropy " CONFIG_LIST=" external_build diff --git a/vp8/common/entropymv.c b/vp8/common/entropymv.c index 1664b28..9d7d566 100644 --- a/vp8/common/entropymv.c +++ b/vp8/common/entropymv.c @@ -12,6 +12,443 @@ #include "onyxc_int.h" #include "entropymv.h" +//#define MV_COUNT_TESTING + +#if CONFIG_NEWMVENTROPY + +#define MV_COUNT_SAT 16 +#define MV_MAX_UPDATE_FACTOR 160 + +/* Smooth or bias the mv-counts before prob computation */ +/* #define SMOOTH_MV_COUNTS */ + +const vp8_tree_index vp8_mv_joint_tree[2 * MV_JOINTS - 2] = { + -MV_JOINT_ZERO, 2, + -MV_JOINT_HNZVZ, 4, + -MV_JOINT_HZVNZ, -MV_JOINT_HNZVNZ +}; +struct vp8_token_struct vp8_mv_joint_encodings[MV_JOINTS]; + +const vp8_tree_index vp8_mv_class_tree[2 * MV_CLASSES - 2] = { + -MV_CLASS_0, 2, + -MV_CLASS_1, 4, + 6, 8, + -MV_CLASS_2, -MV_CLASS_3, + 10, 12, + -MV_CLASS_4, -MV_CLASS_5, + -MV_CLASS_6, -MV_CLASS_7, +}; +struct vp8_token_struct vp8_mv_class_encodings[MV_CLASSES]; + +const vp8_tree_index vp8_mv_class0_tree [2 * CLASS0_SIZE - 2] = { + -0, -1, +}; +struct vp8_token_struct vp8_mv_class0_encodings[CLASS0_SIZE]; + +const vp8_tree_index vp8_mv_fp_tree [2 * 4 - 2] = { + -0, 2, + -1, 4, + -2, -3 +}; +struct vp8_token_struct vp8_mv_fp_encodings[4]; + +const nmv_context vp8_default_nmv_context = { + {32, 64, 96}, + { + { /* vert component */ + 128, /* sign */ + {224, 144, 192, 168, 192, 176, 192}, /* class */ + {216}, /* class0 */ + {136, 140, 
148, 160, 176, 192, 224}, /* bits */ + {{128, 128, 64}, {96, 112, 64}}, /* class0_fp */ + {64, 96, 64}, /* fp */ + 160, /* class0_hp bit */ + 128, /* hp */ + }, + { /* hor component */ + 128, /* sign */ + {216, 128, 176, 160, 176, 176, 192}, /* class */ + {208}, /* class0 */ + {136, 140, 148, 160, 176, 192, 224}, /* bits */ + {{128, 128, 64}, {96, 112, 64}}, /* class0_fp */ + {64, 96, 64}, /* fp */ + 160, /* class0_hp bit */ + 128, /* hp */ + } + }, +}; + +MV_JOINT_TYPE vp8_get_mv_joint(MV mv) { + if (mv.row == 0 && mv.col == 0) return MV_JOINT_ZERO; + else if (mv.row == 0 && mv.col != 0) return MV_JOINT_HNZVZ; + else if (mv.row != 0 && mv.col == 0) return MV_JOINT_HZVNZ; + else return MV_JOINT_HNZVNZ; +} + +#define mv_class_base(c) ((c) ? (CLASS0_SIZE << (c + 2)) : 0) + +MV_CLASS_TYPE vp8_get_mv_class(int z, int *offset) { + MV_CLASS_TYPE c; + if (z < CLASS0_SIZE * 8) c = MV_CLASS_0; + else if (z < CLASS0_SIZE * 16) c = MV_CLASS_1; + else if (z < CLASS0_SIZE * 32) c = MV_CLASS_2; + else if (z < CLASS0_SIZE * 64) c = MV_CLASS_3; + else if (z < CLASS0_SIZE * 128) c = MV_CLASS_4; + else if (z < CLASS0_SIZE * 256) c = MV_CLASS_5; + else if (z < CLASS0_SIZE * 512) c = MV_CLASS_6; + else if (z < CLASS0_SIZE * 1024) c = MV_CLASS_7; + else assert(0); + if (offset) + *offset = z - mv_class_base(c); + return c; +} + +int vp8_get_mv_mag(MV_CLASS_TYPE c, int offset) { + return mv_class_base(c) + offset; +} + +static void increment_nmv_component_count(int v, + nmv_component_counts *mvcomp, + int incr, + int usehp) { + assert (v != 0); /* should not be zero */ + mvcomp->mvcount[MV_MAX + v] += incr; +} + +static void increment_nmv_component(int v, + nmv_component_counts *mvcomp, + int incr, + int usehp) { + int s, z, c, o, d, e, f; + assert (v != 0); /* should not be zero */ + s = v < 0; + mvcomp->sign[s] += incr; + z = (s ? 
-v : v) - 1; /* magnitude - 1 */ + + c = vp8_get_mv_class(z, &o); + mvcomp->classes[c] += incr; + + d = (o >> 3); /* int mv data */ + f = (o >> 1) & 3; /* fractional pel mv data */ + e = (o & 1); /* high precision mv data */ + if (c == MV_CLASS_0) { + mvcomp->class0[d] += incr; + } else { + int i, b; + b = c + CLASS0_BITS - 1; /* number of bits */ + for (i = 0; i < b; ++i) + mvcomp->bits[i][((d >> i) & 1)] += incr; + } + + /* Code the fractional pel bits */ + if (c == MV_CLASS_0) { + mvcomp->class0_fp[d][f] += incr; + } else { + mvcomp->fp[f] += incr; + } + + /* Code the high precision bit */ + if (usehp) { + if (c == MV_CLASS_0) { + mvcomp->class0_hp[e] += incr; + } else { + mvcomp->hp[e] += incr; + } + } else { /* assume the extra bit is 1 */ + if (c == MV_CLASS_0) { + mvcomp->class0_hp[1] += incr; + } else { + mvcomp->hp[1] += incr; + } + } +} + +#ifdef SMOOTH_MV_COUNTS +static void smooth_counts(nmv_component_counts *mvcomp) { + static const int flen = 3; // (filter_length + 1) / 2 + static const int fval[] = {8, 3, 1}; + static const int fvalbits = 4; + int i; + unsigned int smvcount[MV_VALS]; + vpx_memcpy(smvcount, mvcomp->mvcount, sizeof(smvcount)); + smvcount[MV_MAX] = (smvcount[MV_MAX - 1] + smvcount[MV_MAX + 1]) >> 1; + for (i = flen - 1; i <= MV_VALS - flen; ++i) { + int j, s = smvcount[i] * fval[0]; + for (j = 1; j < flen; ++j) + s += (smvcount[i - j] + smvcount[i + j]) * fval[j]; + mvcomp->mvcount[i] = (s + (1 << (fvalbits - 1))) >> fvalbits; + } +} +#endif + +static void counts_to_context(nmv_component_counts *mvcomp, int usehp) { + int v; + vpx_memset(mvcomp->sign, 0, sizeof(nmv_component_counts) - sizeof(mvcomp->mvcount)); + for (v = 1; v <= MV_MAX; v++) { + increment_nmv_component(-v, mvcomp, mvcomp->mvcount[MV_MAX - v], usehp); + increment_nmv_component( v, mvcomp, mvcomp->mvcount[MV_MAX + v], usehp); + } +} + +void vp8_increment_nmv(const MV *mv, const MV *ref, nmv_context_counts *mvctx, + int usehp) { + MV_JOINT_TYPE j = vp8_get_mv_joint(*mv); + mvctx->joints[j]++; + if (j == MV_JOINT_HZVNZ || j == MV_JOINT_HNZVNZ) { + increment_nmv_component_count(mv->row, &mvctx->comps[0], 1, usehp); + } + if (j == MV_JOINT_HNZVZ || j == MV_JOINT_HNZVNZ) { + increment_nmv_component_count(mv->col, &mvctx->comps[1], 1, usehp); + } +} + +static void adapt_prob(vp8_prob *dest, vp8_prob prep, vp8_prob newp, + unsigned int ct[2]) { + int factor; + int prob; + int count = ct[0] + ct[1]; + if (count) { + count = count > MV_COUNT_SAT ? MV_COUNT_SAT : count; + factor = (MV_MAX_UPDATE_FACTOR * count / MV_COUNT_SAT); + prob = ((int)prep * (256 - factor) + (int)(newp) * factor + 128) >> 8; + prob += !prob; + prob = (prob > 255 ? 
255 : prob); + *dest = prob; + } +} + +void vp8_counts_to_nmv_context( + nmv_context_counts *NMVcount, + nmv_context *prob, + int usehp, + unsigned int (*branch_ct_joint)[2], + unsigned int (*branch_ct_sign)[2], + unsigned int (*branch_ct_classes)[MV_CLASSES - 1][2], + unsigned int (*branch_ct_class0)[CLASS0_SIZE - 1][2], + unsigned int (*branch_ct_bits)[MV_OFFSET_BITS][2], + unsigned int (*branch_ct_class0_fp)[CLASS0_SIZE][4 - 1][2], + unsigned int (*branch_ct_fp)[4 - 1][2], + unsigned int (*branch_ct_class0_hp)[2], + unsigned int (*branch_ct_hp)[2]) { + int i, j, k; + counts_to_context(&NMVcount->comps[0], usehp); + counts_to_context(&NMVcount->comps[1], usehp); + vp8_tree_probs_from_distribution(MV_JOINTS, + vp8_mv_joint_encodings, + vp8_mv_joint_tree, + prob->joints, + branch_ct_joint, + NMVcount->joints, + 256, 1); + for (i = 0; i < 2; ++i) { + prob->comps[i].sign = + vp8_bin_prob_from_distribution(NMVcount->comps[i].sign); + branch_ct_sign[i][0] = NMVcount->comps[i].sign[0]; + branch_ct_sign[i][1] = NMVcount->comps[i].sign[1]; + vp8_tree_probs_from_distribution(MV_CLASSES, + vp8_mv_class_encodings, + vp8_mv_class_tree, + prob->comps[i].classes, + branch_ct_classes[i], + NMVcount->comps[i].classes, + 256, 1); + vp8_tree_probs_from_distribution(CLASS0_SIZE, + vp8_mv_class0_encodings, + vp8_mv_class0_tree, + prob->comps[i].class0, + branch_ct_class0[i], + NMVcount->comps[i].class0, + 256, 1); + for (j = 0; j < MV_OFFSET_BITS; ++j) { + prob->comps[i].bits[j] = vp8_bin_prob_from_distribution( + NMVcount->comps[i].bits[j]); + branch_ct_bits[i][j][0] = NMVcount->comps[i].bits[j][0]; + branch_ct_bits[i][j][1] = NMVcount->comps[i].bits[j][1]; + } + } + for (i = 0; i < 2; ++i) { + for (k = 0; k < CLASS0_SIZE; ++k) { + vp8_tree_probs_from_distribution(4, + vp8_mv_fp_encodings, + vp8_mv_fp_tree, + prob->comps[i].class0_fp[k], + branch_ct_class0_fp[i][k], + NMVcount->comps[i].class0_fp[k], + 256, 1); + } + vp8_tree_probs_from_distribution(4, + vp8_mv_fp_encodings, + vp8_mv_fp_tree, + prob->comps[i].fp, + branch_ct_fp[i], + NMVcount->comps[i].fp, + 256, 1); + } + if (usehp) { + for (i = 0; i < 2; ++i) { + prob->comps[i].class0_hp = vp8_bin_prob_from_distribution( + NMVcount->comps[i].class0_hp); + branch_ct_class0_hp[i][0] = NMVcount->comps[i].class0_hp[0]; + branch_ct_class0_hp[i][1] = NMVcount->comps[i].class0_hp[1]; + + prob->comps[i].hp = + vp8_bin_prob_from_distribution(NMVcount->comps[i].hp); + branch_ct_hp[i][0] = NMVcount->comps[i].hp[0]; + branch_ct_hp[i][1] = NMVcount->comps[i].hp[1]; + } + } +} + +void vp8_adapt_nmv_probs(VP8_COMMON *cm, int usehp) { + int i, j, k; + nmv_context prob; + unsigned int branch_ct_joint[MV_JOINTS - 1][2]; + unsigned int branch_ct_sign[2][2]; + unsigned int branch_ct_classes[2][MV_CLASSES - 1][2]; + unsigned int branch_ct_class0[2][CLASS0_SIZE - 1][2]; + unsigned int branch_ct_bits[2][MV_OFFSET_BITS][2]; + unsigned int branch_ct_class0_fp[2][CLASS0_SIZE][4 - 1][2]; + unsigned int branch_ct_fp[2][4 - 1][2]; + unsigned int branch_ct_class0_hp[2][2]; + unsigned int branch_ct_hp[2][2]; +#ifdef MV_COUNT_TESTING + printf("joints count: "); + for (j = 0; j < MV_JOINTS; ++j) printf("%d ", cm->fc.NMVcount.joints[j]); + printf("\n"); fflush(stdout); + printf("signs count:\n"); + for (i = 0; i < 2; ++i) + printf("%d/%d ", cm->fc.NMVcount.comps[i].sign[0], cm->fc.NMVcount.comps[i].sign[1]); + printf("\n"); fflush(stdout); + printf("classes count:\n"); + for (i = 0; i < 2; ++i) { + for (j = 0; j < MV_CLASSES; ++j) + printf("%d ", cm->fc.NMVcount.comps[i].classes[j]); + 
printf("\n"); fflush(stdout); + } + printf("class0 count:\n"); + for (i = 0; i < 2; ++i) { + for (j = 0; j < CLASS0_SIZE; ++j) + printf("%d ", cm->fc.NMVcount.comps[i].class0[j]); + printf("\n"); fflush(stdout); + } + printf("bits count:\n"); + for (i = 0; i < 2; ++i) { + for (j = 0; j < MV_OFFSET_BITS; ++j) + printf("%d/%d ", cm->fc.NMVcount.comps[i].bits[j][0], + cm->fc.NMVcount.comps[i].bits[j][1]); + printf("\n"); fflush(stdout); + } + printf("class0_fp count:\n"); + for (i = 0; i < 2; ++i) { + for (j = 0; j < CLASS0_SIZE; ++j) { + printf("{"); + for (k = 0; k < 4; ++k) + printf("%d ", cm->fc.NMVcount.comps[i].class0_fp[j][k]); + printf("}, "); + } + printf("\n"); fflush(stdout); + } + printf("fp count:\n"); + for (i = 0; i < 2; ++i) { + for (j = 0; j < 4; ++j) + printf("%d ", cm->fc.NMVcount.comps[i].fp[j]); + printf("\n"); fflush(stdout); + } + if (usehp) { + printf("class0_hp count:\n"); + for (i = 0; i < 2; ++i) + printf("%d/%d ", cm->fc.NMVcount.comps[i].class0_hp[0], + cm->fc.NMVcount.comps[i].class0_hp[1]); + printf("\n"); fflush(stdout); + printf("hp count:\n"); + for (i = 0; i < 2; ++i) + printf("%d/%d ", cm->fc.NMVcount.comps[i].hp[0], + cm->fc.NMVcount.comps[i].hp[1]); + printf("\n"); fflush(stdout); + } +#endif +#ifdef SMOOTH_MV_COUNTS + smooth_counts(&cm->fc.NMVcount.comps[0]); + smooth_counts(&cm->fc.NMVcount.comps[1]); +#endif + vp8_counts_to_nmv_context(&cm->fc.NMVcount, + &prob, + usehp, + branch_ct_joint, + branch_ct_sign, + branch_ct_classes, + branch_ct_class0, + branch_ct_bits, + branch_ct_class0_fp, + branch_ct_fp, + branch_ct_class0_hp, + branch_ct_hp); + + for (j = 0; j < MV_JOINTS - 1; ++j) { + adapt_prob(&cm->fc.nmvc.joints[j], + cm->fc.pre_nmvc.joints[j], + prob.joints[j], + branch_ct_joint[j]); + } + for (i = 0; i < 2; ++i) { + adapt_prob(&cm->fc.nmvc.comps[i].sign, + cm->fc.pre_nmvc.comps[i].sign, + prob.comps[i].sign, + branch_ct_sign[i]); + for (j = 0; j < MV_CLASSES - 1; ++j) { + adapt_prob(&cm->fc.nmvc.comps[i].classes[j], + cm->fc.pre_nmvc.comps[i].classes[j], + prob.comps[i].classes[j], + branch_ct_classes[i][j]); + } + for (j = 0; j < CLASS0_SIZE - 1; ++j) { + adapt_prob(&cm->fc.nmvc.comps[i].class0[j], + cm->fc.pre_nmvc.comps[i].class0[j], + prob.comps[i].class0[j], + branch_ct_class0[i][j]); + } + for (j = 0; j < MV_OFFSET_BITS; ++j) { + adapt_prob(&cm->fc.nmvc.comps[i].bits[j], + cm->fc.pre_nmvc.comps[i].bits[j], + prob.comps[i].bits[j], + branch_ct_bits[i][j]); + } + } + for (i = 0; i < 2; ++i) { + for (j = 0; j < CLASS0_SIZE; ++j) { + for (k = 0; k < 3; ++k) { + adapt_prob(&cm->fc.nmvc.comps[i].class0_fp[j][k], + cm->fc.pre_nmvc.comps[i].class0_fp[j][k], + prob.comps[i].class0_fp[j][k], + branch_ct_class0_fp[i][j][k]); + } + } + for (j = 0; j < 3; ++j) { + adapt_prob(&cm->fc.nmvc.comps[i].fp[j], + cm->fc.pre_nmvc.comps[i].fp[j], + prob.comps[i].fp[j], + branch_ct_fp[i][j]); + } + } + if (usehp) { + for (i = 0; i < 2; ++i) { + adapt_prob(&cm->fc.nmvc.comps[i].class0_hp, + cm->fc.pre_nmvc.comps[i].class0_hp, + prob.comps[i].class0_hp, + branch_ct_class0_hp[i]); + adapt_prob(&cm->fc.nmvc.comps[i].hp, + cm->fc.pre_nmvc.comps[i].hp, + prob.comps[i].hp, + branch_ct_hp[i]); + } + } +} + +#else /* CONFIG_NEWMVENTROPY */ + +#define MV_COUNT_SAT 16 +#define MV_MAX_UPDATE_FACTOR 128 + const MV_CONTEXT_HP vp8_mv_update_probs_hp[2] = { {{ 237, @@ -266,14 +703,6 @@ static void compute_component_probs_hp( } } -void vp8_entropy_mv_init() { - vp8_tokens_from_tree(vp8_small_mvencodings, vp8_small_mvtree); - vp8_tokens_from_tree(vp8_small_mvencodings_hp, 
vp8_small_mvtree_hp); -} - -// #define MV_COUNT_TESTING -#define MV_COUNT_SAT 16 -#define MV_MAX_UPDATE_FACTOR 128 void vp8_adapt_mv_probs(VP8_COMMON *cm) { int i, t, count, factor; #ifdef MV_COUNT_TESTING @@ -400,3 +829,28 @@ void vp8_adapt_mv_probs(VP8_COMMON *cm) { } } } + +#endif /* CONFIG_NEWMVENTROPY */ + +void vp8_entropy_mv_init() { +#if CONFIG_NEWMVENTROPY + vp8_tokens_from_tree(vp8_mv_joint_encodings, vp8_mv_joint_tree); + vp8_tokens_from_tree(vp8_mv_class_encodings, vp8_mv_class_tree); + vp8_tokens_from_tree(vp8_mv_class0_encodings, vp8_mv_class0_tree); + vp8_tokens_from_tree(vp8_mv_fp_encodings, vp8_mv_fp_tree); +#else + vp8_tokens_from_tree(vp8_small_mvencodings, vp8_small_mvtree); + vp8_tokens_from_tree(vp8_small_mvencodings_hp, vp8_small_mvtree_hp); +#endif +} + +void vp8_init_mv_probs(VP8_COMMON *cm) { +#if CONFIG_NEWMVENTROPY + vpx_memcpy(&cm->fc.nmvc, &vp8_default_nmv_context, sizeof(nmv_context)); +#else + vpx_memcpy(cm->fc.mvc, + vp8_default_mv_context, sizeof(vp8_default_mv_context)); + vpx_memcpy(cm->fc.mvc_hp, + vp8_default_mv_context_hp, sizeof(vp8_default_mv_context_hp)); +#endif +} diff --git a/vp8/common/entropymv.h b/vp8/common/entropymv.h index 535d9b8..20af7e4 100644 --- a/vp8/common/entropymv.h +++ b/vp8/common/entropymv.h @@ -16,6 +16,121 @@ #include "vpx_config.h" #include "blockd.h" +struct VP8Common; + +void vp8_entropy_mv_init(); +void vp8_init_mv_probs(struct VP8Common *cm); +void vp8_adapt_mv_probs(struct VP8Common *cm); +#if CONFIG_NEWMVENTROPY +void vp8_adapt_nmv_probs(struct VP8Common *cm, int usehp); +#endif + +#if CONFIG_NEWMVENTROPY + +#define VP8_NMV_UPDATE_PROB 255 +//#define MV_GROUP_UPDATE + +#define LOW_PRECISION_MV_UPDATE /* Use 7 bit forward update */ + +/* Symbols for coding which components are zero jointly */ +#define MV_JOINTS 4 +typedef enum { + MV_JOINT_ZERO = 0, /* Zero vector */ + MV_JOINT_HNZVZ = 1, /* Vert zero, hor nonzero */ + MV_JOINT_HZVNZ = 2, /* Hor zero, vert nonzero */ + MV_JOINT_HNZVNZ = 3, /* Both components nonzero */ +} MV_JOINT_TYPE; + +extern const vp8_tree_index vp8_mv_joint_tree[2 * MV_JOINTS - 2]; +extern struct vp8_token_struct vp8_mv_joint_encodings [MV_JOINTS]; + +/* Symbols for coding magnitude class of nonzero components */ +#define MV_CLASSES 8 +typedef enum { + MV_CLASS_0 = 0, /* (0, 2] integer pel */ + MV_CLASS_1 = 1, /* (2, 4] integer pel */ + MV_CLASS_2 = 2, /* (4, 8] integer pel */ + MV_CLASS_3 = 3, /* (8, 16] integer pel */ + MV_CLASS_4 = 4, /* (16, 32] integer pel */ + MV_CLASS_5 = 5, /* (32, 64] integer pel */ + MV_CLASS_6 = 6, /* (64, 128] integer pel */ + MV_CLASS_7 = 7, /* (128, 256] integer pel */ +} MV_CLASS_TYPE; + +extern const vp8_tree_index vp8_mv_class_tree[2 * MV_CLASSES - 2]; +extern struct vp8_token_struct vp8_mv_class_encodings [MV_CLASSES]; + +#define CLASS0_BITS 1 /* bits at integer precision for class 0 */ +#define CLASS0_SIZE (1 << CLASS0_BITS) +#define MV_OFFSET_BITS (MV_CLASSES + CLASS0_BITS - 2) + +#define MV_MAX_BITS (MV_CLASSES + CLASS0_BITS + 2) +#define MV_MAX ((1 << MV_MAX_BITS) - 1) +#define MV_VALS ((MV_MAX << 1) + 1) + +extern const vp8_tree_index vp8_mv_class0_tree[2 * CLASS0_SIZE - 2]; +extern struct vp8_token_struct vp8_mv_class0_encodings[CLASS0_SIZE]; + +extern const vp8_tree_index vp8_mv_fp_tree[2 * 4 - 2]; +extern struct vp8_token_struct vp8_mv_fp_encodings[4]; + +typedef struct { + vp8_prob sign; + vp8_prob classes[MV_CLASSES - 1]; + vp8_prob class0[CLASS0_SIZE - 1]; + vp8_prob bits[MV_OFFSET_BITS]; + vp8_prob class0_fp[CLASS0_SIZE][4 - 1]; + vp8_prob fp[4 - 1]; 
+ vp8_prob class0_hp; + vp8_prob hp; +} nmv_component; + +typedef struct { + vp8_prob joints[MV_JOINTS - 1]; + nmv_component comps[2]; +} nmv_context; + +MV_JOINT_TYPE vp8_get_mv_joint(MV mv); +MV_CLASS_TYPE vp8_get_mv_class(int z, int *offset); +int vp8_get_mv_mag(MV_CLASS_TYPE c, int offset); + + +typedef struct { + unsigned int mvcount[MV_VALS]; + unsigned int sign[2]; + unsigned int classes[MV_CLASSES]; + unsigned int class0[CLASS0_SIZE]; + unsigned int bits[MV_OFFSET_BITS][2]; + unsigned int class0_fp[CLASS0_SIZE][4]; + unsigned int fp[4]; + unsigned int class0_hp[2]; + unsigned int hp[2]; +} nmv_component_counts; + +typedef struct { + unsigned int joints[MV_JOINTS]; + nmv_component_counts comps[2]; +} nmv_context_counts; + +void vp8_increment_nmv(const MV *mv, const MV *ref, nmv_context_counts *mvctx, + int usehp); +extern const nmv_context vp8_default_nmv_context; +void vp8_counts_to_nmv_context( + nmv_context_counts *NMVcount, + nmv_context *prob, + int usehp, + unsigned int (*branch_ct_joint)[2], + unsigned int (*branch_ct_sign)[2], + unsigned int (*branch_ct_classes)[MV_CLASSES - 1][2], + unsigned int (*branch_ct_class0)[CLASS0_SIZE - 1][2], + unsigned int (*branch_ct_bits)[MV_OFFSET_BITS][2], + unsigned int (*branch_ct_class0_fp)[CLASS0_SIZE][4 - 1][2], + unsigned int (*branch_ct_fp)[4 - 1][2], + unsigned int (*branch_ct_class0_hp)[2], + unsigned int (*branch_ct_hp)[2]); + +#else /* CONFIG_NEWMVENTROPY */ + enum { mv_max = 1023, /* max absolute value of a MV component */ MVvals = (2 * mv_max) + 1, /* # possible values "" */ @@ -73,8 +188,6 @@ extern struct vp8_token_struct vp8_small_mvencodings [8]; extern const vp8_tree_index vp8_small_mvtree_hp[]; extern struct vp8_token_struct vp8_small_mvencodings_hp [16]; -void vp8_entropy_mv_init(); -struct VP8Common; -void vp8_adapt_mv_probs(struct VP8Common *cm); +#endif /* CONFIG_NEWMVENTROPY */ #endif diff --git a/vp8/common/onyxc_int.h b/vp8/common/onyxc_int.h index b76c4cc..4cdcbeb 100644 --- a/vp8/common/onyxc_int.h +++ b/vp8/common/onyxc_int.h @@ -55,10 +55,15 @@ typedef struct frame_contexts { #if CONFIG_TX16X16 || CONFIG_HYBRIDTRANSFORM16X16 vp8_prob coef_probs_16x16 [BLOCK_TYPES_16X16] [COEF_BANDS] [PREV_COEF_CONTEXTS] [ENTROPY_NODES]; #endif +#if CONFIG_NEWMVENTROPY + nmv_context nmvc; + nmv_context pre_nmvc; +#else MV_CONTEXT mvc[2]; MV_CONTEXT_HP mvc_hp[2]; MV_CONTEXT pre_mvc[2]; MV_CONTEXT_HP pre_mvc_hp[2]; +#endif vp8_prob pre_bmode_prob [VP8_BINTRAMODES - 1]; vp8_prob pre_ymode_prob [VP8_YMODES - 1]; /* interframe intra mode probs */ vp8_prob pre_uv_mode_prob [VP8_YMODES][VP8_UV_MODES - 1]; @@ -88,8 +93,12 @@ typedef struct frame_contexts { unsigned int coef_counts_16x16 [BLOCK_TYPES_16X16] [COEF_BANDS] [PREV_COEF_CONTEXTS] [MAX_ENTROPY_TOKENS]; #endif +#if CONFIG_NEWMVENTROPY + nmv_context_counts NMVcount; +#else unsigned int MVcount [2] [MVvals]; unsigned int MVcount_hp [2] [MVvals_hp]; +#endif #if CONFIG_SWITCHABLE_INTERP vp8_prob switchable_interp_prob[VP8_SWITCHABLE_FILTERS+1] [VP8_SWITCHABLE_FILTERS-1]; diff --git a/vp8/common/reconinter.c b/vp8/common/reconinter.c index 70d3832..647b3ad 100644 --- a/vp8/common/reconinter.c +++ b/vp8/common/reconinter.c @@ -723,9 +723,9 @@ void vp8_build_1st_inter16x16_predictors_mbuv(MACROBLOCKD *xd, // Sub-pel filter xd->subpixel_predict8x8(pTemp, len, - _o16x16mv.as_mv.col & 15, - _o16x16mv.as_mv.row & 15, - pDst, dst_uvstride); + _o16x16mv.as_mv.col & 15, + _o16x16mv.as_mv.row & 15, + pDst, dst_uvstride); } else { filter_mb(pSrc, pre_stride, pDst, dst_uvstride, 8, 8); } diff 
--git a/vp8/common/treecoder.c b/vp8/common/treecoder.c index def4caa..adf291b 100644 --- a/vp8/common/treecoder.c +++ b/vp8/common/treecoder.c @@ -124,3 +124,15 @@ void vp8_tree_probs_from_distribution( probs[t] = vp8_prob_half; } while (++t < tree_len); } + +vp8_prob vp8_bin_prob_from_distribution(const unsigned int counts[2]) { + int tot_count = counts[0] + counts[1]; + vp8_prob prob; + if (tot_count) { + prob = (counts[0] * 255 + (tot_count >> 1)) / tot_count; + prob += !prob; + } else { + prob = 128; + } + return prob; +} diff --git a/vp8/common/treecoder.h b/vp8/common/treecoder.h index c4d0aa6..b7fa17d 100644 --- a/vp8/common/treecoder.h +++ b/vp8/common/treecoder.h @@ -85,5 +85,6 @@ void vp8bc_tree_probs_from_distribution( c_bool_coder_spec *s ); +vp8_prob vp8_bin_prob_from_distribution(const unsigned int counts[2]); #endif diff --git a/vp8/decoder/decodemv.c b/vp8/decoder/decodemv.c index 069d073..e8b4a1f 100644 --- a/vp8/decoder/decodemv.c +++ b/vp8/decoder/decodemv.c @@ -172,6 +172,150 @@ static void vp8_kfread_modes(VP8D_COMP *pbi, } +#if CONFIG_NEWMVENTROPY +static int read_nmv_component(vp8_reader *r, + int rv, + const nmv_component *mvcomp) { + int v, s, z, c, o, d; + s = vp8_read(r, mvcomp->sign); + c = vp8_treed_read(r, vp8_mv_class_tree, mvcomp->classes); + if (c == MV_CLASS_0) { + d = vp8_treed_read(r, vp8_mv_class0_tree, mvcomp->class0); + } else { + int i, b; + d = 0; + b = c + CLASS0_BITS - 1; /* number of bits */ + for (i = 0; i < b; ++i) + d |= (vp8_read(r, mvcomp->bits[i]) << i); + } + o = d << 3; + + z = vp8_get_mv_mag(c, o); + v = (s ? -(z + 1) : (z + 1)); + return v; +} + +static int read_nmv_component_fp(vp8_reader *r, + int v, + int rv, + const nmv_component *mvcomp, + int usehp) { + int s, z, c, o, d, e, f; + s = v < 0; + z = (s ? -v : v) - 1; /* magnitude - 1 */ + + c = vp8_get_mv_class(z, &o); + d = o >> 3; + + if (c == MV_CLASS_0) { + f = vp8_treed_read(r, vp8_mv_fp_tree, mvcomp->class0_fp[d]); + } else { + f = vp8_treed_read(r, vp8_mv_fp_tree, mvcomp->fp); + } + o += (f << 1); + + if (usehp) { + if (c == MV_CLASS_0) { + e = vp8_read(r, mvcomp->class0_hp); + } else { + e = vp8_read(r, mvcomp->hp); + } + o += e; + } else { + ++o; /* Note if hp is not used, the default value of the hp bit is 1 */ + } + z = vp8_get_mv_mag(c, o); + v = (s ? 
-(z + 1) : (z + 1)); + return v; +} + +static void read_nmv(vp8_reader *r, MV *mv, const MV *ref, + const nmv_context *mvctx) { + MV_JOINT_TYPE j = vp8_treed_read(r, vp8_mv_joint_tree, mvctx->joints); + mv->row = mv-> col = 0; + if (j == MV_JOINT_HZVNZ || j == MV_JOINT_HNZVNZ) { + mv->row = read_nmv_component(r, ref->row, &mvctx->comps[0]); + } + if (j == MV_JOINT_HNZVZ || j == MV_JOINT_HNZVNZ) { + mv->col = read_nmv_component(r, ref->col, &mvctx->comps[1]); + } +} + +static void read_nmv_fp(vp8_reader *r, MV *mv, const MV *ref, + const nmv_context *mvctx, int usehp) { + MV_JOINT_TYPE j = vp8_get_mv_joint(*mv); + if (j == MV_JOINT_HZVNZ || j == MV_JOINT_HNZVNZ) { + mv->row = read_nmv_component_fp(r, mv->row, ref->row, &mvctx->comps[0], + usehp); + } + if (j == MV_JOINT_HNZVZ || j == MV_JOINT_HNZVNZ) { + mv->col = read_nmv_component_fp(r, mv->col, ref->col, &mvctx->comps[1], + usehp); + } +} + +static void update_nmv(vp8_reader *bc, vp8_prob *const p, + const vp8_prob upd_p) { + if (vp8_read(bc, upd_p)) { +#ifdef LOW_PRECISION_MV_UPDATE + *p = (vp8_read_literal(bc, 7) << 1) | 1; +#else + *p = (vp8_read_literal(bc, 8)); +#endif + } +} + +static void read_nmvprobs(vp8_reader *bc, nmv_context *mvctx, + int usehp) { + int i, j, k; +#ifdef MV_GROUP_UPDATE + if (!vp8_read_bit(bc)) return; +#endif + for (j = 0; j < MV_JOINTS - 1; ++j) { + update_nmv(bc, &mvctx->joints[j], + VP8_NMV_UPDATE_PROB); + } + for (i = 0; i < 2; ++i) { + update_nmv(bc, &mvctx->comps[i].sign, + VP8_NMV_UPDATE_PROB); + for (j = 0; j < MV_CLASSES - 1; ++j) { + update_nmv(bc, &mvctx->comps[i].classes[j], + VP8_NMV_UPDATE_PROB); + } + for (j = 0; j < CLASS0_SIZE - 1; ++j) { + update_nmv(bc, &mvctx->comps[i].class0[j], + VP8_NMV_UPDATE_PROB); + } + for (j = 0; j < MV_OFFSET_BITS; ++j) { + update_nmv(bc, &mvctx->comps[i].bits[j], + VP8_NMV_UPDATE_PROB); + } + } + + for (i = 0; i < 2; ++i) { + for (j = 0; j < CLASS0_SIZE; ++j) { + for (k = 0; k < 3; ++k) + update_nmv(bc, &mvctx->comps[i].class0_fp[j][k], + VP8_NMV_UPDATE_PROB); + } + for (j = 0; j < 3; ++j) { + update_nmv(bc, &mvctx->comps[i].fp[j], + VP8_NMV_UPDATE_PROB); + } + } + + if (usehp) { + for (i = 0; i < 2; ++i) { + update_nmv(bc, &mvctx->comps[i].class0_hp, + VP8_NMV_UPDATE_PROB); + update_nmv(bc, &mvctx->comps[i].hp, + VP8_NMV_UPDATE_PROB); + } + } +} + +#else + static int read_mvcomponent(vp8_reader *r, const MV_CONTEXT *mvc) { const vp8_prob *const p = (const vp8_prob *) mvc; int x = 0; @@ -211,7 +355,6 @@ static void read_mv(vp8_reader *r, MV *mv, const MV_CONTEXT *mvc) { #endif } - static void read_mvcontexts(vp8_reader *bc, MV_CONTEXT *mvc) { int i = 0; @@ -287,6 +430,8 @@ static void read_mvcontexts_hp(vp8_reader *bc, MV_CONTEXT_HP *mvc) { } while (++i < 2); } +#endif /* CONFIG_NEWMVENTROPY */ + // Read the referncence frame static MV_REFERENCE_FRAME read_ref_frame(VP8D_COMP *pbi, vp8_reader *const bc, @@ -452,8 +597,12 @@ static void read_switchable_interp_probs(VP8D_COMP *pbi) { static void mb_mode_mv_init(VP8D_COMP *pbi) { VP8_COMMON *const cm = & pbi->common; vp8_reader *const bc = & pbi->bc; +#if CONFIG_NEWMVENTROPY + nmv_context *const nmvc = &pbi->common.fc.nmvc; +#else MV_CONTEXT *const mvc = pbi->common.fc.mvc; MV_CONTEXT_HP *const mvc_hp = pbi->common.fc.mvc_hp; +#endif MACROBLOCKD *const xd = & pbi->mb; vpx_memset(cm->mbskip_pred_probs, 0, sizeof(cm->mbskip_pred_probs)); @@ -499,10 +648,14 @@ static void mb_mode_mv_init(VP8D_COMP *pbi) { cm->fc.ymode_prob[i] = (vp8_prob) vp8_read_literal(bc, 8); } while (++i < VP8_YMODES - 1); } +#if 
CONFIG_NEWMVENTROPY + read_nmvprobs(bc, nmvc, xd->allow_high_precision_mv); +#else if (xd->allow_high_precision_mv) read_mvcontexts_hp(bc, mvc_hp); else read_mvcontexts(bc, mvc); +#endif } } @@ -585,12 +738,16 @@ static void read_mb_modes_mv(VP8D_COMP *pbi, MODE_INFO *mi, MB_MODE_INFO *mbmi, int mb_row, int mb_col) { VP8_COMMON *const cm = & pbi->common; vp8_reader *const bc = & pbi->bc; +#if CONFIG_NEWMVENTROPY + nmv_context *const nmvc = &pbi->common.fc.nmvc; +#else MV_CONTEXT *const mvc = pbi->common.fc.mvc; MV_CONTEXT_HP *const mvc_hp = pbi->common.fc.mvc_hp; +#endif const int mis = pbi->common.mode_info_stride; MACROBLOCKD *const xd = & pbi->mb; - int_mv *const mv = & mbmi->mv[0]; + int_mv *const mv = & mbmi->mv; int mb_to_left_edge; int mb_to_right_edge; int mb_to_top_edge; @@ -684,7 +841,6 @@ static void read_mb_modes_mv(VP8D_COMP *pbi, MODE_INFO *mi, MB_MODE_INFO *mbmi, // Update stats on relative distance of chosen vector to the // possible best reference vectors. { - int i; MV_REFERENCE_FRAME ref_frame = mbmi->ref_frame; find_mv_refs(xd, mi, prev_mi, @@ -833,8 +989,6 @@ static void read_mb_modes_mv(VP8D_COMP *pbi, MODE_INFO *mi, MB_MODE_INFO *mbmi, int mv_contz; int blockmode; - second_leftmv.as_int = 0; - second_abovemv.as_int = 0; k = vp8_mbsplit_offset[s][j]; leftmv.as_int = left_block_mv(mi, k); @@ -849,6 +1003,13 @@ static void read_mb_modes_mv(VP8D_COMP *pbi, MODE_INFO *mi, MB_MODE_INFO *mbmi, switch (blockmode) { case NEW4X4: +#if CONFIG_NEWMVENTROPY + read_nmv(bc, &blockmv.as_mv, &best_mv.as_mv, nmvc); + read_nmv_fp(bc, &blockmv.as_mv, &best_mv.as_mv, nmvc, + xd->allow_high_precision_mv); + vp8_increment_nmv(&blockmv.as_mv, &best_mv.as_mv, + &cm->fc.NMVcount, xd->allow_high_precision_mv); +#else if (xd->allow_high_precision_mv) { read_mv_hp(bc, &blockmv.as_mv, (const MV_CONTEXT_HP *) mvc_hp); cm->fc.MVcount_hp[0][mv_max_hp + (blockmv.as_mv.row)]++; @@ -858,10 +1019,18 @@ static void read_mb_modes_mv(VP8D_COMP *pbi, MODE_INFO *mi, MB_MODE_INFO *mbmi, cm->fc.MVcount[0][mv_max + (blockmv.as_mv.row >> 1)]++; cm->fc.MVcount[1][mv_max + (blockmv.as_mv.col >> 1)]++; } +#endif /* CONFIG_NEWMVENTROPY */ blockmv.as_mv.row += best_mv.as_mv.row; blockmv.as_mv.col += best_mv.as_mv.col; if (mbmi->second_ref_frame) { +#if CONFIG_NEWMVENTROPY + read_nmv(bc, &secondmv.as_mv, &best_mv_second.as_mv, nmvc); + read_nmv_fp(bc, &secondmv.as_mv, &best_mv_second.as_mv, nmvc, + xd->allow_high_precision_mv); + vp8_increment_nmv(&secondmv.as_mv, &best_mv_second.as_mv, + &cm->fc.NMVcount, xd->allow_high_precision_mv); +#else if (xd->allow_high_precision_mv) { read_mv_hp(bc, &secondmv.as_mv, (const MV_CONTEXT_HP *) mvc_hp); cm->fc.MVcount_hp[0][mv_max_hp + (secondmv.as_mv.row)]++; @@ -871,6 +1040,7 @@ static void read_mb_modes_mv(VP8D_COMP *pbi, MODE_INFO *mi, MB_MODE_INFO *mbmi, cm->fc.MVcount[0][mv_max + (secondmv.as_mv.row >> 1)]++; cm->fc.MVcount[1][mv_max + (secondmv.as_mv.col >> 1)]++; } +#endif /* CONFIG_NEWMVENTROPY */ secondmv.as_mv.row += best_mv_second.as_mv.row; secondmv.as_mv.col += best_mv_second.as_mv.col; } @@ -975,6 +1145,13 @@ static void read_mb_modes_mv(VP8D_COMP *pbi, MODE_INFO *mi, MB_MODE_INFO *mbmi, break; case NEWMV: +#if CONFIG_NEWMVENTROPY + read_nmv(bc, &mv->as_mv, &best_mv.as_mv, nmvc); + read_nmv_fp(bc, &mv->as_mv, &best_mv.as_mv, nmvc, + xd->allow_high_precision_mv); + vp8_increment_nmv(&mv->as_mv, &best_mv.as_mv, &cm->fc.NMVcount, + xd->allow_high_precision_mv); +#else if (xd->allow_high_precision_mv) { read_mv_hp(bc, &mv->as_mv, (const MV_CONTEXT_HP *) mvc_hp); 
cm->fc.MVcount_hp[0][mv_max_hp + (mv->as_mv.row)]++; @@ -984,6 +1161,7 @@ static void read_mb_modes_mv(VP8D_COMP *pbi, MODE_INFO *mi, MB_MODE_INFO *mbmi, cm->fc.MVcount[0][mv_max + (mv->as_mv.row >> 1)]++; cm->fc.MVcount[1][mv_max + (mv->as_mv.col >> 1)]++; } +#endif /* CONFIG_NEWMVENTROPY */ mv->as_mv.row += best_mv.as_mv.row; mv->as_mv.col += best_mv.as_mv.col; @@ -998,6 +1176,13 @@ static void read_mb_modes_mv(VP8D_COMP *pbi, MODE_INFO *mi, MB_MODE_INFO *mbmi, mb_to_top_edge, mb_to_bottom_edge); if (mbmi->second_ref_frame) { +#if CONFIG_NEWMVENTROPY + read_nmv(bc, &mbmi->mv[1].as_mv, &best_mv_second.as_mv, nmvc); + read_nmv_fp(bc, &mbmi->mv[1].as_mv, &best_mv_second.as_mv, nmvc, + xd->allow_high_precision_mv); + vp8_increment_nmv(&mbmi->mv[1].as_mv, &best_mv_second.as_mv, + &cm->fc.NMVcount, xd->allow_high_precision_mv); +#else if (xd->allow_high_precision_mv) { read_mv_hp(bc, &mbmi->mv[1].as_mv, (const MV_CONTEXT_HP *) mvc_hp); cm->fc.MVcount_hp[0][mv_max_hp + (mbmi->mv[1].as_mv.row)]++; @@ -1007,6 +1192,7 @@ static void read_mb_modes_mv(VP8D_COMP *pbi, MODE_INFO *mi, MB_MODE_INFO *mbmi, cm->fc.MVcount[0][mv_max + (mbmi->mv[1].as_mv.row >> 1)]++; cm->fc.MVcount[1][mv_max + (mbmi->mv[1].as_mv.col >> 1)]++; } +#endif /* CONFIG_NEWMVENTROPY */ mbmi->mv[1].as_mv.row += best_mv_second.as_mv.row; mbmi->mv[1].as_mv.col += best_mv_second.as_mv.col; mbmi->need_to_clamp_secondmv |= diff --git a/vp8/decoder/decodframe.c b/vp8/decoder/decodframe.c index d932683..5fb510b 100644 --- a/vp8/decoder/decodframe.c +++ b/vp8/decoder/decodframe.c @@ -923,9 +923,7 @@ static void init_frame(VP8D_COMP *pbi) { if (pc->frame_type == KEY_FRAME) { /* Various keyframe initializations */ - vpx_memcpy(pc->fc.mvc, vp8_default_mv_context, sizeof(vp8_default_mv_context)); - vpx_memcpy(pc->fc.mvc_hp, vp8_default_mv_context_hp, - sizeof(vp8_default_mv_context_hp)); + vp8_init_mv_probs(pc); vp8_init_mbmode_probs(pc); @@ -1464,8 +1462,12 @@ int vp8_decode_frame(VP8D_COMP *pbi) { vp8_copy(pbi->common.fc.pre_i8x8_mode_prob, pbi->common.fc.i8x8_mode_prob); vp8_copy(pbi->common.fc.pre_sub_mv_ref_prob, pbi->common.fc.sub_mv_ref_prob); vp8_copy(pbi->common.fc.pre_mbsplit_prob, pbi->common.fc.mbsplit_prob); +#if CONFIG_NEWMVENTROPY + pbi->common.fc.pre_nmvc = pbi->common.fc.nmvc; +#else vp8_copy(pbi->common.fc.pre_mvc, pbi->common.fc.mvc); vp8_copy(pbi->common.fc.pre_mvc_hp, pbi->common.fc.mvc_hp); +#endif vp8_zero(pbi->common.fc.coef_counts); vp8_zero(pbi->common.fc.coef_counts_8x8); #if CONFIG_TX16X16 || CONFIG_HYBRIDTRANSFORM16X16 @@ -1477,8 +1479,12 @@ int vp8_decode_frame(VP8D_COMP *pbi) { vp8_zero(pbi->common.fc.i8x8_mode_counts); vp8_zero(pbi->common.fc.sub_mv_ref_counts); vp8_zero(pbi->common.fc.mbsplit_counts); +#if CONFIG_NEWMVENTROPY + vp8_zero(pbi->common.fc.NMVcount); +#else vp8_zero(pbi->common.fc.MVcount); vp8_zero(pbi->common.fc.MVcount_hp); +#endif vp8_zero(pbi->common.fc.mv_ref_ct); vp8_zero(pbi->common.fc.mv_ref_ct_a); #if COEFUPDATETYPE == 2 @@ -1544,7 +1550,11 @@ int vp8_decode_frame(VP8D_COMP *pbi) { vp8_adapt_coef_probs(pc); if (pc->frame_type != KEY_FRAME) { vp8_adapt_mode_probs(pc); +#if CONFIG_NEWMVENTROPY + vp8_adapt_nmv_probs(pc, xd->allow_high_precision_mv); +#else vp8_adapt_mv_probs(pc); +#endif vp8_update_mode_context(&pbi->common); } diff --git a/vp8/encoder/bitstream.c b/vp8/encoder/bitstream.c index 7d94bec..7120196 100644 --- a/vp8/encoder/bitstream.c +++ b/vp8/encoder/bitstream.c @@ -27,6 +27,7 @@ #include "vp8/common/seg_common.h" #include "vp8/common/pred_common.h" #include 
"vp8/common/entropy.h" +#include "vp8/encoder/encodemv.h" #if CONFIG_NEW_MVREF #include "vp8/common/mvref_common.h" @@ -36,6 +37,8 @@ unsigned __int64 Sectionbits[500]; #endif +//int final_packing = 0; + #ifdef ENTROPY_STATS int intra_mode_stats [VP8_BINTRAMODES] [VP8_BINTRAMODES] [VP8_BINTRAMODES]; unsigned int tree_update_hist [BLOCK_TYPES] @@ -564,6 +567,19 @@ static void write_sub_mv_ref vp8_sub_mv_ref_encoding_array - LEFT4X4 + m); } +#if CONFIG_NEWMVENTROPY +static void write_nmv (vp8_writer *w, const MV *mv, const int_mv *ref, + const nmv_context *nmvc, int usehp) { + MV e; + e.row = mv->row - ref->as_mv.row; + e.col = mv->col - ref->as_mv.col; + + vp8_encode_nmv(w, &e, &ref->as_mv, nmvc); + vp8_encode_nmv_fp(w, &e, &ref->as_mv, nmvc, usehp); +} + +#else + static void write_mv ( vp8_writer *w, const MV *mv, const int_mv *ref, const MV_CONTEXT *mvc @@ -585,6 +601,7 @@ static void write_mv_hp vp8_encode_motion_vector_hp(w, &e, mvc); } +#endif /* CONFIG_NEWMVENTROPY */ // This function writes the current macro block's segnment id to the bitstream // It should only be called if a segment map update is indicated. @@ -737,8 +754,12 @@ static void pack_inter_mode_mvs(VP8_COMP *const cpi) { int i; VP8_COMMON *const pc = & cpi->common; vp8_writer *const w = & cpi->bc; +#if CONFIG_NEWMVENTROPY + const nmv_context *nmvc = &pc->fc.nmvc; +#else const MV_CONTEXT *mvc = pc->fc.mvc; const MV_CONTEXT_HP *mvc_hp = pc->fc.mvc_hp; +#endif MACROBLOCKD *xd = &cpi->mb.e_mbd; MODE_INFO *m; MODE_INFO *prev_m; @@ -754,6 +775,8 @@ static void pack_inter_mode_mvs(VP8_COMP *const cpi) { int row_delta[4] = { 0, +1, 0, -1}; int col_delta[4] = { +1, -1, +1, +1}; + //final_packing = !cpi->dummy_packing; + cpi->mb.partition_info = cpi->mb.pi; // Update the probabilities used to encode reference frame data @@ -814,10 +837,14 @@ static void pack_inter_mode_mvs(VP8_COMP *const cpi) { update_mbintra_mode_probs(cpi); +#if CONFIG_NEWMVENTROPY + vp8_write_nmvprobs(cpi, xd->allow_high_precision_mv); +#else if (xd->allow_high_precision_mv) vp8_write_mvprobs_hp(cpi); else vp8_write_mvprobs(cpi); +#endif mb_row = 0; for (row = 0; row < pc->mb_rows; row += 2) { @@ -1055,10 +1082,17 @@ static void pack_inter_mode_mvs(VP8_COMP *const cpi) { pick_best_mv_ref( mi->mv[0], mi->ref_mvs[rf], &best_mv); #endif - if (xd->allow_high_precision_mv) +#if CONFIG_NEWMVENTROPY + write_nmv(w, &mi->mv[0].as_mv, &best_mv, + (const nmv_context*) nmvc, + xd->allow_high_precision_mv); +#else + if (xd->allow_high_precision_mv) { write_mv_hp(w, &mi->mv[0].as_mv, &best_mv, mvc_hp); - else + } else { write_mv(w, &mi->mv[0].as_mv, &best_mv, mvc); + } +#endif if (mi->second_ref_frame) { #if 0 //CONFIG_NEW_MVREF @@ -1071,10 +1105,17 @@ static void pack_inter_mode_mvs(VP8_COMP *const cpi) { mi->ref_mvs[mi->second_ref_frame], &best_second_mv); #endif - if (xd->allow_high_precision_mv) +#if CONFIG_NEWMVENTROPY + write_nmv(w, &mi->mv[1].as_mv, &best_second_mv, + (const nmv_context*) nmvc, + xd->allow_high_precision_mv); +#else + if (xd->allow_high_precision_mv) { write_mv_hp(w, &mi->mv[1].as_mv, &best_second_mv, mvc_hp); - else + } else { write_mv(w, &mi->mv[1].as_mv, &best_second_mv, mvc); + } +#endif } break; case SPLITMV: { @@ -1116,6 +1157,11 @@ static void pack_inter_mode_mvs(VP8_COMP *const cpi) { #ifdef ENTROPY_STATS active_section = 11; #endif +#if CONFIG_NEWMVENTROPY + write_nmv(w, &blockmv.as_mv, &best_mv, + (const nmv_context*) nmvc, + xd->allow_high_precision_mv); +#else if (xd->allow_high_precision_mv) { write_mv_hp(w, &blockmv.as_mv, &best_mv, 
(const MV_CONTEXT_HP *) mvc_hp); @@ -1123,8 +1169,16 @@ static void pack_inter_mode_mvs(VP8_COMP *const cpi) { write_mv(w, &blockmv.as_mv, &best_mv, (const MV_CONTEXT *) mvc); } +#endif if (mi->second_ref_frame) { +#if CONFIG_NEWMVENTROPY + write_nmv(w, + &cpi->mb.partition_info->bmi[j].second_mv.as_mv, + &best_second_mv, + (const nmv_context*) nmvc, + xd->allow_high_precision_mv); +#else if (xd->allow_high_precision_mv) { write_mv_hp(w, &cpi->mb.partition_info->bmi[j].second_mv.as_mv, &best_second_mv, (const MV_CONTEXT_HP *) mvc_hp); @@ -1132,6 +1186,7 @@ static void pack_inter_mode_mvs(VP8_COMP *const cpi) { write_mv(w, &cpi->mb.partition_info->bmi[j].second_mv.as_mv, &best_second_mv, (const MV_CONTEXT *) mvc); } +#endif } } } while (++j < cpi->mb.partition_info->count); @@ -2313,8 +2368,12 @@ void vp8_pack_bitstream(VP8_COMP *cpi, unsigned char *dest, unsigned long *size) vp8_copy(cpi->common.fc.pre_sub_mv_ref_prob, cpi->common.fc.sub_mv_ref_prob); vp8_copy(cpi->common.fc.pre_mbsplit_prob, cpi->common.fc.mbsplit_prob); vp8_copy(cpi->common.fc.pre_i8x8_mode_prob, cpi->common.fc.i8x8_mode_prob); +#if CONFIG_NEWMVENTROPY + cpi->common.fc.pre_nmvc = cpi->common.fc.nmvc; +#else vp8_copy(cpi->common.fc.pre_mvc, cpi->common.fc.mvc); vp8_copy(cpi->common.fc.pre_mvc_hp, cpi->common.fc.mvc_hp); +#endif vp8_zero(cpi->sub_mv_ref_count); vp8_zero(cpi->mbsplit_count); vp8_zero(cpi->common.fc.mv_ref_ct) diff --git a/vp8/encoder/block.h b/vp8/encoder/block.h index 7798056..c0cd2e7 100644 --- a/vp8/encoder/block.h +++ b/vp8/encoder/block.h @@ -117,6 +117,19 @@ typedef struct { int *mb_norm_activity_ptr; signed int act_zbin_adj; +#if CONFIG_NEWMVENTROPY + int nmvjointcost[MV_JOINTS]; + int nmvcosts[2][MV_VALS]; + int *nmvcost[2]; + int nmvcosts_hp[2][MV_VALS]; + int *nmvcost_hp[2]; + + int nmvjointsadcost[MV_JOINTS]; + int nmvsadcosts[2][MV_VALS]; + int *nmvsadcost[2]; + int nmvsadcosts_hp[2][MV_VALS]; + int *nmvsadcost_hp[2]; +#else int mvcosts[2][MVvals + 1]; int *mvcost[2]; int mvsadcosts[2][MVfpvals + 1]; @@ -125,6 +138,7 @@ typedef struct { int *mvcost_hp[2]; int mvsadcosts_hp[2][MVfpvals_hp + 1]; int *mvsadcost_hp[2]; +#endif /* CONFIG_NEWMVENTROPY */ int mbmode_cost[2][MB_MODE_COUNT]; int intra_uv_mode_cost[2][MB_MODE_COUNT]; @@ -150,8 +164,6 @@ typedef struct { signed char *gf_active_ptr; unsigned char *active_ptr; - MV_CONTEXT *mvc; - MV_CONTEXT_HP *mvc_hp; unsigned int token_costs[TX_SIZE_MAX][BLOCK_TYPES][COEF_BANDS] [PREV_COEF_CONTEXTS][MAX_ENTROPY_TOKENS]; diff --git a/vp8/encoder/encodeframe.c b/vp8/encoder/encodeframe.c index 41b87f2..4670c31 100644 --- a/vp8/encoder/encodeframe.c +++ b/vp8/encoder/encodeframe.c @@ -1280,9 +1280,6 @@ void init_encode_frame_mb_context(VP8_COMP *cpi) { #endif // vp8_zero(cpi->uv_mode_count) - x->mvc = cm->fc.mvc; - x->mvc_hp = cm->fc.mvc_hp; - vpx_memset(cm->above_context, 0, sizeof(ENTROPY_CONTEXT_PLANES) * cm->mb_cols); @@ -1357,8 +1354,12 @@ static void encode_frame_internal(VP8_COMP *cpi) { xd->mode_info_context = cm->mi; xd->prev_mode_info_context = cm->prev_mi; +#if CONFIG_NEWMVENTROPY + vp8_zero(cpi->NMVcount); +#else vp8_zero(cpi->MVcount); vp8_zero(cpi->MVcount_hp); +#endif vp8_zero(cpi->coef_counts); vp8_zero(cpi->coef_counts_8x8); #if CONFIG_TX16X16 || CONFIG_HYBRIDTRANSFORM16X16 diff --git a/vp8/encoder/encodemv.c b/vp8/encoder/encodemv.c index e5426f6..217cc00 100644 --- a/vp8/encoder/encodemv.c +++ b/vp8/encoder/encodemv.c @@ -20,6 +20,536 @@ extern unsigned int active_section; #endif +//extern int final_packing; + +#if CONFIG_NEWMVENTROPY + 
+#ifdef NMV_STATS +nmv_context_counts tnmvcounts; +#endif + +static void encode_nmv_component(vp8_writer *w, + int v, + int r, + const nmv_component *mvcomp) { + int s, z, c, o, d; + assert (v != 0); /* should not be zero */ + s = v < 0; + vp8_write(w, s, mvcomp->sign); + z = (s ? -v : v) - 1; /* magnitude - 1 */ + + c = vp8_get_mv_class(z, &o); + + vp8_write_token(w, vp8_mv_class_tree, mvcomp->classes, + vp8_mv_class_encodings + c); + + d = (o >> 3); /* int mv data */ + + if (c == MV_CLASS_0) { + vp8_write_token(w, vp8_mv_class0_tree, mvcomp->class0, + vp8_mv_class0_encodings + d); + } else { + int i, b; + b = c + CLASS0_BITS - 1; /* number of bits */ + for (i = 0; i < b; ++i) + vp8_write(w, ((d >> i) & 1), mvcomp->bits[i]); + } +} + +static void encode_nmv_component_fp(vp8_writer *w, + int v, + int r, + const nmv_component *mvcomp, + int usehp) { + int s, z, c, o, d, f, e; + assert (v != 0); /* should not be zero */ + s = v < 0; + z = (s ? -v : v) - 1; /* magnitude - 1 */ + + c = vp8_get_mv_class(z, &o); + + d = (o >> 3); /* int mv data */ + f = (o >> 1) & 3; /* fractional pel mv data */ + e = (o & 1); /* high precision mv data */ + + /* Code the fractional pel bits */ + if (c == MV_CLASS_0) { + vp8_write_token(w, vp8_mv_fp_tree, mvcomp->class0_fp[d], + vp8_mv_fp_encodings + f); + } else { + vp8_write_token(w, vp8_mv_fp_tree, mvcomp->fp, + vp8_mv_fp_encodings + f); + } + /* Code the high precision bit */ + if (usehp) { + if (c == MV_CLASS_0) { + vp8_write(w, e, mvcomp->class0_hp); + } else { + vp8_write(w, e, mvcomp->hp); + } + } +} + +static void build_nmv_component_cost_table(int *mvcost, + const nmv_component *mvcomp, + int usehp) { + int i, v; + int sign_cost[2], class_cost[MV_CLASSES], class0_cost[CLASS0_SIZE]; + int bits_cost[MV_OFFSET_BITS][2]; + int class0_fp_cost[CLASS0_SIZE][4], fp_cost[4]; + int class0_hp_cost[2], hp_cost[2]; + + sign_cost[0] = vp8_cost_zero(mvcomp->sign); + sign_cost[1] = vp8_cost_one(mvcomp->sign); + vp8_cost_tokens(class_cost, mvcomp->classes, vp8_mv_class_tree); + vp8_cost_tokens(class0_cost, mvcomp->class0, vp8_mv_class0_tree); + for (i = 0; i < MV_OFFSET_BITS; ++i) { + bits_cost[i][0] = vp8_cost_zero(mvcomp->bits[i]); + bits_cost[i][1] = vp8_cost_one(mvcomp->bits[i]); + } + + for (i = 0; i < CLASS0_SIZE; ++i) + vp8_cost_tokens(class0_fp_cost[i], mvcomp->class0_fp[i], vp8_mv_fp_tree); + vp8_cost_tokens(fp_cost, mvcomp->fp, vp8_mv_fp_tree); + + if (usehp) { + class0_hp_cost[0] = vp8_cost_zero(mvcomp->class0_hp); + class0_hp_cost[1] = vp8_cost_one(mvcomp->class0_hp); + hp_cost[0] = vp8_cost_zero(mvcomp->hp); + hp_cost[1] = vp8_cost_one(mvcomp->hp); + } + mvcost[0] = 0; + for (v = 1; v <= MV_MAX; ++v) { + int z, c, o, d, e, f, cost = 0; + z = v - 1; + c = vp8_get_mv_class(z, &o); + cost += class_cost[c]; + d = (o >> 3); /* int mv data */ + f = (o >> 1) & 3; /* fractional pel mv data */ + e = (o & 1); /* high precision mv data */ + if (c == MV_CLASS_0) { + cost += class0_cost[d]; + } else { + int i, b; + b = c + CLASS0_BITS - 1; /* number of bits */ + for (i = 0; i < b; ++i) + cost += bits_cost[i][((d >> i) & 1)]; + } + if (c == MV_CLASS_0) { + cost += class0_fp_cost[d][f]; + } else { + cost += fp_cost[f]; + } + if (usehp) { + if (c == MV_CLASS_0) { + cost += class0_hp_cost[e]; + } else { + cost += hp_cost[e]; + } + } + mvcost[v] = cost + sign_cost[0]; + mvcost[-v] = cost + sign_cost[1]; + } +} + +static int update_nmv_savings(const unsigned int ct[2], + const vp8_prob cur_p, + const vp8_prob new_p, + const vp8_prob upd_p) { + +#ifdef LOW_PRECISION_MV_UPDATE 
+ vp8_prob mod_p = new_p | 1; +#else + vp8_prob mod_p = new_p; +#endif + const int cur_b = vp8_cost_branch256(ct, cur_p); + const int mod_b = vp8_cost_branch256(ct, mod_p); + const int cost = 7 * 256 + +#ifndef LOW_PRECISION_MV_UPDATE + 256 + +#endif + (vp8_cost_one(upd_p) - vp8_cost_zero(upd_p)); + if (cur_b - mod_b - cost > 0) { + return cur_b - mod_b - cost; + } else { + return -vp8_cost_zero(upd_p); + } +} + +static int update_nmv( + vp8_writer *const w, + const unsigned int ct[2], + vp8_prob *const cur_p, + const vp8_prob new_p, + const vp8_prob upd_p) { + +#ifdef LOW_PRECISION_MV_UPDATE + vp8_prob mod_p = new_p | 1; +#else + vp8_prob mod_p = new_p; +#endif + + const int cur_b = vp8_cost_branch256(ct, *cur_p); + const int mod_b = vp8_cost_branch256(ct, mod_p); + const int cost = 7 * 256 + +#ifndef LOW_PRECISION_MV_UPDATE + 256 + +#endif + (vp8_cost_one(upd_p) - vp8_cost_zero(upd_p)); + + if (cur_b - mod_b > cost) { + *cur_p = mod_p; + vp8_write(w, 1, upd_p); +#ifdef LOW_PRECISION_MV_UPDATE + vp8_write_literal(w, mod_p >> 1, 7); +#else + vp8_write_literal(w, mod_p, 8); +#endif + return 1; + } else { + vp8_write(w, 0, upd_p); + return 0; + } +} + +#ifdef NMV_STATS +void init_nmvstats() { + vp8_zero(tnmvcounts); +} + +void print_nmvstats() { + nmv_context prob; + unsigned int branch_ct_joint[MV_JOINTS - 1][2]; + unsigned int branch_ct_sign[2][2]; + unsigned int branch_ct_classes[2][MV_CLASSES - 1][2]; + unsigned int branch_ct_class0[2][CLASS0_SIZE - 1][2]; + unsigned int branch_ct_bits[2][MV_OFFSET_BITS][2]; + unsigned int branch_ct_class0_fp[2][CLASS0_SIZE][4 - 1][2]; + unsigned int branch_ct_fp[2][4 - 1][2]; + unsigned int branch_ct_class0_hp[2][2]; + unsigned int branch_ct_hp[2][2]; + int i, j, k; + vp8_counts_to_nmv_context(&tnmvcounts, &prob, 1, + branch_ct_joint, branch_ct_sign, branch_ct_classes, + branch_ct_class0, branch_ct_bits, + branch_ct_class0_fp, branch_ct_fp, + branch_ct_class0_hp, branch_ct_hp); + + printf("\nCounts =\n { "); + for (j = 0; j < MV_JOINTS; ++j) + printf("%d, ", tnmvcounts.joints[j]); + printf("},\n"); + for (i=0; i< 2; ++i) { + printf(" {\n"); + printf(" %d/%d,\n", tnmvcounts.comps[i].sign[0], + tnmvcounts.comps[i].sign[1]); + printf(" { "); + for (j = 0; j < MV_CLASSES; ++j) + printf("%d, ", tnmvcounts.comps[i].classes[j]); + printf("},\n"); + printf(" { "); + for (j = 0; j < CLASS0_SIZE; ++j) + printf("%d, ", tnmvcounts.comps[i].class0[j]); + printf("},\n"); + printf(" { "); + for (j = 0; j < MV_OFFSET_BITS; ++j) + printf("%d/%d, ", tnmvcounts.comps[i].bits[j][0], + tnmvcounts.comps[i].bits[j][1]); + printf("},\n"); + + printf(" {"); + for (j = 0; j < CLASS0_SIZE; ++j) { + printf("{"); + for (k = 0; k < 4; ++k) + printf("%d, ", tnmvcounts.comps[i].class0_fp[j][k]); + printf("}, "); + } + printf("},\n"); + + printf(" { "); + for (j = 0; j < 4; ++j) + printf("%d, ", tnmvcounts.comps[i].fp[j]); + printf("},\n"); + + printf(" %d/%d,\n", + tnmvcounts.comps[i].class0_hp[0], + tnmvcounts.comps[i].class0_hp[1]); + printf(" %d/%d,\n", + tnmvcounts.comps[i].hp[0], + tnmvcounts.comps[i].hp[1]); + printf(" },\n"); + } + + printf("\nProbs =\n { "); + for (j = 0; j < MV_JOINTS - 1; ++j) + printf("%d, ", prob.joints[j]); + printf("},\n"); + for (i=0; i< 2; ++i) { + printf(" {\n"); + printf(" %d,\n", prob.comps[i].sign); + printf(" { "); + for (j = 0; j < MV_CLASSES - 1; ++j) + printf("%d, ", prob.comps[i].classes[j]); + printf("},\n"); + printf(" { "); + for (j = 0; j < CLASS0_SIZE - 1; ++j) + printf("%d, ", prob.comps[i].class0[j]); + printf("},\n"); + printf(" { "); 
+ for (j = 0; j < MV_OFFSET_BITS; ++j) + printf("%d, ", prob.comps[i].bits[j]); + printf("},\n"); + printf(" { "); + for (j = 0; j < CLASS0_SIZE; ++j) { + printf("{"); + for (k = 0; k < 3; ++k) + printf("%d, ", prob.comps[i].class0_fp[j][k]); + printf("}, "); + } + printf("},\n"); + printf(" { "); + for (j = 0; j < 3; ++j) + printf("%d, ", prob.comps[i].fp[j]); + printf("},\n"); + + printf(" %d,\n", prob.comps[i].class0_hp); + printf(" %d,\n", prob.comps[i].hp); + printf(" },\n"); + } +} + +static void add_nmvcount(nmv_context_counts *dst, nmv_context_counts *src) { + int i, j, k; + for (j = 0; j < MV_JOINTS; ++j) { + dst->joints[j] += src->joints[j]; + } + for (i = 0; i < 2; ++i) { + for (j = 0; j < MV_VALS; ++j) { + dst->comps[i].mvcount[j] += src->comps[i].mvcount[j]; + } + dst->comps[i].sign[0] += src->comps[i].sign[0]; + dst->comps[i].sign[1] += src->comps[i].sign[1]; + for (j = 0; j < MV_CLASSES; ++j) { + dst->comps[i].classes[j] += src->comps[i].classes[j]; + } + for (j = 0; j < CLASS0_SIZE; ++j) { + dst->comps[i].class0[j] += src->comps[i].class0[j]; + } + for (j = 0; j < MV_OFFSET_BITS; ++j) { + dst->comps[i].bits[j][0] += src->comps[i].bits[j][0]; + dst->comps[i].bits[j][1] += src->comps[i].bits[j][1]; + } + } + for (i = 0; i < 2; ++i) { + for (j = 0; j < CLASS0_SIZE; ++j) { + for (k = 0; k < 4; ++k) { + dst->comps[i].class0_fp[j][k] += src->comps[i].class0_fp[j][k]; + } + } + for (j = 0; j < 4; ++j) { + dst->comps[i].fp[j] += src->comps[i].fp[j]; + } + dst->comps[i].class0_hp[0] += src->comps[i].class0_hp[0]; + dst->comps[i].class0_hp[1] += src->comps[i].class0_hp[1]; + dst->comps[i].hp[0] += src->comps[i].hp[0]; + dst->comps[i].hp[1] += src->comps[i].hp[1]; + } +} +#endif + +void vp8_write_nmvprobs(VP8_COMP * cpi, int usehp) { + vp8_writer *const w = & cpi->bc; + int i, j; + nmv_context prob; + unsigned int branch_ct_joint[MV_JOINTS - 1][2]; + unsigned int branch_ct_sign[2][2]; + unsigned int branch_ct_classes[2][MV_CLASSES - 1][2]; + unsigned int branch_ct_class0[2][CLASS0_SIZE - 1][2]; + unsigned int branch_ct_bits[2][MV_OFFSET_BITS][2]; + unsigned int branch_ct_class0_fp[2][CLASS0_SIZE][4 - 1][2]; + unsigned int branch_ct_fp[2][4 - 1][2]; + unsigned int branch_ct_class0_hp[2][2]; + unsigned int branch_ct_hp[2][2]; + int savings = 0; + +#ifdef NMV_STATS + if (!cpi->dummy_packing) + add_nmvcount(&tnmvcounts, &cpi->NMVcount); +#endif + vp8_counts_to_nmv_context(&cpi->NMVcount, &prob, usehp, + branch_ct_joint, branch_ct_sign, branch_ct_classes, + branch_ct_class0, branch_ct_bits, + branch_ct_class0_fp, branch_ct_fp, + branch_ct_class0_hp, branch_ct_hp); + /* write updates if they help */ +#ifdef MV_GROUP_UPDATE + for (j = 0; j < MV_JOINTS - 1; ++j) { + savings += update_nmv_savings(branch_ct_joint[j], + cpi->common.fc.nmvc.joints[j], + prob.joints[j], + VP8_NMV_UPDATE_PROB); + } + for (i = 0; i < 2; ++i) { + savings += update_nmv_savings(branch_ct_sign[i], + cpi->common.fc.nmvc.comps[i].sign, + prob.comps[i].sign, + VP8_NMV_UPDATE_PROB); + for (j = 0; j < MV_CLASSES - 1; ++j) { + savings += update_nmv_savings(branch_ct_classes[i][j], + cpi->common.fc.nmvc.comps[i].classes[j], + prob.comps[i].classes[j], + VP8_NMV_UPDATE_PROB); + } + for (j = 0; j < CLASS0_SIZE - 1; ++j) { + savings += update_nmv_savings(branch_ct_class0[i][j], + cpi->common.fc.nmvc.comps[i].class0[j], + prob.comps[i].class0[j], + VP8_NMV_UPDATE_PROB); + } + for (j = 0; j < MV_OFFSET_BITS; ++j) { + savings += update_nmv_savings(branch_ct_bits[i][j], + cpi->common.fc.nmvc.comps[i].bits[j], + prob.comps[i].bits[j], 
+ VP8_NMV_UPDATE_PROB); + } + } + for (i = 0; i < 2; ++i) { + for (j = 0; j < CLASS0_SIZE; ++j) { + int k; + for (k = 0; k < 3; ++k) { + savings += update_nmv_savings(branch_ct_class0_fp[i][j][k], + cpi->common.fc.nmvc.comps[i].class0_fp[j][k], + prob.comps[i].class0_fp[j][k], + VP8_NMV_UPDATE_PROB); + } + } + for (j = 0; j < 3; ++j) { + savings += update_nmv_savings(branch_ct_fp[i][j], + cpi->common.fc.nmvc.comps[i].fp[j], + prob.comps[i].fp[j], + VP8_NMV_UPDATE_PROB); + } + } + if (usehp) { + for (i = 0; i < 2; ++i) { + savings += update_nmv_savings(branch_ct_class0_hp[i], + cpi->common.fc.nmvc.comps[i].class0_hp, + prob.comps[i].class0_hp, + VP8_NMV_UPDATE_PROB); + savings += update_nmv_savings(branch_ct_hp[i], + cpi->common.fc.nmvc.comps[i].hp, + prob.comps[i].hp, + VP8_NMV_UPDATE_PROB); + } + } + if (savings <= 0) { + vp8_write_bit(w, 0); + return; + } + vp8_write_bit(w, 1); +#endif + + for (j = 0; j < MV_JOINTS - 1; ++j) { + update_nmv(w, branch_ct_joint[j], + &cpi->common.fc.nmvc.joints[j], + prob.joints[j], + VP8_NMV_UPDATE_PROB); + } + for (i = 0; i < 2; ++i) { + update_nmv(w, branch_ct_sign[i], + &cpi->common.fc.nmvc.comps[i].sign, + prob.comps[i].sign, + VP8_NMV_UPDATE_PROB); + for (j = 0; j < MV_CLASSES - 1; ++j) { + update_nmv(w, branch_ct_classes[i][j], + &cpi->common.fc.nmvc.comps[i].classes[j], + prob.comps[i].classes[j], + VP8_NMV_UPDATE_PROB); + } + for (j = 0; j < CLASS0_SIZE - 1; ++j) { + update_nmv(w, branch_ct_class0[i][j], + &cpi->common.fc.nmvc.comps[i].class0[j], + prob.comps[i].class0[j], + VP8_NMV_UPDATE_PROB); + } + for (j = 0; j < MV_OFFSET_BITS; ++j) { + update_nmv(w, branch_ct_bits[i][j], + &cpi->common.fc.nmvc.comps[i].bits[j], + prob.comps[i].bits[j], + VP8_NMV_UPDATE_PROB); + } + } + for (i = 0; i < 2; ++i) { + for (j = 0; j < CLASS0_SIZE; ++j) { + int k; + for (k = 0; k < 3; ++k) { + update_nmv(w, branch_ct_class0_fp[i][j][k], + &cpi->common.fc.nmvc.comps[i].class0_fp[j][k], + prob.comps[i].class0_fp[j][k], + VP8_NMV_UPDATE_PROB); + } + } + for (j = 0; j < 3; ++j) { + update_nmv(w, branch_ct_fp[i][j], + &cpi->common.fc.nmvc.comps[i].fp[j], + prob.comps[i].fp[j], + VP8_NMV_UPDATE_PROB); + } + } + if (usehp) { + for (i = 0; i < 2; ++i) { + update_nmv(w, branch_ct_class0_hp[i], + &cpi->common.fc.nmvc.comps[i].class0_hp, + prob.comps[i].class0_hp, + VP8_NMV_UPDATE_PROB); + update_nmv(w, branch_ct_hp[i], + &cpi->common.fc.nmvc.comps[i].hp, + prob.comps[i].hp, + VP8_NMV_UPDATE_PROB); + } + } +} + +void vp8_encode_nmv(vp8_writer *w, const MV *mv, const MV *ref, + const nmv_context *mvctx) { + MV_JOINT_TYPE j = vp8_get_mv_joint(*mv); + vp8_write_token(w, vp8_mv_joint_tree, mvctx->joints, + vp8_mv_joint_encodings + j); + if (j == MV_JOINT_HZVNZ || j == MV_JOINT_HNZVNZ) { + encode_nmv_component(w, mv->row, ref->col, &mvctx->comps[0]); + } + if (j == MV_JOINT_HNZVZ || j == MV_JOINT_HNZVNZ) { + encode_nmv_component(w, mv->col, ref->col, &mvctx->comps[1]); + } +} + +void vp8_encode_nmv_fp(vp8_writer *w, const MV *mv, const MV *ref, + const nmv_context *mvctx, int usehp) { + MV_JOINT_TYPE j = vp8_get_mv_joint(*mv); + if (j == MV_JOINT_HZVNZ || j == MV_JOINT_HNZVNZ) { + encode_nmv_component_fp(w, mv->row, ref->row, &mvctx->comps[0], usehp); + } + if (j == MV_JOINT_HNZVZ || j == MV_JOINT_HNZVNZ) { + encode_nmv_component_fp(w, mv->col, ref->col, &mvctx->comps[1], usehp); + } +} + +void vp8_build_nmv_cost_table(int *mvjoint, + int *mvcost[2], + const nmv_context *mvctx, + int usehp, + int mvc_flag_v, + int mvc_flag_h) { + vp8_clear_system_state(); + 
vp8_cost_tokens(mvjoint, mvctx->joints, vp8_mv_joint_tree); + if (mvc_flag_v) + build_nmv_component_cost_table(mvcost[0], &mvctx->comps[0], usehp); + if (mvc_flag_h) + build_nmv_component_cost_table(mvcost[1], &mvctx->comps[1], usehp); +} + +#else /* CONFIG_NEWMVENTROPY */ + static void encode_mvcomponent( vp8_writer *const w, const int v, @@ -596,8 +1126,9 @@ void vp8_write_mvprobs_hp(VP8_COMP *cpi) { vp8_build_component_cost_table_hp(cpi->mb.mvcost_hp, (const MV_CONTEXT_HP *) cpi->common.fc.mvc_hp, flags); - #ifdef ENTROPY_STATS active_section = 5; #endif } + +#endif /* CONFIG_NEWMVENTROPY */ diff --git a/vp8/encoder/encodemv.h b/vp8/encoder/encodemv.h index 7e33007..e675fe0 100644 --- a/vp8/encoder/encodemv.h +++ b/vp8/encoder/encodemv.h @@ -14,11 +14,31 @@ #include "onyx_int.h" +#if CONFIG_NEWMVENTROPY +void vp8_write_nmvprobs(VP8_COMP *, int usehp); +void vp8_encode_nmv(vp8_writer *w, const MV *mv, const MV *ref, + const nmv_context *mvctx); +void vp8_encode_nmv_fp(vp8_writer *w, const MV *mv, const MV *ref, + const nmv_context *mvctx, int usehp); +void vp8_build_nmv_cost_table(int *mvjoint, + int *mvcost[2], + const nmv_context *mvctx, + int usehp, + int mvc_flag_v, + int mvc_flag_h); +#else /* CONFIG_NEWMVENTROPY */ void vp8_write_mvprobs(VP8_COMP *); -void vp8_encode_motion_vector(vp8_writer *, const MV *, const MV_CONTEXT *); -void vp8_build_component_cost_table(int *mvcost[2], const MV_CONTEXT *mvc, int mvc_flag[2]); +void vp8_encode_motion_vector(vp8_writer *, const MV *, + const MV_CONTEXT *); +void vp8_build_component_cost_table(int *mvcost[2], + const MV_CONTEXT *mvc, + int mvc_flag[2]); void vp8_write_mvprobs_hp(VP8_COMP *); -void vp8_encode_motion_vector_hp(vp8_writer *, const MV *, const MV_CONTEXT_HP *); -void vp8_build_component_cost_table_hp(int *mvcost[2], const MV_CONTEXT_HP *mvc, int mvc_flag[2]); +void vp8_encode_motion_vector_hp(vp8_writer *, const MV *, + const MV_CONTEXT_HP *); +void vp8_build_component_cost_table_hp(int *mvcost[2], + const MV_CONTEXT_HP *mvc, + int mvc_flag[2]); +#endif /* CONFIG_NEWMVENTROPY */ #endif diff --git a/vp8/encoder/firstpass.c b/vp8/encoder/firstpass.c index c1504f2..1e54371 100644 --- a/vp8/encoder/firstpass.c +++ b/vp8/encoder/firstpass.c @@ -28,6 +28,7 @@ #include "rdopt.h" #include "ratectrl.h" #include "vp8/common/quant_common.h" +#include "vp8/common/entropymv.h" #include "encodemv.h" #define OUTPUT_FPF 0 @@ -38,8 +39,6 @@ #define IF_RTCD(x) NULL #endif -#define XMVCOST (x->e_mbd.allow_high_precision_mv?x->mvcost_hp:x->mvcost) - extern void vp8_build_block_offsets(MACROBLOCK *x); extern void vp8_setup_block_ptrs(MACROBLOCK *x); extern void vp8cx_frame_init_quantizer(VP8_COMP *cpi); @@ -492,11 +491,13 @@ void vp8_first_pass(VP8_COMP *cpi) { // if ( 0 ) { int flag[2] = {1, 1}; + vp8_init_mv_probs(cm); +#if CONFIG_NEWMVENTROPY vp8_initialize_rd_consts(cpi, cm->base_qindex + cm->y1dc_delta_q); - vpx_memcpy(cm->fc.mvc, vp8_default_mv_context, sizeof(vp8_default_mv_context)); +#else vp8_build_component_cost_table(cpi->mb.mvcost, (const MV_CONTEXT *) cm->fc.mvc, flag); - vpx_memcpy(cm->fc.mvc_hp, vp8_default_mv_context_hp, sizeof(vp8_default_mv_context_hp)); vp8_build_component_cost_table_hp(cpi->mb.mvcost_hp, (const MV_CONTEXT_HP *) cm->fc.mvc_hp, flag); +#endif } // for each macroblock row in image diff --git a/vp8/encoder/mbgraph.c b/vp8/encoder/mbgraph.c index dde000a..d512ae4 100644 --- a/vp8/encoder/mbgraph.c +++ b/vp8/encoder/mbgraph.c @@ -31,12 +31,6 @@ static unsigned int do_16x16_motion_iteration vp8_variance_fn_ptr_t 
v_fn_ptr = cpi->fn_ptr[BLOCK_16X16]; unsigned int best_err; int step_param, further_steps; - static int dummy_cost[2 * mv_max + 1]; - int *mvcost[2] = { &dummy_cost[mv_max + 1], &dummy_cost[mv_max + 1] }; - int *mvsadcost[2] = { &dummy_cost[mv_max + 1], &dummy_cost[mv_max + 1] }; - static int dummy_cost_hp[2 * mv_max_hp + 1]; - int *mvcost_hp[2] = { &dummy_cost_hp[mv_max_hp + 1], &dummy_cost_hp[mv_max_hp + 1] }; - int *mvsadcost_hp[2] = { &dummy_cost_hp[mv_max_hp + 1], &dummy_cost_hp[mv_max_hp + 1] }; int tmp_col_min = x->mv_col_min; int tmp_col_max = x->mv_col_max; @@ -65,8 +59,8 @@ static unsigned int do_16x16_motion_iteration step_param, x->errorperbit, &v_fn_ptr, - xd->allow_high_precision_mv ? mvsadcost_hp : mvsadcost, - xd->allow_high_precision_mv ? mvcost_hp : mvcost, + NULLMVCOST, + NULLMVCOST, ref_mv); // Try sub-pixel MC @@ -78,7 +72,7 @@ static unsigned int do_16x16_motion_iteration x, b, d, dst_mv, ref_mv, x->errorperbit, &v_fn_ptr, - xd->allow_high_precision_mv ? mvcost_hp : mvcost, + NULLMVCOST, & distortion, &sse); } diff --git a/vp8/encoder/mcomp.c b/vp8/encoder/mcomp.c index 79e10a5..76accd4 100644 --- a/vp8/encoder/mcomp.c +++ b/vp8/encoder/mcomp.c @@ -42,42 +42,59 @@ void vp8_clamp_mv_min_max(MACROBLOCK *x, int_mv *ref_mv) { x->mv_row_max = row_max; } -int vp8_mv_bit_cost(int_mv *mv, int_mv *ref, int *mvcost[2], +int vp8_mv_bit_cost(int_mv *mv, int_mv *ref, DEC_MVCOSTS, int Weight, int ishp) { - // MV costing is based on the distribution of vectors in the previous frame - // and as such will tend to over state the cost of vectors. In addition - // coding a new vector can have a knock on effect on the cost of subsequent - // vectors and the quality of prediction from NEAR and NEAREST for subsequent - // blocks. The "Weight" parameter allows, to a limited extent, for some - // account to be taken of these factors. - return ((mvcost[0][(mv->as_mv.row - ref->as_mv.row) >> (ishp == 0)] + - mvcost[1][(mv->as_mv.col - ref->as_mv.col) >> (ishp == 0)]) + MV v; + v.row = (mv->as_mv.row - ref->as_mv.row); + v.col = (mv->as_mv.col - ref->as_mv.col); +#if CONFIG_NEWMVENTROPY + return ((mvjcost[vp8_get_mv_joint(v)] + + mvcost[0][v.row] + mvcost[1][v.col]) * + Weight) >> 7; +#else + return ((mvcost[0][v.row >> (ishp == 0)] + + mvcost[1][v.col >> (ishp == 0)]) * Weight) >> 7; +#endif } -static int mv_err_cost(int_mv *mv, int_mv *ref, int *mvcost[2], +static int mv_err_cost(int_mv *mv, int_mv *ref, DEC_MVCOSTS, int error_per_bit, int ishp) { - // Ignore costing if mvcost is NULL - if (mvcost) - return ((mvcost[0][(mv->as_mv.row - ref->as_mv.row) >> (ishp == 0)] + - mvcost[1][(mv->as_mv.col - ref->as_mv.col) >> (ishp == 0)]) - * error_per_bit + 128) >> 8; + if (mvcost) { + MV v; + v.row = (mv->as_mv.row - ref->as_mv.row); + v.col = (mv->as_mv.col - ref->as_mv.col); +#if CONFIG_NEWMVENTROPY + return ((mvjcost[vp8_get_mv_joint(v)] + + mvcost[0][v.row] + mvcost[1][v.col]) * + error_per_bit + 128) >> 8; +#else + return ((mvcost[0][v.row >> (ishp == 0)] + + mvcost[1][v.col >> (ishp == 0)]) * error_per_bit + 128) >> 8; +#endif + } return 0; } - -static int mvsad_err_cost(int_mv *mv, int_mv *ref, int *mvsadcost[2], +static int mvsad_err_cost(int_mv *mv, int_mv *ref, DEC_MVSADCOSTS, int error_per_bit) { - // Calculate sad error cost on full pixel basis. 
- // Ignore costing if mvcost is NULL - if (mvsadcost) - return ((mvsadcost[0][(mv->as_mv.row - ref->as_mv.row)] + - mvsadcost[1][(mv->as_mv.col - ref->as_mv.col)]) + + if (mvsadcost) { + MV v; + v.row = (mv->as_mv.row - ref->as_mv.row); + v.col = (mv->as_mv.col - ref->as_mv.col); +#if CONFIG_NEWMVENTROPY + return ((mvjsadcost[vp8_get_mv_joint(v)] + + mvsadcost[0][v.row] + mvsadcost[1][v.col]) * + error_per_bit + 128) >> 8; +#else + return ((mvsadcost[0][v.row] + mvsadcost[1][v.col]) * error_per_bit + 128) >> 8; +#endif + } return 0; } - void vp8_init_dsmotion_compensation(MACROBLOCK *x, int stride) { int Len; int search_site_count = 0; @@ -185,7 +202,6 @@ void vp8_init3smotion_compensation(MACROBLOCK *x, int stride) { x->ss[search_site_count].offset = Len * stride + Len; search_site_count++; - // Contract. Len /= 2; } @@ -204,18 +220,35 @@ void vp8_init3smotion_compensation(MACROBLOCK *x, int stride) { * could reduce the area. */ -#define PRE(r,c) (y + (((r)>>2) * y_stride + ((c)>>2) -(offset))) // pointer to predictor base of a motionvector -#define SP(x) (((x)&3)<<2) // convert motion vector component to offset for svf calc -#define MVC(r,c) (mvcost ? ((mvcost[0][(r)-rr] + mvcost[1][(c)-rc]) * error_per_bit + 128 )>>8 : 0) // estimated cost of a motion vector (r,c) -#define DIST(r,c) vfp->svf( PRE(r,c), y_stride, SP(c),SP(r), z,b->src_stride,&sse) // returns subpixel variance error function. -#define ERR(r,c) (MVC(r,c)+DIST(r,c)) // returns distortion + motion vector cost -#define IFMVCV(r,c,s,e) if ( c >= minc && c <= maxc && r >= minr && r <= maxr) s else e; +#if CONFIG_NEWMVENTROPY +/* estimated cost of a motion vector (r,c) */ +#define MVC(r,c) \ + (mvcost ? \ + ((mvjcost[((r)!=rr)*2 + ((c)!=rc)] + \ + mvcost[0][((r)-rr)] + mvcost[1][((c)-rc)]) * error_per_bit + 128 )>>8 : 0) +#else +#define MVC(r,c) \ + (mvcost ? \ + ((mvcost[0][((r)-rr)>>(xd->allow_high_precision_mv==0)] + \ + mvcost[1][((c)-rc)>>(xd->allow_high_precision_mv==0)]) * \ + error_per_bit + 128 )>>8 : 0) +#endif /* CONFIG_NEWMVENTROPY */ + +#define SP(x) (((x)&7)<<1) // convert motion vector component to offset for svf calc + +#define IFMVCV(r,c,s,e) \ + if ( c >= minc && c <= maxc && r >= minr && r <= maxr) s else e; + +/* pointer to predictor base of a motionvector */ +#define PRE(r,c) (y + (((r)>>3) * y_stride + ((c)>>3) -(offset))) -#define PREHP(r,c) (y + (((r)>>3) * y_stride + ((c)>>3) -(offset))) // pointer to predictor base of a motionvector -#define SPHP(x) (((x)&7)<<1) // convert motion vector component to offset for svf calc -#define DISTHP(r,c) vfp->svf( PREHP(r,c), y_stride, SPHP(c),SPHP(r), z,b->src_stride,&sse) // returns subpixel variance error function. 
-#define ERRHP(r,c) (MVC(r,c)+DISTHP(r,c)) // returns distortion + motion vector cost -#define CHECK_BETTER(v,r,c) IFMVCV(r,c,{thismse = ((xd->allow_high_precision_mv)?DISTHP(r,c):DIST(r,c)); if((v = (MVC(r,c)+thismse)) < besterr) { besterr = v; br=r; bc=c; *distortion = thismse; *sse1 = sse; }}, v=INT_MAX;)// checks if (r,c) has better score than previous best +/* returns subpixel variance error function */ +#define DIST(r,c) vfp->svf( PRE(r,c), y_stride, SP(c),SP(r), z,b->src_stride,&sse) + +/* checks if (r,c) has better score than previous best */ +#define CHECK_BETTER(v,r,c) \ + IFMVCV(r,c,{thismse = (DIST(r,c)); if((v = MVC(r,c)+thismse) < besterr) \ + { besterr = v; br=r; bc=c; *distortion = thismse; *sse1 = sse; }}, v=INT_MAX;) #define MIN(x,y) (((x)<(y))?(x):(y)) #define MAX(x,y) (((x)>(y))?(x):(y)) @@ -224,7 +257,8 @@ int vp8_find_best_sub_pixel_step_iteratively(MACROBLOCK *x, BLOCK *b, BLOCKD *d, int_mv *bestmv, int_mv *ref_mv, int error_per_bit, const vp8_variance_fn_ptr_t *vfp, - int *mvcost[2], int *distortion, + DEC_MVCOSTS, + int *distortion, unsigned int *sse1) { unsigned char *z = (*(b->base_src) + b->src); MACROBLOCKD *xd = &x->e_mbd; @@ -268,27 +302,22 @@ int vp8_find_best_sub_pixel_step_iteratively(MACROBLOCK *x, BLOCK *b, BLOCKD *d, #endif - if (xd->allow_high_precision_mv) { - rr = ref_mv->as_mv.row; - rc = ref_mv->as_mv.col; - br = bestmv->as_mv.row << 3; - bc = bestmv->as_mv.col << 3; - hstep = 4; - minc = MAX(x->mv_col_min << 3, (ref_mv->as_mv.col) - ((1 << mvlong_width_hp) - 1)); - maxc = MIN(x->mv_col_max << 3, (ref_mv->as_mv.col) + ((1 << mvlong_width_hp) - 1)); - minr = MAX(x->mv_row_min << 3, (ref_mv->as_mv.row) - ((1 << mvlong_width_hp) - 1)); - maxr = MIN(x->mv_row_max << 3, (ref_mv->as_mv.row) + ((1 << mvlong_width_hp) - 1)); - } else { - rr = ref_mv->as_mv.row >> 1; - rc = ref_mv->as_mv.col >> 1; - br = bestmv->as_mv.row << 2; - bc = bestmv->as_mv.col << 2; - hstep = 2; - minc = MAX(x->mv_col_min << 2, (ref_mv->as_mv.col >> 1) - ((1 << mvlong_width) - 1)); - maxc = MIN(x->mv_col_max << 2, (ref_mv->as_mv.col >> 1) + ((1 << mvlong_width) - 1)); - minr = MAX(x->mv_row_min << 2, (ref_mv->as_mv.row >> 1) - ((1 << mvlong_width) - 1)); - maxr = MIN(x->mv_row_max << 2, (ref_mv->as_mv.row >> 1) + ((1 << mvlong_width) - 1)); - } + rr = ref_mv->as_mv.row; + rc = ref_mv->as_mv.col; + br = bestmv->as_mv.row << 3; + bc = bestmv->as_mv.col << 3; + hstep = 4; +#if CONFIG_NEWMVENTROPY + minc = MAX(x->mv_col_min << 3, (ref_mv->as_mv.col) - ((1 << MV_MAX_BITS) - 1)); + maxc = MIN(x->mv_col_max << 3, (ref_mv->as_mv.col) + ((1 << MV_MAX_BITS) - 1)); + minr = MAX(x->mv_row_min << 3, (ref_mv->as_mv.row) - ((1 << MV_MAX_BITS) - 1)); + maxr = MIN(x->mv_row_max << 3, (ref_mv->as_mv.row) + ((1 << MV_MAX_BITS) - 1)); +#else + minc = MAX(x->mv_col_min << 3, (ref_mv->as_mv.col) - ((1 << mvlong_width_hp) - 1)); + maxc = MIN(x->mv_col_max << 3, (ref_mv->as_mv.col) + ((1 << mvlong_width_hp) - 1)); + minr = MAX(x->mv_row_min << 3, (ref_mv->as_mv.row) - ((1 << mvlong_width_hp) - 1)); + maxr = MIN(x->mv_row_max << 3, (ref_mv->as_mv.row) + ((1 << mvlong_width_hp) - 1)); +#endif tr = br; tc = bc; @@ -303,8 +332,8 @@ int vp8_find_best_sub_pixel_step_iteratively(MACROBLOCK *x, BLOCK *b, BLOCKD *d, // calculate central point error besterr = vfp->vf(y, y_stride, z, b->src_stride, sse1); *distortion = besterr; - besterr += mv_err_cost(bestmv, ref_mv, mvcost, error_per_bit, - xd->allow_high_precision_mv); + besterr += mv_err_cost(bestmv, ref_mv, MVCOSTS, + error_per_bit, 
xd->allow_high_precision_mv); // TODO: Each subsequent iteration checks at least one point in // common with the last iteration could be 2 ( if diag selected) @@ -407,13 +436,8 @@ int vp8_find_best_sub_pixel_step_iteratively(MACROBLOCK *x, BLOCK *b, BLOCKD *d, tc = bc; } } - if (x->e_mbd.allow_high_precision_mv) { - bestmv->as_mv.row = br; - bestmv->as_mv.col = bc; - } else { - bestmv->as_mv.row = br << 1; - bestmv->as_mv.col = bc << 1; - } + bestmv->as_mv.row = br; + bestmv->as_mv.col = bc; if ((abs(bestmv->as_mv.col - ref_mv->as_mv.col) > (MAX_FULL_PEL_VAL << 3)) || (abs(bestmv->as_mv.row - ref_mv->as_mv.row) > (MAX_FULL_PEL_VAL << 3))) @@ -423,25 +447,17 @@ int vp8_find_best_sub_pixel_step_iteratively(MACROBLOCK *x, BLOCK *b, BLOCKD *d, } #undef MVC #undef PRE -#undef SP #undef DIST #undef IFMVCV -#undef ERR #undef CHECK_BETTER #undef MIN #undef MAX -#undef PREHP -#undef DPHP -#undef DISTHP -#undef ERRHP - -#define SP(x) (((x)&7)<<1) // convert motion vector component to offset for svf calc int vp8_find_best_sub_pixel_step(MACROBLOCK *x, BLOCK *b, BLOCKD *d, int_mv *bestmv, int_mv *ref_mv, int error_per_bit, const vp8_variance_fn_ptr_t *vfp, - int *mvcost[2], int *distortion, + DEC_MVCOSTS, int *distortion, unsigned int *sse1) { int bestmse = INT_MAX; int_mv startmv; @@ -478,14 +494,14 @@ int vp8_find_best_sub_pixel_step(MACROBLOCK *x, BLOCK *b, BLOCKD *d, // calculate central point error bestmse = vfp->vf(y, y_stride, z, b->src_stride, sse1); *distortion = bestmse; - bestmse += mv_err_cost(bestmv, ref_mv, mvcost, error_per_bit, + bestmse += mv_err_cost(bestmv, ref_mv, MVCOSTS, error_per_bit, xd->allow_high_precision_mv); // go left then right and check error this_mv.as_mv.row = startmv.as_mv.row; this_mv.as_mv.col = ((startmv.as_mv.col - 8) | 4); thismse = vfp->svf_halfpix_h(y - 1, y_stride, z, b->src_stride, &sse); - left = thismse + mv_err_cost(&this_mv, ref_mv, mvcost, error_per_bit, + left = thismse + mv_err_cost(&this_mv, ref_mv, MVCOSTS, error_per_bit, xd->allow_high_precision_mv); if (left < bestmse) { @@ -497,7 +513,7 @@ int vp8_find_best_sub_pixel_step(MACROBLOCK *x, BLOCK *b, BLOCKD *d, this_mv.as_mv.col += 8; thismse = vfp->svf_halfpix_h(y, y_stride, z, b->src_stride, &sse); - right = thismse + mv_err_cost(&this_mv, ref_mv, mvcost, error_per_bit, + right = thismse + mv_err_cost(&this_mv, ref_mv, MVCOSTS, error_per_bit, xd->allow_high_precision_mv); if (right < bestmse) { @@ -511,7 +527,7 @@ int vp8_find_best_sub_pixel_step(MACROBLOCK *x, BLOCK *b, BLOCKD *d, this_mv.as_mv.col = startmv.as_mv.col; this_mv.as_mv.row = ((startmv.as_mv.row - 8) | 4); thismse = vfp->svf_halfpix_v(y - y_stride, y_stride, z, b->src_stride, &sse); - up = thismse + mv_err_cost(&this_mv, ref_mv, mvcost, error_per_bit, + up = thismse + mv_err_cost(&this_mv, ref_mv, MVCOSTS, error_per_bit, xd->allow_high_precision_mv); if (up < bestmse) { @@ -523,7 +539,7 @@ int vp8_find_best_sub_pixel_step(MACROBLOCK *x, BLOCK *b, BLOCKD *d, this_mv.as_mv.row += 8; thismse = vfp->svf_halfpix_v(y, y_stride, z, b->src_stride, &sse); - down = thismse + mv_err_cost(&this_mv, ref_mv, mvcost, error_per_bit, + down = thismse + mv_err_cost(&this_mv, ref_mv, MVCOSTS, error_per_bit, xd->allow_high_precision_mv); if (down < bestmse) { @@ -564,7 +580,7 @@ int vp8_find_best_sub_pixel_step(MACROBLOCK *x, BLOCK *b, BLOCKD *d, break; } - diag = thismse + mv_err_cost(&this_mv, ref_mv, mvcost, error_per_bit, + diag = thismse + mv_err_cost(&this_mv, ref_mv, MVCOSTS, error_per_bit, xd->allow_high_precision_mv); if (diag < bestmse) { @@ 
-606,7 +622,7 @@ int vp8_find_best_sub_pixel_step(MACROBLOCK *x, BLOCK *b, BLOCKD *d, b->src_stride, &sse); } - left = thismse + mv_err_cost(&this_mv, ref_mv, mvcost, error_per_bit, + left = thismse + mv_err_cost(&this_mv, ref_mv, MVCOSTS, error_per_bit, xd->allow_high_precision_mv); if (left < bestmse) { @@ -620,7 +636,7 @@ int vp8_find_best_sub_pixel_step(MACROBLOCK *x, BLOCK *b, BLOCKD *d, thismse = vfp->svf(y, y_stride, SP(this_mv.as_mv.col), SP(this_mv.as_mv.row), z, b->src_stride, &sse); - right = thismse + mv_err_cost(&this_mv, ref_mv, mvcost, error_per_bit, + right = thismse + mv_err_cost(&this_mv, ref_mv, MVCOSTS, error_per_bit, xd->allow_high_precision_mv); if (right < bestmse) { @@ -644,7 +660,7 @@ int vp8_find_best_sub_pixel_step(MACROBLOCK *x, BLOCK *b, BLOCKD *d, z, b->src_stride, &sse); } - up = thismse + mv_err_cost(&this_mv, ref_mv, mvcost, error_per_bit, + up = thismse + mv_err_cost(&this_mv, ref_mv, MVCOSTS, error_per_bit, xd->allow_high_precision_mv); if (up < bestmse) { @@ -657,7 +673,7 @@ int vp8_find_best_sub_pixel_step(MACROBLOCK *x, BLOCK *b, BLOCKD *d, this_mv.as_mv.row += 4; thismse = vfp->svf(y, y_stride, SP(this_mv.as_mv.col), SP(this_mv.as_mv.row), z, b->src_stride, &sse); - down = thismse + mv_err_cost(&this_mv, ref_mv, mvcost, error_per_bit, + down = thismse + mv_err_cost(&this_mv, ref_mv, MVCOSTS, error_per_bit, xd->allow_high_precision_mv); if (down < bestmse) { @@ -736,7 +752,7 @@ int vp8_find_best_sub_pixel_step(MACROBLOCK *x, BLOCK *b, BLOCKD *d, break; } - diag = thismse + mv_err_cost(&this_mv, ref_mv, mvcost, error_per_bit, + diag = thismse + mv_err_cost(&this_mv, ref_mv, MVCOSTS, error_per_bit, xd->allow_high_precision_mv); if (diag < bestmse) { @@ -776,7 +792,7 @@ int vp8_find_best_sub_pixel_step(MACROBLOCK *x, BLOCK *b, BLOCKD *d, z, b->src_stride, &sse); } - left = thismse + mv_err_cost(&this_mv, ref_mv, mvcost, error_per_bit, + left = thismse + mv_err_cost(&this_mv, ref_mv, MVCOSTS, error_per_bit, xd->allow_high_precision_mv); if (left < bestmse) { @@ -787,8 +803,10 @@ int vp8_find_best_sub_pixel_step(MACROBLOCK *x, BLOCK *b, BLOCKD *d, } this_mv.as_mv.col += 2; - thismse = vfp->svf(y, y_stride, SP(this_mv.as_mv.col), SP(this_mv.as_mv.row), z, b->src_stride, &sse); - right = thismse + mv_err_cost(&this_mv, ref_mv, mvcost, error_per_bit, xd->allow_high_precision_mv); + thismse = vfp->svf(y, y_stride, SP(this_mv.as_mv.col), SP(this_mv.as_mv.row), + z, b->src_stride, &sse); + right = thismse + mv_err_cost(&this_mv, ref_mv, MVCOSTS, error_per_bit, + xd->allow_high_precision_mv); if (right < bestmse) { *bestmv = this_mv; @@ -808,7 +826,8 @@ int vp8_find_best_sub_pixel_step(MACROBLOCK *x, BLOCK *b, BLOCKD *d, thismse = vfp->svf(y - y_stride, y_stride, SP(this_mv.as_mv.col), SP(7), z, b->src_stride, &sse); } - up = thismse + mv_err_cost(&this_mv, ref_mv, mvcost, error_per_bit, xd->allow_high_precision_mv); + up = thismse + mv_err_cost(&this_mv, ref_mv, MVCOSTS, error_per_bit, + xd->allow_high_precision_mv); if (up < bestmse) { *bestmv = this_mv; @@ -819,7 +838,8 @@ int vp8_find_best_sub_pixel_step(MACROBLOCK *x, BLOCK *b, BLOCKD *d, this_mv.as_mv.row += 2; thismse = vfp->svf(y, y_stride, SP(this_mv.as_mv.col), SP(this_mv.as_mv.row), z, b->src_stride, &sse); - down = thismse + mv_err_cost(&this_mv, ref_mv, mvcost, error_per_bit, xd->allow_high_precision_mv); + down = thismse + mv_err_cost(&this_mv, ref_mv, MVCOSTS, error_per_bit, + xd->allow_high_precision_mv); if (down < bestmse) { *bestmv = this_mv; @@ -892,7 +912,8 @@ int 
vp8_find_best_sub_pixel_step(MACROBLOCK *x, BLOCK *b, BLOCKD *d, break; } - diag = thismse + mv_err_cost(&this_mv, ref_mv, mvcost, error_per_bit, xd->allow_high_precision_mv); + diag = thismse + mv_err_cost(&this_mv, ref_mv, MVCOSTS, error_per_bit, + xd->allow_high_precision_mv); if (diag < bestmse) { *bestmv = this_mv; @@ -910,7 +931,8 @@ int vp8_find_best_half_pixel_step(MACROBLOCK *x, BLOCK *b, BLOCKD *d, int_mv *bestmv, int_mv *ref_mv, int error_per_bit, const vp8_variance_fn_ptr_t *vfp, - int *mvcost[2], int *distortion, + DEC_MVCOSTS, + int *distortion, unsigned int *sse1) { int bestmse = INT_MAX; int_mv startmv; @@ -946,14 +968,14 @@ int vp8_find_best_half_pixel_step(MACROBLOCK *x, BLOCK *b, BLOCKD *d, // calculate central point error bestmse = vfp->vf(y, y_stride, z, b->src_stride, sse1); *distortion = bestmse; - bestmse += mv_err_cost(bestmv, ref_mv, mvcost, error_per_bit, + bestmse += mv_err_cost(bestmv, ref_mv, MVCOSTS, error_per_bit, xd->allow_high_precision_mv); // go left then right and check error this_mv.as_mv.row = startmv.as_mv.row; this_mv.as_mv.col = ((startmv.as_mv.col - 8) | 4); thismse = vfp->svf_halfpix_h(y - 1, y_stride, z, b->src_stride, &sse); - left = thismse + mv_err_cost(&this_mv, ref_mv, mvcost, error_per_bit, + left = thismse + mv_err_cost(&this_mv, ref_mv, MVCOSTS, error_per_bit, xd->allow_high_precision_mv); if (left < bestmse) { @@ -965,7 +987,7 @@ int vp8_find_best_half_pixel_step(MACROBLOCK *x, BLOCK *b, BLOCKD *d, this_mv.as_mv.col += 8; thismse = vfp->svf_halfpix_h(y, y_stride, z, b->src_stride, &sse); - right = thismse + mv_err_cost(&this_mv, ref_mv, mvcost, error_per_bit, + right = thismse + mv_err_cost(&this_mv, ref_mv, MVCOSTS, error_per_bit, xd->allow_high_precision_mv); if (right < bestmse) { @@ -979,7 +1001,7 @@ int vp8_find_best_half_pixel_step(MACROBLOCK *x, BLOCK *b, BLOCKD *d, this_mv.as_mv.col = startmv.as_mv.col; this_mv.as_mv.row = ((startmv.as_mv.row - 8) | 4); thismse = vfp->svf_halfpix_v(y - y_stride, y_stride, z, b->src_stride, &sse); - up = thismse + mv_err_cost(&this_mv, ref_mv, mvcost, error_per_bit, + up = thismse + mv_err_cost(&this_mv, ref_mv, MVCOSTS, error_per_bit, xd->allow_high_precision_mv); if (up < bestmse) { @@ -991,7 +1013,7 @@ int vp8_find_best_half_pixel_step(MACROBLOCK *x, BLOCK *b, BLOCKD *d, this_mv.as_mv.row += 8; thismse = vfp->svf_halfpix_v(y, y_stride, z, b->src_stride, &sse); - down = thismse + mv_err_cost(&this_mv, ref_mv, mvcost, error_per_bit, + down = thismse + mv_err_cost(&this_mv, ref_mv, MVCOSTS, error_per_bit, xd->allow_high_precision_mv); if (down < bestmse) { @@ -1029,7 +1051,7 @@ int vp8_find_best_half_pixel_step(MACROBLOCK *x, BLOCK *b, BLOCKD *d, break; } - diag = thismse + mv_err_cost(&this_mv, ref_mv, mvcost, error_per_bit, + diag = thismse + mv_err_cost(&this_mv, ref_mv, MVCOSTS, error_per_bit, xd->allow_high_precision_mv); if (diag < bestmse) { @@ -1063,7 +1085,7 @@ int vp8_find_best_half_pixel_step(MACROBLOCK *x, BLOCK *b, BLOCKD *d, {\ if (thissad < bestsad)\ {\ - thissad += mvsad_err_cost(&this_mv, &fcenter_mv, mvsadcost, sad_per_bit);\ + thissad += mvsad_err_cost(&this_mv, &fcenter_mv, MVSADCOSTS, sad_per_bit);\ if (thissad < bestsad)\ {\ bestsad = thissad;\ @@ -1091,8 +1113,8 @@ int vp8_hex_search int search_param, int sad_per_bit, const vp8_variance_fn_ptr_t *vfp, - int *mvsadcost[2], - int *mvcost[2], + DEC_MVSADCOSTS, + DEC_MVCOSTS, int_mv *center_mv ) { MV hex[6] = { { -1, -2}, {1, -2}, {2, 0}, {1, 2}, { -1, 2}, { -2, 0} }; @@ -1128,7 +1150,7 @@ int vp8_hex_search this_mv.as_mv.col = 
bc; bestsad = vfp->sdf(what, what_stride, this_offset, in_what_stride, 0x7fffffff) - + mvsad_err_cost(&this_mv, &fcenter_mv, mvsadcost, sad_per_bit); + + mvsad_err_cost(&this_mv, &fcenter_mv, MVSADCOSTS, sad_per_bit); // hex search // j=0 @@ -1240,7 +1262,7 @@ cal_neighbors: int vp8_diamond_search_sad(MACROBLOCK *x, BLOCK *b, BLOCKD *d, int_mv *ref_mv, int_mv *best_mv, int search_param, int sad_per_bit, int *num00, - vp8_variance_fn_ptr_t *fn_ptr, int *mvcost[2], + vp8_variance_fn_ptr_t *fn_ptr, DEC_MVCOSTS, int_mv *center_mv) { int i, j, step; @@ -1264,14 +1286,19 @@ int vp8_diamond_search_sad(MACROBLOCK *x, BLOCK *b, BLOCKD *d, unsigned char *check_here; int thissad; MACROBLOCKD *xd = &x->e_mbd; - - int *mvsadcost[2] = {x->mvsadcost[0], x->mvsadcost[1]}; int_mv fcenter_mv; +#if CONFIG_NEWMVENTROPY + int *mvjsadcost = x->nmvjointsadcost; + int *mvsadcost[2] = {x->nmvsadcost[0], x->nmvsadcost[1]}; +#else + int *mvsadcost[2] = {x->mvsadcost[0], x->mvsadcost[1]}; if (xd->allow_high_precision_mv) { mvsadcost[0] = x->mvsadcost_hp[0]; mvsadcost[1] = x->mvsadcost_hp[1]; } +#endif + fcenter_mv.as_mv.row = center_mv->as_mv.row >> 3; fcenter_mv.as_mv.col = center_mv->as_mv.col >> 3; @@ -1289,7 +1316,7 @@ int vp8_diamond_search_sad(MACROBLOCK *x, BLOCK *b, BLOCKD *d, // Check the starting position bestsad = fn_ptr->sdf(what, what_stride, in_what, in_what_stride, 0x7fffffff) - + mvsad_err_cost(best_mv, &fcenter_mv, mvsadcost, sad_per_bit); + + mvsad_err_cost(best_mv, &fcenter_mv, MVSADCOSTS, sad_per_bit); // search_param determines the length of the initial step and hence the number of iterations // 0 = initial step (MAX_FIRST_STEP) pel : 1 = (MAX_FIRST_STEP/2) pel, 2 = (MAX_FIRST_STEP/4) pel... etc. @@ -1315,7 +1342,7 @@ int vp8_diamond_search_sad(MACROBLOCK *x, BLOCK *b, BLOCKD *d, this_mv.as_mv.row = this_row_offset; this_mv.as_mv.col = this_col_offset; thissad += mvsad_err_cost(&this_mv, &fcenter_mv, - mvsadcost, sad_per_bit); + MVSADCOSTS, sad_per_bit); if (thissad < bestsad) { bestsad = thissad; @@ -1345,7 +1372,7 @@ int vp8_diamond_search_sad(MACROBLOCK *x, BLOCK *b, BLOCKD *d, return fn_ptr->vf(what, what_stride, best_address, in_what_stride, (unsigned int *)(&thissad)) + - mv_err_cost(&this_mv, center_mv, mvcost, x->errorperbit, + mv_err_cost(&this_mv, center_mv, MVCOSTS, x->errorperbit, xd->allow_high_precision_mv); } @@ -1353,7 +1380,7 @@ int vp8_diamond_search_sadx4(MACROBLOCK *x, BLOCK *b, BLOCKD *d, int_mv *ref_mv, int_mv *best_mv, int search_param, int sad_per_bit, int *num00, vp8_variance_fn_ptr_t *fn_ptr, - int *mvcost[2], int_mv *center_mv) { + DEC_MVCOSTS, int_mv *center_mv) { int i, j, step; unsigned char *what = (*(b->base_src) + b->src); @@ -1378,14 +1405,19 @@ int vp8_diamond_search_sadx4(MACROBLOCK *x, BLOCK *b, BLOCKD *d, unsigned char *check_here; unsigned int thissad; MACROBLOCKD *xd = &x->e_mbd; - - int *mvsadcost[2] = {x->mvsadcost[0], x->mvsadcost[1]}; int_mv fcenter_mv; +#if CONFIG_NEWMVENTROPY + int *mvjsadcost = x->nmvjointsadcost; + int *mvsadcost[2] = {x->nmvsadcost[0], x->nmvsadcost[1]}; +#else + int *mvsadcost[2] = {x->mvsadcost[0], x->mvsadcost[1]}; if (xd->allow_high_precision_mv) { mvsadcost[0] = x->mvsadcost_hp[0]; mvsadcost[1] = x->mvsadcost_hp[1]; } +#endif + fcenter_mv.as_mv.row = center_mv->as_mv.row >> 3; fcenter_mv.as_mv.col = center_mv->as_mv.col >> 3; @@ -1403,7 +1435,7 @@ int vp8_diamond_search_sadx4(MACROBLOCK *x, BLOCK *b, BLOCKD *d, // Check the starting position bestsad = fn_ptr->sdf(what, what_stride, in_what, in_what_stride, 0x7fffffff) - + 
mvsad_err_cost(best_mv, &fcenter_mv, mvsadcost, sad_per_bit); + + mvsad_err_cost(best_mv, &fcenter_mv, MVSADCOSTS, sad_per_bit); // search_param determines the length of the initial step and hence the number of iterations // 0 = initial step (MAX_FIRST_STEP) pel : 1 = (MAX_FIRST_STEP/2) pel, 2 = (MAX_FIRST_STEP/4) pel... etc. @@ -1438,7 +1470,7 @@ int vp8_diamond_search_sadx4(MACROBLOCK *x, BLOCK *b, BLOCKD *d, this_mv.as_mv.row = best_mv->as_mv.row + ss[i].mv.row; this_mv.as_mv.col = best_mv->as_mv.col + ss[i].mv.col; sad_array[t] += mvsad_err_cost(&this_mv, &fcenter_mv, - mvsadcost, sad_per_bit); + MVSADCOSTS, sad_per_bit); if (sad_array[t] < bestsad) { bestsad = sad_array[t]; @@ -1462,7 +1494,7 @@ int vp8_diamond_search_sadx4(MACROBLOCK *x, BLOCK *b, BLOCKD *d, this_mv.as_mv.row = this_row_offset; this_mv.as_mv.col = this_col_offset; thissad += mvsad_err_cost(&this_mv, &fcenter_mv, - mvsadcost, sad_per_bit); + MVSADCOSTS, sad_per_bit); if (thissad < bestsad) { bestsad = thissad; @@ -1492,11 +1524,10 @@ int vp8_diamond_search_sadx4(MACROBLOCK *x, BLOCK *b, BLOCKD *d, return fn_ptr->vf(what, what_stride, best_address, in_what_stride, (unsigned int *)(&thissad)) + - mv_err_cost(&this_mv, center_mv, mvcost, x->errorperbit, + mv_err_cost(&this_mv, center_mv, MVCOSTS, x->errorperbit, xd->allow_high_precision_mv); } -#define XMVCOST (x->e_mbd.allow_high_precision_mv?x->mvcost_hp:x->mvcost) /* do_refine: If last step (1-away) of n-step search doesn't pick the center point as the best match, we will do a final 1-away diamond refining search */ @@ -1558,7 +1589,7 @@ int vp8_full_pixel_diamond(VP8_COMP *cpi, MACROBLOCK *x, BLOCK *b, int vp8_full_search_sad(MACROBLOCK *x, BLOCK *b, BLOCKD *d, int_mv *ref_mv, int sad_per_bit, int distance, - vp8_variance_fn_ptr_t *fn_ptr, int *mvcost[2], + vp8_variance_fn_ptr_t *fn_ptr, DEC_MVCOSTS, int_mv *center_mv) { unsigned char *what = (*(b->base_src) + b->src); int what_stride = b->src_stride; @@ -1582,14 +1613,19 @@ int vp8_full_search_sad(MACROBLOCK *x, BLOCK *b, BLOCKD *d, int_mv *ref_mv, int row_max = ref_row + distance; int col_min = ref_col - distance; int col_max = ref_col + distance; - - int *mvsadcost[2] = {x->mvsadcost[0], x->mvsadcost[1]}; int_mv fcenter_mv; +#if CONFIG_NEWMVENTROPY + int *mvjsadcost = x->nmvjointsadcost; + int *mvsadcost[2] = {x->nmvsadcost[0], x->nmvsadcost[1]}; +#else + int *mvsadcost[2] = {x->mvsadcost[0], x->mvsadcost[1]}; if (xd->allow_high_precision_mv) { mvsadcost[0] = x->mvsadcost_hp[0]; mvsadcost[1] = x->mvsadcost_hp[1]; } +#endif + fcenter_mv.as_mv.row = center_mv->as_mv.row >> 3; fcenter_mv.as_mv.col = center_mv->as_mv.col >> 3; @@ -1603,7 +1639,7 @@ int vp8_full_search_sad(MACROBLOCK *x, BLOCK *b, BLOCKD *d, int_mv *ref_mv, // Baseline value at the centre bestsad = fn_ptr->sdf(what, what_stride, bestaddress, in_what_stride, 0x7fffffff) - + mvsad_err_cost(best_mv, &fcenter_mv, mvsadcost, sad_per_bit); + + mvsad_err_cost(best_mv, &fcenter_mv, MVSADCOSTS, sad_per_bit); // Apply further limits to prevent us looking using vectors that stretch beyiond the UMV border if (col_min < x->mv_col_min) @@ -1627,7 +1663,7 @@ int vp8_full_search_sad(MACROBLOCK *x, BLOCK *b, BLOCKD *d, int_mv *ref_mv, this_mv.as_mv.col = c; thissad += mvsad_err_cost(&this_mv, &fcenter_mv, - mvsadcost, sad_per_bit); + MVSADCOSTS, sad_per_bit); if (thissad < bestsad) { bestsad = thissad; @@ -1647,7 +1683,7 @@ int vp8_full_search_sad(MACROBLOCK *x, BLOCK *b, BLOCKD *d, int_mv *ref_mv, return fn_ptr->vf(what, what_stride, bestaddress, in_what_stride, 
(unsigned int *)(&thissad)) + - mv_err_cost(&this_mv, center_mv, mvcost, x->errorperbit, + mv_err_cost(&this_mv, center_mv, MVCOSTS, x->errorperbit, xd->allow_high_precision_mv); else return INT_MAX; @@ -1655,7 +1691,7 @@ int vp8_full_search_sad(MACROBLOCK *x, BLOCK *b, BLOCKD *d, int_mv *ref_mv, int vp8_full_search_sadx3(MACROBLOCK *x, BLOCK *b, BLOCKD *d, int_mv *ref_mv, int sad_per_bit, int distance, - vp8_variance_fn_ptr_t *fn_ptr, int *mvcost[2], + vp8_variance_fn_ptr_t *fn_ptr, DEC_MVCOSTS, int_mv *center_mv) { unsigned char *what = (*(b->base_src) + b->src); int what_stride = b->src_stride; @@ -1681,14 +1717,19 @@ int vp8_full_search_sadx3(MACROBLOCK *x, BLOCK *b, BLOCKD *d, int_mv *ref_mv, int col_max = ref_col + distance; unsigned int sad_array[3]; - - int *mvsadcost[2] = {x->mvsadcost[0], x->mvsadcost[1]}; int_mv fcenter_mv; +#if CONFIG_NEWMVENTROPY + int *mvjsadcost = x->nmvjointsadcost; + int *mvsadcost[2] = {x->nmvsadcost[0], x->nmvsadcost[1]}; +#else + int *mvsadcost[2] = {x->mvsadcost[0], x->mvsadcost[1]}; if (xd->allow_high_precision_mv) { mvsadcost[0] = x->mvsadcost_hp[0]; mvsadcost[1] = x->mvsadcost_hp[1]; } +#endif + fcenter_mv.as_mv.row = center_mv->as_mv.row >> 3; fcenter_mv.as_mv.col = center_mv->as_mv.col >> 3; @@ -1702,7 +1743,7 @@ int vp8_full_search_sadx3(MACROBLOCK *x, BLOCK *b, BLOCKD *d, int_mv *ref_mv, // Baseline value at the centre bestsad = fn_ptr->sdf(what, what_stride, bestaddress, in_what_stride, 0x7fffffff) - + mvsad_err_cost(best_mv, &fcenter_mv, mvsadcost, sad_per_bit); + + mvsad_err_cost(best_mv, &fcenter_mv, MVSADCOSTS, sad_per_bit); // Apply further limits to prevent us looking using vectors that stretch beyiond the UMV border if (col_min < x->mv_col_min) @@ -1733,7 +1774,7 @@ int vp8_full_search_sadx3(MACROBLOCK *x, BLOCK *b, BLOCKD *d, int_mv *ref_mv, if (thissad < bestsad) { this_mv.as_mv.col = c; thissad += mvsad_err_cost(&this_mv, &fcenter_mv, - mvsadcost, sad_per_bit); + MVSADCOSTS, sad_per_bit); if (thissad < bestsad) { bestsad = thissad; @@ -1754,7 +1795,7 @@ int vp8_full_search_sadx3(MACROBLOCK *x, BLOCK *b, BLOCKD *d, int_mv *ref_mv, if (thissad < bestsad) { this_mv.as_mv.col = c; thissad += mvsad_err_cost(&this_mv, &fcenter_mv, - mvsadcost, sad_per_bit); + MVSADCOSTS, sad_per_bit); if (thissad < bestsad) { bestsad = thissad; @@ -1777,7 +1818,7 @@ int vp8_full_search_sadx3(MACROBLOCK *x, BLOCK *b, BLOCKD *d, int_mv *ref_mv, return fn_ptr->vf(what, what_stride, bestaddress, in_what_stride, (unsigned int *)(&thissad)) + - mv_err_cost(&this_mv, center_mv, mvcost, x->errorperbit, + mv_err_cost(&this_mv, center_mv, MVCOSTS, x->errorperbit, xd->allow_high_precision_mv); else return INT_MAX; @@ -1785,7 +1826,8 @@ int vp8_full_search_sadx3(MACROBLOCK *x, BLOCK *b, BLOCKD *d, int_mv *ref_mv, int vp8_full_search_sadx8(MACROBLOCK *x, BLOCK *b, BLOCKD *d, int_mv *ref_mv, int sad_per_bit, int distance, - vp8_variance_fn_ptr_t *fn_ptr, int *mvcost[2], + vp8_variance_fn_ptr_t *fn_ptr, + DEC_MVCOSTS, int_mv *center_mv) { unsigned char *what = (*(b->base_src) + b->src); int what_stride = b->src_stride; @@ -1812,14 +1854,19 @@ int vp8_full_search_sadx8(MACROBLOCK *x, BLOCK *b, BLOCKD *d, int_mv *ref_mv, DECLARE_ALIGNED_ARRAY(16, unsigned short, sad_array8, 8); unsigned int sad_array[3]; - - int *mvsadcost[2] = {x->mvsadcost[0], x->mvsadcost[1]}; int_mv fcenter_mv; +#if CONFIG_NEWMVENTROPY + int *mvjsadcost = x->nmvjointsadcost; + int *mvsadcost[2] = {x->nmvsadcost[0], x->nmvsadcost[1]}; +#else + int *mvsadcost[2] = {x->mvsadcost[0], x->mvsadcost[1]}; if 
(xd->allow_high_precision_mv) { mvsadcost[0] = x->mvsadcost_hp[0]; mvsadcost[1] = x->mvsadcost_hp[1]; } +#endif + fcenter_mv.as_mv.row = center_mv->as_mv.row >> 3; fcenter_mv.as_mv.col = center_mv->as_mv.col >> 3; @@ -1833,7 +1880,7 @@ int vp8_full_search_sadx8(MACROBLOCK *x, BLOCK *b, BLOCKD *d, int_mv *ref_mv, // Baseline value at the centre bestsad = fn_ptr->sdf(what, what_stride, bestaddress, in_what_stride, 0x7fffffff) - + mvsad_err_cost(best_mv, &fcenter_mv, mvsadcost, sad_per_bit); + + mvsad_err_cost(best_mv, &fcenter_mv, MVSADCOSTS, sad_per_bit); // Apply further limits to prevent us looking using vectors that stretch beyiond the UMV border if (col_min < x->mv_col_min) @@ -1864,7 +1911,7 @@ int vp8_full_search_sadx8(MACROBLOCK *x, BLOCK *b, BLOCKD *d, int_mv *ref_mv, if (thissad < bestsad) { this_mv.as_mv.col = c; thissad += mvsad_err_cost(&this_mv, &fcenter_mv, - mvsadcost, sad_per_bit); + MVSADCOSTS, sad_per_bit); if (thissad < bestsad) { bestsad = thissad; @@ -1890,7 +1937,7 @@ int vp8_full_search_sadx8(MACROBLOCK *x, BLOCK *b, BLOCKD *d, int_mv *ref_mv, if (thissad < bestsad) { this_mv.as_mv.col = c; thissad += mvsad_err_cost(&this_mv, &fcenter_mv, - mvsadcost, sad_per_bit); + MVSADCOSTS, sad_per_bit); if (thissad < bestsad) { bestsad = thissad; @@ -1911,7 +1958,7 @@ int vp8_full_search_sadx8(MACROBLOCK *x, BLOCK *b, BLOCKD *d, int_mv *ref_mv, if (thissad < bestsad) { this_mv.as_mv.col = c; thissad += mvsad_err_cost(&this_mv, &fcenter_mv, - mvsadcost, sad_per_bit); + MVSADCOSTS, sad_per_bit); if (thissad < bestsad) { bestsad = thissad; @@ -1933,7 +1980,7 @@ int vp8_full_search_sadx8(MACROBLOCK *x, BLOCK *b, BLOCKD *d, int_mv *ref_mv, return fn_ptr->vf(what, what_stride, bestaddress, in_what_stride, (unsigned int *)(&thissad)) + - mv_err_cost(&this_mv, center_mv, mvcost, x->errorperbit, + mv_err_cost(&this_mv, center_mv, MVCOSTS, x->errorperbit, xd->allow_high_precision_mv); else return INT_MAX; @@ -1941,7 +1988,7 @@ int vp8_full_search_sadx8(MACROBLOCK *x, BLOCK *b, BLOCKD *d, int_mv *ref_mv, int vp8_refining_search_sad(MACROBLOCK *x, BLOCK *b, BLOCKD *d, int_mv *ref_mv, int error_per_bit, int search_range, - vp8_variance_fn_ptr_t *fn_ptr, int *mvcost[2], + vp8_variance_fn_ptr_t *fn_ptr, DEC_MVCOSTS, int_mv *center_mv) { MV neighbors[4] = {{ -1, 0}, {0, -1}, {0, 1}, {1, 0}}; int i, j; @@ -1957,19 +2004,24 @@ int vp8_refining_search_sad(MACROBLOCK *x, BLOCK *b, BLOCKD *d, int_mv *ref_mv, int_mv this_mv; unsigned int bestsad = INT_MAX; MACROBLOCKD *xd = &x->e_mbd; - - int *mvsadcost[2] = {x->mvsadcost[0], x->mvsadcost[1]}; int_mv fcenter_mv; +#if CONFIG_NEWMVENTROPY + int *mvjsadcost = x->nmvjointsadcost; + int *mvsadcost[2] = {x->nmvsadcost[0], x->nmvsadcost[1]}; +#else + int *mvsadcost[2] = {x->mvsadcost[0], x->mvsadcost[1]}; if (xd->allow_high_precision_mv) { mvsadcost[0] = x->mvsadcost_hp[0]; mvsadcost[1] = x->mvsadcost_hp[1]; } +#endif fcenter_mv.as_mv.row = center_mv->as_mv.row >> 3; fcenter_mv.as_mv.col = center_mv->as_mv.col >> 3; - bestsad = fn_ptr->sdf(what, what_stride, best_address, in_what_stride, 0x7fffffff) + mvsad_err_cost(ref_mv, &fcenter_mv, mvsadcost, error_per_bit); + bestsad = fn_ptr->sdf(what, what_stride, best_address, in_what_stride, 0x7fffffff) + + mvsad_err_cost(ref_mv, &fcenter_mv, MVSADCOSTS, error_per_bit); for (i = 0; i < search_range; i++) { int best_site = -1; @@ -1986,7 +2038,7 @@ int vp8_refining_search_sad(MACROBLOCK *x, BLOCK *b, BLOCKD *d, int_mv *ref_mv, if (thissad < bestsad) { this_mv.as_mv.row = this_row_offset; this_mv.as_mv.col = 
this_col_offset; - thissad += mvsad_err_cost(&this_mv, &fcenter_mv, mvsadcost, error_per_bit); + thissad += mvsad_err_cost(&this_mv, &fcenter_mv, MVSADCOSTS, error_per_bit); if (thissad < bestsad) { bestsad = thissad; @@ -2012,7 +2064,7 @@ int vp8_refining_search_sad(MACROBLOCK *x, BLOCK *b, BLOCKD *d, int_mv *ref_mv, return fn_ptr->vf(what, what_stride, best_address, in_what_stride, (unsigned int *)(&thissad)) + - mv_err_cost(&this_mv, center_mv, mvcost, x->errorperbit, + mv_err_cost(&this_mv, center_mv, MVCOSTS, x->errorperbit, xd->allow_high_precision_mv); else return INT_MAX; @@ -2021,7 +2073,7 @@ int vp8_refining_search_sad(MACROBLOCK *x, BLOCK *b, BLOCKD *d, int_mv *ref_mv, int vp8_refining_search_sadx4(MACROBLOCK *x, BLOCK *b, BLOCKD *d, int_mv *ref_mv, int error_per_bit, int search_range, vp8_variance_fn_ptr_t *fn_ptr, - int *mvcost[2], int_mv *center_mv) { + DEC_MVCOSTS, int_mv *center_mv) { MV neighbors[4] = {{ -1, 0}, {0, -1}, {0, 1}, {1, 0}}; int i, j; short this_row_offset, this_col_offset; @@ -2036,19 +2088,24 @@ int vp8_refining_search_sadx4(MACROBLOCK *x, BLOCK *b, BLOCKD *d, int_mv this_mv; unsigned int bestsad = INT_MAX; MACROBLOCKD *xd = &x->e_mbd; - - int *mvsadcost[2] = {x->mvsadcost[0], x->mvsadcost[1]}; int_mv fcenter_mv; +#if CONFIG_NEWMVENTROPY + int *mvjsadcost = x->nmvjointsadcost; + int *mvsadcost[2] = {x->nmvsadcost[0], x->nmvsadcost[1]}; +#else + int *mvsadcost[2] = {x->mvsadcost[0], x->mvsadcost[1]}; if (xd->allow_high_precision_mv) { mvsadcost[0] = x->mvsadcost_hp[0]; mvsadcost[1] = x->mvsadcost_hp[1]; } +#endif fcenter_mv.as_mv.row = center_mv->as_mv.row >> 3; fcenter_mv.as_mv.col = center_mv->as_mv.col >> 3; - bestsad = fn_ptr->sdf(what, what_stride, best_address, in_what_stride, 0x7fffffff) + mvsad_err_cost(ref_mv, &fcenter_mv, mvsadcost, error_per_bit); + bestsad = fn_ptr->sdf(what, what_stride, best_address, in_what_stride, 0x7fffffff) + + mvsad_err_cost(ref_mv, &fcenter_mv, MVSADCOSTS, error_per_bit); for (i = 0; i < search_range; i++) { int best_site = -1; @@ -2073,7 +2130,7 @@ int vp8_refining_search_sadx4(MACROBLOCK *x, BLOCK *b, BLOCKD *d, if (sad_array[j] < bestsad) { this_mv.as_mv.row = ref_mv->as_mv.row + neighbors[j].row; this_mv.as_mv.col = ref_mv->as_mv.col + neighbors[j].col; - sad_array[j] += mvsad_err_cost(&this_mv, &fcenter_mv, mvsadcost, error_per_bit); + sad_array[j] += mvsad_err_cost(&this_mv, &fcenter_mv, MVSADCOSTS, error_per_bit); if (sad_array[j] < bestsad) { bestsad = sad_array[j]; @@ -2094,7 +2151,7 @@ int vp8_refining_search_sadx4(MACROBLOCK *x, BLOCK *b, BLOCKD *d, if (thissad < bestsad) { this_mv.as_mv.row = this_row_offset; this_mv.as_mv.col = this_col_offset; - thissad += mvsad_err_cost(&this_mv, &fcenter_mv, mvsadcost, error_per_bit); + thissad += mvsad_err_cost(&this_mv, &fcenter_mv, MVSADCOSTS, error_per_bit); if (thissad < bestsad) { bestsad = thissad; @@ -2121,7 +2178,7 @@ int vp8_refining_search_sadx4(MACROBLOCK *x, BLOCK *b, BLOCKD *d, return fn_ptr->vf(what, what_stride, best_address, in_what_stride, (unsigned int *)(&thissad)) + - mv_err_cost(&this_mv, center_mv, mvcost, x->errorperbit, + mv_err_cost(&this_mv, center_mv, MVCOSTS, x->errorperbit, xd->allow_high_precision_mv); else return INT_MAX; diff --git a/vp8/encoder/mcomp.h b/vp8/encoder/mcomp.h index c27f3bf..afca580 100644 --- a/vp8/encoder/mcomp.h +++ b/vp8/encoder/mcomp.h @@ -15,6 +15,22 @@ #include "block.h" #include "variance.h" +#if CONFIG_NEWMVENTROPY +#define MVCOSTS mvjcost, mvcost +#define MVSADCOSTS mvjsadcost, mvsadcost +#define DEC_MVCOSTS int 
*mvjcost, int *mvcost[2] +#define DEC_MVSADCOSTS int *mvjsadcost, int *mvsadcost[2] +#define NULLMVCOST NULL, NULL +#define XMVCOST x->nmvjointcost, (x->e_mbd.allow_high_precision_mv?x->nmvcost_hp:x->nmvcost) +#else +#define MVCOSTS mvcost +#define MVSADCOSTS mvsadcost +#define DEC_MVCOSTS int *mvcost[2] +#define DEC_MVSADCOSTS int *mvsadcost[2] +#define NULLMVCOST NULL +#define XMVCOST (x->e_mbd.allow_high_precision_mv?x->mvcost_hp:x->mvcost) +#endif /* CONFIG_NEWMVENTROPY */ + #ifdef ENTROPY_STATS extern void init_mv_ref_counts(); extern void accum_mv_refs(MB_PREDICTION_MODE, const int near_mv_ref_cts[4]); @@ -26,7 +42,7 @@ extern void accum_mv_refs(MB_PREDICTION_MODE, const int near_mv_ref_cts[4]); #define MAX_FIRST_STEP (1 << (MAX_MVSEARCH_STEPS-1)) // Maximum size of the first step in full pel units extern void vp8_clamp_mv_min_max(MACROBLOCK *x, int_mv *ref_mv); -extern int vp8_mv_bit_cost(int_mv *mv, int_mv *ref, int *mvcost[2], +extern int vp8_mv_bit_cost(int_mv *mv, int_mv *ref, DEC_MVCOSTS, int Weight, int ishp); extern void vp8_init_dsmotion_compensation(MACROBLOCK *x, int stride); extern void vp8_init3smotion_compensation(MACROBLOCK *x, int stride); @@ -48,14 +64,14 @@ extern int vp8_hex_search int search_param, int error_per_bit, const vp8_variance_fn_ptr_t *vf, - int *mvsadcost[2], - int *mvcost[2], + DEC_MVSADCOSTS, + DEC_MVCOSTS, int_mv *center_mv ); typedef int (fractional_mv_step_fp) (MACROBLOCK *x, BLOCK *b, BLOCKD *d, int_mv *bestmv, int_mv *ref_mv, - int error_per_bit, const vp8_variance_fn_ptr_t *vfp, int *mvcost[2], + int error_per_bit, const vp8_variance_fn_ptr_t *vfp, DEC_MVCOSTS, int *distortion, unsigned int *sse); extern fractional_mv_step_fp vp8_find_best_sub_pixel_step_iteratively; extern fractional_mv_step_fp vp8_find_best_sub_pixel_step; @@ -72,7 +88,7 @@ extern fractional_mv_step_fp vp8_skip_fractional_mv_step; int sad_per_bit, \ int distance, \ vp8_variance_fn_ptr_t *fn_ptr, \ - int *mvcost[2], \ + DEC_MVSADCOSTS, \ int_mv *center_mv \ ) @@ -86,7 +102,7 @@ extern fractional_mv_step_fp vp8_skip_fractional_mv_step; int sad_per_bit, \ int distance, \ vp8_variance_fn_ptr_t *fn_ptr, \ - int *mvcost[2], \ + DEC_MVSADCOSTS, \ int_mv *center_mv \ ) @@ -102,7 +118,7 @@ extern fractional_mv_step_fp vp8_skip_fractional_mv_step; int sad_per_bit, \ int *num00, \ vp8_variance_fn_ptr_t *fn_ptr, \ - int *mvcost[2], \ + DEC_MVSADCOSTS, \ int_mv *center_mv \ ) diff --git a/vp8/encoder/onyx_if.c b/vp8/encoder/onyx_if.c index 68c28c3..fdb3fa1 100644 --- a/vp8/encoder/onyx_if.c +++ b/vp8/encoder/onyx_if.c @@ -149,6 +149,13 @@ extern int skip_false_count; extern int intra_mode_stats[VP8_BINTRAMODES][VP8_BINTRAMODES][VP8_BINTRAMODES]; #endif +#if CONFIG_NEWMVENTROPY +#ifdef NMV_STATS +extern void init_nmvstats(); +extern void print_nmvstats(); +#endif +#endif + #ifdef SPEEDSTATS unsigned int frames_at_speed[16] = {0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0}; unsigned int tot_pm = 0; @@ -1697,6 +1704,48 @@ void vp8_change_config(VP8_PTR ptr, VP8_CONFIG *oxcf) { #define M_LOG2_E 0.693147180559945309417 #define log2f(x) (log (x) / (float) M_LOG2_E) + +#if CONFIG_NEWMVENTROPY + +static void cal_nmvjointsadcost(int *mvjointsadcost) { + mvjointsadcost[0] = 600; + mvjointsadcost[1] = 300; + mvjointsadcost[2] = 300; + mvjointsadcost[3] = 300; +} + +static void cal_nmvsadcosts(int *mvsadcost[2]) { + int i = 1; + + mvsadcost [0] [0] = 0; + mvsadcost [1] [0] = 0; + + do { + double z = 256 * (2 * (log2f(8 * i) + .6)); + mvsadcost [0][i] = (int) z; + mvsadcost [1][i] = (int) z; +
mvsadcost [0][-i] = (int) z; + mvsadcost [1][-i] = (int) z; + } while (++i <= MV_MAX); +} + +static void cal_nmvsadcosts_hp(int *mvsadcost[2]) { + int i = 1; + + mvsadcost [0] [0] = 0; + mvsadcost [1] [0] = 0; + + do { + double z = 256 * (2 * (log2f(8 * i) + .6)); + mvsadcost [0][i] = (int) z; + mvsadcost [1][i] = (int) z; + mvsadcost [0][-i] = (int) z; + mvsadcost [1][-i] = (int) z; + } while (++i <= MV_MAX); +} + +#else + static void cal_mvsadcosts(int *mvsadcost[2]) { int i = 1; @@ -1727,6 +1776,8 @@ static void cal_mvsadcosts_hp(int *mvsadcost[2]) { } while (++i <= mvfp_max_hp); } +#endif /* CONFIG_NEWMVENTROPY */ + VP8_PTR vp8_create_compressor(VP8_CONFIG *oxcf) { int i; volatile union { @@ -1838,6 +1889,11 @@ VP8_PTR vp8_create_compressor(VP8_CONFIG *oxcf) { vp8_zero(inter_uv_modes); vp8_zero(inter_b_modes); #endif +#if CONFIG_NEWMVENTROPY +#ifdef NMV_STATS + init_nmvstats(); +#endif +#endif /*Initialize the feed-forward activity masking.*/ cpi->activity_avg = 90 << 12; @@ -1903,19 +1959,32 @@ VP8_PTR vp8_create_compressor(VP8_CONFIG *oxcf) { cpi->gf_rate_correction_factor = 1.0; cpi->twopass.est_max_qcorrection_factor = 1.0; +#if CONFIG_NEWMVENTROPY + cal_nmvjointsadcost(cpi->mb.nmvjointsadcost); + cpi->mb.nmvcost[0] = &cpi->mb.nmvcosts[0][MV_MAX]; + cpi->mb.nmvcost[1] = &cpi->mb.nmvcosts[1][MV_MAX]; + cpi->mb.nmvsadcost[0] = &cpi->mb.nmvsadcosts[0][MV_MAX]; + cpi->mb.nmvsadcost[1] = &cpi->mb.nmvsadcosts[1][MV_MAX]; + cal_nmvsadcosts(cpi->mb.nmvsadcost); + + cpi->mb.nmvcost_hp[0] = &cpi->mb.nmvcosts_hp[0][MV_MAX]; + cpi->mb.nmvcost_hp[1] = &cpi->mb.nmvcosts_hp[1][MV_MAX]; + cpi->mb.nmvsadcost_hp[0] = &cpi->mb.nmvsadcosts_hp[0][MV_MAX]; + cpi->mb.nmvsadcost_hp[1] = &cpi->mb.nmvsadcosts_hp[1][MV_MAX]; + cal_nmvsadcosts_hp(cpi->mb.nmvsadcost_hp); +#else cpi->mb.mvcost[0] = &cpi->mb.mvcosts[0][mv_max + 1]; cpi->mb.mvcost[1] = &cpi->mb.mvcosts[1][mv_max + 1]; cpi->mb.mvsadcost[0] = &cpi->mb.mvsadcosts[0][mvfp_max + 1]; cpi->mb.mvsadcost[1] = &cpi->mb.mvsadcosts[1][mvfp_max + 1]; - cal_mvsadcosts(cpi->mb.mvsadcost); cpi->mb.mvcost_hp[0] = &cpi->mb.mvcosts_hp[0][mv_max_hp + 1]; cpi->mb.mvcost_hp[1] = &cpi->mb.mvcosts_hp[1][mv_max_hp + 1]; cpi->mb.mvsadcost_hp[0] = &cpi->mb.mvsadcosts_hp[0][mvfp_max_hp + 1]; cpi->mb.mvsadcost_hp[1] = &cpi->mb.mvsadcosts_hp[1][mvfp_max_hp + 1]; - cal_mvsadcosts_hp(cpi->mb.mvsadcost_hp); +#endif /* CONFIG_NEWMVENTROPY */ for (i = 0; i < KEY_FRAME_CONTEXT; i++) { cpi->prior_key_frame_distance[i] = (int)cpi->output_frame_rate; @@ -2068,6 +2137,12 @@ void vp8_remove_compressor(VP8_PTR *ptr) { print_mode_context(); } #endif +#if CONFIG_NEWMVENTROPY +#ifdef NMV_STATS + if (cpi->pass != 1) + print_nmvstats(); +#endif +#endif #if CONFIG_INTERNAL_STATS @@ -3697,9 +3772,14 @@ static void encode_frame_to_data_rate vp8_copy(cpi->common.fc.mbsplit_counts, cpi->mbsplit_count); vp8_adapt_mode_probs(&cpi->common); +#if CONFIG_NEWMVENTROPY + cpi->common.fc.NMVcount = cpi->NMVcount; + vp8_adapt_nmv_probs(&cpi->common, cpi->mb.e_mbd.allow_high_precision_mv); +#else vp8_copy(cpi->common.fc.MVcount, cpi->MVcount); vp8_copy(cpi->common.fc.MVcount_hp, cpi->MVcount_hp); vp8_adapt_mv_probs(&cpi->common); +#endif /* CONFIG_NEWMVENTROPY */ vp8_update_mode_context(&cpi->common); } diff --git a/vp8/encoder/onyx_int.h b/vp8/encoder/onyx_int.h index 04c7ad4..0bdc07f 100644 --- a/vp8/encoder/onyx_int.h +++ b/vp8/encoder/onyx_int.h @@ -67,10 +67,17 @@ #endif typedef struct { +#if CONFIG_NEWMVENTROPY + nmv_context nmvc; + int nmvjointcost[MV_JOINTS]; + int nmvcosts[2][MV_VALS]; + int 
nmvcosts_hp[2][MV_VALS]; +#else MV_CONTEXT mvc[2]; int mvcosts[2][MVvals + 1]; MV_CONTEXT_HP mvc_hp[2]; int mvcosts_hp[2][MVvals_hp + 1]; +#endif #ifdef MODE_STATS // Stats @@ -549,8 +556,12 @@ typedef struct VP8_COMP { // int uv_mode_count[VP8_UV_MODES]; /* intra MB type cts this frame */ int y_uv_mode_count[VP8_YMODES][VP8_UV_MODES]; +#if CONFIG_NEWMVENTROPY + nmv_context_counts NMVcount; +#else unsigned int MVcount [2] [MVvals]; /* (row,col) MV cts this frame */ unsigned int MVcount_hp [2] [MVvals_hp]; /* (row,col) MV cts this frame */ +#endif unsigned int coef_counts [BLOCK_TYPES] [COEF_BANDS] [PREV_COEF_CONTEXTS] [MAX_ENTROPY_TOKENS]; /* for this frame */ // DECLARE_ALIGNED(16, int, coef_counts_backup [BLOCK_TYPES] [COEF_BANDS] [PREV_COEF_CONTEXTS] [MAX_ENTROPY_TOKENS]); //not used any more diff --git a/vp8/encoder/ratectrl.c b/vp8/encoder/ratectrl.c index 809279e..e985748 100644 --- a/vp8/encoder/ratectrl.c +++ b/vp8/encoder/ratectrl.c @@ -132,10 +132,17 @@ void vp8_save_coding_context(VP8_COMP *cpi) { // intended for use in a re-code loop in vp8_compress_frame where the // quantizer value is adjusted between loop iterations. +#if CONFIG_NEWMVENTROPY + cc->nmvc = cm->fc.nmvc; + vp8_copy(cc->nmvjointcost, cpi->mb.nmvjointcost); + vp8_copy(cc->nmvcosts, cpi->mb.nmvcosts); + vp8_copy(cc->nmvcosts_hp, cpi->mb.nmvcosts_hp); +#else vp8_copy(cc->mvc, cm->fc.mvc); vp8_copy(cc->mvcosts, cpi->mb.mvcosts); vp8_copy(cc->mvc_hp, cm->fc.mvc_hp); vp8_copy(cc->mvcosts_hp, cpi->mb.mvcosts_hp); +#endif vp8_copy(cc->mv_ref_ct, cm->fc.mv_ref_ct); vp8_copy(cc->mode_context, cm->fc.mode_context); @@ -188,10 +195,17 @@ void vp8_restore_coding_context(VP8_COMP *cpi) { // Restore key state variables to the snapshot state stored in the // previous call to vp8_save_coding_context. 
+#if CONFIG_NEWMVENTROPY + cm->fc.nmvc = cc->nmvc; + vp8_copy(cpi->mb.nmvjointcost, cc->nmvjointcost); + vp8_copy(cpi->mb.nmvcosts, cc->nmvcosts); + vp8_copy(cpi->mb.nmvcosts_hp, cc->nmvcosts_hp); +#else vp8_copy(cm->fc.mvc, cc->mvc); vp8_copy(cpi->mb.mvcosts, cc->mvcosts); vp8_copy(cm->fc.mvc_hp, cc->mvc_hp); vp8_copy(cpi->mb.mvcosts_hp, cc->mvcosts_hp); +#endif vp8_copy(cm->fc.mv_ref_ct, cc->mv_ref_ct); vp8_copy(cm->fc.mode_context, cc->mode_context); @@ -244,17 +258,17 @@ void vp8_setup_key_frame(VP8_COMP *cpi) { vp8_kf_default_bmode_probs(cpi->common.kf_bmode_prob); vp8_init_mbmode_probs(& cpi->common); - vpx_memcpy(cpi->common.fc.mvc, vp8_default_mv_context, sizeof(vp8_default_mv_context)); - { - int flag[2] = {1, 1}; - vp8_build_component_cost_table(cpi->mb.mvcost, (const MV_CONTEXT *) cpi->common.fc.mvc, flag); - } - vpx_memcpy(cpi->common.fc.mvc_hp, vp8_default_mv_context_hp, sizeof(vp8_default_mv_context_hp)); + vp8_init_mv_probs(& cpi->common); +#if CONFIG_NEWMVENTROPY == 0 + /* this is not really required */ { int flag[2] = {1, 1}; - vp8_build_component_cost_table_hp(cpi->mb.mvcost_hp, (const MV_CONTEXT_HP *) cpi->common.fc.mvc_hp, flag); + vp8_build_component_cost_table( + cpi->mb.mvcost, (const MV_CONTEXT *) cpi->common.fc.mvc, flag); + vp8_build_component_cost_table_hp( + cpi->mb.mvcost_hp, (const MV_CONTEXT_HP *) cpi->common.fc.mvc_hp, flag); } - +#endif cpi->common.txfm_mode = ALLOW_8X8; @@ -285,6 +299,7 @@ void vp8_setup_key_frame(VP8_COMP *cpi) { sizeof(default_vp8_mode_contexts)); } + void vp8_setup_inter_frame(VP8_COMP *cpi) { cpi->common.txfm_mode = ALLOW_8X8; diff --git a/vp8/encoder/rdopt.c b/vp8/encoder/rdopt.c index 391254b..a2b234e 100644 --- a/vp8/encoder/rdopt.c +++ b/vp8/encoder/rdopt.c @@ -37,6 +37,7 @@ #include "vpx_mem/vpx_mem.h" #include "dct.h" #include "vp8/common/systemdependent.h" +#include "vp8/encoder/encodemv.h" #include "vp8/common/seg_common.h" #include "vp8/common/pred_common.h" @@ -58,8 +59,6 @@ extern void vp8_update_zbin_extra(VP8_COMP *cpi, MACROBLOCK *x); extern void vp8_ht_quantize_b(BLOCK *b, BLOCKD *d); #endif -#define XMVCOST (x->e_mbd.allow_high_precision_mv?x->mvcost_hp:x->mvcost) - #define MAXF(a,b) (((a) > (b)) ? (a) : (b)) #define INVALID_MV 0x80008000 @@ -376,6 +375,17 @@ void vp8_initialize_rd_consts(VP8_COMP *cpi, int QIndex) { cpi->common.kf_ymode_probs_index = cpi->common.base_qindex >> 4; vp8_init_mode_costs(cpi); + if (cpi->common.frame_type != KEY_FRAME) + { +#if CONFIG_NEWMVENTROPY + vp8_build_nmv_cost_table( + cpi->mb.nmvjointcost, + cpi->mb.e_mbd.allow_high_precision_mv ? 
+ cpi->mb.nmvcost_hp : cpi->mb.nmvcost, + &cpi->common.fc.nmvc, + cpi->mb.e_mbd.allow_high_precision_mv, 1, 1); +#endif + } } void vp8_auto_select_speed(VP8_COMP *cpi) { @@ -404,7 +414,9 @@ void vp8_auto_select_speed(VP8_COMP *cpi) { cpi->oxcf.cpu_used = -16; */ - if (cpi->avg_pick_mode_time < milliseconds_for_compress && (cpi->avg_encode_time - cpi->avg_pick_mode_time) < milliseconds_for_compress) { + if (cpi->avg_pick_mode_time < milliseconds_for_compress && + (cpi->avg_encode_time - cpi->avg_pick_mode_time) < + milliseconds_for_compress) { if (cpi->avg_pick_mode_time == 0) { cpi->Speed = 4; } else { @@ -418,7 +430,8 @@ void vp8_auto_select_speed(VP8_COMP *cpi) { } } - if (milliseconds_for_compress * 100 > cpi->avg_encode_time * auto_speed_thresh[cpi->Speed]) { + if (milliseconds_for_compress * 100 > + cpi->avg_encode_time * auto_speed_thresh[cpi->Speed]) { cpi->Speed -= 1; cpi->avg_pick_mode_time = 0; cpi->avg_encode_time = 0; @@ -1883,11 +1896,15 @@ void vp8_set_mbmode_and_mvs(MACROBLOCK *x, MB_PREDICTION_MODE mb, int_mv *mv) { x->e_mbd.mode_info_context->mbmi.mv[0].as_int = mv->as_int; } -static int labels2mode(MACROBLOCK *x, int const *labelings, int which_label, - B_PREDICTION_MODE this_mode, - int_mv *this_mv, int_mv *this_second_mv, - int_mv seg_mvs[MAX_REF_FRAMES - 1], int_mv *best_ref_mv, - int_mv *second_best_ref_mv, int *mvcost[2]) { +static int labels2mode( + MACROBLOCK *x, + int const *labelings, int which_label, + B_PREDICTION_MODE this_mode, + int_mv *this_mv, int_mv *this_second_mv, + int_mv seg_mvs[MAX_REF_FRAMES - 1], + int_mv *best_ref_mv, + int_mv *second_best_ref_mv, + DEC_MVCOSTS) { MACROBLOCKD *const xd = & x->e_mbd; MODE_INFO *const mic = xd->mode_info_context; MB_MODE_INFO * mbmi = &mic->mbmi; @@ -1922,11 +1939,11 @@ static int labels2mode(MACROBLOCK *x, int const *labelings, int which_label, seg_mvs[mbmi->second_ref_frame - 1].as_int; } - thismvcost = vp8_mv_bit_cost(this_mv, best_ref_mv, mvcost, + thismvcost = vp8_mv_bit_cost(this_mv, best_ref_mv, MVCOSTS, 102, xd->allow_high_precision_mv); if (mbmi->second_ref_frame) { thismvcost += vp8_mv_bit_cost(this_second_mv, second_best_ref_mv, - mvcost, 102, + MVCOSTS, 102, xd->allow_high_precision_mv); } break; @@ -2656,8 +2673,10 @@ void vp8_cal_sad(VP8_COMP *cpi, MACROBLOCKD *xd, MACROBLOCK *x, int recon_yoffse void rd_update_mvcount(VP8_COMP *cpi, MACROBLOCK *x, int_mv *best_ref_mv, int_mv *second_best_ref_mv) { - MB_MODE_INFO * mbmi = &x->e_mbd.mode_info_context->mbmi; +#if CONFIG_NEWMVENTROPY + MV mv; +#endif if (mbmi->mode == SPLITMV) { int i; @@ -2665,6 +2684,21 @@ void rd_update_mvcount(VP8_COMP *cpi, MACROBLOCK *x, for (i = 0; i < x->partition_info->count; i++) { if (x->partition_info->bmi[i].mode == NEW4X4) { if (x->e_mbd.allow_high_precision_mv) { +#if CONFIG_NEWMVENTROPY + mv.row = (x->partition_info->bmi[i].mv.as_mv.row + - best_ref_mv->as_mv.row); + mv.col = (x->partition_info->bmi[i].mv.as_mv.col + - best_ref_mv->as_mv.col); + vp8_increment_nmv(&mv, &best_ref_mv->as_mv, &cpi->NMVcount, 1); + if (x->e_mbd.mode_info_context->mbmi.second_ref_frame) { + mv.row = (x->partition_info->bmi[i].second_mv.as_mv.row + - second_best_ref_mv->as_mv.row); + mv.col = (x->partition_info->bmi[i].second_mv.as_mv.col + - second_best_ref_mv->as_mv.col); + vp8_increment_nmv(&mv, &second_best_ref_mv->as_mv, + &cpi->NMVcount, 1); + } +#else cpi->MVcount_hp[0][mv_max_hp + (x->partition_info->bmi[i].mv.as_mv.row - best_ref_mv->as_mv.row)]++; cpi->MVcount_hp[1][mv_max_hp + (x->partition_info->bmi[i].mv.as_mv.col @@ -2675,8 +2709,23 
@@ void rd_update_mvcount(VP8_COMP *cpi, MACROBLOCK *x, cpi->MVcount_hp[1][mv_max_hp + (x->partition_info->bmi[i].second_mv.as_mv.col - second_best_ref_mv->as_mv.col)]++; } - } else - { +#endif + } else { +#if CONFIG_NEWMVENTROPY + mv.row = (x->partition_info->bmi[i].mv.as_mv.row + - best_ref_mv->as_mv.row); + mv.col = (x->partition_info->bmi[i].mv.as_mv.col + - best_ref_mv->as_mv.col); + vp8_increment_nmv(&mv, &best_ref_mv->as_mv, &cpi->NMVcount, 0); + if (x->e_mbd.mode_info_context->mbmi.second_ref_frame) { + mv.row = (x->partition_info->bmi[i].second_mv.as_mv.row + - second_best_ref_mv->as_mv.row); + mv.col = (x->partition_info->bmi[i].second_mv.as_mv.col + - second_best_ref_mv->as_mv.col); + vp8_increment_nmv(&mv, &second_best_ref_mv->as_mv, + &cpi->NMVcount, 0); + } +#else cpi->MVcount[0][mv_max + ((x->partition_info->bmi[i].mv.as_mv.row - best_ref_mv->as_mv.row) >> 1)]++; cpi->MVcount[1][mv_max + ((x->partition_info->bmi[i].mv.as_mv.col @@ -2687,11 +2736,22 @@ void rd_update_mvcount(VP8_COMP *cpi, MACROBLOCK *x, cpi->MVcount[1][mv_max + ((x->partition_info->bmi[i].second_mv.as_mv.col - second_best_ref_mv->as_mv.col) >> 1)]++; } +#endif } } } } else if (mbmi->mode == NEWMV) { if (x->e_mbd.allow_high_precision_mv) { +#if CONFIG_NEWMVENTROPY + mv.row = (mbmi->mv[0].as_mv.row - best_ref_mv->as_mv.row); + mv.col = (mbmi->mv[0].as_mv.col - best_ref_mv->as_mv.col); + vp8_increment_nmv(&mv, &best_ref_mv->as_mv, &cpi->NMVcount, 1); + if (mbmi->second_ref_frame) { + mv.row = (mbmi->mv[1].as_mv.row - second_best_ref_mv->as_mv.row); + mv.col = (mbmi->mv[1].as_mv.col - second_best_ref_mv->as_mv.col); + vp8_increment_nmv(&mv, &second_best_ref_mv->as_mv, &cpi->NMVcount, 1); + } +#else cpi->MVcount_hp[0][mv_max_hp + (mbmi->mv[0].as_mv.row - best_ref_mv->as_mv.row)]++; cpi->MVcount_hp[1][mv_max_hp + (mbmi->mv[0].as_mv.col @@ -2702,8 +2762,18 @@ void rd_update_mvcount(VP8_COMP *cpi, MACROBLOCK *x, cpi->MVcount_hp[1][mv_max_hp + (mbmi->mv[1].as_mv.col - second_best_ref_mv->as_mv.col)]++; } - } else - { +#endif + } else { +#if CONFIG_NEWMVENTROPY + mv.row = (mbmi->mv[0].as_mv.row - best_ref_mv->as_mv.row); + mv.col = (mbmi->mv[0].as_mv.col - best_ref_mv->as_mv.col); + vp8_increment_nmv(&mv, &best_ref_mv->as_mv, &cpi->NMVcount, 0); + if (mbmi->second_ref_frame) { + mv.row = (mbmi->mv[1].as_mv.row - second_best_ref_mv->as_mv.row); + mv.col = (mbmi->mv[1].as_mv.col - second_best_ref_mv->as_mv.col); + vp8_increment_nmv(&mv, &second_best_ref_mv->as_mv, &cpi->NMVcount, 0); + } +#else cpi->MVcount[0][mv_max + ((mbmi->mv[0].as_mv.row - best_ref_mv->as_mv.row) >> 1)]++; cpi->MVcount[1][mv_max + ((mbmi->mv[0].as_mv.col @@ -2714,6 +2784,7 @@ void rd_update_mvcount(VP8_COMP *cpi, MACROBLOCK *x, cpi->MVcount[1][mv_max + ((mbmi->mv[1].as_mv.col - second_best_ref_mv->as_mv.col) >> 1)]++; } +#endif } } } diff --git a/vp8/encoder/temporal_filter.c b/vp8/encoder/temporal_filter.c index 035eebb..d57613b 100644 --- a/vp8/encoder/temporal_filter.c +++ b/vp8/encoder/temporal_filter.c @@ -187,7 +187,8 @@ static int vp8_temporal_filter_find_matching_mb_c // Ignore mv costing by sending NULL pointer instead of cost arrays bestsme = vp8_hex_search(x, b, d, &best_ref_mv1_full, &d->bmi.as_mv.first, step_param, sadpb, &cpi->fn_ptr[BLOCK_16X16], - NULL, NULL, &best_ref_mv1); + NULLMVCOST, NULLMVCOST, + &best_ref_mv1); #if ALT_REF_SUBPEL_ENABLED // Try sub-pixel MC? 
@@ -200,7 +201,8 @@ static int vp8_temporal_filter_find_matching_mb_c &best_ref_mv1, x->errorperbit, &cpi->fn_ptr[BLOCK_16X16], - NULL, &distortion, &sse); + NULLMVCOST, + &distortion, &sse); } #endif -- 2.7.4
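
Note on the new MV rate computation: under CONFIG_NEWMVENTROPY a candidate vector's rate is the cost of its joint type (which of the two components are non-zero) plus an independent table lookup for each component difference, scaled by a weight and shifted down by 7 bits (or by error_per_bit with a >> 8 in the SAD/error variants), as vp8_mv_bit_cost() and mv_err_cost() do above. The standalone sketch below mirrors that arithmetic; mv_t, joint_of, nmv_rate and the table contents are illustrative placeholders, not encoder code, and the tables are kept tiny so the row/col differences stay non-negative (the encoder instead centres its tables, e.g. nmvcost[0] = &nmvcosts[0][MV_MAX], so negative differences index below the pointer).

  #include <stdio.h>

  typedef struct { int row, col; } mv_t;

  /* joint index: 2*(row differs) + (col differs), matching the MVC macro */
  static int joint_of(mv_t d) {
    return (d.row != 0) * 2 + (d.col != 0);
  }

  /* rate = (joint cost + row cost + col cost) * weight >> 7 */
  static int nmv_rate(mv_t mv, mv_t ref, const int *jcost,
                      const int *rowcost, const int *colcost, int weight) {
    mv_t d;
    d.row = mv.row - ref.row;
    d.col = mv.col - ref.col;
    return ((jcost[joint_of(d)] + rowcost[d.row] + colcost[d.col]) * weight) >> 7;
  }

  int main(void) {
    int jcost[4]   = { 0, 80, 80, 120 };  /* made-up joint costs           */
    int rowcost[3] = { 0, 60, 90 };       /* made-up cost of row diff 0..2 */
    int colcost[3] = { 0, 60, 90 };       /* made-up cost of col diff 0..2 */
    mv_t mv  = { 2, 1 };
    mv_t ref = { 0, 0 };
    printf("rate = %d\n", nmv_rate(mv, ref, jcost, rowcost, colcost, 128));
    return 0;
  }

With a weight of 128 the shift cancels, so this example prints the plain sum of the three table entries (270).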
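
Note on the full-pel SAD cost tables: cal_nmvsadcosts() above fills one symmetric table per component with cost(i) = 256 * 2 * (log2(8*i) + 0.6) for i != 0 and cost(0) = 0, while the joint SAD costs are small fixed constants from cal_nmvjointsadcost(). A minimal sketch of the same fill, with SAD_RANGE standing in for MV_MAX and log2 spelled out via natural logs much as the file's log2f macro does:

  #include <math.h>
  #include <stdio.h>

  #define SAD_RANGE 8   /* stands in for MV_MAX; kept small for the example */

  int main(void) {
    static int table[2 * SAD_RANGE + 1];
    int *sadcost = &table[SAD_RANGE];  /* zero-difference entry in the middle */
    int i;

    sadcost[0] = 0;
    for (i = 1; i <= SAD_RANGE; ++i) {
      double z = 256 * (2 * (log(8.0 * i) / log(2.0) + 0.6));
      sadcost[i]  = (int)z;
      sadcost[-i] = (int)z;
    }
    for (i = -SAD_RANGE; i <= SAD_RANGE; ++i)
      printf("%3d: %4d\n", i, sadcost[i]);
    return 0;
  }

The logarithmic shape means the marginal cost of a larger full-pel difference grows slowly, so the search is not over-penalised for long vectors.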