Merge "vp9_reconinter.h static functions in header converted to global"

author Jim Bankoski <jimbankoski@google.com>

Mon, 10 Mar 2014 14:36:05 +0000 (07:36 -0700)

committer Gerrit Code Review <gerrit@gerrit.golo.chromium.org>

Mon, 10 Mar 2014 14:36:05 +0000 (07:36 -0700)
author Jim Bankoski <jimbankoski@google.com>
Mon, 10 Mar 2014 14:36:05 +0000 (07:36 -0700)
committer Gerrit Code Review <gerrit@gerrit.golo.chromium.org>
Mon, 10 Mar 2014 14:36:05 +0000 (07:36 -0700)
diff --git a/.gitignore b/.gitignore

index aa95d57..2cf3680 100644 (file)
--- a/.gitignore
+++ b/.gitignore
@@ -28,9 +28,7 @@
  /examples/decode_to_md5
  /examples/decode_with_drops
  /examples/decode_with_partial_drops
-/examples/error_resilient
  /examples/example_xma
-/examples/force_keyframe
  /examples/postproc
  /examples/simple_decoder
  /examples/simple_encoder
diff --git a/build/make/Makefile b/build/make/Makefile

index 6894d6d..dd7fb4a 100644 (file)
--- a/build/make/Makefile
+++ b/build/make/Makefile
@@ -94,6 +94,16 @@ clean::
         rm -f $(OBJS-yes) $(OBJS-yes:.o=.d) $(OBJS-yes:.asm.s.o=.asm.s)
         rm -f $(CLEAN-OBJS)
  
+.PHONY: clean
+distclean: clean
+       if [ -z "$(target)" ]; then \
+      rm -f Makefile; \
+      rm -f config.log config.mk; \
+      rm -f vpx_config.[hc] vpx_config.asm; \
+    else \
+      rm -f $(target)-$(TOOLCHAIN).mk; \
+    fi
+
  .PHONY: dist
  dist:
  .PHONY: install
@@ -307,7 +317,7 @@ endef
  ifneq ($(target),)
  include $(SRC_PATH_BARE)/$(target:-$(TOOLCHAIN)=).mk
  endif
-ifeq ($(filter clean,$(MAKECMDGOALS)),)
+ifeq ($(filter %clean,$(MAKECMDGOALS)),)
    # Older versions of make don't like -include directives with no arguments
    ifneq ($(filter %.d,$(OBJS-yes:.o=.d)),)
      -include $(filter %.d,$(OBJS-yes:.o=.d))
diff --git a/build/make/configure.sh b/build/make/configure.sh

index c379c74..7991fb3 100755 (executable)
--- a/build/make/configure.sh
+++ b/build/make/configure.sh
@@ -41,7 +41,7 @@ log(){
  
  log_file(){
      log BEGIN $1
-    pr -n -t $1 >>$logfile
+    cat -n $1 >>$logfile
      log END $1
  }
  
@@ -327,7 +327,7 @@ EOF
  
  check_cflags() {
      log check_cflags "$@"
-    check_cc "$@" <<EOF
+    check_cc -Werror "$@" <<EOF
  int x;
  EOF
  }
@@ -341,7 +341,7 @@ check_cxxflags() {
  int x;
  EOF
        ;;
-      *) check_cxx "$@" <<EOF
+      *) check_cxx -Werror "$@" <<EOF
  int x;
  EOF
        ;;
@@ -405,8 +405,8 @@ true
  }
  
  write_common_target_config_mk() {
-    local CC=${CC}
-    local CXX=${CXX}
+    local CC="${CC}"
+    local CXX="${CXX}"
      enabled ccache && CC="ccache ${CC}"
      enabled ccache && CXX="ccache ${CXX}"
      print_webm_license $1 "##" ""
diff --git a/build/make/rtcd.pl b/build/make/rtcd.pl

new file mode 100755 (executable)

index 0000000..18ee80d
--- /dev/null
+++ b/build/make/rtcd.pl
@@ -0,0 +1,414 @@
+#!/usr/bin/env perl
+
+no strict 'refs';
+use warnings;
+use Getopt::Long;
+Getopt::Long::Configure("auto_help");
+
+my %ALL_FUNCS = ();
+my @ALL_ARCHS;
+my @ALL_FORWARD_DECLS;
+my @REQUIRES;
+
+my %opts = ();
+my %disabled = ();
+my %required = ();
+
+my @argv;
+foreach (@ARGV) {
+  $disabled{$1} = 1, next if /--disable-(.*)/;
+  $required{$1} = 1, next if /--require-(.*)/;
+  push @argv, $_;
+}
+
+# NB: use GetOptions() instead of GetOptionsFromArray() for compatibility.
+@ARGV = @argv;
+GetOptions(
+  \%opts,
+  'arch=s',
+  'sym=s',
+  'config=s',
+);
+
+foreach my $opt (qw/arch config/) {
+  if (!defined($opts{$opt})) {
+    warn "--$opt is required!\n";
+    Getopt::Long::HelpMessage('-exit' => 1);
+  }
+}
+
+foreach my $defs_file (@ARGV) {
+  if (!-f $defs_file) {
+    warn "$defs_file: $!\n";
+    Getopt::Long::HelpMessage('-exit' => 1);
+  }
+}
+
+open CONFIG_FILE, $opts{config} or
+  die "Error opening config file '$opts{config}': $!\n";
+
+my %config = ();
+while (<CONFIG_FILE>) {
+  next if !/^CONFIG_/;
+  chomp;
+  my @pair = split /=/;
+  $config{$pair[0]} = $pair[1];
+}
+close CONFIG_FILE;
+
+#
+# Routines for the RTCD DSL to call
+#
+sub vpx_config($) {
+  return (defined $config{$_[0]}) ? $config{$_[0]} : "";
+}
+
+sub specialize {
+  my $fn=$_[0];
+  shift;
+  foreach my $opt (@_) {
+    eval "\$${fn}_${opt}=${fn}_${opt}";
+  }
+}
+
+sub add_proto {
+  my $fn = splice(@_, -2, 1);
+  $ALL_FUNCS{$fn} = \@_;
+  specialize $fn, "c";
+}
+
+sub require {
+  foreach my $fn (keys %ALL_FUNCS) {
+    foreach my $opt (@_) {
+      my $ofn = eval "\$${fn}_${opt}";
+      next if !$ofn;
+
+      # if we already have a default, then we can disable it, as we know
+      # we can do better.
+      my $best = eval "\$${fn}_default";
+      if ($best) {
+        my $best_ofn = eval "\$${best}";
+        if ($best_ofn && "$best_ofn" ne "$ofn") {
+          eval "\$${best}_link = 'false'";
+        }
+      }
+      eval "\$${fn}_default=${fn}_${opt}";
+      eval "\$${fn}_${opt}_link='true'";
+    }
+  }
+}
+
+sub forward_decls {
+  push @ALL_FORWARD_DECLS, @_;
+}
+
+#
+# Include the user's directives
+#
+foreach my $f (@ARGV) {
+  open FILE, "<", $f or die "cannot open $f: $!\n";
+  my $contents = join('', <FILE>);
+  close FILE;
+  eval $contents or warn "eval failed: $@\n";
+}
+
+#
+# Process the directives according to the command line
+#
+sub process_forward_decls() {
+  foreach (@ALL_FORWARD_DECLS) {
+    $_->();
+  }
+}
+
+sub determine_indirection {
+  vpx_config("CONFIG_RUNTIME_CPU_DETECT") eq "yes" or &require(@ALL_ARCHS);
+  foreach my $fn (keys %ALL_FUNCS) {
+    my $n = "";
+    my @val = @{$ALL_FUNCS{$fn}};
+    my $args = pop @val;
+    my $rtyp = "@val";
+    my $dfn = eval "\$${fn}_default";
+    $dfn = eval "\$${dfn}";
+    foreach my $opt (@_) {
+      my $ofn = eval "\$${fn}_${opt}";
+      next if !$ofn;
+      my $link = eval "\$${fn}_${opt}_link";
+      next if $link && $link eq "false";
+      $n .= "x";
+    }
+    if ($n eq "x") {
+      eval "\$${fn}_indirect = 'false'";
+    } else {
+      eval "\$${fn}_indirect = 'true'";
+    }
+  }
+}
+
+sub declare_function_pointers {
+  foreach my $fn (sort keys %ALL_FUNCS) {
+    my @val = @{$ALL_FUNCS{$fn}};
+    my $args = pop @val;
+    my $rtyp = "@val";
+    my $dfn = eval "\$${fn}_default";
+    $dfn = eval "\$${dfn}";
+    foreach my $opt (@_) {
+      my $ofn = eval "\$${fn}_${opt}";
+      next if !$ofn;
+      print "$rtyp ${ofn}($args);\n";
+    }
+    if (eval "\$${fn}_indirect" eq "false") {
+      print "#define ${fn} ${dfn}\n";
+    } else {
+      print "RTCD_EXTERN $rtyp (*${fn})($args);\n";
+    }
+    print "\n";
+  }
+}
+
+sub set_function_pointers {
+  foreach my $fn (sort keys %ALL_FUNCS) {
+    my @val = @{$ALL_FUNCS{$fn}};
+    my $args = pop @val;
+    my $rtyp = "@val";
+    my $dfn = eval "\$${fn}_default";
+    $dfn = eval "\$${dfn}";
+    if (eval "\$${fn}_indirect" eq "true") {
+      print "    $fn = $dfn;\n";
+      foreach my $opt (@_) {
+        my $ofn = eval "\$${fn}_${opt}";
+        next if !$ofn;
+        next if "$ofn" eq "$dfn";
+        my $link = eval "\$${fn}_${opt}_link";
+        next if $link && $link eq "false";
+        my $cond = eval "\$have_${opt}";
+        print "    if (${cond}) $fn = $ofn;\n"
+      }
+    }
+  }
+}
+
+sub filter {
+  my @filtered;
+  foreach (@_) { push @filtered, $_ unless $disabled{$_}; }
+  return @filtered;
+}
+
+#
+# Helper functions for generating the arch specific RTCD files
+#
+sub common_top() {
+  my $include_guard = uc($opts{sym})."_H_";
+  print <<EOF;
+#ifndef ${include_guard}
+#define ${include_guard}
+
+#ifdef RTCD_C
+#define RTCD_EXTERN
+#else
+#define RTCD_EXTERN extern
+#endif
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+EOF
+
+process_forward_decls();
+print "\n";
+declare_function_pointers("c", @ALL_ARCHS);
+
+print <<EOF;
+void $opts{sym}(void);
+
+EOF
+}
+
+sub common_bottom() {
+  print <<EOF;
+
+#ifdef __cplusplus
+}  // extern "C"
+#endif
+
+#endif
+EOF
+}
+
+sub x86() {
+  determine_indirection("c", @ALL_ARCHS);
+
+  # Assign the helper variable for each enabled extension
+  foreach my $opt (@ALL_ARCHS) {
+    my $opt_uc = uc $opt;
+    eval "\$have_${opt}=\"flags & HAS_${opt_uc}\"";
+  }
+
+  common_top;
+  print <<EOF;
+#ifdef RTCD_C
+#include "vpx_ports/x86.h"
+static void setup_rtcd_internal(void)
+{
+    int flags = x86_simd_caps();
+
+    (void)flags;
+
+EOF
+
+  set_function_pointers("c", @ALL_ARCHS);
+
+  print <<EOF;
+}
+#endif
+EOF
+  common_bottom;
+}
+
+sub arm() {
+  determine_indirection("c", @ALL_ARCHS);
+
+  # Assign the helper variable for each enabled extension
+  foreach my $opt (@ALL_ARCHS) {
+    my $opt_uc = uc $opt;
+    eval "\$have_${opt}=\"flags & HAS_${opt_uc}\"";
+  }
+
+  common_top;
+  print <<EOF;
+#include "vpx_config.h"
+
+#ifdef RTCD_C
+#include "vpx_ports/arm.h"
+static void setup_rtcd_internal(void)
+{
+    int flags = arm_cpu_caps();
+
+    (void)flags;
+
+EOF
+
+  set_function_pointers("c", @ALL_ARCHS);
+
+  print <<EOF;
+}
+#endif
+EOF
+  common_bottom;
+}
+
+sub mips() {
+  determine_indirection("c", @ALL_ARCHS);
+  common_top;
+
+  print <<EOF;
+#include "vpx_config.h"
+
+#ifdef RTCD_C
+static void setup_rtcd_internal(void)
+{
+EOF
+
+  set_function_pointers("c", @ALL_ARCHS);
+
+  print <<EOF;
+#if HAVE_DSPR2
+#if CONFIG_VP8
+void dsputil_static_init();
+dsputil_static_init();
+#endif
+#if CONFIG_VP9
+void vp9_dsputil_static_init();
+vp9_dsputil_static_init();
+#endif
+#endif
+}
+#endif
+EOF
+  common_bottom;
+}
+
+sub unoptimized() {
+  determine_indirection "c";
+  common_top;
+  print <<EOF;
+#include "vpx_config.h"
+
+#ifdef RTCD_C
+static void setup_rtcd_internal(void)
+{
+EOF
+
+  set_function_pointers "c";
+
+  print <<EOF;
+}
+#endif
+EOF
+  common_bottom;
+}
+
+#
+# Main Driver
+#
+
+&require("c");
+if ($opts{arch} eq 'x86') {
+  @ALL_ARCHS = filter(qw/mmx sse sse2 sse3 ssse3 sse4_1 avx avx2/);
+  x86;
+} elsif ($opts{arch} eq 'x86_64') {
+  @ALL_ARCHS = filter(qw/mmx sse sse2 sse3 ssse3 sse4_1 avx avx2/);
+  @REQUIRES = filter(keys %required ? keys %required : qw/mmx sse sse2/);
+  &require(@REQUIRES);
+  x86;
+} elsif ($opts{arch} eq 'mips32') {
+  @ALL_ARCHS = filter(qw/mips32/);
+  open CONFIG_FILE, $opts{config} or
+    die "Error opening config file '$opts{config}': $!\n";
+  while (<CONFIG_FILE>) {
+    if (/HAVE_DSPR2=yes/) {
+      @ALL_ARCHS = filter(qw/mips32 dspr2/);
+      last;
+    }
+  }
+  close CONFIG_FILE;
+  mips;
+} elsif ($opts{arch} eq 'armv5te') {
+  @ALL_ARCHS = filter(qw/edsp/);
+  arm;
+} elsif ($opts{arch} eq 'armv6') {
+  @ALL_ARCHS = filter(qw/edsp media/);
+  arm;
+} elsif ($opts{arch} eq 'armv7') {
+  @ALL_ARCHS = filter(qw/edsp media neon/);
+  arm;
+} else {
+  unoptimized;
+}
+
+__END__
+
+=head1 NAME
+
+rtcd -
+
+=head1 SYNOPSIS
+
+Usage: rtcd.pl [options] FILE
+
+See 'perldoc rtcd.pl' for more details.
+
+=head1 DESCRIPTION
+
+Reads the Run Time CPU Detections definitions from FILE and generates a
+C header file on stdout.
+
+=head1 OPTIONS
+
+Options:
+  --arch=ARCH       Architecture to generate defs for (required)
+  --disable-EXT     Disable support for EXT extensions
+  --require-EXT     Require support for EXT extensions
+  --sym=SYMBOL      Unique symbol to use for RTCD initialization function
+  --config=FILE     File with CONFIG_FOO=yes lines to parse
diff --git a/build/make/rtcd.sh b/build/make/rtcd.sh

deleted file mode 100755 (executable)

index 93c9adc..0000000
--- a/build/make/rtcd.sh
+++ /dev/null
@@ -1,373 +0,0 @@
-#!/bin/sh
-self=$0
-
-usage() {
-  cat <<EOF >&2
-Usage: $self [options] FILE
-
-Reads the Run Time CPU Detections definitions from FILE and generates a
-C header file on stdout.
-
-Options:
-  --arch=ARCH   Architecture to generate defs for (required)
-  --disable-EXT Disable support for EXT extensions
-  --require-EXT Require support for EXT extensions
-  --sym=SYMBOL  Unique symbol to use for RTCD initialization function
-  --config=FILE File with CONFIG_FOO=yes lines to parse
-EOF
-  exit 1
-}
-
-die() {
-  echo "$@" >&2
-  exit 1
-}
-
-die_argument_required() {
-  die "Option $opt requires argument"
-}
-
-for opt; do
-  optval="${opt#*=}"
-  case "$opt" in
-    --arch) die_argument_required;;
-    --arch=*) arch=${optval};;
-    --disable-*) eval "disable_${opt#--disable-}=true";;
-    --require-*) REQUIRES="${REQUIRES}${opt#--require-} ";;
-    --sym) die_argument_required;;
-    --sym=*) symbol=${optval};;
-    --config=*) config_file=${optval};;
-    -h|--help)
-      usage
-      ;;
-    -*)
-      die "Unrecognized option: ${opt%%=*}"
-      ;;
-    *)
-      defs_file="$defs_file $opt"
-      ;;
-  esac
-  shift
-done
-for f in $defs_file; do [ -f "$f" ] || usage; done
-[ -n "$arch" ] || usage
-
-# Import the configuration
-[ -f "$config_file" ] && eval $(grep CONFIG_ "$config_file")
-
-#
-# Routines for the RTCD DSL to call
-#
-prototype() {
-  rtyp=""
-  case "$1" in
-    unsigned) rtyp="$1 "; shift;;
-  esac
-  rtyp="${rtyp}$1"
-  fn="$2"
-  args="$3"
-
-  eval "${2}_rtyp='$rtyp'"
-  eval "${2}_args='$3'"
-  ALL_FUNCS="$ALL_FUNCS $fn"
-  specialize $fn c
-}
-
-specialize() {
-  fn="$1"
-  shift
-  for opt in "$@"; do
-    eval "${fn}_${opt}=${fn}_${opt}"
-  done
-}
-
-require() {
-  for fn in $ALL_FUNCS; do
-    for opt in "$@"; do
-      ofn=$(eval "echo \$${fn}_${opt}")
-      [ -z "$ofn" ] && continue
-
-      # if we already have a default, then we can disable it, as we know
-      # we can do better.
-      best=$(eval "echo \$${fn}_default")
-      best_ofn=$(eval "echo \$${best}")
-      [ -n "$best" ] && [ "$best_ofn" != "$ofn" ] && eval "${best}_link=false"
-      eval "${fn}_default=${fn}_${opt}"
-      eval "${fn}_${opt}_link=true"
-    done
-  done
-}
-
-forward_decls() {
-  ALL_FORWARD_DECLS="$ALL_FORWARD_DECLS $1"
-}
-
-#
-# Include the user's directives
-#
-for f in $defs_file; do
-  . $f
-done
-
-#
-# Process the directives according to the command line
-#
-process_forward_decls() {
-  for fn in $ALL_FORWARD_DECLS; do
-    eval $fn
-  done
-}
-
-determine_indirection() {
-  [ "$CONFIG_RUNTIME_CPU_DETECT" = "yes" ] || require $ALL_ARCHS
-  for fn in $ALL_FUNCS; do
-    n=""
-    rtyp="$(eval "echo \$${fn}_rtyp")"
-    args="$(eval "echo \"\$${fn}_args\"")"
-    dfn="$(eval "echo \$${fn}_default")"
-    dfn=$(eval "echo \$${dfn}")
-    for opt in "$@"; do
-      ofn=$(eval "echo \$${fn}_${opt}")
-      [ -z "$ofn" ] && continue
-      link=$(eval "echo \$${fn}_${opt}_link")
-      [ "$link" = "false" ] && continue
-      n="${n}x"
-    done
-    if [ "$n" = "x" ]; then
-      eval "${fn}_indirect=false"
-    else
-      eval "${fn}_indirect=true"
-    fi
-  done
-}
-
-declare_function_pointers() {
-  for fn in $ALL_FUNCS; do
-    rtyp="$(eval "echo \$${fn}_rtyp")"
-    args="$(eval "echo \"\$${fn}_args\"")"
-    dfn="$(eval "echo \$${fn}_default")"
-    dfn=$(eval "echo \$${dfn}")
-    for opt in "$@"; do
-      ofn=$(eval "echo \$${fn}_${opt}")
-      [ -z "$ofn" ] && continue
-      echo "$rtyp ${ofn}($args);"
-    done
-    if [ "$(eval "echo \$${fn}_indirect")" = "false" ]; then
-      echo "#define ${fn} ${dfn}"
-    else
-      echo "RTCD_EXTERN $rtyp (*${fn})($args);"
-    fi
-    echo
-  done
-}
-
-set_function_pointers() {
-  for fn in $ALL_FUNCS; do
-    n=""
-    rtyp="$(eval "echo \$${fn}_rtyp")"
-    args="$(eval "echo \"\$${fn}_args\"")"
-    dfn="$(eval "echo \$${fn}_default")"
-    dfn=$(eval "echo \$${dfn}")
-    if $(eval "echo \$${fn}_indirect"); then
-      echo "    $fn = $dfn;"
-      for opt in "$@"; do
-        ofn=$(eval "echo \$${fn}_${opt}")
-        [ -z "$ofn" ] && continue
-        [ "$ofn" = "$dfn" ] && continue;
-        link=$(eval "echo \$${fn}_${opt}_link")
-        [ "$link" = "false" ] && continue
-        cond="$(eval "echo \$have_${opt}")"
-        echo "    if (${cond}) $fn = $ofn;"
-      done
-    fi
-    echo
-  done
-}
-
-filter() {
-  filtered=""
-  for opt in "$@"; do
-    [ -z $(eval "echo \$disable_${opt}") ] && filtered="$filtered $opt"
-  done
-  echo $filtered
-}
-
-#
-# Helper functions for generating the arch specific RTCD files
-#
-common_top() {
-  outfile_basename=$(basename ${symbol:-rtcd})
-  include_guard=$(echo $outfile_basename | tr '[a-z]' '[A-Z]' | \
-    tr -c '[A-Z0-9]' _)H_
-  cat <<EOF
-#ifndef ${include_guard}
-#define ${include_guard}
-
-#ifdef RTCD_C
-#define RTCD_EXTERN
-#else
-#define RTCD_EXTERN extern
-#endif
-
-#ifdef __cplusplus
-extern "C" {
-#endif
-
-$(process_forward_decls)
-
-$(declare_function_pointers c $ALL_ARCHS)
-
-void ${symbol:-rtcd}(void);
-EOF
-}
-
-common_bottom() {
-  cat <<EOF
-
-#ifdef __cplusplus
-}  // extern "C"
-#endif
-
-#endif
-EOF
-}
-
-x86() {
-  determine_indirection c $ALL_ARCHS
-
-  # Assign the helper variable for each enabled extension
-  for opt in $ALL_ARCHS; do
-    uc=$(echo $opt | tr '[a-z]' '[A-Z]')
-    eval "have_${opt}=\"flags & HAS_${uc}\""
-  done
-
-  cat <<EOF
-$(common_top)
-
-#ifdef RTCD_C
-#include "vpx_ports/x86.h"
-static void setup_rtcd_internal(void)
-{
-    int flags = x86_simd_caps();
-
-    (void)flags;
-
-$(set_function_pointers c $ALL_ARCHS)
-}
-#endif
-$(common_bottom)
-EOF
-}
-
-arm() {
-  determine_indirection c $ALL_ARCHS
-
-  # Assign the helper variable for each enabled extension
-  for opt in $ALL_ARCHS; do
-    uc=$(echo $opt | tr '[a-z]' '[A-Z]')
-    eval "have_${opt}=\"flags & HAS_${uc}\""
-  done
-
-  cat <<EOF
-$(common_top)
-#include "vpx_config.h"
-
-#ifdef RTCD_C
-#include "vpx_ports/arm.h"
-static void setup_rtcd_internal(void)
-{
-    int flags = arm_cpu_caps();
-
-    (void)flags;
-
-$(set_function_pointers c $ALL_ARCHS)
-}
-#endif
-$(common_bottom)
-EOF
-}
-
-
-mips() {
-  determine_indirection c $ALL_ARCHS
-  cat <<EOF
-$(common_top)
-#include "vpx_config.h"
-
-#ifdef RTCD_C
-static void setup_rtcd_internal(void)
-{
-$(set_function_pointers c $ALL_ARCHS)
-#if HAVE_DSPR2
-#if CONFIG_VP8
-void dsputil_static_init();
-dsputil_static_init();
-#endif
-#if CONFIG_VP9
-void vp9_dsputil_static_init();
-vp9_dsputil_static_init();
-#endif
-#endif
-}
-#endif
-$(common_bottom)
-EOF
-}
-
-unoptimized() {
-  determine_indirection c
-  cat <<EOF
-$(common_top)
-#include "vpx_config.h"
-
-#ifdef RTCD_C
-static void setup_rtcd_internal(void)
-{
-$(set_function_pointers c)
-}
-#endif
-$(common_bottom)
-EOF
-
-}
-#
-# Main Driver
-#
-ALL_FUNCS=$(export LC_ALL=C; echo $ALL_FUNCS | tr ' ' '\n' | sort |tr '\n' ' ')
-require c
-case $arch in
-  x86)
-    ALL_ARCHS=$(filter mmx sse sse2 sse3 ssse3 sse4_1 avx avx2)
-    x86
-    ;;
-  x86_64)
-    ALL_ARCHS=$(filter mmx sse sse2 sse3 ssse3 sse4_1 avx avx2)
-    REQUIRES=${REQUIRES:-mmx sse sse2}
-    require $(filter $REQUIRES)
-    x86
-    ;;
-  mips32)
-    ALL_ARCHS=$(filter mips32)
-    dspr2=$([ -f "$config_file" ] && eval echo $(grep HAVE_DSPR2 "$config_file"))
-    HAVE_DSPR2="${dspr2#*=}"
-    if [ "$HAVE_DSPR2" = "yes" ]; then
-        ALL_ARCHS=$(filter mips32 dspr2)
-    fi
-    mips
-    ;;
-  armv5te)
-    ALL_ARCHS=$(filter edsp)
-    arm
-    ;;
-  armv6)
-    ALL_ARCHS=$(filter edsp media)
-    arm
-    ;;
-  armv7)
-    ALL_ARCHS=$(filter edsp media neon)
-    arm
-    ;;
-  *)
-    unoptimized
-    ;;
-esac
diff --git a/configure b/configure

index 9f5a435..d2f17b0 100755 (executable)
--- a/configure
+++ b/configure
@@ -160,6 +160,20 @@ for t in ${all_targets}; do
      [ -f ${source_path}/${t}.mk ] && enable_feature ${t}
  done
  
+for util in make perl; do
+    if ! ${util} --version >/dev/null; then
+        die "${util} is required to build."
+    fi
+done
+
+
+if [ "`cd ${source_path} && pwd`" != "`pwd`" ]; then
+  # test to see if source_path already configured
+  if [ -f ${source_path}/vpx_config.h ]; then
+    die "source directory already configured; run 'make distclean' there first"
+  fi
+fi
+
  # check installed doxygen version
  doxy_version=$(doxygen --version 2>/dev/null)
  doxy_major=${doxy_version%%.*}
diff --git a/examples.mk b/examples.mk

index aeb54ab..5c6e42d 100644 (file)
--- a/examples.mk
+++ b/examples.mk
@@ -62,11 +62,12 @@ vp9_spatial_scalable_encoder.SRCS += ivfenc.c ivfenc.h
  vp9_spatial_scalable_encoder.SRCS += tools_common.c tools_common.h
  vp9_spatial_scalable_encoder.SRCS += video_common.h
  vp9_spatial_scalable_encoder.SRCS += video_writer.h video_writer.c
+vp9_spatial_scalable_encoder.SRCS += vpxstats.c vpxstats.h
  vp9_spatial_scalable_encoder.GUID   = 4A38598D-627D-4505-9C7B-D4020C84100D
  vp9_spatial_scalable_encoder.DESCRIPTION = Spatial Scalable Encoder
  
  ifeq ($(CONFIG_SHARED),no)
-UTILS-$(CONFIG_VP9_ENCODER)    += resize_util.c
+EXAMPLES-$(CONFIG_VP9_ENCODER)    += resize_util.c
  endif
  
  # XMA example disabled for now, not used in VP8
@@ -123,9 +124,6 @@ twopass_encoder.SRCS            += video_common.h
  twopass_encoder.SRCS            += video_writer.h video_writer.c
  twopass_encoder.GUID             = 73494FA6-4AF9-4763-8FBB-265C92402FD8
  twopass_encoder.DESCRIPTION      = Two-pass encoder loop
-EXAMPLES-$(CONFIG_VP8_ENCODER)  += force_keyframe.c
-force_keyframe.GUID              = 3C67CADF-029F-4C86-81F5-D6D4F51177F0
-force_keyframe.DESCRIPTION       = Force generation of keyframes
  ifeq ($(CONFIG_DECODERS),yes)
  EXAMPLES-$(CONFIG_VP8_ENCODER)  += decode_with_drops.c
  decode_with_drops.SRCS          += ivfdec.h ivfdec.c
@@ -150,6 +148,10 @@ vp8_set_maps.SRCS                  += video_writer.h video_writer.c
  vp8_set_maps.GUID                   = ECB2D24D-98B8-4015-A465-A4AF3DCC145F
  vp8_set_maps.DESCRIPTION            = VP8 set active and ROI maps
  EXAMPLES-$(CONFIG_VP8_ENCODER)     += vp8cx_set_ref.c
+vp8cx_set_ref.SRCS                 += ivfenc.h ivfenc.c
+vp8cx_set_ref.SRCS                 += tools_common.h tools_common.c
+vp8cx_set_ref.SRCS                 += video_common.h
+vp8cx_set_ref.SRCS                 += video_writer.h video_writer.c
  vp8cx_set_ref.GUID                  = C5E31F7F-96F6-48BD-BD3E-10EBF6E8057A
  vp8cx_set_ref.DESCRIPTION           = VP8 set encoder reference frame
  
diff --git a/examples/force_keyframe.c b/examples/force_keyframe.c

deleted file mode 100644 (file)

index 6531e47..0000000
--- a/examples/force_keyframe.c
+++ /dev/null
@@ -1,225 +0,0 @@
-/*
- *  Copyright (c) 2010 The WebM project authors. All Rights Reserved.
- *
- *  Use of this source code is governed by a BSD-style license
- *  that can be found in the LICENSE file in the root of the source
- *  tree. An additional intellectual property rights grant can be found
- *  in the file PATENTS.  All contributing project authors may
- *  be found in the AUTHORS file in the root of the source tree.
- */
-
-// Forcing A Keyframe
-// ==================
-//
-// This is an example demonstrating how to control placement of keyframes
-// on a frame-by-frame basis.
-//
-// Configuration
-// -------------
-// Keyframes can be forced by setting the VPX_EFLAG_FORCE_KF bit of the
-// flags passed to `vpx_codec_control()`. In this example, we force a
-// keyframe every 8 frames.
-//
-// Observing The Effects
-// ---------------------
-// The output of the encoder examples shows a 'K' rather than a dot '.'
-// when the encoder generates a keyframe. Note that every 8 frames a 'K'
-// is output.
-
-#include <stdio.h>
-#include <stdlib.h>
-#include <stdarg.h>
-#include <string.h>
-#define VPX_CODEC_DISABLE_COMPAT 1
-#include "vpx/vpx_encoder.h"
-#include "vpx/vp8cx.h"
-#define interface (vpx_codec_vp8_cx())
-#define fourcc    0x30385056
-
-#define IVF_FILE_HDR_SZ  (32)
-#define IVF_FRAME_HDR_SZ (12)
-
-static void mem_put_le16(char *mem, unsigned int val) {
-    mem[0] = val;
-    mem[1] = val>>8;
-}
-
-static void mem_put_le32(char *mem, unsigned int val) {
-    mem[0] = val;
-    mem[1] = val>>8;
-    mem[2] = val>>16;
-    mem[3] = val>>24;
-}
-
-static void die(const char *fmt, ...) {
-    va_list ap;
-
-    va_start(ap, fmt);
-    vprintf(fmt, ap);
-    if(fmt[strlen(fmt)-1] != '\n')
-        printf("\n");
-    exit(EXIT_FAILURE);
-}
-
-static void die_codec(vpx_codec_ctx_t *ctx, const char *s) {
-    const char *detail = vpx_codec_error_detail(ctx);
-
-    printf("%s: %s\n", s, vpx_codec_error(ctx));
-    if(detail)
-        printf("    %s\n",detail);
-    exit(EXIT_FAILURE);
-}
-
-static int read_frame(FILE *f, vpx_image_t *img) {
-    size_t nbytes, to_read;
-    int    res = 1;
-
-    to_read = img->w*img->h*3/2;
-    nbytes = fread(img->planes[0], 1, to_read, f);
-    if(nbytes != to_read) {
-        res = 0;
-        if(nbytes > 0)
-            printf("Warning: Read partial frame. Check your width & height!\n");
-    }
-    return res;
-}
-
-static void write_ivf_file_header(FILE *outfile,
-                                  const vpx_codec_enc_cfg_t *cfg,
-                                  int frame_cnt) {
-    char header[32];
-
-    if(cfg->g_pass != VPX_RC_ONE_PASS && cfg->g_pass != VPX_RC_LAST_PASS)
-        return;
-    header[0] = 'D';
-    header[1] = 'K';
-    header[2] = 'I';
-    header[3] = 'F';
-    mem_put_le16(header+4,  0);                   /* version */
-    mem_put_le16(header+6,  32);                  /* headersize */
-    mem_put_le32(header+8,  fourcc);              /* headersize */
-    mem_put_le16(header+12, cfg->g_w);            /* width */
-    mem_put_le16(header+14, cfg->g_h);            /* height */
-    mem_put_le32(header+16, cfg->g_timebase.den); /* rate */
-    mem_put_le32(header+20, cfg->g_timebase.num); /* scale */
-    mem_put_le32(header+24, frame_cnt);           /* length */
-    mem_put_le32(header+28, 0);                   /* unused */
-
-    (void) fwrite(header, 1, 32, outfile);
-}
-
-
-static void write_ivf_frame_header(FILE *outfile,
-                                   const vpx_codec_cx_pkt_t *pkt)
-{
-    char             header[12];
-    vpx_codec_pts_t  pts;
-
-    if(pkt->kind != VPX_CODEC_CX_FRAME_PKT)
-        return;
-
-    pts = pkt->data.frame.pts;
-    mem_put_le32(header, (unsigned int)pkt->data.frame.sz);
-    mem_put_le32(header+4, pts&0xFFFFFFFF);
-    mem_put_le32(header+8, pts >> 32);
-
-    (void) fwrite(header, 1, 12, outfile);
-}
-
-int main(int argc, char **argv) {
-    FILE                *infile, *outfile;
-    vpx_codec_ctx_t      codec;
-    vpx_codec_enc_cfg_t  cfg;
-    int                  frame_cnt = 0;
-    vpx_image_t          raw;
-    vpx_codec_err_t      res;
-    long                 width;
-    long                 height;
-    int                  frame_avail;
-    int                  got_data;
-    int                  flags = 0;
-
-    /* Open files */
-    if(argc!=5)
-        die("Usage: %s <width> <height> <infile> <outfile>\n", argv[0]);
-    width = strtol(argv[1], NULL, 0);
-    height = strtol(argv[2], NULL, 0);
-    if(width < 16 || width%2 || height <16 || height%2)
-        die("Invalid resolution: %ldx%ld", width, height);
-    if(!vpx_img_alloc(&raw, VPX_IMG_FMT_I420, width, height, 1))
-        die("Faile to allocate image", width, height);
-    if(!(outfile = fopen(argv[4], "wb")))
-        die("Failed to open %s for writing", argv[4]);
-
-    printf("Using %s\n",vpx_codec_iface_name(interface));
-
-    /* Populate encoder configuration */
-    res = vpx_codec_enc_config_default(interface, &cfg, 0);
-    if(res) {
-        printf("Failed to get config: %s\n", vpx_codec_err_to_string(res));
-        return EXIT_FAILURE;
-    }
-
-    /* Update the default configuration with our settings */
-    cfg.rc_target_bitrate = width * height * cfg.rc_target_bitrate
-                            / cfg.g_w / cfg.g_h;
-    cfg.g_w = width;
-    cfg.g_h = height;
-
-    write_ivf_file_header(outfile, &cfg, 0);
-
-
-        /* Open input file for this encoding pass */
-        if(!(infile = fopen(argv[3], "rb")))
-            die("Failed to open %s for reading", argv[3]);
-
-        /* Initialize codec */
-        if(vpx_codec_enc_init(&codec, interface, &cfg, 0))
-            die_codec(&codec, "Failed to initialize encoder");
-
-        frame_avail = 1;
-        got_data = 0;
-        while(frame_avail || got_data) {
-            vpx_codec_iter_t iter = NULL;
-            const vpx_codec_cx_pkt_t *pkt;
-
-            if(!(frame_cnt & 7))
-                flags |= VPX_EFLAG_FORCE_KF;
-            else
-                flags &= ~VPX_EFLAG_FORCE_KF;
-            frame_avail = read_frame(infile, &raw);
-            if(vpx_codec_encode(&codec, frame_avail? &raw : NULL, frame_cnt,
-                                1, flags, VPX_DL_REALTIME))
-                die_codec(&codec, "Failed to encode frame");
-            got_data = 0;
-            while( (pkt = vpx_codec_get_cx_data(&codec, &iter)) ) {
-                got_data = 1;
-                switch(pkt->kind) {
-                case VPX_CODEC_CX_FRAME_PKT:
-                    write_ivf_frame_header(outfile, pkt);
-                    (void) fwrite(pkt->data.frame.buf, 1, pkt->data.frame.sz,
-                                  outfile);
-                    break;
-                default:
-                    break;
-                }
-                printf(pkt->kind == VPX_CODEC_CX_FRAME_PKT
-                       && (pkt->data.frame.flags & VPX_FRAME_IS_KEY)? "K":".");
-                fflush(stdout);
-            }
-            frame_cnt++;
-        }
-        printf("\n");
-        fclose(infile);
-
-    printf("Processed %d frames.\n",frame_cnt-1);
-    vpx_img_free(&raw);
-    if(vpx_codec_destroy(&codec))
-        die_codec(&codec, "Failed to destroy codec");
-
-    /* Try to rewrite the file header with the actual frame count */
-    if(!fseek(outfile, 0, SEEK_SET))
-        write_ivf_file_header(outfile, &cfg, frame_cnt-1);
-    fclose(outfile);
-    return EXIT_SUCCESS;
-}
diff --git a/resize_util.c b/examples/resize_util.c

similarity index 100%

rename from resize_util.c

rename to examples/resize_util.c
diff --git a/examples/simple_encoder.c b/examples/simple_encoder.c

index 6ecd498..af58091 100644 (file)
--- a/examples/simple_encoder.c
+++ b/examples/simple_encoder.c
@@ -64,6 +64,15 @@
  // frame is shown for one frame-time in duration. The flags parameter is
  // unused in this example. The deadline is set to VPX_DL_REALTIME to
  // make the example run as quickly as possible.
+
+// Forced Keyframes
+// ----------------
+// Keyframes can be forced by setting the VPX_EFLAG_FORCE_KF bit of the
+// flags passed to `vpx_codec_control()`. In this example, we force a
+// keyframe every <keyframe-interval> frames. Note, the output stream can
+// contain additional keyframes beyond those that have been forced using the
+// VPX_EFLAG_FORCE_KF flag because of automatic keyframe placement by the
+// encoder.
  //
  // Processing The Encoded Data
  // ---------------------------
@@ -103,8 +112,8 @@ static const char *exec_name;
  void usage_exit() {
    fprintf(stderr,
            "Usage: %s <codec> <width> <height> <infile> <outfile> "
-              "[<error-resilient>]\nSee comments in simple_encoder.c for more "
-              "information.\n",
+              "<keyframe-interval> [<error-resilient>]\nSee comments in "
+              "simple_encoder.c for more information.\n",
            exec_name);
    exit(EXIT_FAILURE);
  }
@@ -112,11 +121,12 @@ void usage_exit() {
  static void encode_frame(vpx_codec_ctx_t *codec,
                           vpx_image_t *img,
                           int frame_index,
+                         int flags,
                           VpxVideoWriter *writer) {
    vpx_codec_iter_t iter = NULL;
    const vpx_codec_cx_pkt_t *pkt = NULL;
-  const vpx_codec_err_t res = vpx_codec_encode(codec, img, frame_index, 1, 0,
-                                               VPX_DL_GOOD_QUALITY);
+  const vpx_codec_err_t res = vpx_codec_encode(codec, img, frame_index, 1,
+                                               flags, VPX_DL_GOOD_QUALITY);
    if (res != VPX_CODEC_OK)
      die_codec(codec, "Failed to encode frame");
  
@@ -148,15 +158,20 @@ int main(int argc, char **argv) {
    const VpxInterface *encoder = NULL;
    const int fps = 30;        // TODO(dkovalev) add command line argument
    const int bitrate = 200;   // kbit/s TODO(dkovalev) add command line argument
+  int keyframe_interval = 0;
+
+  // TODO(dkovalev): Add some simple command line parsing code to make the
+  // command line more flexible.
    const char *codec_arg = NULL;
    const char *width_arg = NULL;
    const char *height_arg = NULL;
    const char *infile_arg = NULL;
    const char *outfile_arg = NULL;
+  const char *keyframe_interval_arg = NULL;
  
    exec_name = argv[0];
  
-  if (argc < 6)
+  if (argc < 7)
      die("Invalid number of arguments");
  
    codec_arg = argv[1];
@@ -164,6 +179,7 @@ int main(int argc, char **argv) {
    height_arg = argv[3];
    infile_arg = argv[4];
    outfile_arg = argv[5];
+  keyframe_interval_arg = argv[6];
  
    encoder = get_vpx_encoder_by_name(codec_arg);
    if (!encoder)
@@ -187,6 +203,10 @@ int main(int argc, char **argv) {
      die("Failed to allocate image.");
    }
  
+  keyframe_interval = strtol(keyframe_interval_arg, NULL, 0);
+  if (keyframe_interval < 0)
+    die("Invalid keyframe interval value.");
+
    printf("Using %s\n", vpx_codec_iface_name(encoder->interface()));
  
    res = vpx_codec_enc_config_default(encoder->interface(), &cfg, 0);
@@ -198,7 +218,7 @@ int main(int argc, char **argv) {
    cfg.g_timebase.num = info.time_base.numerator;
    cfg.g_timebase.den = info.time_base.denominator;
    cfg.rc_target_bitrate = bitrate;
-  cfg.g_error_resilient = argc > 6 ? strtol(argv[6], NULL, 0) : 0;
+  cfg.g_error_resilient = argc > 7 ? strtol(argv[7], NULL, 0) : 0;
  
    writer = vpx_video_writer_open(outfile_arg, kContainerIVF, &info);
    if (!writer)
@@ -210,9 +230,13 @@ int main(int argc, char **argv) {
    if (vpx_codec_enc_init(&codec, encoder->interface(), &cfg, 0))
      die_codec(&codec, "Failed to initialize encoder");
  
-  while (vpx_img_read(&raw, infile))
-    encode_frame(&codec, &raw, frame_count++, writer);
-  encode_frame(&codec, NULL, -1, writer);  // flush the encoder
+  while (vpx_img_read(&raw, infile)) {
+    int flags = 0;
+    if (keyframe_interval > 0 && frame_count % keyframe_interval == 0)
+      flags |= VPX_EFLAG_FORCE_KF;
+    encode_frame(&codec, &raw, frame_count++, flags, writer);
+  }
+  encode_frame(&codec, NULL, -1, 0, writer);  // flush the encoder
  
    printf("\n");
    fclose(infile);
diff --git a/examples/vp8cx_set_ref.c b/examples/vp8cx_set_ref.c

index f87dd35..9b6d11b 100644 (file)
--- a/examples/vp8cx_set_ref.c
+++ b/examples/vp8cx_set_ref.c
@@ -48,212 +48,140 @@
  
  #include <stdio.h>
  #include <stdlib.h>
-#include <stdarg.h>
  #include <string.h>
+
  #define VPX_CODEC_DISABLE_COMPAT 1
-#include "vpx/vpx_encoder.h"
  #include "vpx/vp8cx.h"
-#define interface (vpx_codec_vp8_cx())
-#define fourcc    0x30385056
-
-#define IVF_FILE_HDR_SZ  (32)
-#define IVF_FRAME_HDR_SZ (12)
-
-static void mem_put_le16(char *mem, unsigned int val) {
-    mem[0] = val;
-    mem[1] = val>>8;
-}
+#include "vpx/vpx_encoder.h"
  
-static void mem_put_le32(char *mem, unsigned int val) {
-    mem[0] = val;
-    mem[1] = val>>8;
-    mem[2] = val>>16;
-    mem[3] = val>>24;
-}
+#include "./tools_common.h"
+#include "./video_writer.h"
  
-static void die(const char *fmt, ...) {
-    va_list ap;
+static const char *exec_name;
  
-    va_start(ap, fmt);
-    vprintf(fmt, ap);
-    if(fmt[strlen(fmt)-1] != '\n')
-        printf("\n");
-    exit(EXIT_FAILURE);
+void usage_exit() {
+  fprintf(stderr, "Usage: %s <width> <height> <infile> <outfile> <frame>\n",
+          exec_name);
+  exit(EXIT_FAILURE);
  }
  
-static void die_codec(vpx_codec_ctx_t *ctx, const char *s) {
-    const char *detail = vpx_codec_error_detail(ctx);
-
-    printf("%s: %s\n", s, vpx_codec_error(ctx));
-    if(detail)
-        printf("    %s\n",detail);
-    exit(EXIT_FAILURE);
-}
-
-static int read_frame(FILE *f, vpx_image_t *img) {
-    size_t nbytes, to_read;
-    int    res = 1;
-
-    to_read = img->w*img->h*3/2;
-    nbytes = fread(img->planes[0], 1, to_read, f);
-    if(nbytes != to_read) {
-        res = 0;
-        if(nbytes > 0)
-            printf("Warning: Read partial frame. Check your width & height!\n");
+static void encode_frame(vpx_codec_ctx_t *codec,
+                         vpx_image_t *img,
+                         int frame_index,
+                         VpxVideoWriter *writer) {
+  vpx_codec_iter_t iter = NULL;
+  const vpx_codec_cx_pkt_t *pkt = NULL;
+  const vpx_codec_err_t res = vpx_codec_encode(codec, img, frame_index, 1, 0,
+                                               VPX_DL_GOOD_QUALITY);
+  if (res != VPX_CODEC_OK)
+    die_codec(codec, "Failed to encode frame");
+
+  while ((pkt = vpx_codec_get_cx_data(codec, &iter)) != NULL) {
+    if (pkt->kind == VPX_CODEC_CX_FRAME_PKT) {
+      const int keyframe = (pkt->data.frame.flags & VPX_FRAME_IS_KEY) != 0;
+      if (!vpx_video_writer_write_frame(writer,
+                                        pkt->data.frame.buf,
+                                        pkt->data.frame.sz,
+                                        pkt->data.frame.pts)) {
+        die_codec(codec, "Failed to write compressed frame");
+      }
+
+      printf(keyframe ? "K" : ".");
+      fflush(stdout);
      }
-    return res;
-}
-
-static void write_ivf_file_header(FILE *outfile,
-                                  const vpx_codec_enc_cfg_t *cfg,
-                                  int frame_cnt) {
-    char header[32];
-
-    if(cfg->g_pass != VPX_RC_ONE_PASS && cfg->g_pass != VPX_RC_LAST_PASS)
-        return;
-    header[0] = 'D';
-    header[1] = 'K';
-    header[2] = 'I';
-    header[3] = 'F';
-    mem_put_le16(header+4,  0);                   /* version */
-    mem_put_le16(header+6,  32);                  /* headersize */
-    mem_put_le32(header+8,  fourcc);              /* headersize */
-    mem_put_le16(header+12, cfg->g_w);            /* width */
-    mem_put_le16(header+14, cfg->g_h);            /* height */
-    mem_put_le32(header+16, cfg->g_timebase.den); /* rate */
-    mem_put_le32(header+20, cfg->g_timebase.num); /* scale */
-    mem_put_le32(header+24, frame_cnt);           /* length */
-    mem_put_le32(header+28, 0);                   /* unused */
-
-    (void) fwrite(header, 1, 32, outfile);
-}
-
-
-static void write_ivf_frame_header(FILE *outfile,
-                                   const vpx_codec_cx_pkt_t *pkt)
-{
-    char             header[12];
-    vpx_codec_pts_t  pts;
-
-    if(pkt->kind != VPX_CODEC_CX_FRAME_PKT)
-        return;
-
-    pts = pkt->data.frame.pts;
-    mem_put_le32(header, (unsigned int)pkt->data.frame.sz);
-    mem_put_le32(header+4, pts&0xFFFFFFFF);
-    mem_put_le32(header+8, pts >> 32);
-
-    (void) fwrite(header, 1, 12, outfile);
+  }
  }
  
  int main(int argc, char **argv) {
-    FILE                *infile, *outfile;
-    vpx_codec_ctx_t      codec;
-    vpx_codec_enc_cfg_t  cfg;
-    int                  frame_cnt = 0;
-    vpx_image_t          raw;
-    vpx_codec_err_t      res;
-    long                 width;
-    long                 height;
-    int                  frame_avail;
-    int                  got_data;
-    int                  flags = 0;
-    int                  update_frame_num = 0;
-
-    /* Open files */
-    if(argc!=6)
-        die("Usage: %s <width> <height> <infile> <outfile> <frame>\n",
-            argv[0]);
-
-        update_frame_num = atoi(argv[5]);
-        if(!update_frame_num)
-            die("Couldn't parse frame number '%s'\n", argv[5]);
-
-    width = strtol(argv[1], NULL, 0);
-    height = strtol(argv[2], NULL, 0);
-    if(width < 16 || width%2 || height <16 || height%2)
-        die("Invalid resolution: %ldx%ld", width, height);
-    if(!vpx_img_alloc(&raw, VPX_IMG_FMT_I420, width, height, 1))
-        die("Faile to allocate image", width, height);
-    if(!(outfile = fopen(argv[4], "wb")))
-        die("Failed to open %s for writing", argv[4]);
-
-    printf("Using %s\n",vpx_codec_iface_name(interface));
-
-    /* Populate encoder configuration */
-    res = vpx_codec_enc_config_default(interface, &cfg, 0);
-    if(res) {
-        printf("Failed to get config: %s\n", vpx_codec_err_to_string(res));
-        return EXIT_FAILURE;
+  FILE *infile = NULL;
+  vpx_codec_ctx_t codec = {0};
+  vpx_codec_enc_cfg_t cfg = {0};
+  int frame_count = 0;
+  vpx_image_t raw;
+  vpx_codec_err_t res;
+  VpxVideoInfo info = {0};
+  VpxVideoWriter *writer = NULL;
+  const VpxInterface *encoder = NULL;
+  int update_frame_num = 0;
+  const int fps = 30;        // TODO(dkovalev) add command line argument
+  const int bitrate = 200;   // kbit/s TODO(dkovalev) add command line argument
+
+  exec_name = argv[0];
+
+  if (argc != 6)
+    die("Invalid number of arguments");
+
+  // TODO(dkovalev): add vp9 support and rename the file accordingly
+  encoder = get_vpx_encoder_by_name("vp8");
+  if (!encoder)
+    die("Unsupported codec.");
+
+  update_frame_num = atoi(argv[5]);
+  if (!update_frame_num)
+    die("Couldn't parse frame number '%s'\n", argv[5]);
+
+  info.codec_fourcc = encoder->fourcc;
+  info.frame_width = strtol(argv[1], NULL, 0);
+  info.frame_height = strtol(argv[2], NULL, 0);
+  info.time_base.numerator = 1;
+  info.time_base.denominator = fps;
+
+  if (info.frame_width <= 0 ||
+      info.frame_height <= 0 ||
+      (info.frame_width % 2) != 0 ||
+      (info.frame_height % 2) != 0) {
+    die("Invalid frame size: %dx%d", info.frame_width, info.frame_height);
+  }
+
+  if (!vpx_img_alloc(&raw, VPX_IMG_FMT_I420, info.frame_width,
+                                             info.frame_height, 1)) {
+    die("Failed to allocate image.");
+  }
+
+  printf("Using %s\n", vpx_codec_iface_name(encoder->interface()));
+
+  res = vpx_codec_enc_config_default(encoder->interface(), &cfg, 0);
+  if (res)
+    die_codec(&codec, "Failed to get default codec config.");
+
+  cfg.g_w = info.frame_width;
+  cfg.g_h = info.frame_height;
+  cfg.g_timebase.num = info.time_base.numerator;
+  cfg.g_timebase.den = info.time_base.denominator;
+  cfg.rc_target_bitrate = bitrate;
+
+  writer = vpx_video_writer_open(argv[4], kContainerIVF, &info);
+  if (!writer)
+    die("Failed to open %s for writing.", argv[4]);
+
+  if (!(infile = fopen(argv[3], "rb")))
+    die("Failed to open %s for reading.", argv[3]);
+
+  if (vpx_codec_enc_init(&codec, encoder->interface(), &cfg, 0))
+    die_codec(&codec, "Failed to initialize encoder");
+
+  while (vpx_img_read(&raw, infile)) {
+    if (frame_count + 1 == update_frame_num) {
+      vpx_ref_frame_t ref;
+      ref.frame_type = VP8_LAST_FRAME;
+      ref.img = raw;
+      if (vpx_codec_control(&codec, VP8_SET_REFERENCE, &ref))
+        die_codec(&codec, "Failed to set reference frame");
      }
  
-    /* Update the default configuration with our settings */
-    cfg.rc_target_bitrate = width * height * cfg.rc_target_bitrate
-                            / cfg.g_w / cfg.g_h;
-    cfg.g_w = width;
-    cfg.g_h = height;
-
-    write_ivf_file_header(outfile, &cfg, 0);
-
-
-        /* Open input file for this encoding pass */
-        if(!(infile = fopen(argv[3], "rb")))
-            die("Failed to open %s for reading", argv[3]);
-
-        /* Initialize codec */
-        if(vpx_codec_enc_init(&codec, interface, &cfg, 0))
-            die_codec(&codec, "Failed to initialize encoder");
-
-        frame_avail = 1;
-        got_data = 0;
-        while(frame_avail || got_data) {
-            vpx_codec_iter_t iter = NULL;
-            const vpx_codec_cx_pkt_t *pkt;
-
-            frame_avail = read_frame(infile, &raw);
-
-            if(frame_cnt + 1 == update_frame_num) {
-                vpx_ref_frame_t ref;
-
-                ref.frame_type = VP8_LAST_FRAME;
-                ref.img        = raw;
+    encode_frame(&codec, &raw, frame_count++, writer);
+  }
+  encode_frame(&codec, NULL, -1, writer);
  
-                if(vpx_codec_control(&codec, VP8_SET_REFERENCE, &ref))
-                    die_codec(&codec, "Failed to set reference frame");
-            }
+  printf("\n");
+  fclose(infile);
+  printf("Processed %d frames.\n", frame_count);
  
-            if(vpx_codec_encode(&codec, frame_avail? &raw : NULL, frame_cnt,
-                                1, flags, VPX_DL_REALTIME))
-                die_codec(&codec, "Failed to encode frame");
-            got_data = 0;
-            while( (pkt = vpx_codec_get_cx_data(&codec, &iter)) ) {
-                got_data = 1;
-                switch(pkt->kind) {
-                case VPX_CODEC_CX_FRAME_PKT:
-                    write_ivf_frame_header(outfile, pkt);
-                    (void) fwrite(pkt->data.frame.buf, 1, pkt->data.frame.sz,
-                                  outfile);
-                    break;
-                default:
-                    break;
-                }
-                printf(pkt->kind == VPX_CODEC_CX_FRAME_PKT
-                       && (pkt->data.frame.flags & VPX_FRAME_IS_KEY)? "K":".");
-                fflush(stdout);
-            }
-            frame_cnt++;
-        }
-        printf("\n");
-        fclose(infile);
+  vpx_img_free(&raw);
+  if (vpx_codec_destroy(&codec))
+    die_codec(&codec, "Failed to destroy codec.");
  
-    printf("Processed %d frames.\n",frame_cnt-1);
-    vpx_img_free(&raw);
-    if(vpx_codec_destroy(&codec))
-        die_codec(&codec, "Failed to destroy codec");
+  vpx_video_writer_close(writer);
  
-    /* Try to rewrite the file header with the actual frame count */
-    if(!fseek(outfile, 0, SEEK_SET))
-        write_ivf_file_header(outfile, &cfg, frame_cnt-1);
-    fclose(outfile);
-    return EXIT_SUCCESS;
+  return EXIT_SUCCESS;
  }
diff --git a/examples/vp9_spatial_scalable_encoder.c b/examples/vp9_spatial_scalable_encoder.c

index 98dc3f5..5333b11 100644 (file)
--- a/examples/vp9_spatial_scalable_encoder.c
+++ b/examples/vp9_spatial_scalable_encoder.c
@@ -26,6 +26,7 @@
  #include "vpx/svc_context.h"
  #include "vpx/vp8cx.h"
  #include "vpx/vpx_encoder.h"
+#include "./vpxstats.h"
  
  static const struct arg_enum_list encoding_mode_enum[] = {
    {"i", INTER_LAYER_PREDICTION_I},
@@ -60,12 +61,19 @@ static const arg_def_t quantizers_arg =
  static const arg_def_t quantizers_keyframe_arg =
      ARG_DEF("qn", "quantizers-keyframe", 1, "quantizers for key frames (lowest "
          "to highest layer)");
+static const arg_def_t passes_arg =
+    ARG_DEF("p", "passes", 1, "Number of passes (1/2)");
+static const arg_def_t pass_arg =
+    ARG_DEF(NULL, "pass", 1, "Pass to execute (1/2)");
+static const arg_def_t fpf_name_arg =
+    ARG_DEF(NULL, "fpf", 1, "First pass statistics file name");
  
  static const arg_def_t *svc_args[] = {
    &encoding_mode_arg, &frames_arg,        &width_arg,       &height_arg,
    &timebase_arg,      &bitrate_arg,       &skip_frames_arg, &layers_arg,
    &kf_dist_arg,       &scale_factors_arg, &quantizers_arg,
-  &quantizers_keyframe_arg, NULL
+  &quantizers_keyframe_arg,               &passes_arg,       &pass_arg,
+  &fpf_name_arg,      NULL
  };
  
  static const SVC_ENCODING_MODE default_encoding_mode =
@@ -85,6 +93,10 @@ typedef struct {
    const char *output_filename;
    uint32_t frames_to_code;
    uint32_t frames_to_skip;
+  struct VpxInputContext input_ctx;
+  stats_io_t rc_stats;
+  int passes;
+  int pass;
  } AppInput;
  
  static const char *exec_name;
@@ -105,6 +117,9 @@ static void parse_command_line(int argc, const char **argv_,
    char **argi = NULL;
    char **argj = NULL;
    vpx_codec_err_t res;
+  int passes = 0;
+  int pass = 0;
+  const char *fpf_file_name = NULL;
  
    // initialize SvcContext with parameters that will be passed to vpx_svc_init
    svc_ctx->log_level = SVC_LOG_DEBUG;
@@ -159,11 +174,53 @@ static void parse_command_line(int argc, const char **argv_,
        vpx_svc_set_quantizers(svc_ctx, arg.val, 0);
      } else if (arg_match(&arg, &quantizers_keyframe_arg, argi)) {
        vpx_svc_set_quantizers(svc_ctx, arg.val, 1);
+    } else if (arg_match(&arg, &passes_arg, argi)) {
+      passes = arg_parse_uint(&arg);
+      if (passes < 1 || passes > 2) {
+        die("Error: Invalid number of passes (%d)\n", passes);
+      }
+    } else if (arg_match(&arg, &pass_arg, argi)) {
+      pass = arg_parse_uint(&arg);
+      if (pass < 1 || pass > 2) {
+        die("Error: Invalid pass selected (%d)\n", pass);
+      }
+    } else if (arg_match(&arg, &fpf_name_arg, argi)) {
+      fpf_file_name = arg.val;
      } else {
        ++argj;
      }
    }
  
+  if (passes == 0 || passes == 1) {
+    if (pass) {
+      fprintf(stderr, "pass is ignored since there's only one pass\n");
+    }
+    enc_cfg->g_pass = VPX_RC_ONE_PASS;
+  } else {
+    if (pass == 0) {
+      die("pass must be specified when passes is 2\n");
+    }
+
+    if (fpf_file_name == NULL) {
+      die("fpf must be specified when passes is 2\n");
+    }
+
+    if (pass == 1) {
+      enc_cfg->g_pass = VPX_RC_FIRST_PASS;
+      if (!stats_open_file(&app_input->rc_stats, fpf_file_name, 0)) {
+        fatal("Failed to open statistics store");
+      }
+    } else {
+      enc_cfg->g_pass = VPX_RC_LAST_PASS;
+      if (!stats_open_file(&app_input->rc_stats, fpf_file_name, 1)) {
+        fatal("Failed to open statistics store");
+      }
+      enc_cfg->rc_twopass_stats_in = stats_get(&app_input->rc_stats);
+    }
+    app_input->passes = passes;
+    app_input->pass = pass;
+  }
+
    // Check for unrecognized options
    for (argi = argv; *argi; ++argi)
      if (argi[0][0] == '-' && strlen(argi[0]) > 1)
@@ -234,10 +291,14 @@ int main(int argc, const char **argv) {
        VPX_CODEC_OK) {
      die("Failed to get output resolution");
    }
-  writer = vpx_video_writer_open(app_input.output_filename, kContainerIVF,
-                                 &info);
-  if (!writer)
-    die("Failed to open %s for writing\n", app_input.output_filename);
+
+  if (!(app_input.passes == 2 && app_input.pass == 1)) {
+    // We don't save the bitstream for the 1st pass on two pass rate control
+    writer = vpx_video_writer_open(app_input.output_filename, kContainerIVF,
+                                   &info);
+    if (!writer)
+      die("Failed to open %s for writing\n", app_input.output_filename);
+  }
  
    // skip initial frames
    for (i = 0; i < app_input.frames_to_skip; ++i)
@@ -254,11 +315,18 @@ int main(int argc, const char **argv) {
      if (res != VPX_CODEC_OK) {
        die_codec(&codec, "Failed to encode frame");
      }
-    if (vpx_svc_get_frame_size(&svc_ctx) > 0) {
-      vpx_video_writer_write_frame(writer,
-                                   vpx_svc_get_buffer(&svc_ctx),
-                                   vpx_svc_get_frame_size(&svc_ctx),
-                                   pts);
+    if (!(app_input.passes == 2 && app_input.pass == 1)) {
+      if (vpx_svc_get_frame_size(&svc_ctx) > 0) {
+        vpx_video_writer_write_frame(writer,
+                                     vpx_svc_get_buffer(&svc_ctx),
+                                     vpx_svc_get_frame_size(&svc_ctx),
+                                     pts);
+      }
+    }
+    if (vpx_svc_get_rc_stats_buffer_size(&svc_ctx) > 0) {
+      stats_write(&app_input.rc_stats,
+                  vpx_svc_get_rc_stats_buffer(&svc_ctx),
+                  vpx_svc_get_rc_stats_buffer_size(&svc_ctx));
      }
      ++frame_cnt;
      pts += frame_duration;
@@ -269,7 +337,12 @@ int main(int argc, const char **argv) {
    fclose(infile);
    if (vpx_codec_destroy(&codec)) die_codec(&codec, "Failed to destroy codec");
  
-  vpx_video_writer_close(writer);
+  if (app_input.passes == 2)
+    stats_close(&app_input.rc_stats, 1);
+
+  if (writer) {
+    vpx_video_writer_close(writer);
+  }
  
    vpx_img_free(&raw);
  
diff --git a/libs.mk b/libs.mk

index 302d2af..a5c4b76 100644 (file)
--- a/libs.mk
+++ b/libs.mk
@@ -49,7 +49,7 @@ endif # !gcc
  define rtcd_h_template
  $$(BUILD_PFX)$(1).h: $$(SRC_PATH_BARE)/$(2)
         @echo "    [CREATE] $$@"
-       $$(qexec)$$(SRC_PATH_BARE)/build/make/rtcd.sh --arch=$$(TGT_ISA) \
+       $$(qexec)$$(SRC_PATH_BARE)/build/make/rtcd.pl --arch=$$(TGT_ISA) \
            --sym=$(1) \
            --config=$$(CONFIG_DIR)$$(target)$$(if $$(FAT_ARCHS),,-$$(TOOLCHAIN)).mk \
            $$(RTCD_OPTIONS) $$^ > $$@
@@ -162,7 +162,7 @@ INSTALL_MAPS += $(foreach p,$(VS_PLATFORMS),$(LIBSUBDIR)/$(p)/%  $(p)/Debug/%)
  endif
  
  CODEC_SRCS-$(BUILD_LIBVPX) += build/make/version.sh
-CODEC_SRCS-$(BUILD_LIBVPX) += build/make/rtcd.sh
+CODEC_SRCS-$(BUILD_LIBVPX) += build/make/rtcd.pl
  CODEC_SRCS-$(BUILD_LIBVPX) += vpx_ports/emmintrin_compat.h
  CODEC_SRCS-$(BUILD_LIBVPX) += vpx_ports/mem_ops.h
  CODEC_SRCS-$(BUILD_LIBVPX) += vpx_ports/mem_ops_aligned.h
@@ -236,6 +236,13 @@ vpx.def: $(call enabled,CODEC_EXPORTS)
              --out=$@ $^
  CLEAN-OBJS += vpx.def
  
+# Assembly files that are included, but don't define symbols themselves.
+# Filtered out to avoid Visual Studio build warnings.
+ASM_INCLUDES := \
+    third_party/x86inc/x86inc.asm \
+    vpx_config.asm \
+    vpx_ports/x86_abi_support.asm \
+
  vpx.$(VCPROJ_SFX): $(CODEC_SRCS) vpx.def obj_int_extract.$(VCPROJ_SFX)
         @echo "    [CREATE] $@"
         $(qexec)$(GEN_VCPROJ) \
@@ -246,7 +253,8 @@ vpx.$(VCPROJ_SFX): $(CODEC_SRCS) vpx.def obj_int_extract.$(VCPROJ_SFX)
              --proj-guid=DCE19DAF-69AC-46DB-B14A-39F0FAA5DB74 \
              --module-def=vpx.def \
              --ver=$(CONFIG_VS_VERSION) \
-            --out=$@ $(CFLAGS) $^ \
+            --out=$@ $(CFLAGS) \
+            $(filter-out $(addprefix %, $(ASM_INCLUDES)), $^) \
              --src-path-bare="$(SRC_PATH_BARE)" \
  
  PROJECTS-$(BUILD_LIBVPX) += vpx.$(VCPROJ_SFX)
diff --git a/test/cpu_speed_test.cc b/test/cpu_speed_test.cc

index c92e723..569ff26 100644 (file)
--- a/test/cpu_speed_test.cc
+++ b/test/cpu_speed_test.cc
@@ -79,7 +79,7 @@ TEST_P(CpuSpeedTest, TestEncodeHighBitrate) {
    cfg_.rc_min_quantizer = 0;
  
    ::libvpx_test::I420VideoSource video("hantro_odd.yuv", 208, 144, 30, 1, 0,
-                                       40);
+                                       20);
  
    ASSERT_NO_FATAL_FAILURE(RunLoop(&video));
  }
@@ -95,7 +95,7 @@ TEST_P(CpuSpeedTest, TestLowBitrate) {
    cfg_.rc_min_quantizer = 40;
  
    ::libvpx_test::I420VideoSource video("hantro_odd.yuv", 208, 144, 30, 1, 0,
-                                       40);
+                                       20);
  
    ASSERT_NO_FATAL_FAILURE(RunLoop(&video));
  }
@@ -107,6 +107,6 @@ using std::tr1::make_tuple;
  
  VP9_INSTANTIATE_TEST_CASE(
      CpuSpeedTest,
-    ::testing::Values(::libvpx_test::kTwoPassGood),
-    ::testing::Range(0, 5));
+    ::testing::Values(::libvpx_test::kTwoPassGood, ::libvpx_test::kOnePassGood),
+    ::testing::Range(0, 8));
  }  // namespace
diff --git a/test/datarate_test.cc b/test/datarate_test.cc

index 39c9a5a..7793cca 100644 (file)
--- a/test/datarate_test.cc
+++ b/test/datarate_test.cc
@@ -577,5 +577,5 @@ TEST_P(DatarateTestVP9, BasicRateTargeting3TemporalLayersFrameDropping) {
  VP8_INSTANTIATE_TEST_CASE(DatarateTest, ALL_TEST_MODES);
  VP9_INSTANTIATE_TEST_CASE(DatarateTestVP9,
                            ::testing::Values(::libvpx_test::kOnePassGood),
-                          ::testing::Range(2, 5));
+                          ::testing::Range(2, 6));
  }  // namespace
diff --git a/test/register_state_check.h b/test/register_state_check.h

index 479a42d..7e3d053 100644 (file)
--- a/test/register_state_check.h
+++ b/test/register_state_check.h
@@ -11,14 +11,15 @@
  #ifndef TEST_REGISTER_STATE_CHECK_H_
  #define TEST_REGISTER_STATE_CHECK_H_
  
-#ifdef _WIN64
+#include "third_party/googletest/src/include/gtest/gtest.h"
+#include "./vpx_config.h"
+
+#if defined(_WIN64)
  
  #define _WIN32_LEAN_AND_MEAN
  #include <windows.h>
  #include <winnt.h>
  
-#include "third_party/googletest/src/include/gtest/gtest.h"
-
  namespace testing {
  namespace internal {
  
@@ -81,7 +82,61 @@ class RegisterStateCheck {
  
  }  // namespace libvpx_test
  
-#else  // !_WIN64
+#elif defined(CONFIG_SHARED) && defined(HAVE_NEON) \
+      && !CONFIG_SHARED && HAVE_NEON
+
+#include "vpx/vpx_integer.h"
+
+extern "C" {
+// Save the d8-d15 registers into store.
+void vp9_push_neon(int64_t *store);
+}
+
+namespace libvpx_test {
+
+// Compares the state of d8-d15 at construction with their state at
+// destruction. These registers should be preserved by the callee on
+// arm platform.
+// Usage:
+// {
+//   RegisterStateCheck reg_check;
+//   FunctionToVerify();
+// }
+class RegisterStateCheck {
+ public:
+  RegisterStateCheck() { initialized_ = StoreRegisters(pre_store_); }
+  ~RegisterStateCheck() { EXPECT_TRUE(Check()); }
+
+ private:
+  static bool StoreRegisters(int64_t store[8]) {
+    vp9_push_neon(store);
+    return true;
+  }
+
+  // Compares the register state. Returns true if the states match.
+  bool Check() const {
+    if (!initialized_) return false;
+    int64_t post_store[8];
+    vp9_push_neon(post_store);
+    for (int i = 0; i < 8; ++i) {
+      EXPECT_EQ(pre_store_[i], post_store[i]) << "d"
+          << i + 8 << " has been modified";
+    }
+    return !testing::Test::HasNonfatalFailure();
+  }
+
+  bool initialized_;
+  int64_t pre_store_[8];
+};
+
+#define REGISTER_STATE_CHECK(statement) do { \
+  libvpx_test::RegisterStateCheck reg_check; \
+  statement;                               \
+} while (false)
+
+}  // namespace libvpx_test
+
+#else
  
  namespace libvpx_test {
  
diff --git a/test/vp8_boolcoder_test.cc b/test/vp8_boolcoder_test.cc

index 7c6c601..9cd1987 100644 (file)
--- a/test/vp8_boolcoder_test.cc
+++ b/test/vp8_boolcoder_test.cc
@@ -35,14 +35,14 @@ const uint8_t secret_key[16] = {
    0x89, 0x9a, 0xab, 0xbc, 0xcd, 0xde, 0xef, 0xf0
  };
  
-void encrypt_buffer(uint8_t *buffer, int size) {
-  for (int i = 0; i < size; ++i) {
+void encrypt_buffer(uint8_t *buffer, size_t size) {
+  for (size_t i = 0; i < size; ++i) {
      buffer[i] ^= secret_key[i & 15];
    }
  }
  
  void test_decrypt_cb(void *decrypt_state, const uint8_t *input,
-                           uint8_t *output, int count) {
+                     uint8_t *output, int count) {
    const size_t offset = input - reinterpret_cast<uint8_t*>(decrypt_state);
    for (int i = 0; i < count; i++) {
      output[i] = input[i] ^ secret_key[(offset + i) & 15];
diff --git a/test/vp8_decrypt_test.cc b/test/vp8_decrypt_test.cc

index b092509..1b5b083 100644 (file)
--- a/test/vp8_decrypt_test.cc
+++ b/test/vp8_decrypt_test.cc
@@ -26,9 +26,9 @@ const uint8_t test_key[16] = {
    0x89, 0x9a, 0xab, 0xbc, 0xcd, 0xde, 0xef, 0xf0
  };
  
-void encrypt_buffer(const uint8_t *src, uint8_t *dst,
-                    int size, int offset = 0) {
-  for (int i = 0; i < size; ++i) {
+void encrypt_buffer(const uint8_t *src, uint8_t *dst, size_t size,
+                    ptrdiff_t offset) {
+  for (size_t i = 0; i < size; ++i) {
      dst[i] = src[i] ^ test_key[(offset + i) & 15];
    }
  }
@@ -61,7 +61,7 @@ TEST(TestDecrypt, DecryptWorks) {
  
  #if CONFIG_DECRYPT
    std::vector<uint8_t> encrypted(video.frame_size());
-  encrypt_buffer(video.cxdata(), &encrypted[0], video.frame_size());
+  encrypt_buffer(video.cxdata(), &encrypted[0], video.frame_size(), 0);
    vp8_decrypt_init di = { test_decrypt_cb, &encrypted[0] };
    decoder.Control(VP8D_SET_DECRYPTOR, &di);
  #endif  // CONFIG_DECRYPT
diff --git a/third_party/nestegg/README.webm b/third_party/nestegg/README.webm

index 7860a7c..8e3760b 100644 (file)
--- a/third_party/nestegg/README.webm
+++ b/third_party/nestegg/README.webm
@@ -18,3 +18,7 @@ nestegg.c|975 col 6| warning: ‘r’ may be used uninitialized in this function
  - fix track_number uint64->uint32 warnings
  - fix track_scale double->uint64 warning
  - nestegg_packet_track: fix uint64->uint32 warning
+- ne_read_(string|binary|block): normalize size_t usage
+- ne_parse: normalize size_t usage
+- quiet read related uint64->size_t warnings
+- ne_buffer_read: quiet uint64->size_t warning
diff --git a/third_party/nestegg/src/nestegg.c b/third_party/nestegg/src/nestegg.c

index 35ce9f1..c7e2b02 100644 (file)
--- a/third_party/nestegg/src/nestegg.c
+++ b/third_party/nestegg/src/nestegg.c
@@ -694,14 +694,15 @@ ne_read_string(nestegg * ctx, char ** val, uint64_t length)
  {
    char * str;
    int r;
+  const size_t alloc_size = (size_t)length + 1;
  
    if (length == 0 || length > LIMIT_STRING)
      return -1;
-  str = ne_pool_alloc(length + 1, ctx->alloc_pool);
-  r = ne_io_read(ctx->io, (unsigned char *) str, length);
+  str = ne_pool_alloc(alloc_size, ctx->alloc_pool);
+  r = ne_io_read(ctx->io, (unsigned char *) str, alloc_size - 1);
    if (r != 1)
      return r;
-  str[length] = '\0';
+  str[alloc_size - 1] = '\0';
    *val = str;
    return 1;
  }
@@ -711,9 +712,9 @@ ne_read_binary(nestegg * ctx, struct ebml_binary * val, uint64_t length)
  {
    if (length == 0 || length > LIMIT_BINARY)
      return -1;
-  val->data = ne_pool_alloc(length, ctx->alloc_pool);
-  val->length = length;
-  return ne_io_read(ctx->io, val->data, length);
+  val->length = (size_t)length;
+  val->data = ne_pool_alloc(val->length, ctx->alloc_pool);
+  return ne_io_read(ctx->io, val->data, val->length);
  }
  
  static int
@@ -1043,7 +1044,7 @@ ne_parse(nestegg * ctx, struct ebml_element_desc * top_level, int64_t max_offset
            ne_read_single_master(ctx, element);
          continue;
        } else {
-        r = ne_read_simple(ctx, element, size);
+        r = ne_read_simple(ctx, element, (size_t)size);
          if (r < 0)
            break;
        }
@@ -1062,7 +1063,7 @@ ne_parse(nestegg * ctx, struct ebml_element_desc * top_level, int64_t max_offset
  
        if (id != ID_VOID && id != ID_CRC32)
          ctx->log(ctx, NESTEGG_LOG_DEBUG, "unknown element %llx", id);
-      r = ne_io_read_skip(ctx->io, size);
+      r = ne_io_read_skip(ctx->io, (size_t)size);
        if (r != 1)
          break;
      }
@@ -1151,7 +1152,8 @@ ne_read_ebml_lacing(nestegg_io * io, size_t block, size_t * read, uint64_t n, ui
    r = ne_read_vint(io, &lace, &length);
    if (r != 1)
      return r;
-  *read += length;
+  assert(length <= 8);
+  *read += (size_t)length;
  
    sizes[i] = lace;
    sum = sizes[i];
@@ -1163,7 +1165,8 @@ ne_read_ebml_lacing(nestegg_io * io, size_t block, size_t * read, uint64_t n, ui
      r = ne_read_svint(io, &slace, &length);
      if (r != 1)
        return r;
-    *read += length;
+    assert(length <= 8);
+    *read += (size_t)length;
      sizes[i] = sizes[i - 1] + slace;
      sum += sizes[i];
      i += 1;
@@ -1263,7 +1266,8 @@ ne_read_block(nestegg * ctx, uint64_t block_id, uint64_t block_size, nestegg_pac
    if (track_number == 0 || (unsigned int)track_number != track_number)
      return -1;
  
-  consumed += length;
+  assert(length <= 8);
+  consumed += (size_t)length;
  
    r = ne_read_int(ctx->io, &timecode, 2);
    if (r != 1)
@@ -1307,7 +1311,7 @@ ne_read_block(nestegg * ctx, uint64_t block_id, uint64_t block_size, nestegg_pac
    case LACING_XIPH:
      if (frames == 1)
        return -1;
-    r = ne_read_xiph_lacing(ctx->io, block_size, &consumed, frames, frame_sizes);
+    r = ne_read_xiph_lacing(ctx->io, (size_t)block_size, &consumed, frames, frame_sizes);
      if (r != 1)
        return r;
      break;
@@ -1320,7 +1324,7 @@ ne_read_block(nestegg * ctx, uint64_t block_id, uint64_t block_size, nestegg_pac
    case LACING_EBML:
      if (frames == 1)
        return -1;
-    r = ne_read_ebml_lacing(ctx->io, block_size, &consumed, frames, frame_sizes);
+    r = ne_read_ebml_lacing(ctx->io, (size_t)block_size, &consumed, frames, frame_sizes);
      if (r != 1)
        return r;
      break;
@@ -1365,9 +1369,9 @@ ne_read_block(nestegg * ctx, uint64_t block_id, uint64_t block_size, nestegg_pac
        return -1;
      }
      f = ne_alloc(sizeof(*f));
-    f->data = ne_alloc(frame_sizes[i]);
-    f->length = frame_sizes[i];
-    r = ne_io_read(ctx->io, f->data, frame_sizes[i]);
+    f->length = (size_t)frame_sizes[i];
+    f->data = ne_alloc(f->length);
+    r = ne_io_read(ctx->io, f->data, f->length);
      if (r != 1) {
        free(f->data);
        free(f);
@@ -1406,7 +1410,8 @@ ne_read_discard_padding(nestegg * ctx, nestegg_packet * pkt)
    if (!element)
      return 1;
  
-  r = ne_read_simple(ctx, element, size);
+  assert((size_t)size == size);
+  r = ne_read_simple(ctx, element, (size_t)size);
    if (r != 1)
      return r;
    storage = (struct ebml_type *) (ctx->ancestor->data + element->offset);
@@ -1600,7 +1605,7 @@ ne_buffer_read(void * buffer, size_t length, void * user_data)
    struct sniff_buffer * sb = user_data;
  
    int rv = 1;
-  size_t available = sb->length - sb->offset;
+  size_t available = sb->length - (size_t)sb->offset;
  
    if (available < length)
      return 0;
@@ -2074,7 +2079,7 @@ nestegg_track_codec_data(nestegg * ctx, unsigned int track, unsigned int item,
          p += sizes[i];
        }
        *data = p;
-      *length = sizes[item];
+      *length = (size_t)sizes[item];
    } else {
      *data = codec_private.data;
      *length = codec_private.length;
diff --git a/vp8/common/rtcd_defs.pl b/vp8/common/rtcd_defs.pl

new file mode 100644 (file)

index 0000000..130d965
--- /dev/null
+++ b/vp8/common/rtcd_defs.pl
@@ -0,0 +1,541 @@
+sub vp8_common_forward_decls() {
+print <<EOF
+/*
+ * VP8
+ */
+
+struct blockd;
+struct macroblockd;
+struct loop_filter_info;
+
+/* Encoder forward decls */
+struct block;
+struct macroblock;
+struct variance_vtable;
+union int_mv;
+struct yv12_buffer_config;
+EOF
+}
+forward_decls qw/vp8_common_forward_decls/;
+
+#
+# system state
+#
+add_proto qw/void vp8_clear_system_state/, "";
+specialize qw/vp8_clear_system_state mmx/;
+$vp8_clear_system_state_mmx=vpx_reset_mmx_state;
+
+#
+# Dequant
+#
+add_proto qw/void vp8_dequantize_b/, "struct blockd*, short *dqc";
+specialize qw/vp8_dequantize_b mmx media neon/;
+$vp8_dequantize_b_media=vp8_dequantize_b_v6;
+
+add_proto qw/void vp8_dequant_idct_add/, "short *input, short *dq, unsigned char *output, int stride";
+specialize qw/vp8_dequant_idct_add mmx media neon dspr2/;
+$vp8_dequant_idct_add_media=vp8_dequant_idct_add_v6;
+$vp8_dequant_idct_add_dspr2=vp8_dequant_idct_add_dspr2;
+
+add_proto qw/void vp8_dequant_idct_add_y_block/, "short *q, short *dq, unsigned char *dst, int stride, char *eobs";
+specialize qw/vp8_dequant_idct_add_y_block mmx sse2 media neon dspr2/;
+$vp8_dequant_idct_add_y_block_media=vp8_dequant_idct_add_y_block_v6;
+$vp8_dequant_idct_add_y_block_dspr2=vp8_dequant_idct_add_y_block_dspr2;
+
+add_proto qw/void vp8_dequant_idct_add_uv_block/, "short *q, short *dq, unsigned char *dst_u, unsigned char *dst_v, int stride, char *eobs";
+specialize qw/vp8_dequant_idct_add_uv_block mmx sse2 media neon dspr2/;
+$vp8_dequant_idct_add_uv_block_media=vp8_dequant_idct_add_uv_block_v6;
+$vp8_dequant_idct_add_y_block_dspr2=vp8_dequant_idct_add_y_block_dspr2;
+
+#
+# Loopfilter
+#
+add_proto qw/void vp8_loop_filter_mbv/, "unsigned char *y, unsigned char *u, unsigned char *v, int ystride, int uv_stride, struct loop_filter_info *lfi";
+specialize qw/vp8_loop_filter_mbv mmx sse2 media neon dspr2/;
+$vp8_loop_filter_mbv_media=vp8_loop_filter_mbv_armv6;
+$vp8_loop_filter_mbv_dspr2=vp8_loop_filter_mbv_dspr2;
+
+add_proto qw/void vp8_loop_filter_bv/, "unsigned char *y, unsigned char *u, unsigned char *v, int ystride, int uv_stride, struct loop_filter_info *lfi";
+specialize qw/vp8_loop_filter_bv mmx sse2 media neon dspr2/;
+$vp8_loop_filter_bv_media=vp8_loop_filter_bv_armv6;
+$vp8_loop_filter_bv_dspr2=vp8_loop_filter_bv_dspr2;
+
+add_proto qw/void vp8_loop_filter_mbh/, "unsigned char *y, unsigned char *u, unsigned char *v, int ystride, int uv_stride, struct loop_filter_info *lfi";
+specialize qw/vp8_loop_filter_mbh mmx sse2 media neon dspr2/;
+$vp8_loop_filter_mbh_media=vp8_loop_filter_mbh_armv6;
+$vp8_loop_filter_mbh_dspr2=vp8_loop_filter_mbh_dspr2;
+
+add_proto qw/void vp8_loop_filter_bh/, "unsigned char *y, unsigned char *u, unsigned char *v, int ystride, int uv_stride, struct loop_filter_info *lfi";
+specialize qw/vp8_loop_filter_bh mmx sse2 media neon dspr2/;
+$vp8_loop_filter_bh_media=vp8_loop_filter_bh_armv6;
+$vp8_loop_filter_bh_dspr2=vp8_loop_filter_bh_dspr2;
+
+
+add_proto qw/void vp8_loop_filter_simple_mbv/, "unsigned char *y, int ystride, const unsigned char *blimit";
+specialize qw/vp8_loop_filter_simple_mbv mmx sse2 media neon/;
+$vp8_loop_filter_simple_mbv_c=vp8_loop_filter_simple_vertical_edge_c;
+$vp8_loop_filter_simple_mbv_mmx=vp8_loop_filter_simple_vertical_edge_mmx;
+$vp8_loop_filter_simple_mbv_sse2=vp8_loop_filter_simple_vertical_edge_sse2;
+$vp8_loop_filter_simple_mbv_media=vp8_loop_filter_simple_vertical_edge_armv6;
+$vp8_loop_filter_simple_mbv_neon=vp8_loop_filter_mbvs_neon;
+
+add_proto qw/void vp8_loop_filter_simple_mbh/, "unsigned char *y, int ystride, const unsigned char *blimit";
+specialize qw/vp8_loop_filter_simple_mbh mmx sse2 media neon/;
+$vp8_loop_filter_simple_mbh_c=vp8_loop_filter_simple_horizontal_edge_c;
+$vp8_loop_filter_simple_mbh_mmx=vp8_loop_filter_simple_horizontal_edge_mmx;
+$vp8_loop_filter_simple_mbh_sse2=vp8_loop_filter_simple_horizontal_edge_sse2;
+$vp8_loop_filter_simple_mbh_media=vp8_loop_filter_simple_horizontal_edge_armv6;
+$vp8_loop_filter_simple_mbh_neon=vp8_loop_filter_mbhs_neon;
+
+add_proto qw/void vp8_loop_filter_simple_bv/, "unsigned char *y, int ystride, const unsigned char *blimit";
+specialize qw/vp8_loop_filter_simple_bv mmx sse2 media neon/;
+$vp8_loop_filter_simple_bv_c=vp8_loop_filter_bvs_c;
+$vp8_loop_filter_simple_bv_mmx=vp8_loop_filter_bvs_mmx;
+$vp8_loop_filter_simple_bv_sse2=vp8_loop_filter_bvs_sse2;
+$vp8_loop_filter_simple_bv_media=vp8_loop_filter_bvs_armv6;
+$vp8_loop_filter_simple_bv_neon=vp8_loop_filter_bvs_neon;
+
+add_proto qw/void vp8_loop_filter_simple_bh/, "unsigned char *y, int ystride, const unsigned char *blimit";
+specialize qw/vp8_loop_filter_simple_bh mmx sse2 media neon/;
+$vp8_loop_filter_simple_bh_c=vp8_loop_filter_bhs_c;
+$vp8_loop_filter_simple_bh_mmx=vp8_loop_filter_bhs_mmx;
+$vp8_loop_filter_simple_bh_sse2=vp8_loop_filter_bhs_sse2;
+$vp8_loop_filter_simple_bh_media=vp8_loop_filter_bhs_armv6;
+$vp8_loop_filter_simple_bh_neon=vp8_loop_filter_bhs_neon;
+
+#
+# IDCT
+#
+#idct16
+add_proto qw/void vp8_short_idct4x4llm/, "short *input, unsigned char *pred, int pitch, unsigned char *dst, int dst_stride";
+specialize qw/vp8_short_idct4x4llm mmx media neon dspr2/;
+$vp8_short_idct4x4llm_media=vp8_short_idct4x4llm_v6_dual;
+$vp8_short_idct4x4llm_dspr2=vp8_short_idct4x4llm_dspr2;
+
+#iwalsh1
+add_proto qw/void vp8_short_inv_walsh4x4_1/, "short *input, short *output";
+specialize qw/vp8_short_inv_walsh4x4_1 dspr2/;
+$vp8_short_inv_walsh4x4_1_dspr2=vp8_short_inv_walsh4x4_1_dspr2;
+# no asm yet
+
+#iwalsh16
+add_proto qw/void vp8_short_inv_walsh4x4/, "short *input, short *output";
+specialize qw/vp8_short_inv_walsh4x4 mmx sse2 media neon dspr2/;
+$vp8_short_inv_walsh4x4_media=vp8_short_inv_walsh4x4_v6;
+$vp8_short_inv_walsh4x4_dspr2=vp8_short_inv_walsh4x4_dspr2;
+
+#idct1_scalar_add
+add_proto qw/void vp8_dc_only_idct_add/, "short input, unsigned char *pred, int pred_stride, unsigned char *dst, int dst_stride";
+specialize qw/vp8_dc_only_idct_add     mmx media neon dspr2/;
+$vp8_dc_only_idct_add_media=vp8_dc_only_idct_add_v6;
+$vp8_dc_only_idct_add_dspr2=vp8_dc_only_idct_add_dspr2;
+
+#
+# RECON
+#
+add_proto qw/void vp8_copy_mem16x16/, "unsigned char *src, int src_pitch, unsigned char *dst, int dst_pitch";
+specialize qw/vp8_copy_mem16x16 mmx sse2 media neon dspr2/;
+$vp8_copy_mem16x16_media=vp8_copy_mem16x16_v6;
+$vp8_copy_mem16x16_dspr2=vp8_copy_mem16x16_dspr2;
+
+add_proto qw/void vp8_copy_mem8x8/, "unsigned char *src, int src_pitch, unsigned char *dst, int dst_pitch";
+specialize qw/vp8_copy_mem8x8 mmx media neon dspr2/;
+$vp8_copy_mem8x8_media=vp8_copy_mem8x8_v6;
+$vp8_copy_mem8x8_dspr2=vp8_copy_mem8x8_dspr2;
+
+add_proto qw/void vp8_copy_mem8x4/, "unsigned char *src, int src_pitch, unsigned char *dst, int dst_pitch";
+specialize qw/vp8_copy_mem8x4 mmx media neon dspr2/;
+$vp8_copy_mem8x4_media=vp8_copy_mem8x4_v6;
+$vp8_copy_mem8x4_dspr2=vp8_copy_mem8x4_dspr2;
+
+add_proto qw/void vp8_build_intra_predictors_mby_s/, "struct macroblockd *x, unsigned char * yabove_row, unsigned char * yleft, int left_stride, unsigned char * ypred_ptr, int y_stride";
+specialize qw/vp8_build_intra_predictors_mby_s sse2 ssse3/;
+#TODO: fix assembly for neon
+
+add_proto qw/void vp8_build_intra_predictors_mbuv_s/, "struct macroblockd *x, unsigned char * uabove_row, unsigned char * vabove_row,  unsigned char *uleft, unsigned char *vleft, int left_stride, unsigned char * upred_ptr, unsigned char * vpred_ptr, int pred_stride";
+specialize qw/vp8_build_intra_predictors_mbuv_s sse2 ssse3/;
+
+add_proto qw/void vp8_intra4x4_predict/, "unsigned char *Above, unsigned char *yleft, int left_stride, int b_mode, unsigned char *dst, int dst_stride, unsigned char top_left";
+specialize qw/vp8_intra4x4_predict media/;
+$vp8_intra4x4_predict_media=vp8_intra4x4_predict_armv6;
+
+#
+# Postproc
+#
+if (vpx_config("CONFIG_POSTPROC") eq "yes") {
+    add_proto qw/void vp8_mbpost_proc_down/, "unsigned char *dst, int pitch, int rows, int cols,int flimit";
+    specialize qw/vp8_mbpost_proc_down mmx sse2/;
+    $vp8_mbpost_proc_down_sse2=vp8_mbpost_proc_down_xmm;
+
+    add_proto qw/void vp8_mbpost_proc_across_ip/, "unsigned char *dst, int pitch, int rows, int cols,int flimit";
+    specialize qw/vp8_mbpost_proc_across_ip sse2/;
+    $vp8_mbpost_proc_across_ip_sse2=vp8_mbpost_proc_across_ip_xmm;
+
+    add_proto qw/void vp8_post_proc_down_and_across_mb_row/, "unsigned char *src, unsigned char *dst, int src_pitch, int dst_pitch, int cols, unsigned char *flimits, int size";
+    specialize qw/vp8_post_proc_down_and_across_mb_row sse2/;
+
+    add_proto qw/void vp8_plane_add_noise/, "unsigned char *s, char *noise, char blackclamp[16], char whiteclamp[16], char bothclamp[16], unsigned int w, unsigned int h, int pitch";
+    specialize qw/vp8_plane_add_noise mmx sse2/;
+    $vp8_plane_add_noise_sse2=vp8_plane_add_noise_wmt;
+
+    add_proto qw/void vp8_blend_mb_inner/, "unsigned char *y, unsigned char *u, unsigned char *v, int y1, int u1, int v1, int alpha, int stride";
+    # no asm yet
+
+    add_proto qw/void vp8_blend_mb_outer/, "unsigned char *y, unsigned char *u, unsigned char *v, int y1, int u1, int v1, int alpha, int stride";
+    # no asm yet
+
+    add_proto qw/void vp8_blend_b/, "unsigned char *y, unsigned char *u, unsigned char *v, int y1, int u1, int v1, int alpha, int stride";
+    # no asm yet
+
+    add_proto qw/void vp8_filter_by_weight16x16/, "unsigned char *src, int src_stride, unsigned char *dst, int dst_stride, int src_weight";
+    specialize qw/vp8_filter_by_weight16x16 sse2/;
+
+    add_proto qw/void vp8_filter_by_weight8x8/, "unsigned char *src, int src_stride, unsigned char *dst, int dst_stride, int src_weight";
+    specialize qw/vp8_filter_by_weight8x8 sse2/;
+
+    add_proto qw/void vp8_filter_by_weight4x4/, "unsigned char *src, int src_stride, unsigned char *dst, int dst_stride, int src_weight";
+    # no asm yet
+}
+
+#
+# Subpixel
+#
+add_proto qw/void vp8_sixtap_predict16x16/, "unsigned char *src, int src_pitch, int xofst, int yofst, unsigned char *dst, int dst_pitch";
+specialize qw/vp8_sixtap_predict16x16 mmx sse2 ssse3 media neon dspr2/;
+$vp8_sixtap_predict16x16_media=vp8_sixtap_predict16x16_armv6;
+$vp8_sixtap_predict16x16_dspr2=vp8_sixtap_predict16x16_dspr2;
+
+add_proto qw/void vp8_sixtap_predict8x8/, "unsigned char *src, int src_pitch, int xofst, int yofst, unsigned char *dst, int dst_pitch";
+specialize qw/vp8_sixtap_predict8x8 mmx sse2 ssse3 media neon dspr2/;
+$vp8_sixtap_predict8x8_media=vp8_sixtap_predict8x8_armv6;
+$vp8_sixtap_predict8x8_dspr2=vp8_sixtap_predict8x8_dspr2;
+
+add_proto qw/void vp8_sixtap_predict8x4/, "unsigned char *src, int src_pitch, int xofst, int yofst, unsigned char *dst, int dst_pitch";
+specialize qw/vp8_sixtap_predict8x4 mmx sse2 ssse3 media neon dspr2/;
+$vp8_sixtap_predict8x4_media=vp8_sixtap_predict8x4_armv6;
+$vp8_sixtap_predict8x4_dspr2=vp8_sixtap_predict8x4_dspr2;
+
+add_proto qw/void vp8_sixtap_predict4x4/, "unsigned char *src, int src_pitch, int xofst, int yofst, unsigned char *dst, int dst_pitch";
+specialize qw/vp8_sixtap_predict4x4 mmx ssse3 media neon dspr2/;
+$vp8_sixtap_predict4x4_media=vp8_sixtap_predict4x4_armv6;
+$vp8_sixtap_predict4x4_dspr2=vp8_sixtap_predict4x4_dspr2;
+
+add_proto qw/void vp8_bilinear_predict16x16/, "unsigned char *src, int src_pitch, int xofst, int yofst, unsigned char *dst, int dst_pitch";
+specialize qw/vp8_bilinear_predict16x16 mmx sse2 ssse3 media neon/;
+$vp8_bilinear_predict16x16_media=vp8_bilinear_predict16x16_armv6;
+
+add_proto qw/void vp8_bilinear_predict8x8/, "unsigned char *src, int src_pitch, int xofst, int yofst, unsigned char *dst, int dst_pitch";
+specialize qw/vp8_bilinear_predict8x8 mmx sse2 ssse3 media neon/;
+$vp8_bilinear_predict8x8_media=vp8_bilinear_predict8x8_armv6;
+
+add_proto qw/void vp8_bilinear_predict8x4/, "unsigned char *src, int src_pitch, int xofst, int yofst, unsigned char *dst, int dst_pitch";
+specialize qw/vp8_bilinear_predict8x4 mmx media neon/;
+$vp8_bilinear_predict8x4_media=vp8_bilinear_predict8x4_armv6;
+
+add_proto qw/void vp8_bilinear_predict4x4/, "unsigned char *src, int src_pitch, int xofst, int yofst, unsigned char *dst, int dst_pitch";
+specialize qw/vp8_bilinear_predict4x4 mmx media neon/;
+$vp8_bilinear_predict4x4_media=vp8_bilinear_predict4x4_armv6;
+
+#
+# Whole-pixel Variance
+#
+add_proto qw/unsigned int vp8_variance4x4/, "const unsigned char *src_ptr, int source_stride, const unsigned char *ref_ptr, int  ref_stride, unsigned int *sse";
+specialize qw/vp8_variance4x4 mmx sse2/;
+$vp8_variance4x4_sse2=vp8_variance4x4_wmt;
+
+add_proto qw/unsigned int vp8_variance8x8/, "const unsigned char *src_ptr, int source_stride, const unsigned char *ref_ptr, int  ref_stride, unsigned int *sse";
+specialize qw/vp8_variance8x8 mmx sse2 media neon/;
+$vp8_variance8x8_sse2=vp8_variance8x8_wmt;
+$vp8_variance8x8_media=vp8_variance8x8_armv6;
+
+add_proto qw/unsigned int vp8_variance8x16/, "const unsigned char *src_ptr, int source_stride, const unsigned char *ref_ptr, int  ref_stride, unsigned int *sse";
+specialize qw/vp8_variance8x16 mmx sse2 neon/;
+$vp8_variance8x16_sse2=vp8_variance8x16_wmt;
+
+add_proto qw/unsigned int vp8_variance16x8/, "const unsigned char *src_ptr, int source_stride, const unsigned char *ref_ptr, int  ref_stride, unsigned int *sse";
+specialize qw/vp8_variance16x8 mmx sse2 neon/;
+$vp8_variance16x8_sse2=vp8_variance16x8_wmt;
+
+add_proto qw/unsigned int vp8_variance16x16/, "const unsigned char *src_ptr, int source_stride, const unsigned char *ref_ptr, int  ref_stride, unsigned int *sse";
+specialize qw/vp8_variance16x16 mmx sse2 media neon/;
+$vp8_variance16x16_sse2=vp8_variance16x16_wmt;
+$vp8_variance16x16_media=vp8_variance16x16_armv6;
+
+#
+# Sub-pixel Variance
+#
+add_proto qw/unsigned int vp8_sub_pixel_variance4x4/, "const unsigned char  *src_ptr, int  source_stride, int  xoffset, int  yoffset, const unsigned char *ref_ptr, int Refstride, unsigned int *sse";
+specialize qw/vp8_sub_pixel_variance4x4 mmx sse2/;
+$vp8_sub_pixel_variance4x4_sse2=vp8_sub_pixel_variance4x4_wmt;
+
+add_proto qw/unsigned int vp8_sub_pixel_variance8x8/, "const unsigned char  *src_ptr, int  source_stride, int  xoffset, int  yoffset, const unsigned char *ref_ptr, int Refstride, unsigned int *sse";
+specialize qw/vp8_sub_pixel_variance8x8 mmx sse2 media neon/;
+$vp8_sub_pixel_variance8x8_sse2=vp8_sub_pixel_variance8x8_wmt;
+$vp8_sub_pixel_variance8x8_media=vp8_sub_pixel_variance8x8_armv6;
+
+add_proto qw/unsigned int vp8_sub_pixel_variance8x16/, "const unsigned char  *src_ptr, int  source_stride, int  xoffset, int  yoffset, const unsigned char *ref_ptr, int Refstride, unsigned int *sse";
+specialize qw/vp8_sub_pixel_variance8x16 mmx sse2/;
+$vp8_sub_pixel_variance8x16_sse2=vp8_sub_pixel_variance8x16_wmt;
+
+add_proto qw/unsigned int vp8_sub_pixel_variance16x8/, "const unsigned char  *src_ptr, int  source_stride, int  xoffset, int  yoffset, const unsigned char *ref_ptr, int Refstride, unsigned int *sse";
+specialize qw/vp8_sub_pixel_variance16x8 mmx sse2 ssse3/;
+$vp8_sub_pixel_variance16x8_sse2=vp8_sub_pixel_variance16x8_wmt;
+
+add_proto qw/unsigned int vp8_sub_pixel_variance16x16/, "const unsigned char  *src_ptr, int  source_stride, int  xoffset, int  yoffset, const unsigned char *ref_ptr, int Refstride, unsigned int *sse";
+specialize qw/vp8_sub_pixel_variance16x16 mmx sse2 ssse3 media neon/;
+$vp8_sub_pixel_variance16x16_sse2=vp8_sub_pixel_variance16x16_wmt;
+$vp8_sub_pixel_variance16x16_media=vp8_sub_pixel_variance16x16_armv6;
+
+add_proto qw/unsigned int vp8_variance_halfpixvar16x16_h/, "const unsigned char *src_ptr, int source_stride, const unsigned char *ref_ptr, int  ref_stride, unsigned int *sse";
+specialize qw/vp8_variance_halfpixvar16x16_h mmx sse2 media neon/;
+$vp8_variance_halfpixvar16x16_h_sse2=vp8_variance_halfpixvar16x16_h_wmt;
+$vp8_variance_halfpixvar16x16_h_media=vp8_variance_halfpixvar16x16_h_armv6;
+
+add_proto qw/unsigned int vp8_variance_halfpixvar16x16_v/, "const unsigned char *src_ptr, int source_stride, const unsigned char *ref_ptr, int  ref_stride, unsigned int *sse";
+specialize qw/vp8_variance_halfpixvar16x16_v mmx sse2 media neon/;
+$vp8_variance_halfpixvar16x16_v_sse2=vp8_variance_halfpixvar16x16_v_wmt;
+$vp8_variance_halfpixvar16x16_v_media=vp8_variance_halfpixvar16x16_v_armv6;
+
+add_proto qw/unsigned int vp8_variance_halfpixvar16x16_hv/, "const unsigned char *src_ptr, int source_stride, const unsigned char *ref_ptr, int  ref_stride, unsigned int *sse";
+specialize qw/vp8_variance_halfpixvar16x16_hv mmx sse2 media neon/;
+$vp8_variance_halfpixvar16x16_hv_sse2=vp8_variance_halfpixvar16x16_hv_wmt;
+$vp8_variance_halfpixvar16x16_hv_media=vp8_variance_halfpixvar16x16_hv_armv6;
+
+#
+# Single block SAD
+#
+add_proto qw/unsigned int vp8_sad4x4/, "const unsigned char *src_ptr, int src_stride, const unsigned char *ref_ptr, int ref_stride, unsigned int max_sad";
+specialize qw/vp8_sad4x4 mmx sse2 neon/;
+$vp8_sad4x4_sse2=vp8_sad4x4_wmt;
+
+add_proto qw/unsigned int vp8_sad8x8/, "const unsigned char *src_ptr, int src_stride, const unsigned char *ref_ptr, int ref_stride, unsigned int max_sad";
+specialize qw/vp8_sad8x8 mmx sse2 neon/;
+$vp8_sad8x8_sse2=vp8_sad8x8_wmt;
+
+add_proto qw/unsigned int vp8_sad8x16/, "const unsigned char *src_ptr, int src_stride, const unsigned char *ref_ptr, int ref_stride, unsigned int max_sad";
+specialize qw/vp8_sad8x16 mmx sse2 neon/;
+$vp8_sad8x16_sse2=vp8_sad8x16_wmt;
+
+add_proto qw/unsigned int vp8_sad16x8/, "const unsigned char *src_ptr, int src_stride, const unsigned char *ref_ptr, int ref_stride, unsigned int max_sad";
+specialize qw/vp8_sad16x8 mmx sse2 neon/;
+$vp8_sad16x8_sse2=vp8_sad16x8_wmt;
+
+add_proto qw/unsigned int vp8_sad16x16/, "const unsigned char *src_ptr, int src_stride, const unsigned char *ref_ptr, int ref_stride, unsigned int max_sad";
+specialize qw/vp8_sad16x16 mmx sse2 sse3 media neon/;
+$vp8_sad16x16_sse2=vp8_sad16x16_wmt;
+$vp8_sad16x16_media=vp8_sad16x16_armv6;
+
+#
+# Multi-block SAD, comparing a reference to N blocks 1 pixel apart horizontally
+#
+add_proto qw/void vp8_sad4x4x3/, "const unsigned char *src_ptr, int src_stride, const unsigned char *ref_ptr, int  ref_stride, unsigned int *sad_array";
+specialize qw/vp8_sad4x4x3 sse3/;
+
+add_proto qw/void vp8_sad8x8x3/, "const unsigned char *src_ptr, int src_stride, const unsigned char *ref_ptr, int  ref_stride, unsigned int *sad_array";
+specialize qw/vp8_sad8x8x3 sse3/;
+
+add_proto qw/void vp8_sad8x16x3/, "const unsigned char *src_ptr, int src_stride, const unsigned char *ref_ptr, int  ref_stride, unsigned int *sad_array";
+specialize qw/vp8_sad8x16x3 sse3/;
+
+add_proto qw/void vp8_sad16x8x3/, "const unsigned char *src_ptr, int src_stride, const unsigned char *ref_ptr, int  ref_stride, unsigned int *sad_array";
+specialize qw/vp8_sad16x8x3 sse3 ssse3/;
+
+add_proto qw/void vp8_sad16x16x3/, "const unsigned char *src_ptr, int src_stride, const unsigned char *ref_ptr, int  ref_stride, unsigned int *sad_array";
+specialize qw/vp8_sad16x16x3 sse3 ssse3/;
+
+# Note the only difference in the following prototypes is that they return into
+# an array of short
+add_proto qw/void vp8_sad4x4x8/, "const unsigned char *src_ptr, int src_stride, const unsigned char *ref_ptr, int  ref_stride, unsigned short *sad_array";
+specialize qw/vp8_sad4x4x8 sse4_1/;
+$vp8_sad4x4x8_sse4_1=vp8_sad4x4x8_sse4;
+
+add_proto qw/void vp8_sad8x8x8/, "const unsigned char *src_ptr, int src_stride, const unsigned char *ref_ptr, int  ref_stride, unsigned short *sad_array";
+specialize qw/vp8_sad8x8x8 sse4_1/;
+$vp8_sad8x8x8_sse4_1=vp8_sad8x8x8_sse4;
+
+add_proto qw/void vp8_sad8x16x8/, "const unsigned char *src_ptr, int src_stride, const unsigned char *ref_ptr, int  ref_stride, unsigned short *sad_array";
+specialize qw/vp8_sad8x16x8 sse4_1/;
+$vp8_sad8x16x8_sse4_1=vp8_sad8x16x8_sse4;
+
+add_proto qw/void vp8_sad16x8x8/, "const unsigned char *src_ptr, int src_stride, const unsigned char *ref_ptr, int  ref_stride, unsigned short *sad_array";
+specialize qw/vp8_sad16x8x8 sse4_1/;
+$vp8_sad16x8x8_sse4_1=vp8_sad16x8x8_sse4;
+
+add_proto qw/void vp8_sad16x16x8/, "const unsigned char *src_ptr, int src_stride, const unsigned char *ref_ptr, int  ref_stride, unsigned short *sad_array";
+specialize qw/vp8_sad16x16x8 sse4_1/;
+$vp8_sad16x16x8_sse4_1=vp8_sad16x16x8_sse4;
+
+#
+# Multi-block SAD, comparing a reference to N independent blocks
+#
+add_proto qw/void vp8_sad4x4x4d/, "const unsigned char *src_ptr, int src_stride, const unsigned char * const ref_ptr[], int  ref_stride, unsigned int *sad_array";
+specialize qw/vp8_sad4x4x4d sse3/;
+
+add_proto qw/void vp8_sad8x8x4d/, "const unsigned char *src_ptr, int src_stride, const unsigned char * const ref_ptr[], int  ref_stride, unsigned int *sad_array";
+specialize qw/vp8_sad8x8x4d sse3/;
+
+add_proto qw/void vp8_sad8x16x4d/, "const unsigned char *src_ptr, int src_stride, const unsigned char * const ref_ptr[], int  ref_stride, unsigned int *sad_array";
+specialize qw/vp8_sad8x16x4d sse3/;
+
+add_proto qw/void vp8_sad16x8x4d/, "const unsigned char *src_ptr, int src_stride, const unsigned char * const ref_ptr[], int  ref_stride, unsigned int *sad_array";
+specialize qw/vp8_sad16x8x4d sse3/;
+
+add_proto qw/void vp8_sad16x16x4d/, "const unsigned char *src_ptr, int src_stride, const unsigned char * const ref_ptr[], int  ref_stride, unsigned int *sad_array";
+specialize qw/vp8_sad16x16x4d sse3/;
+
+#
+# Encoder functions below this point.
+#
+if (vpx_config("CONFIG_VP8_ENCODER") eq "yes") {
+
+#
+# Sum of squares (vector)
+#
+add_proto qw/unsigned int vp8_get_mb_ss/, "const short *";
+specialize qw/vp8_get_mb_ss mmx sse2/;
+
+#
+# SSE (Sum Squared Error)
+#
+add_proto qw/unsigned int vp8_sub_pixel_mse16x16/, "const unsigned char  *src_ptr, int  source_stride, int  xoffset, int  yoffset, const unsigned char *ref_ptr, int Refstride, unsigned int *sse";
+specialize qw/vp8_sub_pixel_mse16x16 mmx sse2/;
+$vp8_sub_pixel_mse16x16_sse2=vp8_sub_pixel_mse16x16_wmt;
+
+add_proto qw/unsigned int vp8_mse16x16/, "const unsigned char *src_ptr, int source_stride, const unsigned char *ref_ptr, int  ref_stride, unsigned int *sse";
+specialize qw/vp8_mse16x16 mmx sse2 media neon/;
+$vp8_mse16x16_sse2=vp8_mse16x16_wmt;
+$vp8_mse16x16_media=vp8_mse16x16_armv6;
+
+add_proto qw/unsigned int vp8_get4x4sse_cs/, "const unsigned char *src_ptr, int source_stride, const unsigned char *ref_ptr, int  ref_stride";
+specialize qw/vp8_get4x4sse_cs mmx neon/;
+
+#
+# Block copy
+#
+if ($opts{arch} =~ /x86/) {
+    add_proto qw/void vp8_copy32xn/, "const unsigned char *src_ptr, int source_stride, const unsigned char *ref_ptr, int ref_stride, int n";
+    specialize qw/vp8_copy32xn sse2 sse3/;
+}
+
+#
+# Structured Similarity (SSIM)
+#
+if (vpx_config("CONFIG_INTERNAL_STATS") eq "yes") {
+    $opts{arch} eq "x86_64" and $sse2_on_x86_64 = "sse2";
+
+    add_proto qw/void vp8_ssim_parms_8x8/, "unsigned char *s, int sp, unsigned char *r, int rp, unsigned long *sum_s, unsigned long *sum_r, unsigned long *sum_sq_s, unsigned long *sum_sq_r, unsigned long *sum_sxr";
+    specialize qw/vp8_ssim_parms_8x8/, "$sse2_on_x86_64";
+
+    add_proto qw/void vp8_ssim_parms_16x16/, "unsigned char *s, int sp, unsigned char *r, int rp, unsigned long *sum_s, unsigned long *sum_r, unsigned long *sum_sq_s, unsigned long *sum_sq_r, unsigned long *sum_sxr";
+    specialize qw/vp8_ssim_parms_16x16/, "$sse2_on_x86_64";
+}
+
+#
+# Forward DCT
+#
+add_proto qw/void vp8_short_fdct4x4/, "short *input, short *output, int pitch";
+specialize qw/vp8_short_fdct4x4 mmx sse2 media neon/;
+$vp8_short_fdct4x4_media=vp8_short_fdct4x4_armv6;
+
+add_proto qw/void vp8_short_fdct8x4/, "short *input, short *output, int pitch";
+specialize qw/vp8_short_fdct8x4 mmx sse2 media neon/;
+$vp8_short_fdct8x4_media=vp8_short_fdct8x4_armv6;
+
+add_proto qw/void vp8_short_walsh4x4/, "short *input, short *output, int pitch";
+specialize qw/vp8_short_walsh4x4 sse2 media neon/;
+$vp8_short_walsh4x4_media=vp8_short_walsh4x4_armv6;
+
+#
+# Quantizer
+#
+add_proto qw/void vp8_regular_quantize_b/, "struct block *, struct blockd *";
+specialize qw/vp8_regular_quantize_b sse2/;
+# TODO(johann) Update sse4 implementation and re-enable
+#$vp8_regular_quantize_b_sse4_1=vp8_regular_quantize_b_sse4;
+
+add_proto qw/void vp8_fast_quantize_b/, "struct block *, struct blockd *";
+specialize qw/vp8_fast_quantize_b sse2 ssse3 media neon/;
+$vp8_fast_quantize_b_media=vp8_fast_quantize_b_armv6;
+
+add_proto qw/void vp8_regular_quantize_b_pair/, "struct block *b1, struct block *b2, struct blockd *d1, struct blockd *d2";
+# no asm yet
+
+add_proto qw/void vp8_fast_quantize_b_pair/, "struct block *b1, struct block *b2, struct blockd *d1, struct blockd *d2";
+specialize qw/vp8_fast_quantize_b_pair neon/;
+
+add_proto qw/void vp8_quantize_mb/, "struct macroblock *";
+specialize qw/vp8_quantize_mb neon/;
+
+add_proto qw/void vp8_quantize_mby/, "struct macroblock *";
+specialize qw/vp8_quantize_mby neon/;
+
+add_proto qw/void vp8_quantize_mbuv/, "struct macroblock *";
+specialize qw/vp8_quantize_mbuv neon/;
+
+#
+# Block subtraction
+#
+add_proto qw/int vp8_block_error/, "short *coeff, short *dqcoeff";
+specialize qw/vp8_block_error mmx sse2/;
+$vp8_block_error_sse2=vp8_block_error_xmm;
+
+add_proto qw/int vp8_mbblock_error/, "struct macroblock *mb, int dc";
+specialize qw/vp8_mbblock_error mmx sse2/;
+$vp8_mbblock_error_sse2=vp8_mbblock_error_xmm;
+
+add_proto qw/int vp8_mbuverror/, "struct macroblock *mb";
+specialize qw/vp8_mbuverror mmx sse2/;
+$vp8_mbuverror_sse2=vp8_mbuverror_xmm;
+
+add_proto qw/void vp8_subtract_b/, "struct block *be, struct blockd *bd, int pitch";
+specialize qw/vp8_subtract_b mmx sse2 media neon/;
+$vp8_subtract_b_media=vp8_subtract_b_armv6;
+
+add_proto qw/void vp8_subtract_mby/, "short *diff, unsigned char *src, int src_stride, unsigned char *pred, int pred_stride";
+specialize qw/vp8_subtract_mby mmx sse2 media neon/;
+$vp8_subtract_mby_media=vp8_subtract_mby_armv6;
+
+add_proto qw/void vp8_subtract_mbuv/, "short *diff, unsigned char *usrc, unsigned char *vsrc, int src_stride, unsigned char *upred, unsigned char *vpred, int pred_stride";
+specialize qw/vp8_subtract_mbuv mmx sse2 media neon/;
+$vp8_subtract_mbuv_media=vp8_subtract_mbuv_armv6;
+
+#
+# Motion search
+#
+add_proto qw/int vp8_full_search_sad/, "struct macroblock *x, struct block *b, struct blockd *d, union int_mv *ref_mv, int sad_per_bit, int distance, struct variance_vtable *fn_ptr, int *mvcost[2], union int_mv *center_mv";
+specialize qw/vp8_full_search_sad sse3 sse4_1/;
+$vp8_full_search_sad_sse3=vp8_full_search_sadx3;
+$vp8_full_search_sad_sse4_1=vp8_full_search_sadx8;
+
+add_proto qw/int vp8_refining_search_sad/, "struct macroblock *x, struct block *b, struct blockd *d, union int_mv *ref_mv, int sad_per_bit, int distance, struct variance_vtable *fn_ptr, int *mvcost[2], union int_mv *center_mv";
+specialize qw/vp8_refining_search_sad sse3/;
+$vp8_refining_search_sad_sse3=vp8_refining_search_sadx4;
+
+add_proto qw/int vp8_diamond_search_sad/, "struct macroblock *x, struct block *b, struct blockd *d, union int_mv *ref_mv, union int_mv *best_mv, int search_param, int sad_per_bit, int *num00, struct variance_vtable *fn_ptr, int *mvcost[2], union int_mv *center_mv";
+$vp8_diamond_search_sad_sse3=vp8_diamond_search_sadx4;
+
+#
+# Alt-ref Noise Reduction (ARNR)
+#
+if (vpx_config("CONFIG_REALTIME_ONLY") ne "yes") {
+    add_proto qw/void vp8_temporal_filter_apply/, "unsigned char *frame1, unsigned int stride, unsigned char *frame2, unsigned int block_size, int strength, int filter_weight, unsigned int *accumulator, unsigned short *count";
+    specialize qw/vp8_temporal_filter_apply sse2/;
+}
+
+#
+# Pick Loopfilter
+#
+add_proto qw/void vp8_yv12_copy_partial_frame/, "struct yv12_buffer_config *src_ybc, struct yv12_buffer_config *dst_ybc";
+specialize qw/vp8_yv12_copy_partial_frame neon/;
+
+#
+# Denoiser filter
+#
+if (vpx_config("CONFIG_TEMPORAL_DENOISING") eq "yes") {
+    add_proto qw/int vp8_denoiser_filter/, "struct yv12_buffer_config* mc_running_avg, struct yv12_buffer_config* running_avg, struct macroblock* signal, unsigned int motion_magnitude2, int y_offset, int uv_offset";
+    specialize qw/vp8_denoiser_filter sse2 neon/;
+}
+
+# End of encoder only functions
+}
+1;
diff --git a/vp8/common/rtcd_defs.sh b/vp8/common/rtcd_defs.sh

deleted file mode 100644 (file)

index 28e6754..0000000
--- a/vp8/common/rtcd_defs.sh
+++ /dev/null
@@ -1,542 +0,0 @@
-vp8_common_forward_decls() {
-cat <<EOF
-/*
- * VP8
- */
-
-struct blockd;
-struct macroblockd;
-struct loop_filter_info;
-
-/* Encoder forward decls */
-struct block;
-struct macroblock;
-struct variance_vtable;
-union int_mv;
-struct yv12_buffer_config;
-EOF
-}
-forward_decls vp8_common_forward_decls
-
-#
-# system state
-#
-prototype void vp8_clear_system_state ""
-specialize vp8_clear_system_state mmx
-vp8_clear_system_state_mmx=vpx_reset_mmx_state
-
-#
-# Dequant
-#
-prototype void vp8_dequantize_b "struct blockd*, short *dqc"
-specialize vp8_dequantize_b mmx media neon
-vp8_dequantize_b_media=vp8_dequantize_b_v6
-
-prototype void vp8_dequant_idct_add "short *input, short *dq, unsigned char *output, int stride"
-specialize vp8_dequant_idct_add mmx media neon dspr2
-vp8_dequant_idct_add_media=vp8_dequant_idct_add_v6
-vp8_dequant_idct_add_dspr2=vp8_dequant_idct_add_dspr2
-
-prototype void vp8_dequant_idct_add_y_block "short *q, short *dq, unsigned char *dst, int stride, char *eobs"
-specialize vp8_dequant_idct_add_y_block mmx sse2 media neon dspr2
-vp8_dequant_idct_add_y_block_media=vp8_dequant_idct_add_y_block_v6
-vp8_dequant_idct_add_y_block_dspr2=vp8_dequant_idct_add_y_block_dspr2
-
-prototype void vp8_dequant_idct_add_uv_block "short *q, short *dq, unsigned char *dst_u, unsigned char *dst_v, int stride, char *eobs"
-specialize vp8_dequant_idct_add_uv_block mmx sse2 media neon dspr2
-vp8_dequant_idct_add_uv_block_media=vp8_dequant_idct_add_uv_block_v6
-vp8_dequant_idct_add_y_block_dspr2=vp8_dequant_idct_add_y_block_dspr2
-
-#
-# Loopfilter
-#
-prototype void vp8_loop_filter_mbv "unsigned char *y, unsigned char *u, unsigned char *v, int ystride, int uv_stride, struct loop_filter_info *lfi"
-specialize vp8_loop_filter_mbv mmx sse2 media neon dspr2
-vp8_loop_filter_mbv_media=vp8_loop_filter_mbv_armv6
-vp8_loop_filter_mbv_dspr2=vp8_loop_filter_mbv_dspr2
-
-prototype void vp8_loop_filter_bv "unsigned char *y, unsigned char *u, unsigned char *v, int ystride, int uv_stride, struct loop_filter_info *lfi"
-specialize vp8_loop_filter_bv mmx sse2 media neon dspr2
-vp8_loop_filter_bv_media=vp8_loop_filter_bv_armv6
-vp8_loop_filter_bv_dspr2=vp8_loop_filter_bv_dspr2
-
-prototype void vp8_loop_filter_mbh "unsigned char *y, unsigned char *u, unsigned char *v, int ystride, int uv_stride, struct loop_filter_info *lfi"
-specialize vp8_loop_filter_mbh mmx sse2 media neon dspr2
-vp8_loop_filter_mbh_media=vp8_loop_filter_mbh_armv6
-vp8_loop_filter_mbh_dspr2=vp8_loop_filter_mbh_dspr2
-
-prototype void vp8_loop_filter_bh "unsigned char *y, unsigned char *u, unsigned char *v, int ystride, int uv_stride, struct loop_filter_info *lfi"
-specialize vp8_loop_filter_bh mmx sse2 media neon dspr2
-vp8_loop_filter_bh_media=vp8_loop_filter_bh_armv6
-vp8_loop_filter_bh_dspr2=vp8_loop_filter_bh_dspr2
-
-
-prototype void vp8_loop_filter_simple_mbv "unsigned char *y, int ystride, const unsigned char *blimit"
-specialize vp8_loop_filter_simple_mbv mmx sse2 media neon
-vp8_loop_filter_simple_mbv_c=vp8_loop_filter_simple_vertical_edge_c
-vp8_loop_filter_simple_mbv_mmx=vp8_loop_filter_simple_vertical_edge_mmx
-vp8_loop_filter_simple_mbv_sse2=vp8_loop_filter_simple_vertical_edge_sse2
-vp8_loop_filter_simple_mbv_media=vp8_loop_filter_simple_vertical_edge_armv6
-vp8_loop_filter_simple_mbv_neon=vp8_loop_filter_mbvs_neon
-
-prototype void vp8_loop_filter_simple_mbh "unsigned char *y, int ystride, const unsigned char *blimit"
-specialize vp8_loop_filter_simple_mbh mmx sse2 media neon
-vp8_loop_filter_simple_mbh_c=vp8_loop_filter_simple_horizontal_edge_c
-vp8_loop_filter_simple_mbh_mmx=vp8_loop_filter_simple_horizontal_edge_mmx
-vp8_loop_filter_simple_mbh_sse2=vp8_loop_filter_simple_horizontal_edge_sse2
-vp8_loop_filter_simple_mbh_media=vp8_loop_filter_simple_horizontal_edge_armv6
-vp8_loop_filter_simple_mbh_neon=vp8_loop_filter_mbhs_neon
-
-prototype void vp8_loop_filter_simple_bv "unsigned char *y, int ystride, const unsigned char *blimit"
-specialize vp8_loop_filter_simple_bv mmx sse2 media neon
-vp8_loop_filter_simple_bv_c=vp8_loop_filter_bvs_c
-vp8_loop_filter_simple_bv_mmx=vp8_loop_filter_bvs_mmx
-vp8_loop_filter_simple_bv_sse2=vp8_loop_filter_bvs_sse2
-vp8_loop_filter_simple_bv_media=vp8_loop_filter_bvs_armv6
-vp8_loop_filter_simple_bv_neon=vp8_loop_filter_bvs_neon
-
-prototype void vp8_loop_filter_simple_bh "unsigned char *y, int ystride, const unsigned char *blimit"
-specialize vp8_loop_filter_simple_bh mmx sse2 media neon
-vp8_loop_filter_simple_bh_c=vp8_loop_filter_bhs_c
-vp8_loop_filter_simple_bh_mmx=vp8_loop_filter_bhs_mmx
-vp8_loop_filter_simple_bh_sse2=vp8_loop_filter_bhs_sse2
-vp8_loop_filter_simple_bh_media=vp8_loop_filter_bhs_armv6
-vp8_loop_filter_simple_bh_neon=vp8_loop_filter_bhs_neon
-
-#
-# IDCT
-#
-#idct16
-prototype void vp8_short_idct4x4llm "short *input, unsigned char *pred, int pitch, unsigned char *dst, int dst_stride"
-specialize vp8_short_idct4x4llm mmx media neon dspr2
-vp8_short_idct4x4llm_media=vp8_short_idct4x4llm_v6_dual
-vp8_short_idct4x4llm_dspr2=vp8_short_idct4x4llm_dspr2
-
-#iwalsh1
-prototype void vp8_short_inv_walsh4x4_1 "short *input, short *output"
-specialize vp8_short_inv_walsh4x4_1 dspr2
-vp8_short_inv_walsh4x4_1_dspr2=vp8_short_inv_walsh4x4_1_dspr2
-# no asm yet
-
-#iwalsh16
-prototype void vp8_short_inv_walsh4x4 "short *input, short *output"
-specialize vp8_short_inv_walsh4x4 mmx sse2 media neon dspr2
-vp8_short_inv_walsh4x4_media=vp8_short_inv_walsh4x4_v6
-vp8_short_inv_walsh4x4_dspr2=vp8_short_inv_walsh4x4_dspr2
-
-#idct1_scalar_add
-prototype void vp8_dc_only_idct_add "short input, unsigned char *pred, int pred_stride, unsigned char *dst, int dst_stride"
-specialize vp8_dc_only_idct_add        mmx media neon dspr2
-vp8_dc_only_idct_add_media=vp8_dc_only_idct_add_v6
-vp8_dc_only_idct_add_dspr2=vp8_dc_only_idct_add_dspr2
-
-#
-# RECON
-#
-prototype void vp8_copy_mem16x16 "unsigned char *src, int src_pitch, unsigned char *dst, int dst_pitch"
-specialize vp8_copy_mem16x16 mmx sse2 media neon dspr2
-vp8_copy_mem16x16_media=vp8_copy_mem16x16_v6
-vp8_copy_mem16x16_dspr2=vp8_copy_mem16x16_dspr2
-
-prototype void vp8_copy_mem8x8 "unsigned char *src, int src_pitch, unsigned char *dst, int dst_pitch"
-specialize vp8_copy_mem8x8 mmx media neon dspr2
-vp8_copy_mem8x8_media=vp8_copy_mem8x8_v6
-vp8_copy_mem8x8_dspr2=vp8_copy_mem8x8_dspr2
-
-prototype void vp8_copy_mem8x4 "unsigned char *src, int src_pitch, unsigned char *dst, int dst_pitch"
-specialize vp8_copy_mem8x4 mmx media neon dspr2
-vp8_copy_mem8x4_media=vp8_copy_mem8x4_v6
-vp8_copy_mem8x4_dspr2=vp8_copy_mem8x4_dspr2
-
-prototype void vp8_build_intra_predictors_mby_s "struct macroblockd *x, unsigned char * yabove_row, unsigned char * yleft, int left_stride, unsigned char * ypred_ptr, int y_stride"
-specialize vp8_build_intra_predictors_mby_s sse2 ssse3
-#TODO: fix assembly for neon
-
-prototype void vp8_build_intra_predictors_mbuv_s "struct macroblockd *x, unsigned char * uabove_row, unsigned char * vabove_row,  unsigned char *uleft, unsigned char *vleft, int left_stride, unsigned char * upred_ptr, unsigned char * vpred_ptr, int pred_stride"
-specialize vp8_build_intra_predictors_mbuv_s sse2 ssse3
-
-prototype void vp8_intra4x4_predict "unsigned char *Above, unsigned char *yleft, int left_stride, int b_mode, unsigned char *dst, int dst_stride, unsigned char top_left"
-specialize vp8_intra4x4_predict media
-vp8_intra4x4_predict_media=vp8_intra4x4_predict_armv6
-
-#
-# Postproc
-#
-if [ "$CONFIG_POSTPROC" = "yes" ]; then
-    prototype void vp8_mbpost_proc_down "unsigned char *dst, int pitch, int rows, int cols,int flimit"
-    specialize vp8_mbpost_proc_down mmx sse2
-    vp8_mbpost_proc_down_sse2=vp8_mbpost_proc_down_xmm
-
-    prototype void vp8_mbpost_proc_across_ip "unsigned char *dst, int pitch, int rows, int cols,int flimit"
-    specialize vp8_mbpost_proc_across_ip sse2
-    vp8_mbpost_proc_across_ip_sse2=vp8_mbpost_proc_across_ip_xmm
-
-    prototype void vp8_post_proc_down_and_across_mb_row "unsigned char *src, unsigned char *dst, int src_pitch, int dst_pitch, int cols, unsigned char *flimits, int size"
-    specialize vp8_post_proc_down_and_across_mb_row sse2
-
-    prototype void vp8_plane_add_noise "unsigned char *s, char *noise, char blackclamp[16], char whiteclamp[16], char bothclamp[16], unsigned int w, unsigned int h, int pitch"
-    specialize vp8_plane_add_noise mmx sse2
-    vp8_plane_add_noise_sse2=vp8_plane_add_noise_wmt
-
-    prototype void vp8_blend_mb_inner "unsigned char *y, unsigned char *u, unsigned char *v, int y1, int u1, int v1, int alpha, int stride"
-    # no asm yet
-
-    prototype void vp8_blend_mb_outer "unsigned char *y, unsigned char *u, unsigned char *v, int y1, int u1, int v1, int alpha, int stride"
-    # no asm yet
-
-    prototype void vp8_blend_b "unsigned char *y, unsigned char *u, unsigned char *v, int y1, int u1, int v1, int alpha, int stride"
-    # no asm yet
-
-    prototype void vp8_filter_by_weight16x16 "unsigned char *src, int src_stride, unsigned char *dst, int dst_stride, int src_weight"
-    specialize vp8_filter_by_weight16x16 sse2
-
-    prototype void vp8_filter_by_weight8x8 "unsigned char *src, int src_stride, unsigned char *dst, int dst_stride, int src_weight"
-    specialize vp8_filter_by_weight8x8 sse2
-
-    prototype void vp8_filter_by_weight4x4 "unsigned char *src, int src_stride, unsigned char *dst, int dst_stride, int src_weight"
-    # no asm yet
-fi
-
-#
-# Subpixel
-#
-prototype void vp8_sixtap_predict16x16 "unsigned char *src, int src_pitch, int xofst, int yofst, unsigned char *dst, int dst_pitch"
-specialize vp8_sixtap_predict16x16 mmx sse2 ssse3 media neon dspr2
-vp8_sixtap_predict16x16_media=vp8_sixtap_predict16x16_armv6
-vp8_sixtap_predict16x16_dspr2=vp8_sixtap_predict16x16_dspr2
-
-prototype void vp8_sixtap_predict8x8 "unsigned char *src, int src_pitch, int xofst, int yofst, unsigned char *dst, int dst_pitch"
-specialize vp8_sixtap_predict8x8 mmx sse2 ssse3 media neon dspr2
-vp8_sixtap_predict8x8_media=vp8_sixtap_predict8x8_armv6
-vp8_sixtap_predict8x8_dspr2=vp8_sixtap_predict8x8_dspr2
-
-prototype void vp8_sixtap_predict8x4 "unsigned char *src, int src_pitch, int xofst, int yofst, unsigned char *dst, int dst_pitch"
-specialize vp8_sixtap_predict8x4 mmx sse2 ssse3 media neon dspr2
-vp8_sixtap_predict8x4_media=vp8_sixtap_predict8x4_armv6
-vp8_sixtap_predict8x4_dspr2=vp8_sixtap_predict8x4_dspr2
-
-prototype void vp8_sixtap_predict4x4 "unsigned char *src, int src_pitch, int xofst, int yofst, unsigned char *dst, int dst_pitch"
-specialize vp8_sixtap_predict4x4 mmx ssse3 media neon dspr2
-vp8_sixtap_predict4x4_media=vp8_sixtap_predict4x4_armv6
-vp8_sixtap_predict4x4_dspr2=vp8_sixtap_predict4x4_dspr2
-
-prototype void vp8_bilinear_predict16x16 "unsigned char *src, int src_pitch, int xofst, int yofst, unsigned char *dst, int dst_pitch"
-specialize vp8_bilinear_predict16x16 mmx sse2 ssse3 media neon
-vp8_bilinear_predict16x16_media=vp8_bilinear_predict16x16_armv6
-
-prototype void vp8_bilinear_predict8x8 "unsigned char *src, int src_pitch, int xofst, int yofst, unsigned char *dst, int dst_pitch"
-specialize vp8_bilinear_predict8x8 mmx sse2 ssse3 media neon
-vp8_bilinear_predict8x8_media=vp8_bilinear_predict8x8_armv6
-
-prototype void vp8_bilinear_predict8x4 "unsigned char *src, int src_pitch, int xofst, int yofst, unsigned char *dst, int dst_pitch"
-specialize vp8_bilinear_predict8x4 mmx media neon
-vp8_bilinear_predict8x4_media=vp8_bilinear_predict8x4_armv6
-
-prototype void vp8_bilinear_predict4x4 "unsigned char *src, int src_pitch, int xofst, int yofst, unsigned char *dst, int dst_pitch"
-specialize vp8_bilinear_predict4x4 mmx media neon
-vp8_bilinear_predict4x4_media=vp8_bilinear_predict4x4_armv6
-
-#
-# Whole-pixel Variance
-#
-prototype unsigned int vp8_variance4x4 "const unsigned char *src_ptr, int source_stride, const unsigned char *ref_ptr, int  ref_stride, unsigned int *sse"
-specialize vp8_variance4x4 mmx sse2
-vp8_variance4x4_sse2=vp8_variance4x4_wmt
-
-prototype unsigned int vp8_variance8x8 "const unsigned char *src_ptr, int source_stride, const unsigned char *ref_ptr, int  ref_stride, unsigned int *sse"
-specialize vp8_variance8x8 mmx sse2 media neon
-vp8_variance8x8_sse2=vp8_variance8x8_wmt
-vp8_variance8x8_media=vp8_variance8x8_armv6
-
-prototype unsigned int vp8_variance8x16 "const unsigned char *src_ptr, int source_stride, const unsigned char *ref_ptr, int  ref_stride, unsigned int *sse"
-specialize vp8_variance8x16 mmx sse2 neon
-vp8_variance8x16_sse2=vp8_variance8x16_wmt
-
-prototype unsigned int vp8_variance16x8 "const unsigned char *src_ptr, int source_stride, const unsigned char *ref_ptr, int  ref_stride, unsigned int *sse"
-specialize vp8_variance16x8 mmx sse2 neon
-vp8_variance16x8_sse2=vp8_variance16x8_wmt
-
-prototype unsigned int vp8_variance16x16 "const unsigned char *src_ptr, int source_stride, const unsigned char *ref_ptr, int  ref_stride, unsigned int *sse"
-specialize vp8_variance16x16 mmx sse2 media neon
-vp8_variance16x16_sse2=vp8_variance16x16_wmt
-vp8_variance16x16_media=vp8_variance16x16_armv6
-
-#
-# Sub-pixel Variance
-#
-prototype unsigned int vp8_sub_pixel_variance4x4 "const unsigned char  *src_ptr, int  source_stride, int  xoffset, int  yoffset, const unsigned char *ref_ptr, int Refstride, unsigned int *sse"
-specialize vp8_sub_pixel_variance4x4 mmx sse2
-vp8_sub_pixel_variance4x4_sse2=vp8_sub_pixel_variance4x4_wmt
-
-prototype unsigned int vp8_sub_pixel_variance8x8 "const unsigned char  *src_ptr, int  source_stride, int  xoffset, int  yoffset, const unsigned char *ref_ptr, int Refstride, unsigned int *sse"
-specialize vp8_sub_pixel_variance8x8 mmx sse2 media neon
-vp8_sub_pixel_variance8x8_sse2=vp8_sub_pixel_variance8x8_wmt
-vp8_sub_pixel_variance8x8_media=vp8_sub_pixel_variance8x8_armv6
-
-prototype unsigned int vp8_sub_pixel_variance8x16 "const unsigned char  *src_ptr, int  source_stride, int  xoffset, int  yoffset, const unsigned char *ref_ptr, int Refstride, unsigned int *sse"
-specialize vp8_sub_pixel_variance8x16 mmx sse2
-vp8_sub_pixel_variance8x16_sse2=vp8_sub_pixel_variance8x16_wmt
-
-prototype unsigned int vp8_sub_pixel_variance16x8 "const unsigned char  *src_ptr, int  source_stride, int  xoffset, int  yoffset, const unsigned char *ref_ptr, int Refstride, unsigned int *sse"
-specialize vp8_sub_pixel_variance16x8 mmx sse2 ssse3
-vp8_sub_pixel_variance16x8_sse2=vp8_sub_pixel_variance16x8_wmt
-
-prototype unsigned int vp8_sub_pixel_variance16x16 "const unsigned char  *src_ptr, int  source_stride, int  xoffset, int  yoffset, const unsigned char *ref_ptr, int Refstride, unsigned int *sse"
-specialize vp8_sub_pixel_variance16x16 mmx sse2 ssse3 media neon
-vp8_sub_pixel_variance16x16_sse2=vp8_sub_pixel_variance16x16_wmt
-vp8_sub_pixel_variance16x16_media=vp8_sub_pixel_variance16x16_armv6
-
-prototype unsigned int vp8_variance_halfpixvar16x16_h "const unsigned char *src_ptr, int source_stride, const unsigned char *ref_ptr, int  ref_stride, unsigned int *sse"
-specialize vp8_variance_halfpixvar16x16_h mmx sse2 media neon
-vp8_variance_halfpixvar16x16_h_sse2=vp8_variance_halfpixvar16x16_h_wmt
-vp8_variance_halfpixvar16x16_h_media=vp8_variance_halfpixvar16x16_h_armv6
-
-prototype unsigned int vp8_variance_halfpixvar16x16_v "const unsigned char *src_ptr, int source_stride, const unsigned char *ref_ptr, int  ref_stride, unsigned int *sse"
-specialize vp8_variance_halfpixvar16x16_v mmx sse2 media neon
-vp8_variance_halfpixvar16x16_v_sse2=vp8_variance_halfpixvar16x16_v_wmt
-vp8_variance_halfpixvar16x16_v_media=vp8_variance_halfpixvar16x16_v_armv6
-
-prototype unsigned int vp8_variance_halfpixvar16x16_hv "const unsigned char *src_ptr, int source_stride, const unsigned char *ref_ptr, int  ref_stride, unsigned int *sse"
-specialize vp8_variance_halfpixvar16x16_hv mmx sse2 media neon
-vp8_variance_halfpixvar16x16_hv_sse2=vp8_variance_halfpixvar16x16_hv_wmt
-vp8_variance_halfpixvar16x16_hv_media=vp8_variance_halfpixvar16x16_hv_armv6
-
-#
-# Single block SAD
-#
-prototype unsigned int vp8_sad4x4 "const unsigned char *src_ptr, int src_stride, const unsigned char *ref_ptr, int ref_stride, unsigned int max_sad"
-specialize vp8_sad4x4 mmx sse2 neon
-vp8_sad4x4_sse2=vp8_sad4x4_wmt
-
-prototype unsigned int vp8_sad8x8 "const unsigned char *src_ptr, int src_stride, const unsigned char *ref_ptr, int ref_stride, unsigned int max_sad"
-specialize vp8_sad8x8 mmx sse2 neon
-vp8_sad8x8_sse2=vp8_sad8x8_wmt
-
-prototype unsigned int vp8_sad8x16 "const unsigned char *src_ptr, int src_stride, const unsigned char *ref_ptr, int ref_stride, unsigned int max_sad"
-specialize vp8_sad8x16 mmx sse2 neon
-vp8_sad8x16_sse2=vp8_sad8x16_wmt
-
-prototype unsigned int vp8_sad16x8 "const unsigned char *src_ptr, int src_stride, const unsigned char *ref_ptr, int ref_stride, unsigned int max_sad"
-specialize vp8_sad16x8 mmx sse2 neon
-vp8_sad16x8_sse2=vp8_sad16x8_wmt
-
-prototype unsigned int vp8_sad16x16 "const unsigned char *src_ptr, int src_stride, const unsigned char *ref_ptr, int ref_stride, unsigned int max_sad"
-specialize vp8_sad16x16 mmx sse2 sse3 media neon
-vp8_sad16x16_sse2=vp8_sad16x16_wmt
-vp8_sad16x16_media=vp8_sad16x16_armv6
-
-#
-# Multi-block SAD, comparing a reference to N blocks 1 pixel apart horizontally
-#
-prototype void vp8_sad4x4x3 "const unsigned char *src_ptr, int src_stride, const unsigned char *ref_ptr, int  ref_stride, unsigned int *sad_array"
-specialize vp8_sad4x4x3 sse3
-
-prototype void vp8_sad8x8x3 "const unsigned char *src_ptr, int src_stride, const unsigned char *ref_ptr, int  ref_stride, unsigned int *sad_array"
-specialize vp8_sad8x8x3 sse3
-
-prototype void vp8_sad8x16x3 "const unsigned char *src_ptr, int src_stride, const unsigned char *ref_ptr, int  ref_stride, unsigned int *sad_array"
-specialize vp8_sad8x16x3 sse3
-
-prototype void vp8_sad16x8x3 "const unsigned char *src_ptr, int src_stride, const unsigned char *ref_ptr, int  ref_stride, unsigned int *sad_array"
-specialize vp8_sad16x8x3 sse3 ssse3
-
-prototype void vp8_sad16x16x3 "const unsigned char *src_ptr, int src_stride, const unsigned char *ref_ptr, int  ref_stride, unsigned int *sad_array"
-specialize vp8_sad16x16x3 sse3 ssse3
-
-# Note the only difference in the following prototypes is that they return into
-# an array of short
-prototype void vp8_sad4x4x8 "const unsigned char *src_ptr, int src_stride, const unsigned char *ref_ptr, int  ref_stride, unsigned short *sad_array"
-specialize vp8_sad4x4x8 sse4_1
-vp8_sad4x4x8_sse4_1=vp8_sad4x4x8_sse4
-
-prototype void vp8_sad8x8x8 "const unsigned char *src_ptr, int src_stride, const unsigned char *ref_ptr, int  ref_stride, unsigned short *sad_array"
-specialize vp8_sad8x8x8 sse4_1
-vp8_sad8x8x8_sse4_1=vp8_sad8x8x8_sse4
-
-prototype void vp8_sad8x16x8 "const unsigned char *src_ptr, int src_stride, const unsigned char *ref_ptr, int  ref_stride, unsigned short *sad_array"
-specialize vp8_sad8x16x8 sse4_1
-vp8_sad8x16x8_sse4_1=vp8_sad8x16x8_sse4
-
-prototype void vp8_sad16x8x8 "const unsigned char *src_ptr, int src_stride, const unsigned char *ref_ptr, int  ref_stride, unsigned short *sad_array"
-specialize vp8_sad16x8x8 sse4_1
-vp8_sad16x8x8_sse4_1=vp8_sad16x8x8_sse4
-
-prototype void vp8_sad16x16x8 "const unsigned char *src_ptr, int src_stride, const unsigned char *ref_ptr, int  ref_stride, unsigned short *sad_array"
-specialize vp8_sad16x16x8 sse4_1
-vp8_sad16x16x8_sse4_1=vp8_sad16x16x8_sse4
-
-#
-# Multi-block SAD, comparing a reference to N independent blocks
-#
-prototype void vp8_sad4x4x4d "const unsigned char *src_ptr, int src_stride, const unsigned char * const ref_ptr[], int  ref_stride, unsigned int *sad_array"
-specialize vp8_sad4x4x4d sse3
-
-prototype void vp8_sad8x8x4d "const unsigned char *src_ptr, int src_stride, const unsigned char * const ref_ptr[], int  ref_stride, unsigned int *sad_array"
-specialize vp8_sad8x8x4d sse3
-
-prototype void vp8_sad8x16x4d "const unsigned char *src_ptr, int src_stride, const unsigned char * const ref_ptr[], int  ref_stride, unsigned int *sad_array"
-specialize vp8_sad8x16x4d sse3
-
-prototype void vp8_sad16x8x4d "const unsigned char *src_ptr, int src_stride, const unsigned char * const ref_ptr[], int  ref_stride, unsigned int *sad_array"
-specialize vp8_sad16x8x4d sse3
-
-prototype void vp8_sad16x16x4d "const unsigned char *src_ptr, int src_stride, const unsigned char * const ref_ptr[], int  ref_stride, unsigned int *sad_array"
-specialize vp8_sad16x16x4d sse3
-
-#
-# Encoder functions below this point.
-#
-if [ "$CONFIG_VP8_ENCODER" = "yes" ]; then
-
-#
-# Sum of squares (vector)
-#
-prototype unsigned int vp8_get_mb_ss "const short *"
-specialize vp8_get_mb_ss mmx sse2
-
-#
-# SSE (Sum Squared Error)
-#
-prototype unsigned int vp8_sub_pixel_mse16x16 "const unsigned char  *src_ptr, int  source_stride, int  xoffset, int  yoffset, const unsigned char *ref_ptr, int Refstride, unsigned int *sse"
-specialize vp8_sub_pixel_mse16x16 mmx sse2
-vp8_sub_pixel_mse16x16_sse2=vp8_sub_pixel_mse16x16_wmt
-
-prototype unsigned int vp8_mse16x16 "const unsigned char *src_ptr, int source_stride, const unsigned char *ref_ptr, int  ref_stride, unsigned int *sse"
-specialize vp8_mse16x16 mmx sse2 media neon
-vp8_mse16x16_sse2=vp8_mse16x16_wmt
-vp8_mse16x16_media=vp8_mse16x16_armv6
-
-prototype unsigned int vp8_get4x4sse_cs "const unsigned char *src_ptr, int source_stride, const unsigned char *ref_ptr, int  ref_stride"
-specialize vp8_get4x4sse_cs mmx neon
-
-#
-# Block copy
-#
-case $arch in
-    x86*)
-    prototype void vp8_copy32xn "const unsigned char *src_ptr, int source_stride, const unsigned char *ref_ptr, int ref_stride, int n"
-    specialize vp8_copy32xn sse2 sse3
-    ;;
-esac
-
-#
-# Structured Similarity (SSIM)
-#
-if [ "$CONFIG_INTERNAL_STATS" = "yes" ]; then
-    [ $arch = "x86_64" ] && sse2_on_x86_64=sse2
-
-    prototype void vp8_ssim_parms_8x8 "unsigned char *s, int sp, unsigned char *r, int rp, unsigned long *sum_s, unsigned long *sum_r, unsigned long *sum_sq_s, unsigned long *sum_sq_r, unsigned long *sum_sxr"
-    specialize vp8_ssim_parms_8x8 $sse2_on_x86_64
-
-    prototype void vp8_ssim_parms_16x16 "unsigned char *s, int sp, unsigned char *r, int rp, unsigned long *sum_s, unsigned long *sum_r, unsigned long *sum_sq_s, unsigned long *sum_sq_r, unsigned long *sum_sxr"
-    specialize vp8_ssim_parms_16x16 $sse2_on_x86_64
-fi
-
-#
-# Forward DCT
-#
-prototype void vp8_short_fdct4x4 "short *input, short *output, int pitch"
-specialize vp8_short_fdct4x4 mmx sse2 media neon
-vp8_short_fdct4x4_media=vp8_short_fdct4x4_armv6
-
-prototype void vp8_short_fdct8x4 "short *input, short *output, int pitch"
-specialize vp8_short_fdct8x4 mmx sse2 media neon
-vp8_short_fdct8x4_media=vp8_short_fdct8x4_armv6
-
-prototype void vp8_short_walsh4x4 "short *input, short *output, int pitch"
-specialize vp8_short_walsh4x4 sse2 media neon
-vp8_short_walsh4x4_media=vp8_short_walsh4x4_armv6
-
-#
-# Quantizer
-#
-prototype void vp8_regular_quantize_b "struct block *, struct blockd *"
-specialize vp8_regular_quantize_b sse2 #sse4_1
-# TODO(johann) Update sse4 implementation and re-enable
-#vp8_regular_quantize_b_sse4_1=vp8_regular_quantize_b_sse4
-
-prototype void vp8_fast_quantize_b "struct block *, struct blockd *"
-specialize vp8_fast_quantize_b sse2 ssse3 media neon
-vp8_fast_quantize_b_media=vp8_fast_quantize_b_armv6
-
-prototype void vp8_regular_quantize_b_pair "struct block *b1, struct block *b2, struct blockd *d1, struct blockd *d2"
-# no asm yet
-
-prototype void vp8_fast_quantize_b_pair "struct block *b1, struct block *b2, struct blockd *d1, struct blockd *d2"
-specialize vp8_fast_quantize_b_pair neon
-
-prototype void vp8_quantize_mb "struct macroblock *"
-specialize vp8_quantize_mb neon
-
-prototype void vp8_quantize_mby "struct macroblock *"
-specialize vp8_quantize_mby neon
-
-prototype void vp8_quantize_mbuv "struct macroblock *"
-specialize vp8_quantize_mbuv neon
-
-#
-# Block subtraction
-#
-prototype int vp8_block_error "short *coeff, short *dqcoeff"
-specialize vp8_block_error mmx sse2
-vp8_block_error_sse2=vp8_block_error_xmm
-
-prototype int vp8_mbblock_error "struct macroblock *mb, int dc"
-specialize vp8_mbblock_error mmx sse2
-vp8_mbblock_error_sse2=vp8_mbblock_error_xmm
-
-prototype int vp8_mbuverror "struct macroblock *mb"
-specialize vp8_mbuverror mmx sse2
-vp8_mbuverror_sse2=vp8_mbuverror_xmm
-
-prototype void vp8_subtract_b "struct block *be, struct blockd *bd, int pitch"
-specialize vp8_subtract_b mmx sse2 media neon
-vp8_subtract_b_media=vp8_subtract_b_armv6
-
-prototype void vp8_subtract_mby "short *diff, unsigned char *src, int src_stride, unsigned char *pred, int pred_stride"
-specialize vp8_subtract_mby mmx sse2 media neon
-vp8_subtract_mby_media=vp8_subtract_mby_armv6
-
-prototype void vp8_subtract_mbuv "short *diff, unsigned char *usrc, unsigned char *vsrc, int src_stride, unsigned char *upred, unsigned char *vpred, int pred_stride"
-specialize vp8_subtract_mbuv mmx sse2 media neon
-vp8_subtract_mbuv_media=vp8_subtract_mbuv_armv6
-
-#
-# Motion search
-#
-prototype int vp8_full_search_sad "struct macroblock *x, struct block *b, struct blockd *d, union int_mv *ref_mv, int sad_per_bit, int distance, struct variance_vtable *fn_ptr, int *mvcost[2], union int_mv *center_mv"
-specialize vp8_full_search_sad sse3 sse4_1
-vp8_full_search_sad_sse3=vp8_full_search_sadx3
-vp8_full_search_sad_sse4_1=vp8_full_search_sadx8
-
-prototype int vp8_refining_search_sad "struct macroblock *x, struct block *b, struct blockd *d, union int_mv *ref_mv, int sad_per_bit, int distance, struct variance_vtable *fn_ptr, int *mvcost[2], union int_mv *center_mv"
-specialize vp8_refining_search_sad sse3
-vp8_refining_search_sad_sse3=vp8_refining_search_sadx4
-
-prototype int vp8_diamond_search_sad "struct macroblock *x, struct block *b, struct blockd *d, union int_mv *ref_mv, union int_mv *best_mv, int search_param, int sad_per_bit, int *num00, struct variance_vtable *fn_ptr, int *mvcost[2], union int_mv *center_mv"
-vp8_diamond_search_sad_sse3=vp8_diamond_search_sadx4
-
-#
-# Alt-ref Noise Reduction (ARNR)
-#
-if [ "$CONFIG_REALTIME_ONLY" != "yes" ]; then
-    prototype void vp8_temporal_filter_apply "unsigned char *frame1, unsigned int stride, unsigned char *frame2, unsigned int block_size, int strength, int filter_weight, unsigned int *accumulator, unsigned short *count"
-    specialize vp8_temporal_filter_apply sse2
-fi
-
-#
-# Pick Loopfilter
-#
-prototype void vp8_yv12_copy_partial_frame "struct yv12_buffer_config *src_ybc, struct yv12_buffer_config *dst_ybc"
-specialize vp8_yv12_copy_partial_frame neon
-
-#
-# Denoiser filter
-#
-if [ "$CONFIG_TEMPORAL_DENOISING" = "yes" ]; then
-    prototype int vp8_denoiser_filter "struct yv12_buffer_config* mc_running_avg, struct yv12_buffer_config* running_avg, struct macroblock* signal, unsigned int motion_magnitude2, int y_offset, int uv_offset"
-    specialize vp8_denoiser_filter sse2 neon
-fi
-
-# End of encoder only functions
-fi
diff --git a/vp8/vp8_common.mk b/vp8/vp8_common.mk

index ac91d7a..dfb54a5 100644 (file)
--- a/vp8/vp8_common.mk
+++ b/vp8/vp8_common.mk
@@ -47,7 +47,7 @@ VP8_COMMON_SRCS-yes += common/quant_common.h
  VP8_COMMON_SRCS-yes += common/reconinter.h
  VP8_COMMON_SRCS-yes += common/reconintra4x4.h
  VP8_COMMON_SRCS-yes += common/rtcd.c
-VP8_COMMON_SRCS-yes += common/rtcd_defs.sh
+VP8_COMMON_SRCS-yes += common/rtcd_defs.pl
  VP8_COMMON_SRCS-yes += common/setupintrarecon.h
  VP8_COMMON_SRCS-yes += common/swapyv12buffer.h
  VP8_COMMON_SRCS-yes += common/systemdependent.h
@@ -189,4 +189,4 @@ VP8_COMMON_SRCS-$(HAVE_NEON)  += common/arm/neon/dequant_idct_neon.c
  VP8_COMMON_SRCS-$(HAVE_NEON)  += common/arm/neon/dequantizeb_neon.c
  
  
-$(eval $(call rtcd_h_template,vp8_rtcd,vp8/common/rtcd_defs.sh))
+$(eval $(call rtcd_h_template,vp8_rtcd,vp8/common/rtcd_defs.pl))
diff --git a/vp9/common/vp9_entropymv.h b/vp9/common/vp9_entropymv.h

index 7e1f147..e7033e4 100644 (file)
--- a/vp9/common/vp9_entropymv.h
+++ b/vp9/common/vp9_entropymv.h
@@ -26,7 +26,7 @@ void vp9_init_mv_probs(struct VP9Common *cm);
  void vp9_adapt_mv_probs(struct VP9Common *cm, int usehp);
  int vp9_use_mv_hp(const MV *ref);
  
-#define NMV_UPDATE_PROB  252
+#define MV_UPDATE_PROB 252
  
  /* Symbols for coding which components are zero jointly */
  #define MV_JOINTS     4
diff --git a/vp9/common/vp9_mvref_common.c b/vp9/common/vp9_mvref_common.c

index e5f3fed..d179f42 100644 (file)
--- a/vp9/common/vp9_mvref_common.c
+++ b/vp9/common/vp9_mvref_common.c
@@ -191,7 +191,7 @@ static void find_mv_refs_idx(const VP9_COMMON *cm, const MACROBLOCKD *xd,
                               MODE_INFO *mi, const MODE_INFO *prev_mi,
                               MV_REFERENCE_FRAME ref_frame,
                               int_mv *mv_ref_list,
-                             int block_idx, int mi_row, int mi_col) {
+                             int block, int mi_row, int mi_col) {
    const int *ref_sign_bias = cm->ref_frame_sign_bias;
    int i, refmv_count = 0;
    const POSITION *const mv_ref_search = mv_ref_blocks[mi->mbmi.sb_type];
@@ -214,19 +214,12 @@ static void find_mv_refs_idx(const VP9_COMMON *cm, const MACROBLOCKD *xd,
        const MB_MODE_INFO *const candidate = &candidate_mi->mbmi;
        // Keep counts for entropy encoding.
        context_counter += mode_2_counter[candidate->mode];
+      different_ref_found = 1;
  
-      // Check if the candidate comes from the same reference frame.
-      if (candidate->ref_frame[0] == ref_frame) {
-        ADD_MV_REF_LIST(get_sub_block_mv(candidate_mi, 0,
-                                         mv_ref->col, block_idx));
-        different_ref_found = candidate->ref_frame[1] != ref_frame;
-      } else {
-        if (candidate->ref_frame[1] == ref_frame)
-          // Add second motion vector if it has the same ref_frame.
-          ADD_MV_REF_LIST(get_sub_block_mv(candidate_mi, 1,
-                                           mv_ref->col, block_idx));
-        different_ref_found = 1;
-      }
+      if (candidate->ref_frame[0] == ref_frame)
+        ADD_MV_REF_LIST(get_sub_block_mv(candidate_mi, 0, mv_ref->col, block));
+      else if (candidate->ref_frame[1] == ref_frame)
+        ADD_MV_REF_LIST(get_sub_block_mv(candidate_mi, 1, mv_ref->col, block));
      }
    }
  
@@ -239,15 +232,12 @@ static void find_mv_refs_idx(const VP9_COMMON *cm, const MACROBLOCKD *xd,
        const MB_MODE_INFO *const candidate = &xd->mi_8x8[mv_ref->col +
                                              mv_ref->row
                                              * xd->mode_info_stride]->mbmi;
+      different_ref_found = 1;
  
-      if (candidate->ref_frame[0] == ref_frame) {
+      if (candidate->ref_frame[0] == ref_frame)
          ADD_MV_REF_LIST(candidate->mv[0]);
-        different_ref_found = candidate->ref_frame[1] != ref_frame;
-      } else {
-        if (candidate->ref_frame[1] == ref_frame)
-          ADD_MV_REF_LIST(candidate->mv[1]);
-        different_ref_found = 1;
-      }
+      else if (candidate->ref_frame[1] == ref_frame)
+        ADD_MV_REF_LIST(candidate->mv[1]);
      }
    }
  
diff --git a/vp9/common/vp9_onyx.h b/vp9/common/vp9_onyx.h

deleted file mode 100644 (file)

index 2220868..0000000
--- a/vp9/common/vp9_onyx.h
+++ /dev/null
@@ -1,248 +0,0 @@
-/*
- *  Copyright (c) 2010 The WebM project authors. All Rights Reserved.
- *
- *  Use of this source code is governed by a BSD-style license
- *  that can be found in the LICENSE file in the root of the source
- *  tree. An additional intellectual property rights grant can be found
- *  in the file PATENTS.  All contributing project authors may
- *  be found in the AUTHORS file in the root of the source tree.
- */
-
-#ifndef VP9_COMMON_VP9_ONYX_H_
-#define VP9_COMMON_VP9_ONYX_H_
-
-#include "./vpx_config.h"
-#include "vpx/internal/vpx_codec_internal.h"
-#include "vpx/vp8cx.h"
-#include "vpx_scale/yv12config.h"
-#include "vp9/common/vp9_ppflags.h"
-
-#ifdef __cplusplus
-extern "C" {
-#endif
-
-#define MAX_SEGMENTS 8
-
-  typedef int *VP9_PTR;
-
-  /* Create/destroy static data structures. */
-
-  typedef enum {
-    NORMAL      = 0,
-    FOURFIVE    = 1,
-    THREEFIVE   = 2,
-    ONETWO      = 3
-  } VPX_SCALING;
-
-  typedef enum {
-    VP9_LAST_FLAG = 1,
-    VP9_GOLD_FLAG = 2,
-    VP9_ALT_FLAG = 4
-  } VP9_REFFRAME;
-
-
-  typedef enum {
-    USAGE_LOCAL_FILE_PLAYBACK   = 0x0,
-    USAGE_STREAM_FROM_SERVER    = 0x1,
-    USAGE_CONSTRAINED_QUALITY   = 0x2,
-    USAGE_CONSTANT_QUALITY      = 0x3,
-  } END_USAGE;
-
-
-  typedef enum {
-    MODE_GOODQUALITY    = 0x1,
-    MODE_BESTQUALITY    = 0x2,
-    MODE_FIRSTPASS      = 0x3,
-    MODE_SECONDPASS     = 0x4,
-    MODE_SECONDPASS_BEST = 0x5,
-    MODE_REALTIME       = 0x6,
-  } MODE;
-
-  typedef enum {
-    FRAMEFLAGS_KEY    = 1,
-    FRAMEFLAGS_GOLDEN = 2,
-    FRAMEFLAGS_ALTREF = 4,
-  } FRAMETYPE_FLAGS;
-
-  typedef enum {
-    NO_AQ = 0,
-    VARIANCE_AQ = 1,
-    COMPLEXITY_AQ = 2,
-    AQ_MODES_COUNT  // This should always be the last member of the enum
-  } AQ_MODES;
-
-  typedef struct {
-    int version;  // 4 versions of bitstream defined:
-                  //   0 - best quality/slowest decode,
-                  //   3 - lowest quality/fastest decode
-    int width;  // width of data passed to the compressor
-    int height;  // height of data passed to the compressor
-    double framerate;  // set to passed in framerate
-    int64_t target_bandwidth;  // bandwidth to be used in kilobits per second
-
-    int noise_sensitivity;  // pre processing blur: recommendation 0
-    int sharpness;  // sharpening output: recommendation 0:
-    int cpu_used;
-    unsigned int rc_max_intra_bitrate_pct;
-
-    // mode ->
-    // (0)=Realtime/Live Encoding. This mode is optimized for realtime
-    //     encoding (for example, capturing a television signal or feed from
-    //     a live camera). ( speed setting controls how fast )
-    // (1)=Good Quality Fast Encoding. The encoder balances quality with the
-    //     amount of time it takes to encode the output. ( speed setting
-    //     controls how fast )
-    // (2)=One Pass - Best Quality. The encoder places priority on the
-    //     quality of the output over encoding speed. The output is compressed
-    //     at the highest possible quality. This option takes the longest
-    //     amount of time to encode. ( speed setting ignored )
-    // (3)=Two Pass - First Pass. The encoder generates a file of statistics
-    //     for use in the second encoding pass. ( speed setting controls how
-    //     fast )
-    // (4)=Two Pass - Second Pass. The encoder uses the statistics that were
-    //     generated in the first encoding pass to create the compressed
-    //     output. ( speed setting controls how fast )
-    // (5)=Two Pass - Second Pass Best.  The encoder uses the statistics that
-    //     were generated in the first encoding pass to create the compressed
-    //     output using the highest possible quality, and taking a
-    //    longer amount of time to encode.. ( speed setting ignored )
-    int mode;
-
-    // Key Framing Operations
-    int auto_key;  // autodetect cut scenes and set the keyframes
-    int key_freq;  // maximum distance to key frame.
-
-    int lag_in_frames;  // how many frames lag before we start encoding
-
-    // ----------------------------------------------------------------
-    // DATARATE CONTROL OPTIONS
-
-    int end_usage;  // vbr or cbr
-
-    // buffer targeting aggressiveness
-    int under_shoot_pct;
-    int over_shoot_pct;
-
-    // buffering parameters
-    int64_t starting_buffer_level;  // in seconds
-    int64_t optimal_buffer_level;
-    int64_t maximum_buffer_size;
-
-    // Frame drop threshold.
-    int drop_frames_water_mark;
-
-    // controlling quality
-    int fixed_q;
-    int worst_allowed_q;
-    int best_allowed_q;
-    int cq_level;
-    int lossless;
-    int aq_mode;  // Adaptive Quantization mode
-
-    // two pass datarate control
-    int two_pass_vbrbias;        // two pass datarate control tweaks
-    int two_pass_vbrmin_section;
-    int two_pass_vbrmax_section;
-    // END DATARATE CONTROL OPTIONS
-    // ----------------------------------------------------------------
-
-    // Spatial and temporal scalability.
-    int ss_number_layers;  // Number of spatial layers.
-    int ts_number_layers;  // Number of temporal layers.
-    // Bitrate allocation for spatial layers.
-    int ss_target_bitrate[VPX_SS_MAX_LAYERS];
-    // Bitrate allocation (CBR mode) and framerate factor, for temporal layers.
-    int ts_target_bitrate[VPX_TS_MAX_LAYERS];
-    int ts_rate_decimator[VPX_TS_MAX_LAYERS];
-
-    // these parameters aren't to be used in final build don't use!!!
-    int play_alternate;
-    int alt_freq;
-
-    int encode_breakout;  // early breakout : for video conf recommend 800
-
-    /* Bitfield defining the error resiliency features to enable.
-     * Can provide decodable frames after losses in previous
-     * frames and decodable partitions after losses in the same frame.
-     */
-    unsigned int error_resilient_mode;
-
-    /* Bitfield defining the parallel decoding mode where the
-     * decoding in successive frames may be conducted in parallel
-     * just by decoding the frame headers.
-     */
-    unsigned int frame_parallel_decoding_mode;
-
-    int arnr_max_frames;
-    int arnr_strength;
-    int arnr_type;
-
-    int tile_columns;
-    int tile_rows;
-
-    struct vpx_fixed_buf         two_pass_stats_in;
-    struct vpx_codec_pkt_list  *output_pkt_list;
-
-    vp8e_tuning tuning;
-  } VP9_CONFIG;
-
-
-  void vp9_initialize_enc();
-
-  VP9_PTR vp9_create_compressor(VP9_CONFIG *oxcf);
-  void vp9_remove_compressor(VP9_PTR *comp);
-
-  void vp9_change_config(VP9_PTR onyx, VP9_CONFIG *oxcf);
-
-  // receive a frames worth of data. caller can assume that a copy of this
-  // frame is made and not just a copy of the pointer..
-  int vp9_receive_raw_frame(VP9_PTR comp, unsigned int frame_flags,
-                            YV12_BUFFER_CONFIG *sd, int64_t time_stamp,
-                            int64_t end_time_stamp);
-
-  int vp9_get_compressed_data(VP9_PTR comp, unsigned int *frame_flags,
-                              size_t *size, uint8_t *dest,
-                              int64_t *time_stamp, int64_t *time_end,
-                              int flush);
-
-  int vp9_get_preview_raw_frame(VP9_PTR comp, YV12_BUFFER_CONFIG *dest,
-                                vp9_ppflags_t *flags);
-
-  int vp9_use_as_reference(VP9_PTR comp, int ref_frame_flags);
-
-  int vp9_update_reference(VP9_PTR comp, int ref_frame_flags);
-
-  int vp9_copy_reference_enc(VP9_PTR comp, VP9_REFFRAME ref_frame_flag,
-                             YV12_BUFFER_CONFIG *sd);
-
-  int vp9_get_reference_enc(VP9_PTR ptr, int index, YV12_BUFFER_CONFIG **fb);
-
-  int vp9_set_reference_enc(VP9_PTR comp, VP9_REFFRAME ref_frame_flag,
-                            YV12_BUFFER_CONFIG *sd);
-
-  int vp9_update_entropy(VP9_PTR comp, int update);
-
-  int vp9_set_roimap(VP9_PTR comp, unsigned char *map,
-                     unsigned int rows, unsigned int cols,
-                     int delta_q[MAX_SEGMENTS],
-                     int delta_lf[MAX_SEGMENTS],
-                     unsigned int threshold[MAX_SEGMENTS]);
-
-  int vp9_set_active_map(VP9_PTR comp, unsigned char *map,
-                         unsigned int rows, unsigned int cols);
-
-  int vp9_set_internal_size(VP9_PTR comp,
-                            VPX_SCALING horiz_mode, VPX_SCALING vert_mode);
-
-  int vp9_set_size_literal(VP9_PTR comp, unsigned int width,
-                           unsigned int height);
-
-  void vp9_set_svc(VP9_PTR comp, int use_svc);
-
-  int vp9_get_quantizer(VP9_PTR c);
-
-#ifdef __cplusplus
-}  // extern "C"
-#endif
-
-#endif  // VP9_COMMON_VP9_ONYX_H_
diff --git a/vp9/common/vp9_rtcd_defs.pl b/vp9/common/vp9_rtcd_defs.pl

new file mode 100644 (file)

index 0000000..e4cd9d4
--- /dev/null
+++ b/vp9/common/vp9_rtcd_defs.pl
@@ -0,0 +1,778 @@
+sub vp9_common_forward_decls() {
+print <<EOF
+/*
+ * VP9
+ */
+
+#include "vpx/vpx_integer.h"
+#include "vp9/common/vp9_enums.h"
+
+struct macroblockd;
+
+/* Encoder forward decls */
+struct macroblock;
+struct vp9_variance_vtable;
+
+#define DEC_MVCOSTS int *mvjcost, int *mvcost[2]
+struct mv;
+union int_mv;
+struct yv12_buffer_config;
+EOF
+}
+forward_decls qw/vp9_common_forward_decls/;
+
+# x86inc.asm doesn't work if pic is enabled on 32 bit platforms so no assembly.
+if (vpx_config("CONFIG_USE_X86INC") eq "yes") {
+  $mmx_x86inc = 'mmx';
+  $sse_x86inc = 'sse';
+  $sse2_x86inc = 'sse2';
+  $ssse3_x86inc = 'ssse3';
+  $avx_x86inc = 'avx';
+  $avx2_x86inc = 'avx2';
+} else {
+  $mmx_x86inc = $sse_x86inc = $sse2_x86inc = $ssse3_x86inc =
+  $avx_x86inc = $avx2_x86inc = '';
+}
+
+# this variable is for functions that are 64 bit only.
+if ($opts{arch} eq "x86_64") {
+  $mmx_x86_64 = 'mmx';
+  $sse2_x86_64 = 'sse2';
+  $ssse3_x86_64 = 'ssse3';
+  $avx_x86_64 = 'avx';
+  $avx2_x86_64 = 'avx2';
+} else {
+  $mmx_x86_64 = $sse2_x86_64 = $ssse3_x86_64 =
+  $avx_x86_64 = $avx2_x86_64 = '';
+}
+
+#
+# RECON
+#
+add_proto qw/void vp9_d207_predictor_4x4/, "uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left";
+specialize qw/vp9_d207_predictor_4x4/, "$ssse3_x86inc";
+
+add_proto qw/void vp9_d45_predictor_4x4/, "uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left";
+specialize qw/vp9_d45_predictor_4x4/, "$ssse3_x86inc";
+
+add_proto qw/void vp9_d63_predictor_4x4/, "uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left";
+specialize qw/vp9_d63_predictor_4x4/, "$ssse3_x86inc";
+
+add_proto qw/void vp9_h_predictor_4x4/, "uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left";
+specialize qw/vp9_h_predictor_4x4 neon dspr2/, "$ssse3_x86inc";
+
+add_proto qw/void vp9_d117_predictor_4x4/, "uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left";
+specialize qw/vp9_d117_predictor_4x4/;
+
+add_proto qw/void vp9_d135_predictor_4x4/, "uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left";
+specialize qw/vp9_d135_predictor_4x4/;
+
+add_proto qw/void vp9_d153_predictor_4x4/, "uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left";
+specialize qw/vp9_d153_predictor_4x4/, "$ssse3_x86inc";
+
+add_proto qw/void vp9_v_predictor_4x4/, "uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left";
+specialize qw/vp9_v_predictor_4x4 neon/, "$sse_x86inc";
+
+add_proto qw/void vp9_tm_predictor_4x4/, "uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left";
+specialize qw/vp9_tm_predictor_4x4 neon dspr2/, "$sse_x86inc";
+
+add_proto qw/void vp9_dc_predictor_4x4/, "uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left";
+specialize qw/vp9_dc_predictor_4x4 dspr2/, "$sse_x86inc";
+
+add_proto qw/void vp9_dc_top_predictor_4x4/, "uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left";
+specialize qw/vp9_dc_top_predictor_4x4/;
+
+add_proto qw/void vp9_dc_left_predictor_4x4/, "uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left";
+specialize qw/vp9_dc_left_predictor_4x4/;
+
+add_proto qw/void vp9_dc_128_predictor_4x4/, "uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left";
+specialize qw/vp9_dc_128_predictor_4x4/;
+
+add_proto qw/void vp9_d207_predictor_8x8/, "uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left";
+specialize qw/vp9_d207_predictor_8x8/, "$ssse3_x86inc";
+
+add_proto qw/void vp9_d45_predictor_8x8/, "uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left";
+specialize qw/vp9_d45_predictor_8x8/, "$ssse3_x86inc";
+
+add_proto qw/void vp9_d63_predictor_8x8/, "uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left";
+specialize qw/vp9_d63_predictor_8x8/, "$ssse3_x86inc";
+
+add_proto qw/void vp9_h_predictor_8x8/, "uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left";
+specialize qw/vp9_h_predictor_8x8 neon dspr2/, "$ssse3_x86inc";
+
+add_proto qw/void vp9_d117_predictor_8x8/, "uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left";
+specialize qw/vp9_d117_predictor_8x8/;
+
+add_proto qw/void vp9_d135_predictor_8x8/, "uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left";
+specialize qw/vp9_d135_predictor_8x8/;
+
+add_proto qw/void vp9_d153_predictor_8x8/, "uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left";
+specialize qw/vp9_d153_predictor_8x8/, "$ssse3_x86inc";
+
+add_proto qw/void vp9_v_predictor_8x8/, "uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left";
+specialize qw/vp9_v_predictor_8x8 neon/, "$sse_x86inc";
+
+add_proto qw/void vp9_tm_predictor_8x8/, "uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left";
+specialize qw/vp9_tm_predictor_8x8 neon dspr2/, "$sse2_x86inc";
+
+add_proto qw/void vp9_dc_predictor_8x8/, "uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left";
+specialize qw/vp9_dc_predictor_8x8 dspr2/, "$sse_x86inc";
+
+add_proto qw/void vp9_dc_top_predictor_8x8/, "uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left";
+specialize qw/vp9_dc_top_predictor_8x8/;
+
+add_proto qw/void vp9_dc_left_predictor_8x8/, "uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left";
+specialize qw/vp9_dc_left_predictor_8x8/;
+
+add_proto qw/void vp9_dc_128_predictor_8x8/, "uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left";
+specialize qw/vp9_dc_128_predictor_8x8/;
+
+add_proto qw/void vp9_d207_predictor_16x16/, "uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left";
+specialize qw/vp9_d207_predictor_16x16/, "$ssse3_x86inc";
+
+add_proto qw/void vp9_d45_predictor_16x16/, "uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left";
+specialize qw/vp9_d45_predictor_16x16/, "$ssse3_x86inc";
+
+add_proto qw/void vp9_d63_predictor_16x16/, "uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left";
+specialize qw/vp9_d63_predictor_16x16/, "$ssse3_x86inc";
+
+add_proto qw/void vp9_h_predictor_16x16/, "uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left";
+specialize qw/vp9_h_predictor_16x16 neon dspr2/, "$ssse3_x86inc";
+
+add_proto qw/void vp9_d117_predictor_16x16/, "uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left";
+specialize qw/vp9_d117_predictor_16x16/;
+
+add_proto qw/void vp9_d135_predictor_16x16/, "uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left";
+specialize qw/vp9_d135_predictor_16x16/;
+
+add_proto qw/void vp9_d153_predictor_16x16/, "uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left";
+specialize qw/vp9_d153_predictor_16x16/, "$ssse3_x86inc";
+
+add_proto qw/void vp9_v_predictor_16x16/, "uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left";
+specialize qw/vp9_v_predictor_16x16 neon/, "$sse2_x86inc";
+
+add_proto qw/void vp9_tm_predictor_16x16/, "uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left";
+specialize qw/vp9_tm_predictor_16x16 neon/, "$sse2_x86inc";
+
+add_proto qw/void vp9_dc_predictor_16x16/, "uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left";
+specialize qw/vp9_dc_predictor_16x16 dspr2/, "$sse2_x86inc";
+
+add_proto qw/void vp9_dc_top_predictor_16x16/, "uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left";
+specialize qw/vp9_dc_top_predictor_16x16/;
+
+add_proto qw/void vp9_dc_left_predictor_16x16/, "uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left";
+specialize qw/vp9_dc_left_predictor_16x16/;
+
+add_proto qw/void vp9_dc_128_predictor_16x16/, "uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left";
+specialize qw/vp9_dc_128_predictor_16x16/;
+
+add_proto qw/void vp9_d207_predictor_32x32/, "uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left";
+specialize qw/vp9_d207_predictor_32x32/, "$ssse3_x86inc";
+
+add_proto qw/void vp9_d45_predictor_32x32/, "uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left";
+specialize qw/vp9_d45_predictor_32x32/, "$ssse3_x86inc";
+
+add_proto qw/void vp9_d63_predictor_32x32/, "uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left";
+specialize qw/vp9_d63_predictor_32x32/, "$ssse3_x86inc";
+
+add_proto qw/void vp9_h_predictor_32x32/, "uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left";
+specialize qw/vp9_h_predictor_32x32 neon/, "$ssse3_x86inc";
+
+add_proto qw/void vp9_d117_predictor_32x32/, "uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left";
+specialize qw/vp9_d117_predictor_32x32/;
+
+add_proto qw/void vp9_d135_predictor_32x32/, "uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left";
+specialize qw/vp9_d135_predictor_32x32/;
+
+add_proto qw/void vp9_d153_predictor_32x32/, "uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left";
+specialize qw/vp9_d153_predictor_32x32/;
+
+add_proto qw/void vp9_v_predictor_32x32/, "uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left";
+specialize qw/vp9_v_predictor_32x32 neon/, "$sse2_x86inc";
+
+add_proto qw/void vp9_tm_predictor_32x32/, "uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left";
+specialize qw/vp9_tm_predictor_32x32 neon/, "$sse2_x86_64";
+
+add_proto qw/void vp9_dc_predictor_32x32/, "uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left";
+specialize qw/vp9_dc_predictor_32x32/, "$sse2_x86inc";
+
+add_proto qw/void vp9_dc_top_predictor_32x32/, "uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left";
+specialize qw/vp9_dc_top_predictor_32x32/;
+
+add_proto qw/void vp9_dc_left_predictor_32x32/, "uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left";
+specialize qw/vp9_dc_left_predictor_32x32/;
+
+add_proto qw/void vp9_dc_128_predictor_32x32/, "uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left";
+specialize qw/vp9_dc_128_predictor_32x32/;
+
+#
+# Loopfilter
+#
+add_proto qw/void vp9_lpf_vertical_16/, "uint8_t *s, int pitch, const uint8_t *blimit, const uint8_t *limit, const uint8_t *thresh";
+specialize qw/vp9_lpf_vertical_16 sse2 neon dspr2/;
+
+add_proto qw/void vp9_lpf_vertical_16_dual/, "uint8_t *s, int pitch, const uint8_t *blimit, const uint8_t *limit, const uint8_t *thresh";
+specialize qw/vp9_lpf_vertical_16_dual sse2 neon dspr2/;
+
+add_proto qw/void vp9_lpf_vertical_8/, "uint8_t *s, int pitch, const uint8_t *blimit, const uint8_t *limit, const uint8_t *thresh, int count";
+specialize qw/vp9_lpf_vertical_8 sse2 neon dspr2/;
+
+add_proto qw/void vp9_lpf_vertical_8_dual/, "uint8_t *s, int pitch, const uint8_t *blimit0, const uint8_t *limit0, const uint8_t *thresh0, const uint8_t *blimit1, const uint8_t *limit1, const uint8_t *thresh1";
+specialize qw/vp9_lpf_vertical_8_dual sse2 neon dspr2/;
+
+add_proto qw/void vp9_lpf_vertical_4/, "uint8_t *s, int pitch, const uint8_t *blimit, const uint8_t *limit, const uint8_t *thresh, int count";
+specialize qw/vp9_lpf_vertical_4 mmx neon dspr2/;
+
+add_proto qw/void vp9_lpf_vertical_4_dual/, "uint8_t *s, int pitch, const uint8_t *blimit0, const uint8_t *limit0, const uint8_t *thresh0, const uint8_t *blimit1, const uint8_t *limit1, const uint8_t *thresh1";
+specialize qw/vp9_lpf_vertical_4_dual sse2 neon dspr2/;
+
+add_proto qw/void vp9_lpf_horizontal_16/, "uint8_t *s, int pitch, const uint8_t *blimit, const uint8_t *limit, const uint8_t *thresh, int count";
+specialize qw/vp9_lpf_horizontal_16 sse2 avx2 neon dspr2/;
+
+add_proto qw/void vp9_lpf_horizontal_8/, "uint8_t *s, int pitch, const uint8_t *blimit, const uint8_t *limit, const uint8_t *thresh, int count";
+specialize qw/vp9_lpf_horizontal_8 sse2 neon dspr2/;
+
+add_proto qw/void vp9_lpf_horizontal_8_dual/, "uint8_t *s, int pitch, const uint8_t *blimit0, const uint8_t *limit0, const uint8_t *thresh0, const uint8_t *blimit1, const uint8_t *limit1, const uint8_t *thresh1";
+specialize qw/vp9_lpf_horizontal_8_dual sse2 neon dspr2/;
+
+add_proto qw/void vp9_lpf_horizontal_4/, "uint8_t *s, int pitch, const uint8_t *blimit, const uint8_t *limit, const uint8_t *thresh, int count";
+specialize qw/vp9_lpf_horizontal_4 mmx neon dspr2/;
+
+add_proto qw/void vp9_lpf_horizontal_4_dual/, "uint8_t *s, int pitch, const uint8_t *blimit0, const uint8_t *limit0, const uint8_t *thresh0, const uint8_t *blimit1, const uint8_t *limit1, const uint8_t *thresh1";
+specialize qw/vp9_lpf_horizontal_4_dual sse2 neon dspr2/;
+
+#
+# post proc
+#
+if (vpx_config("CONFIG_VP9_POSTPROC") eq "yes") {
+add_proto qw/void vp9_mbpost_proc_down/, "uint8_t *dst, int pitch, int rows, int cols, int flimit";
+specialize qw/vp9_mbpost_proc_down mmx sse2/;
+$vp9_mbpost_proc_down_sse2=vp9_mbpost_proc_down_xmm;
+
+add_proto qw/void vp9_mbpost_proc_across_ip/, "uint8_t *src, int pitch, int rows, int cols, int flimit";
+specialize qw/vp9_mbpost_proc_across_ip sse2/;
+$vp9_mbpost_proc_across_ip_sse2=vp9_mbpost_proc_across_ip_xmm;
+
+add_proto qw/void vp9_post_proc_down_and_across/, "const uint8_t *src_ptr, uint8_t *dst_ptr, int src_pixels_per_line, int dst_pixels_per_line, int rows, int cols, int flimit";
+specialize qw/vp9_post_proc_down_and_across mmx sse2/;
+$vp9_post_proc_down_and_across_sse2=vp9_post_proc_down_and_across_xmm;
+
+add_proto qw/void vp9_plane_add_noise/, "uint8_t *Start, char *noise, char blackclamp[16], char whiteclamp[16], char bothclamp[16], unsigned int Width, unsigned int Height, int Pitch";
+specialize qw/vp9_plane_add_noise mmx sse2/;
+$vp9_plane_add_noise_sse2=vp9_plane_add_noise_wmt;
+}
+
+add_proto qw/void vp9_blend_mb_inner/, "uint8_t *y, uint8_t *u, uint8_t *v, int y1, int u1, int v1, int alpha, int stride";
+specialize qw/vp9_blend_mb_inner/;
+
+add_proto qw/void vp9_blend_mb_outer/, "uint8_t *y, uint8_t *u, uint8_t *v, int y1, int u1, int v1, int alpha, int stride";
+specialize qw/vp9_blend_mb_outer/;
+
+add_proto qw/void vp9_blend_b/, "uint8_t *y, uint8_t *u, uint8_t *v, int y1, int u1, int v1, int alpha, int stride";
+specialize qw/vp9_blend_b/;
+
+#
+# Sub Pixel Filters
+#
+add_proto qw/void vp9_convolve_copy/, "const uint8_t *src, ptrdiff_t src_stride, uint8_t *dst, ptrdiff_t dst_stride, const int16_t *filter_x, int x_step_q4, const int16_t *filter_y, int y_step_q4, int w, int h";
+specialize qw/vp9_convolve_copy neon dspr2/, "$sse2_x86inc";
+
+add_proto qw/void vp9_convolve_avg/, "const uint8_t *src, ptrdiff_t src_stride, uint8_t *dst, ptrdiff_t dst_stride, const int16_t *filter_x, int x_step_q4, const int16_t *filter_y, int y_step_q4, int w, int h";
+specialize qw/vp9_convolve_avg neon dspr2/, "$sse2_x86inc";
+
+add_proto qw/void vp9_convolve8/, "const uint8_t *src, ptrdiff_t src_stride, uint8_t *dst, ptrdiff_t dst_stride, const int16_t *filter_x, int x_step_q4, const int16_t *filter_y, int y_step_q4, int w, int h";
+specialize qw/vp9_convolve8 sse2 ssse3 avx2 neon dspr2/;
+
+add_proto qw/void vp9_convolve8_horiz/, "const uint8_t *src, ptrdiff_t src_stride, uint8_t *dst, ptrdiff_t dst_stride, const int16_t *filter_x, int x_step_q4, const int16_t *filter_y, int y_step_q4, int w, int h";
+specialize qw/vp9_convolve8_horiz sse2 ssse3 avx2 neon dspr2/;
+
+add_proto qw/void vp9_convolve8_vert/, "const uint8_t *src, ptrdiff_t src_stride, uint8_t *dst, ptrdiff_t dst_stride, const int16_t *filter_x, int x_step_q4, const int16_t *filter_y, int y_step_q4, int w, int h";
+specialize qw/vp9_convolve8_vert sse2 ssse3 avx2 neon dspr2/;
+
+add_proto qw/void vp9_convolve8_avg/, "const uint8_t *src, ptrdiff_t src_stride, uint8_t *dst, ptrdiff_t dst_stride, const int16_t *filter_x, int x_step_q4, const int16_t *filter_y, int y_step_q4, int w, int h";
+specialize qw/vp9_convolve8_avg sse2 ssse3 neon dspr2/;
+
+add_proto qw/void vp9_convolve8_avg_horiz/, "const uint8_t *src, ptrdiff_t src_stride, uint8_t *dst, ptrdiff_t dst_stride, const int16_t *filter_x, int x_step_q4, const int16_t *filter_y, int y_step_q4, int w, int h";
+specialize qw/vp9_convolve8_avg_horiz sse2 ssse3 neon dspr2/;
+
+add_proto qw/void vp9_convolve8_avg_vert/, "const uint8_t *src, ptrdiff_t src_stride, uint8_t *dst, ptrdiff_t dst_stride, const int16_t *filter_x, int x_step_q4, const int16_t *filter_y, int y_step_q4, int w, int h";
+specialize qw/vp9_convolve8_avg_vert sse2 ssse3 neon dspr2/;
+
+#
+# dct
+#
+add_proto qw/void vp9_idct4x4_1_add/, "const int16_t *input, uint8_t *dest, int dest_stride";
+specialize qw/vp9_idct4x4_1_add sse2 neon dspr2/;
+
+add_proto qw/void vp9_idct4x4_16_add/, "const int16_t *input, uint8_t *dest, int dest_stride";
+specialize qw/vp9_idct4x4_16_add sse2 neon dspr2/;
+
+add_proto qw/void vp9_idct8x8_1_add/, "const int16_t *input, uint8_t *dest, int dest_stride";
+specialize qw/vp9_idct8x8_1_add sse2 neon dspr2/;
+
+add_proto qw/void vp9_idct8x8_64_add/, "const int16_t *input, uint8_t *dest, int dest_stride";
+specialize qw/vp9_idct8x8_64_add sse2 neon dspr2/;
+
+add_proto qw/void vp9_idct8x8_10_add/, "const int16_t *input, uint8_t *dest, int dest_stride";
+specialize qw/vp9_idct8x8_10_add sse2 neon dspr2/;
+
+add_proto qw/void vp9_idct16x16_1_add/, "const int16_t *input, uint8_t *dest, int dest_stride";
+specialize qw/vp9_idct16x16_1_add sse2 neon dspr2/;
+
+add_proto qw/void vp9_idct16x16_256_add/, "const int16_t *input, uint8_t *dest, int dest_stride";
+specialize qw/vp9_idct16x16_256_add sse2 neon dspr2/;
+
+add_proto qw/void vp9_idct16x16_10_add/, "const int16_t *input, uint8_t *dest, int dest_stride";
+specialize qw/vp9_idct16x16_10_add sse2 neon dspr2/;
+
+add_proto qw/void vp9_idct32x32_1024_add/, "const int16_t *input, uint8_t *dest, int dest_stride";
+specialize qw/vp9_idct32x32_1024_add sse2 neon dspr2/;
+
+add_proto qw/void vp9_idct32x32_34_add/, "const int16_t *input, uint8_t *dest, int dest_stride";
+specialize qw/vp9_idct32x32_34_add sse2 neon dspr2/;
+$vp9_idct32x32_34_add_neon=vp9_idct32x32_1024_add_neon;
+
+add_proto qw/void vp9_idct32x32_1_add/, "const int16_t *input, uint8_t *dest, int dest_stride";
+specialize qw/vp9_idct32x32_1_add sse2 neon dspr2/;
+
+add_proto qw/void vp9_iht4x4_16_add/, "const int16_t *input, uint8_t *dest, int dest_stride, int tx_type";
+specialize qw/vp9_iht4x4_16_add sse2 neon dspr2/;
+
+add_proto qw/void vp9_iht8x8_64_add/, "const int16_t *input, uint8_t *dest, int dest_stride, int tx_type";
+specialize qw/vp9_iht8x8_64_add sse2 neon dspr2/;
+
+add_proto qw/void vp9_iht16x16_256_add/, "const int16_t *input, uint8_t *output, int pitch, int tx_type";
+specialize qw/vp9_iht16x16_256_add sse2 dspr2/;
+
+# dct and add
+
+add_proto qw/void vp9_iwht4x4_1_add/, "const int16_t *input, uint8_t *dest, int dest_stride";
+specialize qw/vp9_iwht4x4_1_add/;
+
+add_proto qw/void vp9_iwht4x4_16_add/, "const int16_t *input, uint8_t *dest, int dest_stride";
+specialize qw/vp9_iwht4x4_16_add/;
+
+#
+# Encoder functions below this point.
+#
+if (vpx_config("CONFIG_VP9_ENCODER") eq "yes") {
+
+
+# variance
+add_proto qw/unsigned int vp9_variance32x16/, "const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse";
+specialize qw/vp9_variance32x16/, "$sse2_x86inc", "$avx2_x86inc";
+
+add_proto qw/unsigned int vp9_variance16x32/, "const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse";
+specialize qw/vp9_variance16x32/, "$sse2_x86inc";
+
+add_proto qw/unsigned int vp9_variance64x32/, "const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse";
+specialize qw/vp9_variance64x32/, "$sse2_x86inc", "$avx2_x86inc";
+
+add_proto qw/unsigned int vp9_variance32x64/, "const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse";
+specialize qw/vp9_variance32x64/, "$sse2_x86inc";
+
+add_proto qw/unsigned int vp9_variance32x32/, "const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse";
+specialize qw/vp9_variance32x32/, "$sse2_x86inc", "$avx2_x86inc";
+
+add_proto qw/unsigned int vp9_variance64x64/, "const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse";
+specialize qw/vp9_variance64x64/, "$sse2_x86inc", "$avx2_x86inc";
+
+add_proto qw/unsigned int vp9_variance16x16/, "const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse";
+specialize qw/vp9_variance16x16 mmx/, "$sse2_x86inc", "$avx2_x86inc";
+
+add_proto qw/unsigned int vp9_variance16x8/, "const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse";
+specialize qw/vp9_variance16x8 mmx/, "$sse2_x86inc";
+
+add_proto qw/unsigned int vp9_variance8x16/, "const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse";
+specialize qw/vp9_variance8x16 mmx/, "$sse2_x86inc";
+
+add_proto qw/unsigned int vp9_variance8x8/, "const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse";
+specialize qw/vp9_variance8x8 mmx/, "$sse2_x86inc";
+
+add_proto qw/void vp9_get_sse_sum_8x8/, "const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse, int *sum";
+specialize qw/vp9_get_sse_sum_8x8 sse2/;
+$vp9_get_sse_sum_8x8_sse2=vp9_get8x8var_sse2;
+
+add_proto qw/unsigned int vp9_variance8x4/, "const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse";
+specialize qw/vp9_variance8x4/, "$sse2_x86inc";
+
+add_proto qw/unsigned int vp9_variance4x8/, "const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse";
+specialize qw/vp9_variance4x8/, "$sse2_x86inc";
+
+add_proto qw/unsigned int vp9_variance4x4/, "const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse";
+specialize qw/vp9_variance4x4 mmx/, "$sse2_x86inc";
+
+add_proto qw/unsigned int vp9_sub_pixel_variance64x64/, "const uint8_t *src_ptr, int source_stride, int xoffset, int  yoffset, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse";
+specialize qw/vp9_sub_pixel_variance64x64 avx2/, "$sse2_x86inc", "$ssse3_x86inc";
+
+add_proto qw/unsigned int vp9_sub_pixel_avg_variance64x64/, "const uint8_t *src_ptr, int source_stride, int xoffset, int  yoffset, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse, const uint8_t *second_pred";
+specialize qw/vp9_sub_pixel_avg_variance64x64 avx2/, "$sse2_x86inc", "$ssse3_x86inc";
+
+add_proto qw/unsigned int vp9_sub_pixel_variance32x64/, "const uint8_t *src_ptr, int source_stride, int xoffset, int  yoffset, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse";
+specialize qw/vp9_sub_pixel_variance32x64/, "$sse2_x86inc", "$ssse3_x86inc";
+
+add_proto qw/unsigned int vp9_sub_pixel_avg_variance32x64/, "const uint8_t *src_ptr, int source_stride, int xoffset, int  yoffset, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse, const uint8_t *second_pred";
+specialize qw/vp9_sub_pixel_avg_variance32x64/, "$sse2_x86inc", "$ssse3_x86inc";
+
+add_proto qw/unsigned int vp9_sub_pixel_variance64x32/, "const uint8_t *src_ptr, int source_stride, int xoffset, int  yoffset, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse";
+specialize qw/vp9_sub_pixel_variance64x32/, "$sse2_x86inc", "$ssse3_x86inc";
+
+add_proto qw/unsigned int vp9_sub_pixel_avg_variance64x32/, "const uint8_t *src_ptr, int source_stride, int xoffset, int  yoffset, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse, const uint8_t *second_pred";
+specialize qw/vp9_sub_pixel_avg_variance64x32/, "$sse2_x86inc", "$ssse3_x86inc";
+
+add_proto qw/unsigned int vp9_sub_pixel_variance32x16/, "const uint8_t *src_ptr, int source_stride, int xoffset, int  yoffset, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse";
+specialize qw/vp9_sub_pixel_variance32x16/, "$sse2_x86inc", "$ssse3_x86inc";
+
+add_proto qw/unsigned int vp9_sub_pixel_avg_variance32x16/, "const uint8_t *src_ptr, int source_stride, int xoffset, int  yoffset, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse, const uint8_t *second_pred";
+specialize qw/vp9_sub_pixel_avg_variance32x16/, "$sse2_x86inc", "$ssse3_x86inc";
+
+add_proto qw/unsigned int vp9_sub_pixel_variance16x32/, "const uint8_t *src_ptr, int source_stride, int xoffset, int  yoffset, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse";
+specialize qw/vp9_sub_pixel_variance16x32/, "$sse2_x86inc", "$ssse3_x86inc";
+
+add_proto qw/unsigned int vp9_sub_pixel_avg_variance16x32/, "const uint8_t *src_ptr, int source_stride, int xoffset, int  yoffset, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse, const uint8_t *second_pred";
+specialize qw/vp9_sub_pixel_avg_variance16x32/, "$sse2_x86inc", "$ssse3_x86inc";
+
+add_proto qw/unsigned int vp9_sub_pixel_variance32x32/, "const uint8_t *src_ptr, int source_stride, int xoffset, int  yoffset, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse";
+specialize qw/vp9_sub_pixel_variance32x32 avx2/, "$sse2_x86inc", "$ssse3_x86inc";
+
+add_proto qw/unsigned int vp9_sub_pixel_avg_variance32x32/, "const uint8_t *src_ptr, int source_stride, int xoffset, int  yoffset, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse, const uint8_t *second_pred";
+specialize qw/vp9_sub_pixel_avg_variance32x32 avx2/, "$sse2_x86inc", "$ssse3_x86inc";
+
+add_proto qw/unsigned int vp9_sub_pixel_variance16x16/, "const uint8_t *src_ptr, int source_stride, int xoffset, int  yoffset, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse";
+specialize qw/vp9_sub_pixel_variance16x16/, "$sse2_x86inc", "$ssse3_x86inc";
+
+add_proto qw/unsigned int vp9_sub_pixel_avg_variance16x16/, "const uint8_t *src_ptr, int source_stride, int xoffset, int  yoffset, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse, const uint8_t *second_pred";
+specialize qw/vp9_sub_pixel_avg_variance16x16/, "$sse2_x86inc", "$ssse3_x86inc";
+
+add_proto qw/unsigned int vp9_sub_pixel_variance8x16/, "const uint8_t *src_ptr, int source_stride, int xoffset, int  yoffset, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse";
+specialize qw/vp9_sub_pixel_variance8x16/, "$sse2_x86inc", "$ssse3_x86inc";
+
+add_proto qw/unsigned int vp9_sub_pixel_avg_variance8x16/, "const uint8_t *src_ptr, int source_stride, int xoffset, int  yoffset, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse, const uint8_t *second_pred";
+specialize qw/vp9_sub_pixel_avg_variance8x16/, "$sse2_x86inc", "$ssse3_x86inc";
+
+add_proto qw/unsigned int vp9_sub_pixel_variance16x8/, "const uint8_t *src_ptr, int source_stride, int xoffset, int  yoffset, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse";
+specialize qw/vp9_sub_pixel_variance16x8/, "$sse2_x86inc", "$ssse3_x86inc";
+
+add_proto qw/unsigned int vp9_sub_pixel_avg_variance16x8/, "const uint8_t *src_ptr, int source_stride, int xoffset, int  yoffset, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse, const uint8_t *second_pred";
+specialize qw/vp9_sub_pixel_avg_variance16x8/, "$sse2_x86inc", "$ssse3_x86inc";
+
+add_proto qw/unsigned int vp9_sub_pixel_variance8x8/, "const uint8_t *src_ptr, int source_stride, int xoffset, int  yoffset, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse";
+specialize qw/vp9_sub_pixel_variance8x8/, "$sse2_x86inc", "$ssse3_x86inc";
+
+add_proto qw/unsigned int vp9_sub_pixel_avg_variance8x8/, "const uint8_t *src_ptr, int source_stride, int xoffset, int  yoffset, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse, const uint8_t *second_pred";
+specialize qw/vp9_sub_pixel_avg_variance8x8/, "$sse2_x86inc", "$ssse3_x86inc";
+
+# TODO(jingning): need to convert 8x4/4x8 functions into mmx/sse form
+add_proto qw/unsigned int vp9_sub_pixel_variance8x4/, "const uint8_t *src_ptr, int source_stride, int xoffset, int yoffset, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse";
+specialize qw/vp9_sub_pixel_variance8x4/, "$sse2_x86inc", "$ssse3_x86inc";
+
+add_proto qw/unsigned int vp9_sub_pixel_avg_variance8x4/, "const uint8_t *src_ptr, int source_stride, int xoffset, int yoffset, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse, const uint8_t *second_pred";
+specialize qw/vp9_sub_pixel_avg_variance8x4/, "$sse2_x86inc", "$ssse3_x86inc";
+
+add_proto qw/unsigned int vp9_sub_pixel_variance4x8/, "const uint8_t *src_ptr, int source_stride, int xoffset, int yoffset, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse";
+specialize qw/vp9_sub_pixel_variance4x8/, "$sse_x86inc", "$ssse3_x86inc";
+
+add_proto qw/unsigned int vp9_sub_pixel_avg_variance4x8/, "const uint8_t *src_ptr, int source_stride, int xoffset, int yoffset, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse, const uint8_t *second_pred";
+specialize qw/vp9_sub_pixel_avg_variance4x8/, "$sse_x86inc", "$ssse3_x86inc";
+
+add_proto qw/unsigned int vp9_sub_pixel_variance4x4/, "const uint8_t *src_ptr, int source_stride, int xoffset, int  yoffset, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse";
+specialize qw/vp9_sub_pixel_variance4x4/, "$sse_x86inc", "$ssse3_x86inc";
+#vp9_sub_pixel_variance4x4_sse2=vp9_sub_pixel_variance4x4_wmt
+
+add_proto qw/unsigned int vp9_sub_pixel_avg_variance4x4/, "const uint8_t *src_ptr, int source_stride, int xoffset, int  yoffset, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse, const uint8_t *second_pred";
+specialize qw/vp9_sub_pixel_avg_variance4x4/, "$sse_x86inc", "$ssse3_x86inc";
+
+add_proto qw/unsigned int vp9_sad64x64/, "const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int  ref_stride, unsigned int max_sad";
+specialize qw/vp9_sad64x64/, "$sse2_x86inc";
+
+add_proto qw/unsigned int vp9_sad32x64/, "const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int max_sad";
+specialize qw/vp9_sad32x64/, "$sse2_x86inc";
+
+add_proto qw/unsigned int vp9_sad64x32/, "const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int max_sad";
+specialize qw/vp9_sad64x32/, "$sse2_x86inc";
+
+add_proto qw/unsigned int vp9_sad32x16/, "const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int max_sad";
+specialize qw/vp9_sad32x16/, "$sse2_x86inc";
+
+add_proto qw/unsigned int vp9_sad16x32/, "const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int max_sad";
+specialize qw/vp9_sad16x32/, "$sse2_x86inc";
+
+add_proto qw/unsigned int vp9_sad32x32/, "const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int  ref_stride, unsigned int max_sad";
+specialize qw/vp9_sad32x32/, "$sse2_x86inc";
+
+add_proto qw/unsigned int vp9_sad16x16/, "const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int  ref_stride, unsigned int max_sad";
+specialize qw/vp9_sad16x16 mmx/, "$sse2_x86inc";
+
+add_proto qw/unsigned int vp9_sad16x8/, "const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int  ref_stride, unsigned int max_sad";
+specialize qw/vp9_sad16x8 mmx/, "$sse2_x86inc";
+
+add_proto qw/unsigned int vp9_sad8x16/, "const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int  ref_stride, unsigned int max_sad";
+specialize qw/vp9_sad8x16 mmx/, "$sse2_x86inc";
+
+add_proto qw/unsigned int vp9_sad8x8/, "const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int  ref_stride, unsigned int max_sad";
+specialize qw/vp9_sad8x8 mmx/, "$sse2_x86inc";
+
+add_proto qw/unsigned int vp9_sad8x4/, "const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int max_sad";
+specialize qw/vp9_sad8x4/, "$sse2_x86inc";
+
+add_proto qw/unsigned int vp9_sad4x8/, "const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int max_sad";
+specialize qw/vp9_sad4x8/, "$sse_x86inc";
+
+add_proto qw/unsigned int vp9_sad4x4/, "const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int  ref_stride, unsigned int max_sad";
+specialize qw/vp9_sad4x4 mmx/, "$sse_x86inc";
+
+add_proto qw/unsigned int vp9_sad64x64_avg/, "const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int  ref_stride, const uint8_t *second_pred, unsigned int max_sad";
+specialize qw/vp9_sad64x64_avg/, "$sse2_x86inc";
+
+add_proto qw/unsigned int vp9_sad32x64_avg/, "const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, const uint8_t *second_pred, unsigned int max_sad";
+specialize qw/vp9_sad32x64_avg/, "$sse2_x86inc";
+
+add_proto qw/unsigned int vp9_sad64x32_avg/, "const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, const uint8_t *second_pred, unsigned int max_sad";
+specialize qw/vp9_sad64x32_avg/, "$sse2_x86inc";
+
+add_proto qw/unsigned int vp9_sad32x16_avg/, "const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, const uint8_t *second_pred, unsigned int max_sad";
+specialize qw/vp9_sad32x16_avg/, "$sse2_x86inc";
+
+add_proto qw/unsigned int vp9_sad16x32_avg/, "const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, const uint8_t *second_pred, unsigned int max_sad";
+specialize qw/vp9_sad16x32_avg/, "$sse2_x86inc";
+
+add_proto qw/unsigned int vp9_sad32x32_avg/, "const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int  ref_stride, const uint8_t *second_pred, unsigned int max_sad";
+specialize qw/vp9_sad32x32_avg/, "$sse2_x86inc";
+
+add_proto qw/unsigned int vp9_sad16x16_avg/, "const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int  ref_stride, const uint8_t *second_pred, unsigned int max_sad";
+specialize qw/vp9_sad16x16_avg/, "$sse2_x86inc";
+
+add_proto qw/unsigned int vp9_sad16x8_avg/, "const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int  ref_stride, const uint8_t *second_pred, unsigned int max_sad";
+specialize qw/vp9_sad16x8_avg/, "$sse2_x86inc";
+
+add_proto qw/unsigned int vp9_sad8x16_avg/, "const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int  ref_stride, const uint8_t *second_pred, unsigned int max_sad";
+specialize qw/vp9_sad8x16_avg/, "$sse2_x86inc";
+
+add_proto qw/unsigned int vp9_sad8x8_avg/, "const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int  ref_stride, const uint8_t *second_pred, unsigned int max_sad";
+specialize qw/vp9_sad8x8_avg/, "$sse2_x86inc";
+
+add_proto qw/unsigned int vp9_sad8x4_avg/, "const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, const uint8_t *second_pred, unsigned int max_sad";
+specialize qw/vp9_sad8x4_avg/, "$sse2_x86inc";
+
+add_proto qw/unsigned int vp9_sad4x8_avg/, "const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, const uint8_t *second_pred, unsigned int max_sad";
+specialize qw/vp9_sad4x8_avg/, "$sse_x86inc";
+
+add_proto qw/unsigned int vp9_sad4x4_avg/, "const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int  ref_stride, const uint8_t *second_pred, unsigned int max_sad";
+specialize qw/vp9_sad4x4_avg/, "$sse_x86inc";
+
+add_proto qw/unsigned int vp9_variance_halfpixvar16x16_h/, "const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse";
+specialize qw/vp9_variance_halfpixvar16x16_h/, "$sse2_x86inc";
+
+add_proto qw/unsigned int vp9_variance_halfpixvar16x16_v/, "const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse";
+specialize qw/vp9_variance_halfpixvar16x16_v/, "$sse2_x86inc";
+
+add_proto qw/unsigned int vp9_variance_halfpixvar16x16_hv/, "const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse";
+specialize qw/vp9_variance_halfpixvar16x16_hv/, "$sse2_x86inc";
+
+add_proto qw/unsigned int vp9_variance_halfpixvar64x64_h/, "const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse";
+specialize qw/vp9_variance_halfpixvar64x64_h/;
+
+add_proto qw/unsigned int vp9_variance_halfpixvar64x64_v/, "const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse";
+specialize qw/vp9_variance_halfpixvar64x64_v/;
+
+add_proto qw/unsigned int vp9_variance_halfpixvar64x64_hv/, "const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse";
+specialize qw/vp9_variance_halfpixvar64x64_hv/;
+
+add_proto qw/unsigned int vp9_variance_halfpixvar32x32_h/, "const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse";
+specialize qw/vp9_variance_halfpixvar32x32_h/;
+
+add_proto qw/unsigned int vp9_variance_halfpixvar32x32_v/, "const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse";
+specialize qw/vp9_variance_halfpixvar32x32_v/;
+
+add_proto qw/unsigned int vp9_variance_halfpixvar32x32_hv/, "const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse";
+specialize qw/vp9_variance_halfpixvar32x32_hv/;
+
+add_proto qw/void vp9_sad64x64x3/, "const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int  ref_stride, unsigned int *sad_array";
+specialize qw/vp9_sad64x64x3/;
+
+add_proto qw/void vp9_sad32x32x3/, "const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int  ref_stride, unsigned int *sad_array";
+specialize qw/vp9_sad32x32x3/;
+
+add_proto qw/void vp9_sad16x16x3/, "const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int  ref_stride, unsigned int *sad_array";
+specialize qw/vp9_sad16x16x3 sse3 ssse3/;
+
+add_proto qw/void vp9_sad16x8x3/, "const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int  ref_stride, unsigned int *sad_array";
+specialize qw/vp9_sad16x8x3 sse3 ssse3/;
+
+add_proto qw/void vp9_sad8x16x3/, "const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int  ref_stride, unsigned int *sad_array";
+specialize qw/vp9_sad8x16x3 sse3/;
+
+add_proto qw/void vp9_sad8x8x3/, "const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int  ref_stride, unsigned int *sad_array";
+specialize qw/vp9_sad8x8x3 sse3/;
+
+add_proto qw/void vp9_sad4x4x3/, "const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int  ref_stride, unsigned int *sad_array";
+specialize qw/vp9_sad4x4x3 sse3/;
+
+add_proto qw/void vp9_sad64x64x8/, "const uint8_t *src_ptr, int  src_stride, const uint8_t *ref_ptr, int  ref_stride, uint32_t *sad_array";
+specialize qw/vp9_sad64x64x8/;
+
+add_proto qw/void vp9_sad32x32x8/, "const uint8_t *src_ptr, int  src_stride, const uint8_t *ref_ptr, int  ref_stride, uint32_t *sad_array";
+specialize qw/vp9_sad32x32x8/;
+
+add_proto qw/void vp9_sad16x16x8/, "const uint8_t *src_ptr, int  src_stride, const uint8_t *ref_ptr, int  ref_stride, uint32_t *sad_array";
+specialize qw/vp9_sad16x16x8 sse4/;
+
+add_proto qw/void vp9_sad16x8x8/, "const uint8_t *src_ptr, int  src_stride, const uint8_t *ref_ptr, int  ref_stride, uint32_t *sad_array";
+specialize qw/vp9_sad16x8x8 sse4/;
+
+add_proto qw/void vp9_sad8x16x8/, "const uint8_t *src_ptr, int  src_stride, const uint8_t *ref_ptr, int  ref_stride, uint32_t *sad_array";
+specialize qw/vp9_sad8x16x8 sse4/;
+
+add_proto qw/void vp9_sad8x8x8/, "const uint8_t *src_ptr, int  src_stride, const uint8_t *ref_ptr, int  ref_stride, uint32_t *sad_array";
+specialize qw/vp9_sad8x8x8 sse4/;
+
+add_proto qw/void vp9_sad8x4x8/, "const uint8_t *src_ptr, int src_stride, const uint8_t *ref_ptr, int ref_stride, uint32_t *sad_array";
+specialize qw/vp9_sad8x4x8/;
+
+add_proto qw/void vp9_sad4x8x8/, "const uint8_t *src_ptr, int src_stride, const uint8_t *ref_ptr, int ref_stride, uint32_t *sad_array";
+specialize qw/vp9_sad4x8x8/;
+
+add_proto qw/void vp9_sad4x4x8/, "const uint8_t *src_ptr, int  src_stride, const uint8_t *ref_ptr, int  ref_stride, uint32_t *sad_array";
+specialize qw/vp9_sad4x4x8 sse4/;
+
+add_proto qw/void vp9_sad64x64x4d/, "const uint8_t *src_ptr, int  src_stride, const uint8_t* const ref_ptr[], int  ref_stride, unsigned int *sad_array";
+specialize qw/vp9_sad64x64x4d sse2/;
+
+add_proto qw/void vp9_sad32x64x4d/, "const uint8_t *src_ptr, int  src_stride, const uint8_t* const ref_ptr[], int  ref_stride, unsigned int *sad_array";
+specialize qw/vp9_sad32x64x4d sse2/;
+
+add_proto qw/void vp9_sad64x32x4d/, "const uint8_t *src_ptr, int  src_stride, const uint8_t* const ref_ptr[], int  ref_stride, unsigned int *sad_array";
+specialize qw/vp9_sad64x32x4d sse2/;
+
+add_proto qw/void vp9_sad32x16x4d/, "const uint8_t *src_ptr, int  src_stride, const uint8_t* const ref_ptr[], int  ref_stride, unsigned int *sad_array";
+specialize qw/vp9_sad32x16x4d sse2/;
+
+add_proto qw/void vp9_sad16x32x4d/, "const uint8_t *src_ptr, int  src_stride, const uint8_t* const ref_ptr[], int  ref_stride, unsigned int *sad_array";
+specialize qw/vp9_sad16x32x4d sse2/;
+
+add_proto qw/void vp9_sad32x32x4d/, "const uint8_t *src_ptr, int  src_stride, const uint8_t* const ref_ptr[], int  ref_stride, unsigned int *sad_array";
+specialize qw/vp9_sad32x32x4d sse2/;
+
+add_proto qw/void vp9_sad16x16x4d/, "const uint8_t *src_ptr, int  src_stride, const uint8_t* const ref_ptr[], int  ref_stride, unsigned int *sad_array";
+specialize qw/vp9_sad16x16x4d sse2/;
+
+add_proto qw/void vp9_sad16x8x4d/, "const uint8_t *src_ptr, int  src_stride, const uint8_t* const ref_ptr[], int  ref_stride, unsigned int *sad_array";
+specialize qw/vp9_sad16x8x4d sse2/;
+
+add_proto qw/void vp9_sad8x16x4d/, "const uint8_t *src_ptr, int  src_stride, const uint8_t* const ref_ptr[], int  ref_stride, unsigned int *sad_array";
+specialize qw/vp9_sad8x16x4d sse2/;
+
+add_proto qw/void vp9_sad8x8x4d/, "const uint8_t *src_ptr, int  src_stride, const uint8_t* const ref_ptr[], int  ref_stride, unsigned int *sad_array";
+specialize qw/vp9_sad8x8x4d sse2/;
+
+# TODO(jingning): need to convert these 4x8/8x4 functions into sse2 form
+add_proto qw/void vp9_sad8x4x4d/, "const uint8_t *src_ptr, int src_stride, const uint8_t* const ref_ptr[], int ref_stride, unsigned int *sad_array";
+specialize qw/vp9_sad8x4x4d sse2/;
+
+add_proto qw/void vp9_sad4x8x4d/, "const uint8_t *src_ptr, int src_stride, const uint8_t* const ref_ptr[], int ref_stride, unsigned int *sad_array";
+specialize qw/vp9_sad4x8x4d sse/;
+
+add_proto qw/void vp9_sad4x4x4d/, "const uint8_t *src_ptr, int  src_stride, const uint8_t* const ref_ptr[], int  ref_stride, unsigned int *sad_array";
+specialize qw/vp9_sad4x4x4d sse/;
+
+#add_proto qw/unsigned int vp9_sub_pixel_mse16x16/, "const uint8_t *src_ptr, int  src_pixels_per_line, int  xoffset, int  yoffset, const uint8_t *dst_ptr, int dst_pixels_per_line, unsigned int *sse";
+#specialize qw/vp9_sub_pixel_mse16x16 sse2 mmx/;
+
+add_proto qw/unsigned int vp9_mse16x16/, "const uint8_t *src_ptr, int  source_stride, const uint8_t *ref_ptr, int  recon_stride, unsigned int *sse";
+specialize qw/vp9_mse16x16 mmx/, "$sse2_x86inc", "$avx2_x86inc";
+
+add_proto qw/unsigned int vp9_mse8x16/, "const uint8_t *src_ptr, int  source_stride, const uint8_t *ref_ptr, int  recon_stride, unsigned int *sse";
+specialize qw/vp9_mse8x16/;
+
+add_proto qw/unsigned int vp9_mse16x8/, "const uint8_t *src_ptr, int  source_stride, const uint8_t *ref_ptr, int  recon_stride, unsigned int *sse";
+specialize qw/vp9_mse16x8/;
+
+add_proto qw/unsigned int vp9_mse8x8/, "const uint8_t *src_ptr, int  source_stride, const uint8_t *ref_ptr, int  recon_stride, unsigned int *sse";
+specialize qw/vp9_mse8x8/;
+
+add_proto qw/unsigned int vp9_sub_pixel_mse64x64/, "const uint8_t *src_ptr, int  source_stride, int  xoffset, int  yoffset, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse";
+specialize qw/vp9_sub_pixel_mse64x64/;
+
+add_proto qw/unsigned int vp9_sub_pixel_mse32x32/, "const uint8_t *src_ptr, int  source_stride, int  xoffset, int  yoffset, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse";
+specialize qw/vp9_sub_pixel_mse32x32/;
+
+add_proto qw/unsigned int vp9_get_mb_ss/, "const int16_t *";
+specialize qw/vp9_get_mb_ss mmx sse2/;
+# ENCODEMB INVOKE
+
+add_proto qw/int64_t vp9_block_error/, "const int16_t *coeff, const int16_t *dqcoeff, intptr_t block_size, int64_t *ssz";
+specialize qw/vp9_block_error/, "$sse2_x86inc";
+
+add_proto qw/void vp9_subtract_block/, "int rows, int cols, int16_t *diff_ptr, ptrdiff_t diff_stride, const uint8_t *src_ptr, ptrdiff_t src_stride, const uint8_t *pred_ptr, ptrdiff_t pred_stride";
+specialize qw/vp9_subtract_block/, "$sse2_x86inc";
+
+add_proto qw/void vp9_quantize_b/, "const int16_t *coeff_ptr, intptr_t n_coeffs, int skip_block, const int16_t *zbin_ptr, const int16_t *round_ptr, const int16_t *quant_ptr, const int16_t *quant_shift_ptr, int16_t *qcoeff_ptr, int16_t *dqcoeff_ptr, const int16_t *dequant_ptr, int zbin_oq_value, uint16_t *eob_ptr, const int16_t *scan, const int16_t *iscan";
+specialize qw/vp9_quantize_b/, "$ssse3_x86_64";
+
+add_proto qw/void vp9_quantize_b_32x32/, "const int16_t *coeff_ptr, intptr_t n_coeffs, int skip_block, const int16_t *zbin_ptr, const int16_t *round_ptr, const int16_t *quant_ptr, const int16_t *quant_shift_ptr, int16_t *qcoeff_ptr, int16_t *dqcoeff_ptr, const int16_t *dequant_ptr, int zbin_oq_value, uint16_t *eob_ptr, const int16_t *scan, const int16_t *iscan";
+specialize qw/vp9_quantize_b_32x32/, "$ssse3_x86_64";
+
+#
+# Structured Similarity (SSIM)
+#
+if (vpx_config("CONFIG_INTERNAL_STATS") eq "yes") {
+    add_proto qw/void vp9_ssim_parms_8x8/, "uint8_t *s, int sp, uint8_t *r, int rp, unsigned long *sum_s, unsigned long *sum_r, unsigned long *sum_sq_s, unsigned long *sum_sq_r, unsigned long *sum_sxr";
+    specialize qw/vp9_ssim_parms_8x8/, "$sse2_x86_64";
+
+    add_proto qw/void vp9_ssim_parms_16x16/, "uint8_t *s, int sp, uint8_t *r, int rp, unsigned long *sum_s, unsigned long *sum_r, unsigned long *sum_sq_s, unsigned long *sum_sq_r, unsigned long *sum_sxr";
+    specialize qw/vp9_ssim_parms_16x16/, "$sse2_x86_64";
+}
+
+# fdct functions
+add_proto qw/void vp9_fht4x4/, "const int16_t *input, int16_t *output, int stride, int tx_type";
+specialize qw/vp9_fht4x4 sse2 avx2/;
+
+add_proto qw/void vp9_fht8x8/, "const int16_t *input, int16_t *output, int stride, int tx_type";
+specialize qw/vp9_fht8x8 sse2 avx2/;
+
+add_proto qw/void vp9_fht16x16/, "const int16_t *input, int16_t *output, int stride, int tx_type";
+specialize qw/vp9_fht16x16 sse2 avx2/;
+
+add_proto qw/void vp9_fwht4x4/, "const int16_t *input, int16_t *output, int stride";
+specialize qw/vp9_fwht4x4/;
+
+add_proto qw/void vp9_fdct4x4/, "const int16_t *input, int16_t *output, int stride";
+specialize qw/vp9_fdct4x4 sse2 avx2/;
+
+add_proto qw/void vp9_fdct8x8/, "const int16_t *input, int16_t *output, int stride";
+specialize qw/vp9_fdct8x8 sse2 avx2/;
+
+add_proto qw/void vp9_fdct16x16/, "const int16_t *input, int16_t *output, int stride";
+specialize qw/vp9_fdct16x16 sse2 avx2/;
+
+add_proto qw/void vp9_fdct32x32/, "const int16_t *input, int16_t *output, int stride";
+specialize qw/vp9_fdct32x32 sse2 avx2/;
+
+add_proto qw/void vp9_fdct32x32_rd/, "const int16_t *input, int16_t *output, int stride";
+specialize qw/vp9_fdct32x32_rd sse2 avx2/;
+
+#
+# Motion search
+#
+add_proto qw/int vp9_full_search_sad/, "const struct macroblock *x, const struct mv *ref_mv, int sad_per_bit, int distance, const struct vp9_variance_vtable *fn_ptr, DEC_MVCOSTS, const struct mv *center_mv, struct mv *best_mv";
+specialize qw/vp9_full_search_sad sse3 sse4_1/;
+$vp9_full_search_sad_sse3=vp9_full_search_sadx3;
+$vp9_full_search_sad_sse4_1=vp9_full_search_sadx8;
+
+add_proto qw/int vp9_refining_search_sad/, "const struct macroblock *x, struct mv *ref_mv, int sad_per_bit, int distance, const struct vp9_variance_vtable *fn_ptr, DEC_MVCOSTS, const struct mv *center_mv";
+specialize qw/vp9_refining_search_sad sse3/;
+$vp9_refining_search_sad_sse3=vp9_refining_search_sadx4;
+
+add_proto qw/int vp9_diamond_search_sad/, "const struct macroblock *x, struct mv *ref_mv, struct mv *best_mv, int search_param, int sad_per_bit, int *num00, const struct vp9_variance_vtable *fn_ptr, DEC_MVCOSTS, const struct mv *center_mv";
+specialize qw/vp9_diamond_search_sad sse3/;
+$vp9_diamond_search_sad_sse3=vp9_diamond_search_sadx4;
+
+add_proto qw/int vp9_full_range_search/, "const struct macroblock *x, struct mv *ref_mv, struct mv *best_mv, int search_param, int sad_per_bit, int *num00, const struct vp9_variance_vtable *fn_ptr, DEC_MVCOSTS, const struct mv *center_mv";
+specialize qw/vp9_full_range_search/;
+
+add_proto qw/void vp9_temporal_filter_apply/, "uint8_t *frame1, unsigned int stride, uint8_t *frame2, unsigned int block_size, int strength, int filter_weight, unsigned int *accumulator, uint16_t *count";
+specialize qw/vp9_temporal_filter_apply sse2/;
+
+}
+# end encoder functions
+1;
diff --git a/vp9/common/vp9_rtcd_defs.sh b/vp9/common/vp9_rtcd_defs.sh

deleted file mode 100644 (file)

index 5b44970..0000000
--- a/vp9/common/vp9_rtcd_defs.sh
+++ /dev/null
@@ -1,760 +0,0 @@
-vp9_common_forward_decls() {
-cat <<EOF
-/*
- * VP9
- */
-
-#include "vpx/vpx_integer.h"
-#include "vp9/common/vp9_enums.h"
-
-struct macroblockd;
-
-/* Encoder forward decls */
-struct macroblock;
-struct vp9_variance_vtable;
-
-#define DEC_MVCOSTS int *mvjcost, int *mvcost[2]
-struct mv;
-union int_mv;
-struct yv12_buffer_config;
-EOF
-}
-forward_decls vp9_common_forward_decls
-
-# x86inc.asm doesn't work if pic is enabled on 32 bit platforms so no assembly.
-[ "$CONFIG_USE_X86INC" = "yes" ] && mmx_x86inc=mmx && sse_x86inc=sse &&
-  sse2_x86inc=sse2 && ssse3_x86inc=ssse3 && avx_x86inc=avx && avx2_x86inc=avx2
-
-# this variable is for functions that are 64 bit only.
-[ $arch = "x86_64" ] && mmx_x86_64=mmx && sse2_x86_64=sse2 && 
-  ssse3_x86_64=ssse3 && avx_x86_64=avx && avx2_x86_64=avx2
-
-#
-# RECON
-#
-prototype void vp9_d207_predictor_4x4 "uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left"
-specialize vp9_d207_predictor_4x4 $ssse3_x86inc
-
-prototype void vp9_d45_predictor_4x4 "uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left"
-specialize vp9_d45_predictor_4x4 $ssse3_x86inc
-
-prototype void vp9_d63_predictor_4x4 "uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left"
-specialize vp9_d63_predictor_4x4 $ssse3_x86inc
-
-prototype void vp9_h_predictor_4x4 "uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left"
-specialize vp9_h_predictor_4x4 $ssse3_x86inc neon dspr2
-
-prototype void vp9_d117_predictor_4x4 "uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left"
-specialize vp9_d117_predictor_4x4
-
-prototype void vp9_d135_predictor_4x4 "uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left"
-specialize vp9_d135_predictor_4x4
-
-prototype void vp9_d153_predictor_4x4 "uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left"
-specialize vp9_d153_predictor_4x4 $ssse3_x86inc
-
-prototype void vp9_v_predictor_4x4 "uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left"
-specialize vp9_v_predictor_4x4 $sse_x86inc neon
-
-prototype void vp9_tm_predictor_4x4 "uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left"
-specialize vp9_tm_predictor_4x4 $sse_x86inc neon dspr2
-
-prototype void vp9_dc_predictor_4x4 "uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left"
-specialize vp9_dc_predictor_4x4 $sse_x86inc dspr2
-
-prototype void vp9_dc_top_predictor_4x4 "uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left"
-specialize vp9_dc_top_predictor_4x4
-
-prototype void vp9_dc_left_predictor_4x4 "uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left"
-specialize vp9_dc_left_predictor_4x4
-
-prototype void vp9_dc_128_predictor_4x4 "uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left"
-specialize vp9_dc_128_predictor_4x4
-
-prototype void vp9_d207_predictor_8x8 "uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left"
-specialize vp9_d207_predictor_8x8 $ssse3_x86inc
-
-prototype void vp9_d45_predictor_8x8 "uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left"
-specialize vp9_d45_predictor_8x8 $ssse3_x86inc
-
-prototype void vp9_d63_predictor_8x8 "uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left"
-specialize vp9_d63_predictor_8x8 $ssse3_x86inc
-
-prototype void vp9_h_predictor_8x8 "uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left"
-specialize vp9_h_predictor_8x8 $ssse3_x86inc neon dspr2
-
-prototype void vp9_d117_predictor_8x8 "uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left"
-specialize vp9_d117_predictor_8x8
-
-prototype void vp9_d135_predictor_8x8 "uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left"
-specialize vp9_d135_predictor_8x8
-
-prototype void vp9_d153_predictor_8x8 "uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left"
-specialize vp9_d153_predictor_8x8 $ssse3_x86inc
-
-prototype void vp9_v_predictor_8x8 "uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left"
-specialize vp9_v_predictor_8x8 $sse_x86inc neon
-
-prototype void vp9_tm_predictor_8x8 "uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left"
-specialize vp9_tm_predictor_8x8 $sse2_x86inc neon dspr2
-
-prototype void vp9_dc_predictor_8x8 "uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left"
-specialize vp9_dc_predictor_8x8 $sse_x86inc dspr2
-
-prototype void vp9_dc_top_predictor_8x8 "uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left"
-specialize vp9_dc_top_predictor_8x8
-
-prototype void vp9_dc_left_predictor_8x8 "uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left"
-specialize vp9_dc_left_predictor_8x8
-
-prototype void vp9_dc_128_predictor_8x8 "uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left"
-specialize vp9_dc_128_predictor_8x8
-
-prototype void vp9_d207_predictor_16x16 "uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left"
-specialize vp9_d207_predictor_16x16 $ssse3_x86inc
-
-prototype void vp9_d45_predictor_16x16 "uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left"
-specialize vp9_d45_predictor_16x16 $ssse3_x86inc
-
-prototype void vp9_d63_predictor_16x16 "uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left"
-specialize vp9_d63_predictor_16x16 $ssse3_x86inc
-
-prototype void vp9_h_predictor_16x16 "uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left"
-specialize vp9_h_predictor_16x16 $ssse3_x86inc neon dspr2
-
-prototype void vp9_d117_predictor_16x16 "uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left"
-specialize vp9_d117_predictor_16x16
-
-prototype void vp9_d135_predictor_16x16 "uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left"
-specialize vp9_d135_predictor_16x16
-
-prototype void vp9_d153_predictor_16x16 "uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left"
-specialize vp9_d153_predictor_16x16 $ssse3_x86inc
-
-prototype void vp9_v_predictor_16x16 "uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left"
-specialize vp9_v_predictor_16x16 $sse2_x86inc neon
-
-prototype void vp9_tm_predictor_16x16 "uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left"
-specialize vp9_tm_predictor_16x16 $sse2_x86inc neon
-
-prototype void vp9_dc_predictor_16x16 "uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left"
-specialize vp9_dc_predictor_16x16 $sse2_x86inc dspr2
-
-prototype void vp9_dc_top_predictor_16x16 "uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left"
-specialize vp9_dc_top_predictor_16x16
-
-prototype void vp9_dc_left_predictor_16x16 "uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left"
-specialize vp9_dc_left_predictor_16x16
-
-prototype void vp9_dc_128_predictor_16x16 "uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left"
-specialize vp9_dc_128_predictor_16x16
-
-prototype void vp9_d207_predictor_32x32 "uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left"
-specialize vp9_d207_predictor_32x32 $ssse3_x86inc
-
-prototype void vp9_d45_predictor_32x32 "uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left"
-specialize vp9_d45_predictor_32x32 $ssse3_x86inc
-
-prototype void vp9_d63_predictor_32x32 "uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left"
-specialize vp9_d63_predictor_32x32 $ssse3_x86inc
-
-prototype void vp9_h_predictor_32x32 "uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left"
-specialize vp9_h_predictor_32x32 $ssse3_x86inc neon
-
-prototype void vp9_d117_predictor_32x32 "uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left"
-specialize vp9_d117_predictor_32x32
-
-prototype void vp9_d135_predictor_32x32 "uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left"
-specialize vp9_d135_predictor_32x32
-
-prototype void vp9_d153_predictor_32x32 "uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left"
-specialize vp9_d153_predictor_32x32
-
-prototype void vp9_v_predictor_32x32 "uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left"
-specialize vp9_v_predictor_32x32 $sse2_x86inc neon
-
-prototype void vp9_tm_predictor_32x32 "uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left"
-specialize vp9_tm_predictor_32x32 $sse2_x86_64 neon
-
-prototype void vp9_dc_predictor_32x32 "uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left"
-specialize vp9_dc_predictor_32x32 $sse2_x86inc
-
-prototype void vp9_dc_top_predictor_32x32 "uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left"
-specialize vp9_dc_top_predictor_32x32
-
-prototype void vp9_dc_left_predictor_32x32 "uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left"
-specialize vp9_dc_left_predictor_32x32
-
-prototype void vp9_dc_128_predictor_32x32 "uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left"
-specialize vp9_dc_128_predictor_32x32
-
-#
-# Loopfilter
-#
-prototype void vp9_lpf_vertical_16 "uint8_t *s, int pitch, const uint8_t *blimit, const uint8_t *limit, const uint8_t *thresh"
-specialize vp9_lpf_vertical_16 sse2 neon dspr2
-
-prototype void vp9_lpf_vertical_16_dual "uint8_t *s, int pitch, const uint8_t *blimit, const uint8_t *limit, const uint8_t *thresh"
-specialize vp9_lpf_vertical_16_dual sse2 neon dspr2
-
-prototype void vp9_lpf_vertical_8 "uint8_t *s, int pitch, const uint8_t *blimit, const uint8_t *limit, const uint8_t *thresh, int count"
-specialize vp9_lpf_vertical_8 sse2 neon dspr2
-
-prototype void vp9_lpf_vertical_8_dual "uint8_t *s, int pitch, const uint8_t *blimit0, const uint8_t *limit0, const uint8_t *thresh0, const uint8_t *blimit1, const uint8_t *limit1, const uint8_t *thresh1"
-specialize vp9_lpf_vertical_8_dual sse2 neon dspr2
-
-prototype void vp9_lpf_vertical_4 "uint8_t *s, int pitch, const uint8_t *blimit, const uint8_t *limit, const uint8_t *thresh, int count"
-specialize vp9_lpf_vertical_4 mmx neon dspr2
-
-prototype void vp9_lpf_vertical_4_dual "uint8_t *s, int pitch, const uint8_t *blimit0, const uint8_t *limit0, const uint8_t *thresh0, const uint8_t *blimit1, const uint8_t *limit1, const uint8_t *thresh1"
-specialize vp9_lpf_vertical_4_dual sse2 neon dspr2
-
-prototype void vp9_lpf_horizontal_16 "uint8_t *s, int pitch, const uint8_t *blimit, const uint8_t *limit, const uint8_t *thresh, int count"
-specialize vp9_lpf_horizontal_16 sse2 avx2 neon dspr2
-
-prototype void vp9_lpf_horizontal_8 "uint8_t *s, int pitch, const uint8_t *blimit, const uint8_t *limit, const uint8_t *thresh, int count"
-specialize vp9_lpf_horizontal_8 sse2 neon dspr2
-
-prototype void vp9_lpf_horizontal_8_dual "uint8_t *s, int pitch, const uint8_t *blimit0, const uint8_t *limit0, const uint8_t *thresh0, const uint8_t *blimit1, const uint8_t *limit1, const uint8_t *thresh1"
-specialize vp9_lpf_horizontal_8_dual sse2 neon dspr2
-
-prototype void vp9_lpf_horizontal_4 "uint8_t *s, int pitch, const uint8_t *blimit, const uint8_t *limit, const uint8_t *thresh, int count"
-specialize vp9_lpf_horizontal_4 mmx neon dspr2
-
-prototype void vp9_lpf_horizontal_4_dual "uint8_t *s, int pitch, const uint8_t *blimit0, const uint8_t *limit0, const uint8_t *thresh0, const uint8_t *blimit1, const uint8_t *limit1, const uint8_t *thresh1"
-specialize vp9_lpf_horizontal_4_dual sse2 neon dspr2
-
-#
-# post proc
-#
-if [ "$CONFIG_VP9_POSTPROC" = "yes" ]; then
-prototype void vp9_mbpost_proc_down "uint8_t *dst, int pitch, int rows, int cols, int flimit"
-specialize vp9_mbpost_proc_down mmx sse2
-vp9_mbpost_proc_down_sse2=vp9_mbpost_proc_down_xmm
-
-prototype void vp9_mbpost_proc_across_ip "uint8_t *src, int pitch, int rows, int cols, int flimit"
-specialize vp9_mbpost_proc_across_ip sse2
-vp9_mbpost_proc_across_ip_sse2=vp9_mbpost_proc_across_ip_xmm
-
-prototype void vp9_post_proc_down_and_across "const uint8_t *src_ptr, uint8_t *dst_ptr, int src_pixels_per_line, int dst_pixels_per_line, int rows, int cols, int flimit"
-specialize vp9_post_proc_down_and_across mmx sse2
-vp9_post_proc_down_and_across_sse2=vp9_post_proc_down_and_across_xmm
-
-prototype void vp9_plane_add_noise "uint8_t *Start, char *noise, char blackclamp[16], char whiteclamp[16], char bothclamp[16], unsigned int Width, unsigned int Height, int Pitch"
-specialize vp9_plane_add_noise mmx sse2
-vp9_plane_add_noise_sse2=vp9_plane_add_noise_wmt
-fi
-
-prototype void vp9_blend_mb_inner "uint8_t *y, uint8_t *u, uint8_t *v, int y1, int u1, int v1, int alpha, int stride"
-specialize vp9_blend_mb_inner
-
-prototype void vp9_blend_mb_outer "uint8_t *y, uint8_t *u, uint8_t *v, int y1, int u1, int v1, int alpha, int stride"
-specialize vp9_blend_mb_outer
-
-prototype void vp9_blend_b "uint8_t *y, uint8_t *u, uint8_t *v, int y1, int u1, int v1, int alpha, int stride"
-specialize vp9_blend_b
-
-#
-# Sub Pixel Filters
-#
-prototype void vp9_convolve_copy "const uint8_t *src, ptrdiff_t src_stride, uint8_t *dst, ptrdiff_t dst_stride, const int16_t *filter_x, int x_step_q4, const int16_t *filter_y, int y_step_q4, int w, int h"
-specialize vp9_convolve_copy $sse2_x86inc neon dspr2
-
-prototype void vp9_convolve_avg "const uint8_t *src, ptrdiff_t src_stride, uint8_t *dst, ptrdiff_t dst_stride, const int16_t *filter_x, int x_step_q4, const int16_t *filter_y, int y_step_q4, int w, int h"
-specialize vp9_convolve_avg $sse2_x86inc neon dspr2
-
-prototype void vp9_convolve8 "const uint8_t *src, ptrdiff_t src_stride, uint8_t *dst, ptrdiff_t dst_stride, const int16_t *filter_x, int x_step_q4, const int16_t *filter_y, int y_step_q4, int w, int h"
-specialize vp9_convolve8 sse2 ssse3 avx2 neon dspr2
-
-prototype void vp9_convolve8_horiz "const uint8_t *src, ptrdiff_t src_stride, uint8_t *dst, ptrdiff_t dst_stride, const int16_t *filter_x, int x_step_q4, const int16_t *filter_y, int y_step_q4, int w, int h"
-specialize vp9_convolve8_horiz sse2 ssse3 avx2 neon dspr2
-
-prototype void vp9_convolve8_vert "const uint8_t *src, ptrdiff_t src_stride, uint8_t *dst, ptrdiff_t dst_stride, const int16_t *filter_x, int x_step_q4, const int16_t *filter_y, int y_step_q4, int w, int h"
-specialize vp9_convolve8_vert sse2 ssse3 avx2 neon dspr2
-
-prototype void vp9_convolve8_avg "const uint8_t *src, ptrdiff_t src_stride, uint8_t *dst, ptrdiff_t dst_stride, const int16_t *filter_x, int x_step_q4, const int16_t *filter_y, int y_step_q4, int w, int h"
-specialize vp9_convolve8_avg sse2 ssse3 neon dspr2
-
-prototype void vp9_convolve8_avg_horiz "const uint8_t *src, ptrdiff_t src_stride, uint8_t *dst, ptrdiff_t dst_stride, const int16_t *filter_x, int x_step_q4, const int16_t *filter_y, int y_step_q4, int w, int h"
-specialize vp9_convolve8_avg_horiz sse2 ssse3 neon dspr2
-
-prototype void vp9_convolve8_avg_vert "const uint8_t *src, ptrdiff_t src_stride, uint8_t *dst, ptrdiff_t dst_stride, const int16_t *filter_x, int x_step_q4, const int16_t *filter_y, int y_step_q4, int w, int h"
-specialize vp9_convolve8_avg_vert sse2 ssse3 neon dspr2
-
-#
-# dct
-#
-prototype void vp9_idct4x4_1_add "const int16_t *input, uint8_t *dest, int dest_stride"
-specialize vp9_idct4x4_1_add sse2 neon dspr2
-
-prototype void vp9_idct4x4_16_add "const int16_t *input, uint8_t *dest, int dest_stride"
-specialize vp9_idct4x4_16_add sse2 neon dspr2
-
-prototype void vp9_idct8x8_1_add "const int16_t *input, uint8_t *dest, int dest_stride"
-specialize vp9_idct8x8_1_add sse2 neon dspr2
-
-prototype void vp9_idct8x8_64_add "const int16_t *input, uint8_t *dest, int dest_stride"
-specialize vp9_idct8x8_64_add sse2 neon dspr2
-
-prototype void vp9_idct8x8_10_add "const int16_t *input, uint8_t *dest, int dest_stride"
-specialize vp9_idct8x8_10_add sse2 neon dspr2
-
-prototype void vp9_idct16x16_1_add "const int16_t *input, uint8_t *dest, int dest_stride"
-specialize vp9_idct16x16_1_add sse2 neon dspr2
-
-prototype void vp9_idct16x16_256_add "const int16_t *input, uint8_t *dest, int dest_stride"
-specialize vp9_idct16x16_256_add sse2 neon dspr2
-
-prototype void vp9_idct16x16_10_add "const int16_t *input, uint8_t *dest, int dest_stride"
-specialize vp9_idct16x16_10_add sse2 neon dspr2
-
-prototype void vp9_idct32x32_1024_add "const int16_t *input, uint8_t *dest, int dest_stride"
-specialize vp9_idct32x32_1024_add sse2 neon dspr2
-
-prototype void vp9_idct32x32_34_add "const int16_t *input, uint8_t *dest, int dest_stride"
-specialize vp9_idct32x32_34_add sse2 neon dspr2
-vp9_idct32x32_34_add_neon=vp9_idct32x32_1024_add_neon
-
-prototype void vp9_idct32x32_1_add "const int16_t *input, uint8_t *dest, int dest_stride"
-specialize vp9_idct32x32_1_add sse2 neon dspr2
-
-prototype void vp9_iht4x4_16_add "const int16_t *input, uint8_t *dest, int dest_stride, int tx_type"
-specialize vp9_iht4x4_16_add sse2 neon dspr2
-
-prototype void vp9_iht8x8_64_add "const int16_t *input, uint8_t *dest, int dest_stride, int tx_type"
-specialize vp9_iht8x8_64_add sse2 neon dspr2
-
-prototype void vp9_iht16x16_256_add "const int16_t *input, uint8_t *output, int pitch, int tx_type"
-specialize vp9_iht16x16_256_add sse2 dspr2
-
-# dct and add
-
-prototype void vp9_iwht4x4_1_add "const int16_t *input, uint8_t *dest, int dest_stride"
-specialize vp9_iwht4x4_1_add
-
-prototype void vp9_iwht4x4_16_add "const int16_t *input, uint8_t *dest, int dest_stride"
-specialize vp9_iwht4x4_16_add
-
-#
-# Encoder functions below this point.
-#
-if [ "$CONFIG_VP9_ENCODER" = "yes" ]; then
-
-
-# variance
-prototype unsigned int vp9_variance32x16 "const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse"
-specialize vp9_variance32x16 $sse2_x86inc $avx2_x86inc
-
-prototype unsigned int vp9_variance16x32 "const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse"
-specialize vp9_variance16x32 $sse2_x86inc
-
-prototype unsigned int vp9_variance64x32 "const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse"
-specialize vp9_variance64x32 $sse2_x86inc $avx2_x86inc
-
-prototype unsigned int vp9_variance32x64 "const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse"
-specialize vp9_variance32x64 $sse2_x86inc
-
-prototype unsigned int vp9_variance32x32 "const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse"
-specialize vp9_variance32x32 $sse2_x86inc $avx2_x86inc
-
-prototype unsigned int vp9_variance64x64 "const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse"
-specialize vp9_variance64x64 $sse2_x86inc $avx2_x86inc
-
-prototype unsigned int vp9_variance16x16 "const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse"
-specialize vp9_variance16x16 mmx $sse2_x86inc $avx2_x86inc
-
-prototype unsigned int vp9_variance16x8 "const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse"
-specialize vp9_variance16x8 mmx $sse2_x86inc
-
-prototype unsigned int vp9_variance8x16 "const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse"
-specialize vp9_variance8x16 mmx $sse2_x86inc
-
-prototype unsigned int vp9_variance8x8 "const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse"
-specialize vp9_variance8x8 mmx $sse2_x86inc
-
-prototype void vp9_get_sse_sum_8x8 "const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse, int *sum"
-specialize vp9_get_sse_sum_8x8 sse2
-vp9_get_sse_sum_8x8_sse2=vp9_get8x8var_sse2
-
-prototype unsigned int vp9_variance8x4 "const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse"
-specialize vp9_variance8x4 $sse2_x86inc
-
-prototype unsigned int vp9_variance4x8 "const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse"
-specialize vp9_variance4x8 $sse2_x86inc
-
-prototype unsigned int vp9_variance4x4 "const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse"
-specialize vp9_variance4x4 mmx $sse2_x86inc
-
-prototype unsigned int vp9_sub_pixel_variance64x64 "const uint8_t *src_ptr, int source_stride, int xoffset, int  yoffset, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse"
-specialize vp9_sub_pixel_variance64x64 $sse2_x86inc $ssse3_x86inc avx2
-
-prototype unsigned int vp9_sub_pixel_avg_variance64x64 "const uint8_t *src_ptr, int source_stride, int xoffset, int  yoffset, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse, const uint8_t *second_pred"
-specialize vp9_sub_pixel_avg_variance64x64 $sse2_x86inc $ssse3_x86inc avx2
-
-prototype unsigned int vp9_sub_pixel_variance32x64 "const uint8_t *src_ptr, int source_stride, int xoffset, int  yoffset, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse"
-specialize vp9_sub_pixel_variance32x64 $sse2_x86inc $ssse3_x86inc
-
-prototype unsigned int vp9_sub_pixel_avg_variance32x64 "const uint8_t *src_ptr, int source_stride, int xoffset, int  yoffset, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse, const uint8_t *second_pred"
-specialize vp9_sub_pixel_avg_variance32x64 $sse2_x86inc $ssse3_x86inc
-
-prototype unsigned int vp9_sub_pixel_variance64x32 "const uint8_t *src_ptr, int source_stride, int xoffset, int  yoffset, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse"
-specialize vp9_sub_pixel_variance64x32 $sse2_x86inc $ssse3_x86inc
-
-prototype unsigned int vp9_sub_pixel_avg_variance64x32 "const uint8_t *src_ptr, int source_stride, int xoffset, int  yoffset, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse, const uint8_t *second_pred"
-specialize vp9_sub_pixel_avg_variance64x32 $sse2_x86inc $ssse3_x86inc
-
-prototype unsigned int vp9_sub_pixel_variance32x16 "const uint8_t *src_ptr, int source_stride, int xoffset, int  yoffset, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse"
-specialize vp9_sub_pixel_variance32x16 $sse2_x86inc $ssse3_x86inc
-
-prototype unsigned int vp9_sub_pixel_avg_variance32x16 "const uint8_t *src_ptr, int source_stride, int xoffset, int  yoffset, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse, const uint8_t *second_pred"
-specialize vp9_sub_pixel_avg_variance32x16 $sse2_x86inc $ssse3_x86inc
-
-prototype unsigned int vp9_sub_pixel_variance16x32 "const uint8_t *src_ptr, int source_stride, int xoffset, int  yoffset, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse"
-specialize vp9_sub_pixel_variance16x32 $sse2_x86inc $ssse3_x86inc
-
-prototype unsigned int vp9_sub_pixel_avg_variance16x32 "const uint8_t *src_ptr, int source_stride, int xoffset, int  yoffset, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse, const uint8_t *second_pred"
-specialize vp9_sub_pixel_avg_variance16x32 $sse2_x86inc $ssse3_x86inc
-
-prototype unsigned int vp9_sub_pixel_variance32x32 "const uint8_t *src_ptr, int source_stride, int xoffset, int  yoffset, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse"
-specialize vp9_sub_pixel_variance32x32 $sse2_x86inc $ssse3_x86inc avx2
-
-prototype unsigned int vp9_sub_pixel_avg_variance32x32 "const uint8_t *src_ptr, int source_stride, int xoffset, int  yoffset, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse, const uint8_t *second_pred"
-specialize vp9_sub_pixel_avg_variance32x32 $sse2_x86inc $ssse3_x86inc avx2
-
-prototype unsigned int vp9_sub_pixel_variance16x16 "const uint8_t *src_ptr, int source_stride, int xoffset, int  yoffset, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse"
-specialize vp9_sub_pixel_variance16x16 $sse2_x86inc $ssse3_x86inc
-
-prototype unsigned int vp9_sub_pixel_avg_variance16x16 "const uint8_t *src_ptr, int source_stride, int xoffset, int  yoffset, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse, const uint8_t *second_pred"
-specialize vp9_sub_pixel_avg_variance16x16 $sse2_x86inc $ssse3_x86inc
-
-prototype unsigned int vp9_sub_pixel_variance8x16 "const uint8_t *src_ptr, int source_stride, int xoffset, int  yoffset, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse"
-specialize vp9_sub_pixel_variance8x16 $sse2_x86inc $ssse3_x86inc
-
-prototype unsigned int vp9_sub_pixel_avg_variance8x16 "const uint8_t *src_ptr, int source_stride, int xoffset, int  yoffset, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse, const uint8_t *second_pred"
-specialize vp9_sub_pixel_avg_variance8x16 $sse2_x86inc $ssse3_x86inc
-
-prototype unsigned int vp9_sub_pixel_variance16x8 "const uint8_t *src_ptr, int source_stride, int xoffset, int  yoffset, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse"
-specialize vp9_sub_pixel_variance16x8 $sse2_x86inc $ssse3_x86inc
-
-prototype unsigned int vp9_sub_pixel_avg_variance16x8 "const uint8_t *src_ptr, int source_stride, int xoffset, int  yoffset, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse, const uint8_t *second_pred"
-specialize vp9_sub_pixel_avg_variance16x8 $sse2_x86inc $ssse3_x86inc
-
-prototype unsigned int vp9_sub_pixel_variance8x8 "const uint8_t *src_ptr, int source_stride, int xoffset, int  yoffset, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse"
-specialize vp9_sub_pixel_variance8x8 $sse2_x86inc $ssse3_x86inc
-
-prototype unsigned int vp9_sub_pixel_avg_variance8x8 "const uint8_t *src_ptr, int source_stride, int xoffset, int  yoffset, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse, const uint8_t *second_pred"
-specialize vp9_sub_pixel_avg_variance8x8 $sse2_x86inc $ssse3_x86inc
-
-# TODO(jingning): need to convert 8x4/4x8 functions into mmx/sse form
-prototype unsigned int vp9_sub_pixel_variance8x4 "const uint8_t *src_ptr, int source_stride, int xoffset, int yoffset, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse"
-specialize vp9_sub_pixel_variance8x4 $sse2_x86inc $ssse3_x86inc
-
-prototype unsigned int vp9_sub_pixel_avg_variance8x4 "const uint8_t *src_ptr, int source_stride, int xoffset, int yoffset, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse, const uint8_t *second_pred"
-specialize vp9_sub_pixel_avg_variance8x4 $sse2_x86inc $ssse3_x86inc
-
-prototype unsigned int vp9_sub_pixel_variance4x8 "const uint8_t *src_ptr, int source_stride, int xoffset, int yoffset, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse"
-specialize vp9_sub_pixel_variance4x8 $sse_x86inc $ssse3_x86inc
-
-prototype unsigned int vp9_sub_pixel_avg_variance4x8 "const uint8_t *src_ptr, int source_stride, int xoffset, int yoffset, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse, const uint8_t *second_pred"
-specialize vp9_sub_pixel_avg_variance4x8 $sse_x86inc $ssse3_x86inc
-
-prototype unsigned int vp9_sub_pixel_variance4x4 "const uint8_t *src_ptr, int source_stride, int xoffset, int  yoffset, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse"
-specialize vp9_sub_pixel_variance4x4 $sse_x86inc $ssse3_x86inc
-#vp9_sub_pixel_variance4x4_sse2=vp9_sub_pixel_variance4x4_wmt
-
-prototype unsigned int vp9_sub_pixel_avg_variance4x4 "const uint8_t *src_ptr, int source_stride, int xoffset, int  yoffset, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse, const uint8_t *second_pred"
-specialize vp9_sub_pixel_avg_variance4x4 $sse_x86inc $ssse3_x86inc
-
-prototype unsigned int vp9_sad64x64 "const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int  ref_stride, unsigned int max_sad"
-specialize vp9_sad64x64 $sse2_x86inc
-
-prototype unsigned int vp9_sad32x64 "const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int max_sad"
-specialize vp9_sad32x64 $sse2_x86inc
-
-prototype unsigned int vp9_sad64x32 "const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int max_sad"
-specialize vp9_sad64x32 $sse2_x86inc
-
-prototype unsigned int vp9_sad32x16 "const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int max_sad"
-specialize vp9_sad32x16 $sse2_x86inc
-
-prototype unsigned int vp9_sad16x32 "const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int max_sad"
-specialize vp9_sad16x32 $sse2_x86inc
-
-prototype unsigned int vp9_sad32x32 "const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int  ref_stride, unsigned int max_sad"
-specialize vp9_sad32x32 $sse2_x86inc
-
-prototype unsigned int vp9_sad16x16 "const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int  ref_stride, unsigned int max_sad"
-specialize vp9_sad16x16 mmx $sse2_x86inc
-
-prototype unsigned int vp9_sad16x8 "const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int  ref_stride, unsigned int max_sad"
-specialize vp9_sad16x8 mmx $sse2_x86inc
-
-prototype unsigned int vp9_sad8x16 "const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int  ref_stride, unsigned int max_sad"
-specialize vp9_sad8x16 mmx $sse2_x86inc
-
-prototype unsigned int vp9_sad8x8 "const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int  ref_stride, unsigned int max_sad"
-specialize vp9_sad8x8 mmx $sse2_x86inc
-
-prototype unsigned int vp9_sad8x4 "const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int max_sad"
-specialize vp9_sad8x4 $sse2_x86inc
-
-prototype unsigned int vp9_sad4x8 "const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int max_sad"
-specialize vp9_sad4x8 $sse_x86inc
-
-prototype unsigned int vp9_sad4x4 "const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int  ref_stride, unsigned int max_sad"
-specialize vp9_sad4x4 mmx $sse_x86inc
-
-prototype unsigned int vp9_sad64x64_avg "const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int  ref_stride, const uint8_t *second_pred, unsigned int max_sad"
-specialize vp9_sad64x64_avg $sse2_x86inc
-
-prototype unsigned int vp9_sad32x64_avg "const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, const uint8_t *second_pred, unsigned int max_sad"
-specialize vp9_sad32x64_avg $sse2_x86inc
-
-prototype unsigned int vp9_sad64x32_avg "const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, const uint8_t *second_pred, unsigned int max_sad"
-specialize vp9_sad64x32_avg $sse2_x86inc
-
-prototype unsigned int vp9_sad32x16_avg "const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, const uint8_t *second_pred, unsigned int max_sad"
-specialize vp9_sad32x16_avg $sse2_x86inc
-
-prototype unsigned int vp9_sad16x32_avg "const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, const uint8_t *second_pred, unsigned int max_sad"
-specialize vp9_sad16x32_avg $sse2_x86inc
-
-prototype unsigned int vp9_sad32x32_avg "const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int  ref_stride, const uint8_t *second_pred, unsigned int max_sad"
-specialize vp9_sad32x32_avg $sse2_x86inc
-
-prototype unsigned int vp9_sad16x16_avg "const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int  ref_stride, const uint8_t *second_pred, unsigned int max_sad"
-specialize vp9_sad16x16_avg $sse2_x86inc
-
-prototype unsigned int vp9_sad16x8_avg "const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int  ref_stride, const uint8_t *second_pred, unsigned int max_sad"
-specialize vp9_sad16x8_avg $sse2_x86inc
-
-prototype unsigned int vp9_sad8x16_avg "const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int  ref_stride, const uint8_t *second_pred, unsigned int max_sad"
-specialize vp9_sad8x16_avg $sse2_x86inc
-
-prototype unsigned int vp9_sad8x8_avg "const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int  ref_stride, const uint8_t *second_pred, unsigned int max_sad"
-specialize vp9_sad8x8_avg $sse2_x86inc
-
-prototype unsigned int vp9_sad8x4_avg "const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, const uint8_t *second_pred, unsigned int max_sad"
-specialize vp9_sad8x4_avg $sse2_x86inc
-
-prototype unsigned int vp9_sad4x8_avg "const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, const uint8_t *second_pred, unsigned int max_sad"
-specialize vp9_sad4x8_avg $sse_x86inc
-
-prototype unsigned int vp9_sad4x4_avg "const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int  ref_stride, const uint8_t *second_pred, unsigned int max_sad"
-specialize vp9_sad4x4_avg $sse_x86inc
-
-prototype unsigned int vp9_variance_halfpixvar16x16_h "const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse"
-specialize vp9_variance_halfpixvar16x16_h $sse2_x86inc
-
-prototype unsigned int vp9_variance_halfpixvar16x16_v "const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse"
-specialize vp9_variance_halfpixvar16x16_v $sse2_x86inc
-
-prototype unsigned int vp9_variance_halfpixvar16x16_hv "const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse"
-specialize vp9_variance_halfpixvar16x16_hv $sse2_x86inc
-
-prototype unsigned int vp9_variance_halfpixvar64x64_h "const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse"
-specialize vp9_variance_halfpixvar64x64_h
-
-prototype unsigned int vp9_variance_halfpixvar64x64_v "const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse"
-specialize vp9_variance_halfpixvar64x64_v
-
-prototype unsigned int vp9_variance_halfpixvar64x64_hv "const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse"
-specialize vp9_variance_halfpixvar64x64_hv
-
-prototype unsigned int vp9_variance_halfpixvar32x32_h "const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse"
-specialize vp9_variance_halfpixvar32x32_h
-
-prototype unsigned int vp9_variance_halfpixvar32x32_v "const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse"
-specialize vp9_variance_halfpixvar32x32_v
-
-prototype unsigned int vp9_variance_halfpixvar32x32_hv "const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse"
-specialize vp9_variance_halfpixvar32x32_hv
-
-prototype void vp9_sad64x64x3 "const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int  ref_stride, unsigned int *sad_array"
-specialize vp9_sad64x64x3
-
-prototype void vp9_sad32x32x3 "const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int  ref_stride, unsigned int *sad_array"
-specialize vp9_sad32x32x3
-
-prototype void vp9_sad16x16x3 "const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int  ref_stride, unsigned int *sad_array"
-specialize vp9_sad16x16x3 sse3 ssse3
-
-prototype void vp9_sad16x8x3 "const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int  ref_stride, unsigned int *sad_array"
-specialize vp9_sad16x8x3 sse3 ssse3
-
-prototype void vp9_sad8x16x3 "const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int  ref_stride, unsigned int *sad_array"
-specialize vp9_sad8x16x3 sse3
-
-prototype void vp9_sad8x8x3 "const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int  ref_stride, unsigned int *sad_array"
-specialize vp9_sad8x8x3 sse3
-
-prototype void vp9_sad4x4x3 "const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int  ref_stride, unsigned int *sad_array"
-specialize vp9_sad4x4x3 sse3
-
-prototype void vp9_sad64x64x8 "const uint8_t *src_ptr, int  src_stride, const uint8_t *ref_ptr, int  ref_stride, uint32_t *sad_array"
-specialize vp9_sad64x64x8
-
-prototype void vp9_sad32x32x8 "const uint8_t *src_ptr, int  src_stride, const uint8_t *ref_ptr, int  ref_stride, uint32_t *sad_array"
-specialize vp9_sad32x32x8
-
-prototype void vp9_sad16x16x8 "const uint8_t *src_ptr, int  src_stride, const uint8_t *ref_ptr, int  ref_stride, uint32_t *sad_array"
-specialize vp9_sad16x16x8 sse4
-
-prototype void vp9_sad16x8x8 "const uint8_t *src_ptr, int  src_stride, const uint8_t *ref_ptr, int  ref_stride, uint32_t *sad_array"
-specialize vp9_sad16x8x8 sse4
-
-prototype void vp9_sad8x16x8 "const uint8_t *src_ptr, int  src_stride, const uint8_t *ref_ptr, int  ref_stride, uint32_t *sad_array"
-specialize vp9_sad8x16x8 sse4
-
-prototype void vp9_sad8x8x8 "const uint8_t *src_ptr, int  src_stride, const uint8_t *ref_ptr, int  ref_stride, uint32_t *sad_array"
-specialize vp9_sad8x8x8 sse4
-
-prototype void vp9_sad8x4x8 "const uint8_t *src_ptr, int src_stride, const uint8_t *ref_ptr, int ref_stride, uint32_t *sad_array"
-specialize vp9_sad8x4x8
-
-prototype void vp9_sad4x8x8 "const uint8_t *src_ptr, int src_stride, const uint8_t *ref_ptr, int ref_stride, uint32_t *sad_array"
-specialize vp9_sad4x8x8
-
-prototype void vp9_sad4x4x8 "const uint8_t *src_ptr, int  src_stride, const uint8_t *ref_ptr, int  ref_stride, uint32_t *sad_array"
-specialize vp9_sad4x4x8 sse4
-
-prototype void vp9_sad64x64x4d "const uint8_t *src_ptr, int  src_stride, const uint8_t* const ref_ptr[], int  ref_stride, unsigned int *sad_array"
-specialize vp9_sad64x64x4d sse2
-
-prototype void vp9_sad32x64x4d "const uint8_t *src_ptr, int  src_stride, const uint8_t* const ref_ptr[], int  ref_stride, unsigned int *sad_array"
-specialize vp9_sad32x64x4d sse2
-
-prototype void vp9_sad64x32x4d "const uint8_t *src_ptr, int  src_stride, const uint8_t* const ref_ptr[], int  ref_stride, unsigned int *sad_array"
-specialize vp9_sad64x32x4d sse2
-
-prototype void vp9_sad32x16x4d "const uint8_t *src_ptr, int  src_stride, const uint8_t* const ref_ptr[], int  ref_stride, unsigned int *sad_array"
-specialize vp9_sad32x16x4d sse2
-
-prototype void vp9_sad16x32x4d "const uint8_t *src_ptr, int  src_stride, const uint8_t* const ref_ptr[], int  ref_stride, unsigned int *sad_array"
-specialize vp9_sad16x32x4d sse2
-
-prototype void vp9_sad32x32x4d "const uint8_t *src_ptr, int  src_stride, const uint8_t* const ref_ptr[], int  ref_stride, unsigned int *sad_array"
-specialize vp9_sad32x32x4d sse2
-
-prototype void vp9_sad16x16x4d "const uint8_t *src_ptr, int  src_stride, const uint8_t* const ref_ptr[], int  ref_stride, unsigned int *sad_array"
-specialize vp9_sad16x16x4d sse2
-
-prototype void vp9_sad16x8x4d "const uint8_t *src_ptr, int  src_stride, const uint8_t* const ref_ptr[], int  ref_stride, unsigned int *sad_array"
-specialize vp9_sad16x8x4d sse2
-
-prototype void vp9_sad8x16x4d "const uint8_t *src_ptr, int  src_stride, const uint8_t* const ref_ptr[], int  ref_stride, unsigned int *sad_array"
-specialize vp9_sad8x16x4d sse2
-
-prototype void vp9_sad8x8x4d "const uint8_t *src_ptr, int  src_stride, const uint8_t* const ref_ptr[], int  ref_stride, unsigned int *sad_array"
-specialize vp9_sad8x8x4d sse2
-
-# TODO(jingning): need to convert these 4x8/8x4 functions into sse2 form
-prototype void vp9_sad8x4x4d "const uint8_t *src_ptr, int src_stride, const uint8_t* const ref_ptr[], int ref_stride, unsigned int *sad_array"
-specialize vp9_sad8x4x4d sse2
-
-prototype void vp9_sad4x8x4d "const uint8_t *src_ptr, int src_stride, const uint8_t* const ref_ptr[], int ref_stride, unsigned int *sad_array"
-specialize vp9_sad4x8x4d sse
-
-prototype void vp9_sad4x4x4d "const uint8_t *src_ptr, int  src_stride, const uint8_t* const ref_ptr[], int  ref_stride, unsigned int *sad_array"
-specialize vp9_sad4x4x4d sse
-
-#prototype unsigned int vp9_sub_pixel_mse16x16 "const uint8_t *src_ptr, int  src_pixels_per_line, int  xoffset, int  yoffset, const uint8_t *dst_ptr, int dst_pixels_per_line, unsigned int *sse"
-#specialize vp9_sub_pixel_mse16x16 sse2 mmx
-
-prototype unsigned int vp9_mse16x16 "const uint8_t *src_ptr, int  source_stride, const uint8_t *ref_ptr, int  recon_stride, unsigned int *sse"
-specialize vp9_mse16x16 mmx $sse2_x86inc $avx2_x86inc
-
-prototype unsigned int vp9_mse8x16 "const uint8_t *src_ptr, int  source_stride, const uint8_t *ref_ptr, int  recon_stride, unsigned int *sse"
-specialize vp9_mse8x16
-
-prototype unsigned int vp9_mse16x8 "const uint8_t *src_ptr, int  source_stride, const uint8_t *ref_ptr, int  recon_stride, unsigned int *sse"
-specialize vp9_mse16x8
-
-prototype unsigned int vp9_mse8x8 "const uint8_t *src_ptr, int  source_stride, const uint8_t *ref_ptr, int  recon_stride, unsigned int *sse"
-specialize vp9_mse8x8
-
-prototype unsigned int vp9_sub_pixel_mse64x64 "const uint8_t *src_ptr, int  source_stride, int  xoffset, int  yoffset, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse"
-specialize vp9_sub_pixel_mse64x64
-
-prototype unsigned int vp9_sub_pixel_mse32x32 "const uint8_t *src_ptr, int  source_stride, int  xoffset, int  yoffset, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse"
-specialize vp9_sub_pixel_mse32x32
-
-prototype unsigned int vp9_get_mb_ss "const int16_t *"
-specialize vp9_get_mb_ss mmx sse2
-# ENCODEMB INVOKE
-
-prototype int64_t vp9_block_error "const int16_t *coeff, const int16_t *dqcoeff, intptr_t block_size, int64_t *ssz"
-specialize vp9_block_error $sse2_x86inc
-
-prototype void vp9_subtract_block "int rows, int cols, int16_t *diff_ptr, ptrdiff_t diff_stride, const uint8_t *src_ptr, ptrdiff_t src_stride, const uint8_t *pred_ptr, ptrdiff_t pred_stride"
-specialize vp9_subtract_block $sse2_x86inc
-
-prototype void vp9_quantize_b "const int16_t *coeff_ptr, intptr_t n_coeffs, int skip_block, const int16_t *zbin_ptr, const int16_t *round_ptr, const int16_t *quant_ptr, const int16_t *quant_shift_ptr, int16_t *qcoeff_ptr, int16_t *dqcoeff_ptr, const int16_t *dequant_ptr, int zbin_oq_value, uint16_t *eob_ptr, const int16_t *scan, const int16_t *iscan"
-specialize vp9_quantize_b $ssse3_x86_64
-
-prototype void vp9_quantize_b_32x32 "const int16_t *coeff_ptr, intptr_t n_coeffs, int skip_block, const int16_t *zbin_ptr, const int16_t *round_ptr, const int16_t *quant_ptr, const int16_t *quant_shift_ptr, int16_t *qcoeff_ptr, int16_t *dqcoeff_ptr, const int16_t *dequant_ptr, int zbin_oq_value, uint16_t *eob_ptr, const int16_t *scan, const int16_t *iscan"
-specialize vp9_quantize_b_32x32 $ssse3_x86_64
-
-#
-# Structured Similarity (SSIM)
-#
-if [ "$CONFIG_INTERNAL_STATS" = "yes" ]; then
-    prototype void vp9_ssim_parms_8x8 "uint8_t *s, int sp, uint8_t *r, int rp, unsigned long *sum_s, unsigned long *sum_r, unsigned long *sum_sq_s, unsigned long *sum_sq_r, unsigned long *sum_sxr"
-    specialize vp9_ssim_parms_8x8 $sse2_x86_64
-
-    prototype void vp9_ssim_parms_16x16 "uint8_t *s, int sp, uint8_t *r, int rp, unsigned long *sum_s, unsigned long *sum_r, unsigned long *sum_sq_s, unsigned long *sum_sq_r, unsigned long *sum_sxr"
-    specialize vp9_ssim_parms_16x16 $sse2_x86_64
-fi
-
-# fdct functions
-prototype void vp9_fht4x4 "const int16_t *input, int16_t *output, int stride, int tx_type"
-specialize vp9_fht4x4 sse2 avx2
-
-prototype void vp9_fht8x8 "const int16_t *input, int16_t *output, int stride, int tx_type"
-specialize vp9_fht8x8 sse2 avx2
-
-prototype void vp9_fht16x16 "const int16_t *input, int16_t *output, int stride, int tx_type"
-specialize vp9_fht16x16 sse2 avx2
-
-prototype void vp9_fwht4x4 "const int16_t *input, int16_t *output, int stride"
-specialize vp9_fwht4x4
-
-prototype void vp9_fdct4x4 "const int16_t *input, int16_t *output, int stride"
-specialize vp9_fdct4x4 sse2 avx2
-
-prototype void vp9_fdct8x8 "const int16_t *input, int16_t *output, int stride"
-specialize vp9_fdct8x8 sse2 avx2
-
-prototype void vp9_fdct16x16 "const int16_t *input, int16_t *output, int stride"
-specialize vp9_fdct16x16 sse2 avx2
-
-prototype void vp9_fdct32x32 "const int16_t *input, int16_t *output, int stride"
-specialize vp9_fdct32x32 sse2 avx2
-
-prototype void vp9_fdct32x32_rd "const int16_t *input, int16_t *output, int stride"
-specialize vp9_fdct32x32_rd sse2 avx2
-
-#
-# Motion search
-#
-prototype int vp9_full_search_sad "const struct macroblock *x, const struct mv *ref_mv, int sad_per_bit, int distance, const struct vp9_variance_vtable *fn_ptr, DEC_MVCOSTS, const struct mv *center_mv, struct mv *best_mv"
-specialize vp9_full_search_sad sse3 sse4_1
-vp9_full_search_sad_sse3=vp9_full_search_sadx3
-vp9_full_search_sad_sse4_1=vp9_full_search_sadx8
-
-prototype int vp9_refining_search_sad "const struct macroblock *x, struct mv *ref_mv, int sad_per_bit, int distance, const struct vp9_variance_vtable *fn_ptr, DEC_MVCOSTS, const struct mv *center_mv"
-specialize vp9_refining_search_sad sse3
-vp9_refining_search_sad_sse3=vp9_refining_search_sadx4
-
-prototype int vp9_diamond_search_sad "const struct macroblock *x, struct mv *ref_mv, struct mv *best_mv, int search_param, int sad_per_bit, int *num00, const struct vp9_variance_vtable *fn_ptr, DEC_MVCOSTS, const struct mv *center_mv"
-specialize vp9_diamond_search_sad sse3
-vp9_diamond_search_sad_sse3=vp9_diamond_search_sadx4
-
-prototype int vp9_full_range_search "const struct macroblock *x, struct mv *ref_mv, struct mv *best_mv, int search_param, int sad_per_bit, int *num00, const struct vp9_variance_vtable *fn_ptr, DEC_MVCOSTS, const struct mv *center_mv"
-specialize vp9_full_range_search
-
-prototype void vp9_temporal_filter_apply "uint8_t *frame1, unsigned int stride, uint8_t *frame2, unsigned int block_size, int strength, int filter_weight, unsigned int *accumulator, uint16_t *count"
-specialize vp9_temporal_filter_apply sse2
-
-fi
-# end encoder functions
diff --git a/vp9/common/vp9_systemdependent.h b/vp9/common/vp9_systemdependent.h

index 72edbca..e971158 100644 (file)
--- a/vp9/common/vp9_systemdependent.h
+++ b/vp9/common/vp9_systemdependent.h
@@ -12,11 +12,11 @@
  #define VP9_COMMON_VP9_SYSTEMDEPENDENT_H_
  
  #ifdef _MSC_VER
+# include <math.h>  // the ceil() definition must precede intrin.h
  # if _MSC_VER > 1310 && (defined(_M_X64) || defined(_M_IX86))
  #  include <intrin.h>
  #  define USE_MSC_INTRIN
  # endif
-# include <math.h>
  # define snprintf _snprintf
  #endif
  
diff --git a/vp9/common/x86/vp9_subpixel_8t_intrin_avx2.c b/vp9/common/x86/vp9_subpixel_8t_intrin_avx2.c

index efa960c..7e9cc84 100644 (file)
--- a/vp9/common/x86/vp9_subpixel_8t_intrin_avx2.c
+++ b/vp9/common/x86/vp9_subpixel_8t_intrin_avx2.c
@@ -32,6 +32,26 @@ DECLARE_ALIGNED(32, static const uint8_t, filt4_global_avx2[32]) = {
    6, 7, 7, 8, 8, 9, 9, 10, 10, 11, 11, 12, 12, 13, 13, 14
  };
  
+#if defined(__clang__)
+# if __clang_major__ < 3 || (__clang_major__ == 3 && __clang_minor__ <= 3)
+#  define MM256_BROADCASTSI128_SI256(x) \
+       _mm_broadcastsi128_si256((__m128i const *)&(x))
+# else  // clang > 3.3
+#  define MM256_BROADCASTSI128_SI256(x) _mm256_broadcastsi128_si256(x)
+# endif  // clang <= 3.3
+#elif defined(__GNUC__)
+# if __GNUC__ < 4 || (__GNUC__ == 4 && __GNUC_MINOR__ <= 6)
+#  define MM256_BROADCASTSI128_SI256(x) \
+       _mm_broadcastsi128_si256((__m128i const *)&(x))
+# elif __GNUC__ == 4 && __GNUC_MINOR__ == 7
+#  define MM256_BROADCASTSI128_SI256(x) _mm_broadcastsi128_si256(x)
+# else  // gcc > 4.7
+#  define MM256_BROADCASTSI128_SI256(x) _mm256_broadcastsi128_si256(x)
+# endif  // gcc <= 4.6
+#else  // !(gcc || clang)
+# define MM256_BROADCASTSI128_SI256(x) _mm256_broadcastsi128_si256(x)
+#endif  // __clang__
+
  void vp9_filter_block1d16_h8_avx2(unsigned char *src_ptr,
                                    unsigned int src_pixels_per_line,
                                    unsigned char *output_ptr,
@@ -53,18 +73,7 @@ void vp9_filter_block1d16_h8_avx2(unsigned char *src_ptr,
    // in both lanes of 128 bit register.
    filtersReg =_mm_packs_epi16(filtersReg, filtersReg);
    // have the same data in both lanes of a 256 bit register
-#if defined (__GNUC__)
-#if ( __GNUC__ < 4 || (__GNUC__ == 4 && \
-(__GNUC_MINOR__ < 6 || (__GNUC_MINOR__ == 6 && __GNUC_PATCHLEVEL__ > 0))))
-  filtersReg32 = _mm_broadcastsi128_si256((__m128i const *)&filtersReg);
-#elif(__GNUC__ == 4 && (__GNUC_MINOR__ == 7 && __GNUC_PATCHLEVEL__ > 0))
-  filtersReg32 = _mm_broadcastsi128_si256(filtersReg);
-#else
-  filtersReg32 = _mm256_broadcastsi128_si256(filtersReg);
-#endif
-#else
-  filtersReg32 = _mm256_broadcastsi128_si256(filtersReg);
-#endif
+  filtersReg32 = MM256_BROADCASTSI128_SI256(filtersReg);
  
    // duplicate only the first 16 bits (first and second byte)
    // across 256 bit register
@@ -309,18 +318,7 @@ void vp9_filter_block1d16_v8_avx2(unsigned char *src_ptr,
    // same data in both lanes of 128 bit register.
    filtersReg =_mm_packs_epi16(filtersReg, filtersReg);
    // have the same data in both lanes of a 256 bit register
-#if defined (__GNUC__)
-#if ( __GNUC__ < 4 || (__GNUC__ == 4 && \
-(__GNUC_MINOR__ < 6 || (__GNUC_MINOR__ == 6 && __GNUC_PATCHLEVEL__ > 0))))
-  filtersReg32 = _mm_broadcastsi128_si256((__m128i const *)&filtersReg);
-#elif(__GNUC__ == 4 && (__GNUC_MINOR__ == 7 && __GNUC_PATCHLEVEL__ > 0))
-  filtersReg32 = _mm_broadcastsi128_si256(filtersReg);
-#else
-  filtersReg32 = _mm256_broadcastsi128_si256(filtersReg);
-#endif
-#else
-  filtersReg32 = _mm256_broadcastsi128_si256(filtersReg);
-#endif
+  filtersReg32 = MM256_BROADCASTSI128_SI256(filtersReg);
  
    // duplicate only the first 16 bits (first and second byte)
    // across 256 bit register
diff --git a/vp9/decoder/vp9_decodeframe.c b/vp9/decoder/vp9_decodeframe.c

index 32d7bfa..1cb741f 100644 (file)
--- a/vp9/decoder/vp9_decodeframe.c
+++ b/vp9/decoder/vp9_decodeframe.c
@@ -35,7 +35,7 @@
  #include "vp9/decoder/vp9_decodemv.h"
  #include "vp9/decoder/vp9_dsubexp.h"
  #include "vp9/decoder/vp9_dthread.h"
-#include "vp9/decoder/vp9_onyxd_int.h"
+#include "vp9/decoder/vp9_onyxd.h"
  #include "vp9/decoder/vp9_read_bit_buffer.h"
  #include "vp9/decoder/vp9_reader.h"
  #include "vp9/decoder/vp9_thread.h"
@@ -146,7 +146,7 @@ static void read_frame_reference_mode_probs(VP9_COMMON *cm, vp9_reader *r) {
  static void update_mv_probs(vp9_prob *p, int n, vp9_reader *r) {
    int i;
    for (i = 0; i < n; ++i)
-    if (vp9_read(r, NMV_UPDATE_PROB))
+    if (vp9_read(r, MV_UPDATE_PROB))
        p[i] = (vp9_read_literal(r, 7) << 1) | 1;
  }
  
diff --git a/vp9/decoder/vp9_decodemv.c b/vp9/decoder/vp9_decodemv.c

index 0fb7a15..9bc21d2 100644 (file)
--- a/vp9/decoder/vp9_decodemv.c
+++ b/vp9/decoder/vp9_decodemv.c
@@ -21,7 +21,6 @@
  
  #include "vp9/decoder/vp9_decodemv.h"
  #include "vp9/decoder/vp9_decodeframe.h"
-#include "vp9/decoder/vp9_onyxd_int.h"
  #include "vp9/decoder/vp9_reader.h"
  
  static MB_PREDICTION_MODE read_intra_mode(vp9_reader *r, const vp9_prob *p) {
diff --git a/vp9/decoder/vp9_decodemv.h b/vp9/decoder/vp9_decodemv.h

index 539c984..7394b62 100644 (file)
--- a/vp9/decoder/vp9_decodemv.h
+++ b/vp9/decoder/vp9_decodemv.h
@@ -11,7 +11,6 @@
  #ifndef VP9_DECODER_VP9_DECODEMV_H_
  #define VP9_DECODER_VP9_DECODEMV_H_
  
-#include "vp9/decoder/vp9_onyxd_int.h"
  #include "vp9/decoder/vp9_reader.h"
  
  #ifdef __cplusplus
diff --git a/vp9/decoder/vp9_detokenize.h b/vp9/decoder/vp9_detokenize.h

index ce3d765..c5198a2 100644 (file)
--- a/vp9/decoder/vp9_detokenize.h
+++ b/vp9/decoder/vp9_detokenize.h
@@ -12,7 +12,7 @@
  #ifndef VP9_DECODER_VP9_DETOKENIZE_H_
  #define VP9_DECODER_VP9_DETOKENIZE_H_
  
-#include "vp9/decoder/vp9_onyxd_int.h"
+#include "vp9/decoder/vp9_onyxd.h"
  #include "vp9/decoder/vp9_reader.h"
  
  #ifdef __cplusplus
diff --git a/vp9/decoder/vp9_dthread.c b/vp9/decoder/vp9_dthread.c

index f9dbab8..7d2179d 100644 (file)
--- a/vp9/decoder/vp9_dthread.c
+++ b/vp9/decoder/vp9_dthread.c
@@ -9,10 +9,13 @@
   */
  
  #include "./vpx_config.h"
+
+#include "vpx_mem/vpx_mem.h"
+
  #include "vp9/common/vp9_reconinter.h"
+
  #include "vp9/decoder/vp9_dthread.h"
-#include "vp9/decoder/vp9_onyxd_int.h"
-#include "vpx_mem/vpx_mem.h"
+#include "vp9/decoder/vp9_onyxd.h"
  
  #if CONFIG_MULTITHREAD
  static INLINE void mutex_lock(pthread_mutex_t *const mutex) {
diff --git a/vp9/decoder/vp9_onyxd.h b/vp9/decoder/vp9_onyxd.h

index 203e9fa..5ea251b 100644 (file)
--- a/vp9/decoder/vp9_onyxd.h
+++ b/vp9/decoder/vp9_onyxd.h
@@ -11,16 +11,22 @@
  #ifndef VP9_DECODER_VP9_ONYXD_H_
  #define VP9_DECODER_VP9_ONYXD_H_
  
+#include "./vpx_config.h"
+
+#include "vpx/vpx_codec.h"
  #include "vpx_scale/yv12config.h"
+
+#include "vp9/common/vp9_onyxc_int.h"
  #include "vp9/common/vp9_ppflags.h"
-#include "vpx/vpx_codec.h"
+
+#include "vp9/decoder/vp9_dthread.h"
+#include "vp9/decoder/vp9_onyxd.h"
+#include "vp9/decoder/vp9_thread.h"
  
  #ifdef __cplusplus
  extern "C" {
  #endif
  
-struct VP9Decompressor;
-
  typedef struct {
    int width;
    int height;
@@ -37,6 +43,44 @@ typedef enum {
    VP9_ALT_FLAG = 4
  } VP9_REFFRAME;
  
+typedef struct VP9Decompressor {
+  DECLARE_ALIGNED(16, MACROBLOCKD, mb);
+
+  DECLARE_ALIGNED(16, VP9_COMMON, common);
+
+  DECLARE_ALIGNED(16, int16_t,  dqcoeff[MAX_MB_PLANE][64 * 64]);
+
+  VP9D_CONFIG oxcf;
+
+  const uint8_t *source;
+  size_t source_sz;
+
+  int64_t last_time_stamp;
+  int ready_for_new_data;
+
+  int refresh_frame_flags;
+
+  int decoded_key_frame;
+
+  int initial_width;
+  int initial_height;
+
+  int do_loopfilter_inline;  // apply loopfilter to available rows immediately
+  VP9Worker lf_worker;
+
+  VP9Worker *tile_workers;
+  int num_tile_workers;
+
+  VP9LfSync lf_row_sync;
+
+  /* Each tile column has its own MODE_INFO stream. This array indexes them by
+     tile column index. */
+  MODE_INFO **mi_streams;
+
+  ENTROPY_CONTEXT *above_context[MAX_MB_PLANE];
+  PARTITION_CONTEXT *above_seg_context;
+} VP9D_COMP;
+
  void vp9_initialize_dec();
  
  int vp9_receive_compressed_data(struct VP9Decompressor *pbi,
diff --git a/vp9/decoder/vp9_onyxd_if.c b/vp9/decoder/vp9_onyxd_if.c

index 24248a4..7052888 100644 (file)
--- a/vp9/decoder/vp9_onyxd_if.c
+++ b/vp9/decoder/vp9_onyxd_if.c
@@ -17,7 +17,6 @@
  #include "vp9/common/vp9_postproc.h"
  #endif
  #include "vp9/decoder/vp9_onyxd.h"
-#include "vp9/decoder/vp9_onyxd_int.h"
  #include "vpx_mem/vpx_mem.h"
  #include "vp9/common/vp9_alloccommon.h"
  #include "vp9/common/vp9_loopfilter.h"
diff --git a/vp9/decoder/vp9_onyxd_int.h b/vp9/decoder/vp9_onyxd_int.h

deleted file mode 100644 (file)

index 6c6c239..0000000
--- a/vp9/decoder/vp9_onyxd_int.h
+++ /dev/null
@@ -1,67 +0,0 @@
-/*
- *  Copyright (c) 2010 The WebM project authors. All Rights Reserved.
- *
- *  Use of this source code is governed by a BSD-style license
- *  that can be found in the LICENSE file in the root of the source
- *  tree. An additional intellectual property rights grant can be found
- *  in the file PATENTS.  All contributing project authors may
- *  be found in the AUTHORS file in the root of the source tree.
- */
-
-#ifndef VP9_DECODER_VP9_ONYXD_INT_H_
-#define VP9_DECODER_VP9_ONYXD_INT_H_
-
-#include "./vpx_config.h"
-
-#include "vp9/common/vp9_onyxc_int.h"
-#include "vp9/decoder/vp9_dthread.h"
-#include "vp9/decoder/vp9_onyxd.h"
-#include "vp9/decoder/vp9_thread.h"
-
-#ifdef __cplusplus
-extern "C" {
-#endif
-
-typedef struct VP9Decompressor {
-  DECLARE_ALIGNED(16, MACROBLOCKD, mb);
-
-  DECLARE_ALIGNED(16, VP9_COMMON, common);
-
-  DECLARE_ALIGNED(16, int16_t,  dqcoeff[MAX_MB_PLANE][64 * 64]);
-
-  VP9D_CONFIG oxcf;
-
-  const uint8_t *source;
-  size_t source_sz;
-
-  int64_t last_time_stamp;
-  int ready_for_new_data;
-
-  int refresh_frame_flags;
-
-  int decoded_key_frame;
-
-  int initial_width;
-  int initial_height;
-
-  int do_loopfilter_inline;  // apply loopfilter to available rows immediately
-  VP9Worker lf_worker;
-
-  VP9Worker *tile_workers;
-  int num_tile_workers;
-
-  VP9LfSync lf_row_sync;
-
-  /* Each tile column has its own MODE_INFO stream. This array indexes them by
-     tile column index. */
-  MODE_INFO **mi_streams;
-
-  ENTROPY_CONTEXT *above_context[MAX_MB_PLANE];
-  PARTITION_CONTEXT *above_seg_context;
-} VP9D_COMP;
-
-#ifdef __cplusplus
-}  // extern "C"
-#endif
-
-#endif  // VP9_DECODER_VP9_ONYXD_INT_H_
diff --git a/vp9/encoder/vp9_bitstream.c b/vp9/encoder/vp9_bitstream.c

index 0f1692d..585b690 100644 (file)
--- a/vp9/encoder/vp9_bitstream.c
+++ b/vp9/encoder/vp9_bitstream.c
@@ -26,6 +26,7 @@
  #include "vp9/common/vp9_systemdependent.h"
  #include "vp9/common/vp9_tile_common.h"
  
+#include "vp9/encoder/vp9_cost.h"
  #include "vp9/encoder/vp9_bitstream.h"
  #include "vp9/encoder/vp9_encodemv.h"
  #include "vp9/encoder/vp9_mcomp.h"
@@ -34,10 +35,6 @@
  #include "vp9/encoder/vp9_tokenize.h"
  #include "vp9/encoder/vp9_write_bit_buffer.h"
  
-#ifdef ENTROPY_STATS
-extern unsigned int active_section;
-#endif
-
  static struct vp9_token intra_mode_encodings[INTRA_MODES];
  static struct vp9_token switchable_interp_encodings[SWITCHABLE_FILTERS];
  static struct vp9_token partition_encodings[PARTITION_TYPES];
@@ -97,13 +94,13 @@ static void write_selected_tx_size(const VP9_COMP *cpi,
    }
  }
  
-static int write_skip(const VP9_COMP *cpi, int segment_id, MODE_INFO *m,
+static int write_skip(const VP9_COMP *cpi, int segment_id, const MODE_INFO *mi,
                        vp9_writer *w) {
    const MACROBLOCKD *const xd = &cpi->mb.e_mbd;
    if (vp9_segfeature_active(&cpi->common.seg, segment_id, SEG_LVL_SKIP)) {
      return 1;
    } else {
-    const int skip = m->mbmi.skip;
+    const int skip = mi->mbmi.skip;
      vp9_write(w, skip, vp9_get_skip_prob(&cpi->common, xd));
      return skip;
    }
@@ -228,126 +225,107 @@ static void write_ref_frames(const VP9_COMP *cpi, vp9_writer *w) {
    }
  }
  
-static void pack_inter_mode_mvs(VP9_COMP *cpi, MODE_INFO *m, vp9_writer *bc) {
+static void pack_inter_mode_mvs(VP9_COMP *cpi, const MODE_INFO *mi,
+                                vp9_writer *w) {
    VP9_COMMON *const cm = &cpi->common;
    const nmv_context *nmvc = &cm->fc.nmvc;
-  MACROBLOCK *const x = &cpi->mb;
-  MACROBLOCKD *const xd = &x->e_mbd;
+  const MACROBLOCK *const x = &cpi->mb;
+  const MACROBLOCKD *const xd = &x->e_mbd;
    const struct segmentation *const seg = &cm->seg;
-  const MB_MODE_INFO *const mi = &m->mbmi;
-  const MV_REFERENCE_FRAME ref0 = mi->ref_frame[0];
-  const MV_REFERENCE_FRAME ref1 = mi->ref_frame[1];
-  const MB_PREDICTION_MODE mode = mi->mode;
-  const int segment_id = mi->segment_id;
-  const BLOCK_SIZE bsize = mi->sb_type;
+  const MB_MODE_INFO *const mbmi = &mi->mbmi;
+  const MB_PREDICTION_MODE mode = mbmi->mode;
+  const int segment_id = mbmi->segment_id;
+  const BLOCK_SIZE bsize = mbmi->sb_type;
    const int allow_hp = cm->allow_high_precision_mv;
-  int skip;
-
-#ifdef ENTROPY_STATS
-  active_section = 9;
-#endif
+  const int is_inter = is_inter_block(mbmi);
+  const int is_compound = has_second_ref(mbmi);
+  int skip, ref;
  
    if (seg->update_map) {
      if (seg->temporal_update) {
-      const int pred_flag = mi->seg_id_predicted;
+      const int pred_flag = mbmi->seg_id_predicted;
        vp9_prob pred_prob = vp9_get_pred_prob_seg_id(seg, xd);
-      vp9_write(bc, pred_flag, pred_prob);
+      vp9_write(w, pred_flag, pred_prob);
        if (!pred_flag)
-        write_segment_id(bc, seg, segment_id);
+        write_segment_id(w, seg, segment_id);
      } else {
-      write_segment_id(bc, seg, segment_id);
+      write_segment_id(w, seg, segment_id);
      }
    }
  
-  skip = write_skip(cpi, segment_id, m, bc);
+  skip = write_skip(cpi, segment_id, mi, w);
  
    if (!vp9_segfeature_active(seg, segment_id, SEG_LVL_REF_FRAME))
-    vp9_write(bc, ref0 != INTRA_FRAME, vp9_get_intra_inter_prob(cm, xd));
+    vp9_write(w, is_inter, vp9_get_intra_inter_prob(cm, xd));
  
    if (bsize >= BLOCK_8X8 && cm->tx_mode == TX_MODE_SELECT &&
-      !(ref0 != INTRA_FRAME &&
+      !(is_inter &&
          (skip || vp9_segfeature_active(seg, segment_id, SEG_LVL_SKIP)))) {
-    write_selected_tx_size(cpi, mi->tx_size, bsize, bc);
+    write_selected_tx_size(cpi, mbmi->tx_size, bsize, w);
    }
  
-  if (ref0 == INTRA_FRAME) {
-#ifdef ENTROPY_STATS
-    active_section = 6;
-#endif
-
+  if (!is_inter) {
      if (bsize >= BLOCK_8X8) {
-      write_intra_mode(bc, mode, cm->fc.y_mode_prob[size_group_lookup[bsize]]);
+      write_intra_mode(w, mode, cm->fc.y_mode_prob[size_group_lookup[bsize]]);
      } else {
        int idx, idy;
-      const int num_4x4_blocks_wide = num_4x4_blocks_wide_lookup[bsize];
-      const int num_4x4_blocks_high = num_4x4_blocks_high_lookup[bsize];
-      for (idy = 0; idy < 2; idy += num_4x4_blocks_high) {
-        for (idx = 0; idx < 2; idx += num_4x4_blocks_wide) {
-          const MB_PREDICTION_MODE bm = m->bmi[idy * 2 + idx].as_mode;
-          write_intra_mode(bc, bm, cm->fc.y_mode_prob[0]);
+      const int num_4x4_w = num_4x4_blocks_wide_lookup[bsize];
+      const int num_4x4_h = num_4x4_blocks_high_lookup[bsize];
+      for (idy = 0; idy < 2; idy += num_4x4_h) {
+        for (idx = 0; idx < 2; idx += num_4x4_w) {
+          const MB_PREDICTION_MODE b_mode = mi->bmi[idy * 2 + idx].as_mode;
+          write_intra_mode(w, b_mode, cm->fc.y_mode_prob[0]);
          }
        }
      }
-    write_intra_mode(bc, mi->uv_mode, cm->fc.uv_mode_prob[mode]);
+    write_intra_mode(w, mbmi->uv_mode, cm->fc.uv_mode_prob[mode]);
    } else {
-    vp9_prob *mv_ref_p;
-    write_ref_frames(cpi, bc);
-    mv_ref_p = cm->fc.inter_mode_probs[mi->mode_context[ref0]];
-
-#ifdef ENTROPY_STATS
-    active_section = 3;
-#endif
+    const int mode_ctx = mbmi->mode_context[mbmi->ref_frame[0]];
+    const vp9_prob *const inter_probs = cm->fc.inter_mode_probs[mode_ctx];
+    write_ref_frames(cpi, w);
  
      // If segment skip is not enabled code the mode.
      if (!vp9_segfeature_active(seg, segment_id, SEG_LVL_SKIP)) {
        if (bsize >= BLOCK_8X8) {
-        write_inter_mode(bc, mode, mv_ref_p);
-        ++cm->counts.inter_mode[mi->mode_context[ref0]][INTER_OFFSET(mode)];
+        write_inter_mode(w, mode, inter_probs);
+        ++cm->counts.inter_mode[mode_ctx][INTER_OFFSET(mode)];
        }
      }
  
      if (cm->interp_filter == SWITCHABLE) {
        const int ctx = vp9_get_pred_context_switchable_interp(xd);
-      vp9_write_token(bc, vp9_switchable_interp_tree,
+      vp9_write_token(w, vp9_switchable_interp_tree,
                        cm->fc.switchable_interp_prob[ctx],
-                      &switchable_interp_encodings[mi->interp_filter]);
+                      &switchable_interp_encodings[mbmi->interp_filter]);
      } else {
-      assert(mi->interp_filter == cm->interp_filter);
+      assert(mbmi->interp_filter == cm->interp_filter);
      }
  
      if (bsize < BLOCK_8X8) {
-      const int num_4x4_blocks_wide = num_4x4_blocks_wide_lookup[bsize];
-      const int num_4x4_blocks_high = num_4x4_blocks_high_lookup[bsize];
+      const int num_4x4_w = num_4x4_blocks_wide_lookup[bsize];
+      const int num_4x4_h = num_4x4_blocks_high_lookup[bsize];
        int idx, idy;
-      for (idy = 0; idy < 2; idy += num_4x4_blocks_high) {
-        for (idx = 0; idx < 2; idx += num_4x4_blocks_wide) {
+      for (idy = 0; idy < 2; idy += num_4x4_h) {
+        for (idx = 0; idx < 2; idx += num_4x4_w) {
            const int j = idy * 2 + idx;
-          const MB_PREDICTION_MODE b_mode = m->bmi[j].as_mode;
-          write_inter_mode(bc, b_mode, mv_ref_p);
-          ++cm->counts.inter_mode[mi->mode_context[ref0]][INTER_OFFSET(b_mode)];
+          const MB_PREDICTION_MODE b_mode = mi->bmi[j].as_mode;
+          write_inter_mode(w, b_mode, inter_probs);
+          ++cm->counts.inter_mode[mode_ctx][INTER_OFFSET(b_mode)];
            if (b_mode == NEWMV) {
-#ifdef ENTROPY_STATS
-            active_section = 11;
-#endif
-            vp9_encode_mv(cpi, bc, &m->bmi[j].as_mv[0].as_mv,
-                          &mi->ref_mvs[ref0][0].as_mv, nmvc, allow_hp);
-
-            if (has_second_ref(mi))
-              vp9_encode_mv(cpi, bc, &m->bmi[j].as_mv[1].as_mv,
-                            &mi->ref_mvs[ref1][0].as_mv, nmvc, allow_hp);
+            for (ref = 0; ref < 1 + is_compound; ++ref)
+              vp9_encode_mv(cpi, w, &mi->bmi[j].as_mv[ref].as_mv,
+                            &mbmi->ref_mvs[mbmi->ref_frame[ref]][0].as_mv,
+                            nmvc, allow_hp);
            }
          }
        }
-    } else if (mode == NEWMV) {
-#ifdef ENTROPY_STATS
-      active_section = 5;
-#endif
-      vp9_encode_mv(cpi, bc, &mi->mv[0].as_mv,
-                    &mi->ref_mvs[ref0][0].as_mv, nmvc, allow_hp);
-
-      if (has_second_ref(mi))
-        vp9_encode_mv(cpi, bc, &mi->mv[1].as_mv,
-                      &mi->ref_mvs[ref1][0].as_mv, nmvc, allow_hp);
+    } else {
+      if (mode == NEWMV) {
+        for (ref = 0; ref < 1 + is_compound; ++ref)
+          vp9_encode_mv(cpi, w, &mbmi->mv[ref].as_mv,
+                        &mbmi->ref_mvs[mbmi->ref_frame[ref]][0].as_mv, nmvc,
+                        allow_hp);
+      }
      }
    }
  }
@@ -409,14 +387,8 @@ static void write_modes_b(VP9_COMP *cpi, const TileInfo *const tile,
                   cm->mi_rows, cm->mi_cols);
    if (frame_is_intra_only(cm)) {
      write_mb_modes_kf(cpi, xd->mi_8x8, w);
-#ifdef ENTROPY_STATS
-    active_section = 8;
-#endif
    } else {
      pack_inter_mode_mvs(cpi, m, w);
-#ifdef ENTROPY_STATS
-    active_section = 1;
-#endif
    }
  
    assert(*tok < tok_end);
@@ -1150,18 +1122,10 @@ static size_t write_compressed_header(VP9_COMP *cpi, uint8_t *data) {
      encode_txfm_probs(cm, &header_bc);
  
    update_coef_probs(cpi, &header_bc);
-
-#ifdef ENTROPY_STATS
-  active_section = 2;
-#endif
-
    update_skip_probs(cm, &header_bc);
  
    if (!frame_is_intra_only(cm)) {
      int i;
-#ifdef ENTROPY_STATS
-    active_section = 1;
-#endif
  
      for (i = 0; i < INTER_MODE_CONTEXTS; ++i)
        prob_diff_update(vp9_inter_mode_tree, cm->fc.inter_mode_probs[i],
@@ -1235,13 +1199,6 @@ void vp9_pack_bitstream(VP9_COMP *cpi, uint8_t *dest, size_t *size) {
  
    vp9_compute_update_table();
  
-#ifdef ENTROPY_STATS
-  if (cm->frame_type == INTER_FRAME)
-    active_section = 0;
-  else
-    active_section = 7;
-#endif
-
    vp9_clear_system_state();
  
    first_part_size = write_compressed_header(cpi, data);
diff --git a/vp9/encoder/vp9_block.h b/vp9/encoder/vp9_block.h

index 85f6c97..5d69856 100644 (file)
--- a/vp9/encoder/vp9_block.h
+++ b/vp9/encoder/vp9_block.h
@@ -11,7 +11,6 @@
  #ifndef VP9_ENCODER_VP9_BLOCK_H_
  #define VP9_ENCODER_VP9_BLOCK_H_
  
-#include "vp9/common/vp9_onyx.h"
  #include "vp9/common/vp9_entropymv.h"
  #include "vp9/common/vp9_entropy.h"
  #include "vpx_ports/mem.h"
@@ -132,9 +131,9 @@ struct macroblock {
    int *nmvsadcost_hp[2];
    int **mvsadcost;
  
-  int mbmode_cost[MB_MODE_COUNT];
+  int mbmode_cost[INTRA_MODES];
    unsigned inter_mode_cost[INTER_MODE_CONTEXTS][INTER_MODES];
-  int intra_uv_mode_cost[2][MB_MODE_COUNT];
+  int intra_uv_mode_cost[FRAME_TYPES][INTRA_MODES];
    int y_mode_costs[INTRA_MODES][INTRA_MODES][INTRA_MODES];
    int switchable_interp_costs[SWITCHABLE_FILTER_CONTEXTS][SWITCHABLE_FILTERS];
  
diff --git a/vp9/encoder/vp9_cost.c b/vp9/encoder/vp9_cost.c

new file mode 100644 (file)

index 0000000..1c3c3d2
--- /dev/null
+++ b/vp9/encoder/vp9_cost.c
@@ -0,0 +1,62 @@
+/*
+ *  Copyright (c) 2014 The WebM project authors. All Rights Reserved.
+ *
+ *  Use of this source code is governed by a BSD-style license
+ *  that can be found in the LICENSE file in the root of the source
+ *  tree. An additional intellectual property rights grant can be found
+ *  in the file PATENTS.  All contributing project authors may
+ *  be found in the AUTHORS file in the root of the source tree.
+ */
+
+#include "vp9/encoder/vp9_cost.h"
+
+const unsigned int vp9_prob_cost[256] = {
+  2047, 2047, 1791, 1641, 1535, 1452, 1385, 1328, 1279, 1235, 1196, 1161,
+  1129, 1099, 1072, 1046, 1023, 1000, 979,  959,  940,  922,  905,  889,
+  873,  858,  843,  829,  816,  803,  790,  778,  767,  755,  744,  733,
+  723,  713,  703,  693,  684,  675,  666,  657,  649,  641,  633,  625,
+  617,  609,  602,  594,  587,  580,  573,  567,  560,  553,  547,  541,
+  534,  528,  522,  516,  511,  505,  499,  494,  488,  483,  477,  472,
+  467,  462,  457,  452,  447,  442,  437,  433,  428,  424,  419,  415,
+  410,  406,  401,  397,  393,  389,  385,  381,  377,  373,  369,  365,
+  361,  357,  353,  349,  346,  342,  338,  335,  331,  328,  324,  321,
+  317,  314,  311,  307,  304,  301,  297,  294,  291,  288,  285,  281,
+  278,  275,  272,  269,  266,  263,  260,  257,  255,  252,  249,  246,
+  243,  240,  238,  235,  232,  229,  227,  224,  221,  219,  216,  214,
+  211,  208,  206,  203,  201,  198,  196,  194,  191,  189,  186,  184,
+  181,  179,  177,  174,  172,  170,  168,  165,  163,  161,  159,  156,
+  154,  152,  150,  148,  145,  143,  141,  139,  137,  135,  133,  131,
+  129,  127,  125,  123,  121,  119,  117,  115,  113,  111,  109,  107,
+  105,  103,  101,  99,   97,   95,   93,   92,   90,   88,   86,   84,
+  82,   81,   79,   77,   75,   73,   72,   70,   68,   66,   65,   63,
+  61,   60,   58,   56,   55,   53,   51,   50,   48,   46,   45,   43,
+  41,   40,   38,   37,   35,   33,   32,   30,   29,   27,   25,   24,
+  22,   21,   19,   18,   16,   15,   13,   12,   10,   9,    7,    6,
+  4,    3,    1,    1};
+
+static void cost(int *costs, vp9_tree tree, const vp9_prob *probs,
+                 int i, int c) {
+  const vp9_prob prob = probs[i / 2];
+  int b;
+
+  for (b = 0; b <= 1; ++b) {
+    const int cc = c + vp9_cost_bit(prob, b);
+    const vp9_tree_index ii = tree[i + b];
+
+    if (ii <= 0)
+      costs[-ii] = cc;
+    else
+      cost(costs, tree, probs, ii, cc);
+  }
+}
+
+void vp9_cost_tokens(int *costs, const vp9_prob *probs, vp9_tree tree) {
+  cost(costs, tree, probs, 0, 0);
+}
+
+void vp9_cost_tokens_skip(int *costs, const vp9_prob *probs, vp9_tree tree) {
+  assert(tree[0] <= 0 && tree[1] > 0);
+
+  costs[-tree[0]] = vp9_cost_bit(probs[0], 0);
+  cost(costs, tree, probs, 2, 0);
+}
diff --git a/vp9/encoder/vp9_cost.h b/vp9/encoder/vp9_cost.h

new file mode 100644 (file)

index 0000000..6d2b940
--- /dev/null
+++ b/vp9/encoder/vp9_cost.h
@@ -0,0 +1,55 @@
+/*
+ *  Copyright (c) 2014 The WebM project authors. All Rights Reserved.
+ *
+ *  Use of this source code is governed by a BSD-style license
+ *  that can be found in the LICENSE file in the root of the source
+ *  tree. An additional intellectual property rights grant can be found
+ *  in the file PATENTS.  All contributing project authors may
+ *  be found in the AUTHORS file in the root of the source tree.
+ */
+
+#ifndef VP9_ENCODER_VP9_COST_H_
+#define VP9_ENCODER_VP9_COST_H_
+
+#include "vp9/common/vp9_prob.h"
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+extern const unsigned int vp9_prob_cost[256];
+
+#define vp9_cost_zero(prob) (vp9_prob_cost[prob])
+
+#define vp9_cost_one(prob) vp9_cost_zero(vp9_complement(prob))
+
+#define vp9_cost_bit(prob, bit) vp9_cost_zero((bit) ? vp9_complement(prob) \
+                                                    : (prob))
+
+static INLINE unsigned int cost_branch256(const unsigned int ct[2],
+                                          vp9_prob p) {
+  return ct[0] * vp9_cost_zero(p) + ct[1] * vp9_cost_one(p);
+}
+
+static INLINE int treed_cost(vp9_tree tree, const vp9_prob *probs,
+                             int bits, int len) {
+  int cost = 0;
+  vp9_tree_index i = 0;
+
+  do {
+    const int bit = (bits >> --len) & 1;
+    cost += vp9_cost_bit(probs[i >> 1], bit);
+    i = tree[i + bit];
+  } while (len);
+
+  return cost;
+}
+
+void vp9_cost_tokens(int *costs, const vp9_prob *probs, vp9_tree tree);
+void vp9_cost_tokens_skip(int *costs, const vp9_prob *probs, vp9_tree tree);
+
+#ifdef __cplusplus
+}  // extern "C"
+#endif
+
+#endif  // VP9_ENCODER_VP9_COST_H_
diff --git a/vp9/encoder/vp9_encodeframe.c b/vp9/encoder/vp9_encodeframe.c

index 1bcde37..57079df 100644 (file)
--- a/vp9/encoder/vp9_encodeframe.c
+++ b/vp9/encoder/vp9_encodeframe.c
@@ -2010,7 +2010,6 @@ static void encode_rd_sb_row(VP9_COMP *cpi, const TileInfo *const tile,
        const int idx_str = cm->mode_info_stride * mi_row + mi_col;
        MODE_INFO **mi_8x8 = cm->mi_grid_visible + idx_str;
        MODE_INFO **prev_mi_8x8 = cm->prev_mi_grid_visible + idx_str;
-
        cpi->mb.source_variance = UINT_MAX;
        if (cpi->sf.partition_search_type == FIXED_PARTITION) {
          set_offsets(cpi, tile, mi_row, mi_col, BLOCK_64X64);
@@ -2297,19 +2296,13 @@ typedef enum {
  
  static void set_mode_info(MB_MODE_INFO *mbmi, BLOCK_SIZE bsize,
                            MB_PREDICTION_MODE mode) {
-  mbmi->interp_filter = EIGHTTAP;
    mbmi->mode = mode;
+  mbmi->uv_mode = mode;
    mbmi->mv[0].as_int = 0;
    mbmi->mv[1].as_int = 0;
-  if (mode < NEARESTMV) {
-    mbmi->ref_frame[0] = INTRA_FRAME;
-  } else {
-    mbmi->ref_frame[0] = LAST_FRAME;
-  }
-
-  mbmi->ref_frame[1] = INTRA_FRAME;
+  mbmi->ref_frame[0] = INTRA_FRAME;
+  mbmi->ref_frame[1] = NONE;
    mbmi->tx_size = max_txsize_lookup[bsize];
-  mbmi->uv_mode = mode;
    mbmi->skip = 0;
    mbmi->sb_type = bsize;
    mbmi->segment_id = 0;
@@ -2433,6 +2426,9 @@ static void encode_frame_internal(VP9_COMP *cpi) {
    vp9_zero(cpi->coef_counts);
    vp9_zero(cm->counts.eob_branch);
  
+  // Set frame level transform size use case
+  select_tx_mode(cpi);
+
    cpi->mb.e_mbd.lossless = cm->base_qindex == 0 && cm->y_dc_delta_q == 0
        && cm->uv_dc_delta_q == 0 && cm->uv_ac_delta_q == 0;
    switch_lossless_mode(cpi, cpi->mb.e_mbd.lossless);
@@ -2441,7 +2437,6 @@ static void encode_frame_internal(VP9_COMP *cpi) {
  
    vp9_initialize_rd_consts(cpi);
    vp9_initialize_me_consts(cpi, cm->base_qindex);
-  switch_tx_mode(cpi);
  
    if (cpi->oxcf.tuning == VP8_TUNE_SSIM) {
      // Initialize encode frame context.
@@ -2497,7 +2492,7 @@ static void encode_frame_internal(VP9_COMP *cpi) {
            vp9_tile_init(&tile, cm, tile_row, tile_col);
            for (mi_row = tile.mi_row_start;
                 mi_row < tile.mi_row_end; mi_row += MI_BLOCK_SIZE) {
-            if (cpi->sf.use_nonrd_pick_mode)
+            if (cpi->sf.use_nonrd_pick_mode && cm->frame_type != KEY_FRAME)
                encode_nonrd_sb_row(cpi, &tile, mi_row, &tp);
              else
                encode_rd_sb_row(cpi, &tile, mi_row, &tp);
@@ -2601,8 +2596,6 @@ void vp9_encode_frame(VP9_COMP *cpi) {
  
      cpi->mb.e_mbd.lossless = cpi->oxcf.lossless;
  
-    /* transform size selection (4x4, 8x8, 16x16 or select-per-mb) */
-    select_tx_mode(cpi);
      cm->reference_mode = reference_mode;
  
      encode_frame_internal(cpi);
@@ -2683,6 +2676,8 @@ void vp9_encode_frame(VP9_COMP *cpi) {
        }
      }
    } else {
+    cpi->mb.e_mbd.lossless = cpi->oxcf.lossless;
+    cm->reference_mode = SINGLE_REFERENCE;
      // Force the usage of the BILINEAR interp_filter.
      cm->interp_filter = BILINEAR;
      encode_frame_internal(cpi);
diff --git a/vp9/encoder/vp9_encodemv.c b/vp9/encoder/vp9_encodemv.c

index 5079699..703dde3 100644 (file)
--- a/vp9/encoder/vp9_encodemv.c
+++ b/vp9/encoder/vp9_encodemv.c
@@ -13,11 +13,9 @@
  #include "vp9/common/vp9_common.h"
  #include "vp9/common/vp9_entropymode.h"
  #include "vp9/common/vp9_systemdependent.h"
-#include "vp9/encoder/vp9_encodemv.h"
  
-#ifdef ENTROPY_STATS
-extern unsigned int active_section;
-#endif
+#include "vp9/encoder/vp9_cost.h"
+#include "vp9/encoder/vp9_encodemv.h"
  
  static struct vp9_token mv_joint_encodings[MV_JOINTS];
  static struct vp9_token mv_class_encodings[MV_CLASSES];
@@ -160,7 +158,7 @@ static void write_mv_update(const vp9_tree_index *tree,
  
    vp9_tree_probs_from_distribution(tree, branch_ct, counts);
    for (i = 0; i < n - 1; ++i)
-    update_mv(w, branch_ct[i], &probs[i], NMV_UPDATE_PROB);
+    update_mv(w, branch_ct[i], &probs[i], MV_UPDATE_PROB);
  }
  
  void vp9_write_nmv_probs(VP9_COMMON *cm, int usehp, vp9_writer *w) {
@@ -174,13 +172,13 @@ void vp9_write_nmv_probs(VP9_COMMON *cm, int usehp, vp9_writer *w) {
      nmv_component *comp = &mvc->comps[i];
      nmv_component_counts *comp_counts = &counts->comps[i];
  
-    update_mv(w, comp_counts->sign, &comp->sign, NMV_UPDATE_PROB);
+    update_mv(w, comp_counts->sign, &comp->sign, MV_UPDATE_PROB);
      write_mv_update(vp9_mv_class_tree, comp->classes, comp_counts->classes,
                      MV_CLASSES, w);
      write_mv_update(vp9_mv_class0_tree, comp->class0, comp_counts->class0,
                      CLASS0_SIZE, w);
      for (j = 0; j < MV_OFFSET_BITS; ++j)
-      update_mv(w, comp_counts->bits[j], &comp->bits[j], NMV_UPDATE_PROB);
+      update_mv(w, comp_counts->bits[j], &comp->bits[j], MV_UPDATE_PROB);
    }
  
    for (i = 0; i < 2; ++i) {
@@ -195,8 +193,8 @@ void vp9_write_nmv_probs(VP9_COMMON *cm, int usehp, vp9_writer *w) {
    if (usehp) {
      for (i = 0; i < 2; ++i) {
        update_mv(w, counts->comps[i].class0_hp, &mvc->comps[i].class0_hp,
-                NMV_UPDATE_PROB);
-      update_mv(w, counts->comps[i].hp, &mvc->comps[i].hp, NMV_UPDATE_PROB);
+                MV_UPDATE_PROB);
+      update_mv(w, counts->comps[i].hp, &mvc->comps[i].hp, MV_UPDATE_PROB);
      }
    }
  }
diff --git a/vp9/encoder/vp9_firstpass.c b/vp9/encoder/vp9_firstpass.c

index 56a5f5f..f548de5 100644 (file)
--- a/vp9/encoder/vp9_firstpass.c
+++ b/vp9/encoder/vp9_firstpass.c
@@ -95,7 +95,7 @@ static int kfboost_qadjust(int qindex) {
  // Resets the first pass file to the given position using a relative seek from
  // the current position.
  static void reset_fpf_position(struct twopass_rc *p,
-                               FIRSTPASS_STATS *position) {
+                               const FIRSTPASS_STATS *position) {
    p->stats_in = position;
  }
  
@@ -415,6 +415,8 @@ static void first_pass_motion_search(VP9_COMP *cpi, MACROBLOCK *x,
                                      x->sadperbit16, &num00, &v_fn_ptr,
                                      x->nmvjointcost,
                                      x->mvcost, ref_mv);
+  if (tmp_err < INT_MAX)
+    tmp_err = vp9_get_mvpred_var(x, &tmp_mv, ref_mv, &v_fn_ptr, 1);
    if (tmp_err < INT_MAX - new_mv_mode_penalty)
      tmp_err += new_mv_mode_penalty;
  
@@ -439,6 +441,8 @@ static void first_pass_motion_search(VP9_COMP *cpi, MACROBLOCK *x,
                                          &num00, &v_fn_ptr,
                                          x->nmvjointcost,
                                          x->mvcost, ref_mv);
+      if (tmp_err < INT_MAX)
+        tmp_err = vp9_get_mvpred_var(x, &tmp_mv, ref_mv, &v_fn_ptr, 1);
        if (tmp_err < INT_MAX - new_mv_mode_penalty)
          tmp_err += new_mv_mode_penalty;
  
@@ -672,7 +676,8 @@ void vp9_first_pass(VP9_COMP *cpi) {
            mv.as_mv.row *= 8;
            mv.as_mv.col *= 8;
            this_error = motion_error;
-          vp9_set_mbmode_and_mvs(xd, NEWMV, &mv.as_mv);
+          xd->mi_8x8[0]->mbmi.mode = NEWMV;
+          xd->mi_8x8[0]->mbmi.mv[0] = mv;
            xd->mi_8x8[0]->mbmi.tx_size = TX_4X4;
            xd->mi_8x8[0]->mbmi.ref_frame[0] = LAST_FRAME;
            xd->mi_8x8[0]->mbmi.ref_frame[1] = NONE;
@@ -898,7 +903,7 @@ extern void vp9_new_framerate(VP9_COMP *cpi, double framerate);
  
  void vp9_init_second_pass(VP9_COMP *cpi) {
    FIRSTPASS_STATS this_frame;
-  FIRSTPASS_STATS *start_pos;
+  const FIRSTPASS_STATS *start_pos;
    struct twopass_rc *const twopass = &cpi->twopass;
    const VP9_CONFIG *const oxcf = &cpi->oxcf;
  
@@ -1006,7 +1011,7 @@ static int detect_transition_to_still(VP9_COMP *cpi, int frame_interval,
        loop_decay_rate >= 0.999 &&
        last_decay_rate < 0.9) {
      int j;
-    FIRSTPASS_STATS *position = cpi->twopass.stats_in;
+    const FIRSTPASS_STATS *position = cpi->twopass.stats_in;
      FIRSTPASS_STATS tmp_next_frame;
  
      // Look ahead a few frames to see if static condition persists...
@@ -1341,7 +1346,7 @@ void define_fixed_arf_period(VP9_COMP *cpi) {
  // Analyse and define a gf/arf group.
  static void define_gf_group(VP9_COMP *cpi, FIRSTPASS_STATS *this_frame) {
    FIRSTPASS_STATS next_frame = { 0 };
-  FIRSTPASS_STATS *start_pos;
+  const FIRSTPASS_STATS *start_pos;
    struct twopass_rc *const twopass = &cpi->twopass;
    int i;
    double boost_score = 0.0;
@@ -1788,19 +1793,12 @@ static int test_candidate_kf(VP9_COMP *cpi,
           ((next_frame->intra_error /
             DOUBLE_DIVIDE_CHECK(next_frame->coded_error)) > 3.5))))) {
      int i;
-    FIRSTPASS_STATS *start_pos;
-
-    FIRSTPASS_STATS local_next_frame;
-
+    const FIRSTPASS_STATS *start_pos = cpi->twopass.stats_in;
+    FIRSTPASS_STATS local_next_frame = *next_frame;
      double boost_score = 0.0;
      double old_boost_score = 0.0;
      double decay_accumulator = 1.0;
  
-    local_next_frame = *next_frame;
-
-    // Note the starting file position so we can reset to it.
-    start_pos = cpi->twopass.stats_in;
-
      // Examine how well the key frame predicts subsequent frames.
      for (i = 0; i < 16; ++i) {
        double next_iiratio = (IIKFACTOR1 * local_next_frame.intra_error /
@@ -1856,7 +1854,7 @@ static void find_next_key_frame(VP9_COMP *cpi, FIRSTPASS_STATS *this_frame) {
    FIRSTPASS_STATS last_frame;
    FIRSTPASS_STATS first_frame;
    FIRSTPASS_STATS next_frame;
-  FIRSTPASS_STATS *start_position;
+  const FIRSTPASS_STATS *start_position;
  
    double decay_accumulator = 1.0;
    double zero_motion_accumulator = 1.0;
diff --git a/vp9/encoder/vp9_firstpass.h b/vp9/encoder/vp9_firstpass.h

index 03c0e20..9f44a30 100644 (file)
--- a/vp9/encoder/vp9_firstpass.h
+++ b/vp9/encoder/vp9_firstpass.h
@@ -43,7 +43,9 @@ struct twopass_rc {
    unsigned int this_iiratio;
    FIRSTPASS_STATS total_stats;
    FIRSTPASS_STATS this_frame_stats;
-  FIRSTPASS_STATS *stats_in, *stats_in_end, *stats_in_start;
+  const FIRSTPASS_STATS *stats_in;
+  const FIRSTPASS_STATS *stats_in_start;
+  const FIRSTPASS_STATS *stats_in_end;
    FIRSTPASS_STATS total_left_stats;
    int first_pass_done;
    int64_t bits_left;
diff --git a/vp9/encoder/vp9_mbgraph.c b/vp9/encoder/vp9_mbgraph.c

index d3e19b4..6520389 100644 (file)
--- a/vp9/encoder/vp9_mbgraph.c
+++ b/vp9/encoder/vp9_mbgraph.c
@@ -61,7 +61,9 @@ static unsigned int do_16x16_motion_iteration(VP9_COMP *cpi,
          &sse);
    }
  
-  vp9_set_mbmode_and_mvs(xd, NEWMV, dst_mv);
+  xd->mi_8x8[0]->mbmi.mode = NEWMV;
+  xd->mi_8x8[0]->mbmi.mv[0].as_mv = *dst_mv;
+
    vp9_build_inter_predictors_sby(xd, mb_row, mb_col, BLOCK_16X16);
  
    /* restore UMV window */
diff --git a/vp9/encoder/vp9_mcomp.c b/vp9/encoder/vp9_mcomp.c

index 7d6fd3b..d40f713 100644 (file)
--- a/vp9/encoder/vp9_mcomp.c
+++ b/vp9/encoder/vp9_mcomp.c
@@ -98,42 +98,23 @@ static int mvsad_err_cost(const MV *mv, const MV *ref,
  }
  
  void vp9_init_dsmotion_compensation(MACROBLOCK *x, int stride) {
-  int len;
-  int search_site_count = 0;
+  int len, ss_count = 1;
  
-  // Generate offsets for 4 search sites per step.
-  x->ss[search_site_count].mv.col = 0;
-  x->ss[search_site_count].mv.row = 0;
-  x->ss[search_site_count].offset = 0;
-  search_site_count++;
+  x->ss[0].mv.col = x->ss[0].mv.row = 0;
+  x->ss[0].offset = 0;
  
    for (len = MAX_FIRST_STEP; len > 0; len /= 2) {
-    // Compute offsets for search sites.
-    x->ss[search_site_count].mv.col = 0;
-    x->ss[search_site_count].mv.row = -len;
-    x->ss[search_site_count].offset = -len * stride;
-    search_site_count++;
-
-    // Compute offsets for search sites.
-    x->ss[search_site_count].mv.col = 0;
-    x->ss[search_site_count].mv.row = len;
-    x->ss[search_site_count].offset = len * stride;
-    search_site_count++;
-
-    // Compute offsets for search sites.
-    x->ss[search_site_count].mv.col = -len;
-    x->ss[search_site_count].mv.row = 0;
-    x->ss[search_site_count].offset = -len;
-    search_site_count++;
-
-    // Compute offsets for search sites.
-    x->ss[search_site_count].mv.col = len;
-    x->ss[search_site_count].mv.row = 0;
-    x->ss[search_site_count].offset = len;
-    search_site_count++;
+    // Generate offsets for 4 search sites per step.
+    const MV ss_mvs[] = {{-len, 0}, {len, 0}, {0, -len}, {0, len}};
+    int i;
+    for (i = 0; i < 4; ++i) {
+      search_site *const ss = &x->ss[ss_count++];
+      ss->mv = ss_mvs[i];
+      ss->offset = ss->mv.row * stride + ss->mv.col;
+    }
    }
  
-  x->ss_count = search_site_count;
+  x->ss_count = ss_count;
    x->searches_per_step = 4;
  }
  
@@ -724,20 +705,52 @@ static int vp9_pattern_search(const MACROBLOCK *x,
    best_mv->row = br;
    best_mv->col = bc;
  
-  this_offset = base_offset + (best_mv->row * in_what_stride) +
-                               best_mv->col;
    this_mv.row = best_mv->row * 8;
    this_mv.col = best_mv->col * 8;
-  if (bestsad == INT_MAX)
-    return INT_MAX;
+  return bestsad;
+}
+
+int vp9_get_mvpred_var(const MACROBLOCK *x,
+                       const MV *best_mv, const MV *center_mv,
+                       const vp9_variance_fn_ptr_t *vfp,
+                       int use_mvcost) {
+  unsigned int unused;
  
-  return vfp->vf(what, what_stride, this_offset, in_what_stride,
-                 (unsigned int *)&bestsad) +
-         use_mvcost ? mv_err_cost(&this_mv, center_mv,
-                                  x->nmvjointcost, x->mvcost, x->errorperbit)
-                    : 0;
+  const MACROBLOCKD *const xd = &x->e_mbd;
+  const uint8_t *what = x->plane[0].src.buf;
+  const int what_stride = x->plane[0].src.stride;
+  const int in_what_stride = xd->plane[0].pre[0].stride;
+  const uint8_t *base_offset = xd->plane[0].pre[0].buf;
+  const uint8_t *this_offset = &base_offset[best_mv->row * in_what_stride +
+                                            best_mv->col];
+  const MV mv = {best_mv->row * 8, best_mv->col * 8};
+  return vfp->vf(what, what_stride, this_offset, in_what_stride, &unused) +
+      (use_mvcost ?  mv_err_cost(&mv, center_mv, x->nmvjointcost,
+                                 x->mvcost, x->errorperbit) : 0);
  }
  
+int vp9_get_mvpred_av_var(const MACROBLOCK *x,
+                          MV *best_mv,
+                          const MV *center_mv,
+                          const uint8_t *second_pred,
+                          const vp9_variance_fn_ptr_t *vfp,
+                          int use_mvcost) {
+  unsigned int bestsad;
+  MV this_mv;
+  const MACROBLOCKD *const xd = &x->e_mbd;
+  const uint8_t *what = x->plane[0].src.buf;
+  const int what_stride = x->plane[0].src.stride;
+  const int in_what_stride = xd->plane[0].pre[0].stride;
+  const uint8_t *base_offset = xd->plane[0].pre[0].buf;
+  const uint8_t *this_offset = base_offset + (best_mv->row * in_what_stride) +
+      best_mv->col;
+  this_mv.row = best_mv->row * 8;
+  this_mv.col = best_mv->col * 8;
+  return vfp->svaf(this_offset, in_what_stride, 0, 0, what, what_stride,
+                   &bestsad, second_pred) +
+      (use_mvcost ?  mv_err_cost(&this_mv, center_mv, x->nmvjointcost,
+                                 x->mvcost, x->errorperbit) : 0);
+}
  
  int vp9_hex_search(const MACROBLOCK *x,
                     MV *ref_mv,
@@ -855,182 +868,18 @@ int vp9_square_search(const MACROBLOCK *x,
                              square_num_candidates, square_candidates);
  };
  
-// Number of candidates in first hex search
-#define FIRST_HEX_CANDIDATES 6
-// Index of previous hex search's best match
-#define PRE_BEST_CANDIDATE 6
-// Number of candidates in following hex search
-#define NEXT_HEX_CANDIDATES 3
-// Number of candidates in refining search
-#define REFINE_CANDIDATES 4
-
  int vp9_fast_hex_search(const MACROBLOCK *x,
                          MV *ref_mv,
                          int search_param,
                          int sad_per_bit,
+                        int do_init_search,  // must be zero for fast_hex
                          const vp9_variance_fn_ptr_t *vfp,
                          int use_mvcost,
                          const MV *center_mv,
                          MV *best_mv) {
-  const MACROBLOCKD* const xd = &x->e_mbd;
-  static const MV hex[FIRST_HEX_CANDIDATES] = {
-    { -1, -2}, {1, -2}, {2, 0}, {1, 2}, { -1, 2}, { -2, 0}
-  };
-  static const MV next_chkpts[PRE_BEST_CANDIDATE][NEXT_HEX_CANDIDATES] = {
-    {{ -2, 0}, { -1, -2}, {1, -2}},
-    {{ -1, -2}, {1, -2}, {2, 0}},
-    {{1, -2}, {2, 0}, {1, 2}},
-    {{2, 0}, {1, 2}, { -1, 2}},
-    {{1, 2}, { -1, 2}, { -2, 0}},
-    {{ -1, 2}, { -2, 0}, { -1, -2}}
-  };
-  static const MV neighbors[REFINE_CANDIDATES] = {
-      {0, -1}, { -1, 0}, {1, 0}, {0, 1}
-  };
-  int i, j;
-
-  const uint8_t *what = x->plane[0].src.buf;
-  const int what_stride = x->plane[0].src.stride;
-  const int in_what_stride = xd->plane[0].pre[0].stride;
-  int br, bc;
-  MV this_mv;
-  unsigned int bestsad = 0x7fffffff;
-  unsigned int thissad;
-  const uint8_t *base_offset;
-  const uint8_t *this_offset;
-  int k = -1;
-  int best_site = -1;
-  const int max_hex_search = 512;
-  const int max_dia_search = 32;
-
-  const int *mvjsadcost = x->nmvjointsadcost;
-  int *mvsadcost[2] = {x->nmvsadcost[0], x->nmvsadcost[1]};
-
-  const MV fcenter_mv = {center_mv->row >> 3, center_mv->col >> 3};
-
-  // Adjust ref_mv to make sure it is within MV range
-  clamp_mv(ref_mv, x->mv_col_min, x->mv_col_max, x->mv_row_min, x->mv_row_max);
-  br = ref_mv->row;
-  bc = ref_mv->col;
-
-  // Check the start point
-  base_offset = xd->plane[0].pre[0].buf;
-  this_offset = base_offset + (br * in_what_stride) + bc;
-  this_mv.row = br;
-  this_mv.col = bc;
-  bestsad = vfp->sdf(what, what_stride, this_offset, in_what_stride, 0x7fffffff)
-            + mvsad_err_cost(&this_mv, &fcenter_mv, mvjsadcost, mvsadcost,
-                             sad_per_bit);
-
-  // Initial 6-point hex search
-  if (check_bounds(x, br, bc, 2)) {
-    for (i = 0; i < FIRST_HEX_CANDIDATES; i++) {
-      this_mv.row = br + hex[i].row;
-      this_mv.col = bc + hex[i].col;
-      this_offset = base_offset + (this_mv.row * in_what_stride) + this_mv.col;
-      thissad = vfp->sdf(what, what_stride, this_offset, in_what_stride,
-                         bestsad);
-      CHECK_BETTER
-    }
-  } else {
-    for (i = 0; i < FIRST_HEX_CANDIDATES; i++) {
-      this_mv.row = br + hex[i].row;
-      this_mv.col = bc + hex[i].col;
-      if (!is_mv_in(x, &this_mv))
-        continue;
-      this_offset = base_offset + (this_mv.row * in_what_stride) + this_mv.col;
-      thissad = vfp->sdf(what, what_stride, this_offset, in_what_stride,
-                         bestsad);
-      CHECK_BETTER
-    }
-  }
-
-  // Continue hex search if we find a better match in first round
-  if (best_site != -1) {
-    br += hex[best_site].row;
-    bc += hex[best_site].col;
-    k = best_site;
-
-    // Allow search covering maximum MV range
-    for (j = 1; j < max_hex_search; j++) {
-      best_site = -1;
-
-      if (check_bounds(x, br, bc, 2)) {
-        for (i = 0; i < 3; i++) {
-          this_mv.row = br + next_chkpts[k][i].row;
-          this_mv.col = bc + next_chkpts[k][i].col;
-          this_offset = base_offset + (this_mv.row * in_what_stride) +
-              this_mv.col;
-          thissad = vfp->sdf(what, what_stride, this_offset, in_what_stride,
-                             bestsad);
-          CHECK_BETTER
-        }
-      } else {
-        for (i = 0; i < 3; i++) {
-          this_mv.row = br + next_chkpts[k][i].row;
-          this_mv.col = bc + next_chkpts[k][i].col;
-          if (!is_mv_in(x, &this_mv))
-            continue;
-          this_offset = base_offset + (this_mv.row * in_what_stride) +
-              this_mv.col;
-          thissad = vfp->sdf(what, what_stride, this_offset, in_what_stride,
-                             bestsad);
-          CHECK_BETTER
-        }
-      }
-
-      if (best_site == -1) {
-        break;
-      } else {
-        br += next_chkpts[k][best_site].row;
-        bc += next_chkpts[k][best_site].col;
-        k += 5 + best_site;
-        if (k >= 12) k -= 12;
-        else if (k >= 6) k -= 6;
-      }
-    }
-  }
-
-  // Check 4 1-away neighbors
-  for (j = 0; j < max_dia_search; j++) {
-    best_site = -1;
-
-    if (check_bounds(x, br, bc, 1)) {
-      for (i = 0; i < REFINE_CANDIDATES; i++) {
-        this_mv.row = br + neighbors[i].row;
-        this_mv.col = bc + neighbors[i].col;
-        this_offset = base_offset + (this_mv.row * in_what_stride) +
-            this_mv.col;
-        thissad = vfp->sdf(what, what_stride, this_offset, in_what_stride,
-                           bestsad);
-        CHECK_BETTER
-      }
-    } else {
-      for (i = 0; i < REFINE_CANDIDATES; i++) {
-        this_mv.row = br + neighbors[i].row;
-        this_mv.col = bc + neighbors[i].col;
-        if (!is_mv_in(x, &this_mv))
-          continue;
-        this_offset = base_offset + (this_mv.row * in_what_stride) +
-            this_mv.col;
-        thissad = vfp->sdf(what, what_stride, this_offset, in_what_stride,
-                           bestsad);
-        CHECK_BETTER
-      }
-    }
-
-    if (best_site == -1) {
-      break;
-    } else {
-      br += neighbors[best_site].row;
-      bc += neighbors[best_site].col;
-    }
-  }
-
-  best_mv->row = br;
-  best_mv->col = bc;
-
-  return bestsad;
+  return vp9_hex_search(x, ref_mv, MAX(MAX_MVSEARCH_STEPS - 2, search_param),
+                        sad_per_bit, do_init_search, vfp, use_mvcost,
+                        center_mv, best_mv);
  }
  
  #undef CHECK_BETTER
@@ -1045,8 +894,6 @@ int vp9_full_range_search_c(const MACROBLOCK *x, MV *ref_mv, MV *best_mv,
    const int what_stride = x->plane[0].src.stride;
    const uint8_t *in_what;
    const int in_what_stride = xd->plane[0].pre[0].stride;
-  const uint8_t *best_address;
-
    MV this_mv;
  
    unsigned int bestsad = INT_MAX;
@@ -1076,7 +923,6 @@ int vp9_full_range_search_c(const MACROBLOCK *x, MV *ref_mv, MV *best_mv,
  
    // Work out the start point for the search
    in_what = xd->plane[0].pre[0].buf + ref_row * in_what_stride + ref_col;
-  best_address = in_what;
  
    // Check the starting position
    bestsad = fn_ptr->sdf(what, what_stride, in_what, in_what_stride, 0x7fffffff)
@@ -1134,20 +980,9 @@ int vp9_full_range_search_c(const MACROBLOCK *x, MV *ref_mv, MV *best_mv,
        }
      }
    }
-
    best_mv->row += best_tr;
    best_mv->col += best_tc;
-
-  this_mv.row = best_mv->row * 8;
-  this_mv.col = best_mv->col * 8;
-
-  if (bestsad == INT_MAX)
-    return INT_MAX;
-
-  return fn_ptr->vf(what, what_stride, best_address, in_what_stride,
-                    (unsigned int *)(&thissad)) +
-                       mv_err_cost(&this_mv, center_mv,
-                                   mvjcost, mvcost, x->errorperbit);
+  return bestsad;
  }
  
  int vp9_diamond_search_sad_c(const MACROBLOCK *x,
@@ -1272,17 +1107,7 @@ int vp9_diamond_search_sad_c(const MACROBLOCK *x,
        (*num00)++;
      }
    }
-
-  this_mv.row = best_mv->row * 8;
-  this_mv.col = best_mv->col * 8;
-
-  if (bestsad == INT_MAX)
-    return INT_MAX;
-
-  return fn_ptr->vf(what, what_stride, best_address, in_what_stride,
-                    (unsigned int *)(&thissad)) +
-                       mv_err_cost(&this_mv, center_mv,
-                                   mvjcost, mvcost, x->errorperbit);
+  return bestsad;
  }
  
  int vp9_diamond_search_sadx4(const MACROBLOCK *x,
@@ -1448,24 +1273,14 @@ int vp9_diamond_search_sadx4(const MACROBLOCK *x,
        (*num00)++;
      }
    }
-
-  this_mv.row = best_mv->row * 8;
-  this_mv.col = best_mv->col * 8;
-
-  if (bestsad == INT_MAX)
-    return INT_MAX;
-
-  return fn_ptr->vf(what, what_stride, best_address, in_what_stride,
-                    (unsigned int *)(&thissad)) +
-                    mv_err_cost(&this_mv, center_mv,
-                                mvjcost, mvcost, x->errorperbit);
+  return bestsad;
  }
  
  /* do_refine: If last step (1-away) of n-step search doesn't pick the center
                point as the best match, we will do a final 1-away diamond
                refining search  */
  
-int vp9_full_pixel_diamond(VP9_COMP *cpi, MACROBLOCK *x,
+int vp9_full_pixel_diamond(const VP9_COMP *cpi, MACROBLOCK *x,
                             MV *mvp_full, int step_param,
                             int sadpb, int further_steps, int do_refine,
                             const vp9_variance_fn_ptr_t *fn_ptr,
@@ -1476,6 +1291,8 @@ int vp9_full_pixel_diamond(VP9_COMP *cpi, MACROBLOCK *x,
                                          step_param, sadpb, &n,
                                          fn_ptr, x->nmvjointcost,
                                          x->mvcost, ref_mv);
+  if (bestsme < INT_MAX)
+    bestsme = vp9_get_mvpred_var(x, &temp_mv, ref_mv, fn_ptr, 1);
    *dst_mv = temp_mv;
  
    // If there won't be more n-step search, check to see if refining search is
@@ -1493,6 +1310,8 @@ int vp9_full_pixel_diamond(VP9_COMP *cpi, MACROBLOCK *x,
                                          step_param + n, sadpb, &num00,
                                          fn_ptr, x->nmvjointcost, x->mvcost,
                                          ref_mv);
+      if (thissme < INT_MAX)
+        thissme = vp9_get_mvpred_var(x, &temp_mv, ref_mv, fn_ptr, 1);
  
        // check to see if refining search is needed.
        if (num00 > further_steps - n)
@@ -1512,12 +1331,13 @@ int vp9_full_pixel_diamond(VP9_COMP *cpi, MACROBLOCK *x,
      thissme = cpi->refining_search_sad(x, &best_mv, sadpb, search_range,
                                         fn_ptr, x->nmvjointcost, x->mvcost,
                                         ref_mv);
+    if (thissme < INT_MAX)
+      thissme = vp9_get_mvpred_var(x, &best_mv, ref_mv, fn_ptr, 1);
      if (thissme < bestsme) {
        bestsme = thissme;
        *dst_mv = best_mv;
      }
    }
-
    return bestsme;
  }
  
@@ -1558,19 +1378,10 @@ int vp9_full_search_sad_c(const MACROBLOCK *x, const MV *ref_mv,
        if (sad < best_sad) {
          best_sad = sad;
          *best_mv = this_mv;
-        best_address = check_here;
        }
      }
    }
-
-  if (best_sad < INT_MAX) {
-    unsigned int unused;
-    const MV mv = {best_mv->row * 8, best_mv->col * 8};
-    return fn_ptr->vf(what, what_stride, best_address, in_what_stride, &unused)
-                + mv_err_cost(&mv, center_mv, mvjcost, mvcost, x->errorperbit);
-  } else {
-    return INT_MAX;
-  }
+  return best_sad;
  }
  
  int vp9_full_search_sadx3(const MACROBLOCK *x, const MV *ref_mv,
@@ -1635,10 +1446,8 @@ int vp9_full_search_sadx3(const MACROBLOCK *x, const MV *ref_mv,
              bestsad = thissad;
              best_mv->row = r;
              best_mv->col = c;
-            bestaddress = check_here;
            }
          }
-
          check_here++;
          c++;
        }
@@ -1657,7 +1466,6 @@ int vp9_full_search_sadx3(const MACROBLOCK *x, const MV *ref_mv,
            bestsad = thissad;
            best_mv->row = r;
            best_mv->col = c;
-          bestaddress = check_here;
          }
        }
  
@@ -1665,17 +1473,7 @@ int vp9_full_search_sadx3(const MACROBLOCK *x, const MV *ref_mv,
        c++;
      }
    }
-
-  this_mv.row = best_mv->row * 8;
-  this_mv.col = best_mv->col * 8;
-
-  if (bestsad < INT_MAX)
-    return fn_ptr->vf(what, what_stride, bestaddress, in_what_stride,
-                      (unsigned int *)(&thissad)) +
-                      mv_err_cost(&this_mv, center_mv,
-                                  mvjcost, mvcost, x->errorperbit);
-  else
-    return INT_MAX;
+  return bestsad;
  }
  
  int vp9_full_search_sadx8(const MACROBLOCK *x, const MV *ref_mv,
@@ -1742,7 +1540,6 @@ int vp9_full_search_sadx8(const MACROBLOCK *x, const MV *ref_mv,
              bestsad = thissad;
              best_mv->row = r;
              best_mv->col = c;
-            bestaddress = check_here;
            }
          }
  
@@ -1768,7 +1565,6 @@ int vp9_full_search_sadx8(const MACROBLOCK *x, const MV *ref_mv,
              bestsad = thissad;
              best_mv->row = r;
              best_mv->col = c;
-            bestaddress = check_here;
            }
          }
  
@@ -1790,7 +1586,6 @@ int vp9_full_search_sadx8(const MACROBLOCK *x, const MV *ref_mv,
            bestsad = thissad;
            best_mv->row = r;
            best_mv->col = c;
-          bestaddress = check_here;
          }
        }
  
@@ -1798,17 +1593,7 @@ int vp9_full_search_sadx8(const MACROBLOCK *x, const MV *ref_mv,
        c++;
      }
    }
-
-  this_mv.row = best_mv->row * 8;
-  this_mv.col = best_mv->col * 8;
-
-  if (bestsad < INT_MAX)
-    return fn_ptr->vf(what, what_stride, bestaddress, in_what_stride,
-                      (unsigned int *)(&thissad)) +
-                      mv_err_cost(&this_mv, center_mv,
-                                  mvjcost, mvcost, x->errorperbit);
-  else
-    return INT_MAX;
+  return bestsad;
  }
  
  int vp9_refining_search_sad_c(const MACROBLOCK *x,
@@ -1863,19 +1648,9 @@ int vp9_refining_search_sad_c(const MACROBLOCK *x,
      } else {
        ref_mv->row += neighbors[best_site].row;
        ref_mv->col += neighbors[best_site].col;
-      best_address = &in_what[ref_mv->row * in_what_stride + ref_mv->col];
      }
    }
-
-  if (bestsad < INT_MAX) {
-    unsigned int unused;
-    const MV mv = {ref_mv->row * 8, ref_mv->col * 8};
-    return fn_ptr->vf(what, what_stride, best_address, in_what_stride,
-                      &unused) +
-        mv_err_cost(&mv, center_mv, mvjcost, mvcost, x->errorperbit);
-  } else {
-    return INT_MAX;
-  }
+  return bestsad;
  }
  
  int vp9_refining_search_sadx4(const MACROBLOCK *x,
@@ -1887,7 +1662,6 @@ int vp9_refining_search_sadx4(const MACROBLOCK *x,
    const MACROBLOCKD *const xd = &x->e_mbd;
    MV neighbors[4] = {{ -1, 0}, {0, -1}, {0, 1}, {1, 0}};
    int i, j;
-  int this_row_offset, this_col_offset;
  
    const int what_stride = x->plane[0].src.stride;
    const int in_what_stride = xd->plane[0].pre[0].stride;
@@ -1895,8 +1669,6 @@ int vp9_refining_search_sadx4(const MACROBLOCK *x,
    const uint8_t *best_address = xd->plane[0].pre[0].buf +
                            (ref_mv->row * xd->plane[0].pre[0].stride) +
                            ref_mv->col;
-  unsigned int thissad;
-  MV this_mv;
  
    const MV fcenter_mv = {center_mv->row >> 3, center_mv->col >> 3};
  
@@ -1928,8 +1700,8 @@ int vp9_refining_search_sadx4(const MACROBLOCK *x,
  
        for (j = 0; j < 4; j++) {
          if (sad_array[j] < bestsad) {
-          this_mv.row = ref_mv->row + neighbors[j].row;
-          this_mv.col = ref_mv->col + neighbors[j].col;
+          const MV this_mv = {ref_mv->row + neighbors[j].row,
+                              ref_mv->col + neighbors[j].col};
            sad_array[j] += mvsad_err_cost(&this_mv, &fcenter_mv,
                                           mvjsadcost, mvsadcost, error_per_bit);
  
@@ -1941,21 +1713,16 @@ int vp9_refining_search_sadx4(const MACROBLOCK *x,
        }
      } else {
        for (j = 0; j < 4; j++) {
-        this_row_offset = ref_mv->row + neighbors[j].row;
-        this_col_offset = ref_mv->col + neighbors[j].col;
+        const MV this_mv = {ref_mv->row + neighbors[j].row,
+                            ref_mv->col + neighbors[j].col};
  
-        if ((this_col_offset > x->mv_col_min) &&
-            (this_col_offset < x->mv_col_max) &&
-            (this_row_offset > x->mv_row_min) &&
-            (this_row_offset < x->mv_row_max)) {
+        if (is_mv_in(x, &this_mv)) {
            const uint8_t *check_here = neighbors[j].row * in_what_stride +
                                        neighbors[j].col + best_address;
-          thissad = fn_ptr->sdf(what, what_stride, check_here, in_what_stride,
-                                bestsad);
+          unsigned int thissad = fn_ptr->sdf(what, what_stride, check_here,
+                                             in_what_stride, bestsad);
  
            if (thissad < bestsad) {
-            this_mv.row = this_row_offset;
-            this_mv.col = this_col_offset;
              thissad += mvsad_err_cost(&this_mv, &fcenter_mv,
                                        mvjsadcost, mvsadcost, error_per_bit);
  
@@ -1978,16 +1745,7 @@ int vp9_refining_search_sadx4(const MACROBLOCK *x,
      }
    }
  
-  this_mv.row = ref_mv->row * 8;
-  this_mv.col = ref_mv->col * 8;
-
-  if (bestsad < INT_MAX)
-    return fn_ptr->vf(what, what_stride, best_address, in_what_stride,
-                      (unsigned int *)(&thissad)) +
-                      mv_err_cost(&this_mv, center_mv,
-                                  mvjcost, mvcost, x->errorperbit);
-  else
-    return INT_MAX;
+  return bestsad;
  }
  
  // This function is called when we do joint motion search in comp_inter_inter
@@ -2052,21 +1810,7 @@ int vp9_refining_search_8p_c(const MACROBLOCK *x,
      } else {
        ref_mv->row += neighbors[best_site].row;
        ref_mv->col += neighbors[best_site].col;
-      best_address = &in_what[ref_mv->row * in_what_stride + ref_mv->col];
      }
    }
-
-  this_mv.row = ref_mv->row * 8;
-  this_mv.col = ref_mv->col * 8;
-
-  if (bestsad < INT_MAX) {
-    // FIXME(rbultje, yunqing): add full-pixel averaging variance functions
-    // so we don't have to use the subpixel with xoff=0,yoff=0 here.
-    return fn_ptr->svaf(best_address, in_what_stride, 0, 0, what, what_stride,
-                        (unsigned int *)(&thissad), second_pred) +
-                        mv_err_cost(&this_mv, center_mv,
-                                    mvjcost, mvcost, x->errorperbit);
-  } else {
-    return INT_MAX;
-  }
+  return bestsad;
  }
diff --git a/vp9/encoder/vp9_mcomp.h b/vp9/encoder/vp9_mcomp.h

index 586a74c..0d12710 100644 (file)
--- a/vp9/encoder/vp9_mcomp.h
+++ b/vp9/encoder/vp9_mcomp.h
@@ -35,6 +35,18 @@ extern "C" {
  void vp9_set_mv_search_range(MACROBLOCK *x, const MV *mv);
  int vp9_mv_bit_cost(const MV *mv, const MV *ref,
                      const int *mvjcost, int *mvcost[2], int weight);
+
+// Utility to compute variance + MV rate cost for a given MV
+int vp9_get_mvpred_var(const MACROBLOCK *x,
+                       const MV *best_mv, const MV *center_mv,
+                       const vp9_variance_fn_ptr_t *vfp,
+                       int use_mvcost);
+int vp9_get_mvpred_av_var(const MACROBLOCK *x,
+                          MV *best_mv,
+                          const MV *center_mv,
+                          const uint8_t *second_pred,
+                          const vp9_variance_fn_ptr_t *vfp,
+                          int use_mvcost);
  void vp9_init_dsmotion_compensation(MACROBLOCK *x, int stride);
  void vp9_init3smotion_compensation(MACROBLOCK *x,  int stride);
  
@@ -42,47 +54,27 @@ struct VP9_COMP;
  int vp9_init_search_range(struct VP9_COMP *cpi, int size);
  
  // Runs sequence of diamond searches in smaller steps for RD
-int vp9_full_pixel_diamond(struct VP9_COMP *cpi, MACROBLOCK *x,
+int vp9_full_pixel_diamond(const struct VP9_COMP *cpi, MACROBLOCK *x,
                             MV *mvp_full, int step_param,
                             int sadpb, int further_steps, int do_refine,
                             const vp9_variance_fn_ptr_t *fn_ptr,
                             const MV *ref_mv, MV *dst_mv);
  
-int vp9_hex_search(const MACROBLOCK *x,
-                   MV *ref_mv,
-                   int search_param,
-                   int error_per_bit,
-                   int do_init_search,
-                   const vp9_variance_fn_ptr_t *vf,
-                   int use_mvcost,
-                   const MV *center_mv,
-                   MV *best_mv);
-int vp9_bigdia_search(const MACROBLOCK *x,
-                      MV *ref_mv,
-                      int search_param,
-                      int error_per_bit,
-                      int do_init_search,
-                      const vp9_variance_fn_ptr_t *vf,
-                      int use_mvcost,
-                      const MV *center_mv,
-                      MV *best_mv);
-int vp9_square_search(const MACROBLOCK *x,
-                      MV *ref_mv,
-                      int search_param,
-                      int error_per_bit,
-                      int do_init_search,
-                      const vp9_variance_fn_ptr_t *vf,
-                      int use_mvcost,
-                      const MV *center_mv,
-                      MV *best_mv);
-int vp9_fast_hex_search(const MACROBLOCK *x,
-                        MV *ref_mv,
-                        int search_param,
-                        int sad_per_bit,
-                        const vp9_variance_fn_ptr_t *vfp,
-                        int use_mvcost,
-                        const MV *center_mv,
-                        MV *best_mv);
+typedef int (integer_mv_pattern_search_fn) (
+    const MACROBLOCK *x,
+    MV *ref_mv,
+    int search_param,
+    int error_per_bit,
+    int do_init_search,
+    const vp9_variance_fn_ptr_t *vf,
+    int use_mvcost,
+    const MV *center_mv,
+    MV *best_mv);
+
+integer_mv_pattern_search_fn vp9_hex_search;
+integer_mv_pattern_search_fn vp9_bigdia_search;
+integer_mv_pattern_search_fn vp9_square_search;
+integer_mv_pattern_search_fn vp9_fast_hex_search;
  
  typedef int (fractional_mv_step_fp) (
      const MACROBLOCK *x,
diff --git a/vp9/encoder/vp9_onyx_if.c b/vp9/encoder/vp9_onyx_if.c

index 8dd4e6d..1e5ed82 100644 (file)
--- a/vp9/encoder/vp9_onyx_if.c
+++ b/vp9/encoder/vp9_onyx_if.c
@@ -841,6 +841,9 @@ static void set_rt_speed_feature(VP9_COMMON *cm,
    }
    if (speed >= 5) {
      int i;
+    sf->mode_search_skip_flags |= FLAG_SKIP_COMP_REFMISMATCH |
+                                  FLAG_EARLY_TERMINATE;
+    sf->use_fast_coef_costing = 0;
      sf->adaptive_rd_thresh = 5;
      sf->auto_min_max_partition_size = frame_is_intra_only(cm) ?
          RELAXED_NEIGHBORING_MIN_MAX : STRICT_NEIGHBORING_MIN_MAX;
@@ -852,9 +855,15 @@ static void set_rt_speed_feature(VP9_COMMON *cm,
        sf->intra_y_mode_mask[i] = INTRA_DC_H_V;
        sf->intra_uv_mode_mask[i] = INTRA_DC_ONLY;
      }
+    sf->intra_y_mode_mask[TX_32X32] = INTRA_DC_ONLY;
      sf->frame_parameter_update = 0;
      sf->encode_breakout_thresh = 1000;
      sf->search_method = FAST_HEX;
+    sf->disable_inter_mode_mask[BLOCK_32X32] = 1 << INTER_OFFSET(ZEROMV);
+    sf->disable_inter_mode_mask[BLOCK_32X64] = ~(1 << INTER_OFFSET(NEARESTMV));
+    sf->disable_inter_mode_mask[BLOCK_64X32] = ~(1 << INTER_OFFSET(NEARESTMV));
+    sf->disable_inter_mode_mask[BLOCK_64X64] = ~(1 << INTER_OFFSET(NEARESTMV));
+    sf->max_intra_bsize = BLOCK_32X32;
    }
    if (speed >= 6) {
      sf->partition_search_type = VAR_BASED_FIXED_PARTITION;
@@ -933,6 +942,7 @@ void vp9_set_speed_features(VP9_COMP *cpi) {
    sf->encode_breakout_thresh = 0;
    for (i = 0; i < BLOCK_SIZES; ++i)
      sf->disable_inter_mode_mask[i] = 0;
+  sf->max_intra_bsize = BLOCK_64X64;
  
    switch (cpi->oxcf.mode) {
      case MODE_BESTQUALITY:
@@ -1309,8 +1319,7 @@ static void set_tile_limits(VP9_COMP *cpi) {
    cm->log2_tile_rows = cpi->oxcf.tile_rows;
  }
  
-static void init_config(VP9_PTR ptr, VP9_CONFIG *oxcf) {
-  VP9_COMP *cpi = (VP9_COMP *)(ptr);
+static void init_config(struct VP9_COMP *cpi, VP9_CONFIG *oxcf) {
    VP9_COMMON *const cm = &cpi->common;
    int i;
  
@@ -1335,7 +1344,7 @@ static void init_config(VP9_PTR ptr, VP9_CONFIG *oxcf) {
    }
  
    // change includes all joint functionality
-  vp9_change_config(ptr, oxcf);
+  vp9_change_config(cpi, oxcf);
  
    // Initialize active best and worst q and average q values.
    if (cpi->pass == 0 && cpi->oxcf.end_usage == USAGE_STREAM_FROM_SERVER) {
@@ -1379,8 +1388,7 @@ static void init_config(VP9_PTR ptr, VP9_CONFIG *oxcf) {
      cpi->fixed_divide[i] = 0x80000 / i;
  }
  
-void vp9_change_config(VP9_PTR ptr, VP9_CONFIG *oxcf) {
-  VP9_COMP *cpi = (VP9_COMP *)(ptr);
+void vp9_change_config(struct VP9_COMP *cpi, VP9_CONFIG *oxcf) {
    VP9_COMMON *const cm = &cpi->common;
  
    if (!cpi || !oxcf)
@@ -1698,30 +1706,19 @@ static void free_pick_mode_context(MACROBLOCK *x) {
    }
  }
  
-VP9_PTR vp9_create_compressor(VP9_CONFIG *oxcf) {
+VP9_COMP *vp9_create_compressor(VP9_CONFIG *oxcf) {
    int i, j;
-  volatile union {
-    VP9_COMP *cpi;
-    VP9_PTR   ptr;
-  } ctx;
+  VP9_COMP *cpi = vpx_memalign(32, sizeof(VP9_COMP));
+  VP9_COMMON *cm = cpi != NULL ? &cpi->common : NULL;
  
-  VP9_COMP *cpi;
-  VP9_COMMON *cm;
-
-  cpi = ctx.cpi = vpx_memalign(32, sizeof(VP9_COMP));
-  // Check that the CPI instance is valid
-  if (!cpi)
-    return 0;
-
-  cm = &cpi->common;
+  if (!cm)
+    return NULL;
  
    vp9_zero(*cpi);
  
    if (setjmp(cm->error.jmp)) {
-    VP9_PTR ptr = ctx.ptr;
-
-    ctx.cpi->common.error.setjmp = 0;
-    vp9_remove_compressor(&ptr);
+    cm->error.setjmp = 0;
+    vp9_remove_compressor(cpi);
      return 0;
    }
  
@@ -1734,11 +1731,10 @@ VP9_PTR vp9_create_compressor(VP9_CONFIG *oxcf) {
  
    cpi->use_svc = 0;
  
-  init_config((VP9_PTR)cpi, oxcf);
-
+  init_config(cpi, oxcf);
    init_pick_mode_context(cpi);
  
-  cm->current_video_frame   = 0;
+  cm->current_video_frame = 0;
  
    // Set reference frame sign bias for ALTREF frame to 1 (for now)
    cm->ref_frame_sign_bias[ALTREF_FRAME] = 1;
@@ -1746,8 +1742,8 @@ VP9_PTR vp9_create_compressor(VP9_CONFIG *oxcf) {
    cpi->rc.baseline_gf_interval = DEFAULT_GF_INTERVAL;
  
    cpi->gold_is_last = 0;
-  cpi->alt_is_last  = 0;
-  cpi->gold_is_alt  = 0;
+  cpi->alt_is_last = 0;
+  cpi->gold_is_alt = 0;
  
    // Create the encoder segmentation map and set all entries to 0
    CHECK_MEM_ERROR(cm, cpi->segmentation_map,
@@ -1883,13 +1879,12 @@ VP9_PTR vp9_create_compressor(VP9_CONFIG *oxcf) {
    if (cpi->pass == 1) {
      vp9_init_first_pass(cpi);
    } else if (cpi->pass == 2) {
-    size_t packet_sz = sizeof(FIRSTPASS_STATS);
-    int packets = (int)(oxcf->two_pass_stats_in.sz / packet_sz);
+    const size_t packet_sz = sizeof(FIRSTPASS_STATS);
+    const int packets = (int)(oxcf->two_pass_stats_in.sz / packet_sz);
  
      cpi->twopass.stats_in_start = oxcf->two_pass_stats_in.buf;
      cpi->twopass.stats_in = cpi->twopass.stats_in_start;
-    cpi->twopass.stats_in_end = (void *)((char *)cpi->twopass.stats_in
-                                         + (packets - 1) * packet_sz);
+    cpi->twopass.stats_in_end = &cpi->twopass.stats_in[packets - 1];
      vp9_init_second_pass(cpi);
    }
  
@@ -2015,11 +2010,10 @@ VP9_PTR vp9_create_compressor(VP9_CONFIG *oxcf) {
    vp9_zero(cpi->mode_test_hits);
  #endif
  
-  return (VP9_PTR) cpi;
+  return cpi;
  }
  
-void vp9_remove_compressor(VP9_PTR *ptr) {
-  VP9_COMP *cpi = (VP9_COMP *)(*ptr);
+void vp9_remove_compressor(VP9_COMP *cpi) {
    int i;
  
    if (!cpi)
@@ -2126,7 +2120,6 @@ void vp9_remove_compressor(VP9_PTR *ptr) {
  
    vp9_remove_common(&cpi->common);
    vpx_free(cpi);
-  *ptr = 0;
  
  #ifdef OUTPUT_YUV_SRC
    fclose(yuv_file);
@@ -2253,9 +2246,7 @@ static void generate_psnr_packet(VP9_COMP *cpi) {
    vpx_codec_pkt_list_add(cpi->output_pkt_list, &pkt);
  }
  
-int vp9_use_as_reference(VP9_PTR ptr, int ref_frame_flags) {
-  VP9_COMP *cpi = (VP9_COMP *)(ptr);
-
+int vp9_use_as_reference(VP9_COMP *cpi, int ref_frame_flags) {
    if (ref_frame_flags > 7)
      return -1;
  
@@ -2263,9 +2254,7 @@ int vp9_use_as_reference(VP9_PTR ptr, int ref_frame_flags) {
    return 0;
  }
  
-int vp9_update_reference(VP9_PTR ptr, int ref_frame_flags) {
-  VP9_COMP *cpi = (VP9_COMP *)(ptr);
-
+int vp9_update_reference(VP9_COMP *cpi, int ref_frame_flags) {
    if (ref_frame_flags > 7)
      return -1;
  
@@ -2299,9 +2288,8 @@ static YV12_BUFFER_CONFIG *get_vp9_ref_frame_buffer(VP9_COMP *cpi,
    return ref_frame == NONE ? NULL : get_ref_frame_buffer(cpi, ref_frame);
  }
  
-int vp9_copy_reference_enc(VP9_PTR ptr, VP9_REFFRAME ref_frame_flag,
+int vp9_copy_reference_enc(VP9_COMP *cpi, VP9_REFFRAME ref_frame_flag,
                             YV12_BUFFER_CONFIG *sd) {
-  VP9_COMP *const cpi = (VP9_COMP *)ptr;
    YV12_BUFFER_CONFIG *cfg = get_vp9_ref_frame_buffer(cpi, ref_frame_flag);
    if (cfg) {
      vp8_yv12_copy_frame(cfg, sd);
@@ -2311,8 +2299,7 @@ int vp9_copy_reference_enc(VP9_PTR ptr, VP9_REFFRAME ref_frame_flag,
    }
  }
  
-int vp9_get_reference_enc(VP9_PTR ptr, int index, YV12_BUFFER_CONFIG **fb) {
-  VP9_COMP *cpi = (VP9_COMP *)ptr;
+int vp9_get_reference_enc(VP9_COMP *cpi, int index, YV12_BUFFER_CONFIG **fb) {
    VP9_COMMON *cm = &cpi->common;
  
    if (index < 0 || index >= REF_FRAMES)
@@ -2322,9 +2309,8 @@ int vp9_get_reference_enc(VP9_PTR ptr, int index, YV12_BUFFER_CONFIG **fb) {
    return 0;
  }
  
-int vp9_set_reference_enc(VP9_PTR ptr, VP9_REFFRAME ref_frame_flag,
+int vp9_set_reference_enc(VP9_COMP *cpi, VP9_REFFRAME ref_frame_flag,
                            YV12_BUFFER_CONFIG *sd) {
-  VP9_COMP *cpi = (VP9_COMP *)ptr;
    YV12_BUFFER_CONFIG *cfg = get_vp9_ref_frame_buffer(cpi, ref_frame_flag);
    if (cfg) {
      vp8_yv12_copy_frame(sd, cfg);
@@ -2334,9 +2320,9 @@ int vp9_set_reference_enc(VP9_PTR ptr, VP9_REFFRAME ref_frame_flag,
    }
  }
  
-int vp9_update_entropy(VP9_PTR comp, int update) {
-  ((VP9_COMP *)comp)->ext_refresh_frame_context = update;
-  ((VP9_COMP *)comp)->ext_refresh_frame_context_pending = 1;
+int vp9_update_entropy(VP9_COMP * cpi, int update) {
+  cpi->ext_refresh_frame_context = update;
+  cpi->ext_refresh_frame_context_pending = 1;
    return 0;
  }
  
@@ -2681,7 +2667,6 @@ static void loopfilter_frame(VP9_COMP *cpi, VP9_COMMON *cm) {
    }
  
    if (lf->filter_level > 0) {
-    vp9_set_alt_lf_level(cpi, lf->filter_level);
      vp9_loop_filter_frame(cm, xd, lf->filter_level, 0, 0);
    }
  
@@ -3251,6 +3236,8 @@ static void encode_frame_to_data_rate(VP9_COMP *cpi,
      set_high_precision_mv(cpi, q < HIGH_PRECISION_MV_QTHRESH);
    }
  
+  vp9_set_speed_features(cpi);
+
    if (cpi->sf.recode_loop == DISALLOW_RECODE) {
      encode_without_recode_loop(cpi, size, dest, q);
    } else {
@@ -3452,10 +3439,9 @@ static void check_initial_width(VP9_COMP *cpi, int subsampling_x,
  }
  
  
-int vp9_receive_raw_frame(VP9_PTR ptr, unsigned int frame_flags,
+int vp9_receive_raw_frame(VP9_COMP *cpi, unsigned int frame_flags,
                            YV12_BUFFER_CONFIG *sd, int64_t time_stamp,
                            int64_t end_time) {
-  VP9_COMP *cpi = (VP9_COMP *)ptr;
    VP9_COMMON *cm = &cpi->common;
    struct vpx_usec_timer timer;
    int res = 0;
@@ -3538,10 +3524,9 @@ void adjust_frame_rate(VP9_COMP *cpi) {
    cpi->last_end_time_stamp_seen = cpi->source->ts_end;
  }
  
-int vp9_get_compressed_data(VP9_PTR ptr, unsigned int *frame_flags,
+int vp9_get_compressed_data(VP9_COMP *cpi, unsigned int *frame_flags,
                              size_t *size, uint8_t *dest,
                              int64_t *time_stamp, int64_t *time_end, int flush) {
-  VP9_COMP *cpi = (VP9_COMP *) ptr;
    VP9_COMMON *cm = &cpi->common;
    MACROBLOCKD *xd = &cpi->mb.e_mbd;
    struct vpx_usec_timer  cmptimer;
@@ -3851,9 +3836,8 @@ int vp9_get_compressed_data(VP9_PTR ptr, unsigned int *frame_flags,
    return 0;
  }
  
-int vp9_get_preview_raw_frame(VP9_PTR comp, YV12_BUFFER_CONFIG *dest,
+int vp9_get_preview_raw_frame(VP9_COMP *cpi, YV12_BUFFER_CONFIG *dest,
                                vp9_ppflags_t *flags) {
-  VP9_COMP *cpi = (VP9_COMP *)comp;
    VP9_COMMON *cm = &cpi->common;
  
    if (!cm->show_frame) {
@@ -3881,11 +3865,10 @@ int vp9_get_preview_raw_frame(VP9_PTR comp, YV12_BUFFER_CONFIG *dest,
    }
  }
  
-int vp9_set_roimap(VP9_PTR comp, unsigned char *map, unsigned int rows,
+int vp9_set_roimap(VP9_COMP *cpi, unsigned char *map, unsigned int rows,
                     unsigned int cols, int delta_q[MAX_SEGMENTS],
                     int delta_lf[MAX_SEGMENTS],
                     unsigned int threshold[MAX_SEGMENTS]) {
-  VP9_COMP *cpi = (VP9_COMP *) comp;
    signed char feature_data[SEG_LVL_MAX][MAX_SEGMENTS];
    struct segmentation *seg = &cpi->common.seg;
    int i;
@@ -3931,10 +3914,8 @@ int vp9_set_roimap(VP9_PTR comp, unsigned char *map, unsigned int rows,
    return 0;
  }
  
-int vp9_set_active_map(VP9_PTR comp, unsigned char *map,
+int vp9_set_active_map(VP9_COMP *cpi, unsigned char *map,
                         unsigned int rows, unsigned int cols) {
-  VP9_COMP *cpi = (VP9_COMP *) comp;
-
    if (rows == cpi->common.mb_rows && cols == cpi->common.mb_cols) {
      if (map) {
        vpx_memcpy(cpi->active_map, map, rows * cols);
@@ -3950,9 +3931,8 @@ int vp9_set_active_map(VP9_PTR comp, unsigned char *map,
    }
  }
  
-int vp9_set_internal_size(VP9_PTR comp,
+int vp9_set_internal_size(VP9_COMP *cpi,
                            VPX_SCALING horiz_mode, VPX_SCALING vert_mode) {
-  VP9_COMP *cpi = (VP9_COMP *) comp;
    VP9_COMMON *cm = &cpi->common;
    int hr = 0, hs = 0, vr = 0, vs = 0;
  
@@ -3972,9 +3952,8 @@ int vp9_set_internal_size(VP9_PTR comp,
    return 0;
  }
  
-int vp9_set_size_literal(VP9_PTR comp, unsigned int width,
+int vp9_set_size_literal(VP9_COMP *cpi, unsigned int width,
                           unsigned int height) {
-  VP9_COMP *cpi = (VP9_COMP *)comp;
    VP9_COMMON *cm = &cpi->common;
  
    check_initial_width(cpi, 1, 1);
@@ -4009,8 +3988,7 @@ int vp9_set_size_literal(VP9_PTR comp, unsigned int width,
    return 0;
  }
  
-void vp9_set_svc(VP9_PTR comp, int use_svc) {
-  VP9_COMP *cpi = (VP9_COMP *)comp;
+void vp9_set_svc(VP9_COMP *cpi, int use_svc) {
    cpi->use_svc = use_svc;
    return;
  }
@@ -4040,6 +4018,6 @@ int vp9_calc_ss_err(const YV12_BUFFER_CONFIG *source,
  }
  
  
-int vp9_get_quantizer(VP9_PTR c) {
-  return ((VP9_COMP *)c)->common.base_qindex;
+int vp9_get_quantizer(VP9_COMP *cpi) {
+  return cpi->common.base_qindex;
  }
diff --git a/vp9/encoder/vp9_onyx_int.h b/vp9/encoder/vp9_onyx_int.h

index f1e5e3a..a8e7b69 100644 (file)
--- a/vp9/encoder/vp9_onyx_int.h
+++ b/vp9/encoder/vp9_onyx_int.h
@@ -16,10 +16,11 @@
  #include "./vpx_config.h"
  #include "vpx_ports/mem.h"
  #include "vpx/internal/vpx_codec_internal.h"
+#include "vpx/vp8cx.h"
  
+#include "vp9/common/vp9_ppflags.h"
  #include "vp9/common/vp9_entropy.h"
  #include "vp9/common/vp9_entropymode.h"
-#include "vp9/common/vp9_onyx.h"
  #include "vp9/common/vp9_onyxc_int.h"
  
  #include "vp9/encoder/vp9_encodemb.h"
@@ -30,7 +31,6 @@
  #include "vp9/encoder/vp9_quantize.h"
  #include "vp9/encoder/vp9_ratectrl.h"
  #include "vp9/encoder/vp9_tokenize.h"
-#include "vp9/encoder/vp9_treewriter.h"
  #include "vp9/encoder/vp9_variance.h"
  
  #ifdef __cplusplus
@@ -424,6 +424,11 @@ typedef struct {
    // This feature controls whether we do the expensive context update and
    // calculation in the rd coefficient costing loop.
    int use_fast_coef_costing;
+
+  // This variable controls the maximum block size where intra blocks can be
+  // used in inter frames.
+  // TODO(aconverse): Fold this into one of the other many mode skips
+  BLOCK_SIZE max_intra_bsize;
  } SPEED_FEATURES;
  
  typedef struct {
@@ -436,6 +441,166 @@ typedef struct {
    int avg_frame_size;
  } LAYER_CONTEXT;
  
+#define MAX_SEGMENTS 8
+
+typedef enum {
+  NORMAL      = 0,
+  FOURFIVE    = 1,
+  THREEFIVE   = 2,
+  ONETWO      = 3
+} VPX_SCALING;
+
+typedef enum {
+  VP9_LAST_FLAG = 1,
+  VP9_GOLD_FLAG = 2,
+  VP9_ALT_FLAG = 4
+} VP9_REFFRAME;
+
+typedef enum {
+  USAGE_LOCAL_FILE_PLAYBACK   = 0x0,
+  USAGE_STREAM_FROM_SERVER    = 0x1,
+  USAGE_CONSTRAINED_QUALITY   = 0x2,
+  USAGE_CONSTANT_QUALITY      = 0x3,
+} END_USAGE;
+
+
+typedef enum {
+  MODE_GOODQUALITY    = 0x1,
+  MODE_BESTQUALITY    = 0x2,
+  MODE_FIRSTPASS      = 0x3,
+  MODE_SECONDPASS     = 0x4,
+  MODE_SECONDPASS_BEST = 0x5,
+  MODE_REALTIME       = 0x6,
+} MODE;
+
+typedef enum {
+  FRAMEFLAGS_KEY    = 1,
+  FRAMEFLAGS_GOLDEN = 2,
+  FRAMEFLAGS_ALTREF = 4,
+} FRAMETYPE_FLAGS;
+
+typedef enum {
+  NO_AQ = 0,
+  VARIANCE_AQ = 1,
+  COMPLEXITY_AQ = 2,
+  AQ_MODES_COUNT  // This should always be the last member of the enum
+} AQ_MODES;
+
+typedef struct {
+  int version;  // 4 versions of bitstream defined:
+                //   0 - best quality/slowest decode,
+                //   3 - lowest quality/fastest decode
+  int width;  // width of data passed to the compressor
+  int height;  // height of data passed to the compressor
+  double framerate;  // set to passed in framerate
+  int64_t target_bandwidth;  // bandwidth to be used in kilobits per second
+
+  int noise_sensitivity;  // pre processing blur: recommendation 0
+  int sharpness;  // sharpening output: recommendation 0:
+  int cpu_used;
+  unsigned int rc_max_intra_bitrate_pct;
+
+  // mode ->
+  // (0)=Realtime/Live Encoding. This mode is optimized for realtime
+  //     encoding (for example, capturing a television signal or feed from
+  //     a live camera). ( speed setting controls how fast )
+  // (1)=Good Quality Fast Encoding. The encoder balances quality with the
+  //     amount of time it takes to encode the output. ( speed setting
+  //     controls how fast )
+  // (2)=One Pass - Best Quality. The encoder places priority on the
+  //     quality of the output over encoding speed. The output is compressed
+  //     at the highest possible quality. This option takes the longest
+  //     amount of time to encode. ( speed setting ignored )
+  // (3)=Two Pass - First Pass. The encoder generates a file of statistics
+  //     for use in the second encoding pass. ( speed setting controls how
+  //     fast )
+  // (4)=Two Pass - Second Pass. The encoder uses the statistics that were
+  //     generated in the first encoding pass to create the compressed
+  //     output. ( speed setting controls how fast )
+  // (5)=Two Pass - Second Pass Best.  The encoder uses the statistics that
+  //     were generated in the first encoding pass to create the compressed
+  //     output using the highest possible quality, and taking a
+  //    longer amount of time to encode.. ( speed setting ignored )
+  int mode;
+
+  // Key Framing Operations
+  int auto_key;  // autodetect cut scenes and set the keyframes
+  int key_freq;  // maximum distance to key frame.
+
+  int lag_in_frames;  // how many frames lag before we start encoding
+
+  // ----------------------------------------------------------------
+  // DATARATE CONTROL OPTIONS
+
+  int end_usage;  // vbr or cbr
+
+  // buffer targeting aggressiveness
+  int under_shoot_pct;
+  int over_shoot_pct;
+
+  // buffering parameters
+  int64_t starting_buffer_level;  // in seconds
+  int64_t optimal_buffer_level;
+  int64_t maximum_buffer_size;
+
+  // Frame drop threshold.
+  int drop_frames_water_mark;
+
+  // controlling quality
+  int fixed_q;
+  int worst_allowed_q;
+  int best_allowed_q;
+  int cq_level;
+  int lossless;
+  int aq_mode;  // Adaptive Quantization mode
+
+  // two pass datarate control
+  int two_pass_vbrbias;        // two pass datarate control tweaks
+  int two_pass_vbrmin_section;
+  int two_pass_vbrmax_section;
+  // END DATARATE CONTROL OPTIONS
+  // ----------------------------------------------------------------
+
+  // Spatial and temporal scalability.
+  int ss_number_layers;  // Number of spatial layers.
+  int ts_number_layers;  // Number of temporal layers.
+  // Bitrate allocation for spatial layers.
+  int ss_target_bitrate[VPX_SS_MAX_LAYERS];
+  // Bitrate allocation (CBR mode) and framerate factor, for temporal layers.
+  int ts_target_bitrate[VPX_TS_MAX_LAYERS];
+  int ts_rate_decimator[VPX_TS_MAX_LAYERS];
+
+  // these parameters aren't to be used in final build don't use!!!
+  int play_alternate;
+  int alt_freq;
+
+  int encode_breakout;  // early breakout : for video conf recommend 800
+
+  /* Bitfield defining the error resiliency features to enable.
+   * Can provide decodable frames after losses in previous
+   * frames and decodable partitions after losses in the same frame.
+   */
+  unsigned int error_resilient_mode;
+
+  /* Bitfield defining the parallel decoding mode where the
+   * decoding in successive frames may be conducted in parallel
+   * just by decoding the frame headers.
+   */
+  unsigned int frame_parallel_decoding_mode;
+
+  int arnr_max_frames;
+  int arnr_strength;
+  int arnr_type;
+
+  int tile_columns;
+  int tile_rows;
+
+  struct vpx_fixed_buf         two_pass_stats_in;
+  struct vpx_codec_pkt_list  *output_pkt_list;
+
+  vp8e_tuning tuning;
+} VP9_CONFIG;
+
  typedef struct VP9_COMP {
    DECLARE_ALIGNED(16, int16_t, y_quant[QINDEX_RANGE][8]);
    DECLARE_ALIGNED(16, int16_t, y_quant_shift[QINDEX_RANGE][8]);
@@ -684,11 +849,6 @@ typedef struct VP9_COMP {
    int max_arf_level;
  #endif
  
-#ifdef ENTROPY_STATS
-  int64_t mv_ref_stats[INTER_MODE_CONTEXTS][INTER_MODES - 1][2];
-#endif
-
-
  #ifdef MODE_TEST_HIT_STATS
    // Debug / test stats
    int64_t mode_test_hits[BLOCK_SIZES];
@@ -702,6 +862,60 @@ typedef struct VP9_COMP {
    PARTITION_CONTEXT left_seg_context[8];
  } VP9_COMP;
  
+void vp9_initialize_enc();
+
+struct VP9_COMP *vp9_create_compressor(VP9_CONFIG *oxcf);
+void vp9_remove_compressor(VP9_COMP *cpi);
+
+void vp9_change_config(VP9_COMP *cpi, VP9_CONFIG *oxcf);
+
+  // receive a frames worth of data. caller can assume that a copy of this
+  // frame is made and not just a copy of the pointer..
+int vp9_receive_raw_frame(VP9_COMP *cpi, unsigned int frame_flags,
+                          YV12_BUFFER_CONFIG *sd, int64_t time_stamp,
+                          int64_t end_time_stamp);
+
+int vp9_get_compressed_data(VP9_COMP *cpi, unsigned int *frame_flags,
+                            size_t *size, uint8_t *dest,
+                            int64_t *time_stamp, int64_t *time_end, int flush);
+
+int vp9_get_preview_raw_frame(VP9_COMP *cpi, YV12_BUFFER_CONFIG *dest,
+                              vp9_ppflags_t *flags);
+
+int vp9_use_as_reference(VP9_COMP *cpi, int ref_frame_flags);
+
+int vp9_update_reference(VP9_COMP *cpi, int ref_frame_flags);
+
+int vp9_copy_reference_enc(VP9_COMP *cpi, VP9_REFFRAME ref_frame_flag,
+                           YV12_BUFFER_CONFIG *sd);
+
+int vp9_get_reference_enc(VP9_COMP *cpi, int index,
+                          YV12_BUFFER_CONFIG **fb);
+
+int vp9_set_reference_enc(VP9_COMP *cpi, VP9_REFFRAME ref_frame_flag,
+                          YV12_BUFFER_CONFIG *sd);
+
+int vp9_update_entropy(VP9_COMP *cpi, int update);
+
+int vp9_set_roimap(VP9_COMP *cpi, unsigned char *map,
+                   unsigned int rows, unsigned int cols,
+                   int delta_q[MAX_SEGMENTS],
+                   int delta_lf[MAX_SEGMENTS],
+                   unsigned int threshold[MAX_SEGMENTS]);
+
+int vp9_set_active_map(VP9_COMP *cpi, unsigned char *map,
+                       unsigned int rows, unsigned int cols);
+
+int vp9_set_internal_size(VP9_COMP *cpi,
+                          VPX_SCALING horiz_mode, VPX_SCALING vert_mode);
+
+int vp9_set_size_literal(VP9_COMP *cpi, unsigned int width,
+                         unsigned int height);
+
+void vp9_set_svc(VP9_COMP *cpi, int use_svc);
+
+int vp9_get_quantizer(struct VP9_COMP *cpi);
+
  static int get_ref_frame_idx(const VP9_COMP *cpi,
                               MV_REFERENCE_FRAME ref_frame) {
    if (ref_frame == LAST_FRAME) {
diff --git a/vp9/encoder/vp9_picklpf.c b/vp9/encoder/vp9_picklpf.c

index 5b0ecf7..b5f4901 100644 (file)
--- a/vp9/encoder/vp9_picklpf.c
+++ b/vp9/encoder/vp9_picklpf.c
@@ -21,27 +21,18 @@
  #include "vp9/common/vp9_loopfilter.h"
  #include "./vpx_scale_rtcd.h"
  
-static int get_min_filter_level(VP9_COMP *cpi, int base_qindex) {
-  return 0;
-}
-
-static int get_max_filter_level(VP9_COMP *cpi, int base_qindex) {
+static int get_max_filter_level(VP9_COMP *cpi) {
    return cpi->twopass.section_intra_rating > 8 ? MAX_LOOP_FILTER * 3 / 4
                                                 : MAX_LOOP_FILTER;
  }
  
-// Stub function for now Alt LF not used
-void vp9_set_alt_lf_level(VP9_COMP *cpi, int filt_val) {
-}
  
  static int try_filter_frame(const YV12_BUFFER_CONFIG *sd, VP9_COMP *const cpi,
                              MACROBLOCKD *const xd, VP9_COMMON *const cm,
                              int filt_level, int partial_frame) {
    int filt_err;
  
-  vp9_set_alt_lf_level(cpi, filt_level);
    vp9_loop_filter_frame(cm, xd, filt_level, 1, partial_frame);
-
    filt_err = vp9_calc_ss_err(sd, cm->frame_to_show);
  
    // Re-instate the unfiltered frame
@@ -55,8 +46,8 @@ static void search_filter_level(const YV12_BUFFER_CONFIG *sd, VP9_COMP *cpi,
    MACROBLOCKD *const xd = &cpi->mb.e_mbd;
    VP9_COMMON *const cm = &cpi->common;
    struct loopfilter *const lf = &cm->lf;
-  const int min_filter_level = get_min_filter_level(cpi, cm->base_qindex);
-  const int max_filter_level = get_max_filter_level(cpi, cm->base_qindex);
+  const int min_filter_level = 0;
+  const int max_filter_level = get_max_filter_level(cpi);
    int best_err;
    int filt_best;
    int filt_direction = 0;
@@ -148,8 +139,8 @@ void vp9_pick_filter_level(const YV12_BUFFER_CONFIG *sd, VP9_COMP *cpi,
                                                      : cpi->oxcf.sharpness;
  
    if (method == 2) {
-    const int min_filter_level = get_min_filter_level(cpi, cm->base_qindex);
-    const int max_filter_level = get_max_filter_level(cpi, cm->base_qindex);
+    const int min_filter_level = 0;
+    const int max_filter_level = get_max_filter_level(cpi);
      const int q = vp9_ac_quant(cm->base_qindex, 0);
      // These values were determined by linear fitting the result of the
      // searched level
diff --git a/vp9/encoder/vp9_picklpf.h b/vp9/encoder/vp9_picklpf.h

index 0fc1f88..203ef87 100644 (file)
--- a/vp9/encoder/vp9_picklpf.h
+++ b/vp9/encoder/vp9_picklpf.h
@@ -19,8 +19,6 @@ extern "C" {
  struct yv12_buffer_config;
  struct VP9_COMP;
  
-void vp9_set_alt_lf_level(struct VP9_COMP *cpi, int filt_val);
-
  void vp9_pick_filter_level(const struct yv12_buffer_config *sd,
                             struct VP9_COMP *cpi, int method);
  #ifdef __cplusplus
diff --git a/vp9/encoder/vp9_pickmode.c b/vp9/encoder/vp9_pickmode.c

index f9d5909..3a73d3e 100644 (file)
--- a/vp9/encoder/vp9_pickmode.c
+++ b/vp9/encoder/vp9_pickmode.c
@@ -34,11 +34,11 @@ static int full_pixel_motion_search(VP9_COMP *cpi, MACROBLOCK *x,
    MB_MODE_INFO *mbmi = &xd->mi_8x8[0]->mbmi;
    struct buf_2d backup_yv12[MAX_MB_PLANE] = {{0}};
    int bestsme = INT_MAX;
-  int further_steps, step_param;
+  int step_param;
    int sadpb = x->sadperbit16;
    MV mvp_full;
    int ref = mbmi->ref_frame[0];
-  int_mv ref_mv = mbmi->ref_mvs[ref][0];
+  const MV ref_mv = mbmi->ref_mvs[ref][0].as_mv;
    int i;
  
    int tmp_col_min = x->mv_col_min;
@@ -62,12 +62,11 @@ static int full_pixel_motion_search(VP9_COMP *cpi, MACROBLOCK *x,
      vp9_setup_pre_planes(xd, 0, scaled_ref_frame, mi_row, mi_col, NULL);
    }
  
-  vp9_set_mv_search_range(x, &ref_mv.as_mv);
+  vp9_set_mv_search_range(x, &ref_mv);
  
    // TODO(jingning) exploiting adaptive motion search control in non-RD
    // mode decision too.
    step_param = 6;
-  further_steps = (cpi->sf.max_step_search_steps - 1) - step_param;
  
    for (i = LAST_FRAME; i <= LAST_FRAME && cpi->common.show_frame; ++i) {
      if ((x->pred_mv_sad[ref] >> 3) > x->pred_mv_sad[i]) {
@@ -88,26 +87,32 @@ static int full_pixel_motion_search(VP9_COMP *cpi, MACROBLOCK *x,
    mvp_full.row >>= 3;
  
    if (cpi->sf.search_method == FAST_HEX) {
-    bestsme = vp9_fast_hex_search(x, &mvp_full, step_param, sadpb,
-                                  &cpi->fn_ptr[bsize], 1,
-                                  &ref_mv.as_mv, &tmp_mv->as_mv);
+    // NOTE: this returns SAD
+    vp9_fast_hex_search(x, &mvp_full, step_param, sadpb, 0,
+                        &cpi->fn_ptr[bsize], 1,
+                        &ref_mv, &tmp_mv->as_mv);
    } else if (cpi->sf.search_method == HEX) {
-    bestsme = vp9_hex_search(x, &mvp_full, step_param, sadpb, 1,
-                             &cpi->fn_ptr[bsize], 1,
-                             &ref_mv.as_mv, &tmp_mv->as_mv);
+    // NOTE: this returns SAD
+    vp9_hex_search(x, &mvp_full, step_param, sadpb, 1,
+                   &cpi->fn_ptr[bsize], 1,
+                   &ref_mv, &tmp_mv->as_mv);
    } else if (cpi->sf.search_method == SQUARE) {
-    bestsme = vp9_square_search(x, &mvp_full, step_param, sadpb, 1,
-                                &cpi->fn_ptr[bsize], 1,
-                                &ref_mv.as_mv, &tmp_mv->as_mv);
+    // NOTE: this returns SAD
+    vp9_square_search(x, &mvp_full, step_param, sadpb, 1,
+                      &cpi->fn_ptr[bsize], 1,
+                      &ref_mv, &tmp_mv->as_mv);
    } else if (cpi->sf.search_method == BIGDIA) {
-    bestsme = vp9_bigdia_search(x, &mvp_full, step_param, sadpb, 1,
-                                &cpi->fn_ptr[bsize], 1,
-                                &ref_mv.as_mv, &tmp_mv->as_mv);
+    // NOTE: this returns SAD
+    vp9_bigdia_search(x, &mvp_full, step_param, sadpb, 1,
+                      &cpi->fn_ptr[bsize], 1,
+                      &ref_mv, &tmp_mv->as_mv);
    } else {
-    bestsme = vp9_full_pixel_diamond(cpi, x, &mvp_full, step_param,
-                                     sadpb, further_steps, 1,
-                                     &cpi->fn_ptr[bsize],
-                                     &ref_mv.as_mv, &tmp_mv->as_mv);
+    int further_steps = (cpi->sf.max_step_search_steps - 1) - step_param;
+    // NOTE: this returns variance
+    vp9_full_pixel_diamond(cpi, x, &mvp_full, step_param,
+                           sadpb, further_steps, 1,
+                           &cpi->fn_ptr[bsize],
+                           &ref_mv, &tmp_mv->as_mv);
    }
    x->mv_col_min = tmp_col_min;
    x->mv_col_max = tmp_col_max;
@@ -134,7 +139,7 @@ static int full_pixel_motion_search(VP9_COMP *cpi, MACROBLOCK *x,
    tmp_mv->as_mv.col = tmp_mv->as_mv.col * 8;
  
    // calculate the bit cost on motion vector
-  *rate_mv = vp9_mv_bit_cost(&tmp_mv->as_mv, &ref_mv.as_mv,
+  *rate_mv = vp9_mv_bit_cost(&tmp_mv->as_mv, &ref_mv,
                               x->nmvjointcost, x->mvcost, MV_COST_WEIGHT);
    return bestsme;
  }
@@ -182,6 +187,31 @@ static void sub_pixel_motion_search(VP9_COMP *cpi, MACROBLOCK *x,
    }
  }
  
+static void model_rd_for_sb_y(VP9_COMP *cpi, BLOCK_SIZE bsize,
+                              MACROBLOCK *x, MACROBLOCKD *xd,
+                              int *out_rate_sum, int64_t *out_dist_sum) {
+  // Note our transform coeffs are 8 times an orthogonal transform.
+  // Hence quantizer step is also 8 times. To get effective quantizer
+  // we need to divide by 8 before sending to modeling function.
+  unsigned int sse;
+  int rate;
+  int64_t dist;
+
+
+  struct macroblock_plane *const p = &x->plane[0];
+  struct macroblockd_plane *const pd = &xd->plane[0];
+  const BLOCK_SIZE bs = get_plane_block_size(bsize, pd);
+
+  (void) cpi->fn_ptr[bs].vf(p->src.buf, p->src.stride,
+                            pd->dst.buf, pd->dst.stride, &sse);
+
+  vp9_model_rd_from_var_lapndz(sse, 1 << num_pels_log2_lookup[bs],
+                               pd->dequant[1] >> 3, &rate, &dist);
+
+  *out_rate_sum = rate;
+  *out_dist_sum = dist << 4;
+}
+
  // TODO(jingning) placeholder for inter-frame non-RD mode decision.
  // this needs various further optimizations. to be continued..
  int64_t vp9_pick_inter_mode(VP9_COMP *cpi, MACROBLOCK *x,
@@ -203,11 +233,13 @@ int64_t vp9_pick_inter_mode(VP9_COMP *cpi, MACROBLOCK *x,
                                      VP9_ALT_FLAG };
    int64_t best_rd = INT64_MAX;
    int64_t this_rd = INT64_MAX;
-  static const int cost[4]= { 0, 2, 4, 6 };
  
    const int64_t inter_mode_thresh = 300;
    const int64_t intra_mode_cost = 50;
  
+  int rate = INT_MAX;
+  int64_t dist = INT64_MAX;
+
    x->skip_encode = cpi->sf.skip_encode_frame && x->q_index < QIDX_SKIP_THRESH;
  
    x->skip = 0;
@@ -240,7 +272,6 @@ int64_t vp9_pick_inter_mode(VP9_COMP *cpi, MACROBLOCK *x,
    }
  
    for (ref_frame = LAST_FRAME; ref_frame <= LAST_FRAME ; ++ref_frame) {
-    int rate_mv = 0;
      if (!(cpi->ref_frame_flags & flag_list[ref_frame]))
        continue;
  
@@ -253,9 +284,8 @@ int64_t vp9_pick_inter_mode(VP9_COMP *cpi, MACROBLOCK *x,
      mbmi->ref_frame[0] = ref_frame;
  
      for (this_mode = NEARESTMV; this_mode <= NEWMV; ++this_mode) {
-      int rate = cost[INTER_OFFSET(this_mode)]
-          << (num_pels_log2_lookup[bsize] - 4);
-      int64_t dist;
+      int rate_mv = 0;
+
        if (cpi->sf.disable_inter_mode_mask[bsize] &
            (1 << INTER_OFFSET(this_mode)))
          continue;
@@ -275,22 +305,20 @@ int64_t vp9_pick_inter_mode(VP9_COMP *cpi, MACROBLOCK *x,
                                  &frame_mv[NEWMV][ref_frame].as_mv);
        }
  
-      if (frame_mv[this_mode][ref_frame].as_int == 0) {
-        dist = x->mode_sad[ref_frame][INTER_OFFSET(ZEROMV)];
-      } else if (this_mode != NEARESTMV &&
-                 frame_mv[NEARESTMV][ref_frame].as_int ==
-                     frame_mv[this_mode][ref_frame].as_int) {
-        dist = x->mode_sad[ref_frame][INTER_OFFSET(NEARESTMV)];
-      } else {
-        mbmi->mode = this_mode;
-        mbmi->mv[0].as_int = frame_mv[this_mode][ref_frame].as_int;
-        vp9_build_inter_predictors_sby(xd, mi_row, mi_col, bsize);
-        dist = x->mode_sad[ref_frame][INTER_OFFSET(this_mode)] =
-            cpi->fn_ptr[bsize].sdf(p->src.buf, p->src.stride,
-                                   pd->dst.buf, pd->dst.stride, INT_MAX);
-      }
+      if (this_mode != NEARESTMV)
+        if (frame_mv[this_mode][ref_frame].as_int ==
+            frame_mv[NEARESTMV][ref_frame].as_int)
+          continue;
  
-      this_rd = rate + dist;
+      mbmi->mode = this_mode;
+      mbmi->mv[0].as_int = frame_mv[this_mode][ref_frame].as_int;
+      vp9_build_inter_predictors_sby(xd, mi_row, mi_col, bsize);
+
+      model_rd_for_sb_y(cpi, bsize, x, xd, &rate, &dist);
+      rate += rate_mv;
+      rate += x->inter_mode_cost[mbmi->mode_context[ref_frame]]
+                                [INTER_OFFSET(this_mode)];
+      this_rd = RDCOST(x->rdmult, x->rddiv, rate, dist);
  
        if (this_rd < best_rd) {
          best_rd = this_rd;
@@ -314,10 +342,9 @@ int64_t vp9_pick_inter_mode(VP9_COMP *cpi, MACROBLOCK *x,
                                &p->src.buf[0], p->src.stride,
                                &pd->dst.buf[0], pd->dst.stride, 0, 0, 0);
  
-      this_rd = cpi->fn_ptr[bsize].sdf(p->src.buf,
-                                       p->src.stride,
-                                       pd->dst.buf,
-                                       pd->dst.stride, INT_MAX);
+      model_rd_for_sb_y(cpi, bsize, x, xd, &rate, &dist);
+      rate += x->mbmode_cost[this_mode];
+      this_rd = RDCOST(x->rdmult, x->rddiv, rate, dist);
  
        if (this_rd + intra_mode_cost < best_rd) {
          best_rd = this_rd;
@@ -328,5 +355,6 @@ int64_t vp9_pick_inter_mode(VP9_COMP *cpi, MACROBLOCK *x,
        }
      }
    }
+
    return INT64_MAX;
  }
diff --git a/vp9/encoder/vp9_ratectrl.c b/vp9/encoder/vp9_ratectrl.c

index 89aa821..8430e4b 100644 (file)
--- a/vp9/encoder/vp9_ratectrl.c
+++ b/vp9/encoder/vp9_ratectrl.c
@@ -587,6 +587,7 @@ static int rc_pick_q_and_bounds_one_pass_cbr(const VP9_COMP *cpi,
                                                     q_adj_factor);
      }
    } else if (!rc->is_src_frame_alt_ref &&
+             !cpi->use_svc &&
               (cpi->refresh_golden_frame || cpi->refresh_alt_ref_frame)) {
      // Use the lower of active_worst_quality and recent
      // average Q as basis for GF/ARF best Q limit unless last frame was
diff --git a/vp9/encoder/vp9_rdopt.c b/vp9/encoder/vp9_rdopt.c

index e2d2d5a..361bbd9 100644 (file)
--- a/vp9/encoder/vp9_rdopt.c
+++ b/vp9/encoder/vp9_rdopt.c
@@ -30,6 +30,7 @@
  #include "vp9/common/vp9_seg_common.h"
  #include "vp9/common/vp9_systemdependent.h"
  
+#include "vp9/encoder/vp9_cost.h"
  #include "vp9/encoder/vp9_encodemb.h"
  #include "vp9/encoder/vp9_encodemv.h"
  #include "vp9/encoder/vp9_mcomp.h"
@@ -38,7 +39,6 @@
  #include "vp9/encoder/vp9_ratectrl.h"
  #include "vp9/encoder/vp9_rdopt.h"
  #include "vp9/encoder/vp9_tokenize.h"
-#include "vp9/encoder/vp9_treewriter.h"
  #include "vp9/encoder/vp9_variance.h"
  
  /* Factor to weigh the rate for switchable interp filters */
@@ -157,10 +157,10 @@ static void fill_mode_costs(VP9_COMP *cpi) {
  
    // TODO(rbultje) separate tables for superblock costing?
    vp9_cost_tokens(x->mbmode_cost, fc->y_mode_prob[1], vp9_intra_mode_tree);
-  vp9_cost_tokens(x->intra_uv_mode_cost[1],
-                  fc->uv_mode_prob[INTRA_MODES - 1], vp9_intra_mode_tree);
-  vp9_cost_tokens(x->intra_uv_mode_cost[0],
-                  vp9_kf_uv_mode_prob[INTRA_MODES - 1], vp9_intra_mode_tree);
+  vp9_cost_tokens(x->intra_uv_mode_cost[KEY_FRAME],
+                  vp9_kf_uv_mode_prob[TM_PRED], vp9_intra_mode_tree);
+  vp9_cost_tokens(x->intra_uv_mode_cost[INTER_FRAME],
+                  fc->uv_mode_prob[TM_PRED], vp9_intra_mode_tree);
  
    for (i = 0; i < SWITCHABLE_FILTER_CONTEXTS; ++i)
      vp9_cost_tokens((int *)x->switchable_interp_costs[i],
@@ -282,14 +282,12 @@ void vp9_initialize_rd_consts(VP9_COMP *cpi) {
    x->errorperbit = cpi->RDMULT / RD_MULT_EPB_RATIO;
    x->errorperbit += (x->errorperbit == 0);
  
-  vp9_set_speed_features(cpi);
-
    x->select_txfm_size = (cpi->sf.tx_size_search_method == USE_LARGESTALL &&
                           cm->frame_type != KEY_FRAME) ? 0 : 1;
  
    set_block_thresholds(cpi);
  
-  if (!cpi->sf.use_nonrd_pick_mode) {
+  if (!cpi->sf.use_nonrd_pick_mode || cm->frame_type == KEY_FRAME) {
      fill_token_costs(x->token_costs, cm->fc.coef_probs);
  
      for (i = 0; i < PARTITION_CONTEXTS; i++)
@@ -297,7 +295,8 @@ void vp9_initialize_rd_consts(VP9_COMP *cpi) {
                        vp9_partition_tree);
    }
  
-  if (!cpi->sf.use_nonrd_pick_mode || (cm->current_video_frame & 0x07) == 1) {
+  if (!cpi->sf.use_nonrd_pick_mode || (cm->current_video_frame & 0x07) == 1 ||
+      cm->frame_type == KEY_FRAME) {
      fill_mode_costs(cpi);
  
      if (!frame_is_intra_only(cm)) {
@@ -396,9 +395,9 @@ static void model_rd_norm(int xsq_q10, int *r_q10, int *d_q10) {
    *d_q10 = (dist_tab_q10[xq] * b_q10 + dist_tab_q10[xq + 1] * a_q10) >> 10;
  }
  
-static void model_rd_from_var_lapndz(unsigned int var, unsigned int n,
-                                     unsigned int qstep, int *rate,
-                                     int64_t *dist) {
+void vp9_model_rd_from_var_lapndz(unsigned int var, unsigned int n,
+                                  unsigned int qstep, int *rate,
+                                  int64_t *dist) {
    // This function models the rate and distortion for a Laplacian
    // source with given variance when quantized with a uniform quantizer
    // with given stepsize. The closed form expressions are in:
@@ -460,8 +459,8 @@ static void model_rd_for_sb(VP9_COMP *cpi, BLOCK_SIZE bsize,
      } else {
        int rate;
        int64_t dist;
-      model_rd_from_var_lapndz(sse, 1 << num_pels_log2_lookup[bs],
-                               pd->dequant[1] >> 3, &rate, &dist);
+      vp9_model_rd_from_var_lapndz(sse, 1 << num_pels_log2_lookup[bs],
+                                   pd->dequant[1] >> 3, &rate, &dist);
        rate_sum += rate;
        dist_sum += dist;
      }
@@ -508,7 +507,8 @@ static void model_rd_for_sb_y_tx(VP9_COMP *cpi, BLOCK_SIZE bsize,
                           &pd->dst.buf[j * pd->dst.stride + k], pd->dst.stride,
                           &sse);
        // sse works better than var, since there is no dc prediction used
-      model_rd_from_var_lapndz(sse, t * t, pd->dequant[1] >> 3, &rate, &dist);
+      vp9_model_rd_from_var_lapndz(sse, t * t, pd->dequant[1] >> 3,
+                                   &rate, &dist);
        rate_sum += rate;
        dist_sum += dist;
        *out_skip &= (rate < 1024);
@@ -1037,7 +1037,7 @@ static int conditional_skipintra(MB_PREDICTION_MODE mode,
  
  static int64_t rd_pick_intra4x4block(VP9_COMP *cpi, MACROBLOCK *x, int ib,
                                       MB_PREDICTION_MODE *best_mode,
-                                     int *bmode_costs,
+                                     const int *bmode_costs,
                                       ENTROPY_CONTEXT *a, ENTROPY_CONTEXT *l,
                                       int *bestrate, int *bestratey,
                                       int64_t *bestdistortion,
@@ -1162,14 +1162,12 @@ static int64_t rd_pick_intra4x4block(VP9_COMP *cpi, MACROBLOCK *x, int ib,
    return best_rd;
  }
  
-static int64_t rd_pick_intra_sub_8x8_y_mode(VP9_COMP * const cpi,
-                                            MACROBLOCK * const mb,
-                                            int * const rate,
-                                            int * const rate_y,
-                                            int64_t * const distortion,
+static int64_t rd_pick_intra_sub_8x8_y_mode(VP9_COMP *cpi, MACROBLOCK *mb,
+                                            int *rate, int *rate_y,
+                                            int64_t *distortion,
                                              int64_t best_rd) {
    int i, j;
-  MACROBLOCKD *const xd = &mb->e_mbd;
+  const MACROBLOCKD *const xd = &mb->e_mbd;
    MODE_INFO *const mic = xd->mi_8x8[0];
    const MODE_INFO *above_mi = xd->mi_8x8[-xd->mode_info_stride];
    const MODE_INFO *left_mi = xd->left_available ? xd->mi_8x8[-1] : NULL;
@@ -1182,13 +1180,11 @@ static int64_t rd_pick_intra_sub_8x8_y_mode(VP9_COMP * const cpi,
    int tot_rate_y = 0;
    int64_t total_rd = 0;
    ENTROPY_CONTEXT t_above[4], t_left[4];
-  int *bmode_costs;
+  const int *bmode_costs = mb->mbmode_cost;
  
    vpx_memcpy(t_above, xd->plane[0].above_context, sizeof(t_above));
    vpx_memcpy(t_left, xd->plane[0].left_context, sizeof(t_left));
  
-  bmode_costs = mb->mbmode_cost;
-
    // Pick modes for each sub-block (of size 4x4, 4x8, or 8x4) in an 8x8 block.
    for (idy = 0; idy < 2; idy += num_4x4_blocks_high) {
      for (idx = 0; idx < 2; idx += num_4x4_blocks_wide) {
@@ -1463,12 +1459,6 @@ static int cost_mv_ref(VP9_COMP *cpi, MB_PREDICTION_MODE mode,
    }
  }
  
-void vp9_set_mbmode_and_mvs(MACROBLOCKD *xd, MB_PREDICTION_MODE mode,
-                            const MV *mv) {
-  xd->mi_8x8[0]->mbmi.mode = mode;
-  xd->mi_8x8[0]->mbmi.mv[0].as_mv = *mv;
-}
-
  static void joint_motion_search(VP9_COMP *cpi, MACROBLOCK *x,
                                  BLOCK_SIZE bsize,
                                  int_mv *frame_mv,
@@ -1476,59 +1466,56 @@ static void joint_motion_search(VP9_COMP *cpi, MACROBLOCK *x,
                                  int_mv single_newmv[MAX_REF_FRAMES],
                                  int *rate_mv);
  
-static int labels2mode(MACROBLOCK *x, int i,
+static int labels2mode(VP9_COMP *cpi, MACROBLOCKD *xd, int i,
                         MB_PREDICTION_MODE mode,
-                       int_mv *this_mv, int_mv *this_second_mv,
+                       int_mv this_mv[2],
                         int_mv frame_mv[MB_MODE_COUNT][MAX_REF_FRAMES],
                         int_mv seg_mvs[MAX_REF_FRAMES],
-                       int_mv *best_ref_mv,
-                       int_mv *second_best_ref_mv,
-                       int *mvjcost, int *mvcost[2], VP9_COMP *cpi) {
-  MACROBLOCKD *const xd = &x->e_mbd;
+                       int_mv *best_ref_mv[2],
+                       const int *mvjcost, int *mvcost[2]) {
    MODE_INFO *const mic = xd->mi_8x8[0];
-  MB_MODE_INFO *mbmi = &mic->mbmi;
+  const MB_MODE_INFO *const mbmi = &mic->mbmi;
    int thismvcost = 0;
    int idx, idy;
    const int num_4x4_blocks_wide = num_4x4_blocks_wide_lookup[mbmi->sb_type];
    const int num_4x4_blocks_high = num_4x4_blocks_high_lookup[mbmi->sb_type];
-  const int has_second_rf = has_second_ref(mbmi);
+  const int is_compound = has_second_ref(mbmi);
  
    // the only time we should do costing for new motion vector or mode
    // is when we are on a new label  (jbb May 08, 2007)
    switch (mode) {
      case NEWMV:
-      this_mv->as_int = seg_mvs[mbmi->ref_frame[0]].as_int;
-      thismvcost += vp9_mv_bit_cost(&this_mv->as_mv, &best_ref_mv->as_mv,
+      this_mv[0].as_int = seg_mvs[mbmi->ref_frame[0]].as_int;
+      thismvcost += vp9_mv_bit_cost(&this_mv[0].as_mv, &best_ref_mv[0]->as_mv,
                                      mvjcost, mvcost, MV_COST_WEIGHT_SUB);
-      if (has_second_rf) {
-        this_second_mv->as_int = seg_mvs[mbmi->ref_frame[1]].as_int;
-        thismvcost += vp9_mv_bit_cost(&this_second_mv->as_mv,
-                                      &second_best_ref_mv->as_mv,
+      if (is_compound) {
+        this_mv[1].as_int = seg_mvs[mbmi->ref_frame[1]].as_int;
+        thismvcost += vp9_mv_bit_cost(&this_mv[1].as_mv, &best_ref_mv[1]->as_mv,
                                        mvjcost, mvcost, MV_COST_WEIGHT_SUB);
        }
        break;
      case NEARESTMV:
-      this_mv->as_int = frame_mv[NEARESTMV][mbmi->ref_frame[0]].as_int;
-      if (has_second_rf)
-        this_second_mv->as_int = frame_mv[NEARESTMV][mbmi->ref_frame[1]].as_int;
+      this_mv[0].as_int = frame_mv[NEARESTMV][mbmi->ref_frame[0]].as_int;
+      if (is_compound)
+        this_mv[1].as_int = frame_mv[NEARESTMV][mbmi->ref_frame[1]].as_int;
        break;
      case NEARMV:
-      this_mv->as_int = frame_mv[NEARMV][mbmi->ref_frame[0]].as_int;
-      if (has_second_rf)
-        this_second_mv->as_int = frame_mv[NEARMV][mbmi->ref_frame[1]].as_int;
+      this_mv[0].as_int = frame_mv[NEARMV][mbmi->ref_frame[0]].as_int;
+      if (is_compound)
+        this_mv[1].as_int = frame_mv[NEARMV][mbmi->ref_frame[1]].as_int;
        break;
      case ZEROMV:
-      this_mv->as_int = 0;
-      if (has_second_rf)
-        this_second_mv->as_int = 0;
+      this_mv[0].as_int = 0;
+      if (is_compound)
+        this_mv[1].as_int = 0;
        break;
      default:
        break;
    }
  
-  mic->bmi[i].as_mv[0].as_int = this_mv->as_int;
-  if (has_second_rf)
-    mic->bmi[i].as_mv[1].as_int = this_second_mv->as_int;
+  mic->bmi[i].as_mv[0].as_int = this_mv[0].as_int;
+  if (is_compound)
+    mic->bmi[i].as_mv[1].as_int = this_mv[1].as_int;
  
    mic->bmi[i].as_mode = mode;
  
@@ -1704,6 +1691,7 @@ static void rd_check_segment_txsize(VP9_COMP *cpi, MACROBLOCK *x,
    int mode_idx;
    int subpelmv = 1, have_ref = 0;
    const int has_second_rf = has_second_ref(mbmi);
+  const int disable_inter_mode_mask = cpi->sf.disable_inter_mode_mask[bsize];
  
    vpx_memcpy(t_above, pd->above_context, sizeof(t_above));
    vpx_memcpy(t_left, pd->left_context, sizeof(t_left));
@@ -1719,7 +1707,7 @@ static void rd_check_segment_txsize(VP9_COMP *cpi, MACROBLOCK *x,
      for (idx = 0; idx < 2; idx += num_4x4_blocks_wide) {
        // TODO(jingning,rbultje): rewrite the rate-distortion optimization
        // loop for 4x4/4x8/8x4 block coding. to be replaced with new rd loop
-      int_mv mode_mv[MB_MODE_COUNT], second_mode_mv[MB_MODE_COUNT];
+      int_mv mode_mv[MB_MODE_COUNT][2];
        int_mv frame_mv[MB_MODE_COUNT][MAX_REF_FRAMES];
        MB_PREDICTION_MODE mode_selected = ZEROMV;
        int64_t best_rd = INT64_MAX;
@@ -1741,11 +1729,12 @@ static void rd_check_segment_txsize(VP9_COMP *cpi, MACROBLOCK *x,
  
          mode_idx = INTER_OFFSET(this_mode);
          bsi->rdstat[i][mode_idx].brdcost = INT64_MAX;
-        if (cpi->sf.disable_inter_mode_mask[bsize] & (1 << mode_idx))
+        if (disable_inter_mode_mask & (1 << mode_idx))
            continue;
  
          // if we're near/nearest and mv == 0,0, compare to zeromv
-        if ((this_mode == NEARMV || this_mode == NEARESTMV ||
+        if (!(disable_inter_mode_mask & (1 << INTER_OFFSET(ZEROMV))) &&
+            (this_mode == NEARMV || this_mode == NEARESTMV ||
               this_mode == ZEROMV) &&
              frame_mv[this_mode][mbmi->ref_frame[0]].as_int == 0 &&
              (!has_second_rf ||
@@ -1790,7 +1779,7 @@ static void rd_check_segment_txsize(VP9_COMP *cpi, MACROBLOCK *x,
          // motion search for newmv (single predictor case only)
          if (!has_second_rf && this_mode == NEWMV &&
              seg_mvs[i][mbmi->ref_frame[0]].as_int == INVALID_MV) {
-          int_mv *const new_mv = &mode_mv[NEWMV];
+          int_mv *const new_mv = &mode_mv[NEWMV][0];
            int step_param = 0;
            int further_steps;
            int thissme, bestsme = INT_MAX;
@@ -1848,18 +1837,30 @@ static void rd_check_segment_txsize(VP9_COMP *cpi, MACROBLOCK *x,
                                       sadpb, 1, v_fn_ptr, 1,
                                       &bsi->ref_mv[0]->as_mv,
                                       &new_mv->as_mv);
+            if (bestsme < INT_MAX)
+              bestsme = vp9_get_mvpred_var(x, &new_mv->as_mv,
+                                           &bsi->ref_mv[0]->as_mv,
+                                           v_fn_ptr, 1);
            } else if (cpi->sf.search_method == SQUARE) {
              bestsme = vp9_square_search(x, &mvp_full,
                                          step_param,
                                          sadpb, 1, v_fn_ptr, 1,
                                          &bsi->ref_mv[0]->as_mv,
                                          &new_mv->as_mv);
+            if (bestsme < INT_MAX)
+              bestsme = vp9_get_mvpred_var(x, &new_mv->as_mv,
+                                           &bsi->ref_mv[0]->as_mv,
+                                           v_fn_ptr, 1);
            } else if (cpi->sf.search_method == BIGDIA) {
              bestsme = vp9_bigdia_search(x, &mvp_full,
                                          step_param,
                                          sadpb, 1, v_fn_ptr, 1,
                                          &bsi->ref_mv[0]->as_mv,
                                          &new_mv->as_mv);
+            if (bestsme < INT_MAX)
+              bestsme = vp9_get_mvpred_var(x, &new_mv->as_mv,
+                                           &bsi->ref_mv[0]->as_mv,
+                                           v_fn_ptr, 1);
            } else {
              bestsme = vp9_full_pixel_diamond(cpi, x, &mvp_full, step_param,
                                               sadpb, further_steps, 0, v_fn_ptr,
@@ -1938,55 +1939,43 @@ static void rd_check_segment_txsize(VP9_COMP *cpi, MACROBLOCK *x,
          }
  
          bsi->rdstat[i][mode_idx].brate =
-            labels2mode(x, i, this_mode, &mode_mv[this_mode],
-                        &second_mode_mv[this_mode], frame_mv, seg_mvs[i],
-                        bsi->ref_mv[0], bsi->ref_mv[1], x->nmvjointcost,
-                        x->mvcost, cpi);
-
-
-        bsi->rdstat[i][mode_idx].mvs[0].as_int = mode_mv[this_mode].as_int;
-        if (num_4x4_blocks_wide > 1)
-          bsi->rdstat[i + 1][mode_idx].mvs[0].as_int =
-              mode_mv[this_mode].as_int;
-        if (num_4x4_blocks_high > 1)
-          bsi->rdstat[i + 2][mode_idx].mvs[0].as_int =
-              mode_mv[this_mode].as_int;
-        if (has_second_rf) {
-          bsi->rdstat[i][mode_idx].mvs[1].as_int =
-              second_mode_mv[this_mode].as_int;
+            labels2mode(cpi, xd, i, this_mode, mode_mv[this_mode], frame_mv,
+                        seg_mvs[i], bsi->ref_mv, x->nmvjointcost, x->mvcost);
+
+        for (ref = 0; ref < 1 + has_second_rf; ++ref) {
+          bsi->rdstat[i][mode_idx].mvs[ref].as_int =
+              mode_mv[this_mode][ref].as_int;
            if (num_4x4_blocks_wide > 1)
-            bsi->rdstat[i + 1][mode_idx].mvs[1].as_int =
-                second_mode_mv[this_mode].as_int;
+            bsi->rdstat[i + 1][mode_idx].mvs[ref].as_int =
+                mode_mv[this_mode][ref].as_int;
            if (num_4x4_blocks_high > 1)
-            bsi->rdstat[i + 2][mode_idx].mvs[1].as_int =
-                second_mode_mv[this_mode].as_int;
+            bsi->rdstat[i + 2][mode_idx].mvs[ref].as_int =
+                mode_mv[this_mode][ref].as_int;
          }
  
          // Trap vectors that reach beyond the UMV borders
-        if (mv_check_bounds(x, &mode_mv[this_mode].as_mv) ||
+        if (mv_check_bounds(x, &mode_mv[this_mode][0].as_mv) ||
              (has_second_rf &&
-             mv_check_bounds(x, &second_mode_mv[this_mode].as_mv)))
+             mv_check_bounds(x, &mode_mv[this_mode][1].as_mv)))
            continue;
  
          if (filter_idx > 0) {
            BEST_SEG_INFO *ref_bsi = bsi_buf;
-          subpelmv = mv_has_subpel(&mode_mv[this_mode].as_mv);
-          have_ref = mode_mv[this_mode].as_int ==
-                         ref_bsi->rdstat[i][mode_idx].mvs[0].as_int;
-          if (has_second_rf) {
-            subpelmv |= mv_has_subpel(&second_mode_mv[this_mode].as_mv);
-            have_ref &= second_mode_mv[this_mode].as_int ==
-                            ref_bsi->rdstat[i][mode_idx].mvs[1].as_int;
+          subpelmv = 0;
+          have_ref = 1;
+
+          for (ref = 0; ref < 1 + has_second_rf; ++ref) {
+            subpelmv |= mv_has_subpel(&mode_mv[this_mode][ref].as_mv);
+            have_ref &= mode_mv[this_mode][ref].as_int ==
+                ref_bsi->rdstat[i][mode_idx].mvs[ref].as_int;
            }
  
            if (filter_idx > 1 && !subpelmv && !have_ref) {
              ref_bsi = bsi_buf + 1;
-            have_ref = mode_mv[this_mode].as_int ==
-                       ref_bsi->rdstat[i][mode_idx].mvs[0].as_int;
-            if (has_second_rf) {
-              have_ref  &= second_mode_mv[this_mode].as_int ==
-                           ref_bsi->rdstat[i][mode_idx].mvs[1].as_int;
-            }
+            have_ref = 1;
+            for (ref = 0; ref < 1 + has_second_rf; ++ref)
+              have_ref &= mode_mv[this_mode][ref].as_int ==
+                  ref_bsi->rdstat[i][mode_idx].mvs[ref].as_int;
            }
  
            if (!subpelmv && have_ref &&
@@ -2047,10 +2036,9 @@ static void rd_check_segment_txsize(VP9_COMP *cpi, MACROBLOCK *x,
        vpx_memcpy(t_above, bsi->rdstat[i][mode_idx].ta, sizeof(t_above));
        vpx_memcpy(t_left, bsi->rdstat[i][mode_idx].tl, sizeof(t_left));
  
-      labels2mode(x, i, mode_selected, &mode_mv[mode_selected],
-                  &second_mode_mv[mode_selected], frame_mv, seg_mvs[i],
-                  bsi->ref_mv[0], bsi->ref_mv[1], x->nmvjointcost,
-                  x->mvcost, cpi);
+      labels2mode(cpi, xd, i, mode_selected, mode_mv[mode_selected],
+                  frame_mv, seg_mvs[i], bsi->ref_mv, x->nmvjointcost,
+                  x->mvcost);
  
        br += bsi->rdstat[i][mode_idx].brate;
        bd += bsi->rdstat[i][mode_idx].bdist;
@@ -2470,21 +2458,33 @@ static void single_motion_search(VP9_COMP *cpi, MACROBLOCK *x,
    further_steps = (cpi->sf.max_step_search_steps - 1) - step_param;
  
    if (cpi->sf.search_method == FAST_HEX) {
-    bestsme = vp9_fast_hex_search(x, &mvp_full, step_param, sadpb,
+    bestsme = vp9_fast_hex_search(x, &mvp_full, step_param, sadpb, 0,
                                    &cpi->fn_ptr[bsize], 1,
                                    &ref_mv, &tmp_mv->as_mv);
+    if (bestsme < INT_MAX)
+      bestsme = vp9_get_mvpred_var(x, &tmp_mv->as_mv, &ref_mv,
+                                   &cpi->fn_ptr[bsize], 1);
    } else if (cpi->sf.search_method == HEX) {
      bestsme = vp9_hex_search(x, &mvp_full, step_param, sadpb, 1,
                               &cpi->fn_ptr[bsize], 1,
                               &ref_mv, &tmp_mv->as_mv);
+    if (bestsme < INT_MAX)
+      bestsme = vp9_get_mvpred_var(x, &tmp_mv->as_mv, &ref_mv,
+                                   &cpi->fn_ptr[bsize], 1);
    } else if (cpi->sf.search_method == SQUARE) {
      bestsme = vp9_square_search(x, &mvp_full, step_param, sadpb, 1,
                                  &cpi->fn_ptr[bsize], 1,
                                  &ref_mv, &tmp_mv->as_mv);
+    if (bestsme < INT_MAX)
+      bestsme = vp9_get_mvpred_var(x, &tmp_mv->as_mv, &ref_mv,
+                                   &cpi->fn_ptr[bsize], 1);
    } else if (cpi->sf.search_method == BIGDIA) {
      bestsme = vp9_bigdia_search(x, &mvp_full, step_param, sadpb, 1,
                                  &cpi->fn_ptr[bsize], 1,
                                  &ref_mv, &tmp_mv->as_mv);
+    if (bestsme < INT_MAX)
+      bestsme = vp9_get_mvpred_var(x, &tmp_mv->as_mv, &ref_mv,
+                                   &cpi->fn_ptr[bsize], 1);
    } else {
      bestsme = vp9_full_pixel_diamond(cpi, x, &mvp_full, step_param,
                                       sadpb, further_steps, 1,
@@ -2611,6 +2611,9 @@ static void joint_motion_search(VP9_COMP *cpi, MACROBLOCK *x,
                                         x->nmvjointcost, x->mvcost,
                                         &ref_mv[id].as_mv, second_pred,
                                         pw, ph);
+    if (bestsme < INT_MAX)
+      bestsme = vp9_get_mvpred_av_var(x, &tmp_mv.as_mv, &ref_mv[id].as_mv,
+                                      second_pred, &cpi->fn_ptr[bsize], 1);
  
      x->mv_col_min = tmp_col_min;
      x->mv_col_max = tmp_col_max;
@@ -3158,6 +3161,7 @@ int64_t vp9_rd_pick_inter_mode_sb(VP9_COMP *cpi, MACROBLOCK *x,
    const int mode_search_skip_flags = cpi->sf.mode_search_skip_flags;
    const int intra_y_mode_mask =
        cpi->sf.intra_y_mode_mask[max_txsize_lookup[bsize]];
+  const int disable_inter_mode_mask = cpi->sf.disable_inter_mode_mask[bsize];
  
    x->skip_encode = cpi->sf.skip_encode_frame && x->q_index < QIDX_SKIP_THRESH;
  
@@ -3256,6 +3260,10 @@ int64_t vp9_rd_pick_inter_mode_sb(VP9_COMP *cpi, MACROBLOCK *x,
      mode_skip_mask |= new_modes_mask;
    }
  
+  if (bsize > cpi->sf.max_intra_bsize) {
+    mode_skip_mask |= 0xFF30808;
+  }
+
    for (mode_index = 0; mode_index < MAX_MODES; ++mode_index) {
      int mode_excluded = 0;
      int64_t this_rd = INT64_MAX;
@@ -3302,7 +3310,7 @@ int64_t vp9_rd_pick_inter_mode_sb(VP9_COMP *cpi, MACROBLOCK *x,
      this_mode = vp9_mode_order[mode_index].mode;
      ref_frame = vp9_mode_order[mode_index].ref_frame[0];
      if (ref_frame != INTRA_FRAME &&
-        cpi->sf.disable_inter_mode_mask[bsize] & (1 << INTER_OFFSET(this_mode)))
+        disable_inter_mode_mask & (1 << INTER_OFFSET(this_mode)))
        continue;
      second_ref_frame = vp9_mode_order[mode_index].ref_frame[1];
  
@@ -3346,7 +3354,8 @@ int64_t vp9_rd_pick_inter_mode_sb(VP9_COMP *cpi, MACROBLOCK *x,
        }
      } else {
        // if we're near/nearest and mv == 0,0, compare to zeromv
-      if ((this_mode == NEARMV || this_mode == NEARESTMV ||
+      if (!(disable_inter_mode_mask & (1 << INTER_OFFSET(ZEROMV))) &&
+          (this_mode == NEARMV || this_mode == NEARESTMV ||
            this_mode == ZEROMV) &&
            frame_mv[this_mode][ref_frame].as_int == 0 &&
            !vp9_segfeature_active(&cm->seg, mbmi->segment_id, SEG_LVL_SKIP) &&
diff --git a/vp9/encoder/vp9_rdopt.h b/vp9/encoder/vp9_rdopt.h

index b5baa33..7d7aeed 100644 (file)
--- a/vp9/encoder/vp9_rdopt.h
+++ b/vp9/encoder/vp9_rdopt.h
@@ -41,6 +41,10 @@ void vp9_initialize_rd_consts(VP9_COMP *cpi);
  
  void vp9_initialize_me_consts(VP9_COMP *cpi, int qindex);
  
+void vp9_model_rd_from_var_lapndz(unsigned int var, unsigned int n,
+                                  unsigned int qstep, int *rate,
+                                  int64_t *dist);
+
  void vp9_setup_buffer_inter(VP9_COMP *cpi, MACROBLOCK *x,
                              const TileInfo *const tile,
                              MV_REFERENCE_FRAME ref_frame,
@@ -77,9 +81,6 @@ int64_t vp9_rd_pick_inter_mode_sub8x8(VP9_COMP *cpi, MACROBLOCK *x,
  
  void vp9_init_me_luts();
  
-void vp9_set_mbmode_and_mvs(MACROBLOCKD *xd, MB_PREDICTION_MODE mode,
-                            const MV *mv);
-
  void vp9_get_entropy_contexts(BLOCK_SIZE bsize, TX_SIZE tx_size,
                                const struct macroblockd_plane *pd,
                                ENTROPY_CONTEXT t_above[16],
diff --git a/vp9/encoder/vp9_segmentation.c b/vp9/encoder/vp9_segmentation.c

index 49fd7bb..fd8fa53 100644 (file)
--- a/vp9/encoder/vp9_segmentation.c
+++ b/vp9/encoder/vp9_segmentation.c
@@ -16,6 +16,7 @@
  #include "vp9/common/vp9_pred_common.h"
  #include "vp9/common/vp9_tile_common.h"
  
+#include "vp9/encoder/vp9_cost.h"
  #include "vp9/encoder/vp9_segmentation.h"
  
  void vp9_enable_segmentation(struct segmentation *seg) {
diff --git a/vp9/encoder/vp9_subexp.c b/vp9/encoder/vp9_subexp.c

index fdc2106..fd82fa3 100644 (file)
--- a/vp9/encoder/vp9_subexp.c
+++ b/vp9/encoder/vp9_subexp.c
@@ -11,7 +11,7 @@
  #include "vp9/common/vp9_common.h"
  #include "vp9/common/vp9_entropy.h"
  
-#include "vp9/encoder/vp9_treewriter.h"
+#include "vp9/encoder/vp9_cost.h"
  #include "vp9/encoder/vp9_writer.h"
  
  #define vp9_cost_upd256  ((int)(vp9_cost_one(upd) - vp9_cost_zero(upd)))
diff --git a/vp9/encoder/vp9_tokenize.c b/vp9/encoder/vp9_tokenize.c

index e8179f3..a293dd8 100644 (file)
--- a/vp9/encoder/vp9_tokenize.c
+++ b/vp9/encoder/vp9_tokenize.c
@@ -8,18 +8,20 @@
   *  be found in the AUTHORS file in the root of the source tree.
   */
  
-
+#include <assert.h>
  #include <math.h>
  #include <stdio.h>
  #include <string.h>
-#include <assert.h>
-#include "vp9/encoder/vp9_onyx_int.h"
-#include "vp9/encoder/vp9_tokenize.h"
+
  #include "vpx_mem/vpx_mem.h"
  
+#include "vp9/common/vp9_entropy.h"
  #include "vp9/common/vp9_pred_common.h"
  #include "vp9/common/vp9_seg_common.h"
-#include "vp9/common/vp9_entropy.h"
+
+#include "vp9/encoder/vp9_cost.h"
+#include "vp9/encoder/vp9_onyx_int.h"
+#include "vp9/encoder/vp9_tokenize.h"
  
  static TOKENVALUE dct_value_tokens[DCT_MAX_VALUE * 2];
  const TOKENVALUE *vp9_dct_value_tokens_ptr;
diff --git a/vp9/encoder/vp9_treewriter.c b/vp9/encoder/vp9_treewriter.c

index 35e5a8f..bb04b40 100644 (file)
--- a/vp9/encoder/vp9_treewriter.c
+++ b/vp9/encoder/vp9_treewriter.c
@@ -10,33 +10,6 @@
  
  #include "vp9/encoder/vp9_treewriter.h"
  
-static void cost(int *costs, vp9_tree tree, const vp9_prob *probs,
-                 int i, int c) {
-  const vp9_prob prob = probs[i / 2];
-  int b;
-
-  for (b = 0; b <= 1; ++b) {
-    const int cc = c + vp9_cost_bit(prob, b);
-    const vp9_tree_index ii = tree[i + b];
-
-    if (ii <= 0)
-      costs[-ii] = cc;
-    else
-      cost(costs, tree, probs, ii, cc);
-  }
-}
-
-void vp9_cost_tokens(int *costs, const vp9_prob *probs, vp9_tree tree) {
-  cost(costs, tree, probs, 0, 0);
-}
-
-void vp9_cost_tokens_skip(int *costs, const vp9_prob *probs, vp9_tree tree) {
-  assert(tree[0] <= 0 && tree[1] > 0);
-
-  costs[-tree[0]] = vp9_cost_bit(probs[0], 0);
-  cost(costs, tree, probs, 2, 0);
-}
-
  static void tree2tok(struct vp9_token *tokens, const vp9_tree_index *tree,
                       int i, int v, int l) {
    v += v;
diff --git a/vp9/encoder/vp9_treewriter.h b/vp9/encoder/vp9_treewriter.h

index fedfbe9..4a76d87 100644 (file)
--- a/vp9/encoder/vp9_treewriter.h
+++ b/vp9/encoder/vp9_treewriter.h
@@ -17,35 +17,6 @@
  extern "C" {
  #endif
  
-#define vp9_cost_zero(prob) (vp9_prob_cost[prob])
-
-#define vp9_cost_one(prob) vp9_cost_zero(vp9_complement(prob))
-
-#define vp9_cost_bit(prob, bit) vp9_cost_zero((bit) ? vp9_complement(prob) \
-                                                    : (prob))
-
-static INLINE unsigned int cost_branch256(const unsigned int ct[2],
-                                          vp9_prob p) {
-  return ct[0] * vp9_cost_zero(p) + ct[1] * vp9_cost_one(p);
-}
-
-static INLINE int treed_cost(vp9_tree tree, const vp9_prob *probs,
-                             int bits, int len) {
-  int cost = 0;
-  vp9_tree_index i = 0;
-
-  do {
-    const int bit = (bits >> --len) & 1;
-    cost += vp9_cost_bit(probs[i >> 1], bit);
-    i = tree[i + bit];
-  } while (len);
-
-  return cost;
-}
-
-void vp9_cost_tokens(int *costs, const vp9_prob *probs, vp9_tree tree);
-void vp9_cost_tokens_skip(int *costs, const vp9_prob *probs, vp9_tree tree);
-
  void vp9_tree_probs_from_distribution(vp9_tree tree,
                                        unsigned int branch_ct[ /* n - 1 */ ][2],
                                        const unsigned int num_events[ /* n */ ]);
diff --git a/vp9/encoder/vp9_writer.c b/vp9/encoder/vp9_writer.c

index fda1b39..8398fc0 100644 (file)
--- a/vp9/encoder/vp9_writer.c
+++ b/vp9/encoder/vp9_writer.c
@@ -12,34 +12,6 @@
  #include "vp9/encoder/vp9_writer.h"
  #include "vp9/common/vp9_entropy.h"
  
-#ifdef ENTROPY_STATS
-unsigned int active_section = 0;
-#endif
-
-const unsigned int vp9_prob_cost[256] = {
-  2047, 2047, 1791, 1641, 1535, 1452, 1385, 1328, 1279, 1235, 1196, 1161,
-  1129, 1099, 1072, 1046, 1023, 1000, 979,  959,  940,  922,  905,  889,
-  873,  858,  843,  829,  816,  803,  790,  778,  767,  755,  744,  733,
-  723,  713,  703,  693,  684,  675,  666,  657,  649,  641,  633,  625,
-  617,  609,  602,  594,  587,  580,  573,  567,  560,  553,  547,  541,
-  534,  528,  522,  516,  511,  505,  499,  494,  488,  483,  477,  472,
-  467,  462,  457,  452,  447,  442,  437,  433,  428,  424,  419,  415,
-  410,  406,  401,  397,  393,  389,  385,  381,  377,  373,  369,  365,
-  361,  357,  353,  349,  346,  342,  338,  335,  331,  328,  324,  321,
-  317,  314,  311,  307,  304,  301,  297,  294,  291,  288,  285,  281,
-  278,  275,  272,  269,  266,  263,  260,  257,  255,  252,  249,  246,
-  243,  240,  238,  235,  232,  229,  227,  224,  221,  219,  216,  214,
-  211,  208,  206,  203,  201,  198,  196,  194,  191,  189,  186,  184,
-  181,  179,  177,  174,  172,  170,  168,  165,  163,  161,  159,  156,
-  154,  152,  150,  148,  145,  143,  141,  139,  137,  135,  133,  131,
-  129,  127,  125,  123,  121,  119,  117,  115,  113,  111,  109,  107,
-  105,  103,  101,  99,   97,   95,   93,   92,   90,   88,   86,   84,
-  82,   81,   79,   77,   75,   73,   72,   70,   68,   66,   65,   63,
-  61,   60,   58,   56,   55,   53,   51,   50,   48,   46,   45,   43,
-  41,   40,   38,   37,   35,   33,   32,   30,   29,   27,   25,   24,
-  22,   21,   19,   18,   16,   15,   13,   12,   10,   9,    7,    6,
-  4,    3,    1,    1};
-
  void vp9_start_encode(vp9_writer *br, uint8_t *source) {
    br->lowvalue = 0;
    br->range    = 255;
diff --git a/vp9/encoder/vp9_writer.h b/vp9/encoder/vp9_writer.h

index defeec3..7f4fa1e 100644 (file)
--- a/vp9/encoder/vp9_writer.h
+++ b/vp9/encoder/vp9_writer.h
@@ -32,8 +32,6 @@ typedef struct {
    uint64_t bit_counter;
  } vp9_writer;
  
-extern const unsigned int vp9_prob_cost[256];
-
  void vp9_start_encode(vp9_writer *bc, uint8_t *buffer);
  void vp9_stop_encode(vp9_writer *bc);
  
diff --git a/vp9/encoder/x86/vp9_dct_sse2.c b/vp9/encoder/x86/vp9_dct_sse2.c

index f3735eb..6865822 100644 (file)
--- a/vp9/encoder/x86/vp9_dct_sse2.c
+++ b/vp9/encoder/x86/vp9_dct_sse2.c
@@ -13,39 +13,80 @@
  #include "vpx_ports/mem.h"
  
  void vp9_fdct4x4_sse2(const int16_t *input, int16_t *output, int stride) {
-  // The 2D transform is done with two passes which are actually pretty
-  // similar. In the first one, we transform the columns and transpose
-  // the results. In the second one, we transform the rows. To achieve that,
-  // as the first pass results are transposed, we transpose the columns (that
-  // is the transposed rows) and transpose the results (so that it goes back
-  // in normal/row positions).
-  int pass;
+  // This 2D transform implements 4 vertical 1D transforms followed
+  // by 4 horizontal 1D transforms.  The multiplies and adds are as given
+  // by Chen, Smith and Fralick ('77).  The commands for moving the data
+  // around have been minimized by hand.
+  // For the purposes of the comments, the 16 inputs are referred to at i0
+  // through iF (in raster order), intermediate variables are a0, b0, c0
+  // through f, and correspond to the in-place computations mapped to input
+  // locations.  The outputs, o0 through oF are labeled according to the
+  // output locations.
+
    // Constants
-  //    When we use them, in one case, they are all the same. In all others
-  //    it's a pair of them that we need to repeat four times. This is done
-  //    by constructing the 32 bit constant corresponding to that pair.
-  const __m128i k__cospi_p16_p16 = _mm_set1_epi16(cospi_16_64);
-  const __m128i k__cospi_p16_m16 = pair_set_epi16(cospi_16_64, -cospi_16_64);
-  const __m128i k__cospi_p08_p24 = pair_set_epi16(cospi_8_64, cospi_24_64);
-  const __m128i k__cospi_p24_m08 = pair_set_epi16(cospi_24_64, -cospi_8_64);
+  // These are the coefficients used for the multiplies.
+  // In the comments, pN means cos(N pi /64) and mN is -cos(N pi /64),
+  // where cospi_N_64 = cos(N pi /64)
+  const __m128i k__cospi_A = _mm_setr_epi16(cospi_16_64, cospi_16_64,
+                                            cospi_16_64, cospi_16_64,
+                                            cospi_16_64, -cospi_16_64,
+                                            cospi_16_64, -cospi_16_64);
+  const __m128i k__cospi_B = _mm_setr_epi16(cospi_16_64, -cospi_16_64,
+                                            cospi_16_64, -cospi_16_64,
+                                            cospi_16_64, cospi_16_64,
+                                            cospi_16_64, cospi_16_64);
+  const __m128i k__cospi_C = _mm_setr_epi16(cospi_8_64, cospi_24_64,
+                                            cospi_8_64, cospi_24_64,
+                                            cospi_24_64, -cospi_8_64,
+                                            cospi_24_64, -cospi_8_64);
+  const __m128i k__cospi_D = _mm_setr_epi16(cospi_24_64, -cospi_8_64,
+                                            cospi_24_64, -cospi_8_64,
+                                            cospi_8_64, cospi_24_64,
+                                            cospi_8_64, cospi_24_64);
+  const __m128i k__cospi_E = _mm_setr_epi16(cospi_16_64, cospi_16_64,
+                                            cospi_16_64, cospi_16_64,
+                                            cospi_16_64, cospi_16_64,
+                                            cospi_16_64, cospi_16_64);
+  const __m128i k__cospi_F = _mm_setr_epi16(cospi_16_64, -cospi_16_64,
+                                            cospi_16_64, -cospi_16_64,
+                                            cospi_16_64, -cospi_16_64,
+                                            cospi_16_64, -cospi_16_64);
+  const __m128i k__cospi_G = _mm_setr_epi16(cospi_8_64, cospi_24_64,
+                                            cospi_8_64, cospi_24_64,
+                                            -cospi_8_64, -cospi_24_64,
+                                            -cospi_8_64, -cospi_24_64);
+  const __m128i k__cospi_H = _mm_setr_epi16(cospi_24_64, -cospi_8_64,
+                                            cospi_24_64, -cospi_8_64,
+                                            -cospi_24_64, cospi_8_64,
+                                            -cospi_24_64, cospi_8_64);
+
    const __m128i k__DCT_CONST_ROUNDING = _mm_set1_epi32(DCT_CONST_ROUNDING);
+  // This second rounding constant saves doing some extra adds at the end
+  const __m128i k__DCT_CONST_ROUNDING2 = _mm_set1_epi32(DCT_CONST_ROUNDING
+                                               +(DCT_CONST_ROUNDING << 1));
+  const int DCT_CONST_BITS2 =  DCT_CONST_BITS+2;
    const __m128i k__nonzero_bias_a = _mm_setr_epi16(0, 1, 1, 1, 1, 1, 1, 1);
    const __m128i k__nonzero_bias_b = _mm_setr_epi16(1, 0, 0, 0, 0, 0, 0, 0);
-  const __m128i kOne = _mm_set1_epi16(1);
    __m128i in0, in1;
+
    // Load inputs.
    {
      in0  = _mm_loadl_epi64((const __m128i *)(input +  0 * stride));
+    in1  = _mm_loadl_epi64((const __m128i *)(input +  1 * stride));
+    in1  = _mm_unpacklo_epi64(in1, _mm_loadl_epi64((const __m128i *)
+           (input +  2 * stride)));
      in0  = _mm_unpacklo_epi64(in0, _mm_loadl_epi64((const __m128i *)
-           (input +  1 * stride)));
-    in1  = _mm_loadl_epi64((const __m128i *)(input +  2 * stride));
-    in1  = _mm_unpacklo_epi64(_mm_loadl_epi64((const __m128i *)
-           (input +  3 * stride)), in1);
+           (input +  3 * stride)));
+    // in0 = [i0 i1 i2 i3 iC iD iE iF]
+    // in1 = [i4 i5 i6 i7 i8 i9 iA iB]
+
  
-    // x = x << 4
+    // multiply by 16 to give some extra precision
      in0 = _mm_slli_epi16(in0, 4);
      in1 = _mm_slli_epi16(in1, 4);
      // if (i == 0 && input[0]) input[0] += 1;
+    // add 1 to the upper left pixel if it is non-zero, which helps reduce
+    // the round-trip error
      {
        // The mask will only contain whether the first value is zero, all
        // other comparison will fail as something shifted by 4 (above << 4)
@@ -58,57 +99,119 @@ void vp9_fdct4x4_sse2(const int16_t *input, int16_t *output, int stride) {
        in0 = _mm_add_epi16(in0, k__nonzero_bias_b);
      }
    }
-  // Do the two transform/transpose passes
-  for (pass = 0; pass < 2; ++pass) {
-    // Transform 1/2: Add/subtract
-    const __m128i r0 = _mm_add_epi16(in0, in1);
-    const __m128i r1 = _mm_sub_epi16(in0, in1);
-    const __m128i r2 = _mm_unpacklo_epi64(r0, r1);
-    const __m128i r3 = _mm_unpackhi_epi64(r0, r1);
-    // Transform 1/2: Interleave to do the multiply by constants which gets us
-    //                into 32 bits.
-    const __m128i t0 = _mm_unpacklo_epi16(r2, r3);
-    const __m128i t2 = _mm_unpackhi_epi16(r2, r3);
-    const __m128i u0 = _mm_madd_epi16(t0, k__cospi_p16_p16);
-    const __m128i u2 = _mm_madd_epi16(t0, k__cospi_p16_m16);
-    const __m128i u4 = _mm_madd_epi16(t2, k__cospi_p08_p24);
-    const __m128i u6 = _mm_madd_epi16(t2, k__cospi_p24_m08);
+  // There are 4 total stages, alternating between an add/subtract stage
+  // followed by an multiply-and-add stage.
+  {
+    // Stage 1: Add/subtract
+
+    // in0 = [i0 i1 i2 i3 iC iD iE iF]
+    // in1 = [i4 i5 i6 i7 i8 i9 iA iB]
+    const __m128i r0 = _mm_unpacklo_epi16(in0, in1);
+    const __m128i r1 = _mm_unpackhi_epi16(in0, in1);
+    // r0 = [i0 i4 i1 i5 i2 i6 i3 i7]
+    // r1 = [iC i8 iD i9 iE iA iF iB]
+    const __m128i r2 = _mm_shuffle_epi32(r0, 0xB4);
+    const __m128i r3 = _mm_shuffle_epi32(r1, 0xB4);
+    // r2 = [i0 i4 i1 i5 i3 i7 i2 i6]
+    // r3 = [iC i8 iD i9 iF iB iE iA]
+
+    const __m128i t0 = _mm_add_epi16(r2, r3);
+    const __m128i t1 = _mm_sub_epi16(r2, r3);
+    // t0 = [a0 a4 a1 a5 a3 a7 a2 a6]
+    // t1 = [aC a8 aD a9 aF aB aE aA]
+
+    // Stage 2: multiply by constants (which gets us into 32 bits).
+    // The constants needed here are:
+    // k__cospi_A = [p16 p16 p16 p16 p16 m16 p16 m16]
+    // k__cospi_B = [p16 m16 p16 m16 p16 p16 p16 p16]
+    // k__cospi_C = [p08 p24 p08 p24 p24 m08 p24 m08]
+    // k__cospi_D = [p24 m08 p24 m08 p08 p24 p08 p24]
+    const __m128i u0 = _mm_madd_epi16(t0, k__cospi_A);
+    const __m128i u2 = _mm_madd_epi16(t0, k__cospi_B);
+    const __m128i u1 = _mm_madd_epi16(t1, k__cospi_C);
+    const __m128i u3 = _mm_madd_epi16(t1, k__cospi_D);
+    // Then add and right-shift to get back to 16-bit range
      const __m128i v0 = _mm_add_epi32(u0, k__DCT_CONST_ROUNDING);
+    const __m128i v1 = _mm_add_epi32(u1, k__DCT_CONST_ROUNDING);
      const __m128i v2 = _mm_add_epi32(u2, k__DCT_CONST_ROUNDING);
-    const __m128i v4 = _mm_add_epi32(u4, k__DCT_CONST_ROUNDING);
-    const __m128i v6 = _mm_add_epi32(u6, k__DCT_CONST_ROUNDING);
+    const __m128i v3 = _mm_add_epi32(u3, k__DCT_CONST_ROUNDING);
      const __m128i w0 = _mm_srai_epi32(v0, DCT_CONST_BITS);
+    const __m128i w1 = _mm_srai_epi32(v1, DCT_CONST_BITS);
      const __m128i w2 = _mm_srai_epi32(v2, DCT_CONST_BITS);
-    const __m128i w4 = _mm_srai_epi32(v4, DCT_CONST_BITS);
-    const __m128i w6 = _mm_srai_epi32(v6, DCT_CONST_BITS);
-    // Combine and transpose
-    const __m128i res0 = _mm_packs_epi32(w0, w2);
-    const __m128i res1 = _mm_packs_epi32(w4, w6);
-    // 00 01 02 03 20 21 22 23
-    // 10 11 12 13 30 31 32 33
-    const __m128i tr0_0 = _mm_unpacklo_epi16(res0, res1);
-    const __m128i tr0_1 = _mm_unpackhi_epi16(res0, res1);
-    // 00 10 01 11 02 12 03 13
-    // 20 30 21 31 22 32 23 33
-    in0 = _mm_unpacklo_epi32(tr0_0, tr0_1);
-    in1 = _mm_unpackhi_epi32(tr0_0, tr0_1);
-    in1 = _mm_shuffle_epi32(in1, 0x4E);
-    // 00 10 20 30 01 11 21 31      in0 contains 0 followed by 1
-    // 02 12 22 32 03 13 23 33      in1 contains 2 followed by 3
+    const __m128i w3 = _mm_srai_epi32(v3, DCT_CONST_BITS);
+    // w0 = [b0 b1 b7 b6]
+    // w1 = [b8 b9 bF bE]
+    // w2 = [b4 b5 b3 b2]
+    // w3 = [bC bD bB bA]
+    const __m128i x0 = _mm_packs_epi32(w0, w1);
+    const __m128i x1 = _mm_packs_epi32(w2, w3);
+    // x0 = [b0 b1 b7 b6 b8 b9 bF bE]
+    // x1 = [b4 b5 b3 b2 bC bD bB bA]
+    in0 = _mm_shuffle_epi32(x0, 0xD8);
+    in1 = _mm_shuffle_epi32(x1, 0x8D);
+    // in0 = [b0 b1 b8 b9 b7 b6 bF bE]
+    // in1 = [b3 b2 bB bA b4 b5 bC bD]
    }
-  in1 = _mm_shuffle_epi32(in1, 0x4E);
-  // Post-condition output and store it (v + 1) >> 2, taking advantage
-  // of the fact 1/3 are stored just after 0/2.
    {
-     __m128i out01 = _mm_add_epi16(in0, kOne);
-     __m128i out23 = _mm_add_epi16(in1, kOne);
-     out01 = _mm_srai_epi16(out01, 2);
-     out23 = _mm_srai_epi16(out23, 2);
-     _mm_storeu_si128((__m128i *)(output + 0 * 4), out01);
-     _mm_storeu_si128((__m128i *)(output + 2 * 4), out23);
+    // vertical DCTs finished. Now we do the horizontal DCTs.
+    // Stage 3: Add/subtract
+
+    const __m128i t0 = _mm_add_epi16(in0, in1);
+    const __m128i t1 = _mm_sub_epi16(in0, in1);
+    // t0 = [c0 c1 c8 c9  c4  c5  cC  cD]
+    // t1 = [c3 c2 cB cA -c7 -c6 -cF -cE]
+
+    // Stage 4: multiply by constants (which gets us into 32 bits).
+    // The constants needed here are:
+    // k__cospi_E = [p16 p16 p16 p16 p16 p16 p16 p16]
+    // k__cospi_F = [p16 m16 p16 m16 p16 m16 p16 m16]
+    // k__cospi_G = [p08 p24 p08 p24 m08 m24 m08 m24]
+    // k__cospi_H = [p24 m08 p24 m08 m24 p08 m24 p08]
+    const __m128i u0 = _mm_madd_epi16(t0, k__cospi_E);
+    const __m128i u1 = _mm_madd_epi16(t0, k__cospi_F);
+    const __m128i u2 = _mm_madd_epi16(t1, k__cospi_G);
+    const __m128i u3 = _mm_madd_epi16(t1, k__cospi_H);
+    // Then add and right-shift to get back to 16-bit range
+    // but this combines the final right-shift as well to save operations
+    // This unusual rounding operations is to maintain bit-accurate
+    // compatibility with the c version of this function which has two
+    // rounding steps in a row.
+    const __m128i v0 = _mm_add_epi32(u0, k__DCT_CONST_ROUNDING2);
+    const __m128i v1 = _mm_add_epi32(u1, k__DCT_CONST_ROUNDING2);
+    const __m128i v2 = _mm_add_epi32(u2, k__DCT_CONST_ROUNDING2);
+    const __m128i v3 = _mm_add_epi32(u3, k__DCT_CONST_ROUNDING2);
+    const __m128i w0 = _mm_srai_epi32(v0, DCT_CONST_BITS2);
+    const __m128i w1 = _mm_srai_epi32(v1, DCT_CONST_BITS2);
+    const __m128i w2 = _mm_srai_epi32(v2, DCT_CONST_BITS2);
+    const __m128i w3 = _mm_srai_epi32(v3, DCT_CONST_BITS2);
+    // w0 = [o0 o4 o8 oC]
+    // w1 = [o2 o6 oA oE]
+    // w2 = [o1 o5 o9 oD]
+    // w3 = [o3 o7 oB oF]
+    // remember the o's are numbered according to the correct output location
+    const __m128i x0 = _mm_packs_epi32(w0, w1);
+    const __m128i x1 = _mm_packs_epi32(w2, w3);
+    // x0 = [o0 o4 o8 oC o2 o6 oA oE]
+    // x1 = [o1 o5 o9 oD o3 o7 oB oF]
+    const __m128i y0 = _mm_unpacklo_epi16(x0, x1);
+    const __m128i y1 = _mm_unpackhi_epi16(x0, x1);
+    // y0 = [o0 o1 o4 o5 o8 o9 oC oD]
+    // y1 = [o2 o3 o6 o7 oA oB oE oF]
+    in0 = _mm_unpacklo_epi32(y0, y1);
+    // in0 = [o0 o1 o2 o3 o4 o5 o6 o7]
+    in1 = _mm_unpackhi_epi32(y0, y1);
+    // in1 = [o8 o9 oA oB oC oD oE oF]
+  }
+  // Post-condition (v + 1) >> 2 is now incorporated into previous
+  // add and right-shift commands.  Only 2 store instructions needed
+  // because we are using the fact that 1/3 are stored just after 0/2.
+  {
+     _mm_storeu_si128((__m128i *)(output + 0 * 4), in0);
+     _mm_storeu_si128((__m128i *)(output + 2 * 4), in1);
    }
  }
  
+
  static INLINE void load_buffer_4x4(const int16_t *input, __m128i *in,
                                     int stride) {
    const __m128i k__nonzero_bias_a = _mm_setr_epi16(0, 1, 1, 1, 1, 1, 1, 1);
diff --git a/vp9/vp9_common.mk b/vp9/vp9_common.mk

index 9fb6115..b1ba0b1 100644 (file)
--- a/vp9/vp9_common.mk
+++ b/vp9/vp9_common.mk
@@ -12,7 +12,6 @@ VP9_COMMON_SRCS-yes += vp9_common.mk
  VP9_COMMON_SRCS-yes += vp9_iface_common.h
  VP9_COMMON_SRCS-yes += common/vp9_pragmas.h
  VP9_COMMON_SRCS-yes += common/vp9_ppflags.h
-VP9_COMMON_SRCS-yes += common/vp9_onyx.h
  VP9_COMMON_SRCS-yes += common/vp9_alloccommon.c
  VP9_COMMON_SRCS-yes += common/vp9_blockd.c
  VP9_COMMON_SRCS-yes += common/vp9_convolve.c
@@ -45,7 +44,7 @@ VP9_COMMON_SRCS-yes += common/vp9_quant_common.h
  VP9_COMMON_SRCS-yes += common/vp9_reconinter.h
  VP9_COMMON_SRCS-yes += common/vp9_reconintra.h
  VP9_COMMON_SRCS-yes += common/vp9_rtcd.c
-VP9_COMMON_SRCS-yes += common/vp9_rtcd_defs.sh
+VP9_COMMON_SRCS-yes += common/vp9_rtcd_defs.pl
  VP9_COMMON_SRCS-yes += common/vp9_scale.h
  VP9_COMMON_SRCS-yes += common/vp9_scale.c
  VP9_COMMON_SRCS-yes += common/vp9_seg_common.h
@@ -145,4 +144,4 @@ VP9_COMMON_SRCS-$(HAVE_NEON) += common/arm/neon/vp9_avg_neon$(ASM)
  VP9_COMMON_SRCS-$(HAVE_NEON) += common/arm/neon/vp9_save_reg_neon$(ASM)
  VP9_COMMON_SRCS-$(HAVE_NEON) += common/arm/neon/vp9_reconintra_neon$(ASM)
  
-$(eval $(call rtcd_h_template,vp9_rtcd,vp9/common/vp9_rtcd_defs.sh))
+$(eval $(call rtcd_h_template,vp9_rtcd,vp9/common/vp9_rtcd_defs.pl))
diff --git a/vp9/vp9_cx_iface.c b/vp9/vp9_cx_iface.c

index 28c60d1..8d89e74 100644 (file)
--- a/vp9/vp9_cx_iface.c
+++ b/vp9/vp9_cx_iface.c
@@ -17,7 +17,6 @@
  #include "vp9/encoder/vp9_onyx_int.h"
  #include "vpx/vp8cx.h"
  #include "vp9/encoder/vp9_firstpass.h"
-#include "vp9/common/vp9_onyx.h"
  #include "vp9/vp9_iface_common.h"
  
  struct vp9_extracfg {
@@ -75,7 +74,7 @@ struct vpx_codec_alg_priv {
    vpx_codec_enc_cfg_t     cfg;
    struct vp9_extracfg     vp8_cfg;
    VP9_CONFIG              oxcf;
-  VP9_PTR                 cpi;
+  VP9_COMP               *cpi;
    unsigned char          *cx_data;
    size_t                  cx_data_sz;
    unsigned char          *pending_cx_data;
@@ -502,8 +501,6 @@ static vpx_codec_err_t vp9e_common_init(vpx_codec_ctx_t *ctx) {
    vpx_codec_enc_cfg_t       *cfg;
    unsigned int               i;
  
-  VP9_PTR optr;
-
    if (ctx->priv == NULL) {
      priv = calloc(1, sizeof(struct vpx_codec_alg_priv));
  
@@ -551,15 +548,15 @@ static vpx_codec_err_t vp9e_common_init(vpx_codec_ctx_t *ctx) {
      res = validate_config(priv, &priv->cfg, &priv->vp8_cfg);
  
      if (res == VPX_CODEC_OK) {
+      VP9_COMP *cpi;
        set_vp9e_config(&ctx->priv->alg_priv->oxcf,
                        ctx->priv->alg_priv->cfg,
                        ctx->priv->alg_priv->vp8_cfg);
-      optr = vp9_create_compressor(&ctx->priv->alg_priv->oxcf);
-
-      if (optr == NULL)
+      cpi = vp9_create_compressor(&ctx->priv->alg_priv->oxcf);
+      if (cpi == NULL)
          res = VPX_CODEC_MEM_ERROR;
        else
-        ctx->priv->alg_priv->cpi = optr;
+        ctx->priv->alg_priv->cpi = cpi;
      }
    }
  
@@ -574,7 +571,7 @@ static vpx_codec_err_t vp9e_init(vpx_codec_ctx_t *ctx,
  
  static vpx_codec_err_t vp9e_destroy(vpx_codec_alg_priv_t *ctx) {
    free(ctx->cx_data);
-  vp9_remove_compressor(&ctx->cpi);
+  vp9_remove_compressor(ctx->cpi);
    free(ctx);
    return VPX_CODEC_OK;
  }
diff --git a/vp9/vp9_dx_iface.c b/vp9/vp9_dx_iface.c

index 83d64b8..a9e6a88 100644 (file)
--- a/vp9/vp9_dx_iface.c
+++ b/vp9/vp9_dx_iface.c
@@ -17,7 +17,6 @@
  #include "./vpx_version.h"
  #include "vp9/common/vp9_frame_buffers.h"
  #include "vp9/decoder/vp9_onyxd.h"
-#include "vp9/decoder/vp9_onyxd_int.h"
  #include "vp9/decoder/vp9_read_bit_buffer.h"
  #include "vp9/vp9_iface_common.h"
  
diff --git a/vp9/vp9cx.mk b/vp9/vp9cx.mk

index 6679f89..ff20f05 100644 (file)
--- a/vp9/vp9cx.mk
+++ b/vp9/vp9cx.mk
@@ -18,6 +18,8 @@ VP9_CX_SRCS_REMOVE-no  += $(VP9_COMMON_SRCS_REMOVE-no)
  VP9_CX_SRCS-yes += vp9_cx_iface.c
  
  VP9_CX_SRCS-yes += encoder/vp9_bitstream.c
+VP9_CX_SRCS-yes += encoder/vp9_cost.h
+VP9_CX_SRCS-yes += encoder/vp9_cost.c
  VP9_CX_SRCS-yes += encoder/vp9_dct.c
  VP9_CX_SRCS-yes += encoder/vp9_encodeframe.c
  VP9_CX_SRCS-yes += encoder/vp9_encodeframe.h
diff --git a/vp9/vp9dx.mk b/vp9/vp9dx.mk

index de210f4..4967baf 100644 (file)
--- a/vp9/vp9dx.mk
+++ b/vp9/vp9dx.mk
@@ -29,7 +29,6 @@ VP9_DX_SRCS-yes += decoder/vp9_read_bit_buffer.h
  VP9_DX_SRCS-yes += decoder/vp9_decodemv.h
  VP9_DX_SRCS-yes += decoder/vp9_detokenize.h
  VP9_DX_SRCS-yes += decoder/vp9_onyxd.h
-VP9_DX_SRCS-yes += decoder/vp9_onyxd_int.h
  VP9_DX_SRCS-yes += decoder/vp9_thread.c
  VP9_DX_SRCS-yes += decoder/vp9_thread.h
  VP9_DX_SRCS-yes += decoder/vp9_onyxd_if.c
diff --git a/vpx/exports_enc b/vpx/exports_enc

index 99b1bfa..155faf6 100644 (file)
--- a/vpx/exports_enc
+++ b/vpx/exports_enc
@@ -21,3 +21,5 @@ text vpx_svc_set_options
  text vpx_svc_set_quantizers
  text vpx_svc_set_scale_factors
  text vpx_svc_get_layer_resolution
+text vpx_svc_get_rc_stats_buffer_size
+text vpx_svc_get_rc_stats_buffer
+\ No newline at end of file
diff --git a/vpx/src/svc_encodeframe.c b/vpx/src/svc_encodeframe.c

index c783724..3e22fdf 100644 (file)
--- a/vpx/src/svc_encodeframe.c
+++ b/vpx/src/svc_encodeframe.c
@@ -81,6 +81,10 @@ typedef struct SvcInternal {
    size_t buffer_size;
    void *buffer;
  
+  char *rc_stats_buf;
+  size_t rc_stats_buf_size;
+  size_t rc_stats_buf_used;
+
    char message_buffer[2048];
    vpx_codec_ctx_t *codec_ctx;
  } SvcInternal;
@@ -569,7 +573,6 @@ vpx_codec_err_t vpx_svc_init(SvcContext *svc_ctx, vpx_codec_ctx_t *codec_ctx,
    enc_cfg->ss_number_layers = si->layers;
    enc_cfg->ts_number_layers = 1;  // Temporal layers not used in this encoder.
    enc_cfg->kf_mode = VPX_KF_DISABLED;
-  enc_cfg->g_pass = VPX_RC_ONE_PASS;
    // Lag in frames not currently supported
    enc_cfg->g_lag_in_frames = 0;
  
@@ -851,6 +854,7 @@ vpx_codec_err_t vpx_svc_encode(SvcContext *svc_ctx, vpx_codec_ctx_t *codec_ctx,
  
    memset(&superframe, 0, sizeof(superframe));
    svc_log_reset(svc_ctx);
+  si->rc_stats_buf_used = 0;
  
    si->layers = svc_ctx->spatial_layers;
    if (si->frame_within_gop >= si->kf_dist ||
@@ -923,6 +927,25 @@ vpx_codec_err_t vpx_svc_encode(SvcContext *svc_ctx, vpx_codec_ctx_t *codec_ctx,
            }
            break;
          }
+        case VPX_CODEC_STATS_PKT: {
+          size_t new_size = si->rc_stats_buf_used +
+              cx_pkt->data.twopass_stats.sz;
+
+          if (new_size > si->rc_stats_buf_size) {
+            char *p = (char*)realloc(si->rc_stats_buf, new_size);
+            if (p == NULL) {
+              svc_log(svc_ctx, SVC_LOG_ERROR, "Error allocating stats buf\n");
+              break;
+            }
+            si->rc_stats_buf = p;
+            si->rc_stats_buf_size = new_size;
+          }
+
+          memcpy(si->rc_stats_buf + si->rc_stats_buf_used,
+                 cx_pkt->data.twopass_stats.buf, cx_pkt->data.twopass_stats.sz);
+          si->rc_stats_buf_used += cx_pkt->data.twopass_stats.sz;
+          break;
+        }
          default: {
            break;
          }
@@ -1077,7 +1100,24 @@ void vpx_svc_release(SvcContext *svc_ctx) {
    si = (SvcInternal *)svc_ctx->internal;
    if (si != NULL) {
      free(si->buffer);
+    if (si->rc_stats_buf) {
+      free(si->rc_stats_buf);
+    }
      free(si);
      svc_ctx->internal = NULL;
    }
  }
+
+size_t vpx_svc_get_rc_stats_buffer_size(const SvcContext *svc_ctx) {
+  const SvcInternal *const si = get_const_svc_internal(svc_ctx);
+  if (svc_ctx == NULL || si == NULL) return 0;
+  return si->rc_stats_buf_used;
+}
+
+char *vpx_svc_get_rc_stats_buffer(const SvcContext *svc_ctx) {
+  const SvcInternal *const si = get_const_svc_internal(svc_ctx);
+  if (svc_ctx == NULL || si == NULL) return NULL;
+  return si->rc_stats_buf;
+}
+
+
diff --git a/vpx/svc_context.h b/vpx/svc_context.h

index 98474ca..5d0fbbd 100644 (file)
--- a/vpx/svc_context.h
+++ b/vpx/svc_context.h
@@ -114,6 +114,17 @@ size_t vpx_svc_get_frame_size(const SvcContext *svc_ctx);
  void *vpx_svc_get_buffer(const SvcContext *svc_ctx);
  
  /**
+ * return size of two pass rate control stats data to be returned by
+ * vpx_svc_get_rc_stats_buffer
+ */
+size_t vpx_svc_get_rc_stats_buffer_size(const SvcContext *svc_ctx);
+
+/**
+ * return buffer two pass of rate control stats data
+ */
+char *vpx_svc_get_rc_stats_buffer(const SvcContext *svc_ctx);
+
+/**
   * return spatial resolution of the specified layer
   */
  vpx_codec_err_t vpx_svc_get_layer_resolution(const SvcContext *svc_ctx,
diff --git a/vpx_scale/vpx_scale.mk b/vpx_scale/vpx_scale.mk

index 50d3e9d..51a0ec9 100644 (file)
--- a/vpx_scale/vpx_scale.mk
+++ b/vpx_scale/vpx_scale.mk
@@ -7,7 +7,7 @@ SCALE_SRCS-yes += generic/yv12extend.c
  SCALE_SRCS-$(CONFIG_SPATIAL_RESAMPLING) += generic/gen_scalers.c
  SCALE_SRCS-yes += vpx_scale_asm_offsets.c
  SCALE_SRCS-yes += vpx_scale_rtcd.c
-SCALE_SRCS-yes += vpx_scale_rtcd.sh
+SCALE_SRCS-yes += vpx_scale_rtcd.pl
  
  #neon
  SCALE_SRCS-$(HAVE_NEON)  += arm/neon/vp8_vpxyv12_copyframe_func_neon$(ASM)
@@ -24,4 +24,4 @@ SCALE_SRCS-no += $(SCALE_SRCS_REMOVE-yes)
  $(eval $(call asm_offsets_template,\
                  vpx_scale_asm_offsets.asm, vpx_scale/vpx_scale_asm_offsets.c))
  
-$(eval $(call rtcd_h_template,vpx_scale_rtcd,vpx_scale/vpx_scale_rtcd.sh))
+$(eval $(call rtcd_h_template,vpx_scale_rtcd,vpx_scale/vpx_scale_rtcd.pl))
diff --git a/vpx_scale/vpx_scale_rtcd.pl b/vpx_scale/vpx_scale_rtcd.pl

new file mode 100644 (file)

index 0000000..8c92570
--- /dev/null
+++ b/vpx_scale/vpx_scale_rtcd.pl
@@ -0,0 +1,34 @@
+sub vpx_scale_forward_decls() {
+print <<EOF
+struct yv12_buffer_config;
+EOF
+}
+forward_decls qw/vpx_scale_forward_decls/;
+
+# Scaler functions
+if (vpx_config("CONFIG_SPATIAL_RESAMPLING") eq "yes") {
+    add_proto qw/void vp8_horizontal_line_5_4_scale/, "const unsigned char *source, unsigned int source_width, unsigned char *dest, unsigned int dest_width";
+    add_proto qw/void vp8_vertical_band_5_4_scale/, "unsigned char *source, unsigned int src_pitch, unsigned char *dest, unsigned int dest_pitch, unsigned int dest_width";
+    add_proto qw/void vp8_horizontal_line_5_3_scale/, "const unsigned char *source, unsigned int source_width, unsigned char *dest, unsigned int dest_width";
+    add_proto qw/void vp8_vertical_band_5_3_scale/, "unsigned char *source, unsigned int src_pitch, unsigned char *dest, unsigned int dest_pitch, unsigned int dest_width";
+    add_proto qw/void vp8_horizontal_line_2_1_scale/, "const unsigned char *source, unsigned int source_width, unsigned char *dest, unsigned int dest_width";
+    add_proto qw/void vp8_vertical_band_2_1_scale/, "unsigned char *source, unsigned int src_pitch, unsigned char *dest, unsigned int dest_pitch, unsigned int dest_width";
+    add_proto qw/void vp8_vertical_band_2_1_scale_i/, "unsigned char *source, unsigned int src_pitch, unsigned char *dest, unsigned int dest_pitch, unsigned int dest_width";
+}
+
+add_proto qw/void vp8_yv12_extend_frame_borders/, "struct yv12_buffer_config *ybf";
+specialize qw/vp8_yv12_extend_frame_borders neon/;
+
+add_proto qw/void vp8_yv12_copy_frame/, "const struct yv12_buffer_config *src_ybc, struct yv12_buffer_config *dst_ybc";
+specialize qw/vp8_yv12_copy_frame neon/;
+
+add_proto qw/void vpx_yv12_copy_y/, "const struct yv12_buffer_config *src_ybc, struct yv12_buffer_config *dst_ybc";
+
+if (vpx_config("CONFIG_VP9") eq "yes") {
+    add_proto qw/void vp9_extend_frame_borders/, "struct yv12_buffer_config *ybf";
+    specialize qw/vp9_extend_frame_borders dspr2/;
+
+    add_proto qw/void vp9_extend_frame_inner_borders/, "struct yv12_buffer_config *ybf";
+    specialize qw/vp9_extend_frame_inner_borders dspr2/;
+}
+1;
diff --git a/vpx_scale/vpx_scale_rtcd.sh b/vpx_scale/vpx_scale_rtcd.sh

deleted file mode 100644 (file)

index c26208c..0000000
--- a/vpx_scale/vpx_scale_rtcd.sh
+++ /dev/null
@@ -1,34 +0,0 @@
-vpx_scale_forward_decls() {
-cat <<EOF
-struct yv12_buffer_config;
-EOF
-}
-forward_decls vpx_scale_forward_decls
-
-# Scaler functions
-if [ "$CONFIG_SPATIAL_RESAMPLING" = "yes" ]; then
-    prototype void vp8_horizontal_line_5_4_scale "const unsigned char *source, unsigned int source_width, unsigned char *dest, unsigned int dest_width"
-    prototype void vp8_vertical_band_5_4_scale "unsigned char *source, unsigned int src_pitch, unsigned char *dest, unsigned int dest_pitch, unsigned int dest_width"
-    prototype void vp8_horizontal_line_5_3_scale "const unsigned char *source, unsigned int source_width, unsigned char *dest, unsigned int dest_width"
-    prototype void vp8_vertical_band_5_3_scale "unsigned char *source, unsigned int src_pitch, unsigned char *dest, unsigned int dest_pitch, unsigned int dest_width"
-    prototype void vp8_horizontal_line_2_1_scale "const unsigned char *source, unsigned int source_width, unsigned char *dest, unsigned int dest_width"
-    prototype void vp8_vertical_band_2_1_scale "unsigned char *source, unsigned int src_pitch, unsigned char *dest, unsigned int dest_pitch, unsigned int dest_width"
-    prototype void vp8_vertical_band_2_1_scale_i "unsigned char *source, unsigned int src_pitch, unsigned char *dest, unsigned int dest_pitch, unsigned int dest_width"
-fi
-
-prototype void vp8_yv12_extend_frame_borders "struct yv12_buffer_config *ybf"
-specialize vp8_yv12_extend_frame_borders neon
-
-prototype void vp8_yv12_copy_frame "const struct yv12_buffer_config *src_ybc, struct yv12_buffer_config *dst_ybc"
-specialize vp8_yv12_copy_frame neon
-
-prototype void vpx_yv12_copy_y "const struct yv12_buffer_config *src_ybc, struct yv12_buffer_config *dst_ybc"
-specialize vpx_yv12_copy_y neon
-
-if [ "$CONFIG_VP9" = "yes" ]; then
-    prototype void vp9_extend_frame_borders "struct yv12_buffer_config *ybf"
-    specialize vp9_extend_frame_borders dspr2
-
-    prototype void vp9_extend_frame_inner_borders "struct yv12_buffer_config *ybf"
-    specialize vp9_extend_frame_inner_borders dspr2
-fi
author	Jim Bankoski <jimbankoski@google.com>
	Mon, 10 Mar 2014 14:36:05 +0000 (07:36 -0700)
committer	Gerrit Code Review <gerrit@gerrit.golo.chromium.org>
	Mon, 10 Mar 2014 14:36:05 +0000 (07:36 -0700)
.gitignore		patch \| blob \| history
build/make/Makefile		patch \| blob \| history
build/make/configure.sh		patch \| blob \| history
build/make/rtcd.pl	[new file with mode: 0755]	patch \| blob
build/make/rtcd.sh	[deleted file]	patch \| blob \| history
configure		patch \| blob \| history
examples.mk		patch \| blob \| history
examples/force_keyframe.c	[deleted file]	patch \| blob \| history
examples/resize_util.c	[moved from resize_util.c with 100% similarity]	patch \| blob \| history
examples/simple_encoder.c		patch \| blob \| history
examples/vp8cx_set_ref.c		patch \| blob \| history
examples/vp9_spatial_scalable_encoder.c		patch \| blob \| history
libs.mk		patch \| blob \| history
test/cpu_speed_test.cc		patch \| blob \| history
test/datarate_test.cc		patch \| blob \| history
test/register_state_check.h		patch \| blob \| history
test/vp8_boolcoder_test.cc		patch \| blob \| history
test/vp8_decrypt_test.cc		patch \| blob \| history
third_party/nestegg/README.webm		patch \| blob \| history
third_party/nestegg/src/nestegg.c		patch \| blob \| history
vp8/common/rtcd_defs.pl	[new file with mode: 0644]	patch \| blob
vp8/common/rtcd_defs.sh	[deleted file]	patch \| blob \| history
vp8/vp8_common.mk		patch \| blob \| history
vp9/common/vp9_entropymv.h		patch \| blob \| history
vp9/common/vp9_mvref_common.c		patch \| blob \| history
vp9/common/vp9_onyx.h	[deleted file]	patch \| blob \| history
vp9/common/vp9_rtcd_defs.pl	[new file with mode: 0644]	patch \| blob
vp9/common/vp9_rtcd_defs.sh	[deleted file]	patch \| blob \| history
vp9/common/vp9_systemdependent.h		patch \| blob \| history
vp9/common/x86/vp9_subpixel_8t_intrin_avx2.c		patch \| blob \| history
vp9/decoder/vp9_decodeframe.c		patch \| blob \| history
vp9/decoder/vp9_decodemv.c		patch \| blob \| history
vp9/decoder/vp9_decodemv.h		patch \| blob \| history
vp9/decoder/vp9_detokenize.h		patch \| blob \| history
vp9/decoder/vp9_dthread.c		patch \| blob \| history
vp9/decoder/vp9_onyxd.h		patch \| blob \| history
vp9/decoder/vp9_onyxd_if.c		patch \| blob \| history
vp9/decoder/vp9_onyxd_int.h	[deleted file]	patch \| blob \| history
vp9/encoder/vp9_bitstream.c		patch \| blob \| history
vp9/encoder/vp9_block.h		patch \| blob \| history
vp9/encoder/vp9_cost.c	[new file with mode: 0644]	patch \| blob
vp9/encoder/vp9_cost.h	[new file with mode: 0644]	patch \| blob
vp9/encoder/vp9_encodeframe.c		patch \| blob \| history
vp9/encoder/vp9_encodemv.c		patch \| blob \| history
vp9/encoder/vp9_firstpass.c		patch \| blob \| history
vp9/encoder/vp9_firstpass.h		patch \| blob \| history
vp9/encoder/vp9_mbgraph.c		patch \| blob \| history
vp9/encoder/vp9_mcomp.c		patch \| blob \| history
vp9/encoder/vp9_mcomp.h		patch \| blob \| history
vp9/encoder/vp9_onyx_if.c		patch \| blob \| history
vp9/encoder/vp9_onyx_int.h		patch \| blob \| history
vp9/encoder/vp9_picklpf.c		patch \| blob \| history
vp9/encoder/vp9_picklpf.h		patch \| blob \| history
vp9/encoder/vp9_pickmode.c		patch \| blob \| history
vp9/encoder/vp9_ratectrl.c		patch \| blob \| history
vp9/encoder/vp9_rdopt.c		patch \| blob \| history
vp9/encoder/vp9_rdopt.h		patch \| blob \| history
vp9/encoder/vp9_segmentation.c		patch \| blob \| history
vp9/encoder/vp9_subexp.c		patch \| blob \| history
vp9/encoder/vp9_tokenize.c		patch \| blob \| history
vp9/encoder/vp9_treewriter.c		patch \| blob \| history
vp9/encoder/vp9_treewriter.h		patch \| blob \| history
vp9/encoder/vp9_writer.c		patch \| blob \| history
vp9/encoder/vp9_writer.h		patch \| blob \| history
vp9/encoder/x86/vp9_dct_sse2.c		patch \| blob \| history
vp9/vp9_common.mk		patch \| blob \| history
vp9/vp9_cx_iface.c		patch \| blob \| history
vp9/vp9_dx_iface.c		patch \| blob \| history
vp9/vp9cx.mk		patch \| blob \| history
vp9/vp9dx.mk		patch \| blob \| history
vpx/exports_enc		patch \| blob \| history
vpx/src/svc_encodeframe.c		patch \| blob \| history
vpx/svc_context.h		patch \| blob \| history
vpx_scale/vpx_scale.mk		patch \| blob \| history
vpx_scale/vpx_scale_rtcd.pl	[new file with mode: 0644]	patch \| blob
vpx_scale/vpx_scale_rtcd.sh	[deleted file]	patch \| blob \| history