significantly speeds up file generation.
the goal of this change is to convert rtcd.sh to perl as directly as
possible to allow for simple comparison. future changes can make it more
perl-like.
---
Linux
[CREATE] vpx_scale_rtcd.h
real 0m0.485s -> 0m0.022s
[CREATE] vp8_rtcd.h
real 0m4.619s -> 0m0.060s
[CREATE] vp9_rtcd.h
real 0m10.102s -> 0m0.087s
Windows
[CREATE] vpx_scale_rtcd.h
real 0m8.360s -> 0m0.080s
[CREATE] vp8_rtcd.h
real 1m8.083s -> 0m0.160s
[CREATE] vp9_rtcd.h
real 2m6.489s -> 0m0.233s
Change-Id: Idfb71188206c91237d6a3c3a81dfe00d103f11ee
--- /dev/null
+#!/usr/bin/env perl
+
+no strict 'refs';
+use warnings;
+use Getopt::Long;
+Getopt::Long::Configure("auto_help");
+
+my %ALL_FUNCS = ();
+my @ALL_ARCHS;
+my @ALL_FORWARD_DECLS;
+my @REQUIRES;
+
+my %opts = ();
+my %disabled = ();
+my %required = ();
+
+my @argv;
+foreach (@ARGV) {
+ $disabled{$1} = 1, next if /--disable-(.*)/;
+ $required{$1} = 1, next if /--require-(.*)/;
+ push @argv, $_;
+}
+
+# NB: use GetOptions() instead of GetOptionsFromArray() for compatibility.
+@ARGV = @argv;
+GetOptions(
+ \%opts,
+ 'arch=s',
+ 'sym=s',
+ 'config=s',
+);
+
+foreach my $opt (qw/arch config/) {
+ if (!defined($opts{$opt})) {
+ warn "--$opt is required!\n";
+ Getopt::Long::HelpMessage('-exit' => 1);
+ }
+}
+
+foreach my $defs_file (@ARGV) {
+ if (!-f $defs_file) {
+ warn "$defs_file: $!\n";
+ Getopt::Long::HelpMessage('-exit' => 1);
+ }
+}
+
+open CONFIG_FILE, $opts{config} or
+ die "Error opening config file '$opts{config}': $!\n";
+
+my %config = ();
+while (<CONFIG_FILE>) {
+ next if !/^CONFIG_/;
+ chomp;
+ my @pair = split /=/;
+ $config{$pair[0]} = $pair[1];
+}
+close CONFIG_FILE;
+
+#
+# Routines for the RTCD DSL to call
+#
+sub vpx_config($) {
+ return (defined $config{$_[0]}) ? $config{$_[0]} : "";
+}
+
+sub specialize {
+ my $fn=$_[0];
+ shift;
+ foreach my $opt (@_) {
+ eval "\$${fn}_${opt}=${fn}_${opt}";
+ }
+}
+
+sub add_proto {
+ my $fn = splice(@_, -2, 1);
+ $ALL_FUNCS{$fn} = \@_;
+ specialize $fn, "c";
+}
+
+sub require {
+ foreach my $fn (keys %ALL_FUNCS) {
+ foreach my $opt (@_) {
+ my $ofn = eval "\$${fn}_${opt}";
+ next if !$ofn;
+
+ # if we already have a default, then we can disable it, as we know
+ # we can do better.
+ my $best = eval "\$${fn}_default";
+ if ($best) {
+ my $best_ofn = eval "\$${best}";
+ if ($best_ofn && "$best_ofn" ne "$ofn") {
+ eval "\$${best}_link = 'false'";
+ }
+ }
+ eval "\$${fn}_default=${fn}_${opt}";
+ eval "\$${fn}_${opt}_link='true'";
+ }
+ }
+}
+
+sub forward_decls {
+ push @ALL_FORWARD_DECLS, @_;
+}
+
+#
+# Include the user's directives
+#
+foreach my $f (@ARGV) {
+ open FILE, "<", $f or die "cannot open $f: $!\n";
+ my $contents = join('', <FILE>);
+ close FILE;
+ eval $contents or warn "eval failed: $@\n";
+}
+
+#
+# Process the directives according to the command line
+#
+sub process_forward_decls() {
+ foreach (@ALL_FORWARD_DECLS) {
+ $_->();
+ }
+}
+
+sub determine_indirection {
+ vpx_config("CONFIG_RUNTIME_CPU_DETECT") eq "yes" or &require(@ALL_ARCHS);
+ foreach my $fn (keys %ALL_FUNCS) {
+ my $n = "";
+ my @val = @{$ALL_FUNCS{$fn}};
+ my $args = pop @val;
+ my $rtyp = "@val";
+ my $dfn = eval "\$${fn}_default";
+ $dfn = eval "\$${dfn}";
+ foreach my $opt (@_) {
+ my $ofn = eval "\$${fn}_${opt}";
+ next if !$ofn;
+ my $link = eval "\$${fn}_${opt}_link";
+ next if $link && $link eq "false";
+ $n .= "x";
+ }
+ if ($n eq "x") {
+ eval "\$${fn}_indirect = 'false'";
+ } else {
+ eval "\$${fn}_indirect = 'true'";
+ }
+ }
+}
+
+sub declare_function_pointers {
+ foreach my $fn (sort keys %ALL_FUNCS) {
+ my @val = @{$ALL_FUNCS{$fn}};
+ my $args = pop @val;
+ my $rtyp = "@val";
+ my $dfn = eval "\$${fn}_default";
+ $dfn = eval "\$${dfn}";
+ foreach my $opt (@_) {
+ my $ofn = eval "\$${fn}_${opt}";
+ next if !$ofn;
+ print "$rtyp ${ofn}($args);\n";
+ }
+ if (eval "\$${fn}_indirect" eq "false") {
+ print "#define ${fn} ${dfn}\n";
+ } else {
+ print "RTCD_EXTERN $rtyp (*${fn})($args);\n";
+ }
+ print "\n";
+ }
+}
+
+sub set_function_pointers {
+ foreach my $fn (sort keys %ALL_FUNCS) {
+ my @val = @{$ALL_FUNCS{$fn}};
+ my $args = pop @val;
+ my $rtyp = "@val";
+ my $dfn = eval "\$${fn}_default";
+ $dfn = eval "\$${dfn}";
+ if (eval "\$${fn}_indirect" eq "true") {
+ print " $fn = $dfn;\n";
+ foreach my $opt (@_) {
+ my $ofn = eval "\$${fn}_${opt}";
+ next if !$ofn;
+ next if "$ofn" eq "$dfn";
+ my $link = eval "\$${fn}_${opt}_link";
+ next if $link && $link eq "false";
+ my $cond = eval "\$have_${opt}";
+ print " if (${cond}) $fn = $ofn;\n"
+ }
+ }
+ }
+}
+
+sub filter {
+ my @filtered;
+ foreach (@_) { push @filtered, $_ unless $disabled{$_}; }
+ return @filtered;
+}
+
+#
+# Helper functions for generating the arch specific RTCD files
+#
+sub common_top() {
+ my $include_guard = uc($opts{sym})."_H_";
+ print <<EOF;
+#ifndef ${include_guard}
+#define ${include_guard}
+
+#ifdef RTCD_C
+#define RTCD_EXTERN
+#else
+#define RTCD_EXTERN extern
+#endif
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+EOF
+
+process_forward_decls();
+print "\n";
+declare_function_pointers("c", @ALL_ARCHS);
+
+print <<EOF;
+void $opts{sym}(void);
+
+EOF
+}
+
+sub common_bottom() {
+ print <<EOF;
+
+#ifdef __cplusplus
+} // extern "C"
+#endif
+
+#endif
+EOF
+}
+
+sub x86() {
+ determine_indirection("c", @ALL_ARCHS);
+
+ # Assign the helper variable for each enabled extension
+ foreach my $opt (@ALL_ARCHS) {
+ my $opt_uc = uc $opt;
+ eval "\$have_${opt}=\"flags & HAS_${opt_uc}\"";
+ }
+
+ common_top;
+ print <<EOF;
+#ifdef RTCD_C
+#include "vpx_ports/x86.h"
+static void setup_rtcd_internal(void)
+{
+ int flags = x86_simd_caps();
+
+ (void)flags;
+
+EOF
+
+ set_function_pointers("c", @ALL_ARCHS);
+
+ print <<EOF;
+}
+#endif
+EOF
+ common_bottom;
+}
+
+sub arm() {
+ determine_indirection("c", @ALL_ARCHS);
+
+ # Assign the helper variable for each enabled extension
+ foreach my $opt (@ALL_ARCHS) {
+ my $opt_uc = uc $opt;
+ eval "\$have_${opt}=\"flags & HAS_${opt_uc}\"";
+ }
+
+ common_top;
+ print <<EOF;
+#include "vpx_config.h"
+
+#ifdef RTCD_C
+#include "vpx_ports/arm.h"
+static void setup_rtcd_internal(void)
+{
+ int flags = arm_cpu_caps();
+
+ (void)flags;
+
+EOF
+
+ set_function_pointers("c", @ALL_ARCHS);
+
+ print <<EOF;
+}
+#endif
+EOF
+ common_bottom;
+}
+
+sub mips() {
+ determine_indirection("c", @ALL_ARCHS);
+ common_top;
+
+ print <<EOF;
+#include "vpx_config.h"
+
+#ifdef RTCD_C
+static void setup_rtcd_internal(void)
+{
+EOF
+
+ set_function_pointers("c", @ALL_ARCHS);
+
+ print <<EOF;
+#if HAVE_DSPR2
+#if CONFIG_VP8
+void dsputil_static_init();
+dsputil_static_init();
+#endif
+#if CONFIG_VP9
+void vp9_dsputil_static_init();
+vp9_dsputil_static_init();
+#endif
+#endif
+}
+#endif
+EOF
+ common_bottom;
+}
+
+sub unoptimized() {
+ determine_indirection "c";
+ common_top;
+ print <<EOF;
+#include "vpx_config.h"
+
+#ifdef RTCD_C
+static void setup_rtcd_internal(void)
+{
+EOF
+
+ set_function_pointers "c";
+
+ print <<EOF;
+}
+#endif
+EOF
+ common_bottom;
+}
+
+#
+# Main Driver
+#
+
+&require("c");
+if ($opts{arch} eq 'x86') {
+ @ALL_ARCHS = filter(qw/mmx sse sse2 sse3 ssse3 sse4_1 avx avx2/);
+ x86;
+} elsif ($opts{arch} eq 'x86_64') {
+ @ALL_ARCHS = filter(qw/mmx sse sse2 sse3 ssse3 sse4_1 avx avx2/);
+ @REQUIRES = filter(keys %required ? keys %required : qw/mmx sse sse2/);
+ &require(@REQUIRES);
+ x86;
+} elsif ($opts{arch} eq 'mips32') {
+ @ALL_ARCHS = filter(qw/mips32/);
+ open CONFIG_FILE, $opts{config} or
+ die "Error opening config file '$opts{config}': $!\n";
+ while (<CONFIG_FILE>) {
+ if (/HAVE_DSPR2=yes/) {
+ @ALL_ARCHS = filter(qw/mips32 dspr2/);
+ last;
+ }
+ }
+ close CONFIG_FILE;
+ mips;
+} elsif ($opts{arch} eq 'armv5te') {
+ @ALL_ARCHS = filter(qw/edsp/);
+ arm;
+} elsif ($opts{arch} eq 'armv6') {
+ @ALL_ARCHS = filter(qw/edsp media/);
+ arm;
+} elsif ($opts{arch} eq 'armv7') {
+ @ALL_ARCHS = filter(qw/edsp media neon/);
+ arm;
+} else {
+ unoptimized;
+}
+
+__END__
+
+=head1 NAME
+
+rtcd -
+
+=head1 SYNOPSIS
+
+Usage: rtcd.pl [options] FILE
+
+See 'perldoc rtcd.pl' for more details.
+
+=head1 DESCRIPTION
+
+Reads the Run Time CPU Detections definitions from FILE and generates a
+C header file on stdout.
+
+=head1 OPTIONS
+
+Options:
+ --arch=ARCH Architecture to generate defs for (required)
+ --disable-EXT Disable support for EXT extensions
+ --require-EXT Require support for EXT extensions
+ --sym=SYMBOL Unique symbol to use for RTCD initialization function
+ --config=FILE File with CONFIG_FOO=yes lines to parse
+++ /dev/null
-#!/bin/sh
-self=$0
-
-usage() {
- cat <<EOF >&2
-Usage: $self [options] FILE
-
-Reads the Run Time CPU Detections definitions from FILE and generates a
-C header file on stdout.
-
-Options:
- --arch=ARCH Architecture to generate defs for (required)
- --disable-EXT Disable support for EXT extensions
- --require-EXT Require support for EXT extensions
- --sym=SYMBOL Unique symbol to use for RTCD initialization function
- --config=FILE File with CONFIG_FOO=yes lines to parse
-EOF
- exit 1
-}
-
-die() {
- echo "$@" >&2
- exit 1
-}
-
-die_argument_required() {
- die "Option $opt requires argument"
-}
-
-for opt; do
- optval="${opt#*=}"
- case "$opt" in
- --arch) die_argument_required;;
- --arch=*) arch=${optval};;
- --disable-*) eval "disable_${opt#--disable-}=true";;
- --require-*) REQUIRES="${REQUIRES}${opt#--require-} ";;
- --sym) die_argument_required;;
- --sym=*) symbol=${optval};;
- --config=*) config_file=${optval};;
- -h|--help)
- usage
- ;;
- -*)
- die "Unrecognized option: ${opt%%=*}"
- ;;
- *)
- defs_file="$defs_file $opt"
- ;;
- esac
- shift
-done
-for f in $defs_file; do [ -f "$f" ] || usage; done
-[ -n "$arch" ] || usage
-
-# Import the configuration
-[ -f "$config_file" ] && eval $(grep CONFIG_ "$config_file")
-
-#
-# Routines for the RTCD DSL to call
-#
-prototype() {
- rtyp=""
- case "$1" in
- unsigned) rtyp="$1 "; shift;;
- esac
- rtyp="${rtyp}$1"
- fn="$2"
- args="$3"
-
- eval "${2}_rtyp='$rtyp'"
- eval "${2}_args='$3'"
- ALL_FUNCS="$ALL_FUNCS $fn"
- specialize $fn c
-}
-
-specialize() {
- fn="$1"
- shift
- for opt in "$@"; do
- eval "${fn}_${opt}=${fn}_${opt}"
- done
-}
-
-require() {
- for fn in $ALL_FUNCS; do
- for opt in "$@"; do
- ofn=$(eval "echo \$${fn}_${opt}")
- [ -z "$ofn" ] && continue
-
- # if we already have a default, then we can disable it, as we know
- # we can do better.
- best=$(eval "echo \$${fn}_default")
- best_ofn=$(eval "echo \$${best}")
- [ -n "$best" ] && [ "$best_ofn" != "$ofn" ] && eval "${best}_link=false"
- eval "${fn}_default=${fn}_${opt}"
- eval "${fn}_${opt}_link=true"
- done
- done
-}
-
-forward_decls() {
- ALL_FORWARD_DECLS="$ALL_FORWARD_DECLS $1"
-}
-
-#
-# Include the user's directives
-#
-for f in $defs_file; do
- . $f
-done
-
-#
-# Process the directives according to the command line
-#
-process_forward_decls() {
- for fn in $ALL_FORWARD_DECLS; do
- eval $fn
- done
-}
-
-determine_indirection() {
- [ "$CONFIG_RUNTIME_CPU_DETECT" = "yes" ] || require $ALL_ARCHS
- for fn in $ALL_FUNCS; do
- n=""
- rtyp="$(eval "echo \$${fn}_rtyp")"
- args="$(eval "echo \"\$${fn}_args\"")"
- dfn="$(eval "echo \$${fn}_default")"
- dfn=$(eval "echo \$${dfn}")
- for opt in "$@"; do
- ofn=$(eval "echo \$${fn}_${opt}")
- [ -z "$ofn" ] && continue
- link=$(eval "echo \$${fn}_${opt}_link")
- [ "$link" = "false" ] && continue
- n="${n}x"
- done
- if [ "$n" = "x" ]; then
- eval "${fn}_indirect=false"
- else
- eval "${fn}_indirect=true"
- fi
- done
-}
-
-declare_function_pointers() {
- for fn in $ALL_FUNCS; do
- rtyp="$(eval "echo \$${fn}_rtyp")"
- args="$(eval "echo \"\$${fn}_args\"")"
- dfn="$(eval "echo \$${fn}_default")"
- dfn=$(eval "echo \$${dfn}")
- for opt in "$@"; do
- ofn=$(eval "echo \$${fn}_${opt}")
- [ -z "$ofn" ] && continue
- echo "$rtyp ${ofn}($args);"
- done
- if [ "$(eval "echo \$${fn}_indirect")" = "false" ]; then
- echo "#define ${fn} ${dfn}"
- else
- echo "RTCD_EXTERN $rtyp (*${fn})($args);"
- fi
- echo
- done
-}
-
-set_function_pointers() {
- for fn in $ALL_FUNCS; do
- n=""
- rtyp="$(eval "echo \$${fn}_rtyp")"
- args="$(eval "echo \"\$${fn}_args\"")"
- dfn="$(eval "echo \$${fn}_default")"
- dfn=$(eval "echo \$${dfn}")
- if $(eval "echo \$${fn}_indirect"); then
- echo " $fn = $dfn;"
- for opt in "$@"; do
- ofn=$(eval "echo \$${fn}_${opt}")
- [ -z "$ofn" ] && continue
- [ "$ofn" = "$dfn" ] && continue;
- link=$(eval "echo \$${fn}_${opt}_link")
- [ "$link" = "false" ] && continue
- cond="$(eval "echo \$have_${opt}")"
- echo " if (${cond}) $fn = $ofn;"
- done
- fi
- echo
- done
-}
-
-filter() {
- filtered=""
- for opt in "$@"; do
- [ -z $(eval "echo \$disable_${opt}") ] && filtered="$filtered $opt"
- done
- echo $filtered
-}
-
-#
-# Helper functions for generating the arch specific RTCD files
-#
-common_top() {
- outfile_basename=$(basename ${symbol:-rtcd})
- include_guard=$(echo $outfile_basename | tr '[a-z]' '[A-Z]' | \
- tr -c '[A-Z0-9]' _)H_
- cat <<EOF
-#ifndef ${include_guard}
-#define ${include_guard}
-
-#ifdef RTCD_C
-#define RTCD_EXTERN
-#else
-#define RTCD_EXTERN extern
-#endif
-
-#ifdef __cplusplus
-extern "C" {
-#endif
-
-$(process_forward_decls)
-
-$(declare_function_pointers c $ALL_ARCHS)
-
-void ${symbol:-rtcd}(void);
-EOF
-}
-
-common_bottom() {
- cat <<EOF
-
-#ifdef __cplusplus
-} // extern "C"
-#endif
-
-#endif
-EOF
-}
-
-x86() {
- determine_indirection c $ALL_ARCHS
-
- # Assign the helper variable for each enabled extension
- for opt in $ALL_ARCHS; do
- uc=$(echo $opt | tr '[a-z]' '[A-Z]')
- eval "have_${opt}=\"flags & HAS_${uc}\""
- done
-
- cat <<EOF
-$(common_top)
-
-#ifdef RTCD_C
-#include "vpx_ports/x86.h"
-static void setup_rtcd_internal(void)
-{
- int flags = x86_simd_caps();
-
- (void)flags;
-
-$(set_function_pointers c $ALL_ARCHS)
-}
-#endif
-$(common_bottom)
-EOF
-}
-
-arm() {
- determine_indirection c $ALL_ARCHS
-
- # Assign the helper variable for each enabled extension
- for opt in $ALL_ARCHS; do
- uc=$(echo $opt | tr '[a-z]' '[A-Z]')
- eval "have_${opt}=\"flags & HAS_${uc}\""
- done
-
- cat <<EOF
-$(common_top)
-#include "vpx_config.h"
-
-#ifdef RTCD_C
-#include "vpx_ports/arm.h"
-static void setup_rtcd_internal(void)
-{
- int flags = arm_cpu_caps();
-
- (void)flags;
-
-$(set_function_pointers c $ALL_ARCHS)
-}
-#endif
-$(common_bottom)
-EOF
-}
-
-
-mips() {
- determine_indirection c $ALL_ARCHS
- cat <<EOF
-$(common_top)
-#include "vpx_config.h"
-
-#ifdef RTCD_C
-static void setup_rtcd_internal(void)
-{
-$(set_function_pointers c $ALL_ARCHS)
-#if HAVE_DSPR2
-#if CONFIG_VP8
-void dsputil_static_init();
-dsputil_static_init();
-#endif
-#if CONFIG_VP9
-void vp9_dsputil_static_init();
-vp9_dsputil_static_init();
-#endif
-#endif
-}
-#endif
-$(common_bottom)
-EOF
-}
-
-unoptimized() {
- determine_indirection c
- cat <<EOF
-$(common_top)
-#include "vpx_config.h"
-
-#ifdef RTCD_C
-static void setup_rtcd_internal(void)
-{
-$(set_function_pointers c)
-}
-#endif
-$(common_bottom)
-EOF
-
-}
-#
-# Main Driver
-#
-ALL_FUNCS=$(export LC_ALL=C; echo $ALL_FUNCS | tr ' ' '\n' | sort |tr '\n' ' ')
-require c
-case $arch in
- x86)
- ALL_ARCHS=$(filter mmx sse sse2 sse3 ssse3 sse4_1 avx avx2)
- x86
- ;;
- x86_64)
- ALL_ARCHS=$(filter mmx sse sse2 sse3 ssse3 sse4_1 avx avx2)
- REQUIRES=${REQUIRES:-mmx sse sse2}
- require $(filter $REQUIRES)
- x86
- ;;
- mips32)
- ALL_ARCHS=$(filter mips32)
- dspr2=$([ -f "$config_file" ] && eval echo $(grep HAVE_DSPR2 "$config_file"))
- HAVE_DSPR2="${dspr2#*=}"
- if [ "$HAVE_DSPR2" = "yes" ]; then
- ALL_ARCHS=$(filter mips32 dspr2)
- fi
- mips
- ;;
- armv5te)
- ALL_ARCHS=$(filter edsp)
- arm
- ;;
- armv6)
- ALL_ARCHS=$(filter edsp media)
- arm
- ;;
- armv7)
- ALL_ARCHS=$(filter edsp media neon)
- arm
- ;;
- *)
- unoptimized
- ;;
-esac
define rtcd_h_template
$$(BUILD_PFX)$(1).h: $$(SRC_PATH_BARE)/$(2)
@echo " [CREATE] $$@"
- $$(qexec)$$(SRC_PATH_BARE)/build/make/rtcd.sh --arch=$$(TGT_ISA) \
+ $$(qexec)$$(SRC_PATH_BARE)/build/make/rtcd.pl --arch=$$(TGT_ISA) \
--sym=$(1) \
--config=$$(CONFIG_DIR)$$(target)$$(if $$(FAT_ARCHS),,-$$(TOOLCHAIN)).mk \
$$(RTCD_OPTIONS) $$^ > $$@
endif
CODEC_SRCS-$(BUILD_LIBVPX) += build/make/version.sh
-CODEC_SRCS-$(BUILD_LIBVPX) += build/make/rtcd.sh
+CODEC_SRCS-$(BUILD_LIBVPX) += build/make/rtcd.pl
CODEC_SRCS-$(BUILD_LIBVPX) += vpx_ports/emmintrin_compat.h
CODEC_SRCS-$(BUILD_LIBVPX) += vpx_ports/mem_ops.h
CODEC_SRCS-$(BUILD_LIBVPX) += vpx_ports/mem_ops_aligned.h
--- /dev/null
+sub vp8_common_forward_decls() {
+print <<EOF
+/*
+ * VP8
+ */
+
+struct blockd;
+struct macroblockd;
+struct loop_filter_info;
+
+/* Encoder forward decls */
+struct block;
+struct macroblock;
+struct variance_vtable;
+union int_mv;
+struct yv12_buffer_config;
+EOF
+}
+forward_decls qw/vp8_common_forward_decls/;
+
+#
+# system state
+#
+add_proto qw/void vp8_clear_system_state/, "";
+specialize qw/vp8_clear_system_state mmx/;
+$vp8_clear_system_state_mmx=vpx_reset_mmx_state;
+
+#
+# Dequant
+#
+add_proto qw/void vp8_dequantize_b/, "struct blockd*, short *dqc";
+specialize qw/vp8_dequantize_b mmx media neon/;
+$vp8_dequantize_b_media=vp8_dequantize_b_v6;
+
+add_proto qw/void vp8_dequant_idct_add/, "short *input, short *dq, unsigned char *output, int stride";
+specialize qw/vp8_dequant_idct_add mmx media neon dspr2/;
+$vp8_dequant_idct_add_media=vp8_dequant_idct_add_v6;
+$vp8_dequant_idct_add_dspr2=vp8_dequant_idct_add_dspr2;
+
+add_proto qw/void vp8_dequant_idct_add_y_block/, "short *q, short *dq, unsigned char *dst, int stride, char *eobs";
+specialize qw/vp8_dequant_idct_add_y_block mmx sse2 media neon dspr2/;
+$vp8_dequant_idct_add_y_block_media=vp8_dequant_idct_add_y_block_v6;
+$vp8_dequant_idct_add_y_block_dspr2=vp8_dequant_idct_add_y_block_dspr2;
+
+add_proto qw/void vp8_dequant_idct_add_uv_block/, "short *q, short *dq, unsigned char *dst_u, unsigned char *dst_v, int stride, char *eobs";
+specialize qw/vp8_dequant_idct_add_uv_block mmx sse2 media neon dspr2/;
+$vp8_dequant_idct_add_uv_block_media=vp8_dequant_idct_add_uv_block_v6;
+$vp8_dequant_idct_add_y_block_dspr2=vp8_dequant_idct_add_y_block_dspr2;
+
+#
+# Loopfilter
+#
+add_proto qw/void vp8_loop_filter_mbv/, "unsigned char *y, unsigned char *u, unsigned char *v, int ystride, int uv_stride, struct loop_filter_info *lfi";
+specialize qw/vp8_loop_filter_mbv mmx sse2 media neon dspr2/;
+$vp8_loop_filter_mbv_media=vp8_loop_filter_mbv_armv6;
+$vp8_loop_filter_mbv_dspr2=vp8_loop_filter_mbv_dspr2;
+
+add_proto qw/void vp8_loop_filter_bv/, "unsigned char *y, unsigned char *u, unsigned char *v, int ystride, int uv_stride, struct loop_filter_info *lfi";
+specialize qw/vp8_loop_filter_bv mmx sse2 media neon dspr2/;
+$vp8_loop_filter_bv_media=vp8_loop_filter_bv_armv6;
+$vp8_loop_filter_bv_dspr2=vp8_loop_filter_bv_dspr2;
+
+add_proto qw/void vp8_loop_filter_mbh/, "unsigned char *y, unsigned char *u, unsigned char *v, int ystride, int uv_stride, struct loop_filter_info *lfi";
+specialize qw/vp8_loop_filter_mbh mmx sse2 media neon dspr2/;
+$vp8_loop_filter_mbh_media=vp8_loop_filter_mbh_armv6;
+$vp8_loop_filter_mbh_dspr2=vp8_loop_filter_mbh_dspr2;
+
+add_proto qw/void vp8_loop_filter_bh/, "unsigned char *y, unsigned char *u, unsigned char *v, int ystride, int uv_stride, struct loop_filter_info *lfi";
+specialize qw/vp8_loop_filter_bh mmx sse2 media neon dspr2/;
+$vp8_loop_filter_bh_media=vp8_loop_filter_bh_armv6;
+$vp8_loop_filter_bh_dspr2=vp8_loop_filter_bh_dspr2;
+
+
+add_proto qw/void vp8_loop_filter_simple_mbv/, "unsigned char *y, int ystride, const unsigned char *blimit";
+specialize qw/vp8_loop_filter_simple_mbv mmx sse2 media neon/;
+$vp8_loop_filter_simple_mbv_c=vp8_loop_filter_simple_vertical_edge_c;
+$vp8_loop_filter_simple_mbv_mmx=vp8_loop_filter_simple_vertical_edge_mmx;
+$vp8_loop_filter_simple_mbv_sse2=vp8_loop_filter_simple_vertical_edge_sse2;
+$vp8_loop_filter_simple_mbv_media=vp8_loop_filter_simple_vertical_edge_armv6;
+$vp8_loop_filter_simple_mbv_neon=vp8_loop_filter_mbvs_neon;
+
+add_proto qw/void vp8_loop_filter_simple_mbh/, "unsigned char *y, int ystride, const unsigned char *blimit";
+specialize qw/vp8_loop_filter_simple_mbh mmx sse2 media neon/;
+$vp8_loop_filter_simple_mbh_c=vp8_loop_filter_simple_horizontal_edge_c;
+$vp8_loop_filter_simple_mbh_mmx=vp8_loop_filter_simple_horizontal_edge_mmx;
+$vp8_loop_filter_simple_mbh_sse2=vp8_loop_filter_simple_horizontal_edge_sse2;
+$vp8_loop_filter_simple_mbh_media=vp8_loop_filter_simple_horizontal_edge_armv6;
+$vp8_loop_filter_simple_mbh_neon=vp8_loop_filter_mbhs_neon;
+
+add_proto qw/void vp8_loop_filter_simple_bv/, "unsigned char *y, int ystride, const unsigned char *blimit";
+specialize qw/vp8_loop_filter_simple_bv mmx sse2 media neon/;
+$vp8_loop_filter_simple_bv_c=vp8_loop_filter_bvs_c;
+$vp8_loop_filter_simple_bv_mmx=vp8_loop_filter_bvs_mmx;
+$vp8_loop_filter_simple_bv_sse2=vp8_loop_filter_bvs_sse2;
+$vp8_loop_filter_simple_bv_media=vp8_loop_filter_bvs_armv6;
+$vp8_loop_filter_simple_bv_neon=vp8_loop_filter_bvs_neon;
+
+add_proto qw/void vp8_loop_filter_simple_bh/, "unsigned char *y, int ystride, const unsigned char *blimit";
+specialize qw/vp8_loop_filter_simple_bh mmx sse2 media neon/;
+$vp8_loop_filter_simple_bh_c=vp8_loop_filter_bhs_c;
+$vp8_loop_filter_simple_bh_mmx=vp8_loop_filter_bhs_mmx;
+$vp8_loop_filter_simple_bh_sse2=vp8_loop_filter_bhs_sse2;
+$vp8_loop_filter_simple_bh_media=vp8_loop_filter_bhs_armv6;
+$vp8_loop_filter_simple_bh_neon=vp8_loop_filter_bhs_neon;
+
+#
+# IDCT
+#
+#idct16
+add_proto qw/void vp8_short_idct4x4llm/, "short *input, unsigned char *pred, int pitch, unsigned char *dst, int dst_stride";
+specialize qw/vp8_short_idct4x4llm mmx media neon dspr2/;
+$vp8_short_idct4x4llm_media=vp8_short_idct4x4llm_v6_dual;
+$vp8_short_idct4x4llm_dspr2=vp8_short_idct4x4llm_dspr2;
+
+#iwalsh1
+add_proto qw/void vp8_short_inv_walsh4x4_1/, "short *input, short *output";
+specialize qw/vp8_short_inv_walsh4x4_1 dspr2/;
+$vp8_short_inv_walsh4x4_1_dspr2=vp8_short_inv_walsh4x4_1_dspr2;
+# no asm yet
+
+#iwalsh16
+add_proto qw/void vp8_short_inv_walsh4x4/, "short *input, short *output";
+specialize qw/vp8_short_inv_walsh4x4 mmx sse2 media neon dspr2/;
+$vp8_short_inv_walsh4x4_media=vp8_short_inv_walsh4x4_v6;
+$vp8_short_inv_walsh4x4_dspr2=vp8_short_inv_walsh4x4_dspr2;
+
+#idct1_scalar_add
+add_proto qw/void vp8_dc_only_idct_add/, "short input, unsigned char *pred, int pred_stride, unsigned char *dst, int dst_stride";
+specialize qw/vp8_dc_only_idct_add mmx media neon dspr2/;
+$vp8_dc_only_idct_add_media=vp8_dc_only_idct_add_v6;
+$vp8_dc_only_idct_add_dspr2=vp8_dc_only_idct_add_dspr2;
+
+#
+# RECON
+#
+add_proto qw/void vp8_copy_mem16x16/, "unsigned char *src, int src_pitch, unsigned char *dst, int dst_pitch";
+specialize qw/vp8_copy_mem16x16 mmx sse2 media neon dspr2/;
+$vp8_copy_mem16x16_media=vp8_copy_mem16x16_v6;
+$vp8_copy_mem16x16_dspr2=vp8_copy_mem16x16_dspr2;
+
+add_proto qw/void vp8_copy_mem8x8/, "unsigned char *src, int src_pitch, unsigned char *dst, int dst_pitch";
+specialize qw/vp8_copy_mem8x8 mmx media neon dspr2/;
+$vp8_copy_mem8x8_media=vp8_copy_mem8x8_v6;
+$vp8_copy_mem8x8_dspr2=vp8_copy_mem8x8_dspr2;
+
+add_proto qw/void vp8_copy_mem8x4/, "unsigned char *src, int src_pitch, unsigned char *dst, int dst_pitch";
+specialize qw/vp8_copy_mem8x4 mmx media neon dspr2/;
+$vp8_copy_mem8x4_media=vp8_copy_mem8x4_v6;
+$vp8_copy_mem8x4_dspr2=vp8_copy_mem8x4_dspr2;
+
+add_proto qw/void vp8_build_intra_predictors_mby_s/, "struct macroblockd *x, unsigned char * yabove_row, unsigned char * yleft, int left_stride, unsigned char * ypred_ptr, int y_stride";
+specialize qw/vp8_build_intra_predictors_mby_s sse2 ssse3/;
+#TODO: fix assembly for neon
+
+add_proto qw/void vp8_build_intra_predictors_mbuv_s/, "struct macroblockd *x, unsigned char * uabove_row, unsigned char * vabove_row, unsigned char *uleft, unsigned char *vleft, int left_stride, unsigned char * upred_ptr, unsigned char * vpred_ptr, int pred_stride";
+specialize qw/vp8_build_intra_predictors_mbuv_s sse2 ssse3/;
+
+add_proto qw/void vp8_intra4x4_predict/, "unsigned char *Above, unsigned char *yleft, int left_stride, int b_mode, unsigned char *dst, int dst_stride, unsigned char top_left";
+specialize qw/vp8_intra4x4_predict media/;
+$vp8_intra4x4_predict_media=vp8_intra4x4_predict_armv6;
+
+#
+# Postproc
+#
+if (vpx_config("CONFIG_POSTPROC") eq "yes") {
+ add_proto qw/void vp8_mbpost_proc_down/, "unsigned char *dst, int pitch, int rows, int cols,int flimit";
+ specialize qw/vp8_mbpost_proc_down mmx sse2/;
+ $vp8_mbpost_proc_down_sse2=vp8_mbpost_proc_down_xmm;
+
+ add_proto qw/void vp8_mbpost_proc_across_ip/, "unsigned char *dst, int pitch, int rows, int cols,int flimit";
+ specialize qw/vp8_mbpost_proc_across_ip sse2/;
+ $vp8_mbpost_proc_across_ip_sse2=vp8_mbpost_proc_across_ip_xmm;
+
+ add_proto qw/void vp8_post_proc_down_and_across_mb_row/, "unsigned char *src, unsigned char *dst, int src_pitch, int dst_pitch, int cols, unsigned char *flimits, int size";
+ specialize qw/vp8_post_proc_down_and_across_mb_row sse2/;
+
+ add_proto qw/void vp8_plane_add_noise/, "unsigned char *s, char *noise, char blackclamp[16], char whiteclamp[16], char bothclamp[16], unsigned int w, unsigned int h, int pitch";
+ specialize qw/vp8_plane_add_noise mmx sse2/;
+ $vp8_plane_add_noise_sse2=vp8_plane_add_noise_wmt;
+
+ add_proto qw/void vp8_blend_mb_inner/, "unsigned char *y, unsigned char *u, unsigned char *v, int y1, int u1, int v1, int alpha, int stride";
+ # no asm yet
+
+ add_proto qw/void vp8_blend_mb_outer/, "unsigned char *y, unsigned char *u, unsigned char *v, int y1, int u1, int v1, int alpha, int stride";
+ # no asm yet
+
+ add_proto qw/void vp8_blend_b/, "unsigned char *y, unsigned char *u, unsigned char *v, int y1, int u1, int v1, int alpha, int stride";
+ # no asm yet
+
+ add_proto qw/void vp8_filter_by_weight16x16/, "unsigned char *src, int src_stride, unsigned char *dst, int dst_stride, int src_weight";
+ specialize qw/vp8_filter_by_weight16x16 sse2/;
+
+ add_proto qw/void vp8_filter_by_weight8x8/, "unsigned char *src, int src_stride, unsigned char *dst, int dst_stride, int src_weight";
+ specialize qw/vp8_filter_by_weight8x8 sse2/;
+
+ add_proto qw/void vp8_filter_by_weight4x4/, "unsigned char *src, int src_stride, unsigned char *dst, int dst_stride, int src_weight";
+ # no asm yet
+}
+
+#
+# Subpixel
+#
+add_proto qw/void vp8_sixtap_predict16x16/, "unsigned char *src, int src_pitch, int xofst, int yofst, unsigned char *dst, int dst_pitch";
+specialize qw/vp8_sixtap_predict16x16 mmx sse2 ssse3 media neon dspr2/;
+$vp8_sixtap_predict16x16_media=vp8_sixtap_predict16x16_armv6;
+$vp8_sixtap_predict16x16_dspr2=vp8_sixtap_predict16x16_dspr2;
+
+add_proto qw/void vp8_sixtap_predict8x8/, "unsigned char *src, int src_pitch, int xofst, int yofst, unsigned char *dst, int dst_pitch";
+specialize qw/vp8_sixtap_predict8x8 mmx sse2 ssse3 media neon dspr2/;
+$vp8_sixtap_predict8x8_media=vp8_sixtap_predict8x8_armv6;
+$vp8_sixtap_predict8x8_dspr2=vp8_sixtap_predict8x8_dspr2;
+
+add_proto qw/void vp8_sixtap_predict8x4/, "unsigned char *src, int src_pitch, int xofst, int yofst, unsigned char *dst, int dst_pitch";
+specialize qw/vp8_sixtap_predict8x4 mmx sse2 ssse3 media neon dspr2/;
+$vp8_sixtap_predict8x4_media=vp8_sixtap_predict8x4_armv6;
+$vp8_sixtap_predict8x4_dspr2=vp8_sixtap_predict8x4_dspr2;
+
+add_proto qw/void vp8_sixtap_predict4x4/, "unsigned char *src, int src_pitch, int xofst, int yofst, unsigned char *dst, int dst_pitch";
+specialize qw/vp8_sixtap_predict4x4 mmx ssse3 media neon dspr2/;
+$vp8_sixtap_predict4x4_media=vp8_sixtap_predict4x4_armv6;
+$vp8_sixtap_predict4x4_dspr2=vp8_sixtap_predict4x4_dspr2;
+
+add_proto qw/void vp8_bilinear_predict16x16/, "unsigned char *src, int src_pitch, int xofst, int yofst, unsigned char *dst, int dst_pitch";
+specialize qw/vp8_bilinear_predict16x16 mmx sse2 ssse3 media neon/;
+$vp8_bilinear_predict16x16_media=vp8_bilinear_predict16x16_armv6;
+
+add_proto qw/void vp8_bilinear_predict8x8/, "unsigned char *src, int src_pitch, int xofst, int yofst, unsigned char *dst, int dst_pitch";
+specialize qw/vp8_bilinear_predict8x8 mmx sse2 ssse3 media neon/;
+$vp8_bilinear_predict8x8_media=vp8_bilinear_predict8x8_armv6;
+
+add_proto qw/void vp8_bilinear_predict8x4/, "unsigned char *src, int src_pitch, int xofst, int yofst, unsigned char *dst, int dst_pitch";
+specialize qw/vp8_bilinear_predict8x4 mmx media neon/;
+$vp8_bilinear_predict8x4_media=vp8_bilinear_predict8x4_armv6;
+
+add_proto qw/void vp8_bilinear_predict4x4/, "unsigned char *src, int src_pitch, int xofst, int yofst, unsigned char *dst, int dst_pitch";
+specialize qw/vp8_bilinear_predict4x4 mmx media neon/;
+$vp8_bilinear_predict4x4_media=vp8_bilinear_predict4x4_armv6;
+
+#
+# Whole-pixel Variance
+#
+add_proto qw/unsigned int vp8_variance4x4/, "const unsigned char *src_ptr, int source_stride, const unsigned char *ref_ptr, int ref_stride, unsigned int *sse";
+specialize qw/vp8_variance4x4 mmx sse2/;
+$vp8_variance4x4_sse2=vp8_variance4x4_wmt;
+
+add_proto qw/unsigned int vp8_variance8x8/, "const unsigned char *src_ptr, int source_stride, const unsigned char *ref_ptr, int ref_stride, unsigned int *sse";
+specialize qw/vp8_variance8x8 mmx sse2 media neon/;
+$vp8_variance8x8_sse2=vp8_variance8x8_wmt;
+$vp8_variance8x8_media=vp8_variance8x8_armv6;
+
+add_proto qw/unsigned int vp8_variance8x16/, "const unsigned char *src_ptr, int source_stride, const unsigned char *ref_ptr, int ref_stride, unsigned int *sse";
+specialize qw/vp8_variance8x16 mmx sse2 neon/;
+$vp8_variance8x16_sse2=vp8_variance8x16_wmt;
+
+add_proto qw/unsigned int vp8_variance16x8/, "const unsigned char *src_ptr, int source_stride, const unsigned char *ref_ptr, int ref_stride, unsigned int *sse";
+specialize qw/vp8_variance16x8 mmx sse2 neon/;
+$vp8_variance16x8_sse2=vp8_variance16x8_wmt;
+
+add_proto qw/unsigned int vp8_variance16x16/, "const unsigned char *src_ptr, int source_stride, const unsigned char *ref_ptr, int ref_stride, unsigned int *sse";
+specialize qw/vp8_variance16x16 mmx sse2 media neon/;
+$vp8_variance16x16_sse2=vp8_variance16x16_wmt;
+$vp8_variance16x16_media=vp8_variance16x16_armv6;
+
+#
+# Sub-pixel Variance
+#
+add_proto qw/unsigned int vp8_sub_pixel_variance4x4/, "const unsigned char *src_ptr, int source_stride, int xoffset, int yoffset, const unsigned char *ref_ptr, int Refstride, unsigned int *sse";
+specialize qw/vp8_sub_pixel_variance4x4 mmx sse2/;
+$vp8_sub_pixel_variance4x4_sse2=vp8_sub_pixel_variance4x4_wmt;
+
+add_proto qw/unsigned int vp8_sub_pixel_variance8x8/, "const unsigned char *src_ptr, int source_stride, int xoffset, int yoffset, const unsigned char *ref_ptr, int Refstride, unsigned int *sse";
+specialize qw/vp8_sub_pixel_variance8x8 mmx sse2 media neon/;
+$vp8_sub_pixel_variance8x8_sse2=vp8_sub_pixel_variance8x8_wmt;
+$vp8_sub_pixel_variance8x8_media=vp8_sub_pixel_variance8x8_armv6;
+
+add_proto qw/unsigned int vp8_sub_pixel_variance8x16/, "const unsigned char *src_ptr, int source_stride, int xoffset, int yoffset, const unsigned char *ref_ptr, int Refstride, unsigned int *sse";
+specialize qw/vp8_sub_pixel_variance8x16 mmx sse2/;
+$vp8_sub_pixel_variance8x16_sse2=vp8_sub_pixel_variance8x16_wmt;
+
+add_proto qw/unsigned int vp8_sub_pixel_variance16x8/, "const unsigned char *src_ptr, int source_stride, int xoffset, int yoffset, const unsigned char *ref_ptr, int Refstride, unsigned int *sse";
+specialize qw/vp8_sub_pixel_variance16x8 mmx sse2 ssse3/;
+$vp8_sub_pixel_variance16x8_sse2=vp8_sub_pixel_variance16x8_wmt;
+
+add_proto qw/unsigned int vp8_sub_pixel_variance16x16/, "const unsigned char *src_ptr, int source_stride, int xoffset, int yoffset, const unsigned char *ref_ptr, int Refstride, unsigned int *sse";
+specialize qw/vp8_sub_pixel_variance16x16 mmx sse2 ssse3 media neon/;
+$vp8_sub_pixel_variance16x16_sse2=vp8_sub_pixel_variance16x16_wmt;
+$vp8_sub_pixel_variance16x16_media=vp8_sub_pixel_variance16x16_armv6;
+
+add_proto qw/unsigned int vp8_variance_halfpixvar16x16_h/, "const unsigned char *src_ptr, int source_stride, const unsigned char *ref_ptr, int ref_stride, unsigned int *sse";
+specialize qw/vp8_variance_halfpixvar16x16_h mmx sse2 media neon/;
+$vp8_variance_halfpixvar16x16_h_sse2=vp8_variance_halfpixvar16x16_h_wmt;
+$vp8_variance_halfpixvar16x16_h_media=vp8_variance_halfpixvar16x16_h_armv6;
+
+add_proto qw/unsigned int vp8_variance_halfpixvar16x16_v/, "const unsigned char *src_ptr, int source_stride, const unsigned char *ref_ptr, int ref_stride, unsigned int *sse";
+specialize qw/vp8_variance_halfpixvar16x16_v mmx sse2 media neon/;
+$vp8_variance_halfpixvar16x16_v_sse2=vp8_variance_halfpixvar16x16_v_wmt;
+$vp8_variance_halfpixvar16x16_v_media=vp8_variance_halfpixvar16x16_v_armv6;
+
+add_proto qw/unsigned int vp8_variance_halfpixvar16x16_hv/, "const unsigned char *src_ptr, int source_stride, const unsigned char *ref_ptr, int ref_stride, unsigned int *sse";
+specialize qw/vp8_variance_halfpixvar16x16_hv mmx sse2 media neon/;
+$vp8_variance_halfpixvar16x16_hv_sse2=vp8_variance_halfpixvar16x16_hv_wmt;
+$vp8_variance_halfpixvar16x16_hv_media=vp8_variance_halfpixvar16x16_hv_armv6;
+
+#
+# Single block SAD
+#
+add_proto qw/unsigned int vp8_sad4x4/, "const unsigned char *src_ptr, int src_stride, const unsigned char *ref_ptr, int ref_stride, unsigned int max_sad";
+specialize qw/vp8_sad4x4 mmx sse2 neon/;
+$vp8_sad4x4_sse2=vp8_sad4x4_wmt;
+
+add_proto qw/unsigned int vp8_sad8x8/, "const unsigned char *src_ptr, int src_stride, const unsigned char *ref_ptr, int ref_stride, unsigned int max_sad";
+specialize qw/vp8_sad8x8 mmx sse2 neon/;
+$vp8_sad8x8_sse2=vp8_sad8x8_wmt;
+
+add_proto qw/unsigned int vp8_sad8x16/, "const unsigned char *src_ptr, int src_stride, const unsigned char *ref_ptr, int ref_stride, unsigned int max_sad";
+specialize qw/vp8_sad8x16 mmx sse2 neon/;
+$vp8_sad8x16_sse2=vp8_sad8x16_wmt;
+
+add_proto qw/unsigned int vp8_sad16x8/, "const unsigned char *src_ptr, int src_stride, const unsigned char *ref_ptr, int ref_stride, unsigned int max_sad";
+specialize qw/vp8_sad16x8 mmx sse2 neon/;
+$vp8_sad16x8_sse2=vp8_sad16x8_wmt;
+
+add_proto qw/unsigned int vp8_sad16x16/, "const unsigned char *src_ptr, int src_stride, const unsigned char *ref_ptr, int ref_stride, unsigned int max_sad";
+specialize qw/vp8_sad16x16 mmx sse2 sse3 media neon/;
+$vp8_sad16x16_sse2=vp8_sad16x16_wmt;
+$vp8_sad16x16_media=vp8_sad16x16_armv6;
+
+#
+# Multi-block SAD, comparing a reference to N blocks 1 pixel apart horizontally
+#
+add_proto qw/void vp8_sad4x4x3/, "const unsigned char *src_ptr, int src_stride, const unsigned char *ref_ptr, int ref_stride, unsigned int *sad_array";
+specialize qw/vp8_sad4x4x3 sse3/;
+
+add_proto qw/void vp8_sad8x8x3/, "const unsigned char *src_ptr, int src_stride, const unsigned char *ref_ptr, int ref_stride, unsigned int *sad_array";
+specialize qw/vp8_sad8x8x3 sse3/;
+
+add_proto qw/void vp8_sad8x16x3/, "const unsigned char *src_ptr, int src_stride, const unsigned char *ref_ptr, int ref_stride, unsigned int *sad_array";
+specialize qw/vp8_sad8x16x3 sse3/;
+
+add_proto qw/void vp8_sad16x8x3/, "const unsigned char *src_ptr, int src_stride, const unsigned char *ref_ptr, int ref_stride, unsigned int *sad_array";
+specialize qw/vp8_sad16x8x3 sse3 ssse3/;
+
+add_proto qw/void vp8_sad16x16x3/, "const unsigned char *src_ptr, int src_stride, const unsigned char *ref_ptr, int ref_stride, unsigned int *sad_array";
+specialize qw/vp8_sad16x16x3 sse3 ssse3/;
+
+# Note the only difference in the following prototypes is that they return into
+# an array of short
+add_proto qw/void vp8_sad4x4x8/, "const unsigned char *src_ptr, int src_stride, const unsigned char *ref_ptr, int ref_stride, unsigned short *sad_array";
+specialize qw/vp8_sad4x4x8 sse4_1/;
+$vp8_sad4x4x8_sse4_1=vp8_sad4x4x8_sse4;
+
+add_proto qw/void vp8_sad8x8x8/, "const unsigned char *src_ptr, int src_stride, const unsigned char *ref_ptr, int ref_stride, unsigned short *sad_array";
+specialize qw/vp8_sad8x8x8 sse4_1/;
+$vp8_sad8x8x8_sse4_1=vp8_sad8x8x8_sse4;
+
+add_proto qw/void vp8_sad8x16x8/, "const unsigned char *src_ptr, int src_stride, const unsigned char *ref_ptr, int ref_stride, unsigned short *sad_array";
+specialize qw/vp8_sad8x16x8 sse4_1/;
+$vp8_sad8x16x8_sse4_1=vp8_sad8x16x8_sse4;
+
+add_proto qw/void vp8_sad16x8x8/, "const unsigned char *src_ptr, int src_stride, const unsigned char *ref_ptr, int ref_stride, unsigned short *sad_array";
+specialize qw/vp8_sad16x8x8 sse4_1/;
+$vp8_sad16x8x8_sse4_1=vp8_sad16x8x8_sse4;
+
+add_proto qw/void vp8_sad16x16x8/, "const unsigned char *src_ptr, int src_stride, const unsigned char *ref_ptr, int ref_stride, unsigned short *sad_array";
+specialize qw/vp8_sad16x16x8 sse4_1/;
+$vp8_sad16x16x8_sse4_1=vp8_sad16x16x8_sse4;
+
+#
+# Multi-block SAD, comparing a reference to N independent blocks
+#
+add_proto qw/void vp8_sad4x4x4d/, "const unsigned char *src_ptr, int src_stride, const unsigned char * const ref_ptr[], int ref_stride, unsigned int *sad_array";
+specialize qw/vp8_sad4x4x4d sse3/;
+
+add_proto qw/void vp8_sad8x8x4d/, "const unsigned char *src_ptr, int src_stride, const unsigned char * const ref_ptr[], int ref_stride, unsigned int *sad_array";
+specialize qw/vp8_sad8x8x4d sse3/;
+
+add_proto qw/void vp8_sad8x16x4d/, "const unsigned char *src_ptr, int src_stride, const unsigned char * const ref_ptr[], int ref_stride, unsigned int *sad_array";
+specialize qw/vp8_sad8x16x4d sse3/;
+
+add_proto qw/void vp8_sad16x8x4d/, "const unsigned char *src_ptr, int src_stride, const unsigned char * const ref_ptr[], int ref_stride, unsigned int *sad_array";
+specialize qw/vp8_sad16x8x4d sse3/;
+
+add_proto qw/void vp8_sad16x16x4d/, "const unsigned char *src_ptr, int src_stride, const unsigned char * const ref_ptr[], int ref_stride, unsigned int *sad_array";
+specialize qw/vp8_sad16x16x4d sse3/;
+
+#
+# Encoder functions below this point.
+#
+if (vpx_config("CONFIG_VP8_ENCODER") eq "yes") {
+
+#
+# Sum of squares (vector)
+#
+add_proto qw/unsigned int vp8_get_mb_ss/, "const short *";
+specialize qw/vp8_get_mb_ss mmx sse2/;
+
+#
+# SSE (Sum Squared Error)
+#
+add_proto qw/unsigned int vp8_sub_pixel_mse16x16/, "const unsigned char *src_ptr, int source_stride, int xoffset, int yoffset, const unsigned char *ref_ptr, int Refstride, unsigned int *sse";
+specialize qw/vp8_sub_pixel_mse16x16 mmx sse2/;
+$vp8_sub_pixel_mse16x16_sse2=vp8_sub_pixel_mse16x16_wmt;
+
+add_proto qw/unsigned int vp8_mse16x16/, "const unsigned char *src_ptr, int source_stride, const unsigned char *ref_ptr, int ref_stride, unsigned int *sse";
+specialize qw/vp8_mse16x16 mmx sse2 media neon/;
+$vp8_mse16x16_sse2=vp8_mse16x16_wmt;
+$vp8_mse16x16_media=vp8_mse16x16_armv6;
+
+add_proto qw/unsigned int vp8_get4x4sse_cs/, "const unsigned char *src_ptr, int source_stride, const unsigned char *ref_ptr, int ref_stride";
+specialize qw/vp8_get4x4sse_cs mmx neon/;
+
+#
+# Block copy
+#
+if ($opts{arch} =~ /x86/) {
+ add_proto qw/void vp8_copy32xn/, "const unsigned char *src_ptr, int source_stride, const unsigned char *ref_ptr, int ref_stride, int n";
+ specialize qw/vp8_copy32xn sse2 sse3/;
+}
+
+#
+# Structured Similarity (SSIM)
+#
+if (vpx_config("CONFIG_INTERNAL_STATS") eq "yes") {
+ $opts{arch} eq "x86_64" and $sse2_on_x86_64 = "sse2";
+
+ add_proto qw/void vp8_ssim_parms_8x8/, "unsigned char *s, int sp, unsigned char *r, int rp, unsigned long *sum_s, unsigned long *sum_r, unsigned long *sum_sq_s, unsigned long *sum_sq_r, unsigned long *sum_sxr";
+ specialize qw/vp8_ssim_parms_8x8/, "$sse2_on_x86_64";
+
+ add_proto qw/void vp8_ssim_parms_16x16/, "unsigned char *s, int sp, unsigned char *r, int rp, unsigned long *sum_s, unsigned long *sum_r, unsigned long *sum_sq_s, unsigned long *sum_sq_r, unsigned long *sum_sxr";
+ specialize qw/vp8_ssim_parms_16x16/, "$sse2_on_x86_64";
+}
+
+#
+# Forward DCT
+#
+add_proto qw/void vp8_short_fdct4x4/, "short *input, short *output, int pitch";
+specialize qw/vp8_short_fdct4x4 mmx sse2 media neon/;
+$vp8_short_fdct4x4_media=vp8_short_fdct4x4_armv6;
+
+add_proto qw/void vp8_short_fdct8x4/, "short *input, short *output, int pitch";
+specialize qw/vp8_short_fdct8x4 mmx sse2 media neon/;
+$vp8_short_fdct8x4_media=vp8_short_fdct8x4_armv6;
+
+add_proto qw/void vp8_short_walsh4x4/, "short *input, short *output, int pitch";
+specialize qw/vp8_short_walsh4x4 sse2 media neon/;
+$vp8_short_walsh4x4_media=vp8_short_walsh4x4_armv6;
+
+#
+# Quantizer
+#
+add_proto qw/void vp8_regular_quantize_b/, "struct block *, struct blockd *";
+specialize qw/vp8_regular_quantize_b sse2/;
+# TODO(johann) Update sse4 implementation and re-enable
+#$vp8_regular_quantize_b_sse4_1=vp8_regular_quantize_b_sse4;
+
+add_proto qw/void vp8_fast_quantize_b/, "struct block *, struct blockd *";
+specialize qw/vp8_fast_quantize_b sse2 ssse3 media neon/;
+$vp8_fast_quantize_b_media=vp8_fast_quantize_b_armv6;
+
+add_proto qw/void vp8_regular_quantize_b_pair/, "struct block *b1, struct block *b2, struct blockd *d1, struct blockd *d2";
+# no asm yet
+
+add_proto qw/void vp8_fast_quantize_b_pair/, "struct block *b1, struct block *b2, struct blockd *d1, struct blockd *d2";
+specialize qw/vp8_fast_quantize_b_pair neon/;
+
+add_proto qw/void vp8_quantize_mb/, "struct macroblock *";
+specialize qw/vp8_quantize_mb neon/;
+
+add_proto qw/void vp8_quantize_mby/, "struct macroblock *";
+specialize qw/vp8_quantize_mby neon/;
+
+add_proto qw/void vp8_quantize_mbuv/, "struct macroblock *";
+specialize qw/vp8_quantize_mbuv neon/;
+
+#
+# Block subtraction
+#
+add_proto qw/int vp8_block_error/, "short *coeff, short *dqcoeff";
+specialize qw/vp8_block_error mmx sse2/;
+$vp8_block_error_sse2=vp8_block_error_xmm;
+
+add_proto qw/int vp8_mbblock_error/, "struct macroblock *mb, int dc";
+specialize qw/vp8_mbblock_error mmx sse2/;
+$vp8_mbblock_error_sse2=vp8_mbblock_error_xmm;
+
+add_proto qw/int vp8_mbuverror/, "struct macroblock *mb";
+specialize qw/vp8_mbuverror mmx sse2/;
+$vp8_mbuverror_sse2=vp8_mbuverror_xmm;
+
+add_proto qw/void vp8_subtract_b/, "struct block *be, struct blockd *bd, int pitch";
+specialize qw/vp8_subtract_b mmx sse2 media neon/;
+$vp8_subtract_b_media=vp8_subtract_b_armv6;
+
+add_proto qw/void vp8_subtract_mby/, "short *diff, unsigned char *src, int src_stride, unsigned char *pred, int pred_stride";
+specialize qw/vp8_subtract_mby mmx sse2 media neon/;
+$vp8_subtract_mby_media=vp8_subtract_mby_armv6;
+
+add_proto qw/void vp8_subtract_mbuv/, "short *diff, unsigned char *usrc, unsigned char *vsrc, int src_stride, unsigned char *upred, unsigned char *vpred, int pred_stride";
+specialize qw/vp8_subtract_mbuv mmx sse2 media neon/;
+$vp8_subtract_mbuv_media=vp8_subtract_mbuv_armv6;
+
+#
+# Motion search
+#
+add_proto qw/int vp8_full_search_sad/, "struct macroblock *x, struct block *b, struct blockd *d, union int_mv *ref_mv, int sad_per_bit, int distance, struct variance_vtable *fn_ptr, int *mvcost[2], union int_mv *center_mv";
+specialize qw/vp8_full_search_sad sse3 sse4_1/;
+$vp8_full_search_sad_sse3=vp8_full_search_sadx3;
+$vp8_full_search_sad_sse4_1=vp8_full_search_sadx8;
+
+add_proto qw/int vp8_refining_search_sad/, "struct macroblock *x, struct block *b, struct blockd *d, union int_mv *ref_mv, int sad_per_bit, int distance, struct variance_vtable *fn_ptr, int *mvcost[2], union int_mv *center_mv";
+specialize qw/vp8_refining_search_sad sse3/;
+$vp8_refining_search_sad_sse3=vp8_refining_search_sadx4;
+
+add_proto qw/int vp8_diamond_search_sad/, "struct macroblock *x, struct block *b, struct blockd *d, union int_mv *ref_mv, union int_mv *best_mv, int search_param, int sad_per_bit, int *num00, struct variance_vtable *fn_ptr, int *mvcost[2], union int_mv *center_mv";
+$vp8_diamond_search_sad_sse3=vp8_diamond_search_sadx4;
+
+#
+# Alt-ref Noise Reduction (ARNR)
+#
+if (vpx_config("CONFIG_REALTIME_ONLY") ne "yes") {
+ add_proto qw/void vp8_temporal_filter_apply/, "unsigned char *frame1, unsigned int stride, unsigned char *frame2, unsigned int block_size, int strength, int filter_weight, unsigned int *accumulator, unsigned short *count";
+ specialize qw/vp8_temporal_filter_apply sse2/;
+}
+
+#
+# Pick Loopfilter
+#
+add_proto qw/void vp8_yv12_copy_partial_frame/, "struct yv12_buffer_config *src_ybc, struct yv12_buffer_config *dst_ybc";
+specialize qw/vp8_yv12_copy_partial_frame neon/;
+
+#
+# Denoiser filter
+#
+if (vpx_config("CONFIG_TEMPORAL_DENOISING") eq "yes") {
+ add_proto qw/int vp8_denoiser_filter/, "struct yv12_buffer_config* mc_running_avg, struct yv12_buffer_config* running_avg, struct macroblock* signal, unsigned int motion_magnitude2, int y_offset, int uv_offset";
+ specialize qw/vp8_denoiser_filter sse2 neon/;
+}
+
+# End of encoder only functions
+}
+1;
+++ /dev/null
-vp8_common_forward_decls() {
-cat <<EOF
-/*
- * VP8
- */
-
-struct blockd;
-struct macroblockd;
-struct loop_filter_info;
-
-/* Encoder forward decls */
-struct block;
-struct macroblock;
-struct variance_vtable;
-union int_mv;
-struct yv12_buffer_config;
-EOF
-}
-forward_decls vp8_common_forward_decls
-
-#
-# system state
-#
-prototype void vp8_clear_system_state ""
-specialize vp8_clear_system_state mmx
-vp8_clear_system_state_mmx=vpx_reset_mmx_state
-
-#
-# Dequant
-#
-prototype void vp8_dequantize_b "struct blockd*, short *dqc"
-specialize vp8_dequantize_b mmx media neon
-vp8_dequantize_b_media=vp8_dequantize_b_v6
-
-prototype void vp8_dequant_idct_add "short *input, short *dq, unsigned char *output, int stride"
-specialize vp8_dequant_idct_add mmx media neon dspr2
-vp8_dequant_idct_add_media=vp8_dequant_idct_add_v6
-vp8_dequant_idct_add_dspr2=vp8_dequant_idct_add_dspr2
-
-prototype void vp8_dequant_idct_add_y_block "short *q, short *dq, unsigned char *dst, int stride, char *eobs"
-specialize vp8_dequant_idct_add_y_block mmx sse2 media neon dspr2
-vp8_dequant_idct_add_y_block_media=vp8_dequant_idct_add_y_block_v6
-vp8_dequant_idct_add_y_block_dspr2=vp8_dequant_idct_add_y_block_dspr2
-
-prototype void vp8_dequant_idct_add_uv_block "short *q, short *dq, unsigned char *dst_u, unsigned char *dst_v, int stride, char *eobs"
-specialize vp8_dequant_idct_add_uv_block mmx sse2 media neon dspr2
-vp8_dequant_idct_add_uv_block_media=vp8_dequant_idct_add_uv_block_v6
-vp8_dequant_idct_add_y_block_dspr2=vp8_dequant_idct_add_y_block_dspr2
-
-#
-# Loopfilter
-#
-prototype void vp8_loop_filter_mbv "unsigned char *y, unsigned char *u, unsigned char *v, int ystride, int uv_stride, struct loop_filter_info *lfi"
-specialize vp8_loop_filter_mbv mmx sse2 media neon dspr2
-vp8_loop_filter_mbv_media=vp8_loop_filter_mbv_armv6
-vp8_loop_filter_mbv_dspr2=vp8_loop_filter_mbv_dspr2
-
-prototype void vp8_loop_filter_bv "unsigned char *y, unsigned char *u, unsigned char *v, int ystride, int uv_stride, struct loop_filter_info *lfi"
-specialize vp8_loop_filter_bv mmx sse2 media neon dspr2
-vp8_loop_filter_bv_media=vp8_loop_filter_bv_armv6
-vp8_loop_filter_bv_dspr2=vp8_loop_filter_bv_dspr2
-
-prototype void vp8_loop_filter_mbh "unsigned char *y, unsigned char *u, unsigned char *v, int ystride, int uv_stride, struct loop_filter_info *lfi"
-specialize vp8_loop_filter_mbh mmx sse2 media neon dspr2
-vp8_loop_filter_mbh_media=vp8_loop_filter_mbh_armv6
-vp8_loop_filter_mbh_dspr2=vp8_loop_filter_mbh_dspr2
-
-prototype void vp8_loop_filter_bh "unsigned char *y, unsigned char *u, unsigned char *v, int ystride, int uv_stride, struct loop_filter_info *lfi"
-specialize vp8_loop_filter_bh mmx sse2 media neon dspr2
-vp8_loop_filter_bh_media=vp8_loop_filter_bh_armv6
-vp8_loop_filter_bh_dspr2=vp8_loop_filter_bh_dspr2
-
-
-prototype void vp8_loop_filter_simple_mbv "unsigned char *y, int ystride, const unsigned char *blimit"
-specialize vp8_loop_filter_simple_mbv mmx sse2 media neon
-vp8_loop_filter_simple_mbv_c=vp8_loop_filter_simple_vertical_edge_c
-vp8_loop_filter_simple_mbv_mmx=vp8_loop_filter_simple_vertical_edge_mmx
-vp8_loop_filter_simple_mbv_sse2=vp8_loop_filter_simple_vertical_edge_sse2
-vp8_loop_filter_simple_mbv_media=vp8_loop_filter_simple_vertical_edge_armv6
-vp8_loop_filter_simple_mbv_neon=vp8_loop_filter_mbvs_neon
-
-prototype void vp8_loop_filter_simple_mbh "unsigned char *y, int ystride, const unsigned char *blimit"
-specialize vp8_loop_filter_simple_mbh mmx sse2 media neon
-vp8_loop_filter_simple_mbh_c=vp8_loop_filter_simple_horizontal_edge_c
-vp8_loop_filter_simple_mbh_mmx=vp8_loop_filter_simple_horizontal_edge_mmx
-vp8_loop_filter_simple_mbh_sse2=vp8_loop_filter_simple_horizontal_edge_sse2
-vp8_loop_filter_simple_mbh_media=vp8_loop_filter_simple_horizontal_edge_armv6
-vp8_loop_filter_simple_mbh_neon=vp8_loop_filter_mbhs_neon
-
-prototype void vp8_loop_filter_simple_bv "unsigned char *y, int ystride, const unsigned char *blimit"
-specialize vp8_loop_filter_simple_bv mmx sse2 media neon
-vp8_loop_filter_simple_bv_c=vp8_loop_filter_bvs_c
-vp8_loop_filter_simple_bv_mmx=vp8_loop_filter_bvs_mmx
-vp8_loop_filter_simple_bv_sse2=vp8_loop_filter_bvs_sse2
-vp8_loop_filter_simple_bv_media=vp8_loop_filter_bvs_armv6
-vp8_loop_filter_simple_bv_neon=vp8_loop_filter_bvs_neon
-
-prototype void vp8_loop_filter_simple_bh "unsigned char *y, int ystride, const unsigned char *blimit"
-specialize vp8_loop_filter_simple_bh mmx sse2 media neon
-vp8_loop_filter_simple_bh_c=vp8_loop_filter_bhs_c
-vp8_loop_filter_simple_bh_mmx=vp8_loop_filter_bhs_mmx
-vp8_loop_filter_simple_bh_sse2=vp8_loop_filter_bhs_sse2
-vp8_loop_filter_simple_bh_media=vp8_loop_filter_bhs_armv6
-vp8_loop_filter_simple_bh_neon=vp8_loop_filter_bhs_neon
-
-#
-# IDCT
-#
-#idct16
-prototype void vp8_short_idct4x4llm "short *input, unsigned char *pred, int pitch, unsigned char *dst, int dst_stride"
-specialize vp8_short_idct4x4llm mmx media neon dspr2
-vp8_short_idct4x4llm_media=vp8_short_idct4x4llm_v6_dual
-vp8_short_idct4x4llm_dspr2=vp8_short_idct4x4llm_dspr2
-
-#iwalsh1
-prototype void vp8_short_inv_walsh4x4_1 "short *input, short *output"
-specialize vp8_short_inv_walsh4x4_1 dspr2
-vp8_short_inv_walsh4x4_1_dspr2=vp8_short_inv_walsh4x4_1_dspr2
-# no asm yet
-
-#iwalsh16
-prototype void vp8_short_inv_walsh4x4 "short *input, short *output"
-specialize vp8_short_inv_walsh4x4 mmx sse2 media neon dspr2
-vp8_short_inv_walsh4x4_media=vp8_short_inv_walsh4x4_v6
-vp8_short_inv_walsh4x4_dspr2=vp8_short_inv_walsh4x4_dspr2
-
-#idct1_scalar_add
-prototype void vp8_dc_only_idct_add "short input, unsigned char *pred, int pred_stride, unsigned char *dst, int dst_stride"
-specialize vp8_dc_only_idct_add mmx media neon dspr2
-vp8_dc_only_idct_add_media=vp8_dc_only_idct_add_v6
-vp8_dc_only_idct_add_dspr2=vp8_dc_only_idct_add_dspr2
-
-#
-# RECON
-#
-prototype void vp8_copy_mem16x16 "unsigned char *src, int src_pitch, unsigned char *dst, int dst_pitch"
-specialize vp8_copy_mem16x16 mmx sse2 media neon dspr2
-vp8_copy_mem16x16_media=vp8_copy_mem16x16_v6
-vp8_copy_mem16x16_dspr2=vp8_copy_mem16x16_dspr2
-
-prototype void vp8_copy_mem8x8 "unsigned char *src, int src_pitch, unsigned char *dst, int dst_pitch"
-specialize vp8_copy_mem8x8 mmx media neon dspr2
-vp8_copy_mem8x8_media=vp8_copy_mem8x8_v6
-vp8_copy_mem8x8_dspr2=vp8_copy_mem8x8_dspr2
-
-prototype void vp8_copy_mem8x4 "unsigned char *src, int src_pitch, unsigned char *dst, int dst_pitch"
-specialize vp8_copy_mem8x4 mmx media neon dspr2
-vp8_copy_mem8x4_media=vp8_copy_mem8x4_v6
-vp8_copy_mem8x4_dspr2=vp8_copy_mem8x4_dspr2
-
-prototype void vp8_build_intra_predictors_mby_s "struct macroblockd *x, unsigned char * yabove_row, unsigned char * yleft, int left_stride, unsigned char * ypred_ptr, int y_stride"
-specialize vp8_build_intra_predictors_mby_s sse2 ssse3
-#TODO: fix assembly for neon
-
-prototype void vp8_build_intra_predictors_mbuv_s "struct macroblockd *x, unsigned char * uabove_row, unsigned char * vabove_row, unsigned char *uleft, unsigned char *vleft, int left_stride, unsigned char * upred_ptr, unsigned char * vpred_ptr, int pred_stride"
-specialize vp8_build_intra_predictors_mbuv_s sse2 ssse3
-
-prototype void vp8_intra4x4_predict "unsigned char *Above, unsigned char *yleft, int left_stride, int b_mode, unsigned char *dst, int dst_stride, unsigned char top_left"
-specialize vp8_intra4x4_predict media
-vp8_intra4x4_predict_media=vp8_intra4x4_predict_armv6
-
-#
-# Postproc
-#
-if [ "$CONFIG_POSTPROC" = "yes" ]; then
- prototype void vp8_mbpost_proc_down "unsigned char *dst, int pitch, int rows, int cols,int flimit"
- specialize vp8_mbpost_proc_down mmx sse2
- vp8_mbpost_proc_down_sse2=vp8_mbpost_proc_down_xmm
-
- prototype void vp8_mbpost_proc_across_ip "unsigned char *dst, int pitch, int rows, int cols,int flimit"
- specialize vp8_mbpost_proc_across_ip sse2
- vp8_mbpost_proc_across_ip_sse2=vp8_mbpost_proc_across_ip_xmm
-
- prototype void vp8_post_proc_down_and_across_mb_row "unsigned char *src, unsigned char *dst, int src_pitch, int dst_pitch, int cols, unsigned char *flimits, int size"
- specialize vp8_post_proc_down_and_across_mb_row sse2
-
- prototype void vp8_plane_add_noise "unsigned char *s, char *noise, char blackclamp[16], char whiteclamp[16], char bothclamp[16], unsigned int w, unsigned int h, int pitch"
- specialize vp8_plane_add_noise mmx sse2
- vp8_plane_add_noise_sse2=vp8_plane_add_noise_wmt
-
- prototype void vp8_blend_mb_inner "unsigned char *y, unsigned char *u, unsigned char *v, int y1, int u1, int v1, int alpha, int stride"
- # no asm yet
-
- prototype void vp8_blend_mb_outer "unsigned char *y, unsigned char *u, unsigned char *v, int y1, int u1, int v1, int alpha, int stride"
- # no asm yet
-
- prototype void vp8_blend_b "unsigned char *y, unsigned char *u, unsigned char *v, int y1, int u1, int v1, int alpha, int stride"
- # no asm yet
-
- prototype void vp8_filter_by_weight16x16 "unsigned char *src, int src_stride, unsigned char *dst, int dst_stride, int src_weight"
- specialize vp8_filter_by_weight16x16 sse2
-
- prototype void vp8_filter_by_weight8x8 "unsigned char *src, int src_stride, unsigned char *dst, int dst_stride, int src_weight"
- specialize vp8_filter_by_weight8x8 sse2
-
- prototype void vp8_filter_by_weight4x4 "unsigned char *src, int src_stride, unsigned char *dst, int dst_stride, int src_weight"
- # no asm yet
-fi
-
-#
-# Subpixel
-#
-prototype void vp8_sixtap_predict16x16 "unsigned char *src, int src_pitch, int xofst, int yofst, unsigned char *dst, int dst_pitch"
-specialize vp8_sixtap_predict16x16 mmx sse2 ssse3 media neon dspr2
-vp8_sixtap_predict16x16_media=vp8_sixtap_predict16x16_armv6
-vp8_sixtap_predict16x16_dspr2=vp8_sixtap_predict16x16_dspr2
-
-prototype void vp8_sixtap_predict8x8 "unsigned char *src, int src_pitch, int xofst, int yofst, unsigned char *dst, int dst_pitch"
-specialize vp8_sixtap_predict8x8 mmx sse2 ssse3 media neon dspr2
-vp8_sixtap_predict8x8_media=vp8_sixtap_predict8x8_armv6
-vp8_sixtap_predict8x8_dspr2=vp8_sixtap_predict8x8_dspr2
-
-prototype void vp8_sixtap_predict8x4 "unsigned char *src, int src_pitch, int xofst, int yofst, unsigned char *dst, int dst_pitch"
-specialize vp8_sixtap_predict8x4 mmx sse2 ssse3 media neon dspr2
-vp8_sixtap_predict8x4_media=vp8_sixtap_predict8x4_armv6
-vp8_sixtap_predict8x4_dspr2=vp8_sixtap_predict8x4_dspr2
-
-prototype void vp8_sixtap_predict4x4 "unsigned char *src, int src_pitch, int xofst, int yofst, unsigned char *dst, int dst_pitch"
-specialize vp8_sixtap_predict4x4 mmx ssse3 media neon dspr2
-vp8_sixtap_predict4x4_media=vp8_sixtap_predict4x4_armv6
-vp8_sixtap_predict4x4_dspr2=vp8_sixtap_predict4x4_dspr2
-
-prototype void vp8_bilinear_predict16x16 "unsigned char *src, int src_pitch, int xofst, int yofst, unsigned char *dst, int dst_pitch"
-specialize vp8_bilinear_predict16x16 mmx sse2 ssse3 media neon
-vp8_bilinear_predict16x16_media=vp8_bilinear_predict16x16_armv6
-
-prototype void vp8_bilinear_predict8x8 "unsigned char *src, int src_pitch, int xofst, int yofst, unsigned char *dst, int dst_pitch"
-specialize vp8_bilinear_predict8x8 mmx sse2 ssse3 media neon
-vp8_bilinear_predict8x8_media=vp8_bilinear_predict8x8_armv6
-
-prototype void vp8_bilinear_predict8x4 "unsigned char *src, int src_pitch, int xofst, int yofst, unsigned char *dst, int dst_pitch"
-specialize vp8_bilinear_predict8x4 mmx media neon
-vp8_bilinear_predict8x4_media=vp8_bilinear_predict8x4_armv6
-
-prototype void vp8_bilinear_predict4x4 "unsigned char *src, int src_pitch, int xofst, int yofst, unsigned char *dst, int dst_pitch"
-specialize vp8_bilinear_predict4x4 mmx media neon
-vp8_bilinear_predict4x4_media=vp8_bilinear_predict4x4_armv6
-
-#
-# Whole-pixel Variance
-#
-prototype unsigned int vp8_variance4x4 "const unsigned char *src_ptr, int source_stride, const unsigned char *ref_ptr, int ref_stride, unsigned int *sse"
-specialize vp8_variance4x4 mmx sse2
-vp8_variance4x4_sse2=vp8_variance4x4_wmt
-
-prototype unsigned int vp8_variance8x8 "const unsigned char *src_ptr, int source_stride, const unsigned char *ref_ptr, int ref_stride, unsigned int *sse"
-specialize vp8_variance8x8 mmx sse2 media neon
-vp8_variance8x8_sse2=vp8_variance8x8_wmt
-vp8_variance8x8_media=vp8_variance8x8_armv6
-
-prototype unsigned int vp8_variance8x16 "const unsigned char *src_ptr, int source_stride, const unsigned char *ref_ptr, int ref_stride, unsigned int *sse"
-specialize vp8_variance8x16 mmx sse2 neon
-vp8_variance8x16_sse2=vp8_variance8x16_wmt
-
-prototype unsigned int vp8_variance16x8 "const unsigned char *src_ptr, int source_stride, const unsigned char *ref_ptr, int ref_stride, unsigned int *sse"
-specialize vp8_variance16x8 mmx sse2 neon
-vp8_variance16x8_sse2=vp8_variance16x8_wmt
-
-prototype unsigned int vp8_variance16x16 "const unsigned char *src_ptr, int source_stride, const unsigned char *ref_ptr, int ref_stride, unsigned int *sse"
-specialize vp8_variance16x16 mmx sse2 media neon
-vp8_variance16x16_sse2=vp8_variance16x16_wmt
-vp8_variance16x16_media=vp8_variance16x16_armv6
-
-#
-# Sub-pixel Variance
-#
-prototype unsigned int vp8_sub_pixel_variance4x4 "const unsigned char *src_ptr, int source_stride, int xoffset, int yoffset, const unsigned char *ref_ptr, int Refstride, unsigned int *sse"
-specialize vp8_sub_pixel_variance4x4 mmx sse2
-vp8_sub_pixel_variance4x4_sse2=vp8_sub_pixel_variance4x4_wmt
-
-prototype unsigned int vp8_sub_pixel_variance8x8 "const unsigned char *src_ptr, int source_stride, int xoffset, int yoffset, const unsigned char *ref_ptr, int Refstride, unsigned int *sse"
-specialize vp8_sub_pixel_variance8x8 mmx sse2 media neon
-vp8_sub_pixel_variance8x8_sse2=vp8_sub_pixel_variance8x8_wmt
-vp8_sub_pixel_variance8x8_media=vp8_sub_pixel_variance8x8_armv6
-
-prototype unsigned int vp8_sub_pixel_variance8x16 "const unsigned char *src_ptr, int source_stride, int xoffset, int yoffset, const unsigned char *ref_ptr, int Refstride, unsigned int *sse"
-specialize vp8_sub_pixel_variance8x16 mmx sse2
-vp8_sub_pixel_variance8x16_sse2=vp8_sub_pixel_variance8x16_wmt
-
-prototype unsigned int vp8_sub_pixel_variance16x8 "const unsigned char *src_ptr, int source_stride, int xoffset, int yoffset, const unsigned char *ref_ptr, int Refstride, unsigned int *sse"
-specialize vp8_sub_pixel_variance16x8 mmx sse2 ssse3
-vp8_sub_pixel_variance16x8_sse2=vp8_sub_pixel_variance16x8_wmt
-
-prototype unsigned int vp8_sub_pixel_variance16x16 "const unsigned char *src_ptr, int source_stride, int xoffset, int yoffset, const unsigned char *ref_ptr, int Refstride, unsigned int *sse"
-specialize vp8_sub_pixel_variance16x16 mmx sse2 ssse3 media neon
-vp8_sub_pixel_variance16x16_sse2=vp8_sub_pixel_variance16x16_wmt
-vp8_sub_pixel_variance16x16_media=vp8_sub_pixel_variance16x16_armv6
-
-prototype unsigned int vp8_variance_halfpixvar16x16_h "const unsigned char *src_ptr, int source_stride, const unsigned char *ref_ptr, int ref_stride, unsigned int *sse"
-specialize vp8_variance_halfpixvar16x16_h mmx sse2 media neon
-vp8_variance_halfpixvar16x16_h_sse2=vp8_variance_halfpixvar16x16_h_wmt
-vp8_variance_halfpixvar16x16_h_media=vp8_variance_halfpixvar16x16_h_armv6
-
-prototype unsigned int vp8_variance_halfpixvar16x16_v "const unsigned char *src_ptr, int source_stride, const unsigned char *ref_ptr, int ref_stride, unsigned int *sse"
-specialize vp8_variance_halfpixvar16x16_v mmx sse2 media neon
-vp8_variance_halfpixvar16x16_v_sse2=vp8_variance_halfpixvar16x16_v_wmt
-vp8_variance_halfpixvar16x16_v_media=vp8_variance_halfpixvar16x16_v_armv6
-
-prototype unsigned int vp8_variance_halfpixvar16x16_hv "const unsigned char *src_ptr, int source_stride, const unsigned char *ref_ptr, int ref_stride, unsigned int *sse"
-specialize vp8_variance_halfpixvar16x16_hv mmx sse2 media neon
-vp8_variance_halfpixvar16x16_hv_sse2=vp8_variance_halfpixvar16x16_hv_wmt
-vp8_variance_halfpixvar16x16_hv_media=vp8_variance_halfpixvar16x16_hv_armv6
-
-#
-# Single block SAD
-#
-prototype unsigned int vp8_sad4x4 "const unsigned char *src_ptr, int src_stride, const unsigned char *ref_ptr, int ref_stride, unsigned int max_sad"
-specialize vp8_sad4x4 mmx sse2 neon
-vp8_sad4x4_sse2=vp8_sad4x4_wmt
-
-prototype unsigned int vp8_sad8x8 "const unsigned char *src_ptr, int src_stride, const unsigned char *ref_ptr, int ref_stride, unsigned int max_sad"
-specialize vp8_sad8x8 mmx sse2 neon
-vp8_sad8x8_sse2=vp8_sad8x8_wmt
-
-prototype unsigned int vp8_sad8x16 "const unsigned char *src_ptr, int src_stride, const unsigned char *ref_ptr, int ref_stride, unsigned int max_sad"
-specialize vp8_sad8x16 mmx sse2 neon
-vp8_sad8x16_sse2=vp8_sad8x16_wmt
-
-prototype unsigned int vp8_sad16x8 "const unsigned char *src_ptr, int src_stride, const unsigned char *ref_ptr, int ref_stride, unsigned int max_sad"
-specialize vp8_sad16x8 mmx sse2 neon
-vp8_sad16x8_sse2=vp8_sad16x8_wmt
-
-prototype unsigned int vp8_sad16x16 "const unsigned char *src_ptr, int src_stride, const unsigned char *ref_ptr, int ref_stride, unsigned int max_sad"
-specialize vp8_sad16x16 mmx sse2 sse3 media neon
-vp8_sad16x16_sse2=vp8_sad16x16_wmt
-vp8_sad16x16_media=vp8_sad16x16_armv6
-
-#
-# Multi-block SAD, comparing a reference to N blocks 1 pixel apart horizontally
-#
-prototype void vp8_sad4x4x3 "const unsigned char *src_ptr, int src_stride, const unsigned char *ref_ptr, int ref_stride, unsigned int *sad_array"
-specialize vp8_sad4x4x3 sse3
-
-prototype void vp8_sad8x8x3 "const unsigned char *src_ptr, int src_stride, const unsigned char *ref_ptr, int ref_stride, unsigned int *sad_array"
-specialize vp8_sad8x8x3 sse3
-
-prototype void vp8_sad8x16x3 "const unsigned char *src_ptr, int src_stride, const unsigned char *ref_ptr, int ref_stride, unsigned int *sad_array"
-specialize vp8_sad8x16x3 sse3
-
-prototype void vp8_sad16x8x3 "const unsigned char *src_ptr, int src_stride, const unsigned char *ref_ptr, int ref_stride, unsigned int *sad_array"
-specialize vp8_sad16x8x3 sse3 ssse3
-
-prototype void vp8_sad16x16x3 "const unsigned char *src_ptr, int src_stride, const unsigned char *ref_ptr, int ref_stride, unsigned int *sad_array"
-specialize vp8_sad16x16x3 sse3 ssse3
-
-# Note the only difference in the following prototypes is that they return into
-# an array of short
-prototype void vp8_sad4x4x8 "const unsigned char *src_ptr, int src_stride, const unsigned char *ref_ptr, int ref_stride, unsigned short *sad_array"
-specialize vp8_sad4x4x8 sse4_1
-vp8_sad4x4x8_sse4_1=vp8_sad4x4x8_sse4
-
-prototype void vp8_sad8x8x8 "const unsigned char *src_ptr, int src_stride, const unsigned char *ref_ptr, int ref_stride, unsigned short *sad_array"
-specialize vp8_sad8x8x8 sse4_1
-vp8_sad8x8x8_sse4_1=vp8_sad8x8x8_sse4
-
-prototype void vp8_sad8x16x8 "const unsigned char *src_ptr, int src_stride, const unsigned char *ref_ptr, int ref_stride, unsigned short *sad_array"
-specialize vp8_sad8x16x8 sse4_1
-vp8_sad8x16x8_sse4_1=vp8_sad8x16x8_sse4
-
-prototype void vp8_sad16x8x8 "const unsigned char *src_ptr, int src_stride, const unsigned char *ref_ptr, int ref_stride, unsigned short *sad_array"
-specialize vp8_sad16x8x8 sse4_1
-vp8_sad16x8x8_sse4_1=vp8_sad16x8x8_sse4
-
-prototype void vp8_sad16x16x8 "const unsigned char *src_ptr, int src_stride, const unsigned char *ref_ptr, int ref_stride, unsigned short *sad_array"
-specialize vp8_sad16x16x8 sse4_1
-vp8_sad16x16x8_sse4_1=vp8_sad16x16x8_sse4
-
-#
-# Multi-block SAD, comparing a reference to N independent blocks
-#
-prototype void vp8_sad4x4x4d "const unsigned char *src_ptr, int src_stride, const unsigned char * const ref_ptr[], int ref_stride, unsigned int *sad_array"
-specialize vp8_sad4x4x4d sse3
-
-prototype void vp8_sad8x8x4d "const unsigned char *src_ptr, int src_stride, const unsigned char * const ref_ptr[], int ref_stride, unsigned int *sad_array"
-specialize vp8_sad8x8x4d sse3
-
-prototype void vp8_sad8x16x4d "const unsigned char *src_ptr, int src_stride, const unsigned char * const ref_ptr[], int ref_stride, unsigned int *sad_array"
-specialize vp8_sad8x16x4d sse3
-
-prototype void vp8_sad16x8x4d "const unsigned char *src_ptr, int src_stride, const unsigned char * const ref_ptr[], int ref_stride, unsigned int *sad_array"
-specialize vp8_sad16x8x4d sse3
-
-prototype void vp8_sad16x16x4d "const unsigned char *src_ptr, int src_stride, const unsigned char * const ref_ptr[], int ref_stride, unsigned int *sad_array"
-specialize vp8_sad16x16x4d sse3
-
-#
-# Encoder functions below this point.
-#
-if [ "$CONFIG_VP8_ENCODER" = "yes" ]; then
-
-#
-# Sum of squares (vector)
-#
-prototype unsigned int vp8_get_mb_ss "const short *"
-specialize vp8_get_mb_ss mmx sse2
-
-#
-# SSE (Sum Squared Error)
-#
-prototype unsigned int vp8_sub_pixel_mse16x16 "const unsigned char *src_ptr, int source_stride, int xoffset, int yoffset, const unsigned char *ref_ptr, int Refstride, unsigned int *sse"
-specialize vp8_sub_pixel_mse16x16 mmx sse2
-vp8_sub_pixel_mse16x16_sse2=vp8_sub_pixel_mse16x16_wmt
-
-prototype unsigned int vp8_mse16x16 "const unsigned char *src_ptr, int source_stride, const unsigned char *ref_ptr, int ref_stride, unsigned int *sse"
-specialize vp8_mse16x16 mmx sse2 media neon
-vp8_mse16x16_sse2=vp8_mse16x16_wmt
-vp8_mse16x16_media=vp8_mse16x16_armv6
-
-prototype unsigned int vp8_get4x4sse_cs "const unsigned char *src_ptr, int source_stride, const unsigned char *ref_ptr, int ref_stride"
-specialize vp8_get4x4sse_cs mmx neon
-
-#
-# Block copy
-#
-case $arch in
- x86*)
- prototype void vp8_copy32xn "const unsigned char *src_ptr, int source_stride, const unsigned char *ref_ptr, int ref_stride, int n"
- specialize vp8_copy32xn sse2 sse3
- ;;
-esac
-
-#
-# Structured Similarity (SSIM)
-#
-if [ "$CONFIG_INTERNAL_STATS" = "yes" ]; then
- [ $arch = "x86_64" ] && sse2_on_x86_64=sse2
-
- prototype void vp8_ssim_parms_8x8 "unsigned char *s, int sp, unsigned char *r, int rp, unsigned long *sum_s, unsigned long *sum_r, unsigned long *sum_sq_s, unsigned long *sum_sq_r, unsigned long *sum_sxr"
- specialize vp8_ssim_parms_8x8 $sse2_on_x86_64
-
- prototype void vp8_ssim_parms_16x16 "unsigned char *s, int sp, unsigned char *r, int rp, unsigned long *sum_s, unsigned long *sum_r, unsigned long *sum_sq_s, unsigned long *sum_sq_r, unsigned long *sum_sxr"
- specialize vp8_ssim_parms_16x16 $sse2_on_x86_64
-fi
-
-#
-# Forward DCT
-#
-prototype void vp8_short_fdct4x4 "short *input, short *output, int pitch"
-specialize vp8_short_fdct4x4 mmx sse2 media neon
-vp8_short_fdct4x4_media=vp8_short_fdct4x4_armv6
-
-prototype void vp8_short_fdct8x4 "short *input, short *output, int pitch"
-specialize vp8_short_fdct8x4 mmx sse2 media neon
-vp8_short_fdct8x4_media=vp8_short_fdct8x4_armv6
-
-prototype void vp8_short_walsh4x4 "short *input, short *output, int pitch"
-specialize vp8_short_walsh4x4 sse2 media neon
-vp8_short_walsh4x4_media=vp8_short_walsh4x4_armv6
-
-#
-# Quantizer
-#
-prototype void vp8_regular_quantize_b "struct block *, struct blockd *"
-specialize vp8_regular_quantize_b sse2 #sse4_1
-# TODO(johann) Update sse4 implementation and re-enable
-#vp8_regular_quantize_b_sse4_1=vp8_regular_quantize_b_sse4
-
-prototype void vp8_fast_quantize_b "struct block *, struct blockd *"
-specialize vp8_fast_quantize_b sse2 ssse3 media neon
-vp8_fast_quantize_b_media=vp8_fast_quantize_b_armv6
-
-prototype void vp8_regular_quantize_b_pair "struct block *b1, struct block *b2, struct blockd *d1, struct blockd *d2"
-# no asm yet
-
-prototype void vp8_fast_quantize_b_pair "struct block *b1, struct block *b2, struct blockd *d1, struct blockd *d2"
-specialize vp8_fast_quantize_b_pair neon
-
-prototype void vp8_quantize_mb "struct macroblock *"
-specialize vp8_quantize_mb neon
-
-prototype void vp8_quantize_mby "struct macroblock *"
-specialize vp8_quantize_mby neon
-
-prototype void vp8_quantize_mbuv "struct macroblock *"
-specialize vp8_quantize_mbuv neon
-
-#
-# Block subtraction
-#
-prototype int vp8_block_error "short *coeff, short *dqcoeff"
-specialize vp8_block_error mmx sse2
-vp8_block_error_sse2=vp8_block_error_xmm
-
-prototype int vp8_mbblock_error "struct macroblock *mb, int dc"
-specialize vp8_mbblock_error mmx sse2
-vp8_mbblock_error_sse2=vp8_mbblock_error_xmm
-
-prototype int vp8_mbuverror "struct macroblock *mb"
-specialize vp8_mbuverror mmx sse2
-vp8_mbuverror_sse2=vp8_mbuverror_xmm
-
-prototype void vp8_subtract_b "struct block *be, struct blockd *bd, int pitch"
-specialize vp8_subtract_b mmx sse2 media neon
-vp8_subtract_b_media=vp8_subtract_b_armv6
-
-prototype void vp8_subtract_mby "short *diff, unsigned char *src, int src_stride, unsigned char *pred, int pred_stride"
-specialize vp8_subtract_mby mmx sse2 media neon
-vp8_subtract_mby_media=vp8_subtract_mby_armv6
-
-prototype void vp8_subtract_mbuv "short *diff, unsigned char *usrc, unsigned char *vsrc, int src_stride, unsigned char *upred, unsigned char *vpred, int pred_stride"
-specialize vp8_subtract_mbuv mmx sse2 media neon
-vp8_subtract_mbuv_media=vp8_subtract_mbuv_armv6
-
-#
-# Motion search
-#
-prototype int vp8_full_search_sad "struct macroblock *x, struct block *b, struct blockd *d, union int_mv *ref_mv, int sad_per_bit, int distance, struct variance_vtable *fn_ptr, int *mvcost[2], union int_mv *center_mv"
-specialize vp8_full_search_sad sse3 sse4_1
-vp8_full_search_sad_sse3=vp8_full_search_sadx3
-vp8_full_search_sad_sse4_1=vp8_full_search_sadx8
-
-prototype int vp8_refining_search_sad "struct macroblock *x, struct block *b, struct blockd *d, union int_mv *ref_mv, int sad_per_bit, int distance, struct variance_vtable *fn_ptr, int *mvcost[2], union int_mv *center_mv"
-specialize vp8_refining_search_sad sse3
-vp8_refining_search_sad_sse3=vp8_refining_search_sadx4
-
-prototype int vp8_diamond_search_sad "struct macroblock *x, struct block *b, struct blockd *d, union int_mv *ref_mv, union int_mv *best_mv, int search_param, int sad_per_bit, int *num00, struct variance_vtable *fn_ptr, int *mvcost[2], union int_mv *center_mv"
-vp8_diamond_search_sad_sse3=vp8_diamond_search_sadx4
-
-#
-# Alt-ref Noise Reduction (ARNR)
-#
-if [ "$CONFIG_REALTIME_ONLY" != "yes" ]; then
- prototype void vp8_temporal_filter_apply "unsigned char *frame1, unsigned int stride, unsigned char *frame2, unsigned int block_size, int strength, int filter_weight, unsigned int *accumulator, unsigned short *count"
- specialize vp8_temporal_filter_apply sse2
-fi
-
-#
-# Pick Loopfilter
-#
-prototype void vp8_yv12_copy_partial_frame "struct yv12_buffer_config *src_ybc, struct yv12_buffer_config *dst_ybc"
-specialize vp8_yv12_copy_partial_frame neon
-
-#
-# Denoiser filter
-#
-if [ "$CONFIG_TEMPORAL_DENOISING" = "yes" ]; then
- prototype int vp8_denoiser_filter "struct yv12_buffer_config* mc_running_avg, struct yv12_buffer_config* running_avg, struct macroblock* signal, unsigned int motion_magnitude2, int y_offset, int uv_offset"
- specialize vp8_denoiser_filter sse2 neon
-fi
-
-# End of encoder only functions
-fi
VP8_COMMON_SRCS-yes += common/reconinter.h
VP8_COMMON_SRCS-yes += common/reconintra4x4.h
VP8_COMMON_SRCS-yes += common/rtcd.c
-VP8_COMMON_SRCS-yes += common/rtcd_defs.sh
+VP8_COMMON_SRCS-yes += common/rtcd_defs.pl
VP8_COMMON_SRCS-yes += common/setupintrarecon.h
VP8_COMMON_SRCS-yes += common/swapyv12buffer.h
VP8_COMMON_SRCS-yes += common/systemdependent.h
VP8_COMMON_SRCS-$(HAVE_NEON) += common/arm/neon/dequantizeb_neon.c
-$(eval $(call rtcd_h_template,vp8_rtcd,vp8/common/rtcd_defs.sh))
+$(eval $(call rtcd_h_template,vp8_rtcd,vp8/common/rtcd_defs.pl))
--- /dev/null
+sub vp9_common_forward_decls() {
+print <<EOF
+/*
+ * VP9
+ */
+
+#include "vpx/vpx_integer.h"
+#include "vp9/common/vp9_enums.h"
+
+struct macroblockd;
+
+/* Encoder forward decls */
+struct macroblock;
+struct vp9_variance_vtable;
+
+#define DEC_MVCOSTS int *mvjcost, int *mvcost[2]
+struct mv;
+union int_mv;
+struct yv12_buffer_config;
+EOF
+}
+forward_decls qw/vp9_common_forward_decls/;
+
+# x86inc.asm doesn't work if pic is enabled on 32 bit platforms so no assembly.
+if (vpx_config("CONFIG_USE_X86INC") eq "yes") {
+ $mmx_x86inc = 'mmx';
+ $sse_x86inc = 'sse';
+ $sse2_x86inc = 'sse2';
+ $ssse3_x86inc = 'ssse3';
+ $avx_x86inc = 'avx';
+ $avx2_x86inc = 'avx2';
+} else {
+ $mmx_x86inc = $sse_x86inc = $sse2_x86inc = $ssse3_x86inc =
+ $avx_x86inc = $avx2_x86inc = '';
+}
+
+# this variable is for functions that are 64 bit only.
+if ($opts{arch} eq "x86_64") {
+ $mmx_x86_64 = 'mmx';
+ $sse2_x86_64 = 'sse2';
+ $ssse3_x86_64 = 'ssse3';
+ $avx_x86_64 = 'avx';
+ $avx2_x86_64 = 'avx2';
+} else {
+ $mmx_x86_64 = $sse2_x86_64 = $ssse3_x86_64 =
+ $avx_x86_64 = $avx2_x86_64 = '';
+}
+
+#
+# RECON
+#
+add_proto qw/void vp9_d207_predictor_4x4/, "uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left";
+specialize qw/vp9_d207_predictor_4x4/, "$ssse3_x86inc";
+
+add_proto qw/void vp9_d45_predictor_4x4/, "uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left";
+specialize qw/vp9_d45_predictor_4x4/, "$ssse3_x86inc";
+
+add_proto qw/void vp9_d63_predictor_4x4/, "uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left";
+specialize qw/vp9_d63_predictor_4x4/, "$ssse3_x86inc";
+
+add_proto qw/void vp9_h_predictor_4x4/, "uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left";
+specialize qw/vp9_h_predictor_4x4 neon dspr2/, "$ssse3_x86inc";
+
+add_proto qw/void vp9_d117_predictor_4x4/, "uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left";
+specialize qw/vp9_d117_predictor_4x4/;
+
+add_proto qw/void vp9_d135_predictor_4x4/, "uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left";
+specialize qw/vp9_d135_predictor_4x4/;
+
+add_proto qw/void vp9_d153_predictor_4x4/, "uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left";
+specialize qw/vp9_d153_predictor_4x4/, "$ssse3_x86inc";
+
+add_proto qw/void vp9_v_predictor_4x4/, "uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left";
+specialize qw/vp9_v_predictor_4x4 neon/, "$sse_x86inc";
+
+add_proto qw/void vp9_tm_predictor_4x4/, "uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left";
+specialize qw/vp9_tm_predictor_4x4 neon dspr2/, "$sse_x86inc";
+
+add_proto qw/void vp9_dc_predictor_4x4/, "uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left";
+specialize qw/vp9_dc_predictor_4x4 dspr2/, "$sse_x86inc";
+
+add_proto qw/void vp9_dc_top_predictor_4x4/, "uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left";
+specialize qw/vp9_dc_top_predictor_4x4/;
+
+add_proto qw/void vp9_dc_left_predictor_4x4/, "uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left";
+specialize qw/vp9_dc_left_predictor_4x4/;
+
+add_proto qw/void vp9_dc_128_predictor_4x4/, "uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left";
+specialize qw/vp9_dc_128_predictor_4x4/;
+
+add_proto qw/void vp9_d207_predictor_8x8/, "uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left";
+specialize qw/vp9_d207_predictor_8x8/, "$ssse3_x86inc";
+
+add_proto qw/void vp9_d45_predictor_8x8/, "uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left";
+specialize qw/vp9_d45_predictor_8x8/, "$ssse3_x86inc";
+
+add_proto qw/void vp9_d63_predictor_8x8/, "uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left";
+specialize qw/vp9_d63_predictor_8x8/, "$ssse3_x86inc";
+
+add_proto qw/void vp9_h_predictor_8x8/, "uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left";
+specialize qw/vp9_h_predictor_8x8 neon dspr2/, "$ssse3_x86inc";
+
+add_proto qw/void vp9_d117_predictor_8x8/, "uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left";
+specialize qw/vp9_d117_predictor_8x8/;
+
+add_proto qw/void vp9_d135_predictor_8x8/, "uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left";
+specialize qw/vp9_d135_predictor_8x8/;
+
+add_proto qw/void vp9_d153_predictor_8x8/, "uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left";
+specialize qw/vp9_d153_predictor_8x8/, "$ssse3_x86inc";
+
+add_proto qw/void vp9_v_predictor_8x8/, "uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left";
+specialize qw/vp9_v_predictor_8x8 neon/, "$sse_x86inc";
+
+add_proto qw/void vp9_tm_predictor_8x8/, "uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left";
+specialize qw/vp9_tm_predictor_8x8 neon dspr2/, "$sse2_x86inc";
+
+add_proto qw/void vp9_dc_predictor_8x8/, "uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left";
+specialize qw/vp9_dc_predictor_8x8 dspr2/, "$sse_x86inc";
+
+add_proto qw/void vp9_dc_top_predictor_8x8/, "uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left";
+specialize qw/vp9_dc_top_predictor_8x8/;
+
+add_proto qw/void vp9_dc_left_predictor_8x8/, "uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left";
+specialize qw/vp9_dc_left_predictor_8x8/;
+
+add_proto qw/void vp9_dc_128_predictor_8x8/, "uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left";
+specialize qw/vp9_dc_128_predictor_8x8/;
+
+add_proto qw/void vp9_d207_predictor_16x16/, "uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left";
+specialize qw/vp9_d207_predictor_16x16/, "$ssse3_x86inc";
+
+add_proto qw/void vp9_d45_predictor_16x16/, "uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left";
+specialize qw/vp9_d45_predictor_16x16/, "$ssse3_x86inc";
+
+add_proto qw/void vp9_d63_predictor_16x16/, "uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left";
+specialize qw/vp9_d63_predictor_16x16/, "$ssse3_x86inc";
+
+add_proto qw/void vp9_h_predictor_16x16/, "uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left";
+specialize qw/vp9_h_predictor_16x16 neon dspr2/, "$ssse3_x86inc";
+
+add_proto qw/void vp9_d117_predictor_16x16/, "uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left";
+specialize qw/vp9_d117_predictor_16x16/;
+
+add_proto qw/void vp9_d135_predictor_16x16/, "uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left";
+specialize qw/vp9_d135_predictor_16x16/;
+
+add_proto qw/void vp9_d153_predictor_16x16/, "uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left";
+specialize qw/vp9_d153_predictor_16x16/, "$ssse3_x86inc";
+
+add_proto qw/void vp9_v_predictor_16x16/, "uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left";
+specialize qw/vp9_v_predictor_16x16 neon/, "$sse2_x86inc";
+
+add_proto qw/void vp9_tm_predictor_16x16/, "uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left";
+specialize qw/vp9_tm_predictor_16x16 neon/, "$sse2_x86inc";
+
+add_proto qw/void vp9_dc_predictor_16x16/, "uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left";
+specialize qw/vp9_dc_predictor_16x16 dspr2/, "$sse2_x86inc";
+
+add_proto qw/void vp9_dc_top_predictor_16x16/, "uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left";
+specialize qw/vp9_dc_top_predictor_16x16/;
+
+add_proto qw/void vp9_dc_left_predictor_16x16/, "uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left";
+specialize qw/vp9_dc_left_predictor_16x16/;
+
+add_proto qw/void vp9_dc_128_predictor_16x16/, "uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left";
+specialize qw/vp9_dc_128_predictor_16x16/;
+
+add_proto qw/void vp9_d207_predictor_32x32/, "uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left";
+specialize qw/vp9_d207_predictor_32x32/, "$ssse3_x86inc";
+
+add_proto qw/void vp9_d45_predictor_32x32/, "uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left";
+specialize qw/vp9_d45_predictor_32x32/, "$ssse3_x86inc";
+
+add_proto qw/void vp9_d63_predictor_32x32/, "uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left";
+specialize qw/vp9_d63_predictor_32x32/, "$ssse3_x86inc";
+
+add_proto qw/void vp9_h_predictor_32x32/, "uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left";
+specialize qw/vp9_h_predictor_32x32 neon/, "$ssse3_x86inc";
+
+add_proto qw/void vp9_d117_predictor_32x32/, "uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left";
+specialize qw/vp9_d117_predictor_32x32/;
+
+add_proto qw/void vp9_d135_predictor_32x32/, "uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left";
+specialize qw/vp9_d135_predictor_32x32/;
+
+add_proto qw/void vp9_d153_predictor_32x32/, "uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left";
+specialize qw/vp9_d153_predictor_32x32/;
+
+add_proto qw/void vp9_v_predictor_32x32/, "uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left";
+specialize qw/vp9_v_predictor_32x32 neon/, "$sse2_x86inc";
+
+add_proto qw/void vp9_tm_predictor_32x32/, "uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left";
+specialize qw/vp9_tm_predictor_32x32 neon/, "$sse2_x86_64";
+
+add_proto qw/void vp9_dc_predictor_32x32/, "uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left";
+specialize qw/vp9_dc_predictor_32x32/, "$sse2_x86inc";
+
+add_proto qw/void vp9_dc_top_predictor_32x32/, "uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left";
+specialize qw/vp9_dc_top_predictor_32x32/;
+
+add_proto qw/void vp9_dc_left_predictor_32x32/, "uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left";
+specialize qw/vp9_dc_left_predictor_32x32/;
+
+add_proto qw/void vp9_dc_128_predictor_32x32/, "uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left";
+specialize qw/vp9_dc_128_predictor_32x32/;
+
+#
+# Loopfilter
+#
+add_proto qw/void vp9_lpf_vertical_16/, "uint8_t *s, int pitch, const uint8_t *blimit, const uint8_t *limit, const uint8_t *thresh";
+specialize qw/vp9_lpf_vertical_16 sse2 neon dspr2/;
+
+add_proto qw/void vp9_lpf_vertical_16_dual/, "uint8_t *s, int pitch, const uint8_t *blimit, const uint8_t *limit, const uint8_t *thresh";
+specialize qw/vp9_lpf_vertical_16_dual sse2 neon dspr2/;
+
+add_proto qw/void vp9_lpf_vertical_8/, "uint8_t *s, int pitch, const uint8_t *blimit, const uint8_t *limit, const uint8_t *thresh, int count";
+specialize qw/vp9_lpf_vertical_8 sse2 neon dspr2/;
+
+add_proto qw/void vp9_lpf_vertical_8_dual/, "uint8_t *s, int pitch, const uint8_t *blimit0, const uint8_t *limit0, const uint8_t *thresh0, const uint8_t *blimit1, const uint8_t *limit1, const uint8_t *thresh1";
+specialize qw/vp9_lpf_vertical_8_dual sse2 neon dspr2/;
+
+add_proto qw/void vp9_lpf_vertical_4/, "uint8_t *s, int pitch, const uint8_t *blimit, const uint8_t *limit, const uint8_t *thresh, int count";
+specialize qw/vp9_lpf_vertical_4 mmx neon dspr2/;
+
+add_proto qw/void vp9_lpf_vertical_4_dual/, "uint8_t *s, int pitch, const uint8_t *blimit0, const uint8_t *limit0, const uint8_t *thresh0, const uint8_t *blimit1, const uint8_t *limit1, const uint8_t *thresh1";
+specialize qw/vp9_lpf_vertical_4_dual sse2 neon dspr2/;
+
+add_proto qw/void vp9_lpf_horizontal_16/, "uint8_t *s, int pitch, const uint8_t *blimit, const uint8_t *limit, const uint8_t *thresh, int count";
+specialize qw/vp9_lpf_horizontal_16 sse2 avx2 neon dspr2/;
+
+add_proto qw/void vp9_lpf_horizontal_8/, "uint8_t *s, int pitch, const uint8_t *blimit, const uint8_t *limit, const uint8_t *thresh, int count";
+specialize qw/vp9_lpf_horizontal_8 sse2 neon dspr2/;
+
+add_proto qw/void vp9_lpf_horizontal_8_dual/, "uint8_t *s, int pitch, const uint8_t *blimit0, const uint8_t *limit0, const uint8_t *thresh0, const uint8_t *blimit1, const uint8_t *limit1, const uint8_t *thresh1";
+specialize qw/vp9_lpf_horizontal_8_dual sse2 neon dspr2/;
+
+add_proto qw/void vp9_lpf_horizontal_4/, "uint8_t *s, int pitch, const uint8_t *blimit, const uint8_t *limit, const uint8_t *thresh, int count";
+specialize qw/vp9_lpf_horizontal_4 mmx neon dspr2/;
+
+add_proto qw/void vp9_lpf_horizontal_4_dual/, "uint8_t *s, int pitch, const uint8_t *blimit0, const uint8_t *limit0, const uint8_t *thresh0, const uint8_t *blimit1, const uint8_t *limit1, const uint8_t *thresh1";
+specialize qw/vp9_lpf_horizontal_4_dual sse2 neon dspr2/;
+
+#
+# post proc
+#
+if (vpx_config("CONFIG_VP9_POSTPROC") eq "yes") {
+add_proto qw/void vp9_mbpost_proc_down/, "uint8_t *dst, int pitch, int rows, int cols, int flimit";
+specialize qw/vp9_mbpost_proc_down mmx sse2/;
+$vp9_mbpost_proc_down_sse2=vp9_mbpost_proc_down_xmm;
+
+add_proto qw/void vp9_mbpost_proc_across_ip/, "uint8_t *src, int pitch, int rows, int cols, int flimit";
+specialize qw/vp9_mbpost_proc_across_ip sse2/;
+$vp9_mbpost_proc_across_ip_sse2=vp9_mbpost_proc_across_ip_xmm;
+
+add_proto qw/void vp9_post_proc_down_and_across/, "const uint8_t *src_ptr, uint8_t *dst_ptr, int src_pixels_per_line, int dst_pixels_per_line, int rows, int cols, int flimit";
+specialize qw/vp9_post_proc_down_and_across mmx sse2/;
+$vp9_post_proc_down_and_across_sse2=vp9_post_proc_down_and_across_xmm;
+
+add_proto qw/void vp9_plane_add_noise/, "uint8_t *Start, char *noise, char blackclamp[16], char whiteclamp[16], char bothclamp[16], unsigned int Width, unsigned int Height, int Pitch";
+specialize qw/vp9_plane_add_noise mmx sse2/;
+$vp9_plane_add_noise_sse2=vp9_plane_add_noise_wmt;
+}
+
+add_proto qw/void vp9_blend_mb_inner/, "uint8_t *y, uint8_t *u, uint8_t *v, int y1, int u1, int v1, int alpha, int stride";
+specialize qw/vp9_blend_mb_inner/;
+
+add_proto qw/void vp9_blend_mb_outer/, "uint8_t *y, uint8_t *u, uint8_t *v, int y1, int u1, int v1, int alpha, int stride";
+specialize qw/vp9_blend_mb_outer/;
+
+add_proto qw/void vp9_blend_b/, "uint8_t *y, uint8_t *u, uint8_t *v, int y1, int u1, int v1, int alpha, int stride";
+specialize qw/vp9_blend_b/;
+
+#
+# Sub Pixel Filters
+#
+add_proto qw/void vp9_convolve_copy/, "const uint8_t *src, ptrdiff_t src_stride, uint8_t *dst, ptrdiff_t dst_stride, const int16_t *filter_x, int x_step_q4, const int16_t *filter_y, int y_step_q4, int w, int h";
+specialize qw/vp9_convolve_copy neon dspr2/, "$sse2_x86inc";
+
+add_proto qw/void vp9_convolve_avg/, "const uint8_t *src, ptrdiff_t src_stride, uint8_t *dst, ptrdiff_t dst_stride, const int16_t *filter_x, int x_step_q4, const int16_t *filter_y, int y_step_q4, int w, int h";
+specialize qw/vp9_convolve_avg neon dspr2/, "$sse2_x86inc";
+
+add_proto qw/void vp9_convolve8/, "const uint8_t *src, ptrdiff_t src_stride, uint8_t *dst, ptrdiff_t dst_stride, const int16_t *filter_x, int x_step_q4, const int16_t *filter_y, int y_step_q4, int w, int h";
+specialize qw/vp9_convolve8 sse2 ssse3 avx2 neon dspr2/;
+
+add_proto qw/void vp9_convolve8_horiz/, "const uint8_t *src, ptrdiff_t src_stride, uint8_t *dst, ptrdiff_t dst_stride, const int16_t *filter_x, int x_step_q4, const int16_t *filter_y, int y_step_q4, int w, int h";
+specialize qw/vp9_convolve8_horiz sse2 ssse3 avx2 neon dspr2/;
+
+add_proto qw/void vp9_convolve8_vert/, "const uint8_t *src, ptrdiff_t src_stride, uint8_t *dst, ptrdiff_t dst_stride, const int16_t *filter_x, int x_step_q4, const int16_t *filter_y, int y_step_q4, int w, int h";
+specialize qw/vp9_convolve8_vert sse2 ssse3 avx2 neon dspr2/;
+
+add_proto qw/void vp9_convolve8_avg/, "const uint8_t *src, ptrdiff_t src_stride, uint8_t *dst, ptrdiff_t dst_stride, const int16_t *filter_x, int x_step_q4, const int16_t *filter_y, int y_step_q4, int w, int h";
+specialize qw/vp9_convolve8_avg sse2 ssse3 neon dspr2/;
+
+add_proto qw/void vp9_convolve8_avg_horiz/, "const uint8_t *src, ptrdiff_t src_stride, uint8_t *dst, ptrdiff_t dst_stride, const int16_t *filter_x, int x_step_q4, const int16_t *filter_y, int y_step_q4, int w, int h";
+specialize qw/vp9_convolve8_avg_horiz sse2 ssse3 neon dspr2/;
+
+add_proto qw/void vp9_convolve8_avg_vert/, "const uint8_t *src, ptrdiff_t src_stride, uint8_t *dst, ptrdiff_t dst_stride, const int16_t *filter_x, int x_step_q4, const int16_t *filter_y, int y_step_q4, int w, int h";
+specialize qw/vp9_convolve8_avg_vert sse2 ssse3 neon dspr2/;
+
+#
+# dct
+#
+add_proto qw/void vp9_idct4x4_1_add/, "const int16_t *input, uint8_t *dest, int dest_stride";
+specialize qw/vp9_idct4x4_1_add sse2 neon dspr2/;
+
+add_proto qw/void vp9_idct4x4_16_add/, "const int16_t *input, uint8_t *dest, int dest_stride";
+specialize qw/vp9_idct4x4_16_add sse2 neon dspr2/;
+
+add_proto qw/void vp9_idct8x8_1_add/, "const int16_t *input, uint8_t *dest, int dest_stride";
+specialize qw/vp9_idct8x8_1_add sse2 neon dspr2/;
+
+add_proto qw/void vp9_idct8x8_64_add/, "const int16_t *input, uint8_t *dest, int dest_stride";
+specialize qw/vp9_idct8x8_64_add sse2 neon dspr2/;
+
+add_proto qw/void vp9_idct8x8_10_add/, "const int16_t *input, uint8_t *dest, int dest_stride";
+specialize qw/vp9_idct8x8_10_add sse2 neon dspr2/;
+
+add_proto qw/void vp9_idct16x16_1_add/, "const int16_t *input, uint8_t *dest, int dest_stride";
+specialize qw/vp9_idct16x16_1_add sse2 neon dspr2/;
+
+add_proto qw/void vp9_idct16x16_256_add/, "const int16_t *input, uint8_t *dest, int dest_stride";
+specialize qw/vp9_idct16x16_256_add sse2 neon dspr2/;
+
+add_proto qw/void vp9_idct16x16_10_add/, "const int16_t *input, uint8_t *dest, int dest_stride";
+specialize qw/vp9_idct16x16_10_add sse2 neon dspr2/;
+
+add_proto qw/void vp9_idct32x32_1024_add/, "const int16_t *input, uint8_t *dest, int dest_stride";
+specialize qw/vp9_idct32x32_1024_add sse2 neon dspr2/;
+
+add_proto qw/void vp9_idct32x32_34_add/, "const int16_t *input, uint8_t *dest, int dest_stride";
+specialize qw/vp9_idct32x32_34_add sse2 neon dspr2/;
+$vp9_idct32x32_34_add_neon=vp9_idct32x32_1024_add_neon;
+
+add_proto qw/void vp9_idct32x32_1_add/, "const int16_t *input, uint8_t *dest, int dest_stride";
+specialize qw/vp9_idct32x32_1_add sse2 neon dspr2/;
+
+add_proto qw/void vp9_iht4x4_16_add/, "const int16_t *input, uint8_t *dest, int dest_stride, int tx_type";
+specialize qw/vp9_iht4x4_16_add sse2 neon dspr2/;
+
+add_proto qw/void vp9_iht8x8_64_add/, "const int16_t *input, uint8_t *dest, int dest_stride, int tx_type";
+specialize qw/vp9_iht8x8_64_add sse2 neon dspr2/;
+
+add_proto qw/void vp9_iht16x16_256_add/, "const int16_t *input, uint8_t *output, int pitch, int tx_type";
+specialize qw/vp9_iht16x16_256_add sse2 dspr2/;
+
+# dct and add
+
+add_proto qw/void vp9_iwht4x4_1_add/, "const int16_t *input, uint8_t *dest, int dest_stride";
+specialize qw/vp9_iwht4x4_1_add/;
+
+add_proto qw/void vp9_iwht4x4_16_add/, "const int16_t *input, uint8_t *dest, int dest_stride";
+specialize qw/vp9_iwht4x4_16_add/;
+
+#
+# Encoder functions below this point.
+#
+if (vpx_config("CONFIG_VP9_ENCODER") eq "yes") {
+
+
+# variance
+add_proto qw/unsigned int vp9_variance32x16/, "const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse";
+specialize qw/vp9_variance32x16/, "$sse2_x86inc", "$avx2_x86inc";
+
+add_proto qw/unsigned int vp9_variance16x32/, "const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse";
+specialize qw/vp9_variance16x32/, "$sse2_x86inc";
+
+add_proto qw/unsigned int vp9_variance64x32/, "const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse";
+specialize qw/vp9_variance64x32/, "$sse2_x86inc", "$avx2_x86inc";
+
+add_proto qw/unsigned int vp9_variance32x64/, "const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse";
+specialize qw/vp9_variance32x64/, "$sse2_x86inc";
+
+add_proto qw/unsigned int vp9_variance32x32/, "const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse";
+specialize qw/vp9_variance32x32/, "$sse2_x86inc", "$avx2_x86inc";
+
+add_proto qw/unsigned int vp9_variance64x64/, "const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse";
+specialize qw/vp9_variance64x64/, "$sse2_x86inc", "$avx2_x86inc";
+
+add_proto qw/unsigned int vp9_variance16x16/, "const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse";
+specialize qw/vp9_variance16x16 mmx/, "$sse2_x86inc", "$avx2_x86inc";
+
+add_proto qw/unsigned int vp9_variance16x8/, "const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse";
+specialize qw/vp9_variance16x8 mmx/, "$sse2_x86inc";
+
+add_proto qw/unsigned int vp9_variance8x16/, "const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse";
+specialize qw/vp9_variance8x16 mmx/, "$sse2_x86inc";
+
+add_proto qw/unsigned int vp9_variance8x8/, "const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse";
+specialize qw/vp9_variance8x8 mmx/, "$sse2_x86inc";
+
+add_proto qw/void vp9_get_sse_sum_8x8/, "const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse, int *sum";
+specialize qw/vp9_get_sse_sum_8x8 sse2/;
+$vp9_get_sse_sum_8x8_sse2=vp9_get8x8var_sse2;
+
+add_proto qw/unsigned int vp9_variance8x4/, "const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse";
+specialize qw/vp9_variance8x4/, "$sse2_x86inc";
+
+add_proto qw/unsigned int vp9_variance4x8/, "const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse";
+specialize qw/vp9_variance4x8/, "$sse2_x86inc";
+
+add_proto qw/unsigned int vp9_variance4x4/, "const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse";
+specialize qw/vp9_variance4x4 mmx/, "$sse2_x86inc";
+
+add_proto qw/unsigned int vp9_sub_pixel_variance64x64/, "const uint8_t *src_ptr, int source_stride, int xoffset, int yoffset, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse";
+specialize qw/vp9_sub_pixel_variance64x64 avx2/, "$sse2_x86inc", "$ssse3_x86inc";
+
+add_proto qw/unsigned int vp9_sub_pixel_avg_variance64x64/, "const uint8_t *src_ptr, int source_stride, int xoffset, int yoffset, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse, const uint8_t *second_pred";
+specialize qw/vp9_sub_pixel_avg_variance64x64 avx2/, "$sse2_x86inc", "$ssse3_x86inc";
+
+add_proto qw/unsigned int vp9_sub_pixel_variance32x64/, "const uint8_t *src_ptr, int source_stride, int xoffset, int yoffset, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse";
+specialize qw/vp9_sub_pixel_variance32x64/, "$sse2_x86inc", "$ssse3_x86inc";
+
+add_proto qw/unsigned int vp9_sub_pixel_avg_variance32x64/, "const uint8_t *src_ptr, int source_stride, int xoffset, int yoffset, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse, const uint8_t *second_pred";
+specialize qw/vp9_sub_pixel_avg_variance32x64/, "$sse2_x86inc", "$ssse3_x86inc";
+
+add_proto qw/unsigned int vp9_sub_pixel_variance64x32/, "const uint8_t *src_ptr, int source_stride, int xoffset, int yoffset, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse";
+specialize qw/vp9_sub_pixel_variance64x32/, "$sse2_x86inc", "$ssse3_x86inc";
+
+add_proto qw/unsigned int vp9_sub_pixel_avg_variance64x32/, "const uint8_t *src_ptr, int source_stride, int xoffset, int yoffset, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse, const uint8_t *second_pred";
+specialize qw/vp9_sub_pixel_avg_variance64x32/, "$sse2_x86inc", "$ssse3_x86inc";
+
+add_proto qw/unsigned int vp9_sub_pixel_variance32x16/, "const uint8_t *src_ptr, int source_stride, int xoffset, int yoffset, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse";
+specialize qw/vp9_sub_pixel_variance32x16/, "$sse2_x86inc", "$ssse3_x86inc";
+
+add_proto qw/unsigned int vp9_sub_pixel_avg_variance32x16/, "const uint8_t *src_ptr, int source_stride, int xoffset, int yoffset, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse, const uint8_t *second_pred";
+specialize qw/vp9_sub_pixel_avg_variance32x16/, "$sse2_x86inc", "$ssse3_x86inc";
+
+add_proto qw/unsigned int vp9_sub_pixel_variance16x32/, "const uint8_t *src_ptr, int source_stride, int xoffset, int yoffset, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse";
+specialize qw/vp9_sub_pixel_variance16x32/, "$sse2_x86inc", "$ssse3_x86inc";
+
+add_proto qw/unsigned int vp9_sub_pixel_avg_variance16x32/, "const uint8_t *src_ptr, int source_stride, int xoffset, int yoffset, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse, const uint8_t *second_pred";
+specialize qw/vp9_sub_pixel_avg_variance16x32/, "$sse2_x86inc", "$ssse3_x86inc";
+
+add_proto qw/unsigned int vp9_sub_pixel_variance32x32/, "const uint8_t *src_ptr, int source_stride, int xoffset, int yoffset, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse";
+specialize qw/vp9_sub_pixel_variance32x32 avx2/, "$sse2_x86inc", "$ssse3_x86inc";
+
+add_proto qw/unsigned int vp9_sub_pixel_avg_variance32x32/, "const uint8_t *src_ptr, int source_stride, int xoffset, int yoffset, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse, const uint8_t *second_pred";
+specialize qw/vp9_sub_pixel_avg_variance32x32 avx2/, "$sse2_x86inc", "$ssse3_x86inc";
+
+add_proto qw/unsigned int vp9_sub_pixel_variance16x16/, "const uint8_t *src_ptr, int source_stride, int xoffset, int yoffset, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse";
+specialize qw/vp9_sub_pixel_variance16x16/, "$sse2_x86inc", "$ssse3_x86inc";
+
+add_proto qw/unsigned int vp9_sub_pixel_avg_variance16x16/, "const uint8_t *src_ptr, int source_stride, int xoffset, int yoffset, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse, const uint8_t *second_pred";
+specialize qw/vp9_sub_pixel_avg_variance16x16/, "$sse2_x86inc", "$ssse3_x86inc";
+
+add_proto qw/unsigned int vp9_sub_pixel_variance8x16/, "const uint8_t *src_ptr, int source_stride, int xoffset, int yoffset, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse";
+specialize qw/vp9_sub_pixel_variance8x16/, "$sse2_x86inc", "$ssse3_x86inc";
+
+add_proto qw/unsigned int vp9_sub_pixel_avg_variance8x16/, "const uint8_t *src_ptr, int source_stride, int xoffset, int yoffset, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse, const uint8_t *second_pred";
+specialize qw/vp9_sub_pixel_avg_variance8x16/, "$sse2_x86inc", "$ssse3_x86inc";
+
+add_proto qw/unsigned int vp9_sub_pixel_variance16x8/, "const uint8_t *src_ptr, int source_stride, int xoffset, int yoffset, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse";
+specialize qw/vp9_sub_pixel_variance16x8/, "$sse2_x86inc", "$ssse3_x86inc";
+
+add_proto qw/unsigned int vp9_sub_pixel_avg_variance16x8/, "const uint8_t *src_ptr, int source_stride, int xoffset, int yoffset, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse, const uint8_t *second_pred";
+specialize qw/vp9_sub_pixel_avg_variance16x8/, "$sse2_x86inc", "$ssse3_x86inc";
+
+add_proto qw/unsigned int vp9_sub_pixel_variance8x8/, "const uint8_t *src_ptr, int source_stride, int xoffset, int yoffset, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse";
+specialize qw/vp9_sub_pixel_variance8x8/, "$sse2_x86inc", "$ssse3_x86inc";
+
+add_proto qw/unsigned int vp9_sub_pixel_avg_variance8x8/, "const uint8_t *src_ptr, int source_stride, int xoffset, int yoffset, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse, const uint8_t *second_pred";
+specialize qw/vp9_sub_pixel_avg_variance8x8/, "$sse2_x86inc", "$ssse3_x86inc";
+
+# TODO(jingning): need to convert 8x4/4x8 functions into mmx/sse form
+add_proto qw/unsigned int vp9_sub_pixel_variance8x4/, "const uint8_t *src_ptr, int source_stride, int xoffset, int yoffset, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse";
+specialize qw/vp9_sub_pixel_variance8x4/, "$sse2_x86inc", "$ssse3_x86inc";
+
+add_proto qw/unsigned int vp9_sub_pixel_avg_variance8x4/, "const uint8_t *src_ptr, int source_stride, int xoffset, int yoffset, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse, const uint8_t *second_pred";
+specialize qw/vp9_sub_pixel_avg_variance8x4/, "$sse2_x86inc", "$ssse3_x86inc";
+
+add_proto qw/unsigned int vp9_sub_pixel_variance4x8/, "const uint8_t *src_ptr, int source_stride, int xoffset, int yoffset, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse";
+specialize qw/vp9_sub_pixel_variance4x8/, "$sse_x86inc", "$ssse3_x86inc";
+
+add_proto qw/unsigned int vp9_sub_pixel_avg_variance4x8/, "const uint8_t *src_ptr, int source_stride, int xoffset, int yoffset, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse, const uint8_t *second_pred";
+specialize qw/vp9_sub_pixel_avg_variance4x8/, "$sse_x86inc", "$ssse3_x86inc";
+
+add_proto qw/unsigned int vp9_sub_pixel_variance4x4/, "const uint8_t *src_ptr, int source_stride, int xoffset, int yoffset, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse";
+specialize qw/vp9_sub_pixel_variance4x4/, "$sse_x86inc", "$ssse3_x86inc";
+#vp9_sub_pixel_variance4x4_sse2=vp9_sub_pixel_variance4x4_wmt
+
+add_proto qw/unsigned int vp9_sub_pixel_avg_variance4x4/, "const uint8_t *src_ptr, int source_stride, int xoffset, int yoffset, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse, const uint8_t *second_pred";
+specialize qw/vp9_sub_pixel_avg_variance4x4/, "$sse_x86inc", "$ssse3_x86inc";
+
+add_proto qw/unsigned int vp9_sad64x64/, "const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int max_sad";
+specialize qw/vp9_sad64x64/, "$sse2_x86inc";
+
+add_proto qw/unsigned int vp9_sad32x64/, "const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int max_sad";
+specialize qw/vp9_sad32x64/, "$sse2_x86inc";
+
+add_proto qw/unsigned int vp9_sad64x32/, "const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int max_sad";
+specialize qw/vp9_sad64x32/, "$sse2_x86inc";
+
+add_proto qw/unsigned int vp9_sad32x16/, "const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int max_sad";
+specialize qw/vp9_sad32x16/, "$sse2_x86inc";
+
+add_proto qw/unsigned int vp9_sad16x32/, "const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int max_sad";
+specialize qw/vp9_sad16x32/, "$sse2_x86inc";
+
+add_proto qw/unsigned int vp9_sad32x32/, "const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int max_sad";
+specialize qw/vp9_sad32x32/, "$sse2_x86inc";
+
+add_proto qw/unsigned int vp9_sad16x16/, "const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int max_sad";
+specialize qw/vp9_sad16x16 mmx/, "$sse2_x86inc";
+
+add_proto qw/unsigned int vp9_sad16x8/, "const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int max_sad";
+specialize qw/vp9_sad16x8 mmx/, "$sse2_x86inc";
+
+add_proto qw/unsigned int vp9_sad8x16/, "const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int max_sad";
+specialize qw/vp9_sad8x16 mmx/, "$sse2_x86inc";
+
+add_proto qw/unsigned int vp9_sad8x8/, "const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int max_sad";
+specialize qw/vp9_sad8x8 mmx/, "$sse2_x86inc";
+
+add_proto qw/unsigned int vp9_sad8x4/, "const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int max_sad";
+specialize qw/vp9_sad8x4/, "$sse2_x86inc";
+
+add_proto qw/unsigned int vp9_sad4x8/, "const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int max_sad";
+specialize qw/vp9_sad4x8/, "$sse_x86inc";
+
+add_proto qw/unsigned int vp9_sad4x4/, "const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int max_sad";
+specialize qw/vp9_sad4x4 mmx/, "$sse_x86inc";
+
+add_proto qw/unsigned int vp9_sad64x64_avg/, "const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, const uint8_t *second_pred, unsigned int max_sad";
+specialize qw/vp9_sad64x64_avg/, "$sse2_x86inc";
+
+add_proto qw/unsigned int vp9_sad32x64_avg/, "const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, const uint8_t *second_pred, unsigned int max_sad";
+specialize qw/vp9_sad32x64_avg/, "$sse2_x86inc";
+
+add_proto qw/unsigned int vp9_sad64x32_avg/, "const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, const uint8_t *second_pred, unsigned int max_sad";
+specialize qw/vp9_sad64x32_avg/, "$sse2_x86inc";
+
+add_proto qw/unsigned int vp9_sad32x16_avg/, "const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, const uint8_t *second_pred, unsigned int max_sad";
+specialize qw/vp9_sad32x16_avg/, "$sse2_x86inc";
+
+add_proto qw/unsigned int vp9_sad16x32_avg/, "const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, const uint8_t *second_pred, unsigned int max_sad";
+specialize qw/vp9_sad16x32_avg/, "$sse2_x86inc";
+
+add_proto qw/unsigned int vp9_sad32x32_avg/, "const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, const uint8_t *second_pred, unsigned int max_sad";
+specialize qw/vp9_sad32x32_avg/, "$sse2_x86inc";
+
+add_proto qw/unsigned int vp9_sad16x16_avg/, "const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, const uint8_t *second_pred, unsigned int max_sad";
+specialize qw/vp9_sad16x16_avg/, "$sse2_x86inc";
+
+add_proto qw/unsigned int vp9_sad16x8_avg/, "const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, const uint8_t *second_pred, unsigned int max_sad";
+specialize qw/vp9_sad16x8_avg/, "$sse2_x86inc";
+
+add_proto qw/unsigned int vp9_sad8x16_avg/, "const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, const uint8_t *second_pred, unsigned int max_sad";
+specialize qw/vp9_sad8x16_avg/, "$sse2_x86inc";
+
+add_proto qw/unsigned int vp9_sad8x8_avg/, "const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, const uint8_t *second_pred, unsigned int max_sad";
+specialize qw/vp9_sad8x8_avg/, "$sse2_x86inc";
+
+add_proto qw/unsigned int vp9_sad8x4_avg/, "const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, const uint8_t *second_pred, unsigned int max_sad";
+specialize qw/vp9_sad8x4_avg/, "$sse2_x86inc";
+
+add_proto qw/unsigned int vp9_sad4x8_avg/, "const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, const uint8_t *second_pred, unsigned int max_sad";
+specialize qw/vp9_sad4x8_avg/, "$sse_x86inc";
+
+add_proto qw/unsigned int vp9_sad4x4_avg/, "const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, const uint8_t *second_pred, unsigned int max_sad";
+specialize qw/vp9_sad4x4_avg/, "$sse_x86inc";
+
+add_proto qw/unsigned int vp9_variance_halfpixvar16x16_h/, "const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse";
+specialize qw/vp9_variance_halfpixvar16x16_h/, "$sse2_x86inc";
+
+add_proto qw/unsigned int vp9_variance_halfpixvar16x16_v/, "const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse";
+specialize qw/vp9_variance_halfpixvar16x16_v/, "$sse2_x86inc";
+
+add_proto qw/unsigned int vp9_variance_halfpixvar16x16_hv/, "const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse";
+specialize qw/vp9_variance_halfpixvar16x16_hv/, "$sse2_x86inc";
+
+add_proto qw/unsigned int vp9_variance_halfpixvar64x64_h/, "const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse";
+specialize qw/vp9_variance_halfpixvar64x64_h/;
+
+add_proto qw/unsigned int vp9_variance_halfpixvar64x64_v/, "const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse";
+specialize qw/vp9_variance_halfpixvar64x64_v/;
+
+add_proto qw/unsigned int vp9_variance_halfpixvar64x64_hv/, "const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse";
+specialize qw/vp9_variance_halfpixvar64x64_hv/;
+
+add_proto qw/unsigned int vp9_variance_halfpixvar32x32_h/, "const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse";
+specialize qw/vp9_variance_halfpixvar32x32_h/;
+
+add_proto qw/unsigned int vp9_variance_halfpixvar32x32_v/, "const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse";
+specialize qw/vp9_variance_halfpixvar32x32_v/;
+
+add_proto qw/unsigned int vp9_variance_halfpixvar32x32_hv/, "const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse";
+specialize qw/vp9_variance_halfpixvar32x32_hv/;
+
+add_proto qw/void vp9_sad64x64x3/, "const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sad_array";
+specialize qw/vp9_sad64x64x3/;
+
+add_proto qw/void vp9_sad32x32x3/, "const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sad_array";
+specialize qw/vp9_sad32x32x3/;
+
+add_proto qw/void vp9_sad16x16x3/, "const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sad_array";
+specialize qw/vp9_sad16x16x3 sse3 ssse3/;
+
+add_proto qw/void vp9_sad16x8x3/, "const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sad_array";
+specialize qw/vp9_sad16x8x3 sse3 ssse3/;
+
+add_proto qw/void vp9_sad8x16x3/, "const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sad_array";
+specialize qw/vp9_sad8x16x3 sse3/;
+
+add_proto qw/void vp9_sad8x8x3/, "const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sad_array";
+specialize qw/vp9_sad8x8x3 sse3/;
+
+add_proto qw/void vp9_sad4x4x3/, "const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sad_array";
+specialize qw/vp9_sad4x4x3 sse3/;
+
+add_proto qw/void vp9_sad64x64x8/, "const uint8_t *src_ptr, int src_stride, const uint8_t *ref_ptr, int ref_stride, uint32_t *sad_array";
+specialize qw/vp9_sad64x64x8/;
+
+add_proto qw/void vp9_sad32x32x8/, "const uint8_t *src_ptr, int src_stride, const uint8_t *ref_ptr, int ref_stride, uint32_t *sad_array";
+specialize qw/vp9_sad32x32x8/;
+
+add_proto qw/void vp9_sad16x16x8/, "const uint8_t *src_ptr, int src_stride, const uint8_t *ref_ptr, int ref_stride, uint32_t *sad_array";
+specialize qw/vp9_sad16x16x8 sse4/;
+
+add_proto qw/void vp9_sad16x8x8/, "const uint8_t *src_ptr, int src_stride, const uint8_t *ref_ptr, int ref_stride, uint32_t *sad_array";
+specialize qw/vp9_sad16x8x8 sse4/;
+
+add_proto qw/void vp9_sad8x16x8/, "const uint8_t *src_ptr, int src_stride, const uint8_t *ref_ptr, int ref_stride, uint32_t *sad_array";
+specialize qw/vp9_sad8x16x8 sse4/;
+
+add_proto qw/void vp9_sad8x8x8/, "const uint8_t *src_ptr, int src_stride, const uint8_t *ref_ptr, int ref_stride, uint32_t *sad_array";
+specialize qw/vp9_sad8x8x8 sse4/;
+
+add_proto qw/void vp9_sad8x4x8/, "const uint8_t *src_ptr, int src_stride, const uint8_t *ref_ptr, int ref_stride, uint32_t *sad_array";
+specialize qw/vp9_sad8x4x8/;
+
+add_proto qw/void vp9_sad4x8x8/, "const uint8_t *src_ptr, int src_stride, const uint8_t *ref_ptr, int ref_stride, uint32_t *sad_array";
+specialize qw/vp9_sad4x8x8/;
+
+add_proto qw/void vp9_sad4x4x8/, "const uint8_t *src_ptr, int src_stride, const uint8_t *ref_ptr, int ref_stride, uint32_t *sad_array";
+specialize qw/vp9_sad4x4x8 sse4/;
+
+add_proto qw/void vp9_sad64x64x4d/, "const uint8_t *src_ptr, int src_stride, const uint8_t* const ref_ptr[], int ref_stride, unsigned int *sad_array";
+specialize qw/vp9_sad64x64x4d sse2/;
+
+add_proto qw/void vp9_sad32x64x4d/, "const uint8_t *src_ptr, int src_stride, const uint8_t* const ref_ptr[], int ref_stride, unsigned int *sad_array";
+specialize qw/vp9_sad32x64x4d sse2/;
+
+add_proto qw/void vp9_sad64x32x4d/, "const uint8_t *src_ptr, int src_stride, const uint8_t* const ref_ptr[], int ref_stride, unsigned int *sad_array";
+specialize qw/vp9_sad64x32x4d sse2/;
+
+add_proto qw/void vp9_sad32x16x4d/, "const uint8_t *src_ptr, int src_stride, const uint8_t* const ref_ptr[], int ref_stride, unsigned int *sad_array";
+specialize qw/vp9_sad32x16x4d sse2/;
+
+add_proto qw/void vp9_sad16x32x4d/, "const uint8_t *src_ptr, int src_stride, const uint8_t* const ref_ptr[], int ref_stride, unsigned int *sad_array";
+specialize qw/vp9_sad16x32x4d sse2/;
+
+add_proto qw/void vp9_sad32x32x4d/, "const uint8_t *src_ptr, int src_stride, const uint8_t* const ref_ptr[], int ref_stride, unsigned int *sad_array";
+specialize qw/vp9_sad32x32x4d sse2/;
+
+add_proto qw/void vp9_sad16x16x4d/, "const uint8_t *src_ptr, int src_stride, const uint8_t* const ref_ptr[], int ref_stride, unsigned int *sad_array";
+specialize qw/vp9_sad16x16x4d sse2/;
+
+add_proto qw/void vp9_sad16x8x4d/, "const uint8_t *src_ptr, int src_stride, const uint8_t* const ref_ptr[], int ref_stride, unsigned int *sad_array";
+specialize qw/vp9_sad16x8x4d sse2/;
+
+add_proto qw/void vp9_sad8x16x4d/, "const uint8_t *src_ptr, int src_stride, const uint8_t* const ref_ptr[], int ref_stride, unsigned int *sad_array";
+specialize qw/vp9_sad8x16x4d sse2/;
+
+add_proto qw/void vp9_sad8x8x4d/, "const uint8_t *src_ptr, int src_stride, const uint8_t* const ref_ptr[], int ref_stride, unsigned int *sad_array";
+specialize qw/vp9_sad8x8x4d sse2/;
+
+# TODO(jingning): need to convert these 4x8/8x4 functions into sse2 form
+add_proto qw/void vp9_sad8x4x4d/, "const uint8_t *src_ptr, int src_stride, const uint8_t* const ref_ptr[], int ref_stride, unsigned int *sad_array";
+specialize qw/vp9_sad8x4x4d sse2/;
+
+add_proto qw/void vp9_sad4x8x4d/, "const uint8_t *src_ptr, int src_stride, const uint8_t* const ref_ptr[], int ref_stride, unsigned int *sad_array";
+specialize qw/vp9_sad4x8x4d sse/;
+
+add_proto qw/void vp9_sad4x4x4d/, "const uint8_t *src_ptr, int src_stride, const uint8_t* const ref_ptr[], int ref_stride, unsigned int *sad_array";
+specialize qw/vp9_sad4x4x4d sse/;
+
+#add_proto qw/unsigned int vp9_sub_pixel_mse16x16/, "const uint8_t *src_ptr, int src_pixels_per_line, int xoffset, int yoffset, const uint8_t *dst_ptr, int dst_pixels_per_line, unsigned int *sse";
+#specialize qw/vp9_sub_pixel_mse16x16 sse2 mmx/;
+
+add_proto qw/unsigned int vp9_mse16x16/, "const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int recon_stride, unsigned int *sse";
+specialize qw/vp9_mse16x16 mmx/, "$sse2_x86inc", "$avx2_x86inc";
+
+add_proto qw/unsigned int vp9_mse8x16/, "const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int recon_stride, unsigned int *sse";
+specialize qw/vp9_mse8x16/;
+
+add_proto qw/unsigned int vp9_mse16x8/, "const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int recon_stride, unsigned int *sse";
+specialize qw/vp9_mse16x8/;
+
+add_proto qw/unsigned int vp9_mse8x8/, "const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int recon_stride, unsigned int *sse";
+specialize qw/vp9_mse8x8/;
+
+add_proto qw/unsigned int vp9_sub_pixel_mse64x64/, "const uint8_t *src_ptr, int source_stride, int xoffset, int yoffset, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse";
+specialize qw/vp9_sub_pixel_mse64x64/;
+
+add_proto qw/unsigned int vp9_sub_pixel_mse32x32/, "const uint8_t *src_ptr, int source_stride, int xoffset, int yoffset, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse";
+specialize qw/vp9_sub_pixel_mse32x32/;
+
+add_proto qw/unsigned int vp9_get_mb_ss/, "const int16_t *";
+specialize qw/vp9_get_mb_ss mmx sse2/;
+# ENCODEMB INVOKE
+
+add_proto qw/int64_t vp9_block_error/, "const int16_t *coeff, const int16_t *dqcoeff, intptr_t block_size, int64_t *ssz";
+specialize qw/vp9_block_error/, "$sse2_x86inc";
+
+add_proto qw/void vp9_subtract_block/, "int rows, int cols, int16_t *diff_ptr, ptrdiff_t diff_stride, const uint8_t *src_ptr, ptrdiff_t src_stride, const uint8_t *pred_ptr, ptrdiff_t pred_stride";
+specialize qw/vp9_subtract_block/, "$sse2_x86inc";
+
+add_proto qw/void vp9_quantize_b/, "const int16_t *coeff_ptr, intptr_t n_coeffs, int skip_block, const int16_t *zbin_ptr, const int16_t *round_ptr, const int16_t *quant_ptr, const int16_t *quant_shift_ptr, int16_t *qcoeff_ptr, int16_t *dqcoeff_ptr, const int16_t *dequant_ptr, int zbin_oq_value, uint16_t *eob_ptr, const int16_t *scan, const int16_t *iscan";
+specialize qw/vp9_quantize_b/, "$ssse3_x86_64";
+
+add_proto qw/void vp9_quantize_b_32x32/, "const int16_t *coeff_ptr, intptr_t n_coeffs, int skip_block, const int16_t *zbin_ptr, const int16_t *round_ptr, const int16_t *quant_ptr, const int16_t *quant_shift_ptr, int16_t *qcoeff_ptr, int16_t *dqcoeff_ptr, const int16_t *dequant_ptr, int zbin_oq_value, uint16_t *eob_ptr, const int16_t *scan, const int16_t *iscan";
+specialize qw/vp9_quantize_b_32x32/, "$ssse3_x86_64";
+
+#
+# Structured Similarity (SSIM)
+#
+if (vpx_config("CONFIG_INTERNAL_STATS") eq "yes") {
+ add_proto qw/void vp9_ssim_parms_8x8/, "uint8_t *s, int sp, uint8_t *r, int rp, unsigned long *sum_s, unsigned long *sum_r, unsigned long *sum_sq_s, unsigned long *sum_sq_r, unsigned long *sum_sxr";
+ specialize qw/vp9_ssim_parms_8x8/, "$sse2_x86_64";
+
+ add_proto qw/void vp9_ssim_parms_16x16/, "uint8_t *s, int sp, uint8_t *r, int rp, unsigned long *sum_s, unsigned long *sum_r, unsigned long *sum_sq_s, unsigned long *sum_sq_r, unsigned long *sum_sxr";
+ specialize qw/vp9_ssim_parms_16x16/, "$sse2_x86_64";
+}
+
+# fdct functions
+add_proto qw/void vp9_fht4x4/, "const int16_t *input, int16_t *output, int stride, int tx_type";
+specialize qw/vp9_fht4x4 sse2 avx2/;
+
+add_proto qw/void vp9_fht8x8/, "const int16_t *input, int16_t *output, int stride, int tx_type";
+specialize qw/vp9_fht8x8 sse2 avx2/;
+
+add_proto qw/void vp9_fht16x16/, "const int16_t *input, int16_t *output, int stride, int tx_type";
+specialize qw/vp9_fht16x16 sse2 avx2/;
+
+add_proto qw/void vp9_fwht4x4/, "const int16_t *input, int16_t *output, int stride";
+specialize qw/vp9_fwht4x4/;
+
+add_proto qw/void vp9_fdct4x4/, "const int16_t *input, int16_t *output, int stride";
+specialize qw/vp9_fdct4x4 sse2 avx2/;
+
+add_proto qw/void vp9_fdct8x8/, "const int16_t *input, int16_t *output, int stride";
+specialize qw/vp9_fdct8x8 sse2 avx2/;
+
+add_proto qw/void vp9_fdct16x16/, "const int16_t *input, int16_t *output, int stride";
+specialize qw/vp9_fdct16x16 sse2 avx2/;
+
+add_proto qw/void vp9_fdct32x32/, "const int16_t *input, int16_t *output, int stride";
+specialize qw/vp9_fdct32x32 sse2 avx2/;
+
+add_proto qw/void vp9_fdct32x32_rd/, "const int16_t *input, int16_t *output, int stride";
+specialize qw/vp9_fdct32x32_rd sse2 avx2/;
+
+#
+# Motion search
+#
+add_proto qw/int vp9_full_search_sad/, "const struct macroblock *x, const struct mv *ref_mv, int sad_per_bit, int distance, const struct vp9_variance_vtable *fn_ptr, DEC_MVCOSTS, const struct mv *center_mv, struct mv *best_mv";
+specialize qw/vp9_full_search_sad sse3 sse4_1/;
+$vp9_full_search_sad_sse3=vp9_full_search_sadx3;
+$vp9_full_search_sad_sse4_1=vp9_full_search_sadx8;
+
+add_proto qw/int vp9_refining_search_sad/, "const struct macroblock *x, struct mv *ref_mv, int sad_per_bit, int distance, const struct vp9_variance_vtable *fn_ptr, DEC_MVCOSTS, const struct mv *center_mv";
+specialize qw/vp9_refining_search_sad sse3/;
+$vp9_refining_search_sad_sse3=vp9_refining_search_sadx4;
+
+add_proto qw/int vp9_diamond_search_sad/, "const struct macroblock *x, struct mv *ref_mv, struct mv *best_mv, int search_param, int sad_per_bit, int *num00, const struct vp9_variance_vtable *fn_ptr, DEC_MVCOSTS, const struct mv *center_mv";
+specialize qw/vp9_diamond_search_sad sse3/;
+$vp9_diamond_search_sad_sse3=vp9_diamond_search_sadx4;
+
+add_proto qw/int vp9_full_range_search/, "const struct macroblock *x, struct mv *ref_mv, struct mv *best_mv, int search_param, int sad_per_bit, int *num00, const struct vp9_variance_vtable *fn_ptr, DEC_MVCOSTS, const struct mv *center_mv";
+specialize qw/vp9_full_range_search/;
+
+add_proto qw/void vp9_temporal_filter_apply/, "uint8_t *frame1, unsigned int stride, uint8_t *frame2, unsigned int block_size, int strength, int filter_weight, unsigned int *accumulator, uint16_t *count";
+specialize qw/vp9_temporal_filter_apply sse2/;
+
+}
+# end encoder functions
+1;
+++ /dev/null
-vp9_common_forward_decls() {
-cat <<EOF
-/*
- * VP9
- */
-
-#include "vpx/vpx_integer.h"
-#include "vp9/common/vp9_enums.h"
-
-struct macroblockd;
-
-/* Encoder forward decls */
-struct macroblock;
-struct vp9_variance_vtable;
-
-#define DEC_MVCOSTS int *mvjcost, int *mvcost[2]
-struct mv;
-union int_mv;
-struct yv12_buffer_config;
-EOF
-}
-forward_decls vp9_common_forward_decls
-
-# x86inc.asm doesn't work if pic is enabled on 32 bit platforms so no assembly.
-[ "$CONFIG_USE_X86INC" = "yes" ] && mmx_x86inc=mmx && sse_x86inc=sse &&
- sse2_x86inc=sse2 && ssse3_x86inc=ssse3 && avx_x86inc=avx && avx2_x86inc=avx2
-
-# this variable is for functions that are 64 bit only.
-[ $arch = "x86_64" ] && mmx_x86_64=mmx && sse2_x86_64=sse2 &&
- ssse3_x86_64=ssse3 && avx_x86_64=avx && avx2_x86_64=avx2
-
-#
-# RECON
-#
-prototype void vp9_d207_predictor_4x4 "uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left"
-specialize vp9_d207_predictor_4x4 $ssse3_x86inc
-
-prototype void vp9_d45_predictor_4x4 "uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left"
-specialize vp9_d45_predictor_4x4 $ssse3_x86inc
-
-prototype void vp9_d63_predictor_4x4 "uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left"
-specialize vp9_d63_predictor_4x4 $ssse3_x86inc
-
-prototype void vp9_h_predictor_4x4 "uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left"
-specialize vp9_h_predictor_4x4 $ssse3_x86inc neon dspr2
-
-prototype void vp9_d117_predictor_4x4 "uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left"
-specialize vp9_d117_predictor_4x4
-
-prototype void vp9_d135_predictor_4x4 "uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left"
-specialize vp9_d135_predictor_4x4
-
-prototype void vp9_d153_predictor_4x4 "uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left"
-specialize vp9_d153_predictor_4x4 $ssse3_x86inc
-
-prototype void vp9_v_predictor_4x4 "uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left"
-specialize vp9_v_predictor_4x4 $sse_x86inc neon
-
-prototype void vp9_tm_predictor_4x4 "uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left"
-specialize vp9_tm_predictor_4x4 $sse_x86inc neon dspr2
-
-prototype void vp9_dc_predictor_4x4 "uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left"
-specialize vp9_dc_predictor_4x4 $sse_x86inc dspr2
-
-prototype void vp9_dc_top_predictor_4x4 "uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left"
-specialize vp9_dc_top_predictor_4x4
-
-prototype void vp9_dc_left_predictor_4x4 "uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left"
-specialize vp9_dc_left_predictor_4x4
-
-prototype void vp9_dc_128_predictor_4x4 "uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left"
-specialize vp9_dc_128_predictor_4x4
-
-prototype void vp9_d207_predictor_8x8 "uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left"
-specialize vp9_d207_predictor_8x8 $ssse3_x86inc
-
-prototype void vp9_d45_predictor_8x8 "uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left"
-specialize vp9_d45_predictor_8x8 $ssse3_x86inc
-
-prototype void vp9_d63_predictor_8x8 "uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left"
-specialize vp9_d63_predictor_8x8 $ssse3_x86inc
-
-prototype void vp9_h_predictor_8x8 "uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left"
-specialize vp9_h_predictor_8x8 $ssse3_x86inc neon dspr2
-
-prototype void vp9_d117_predictor_8x8 "uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left"
-specialize vp9_d117_predictor_8x8
-
-prototype void vp9_d135_predictor_8x8 "uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left"
-specialize vp9_d135_predictor_8x8
-
-prototype void vp9_d153_predictor_8x8 "uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left"
-specialize vp9_d153_predictor_8x8 $ssse3_x86inc
-
-prototype void vp9_v_predictor_8x8 "uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left"
-specialize vp9_v_predictor_8x8 $sse_x86inc neon
-
-prototype void vp9_tm_predictor_8x8 "uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left"
-specialize vp9_tm_predictor_8x8 $sse2_x86inc neon dspr2
-
-prototype void vp9_dc_predictor_8x8 "uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left"
-specialize vp9_dc_predictor_8x8 $sse_x86inc dspr2
-
-prototype void vp9_dc_top_predictor_8x8 "uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left"
-specialize vp9_dc_top_predictor_8x8
-
-prototype void vp9_dc_left_predictor_8x8 "uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left"
-specialize vp9_dc_left_predictor_8x8
-
-prototype void vp9_dc_128_predictor_8x8 "uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left"
-specialize vp9_dc_128_predictor_8x8
-
-prototype void vp9_d207_predictor_16x16 "uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left"
-specialize vp9_d207_predictor_16x16 $ssse3_x86inc
-
-prototype void vp9_d45_predictor_16x16 "uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left"
-specialize vp9_d45_predictor_16x16 $ssse3_x86inc
-
-prototype void vp9_d63_predictor_16x16 "uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left"
-specialize vp9_d63_predictor_16x16 $ssse3_x86inc
-
-prototype void vp9_h_predictor_16x16 "uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left"
-specialize vp9_h_predictor_16x16 $ssse3_x86inc neon dspr2
-
-prototype void vp9_d117_predictor_16x16 "uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left"
-specialize vp9_d117_predictor_16x16
-
-prototype void vp9_d135_predictor_16x16 "uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left"
-specialize vp9_d135_predictor_16x16
-
-prototype void vp9_d153_predictor_16x16 "uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left"
-specialize vp9_d153_predictor_16x16 $ssse3_x86inc
-
-prototype void vp9_v_predictor_16x16 "uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left"
-specialize vp9_v_predictor_16x16 $sse2_x86inc neon
-
-prototype void vp9_tm_predictor_16x16 "uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left"
-specialize vp9_tm_predictor_16x16 $sse2_x86inc neon
-
-prototype void vp9_dc_predictor_16x16 "uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left"
-specialize vp9_dc_predictor_16x16 $sse2_x86inc dspr2
-
-prototype void vp9_dc_top_predictor_16x16 "uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left"
-specialize vp9_dc_top_predictor_16x16
-
-prototype void vp9_dc_left_predictor_16x16 "uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left"
-specialize vp9_dc_left_predictor_16x16
-
-prototype void vp9_dc_128_predictor_16x16 "uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left"
-specialize vp9_dc_128_predictor_16x16
-
-prototype void vp9_d207_predictor_32x32 "uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left"
-specialize vp9_d207_predictor_32x32 $ssse3_x86inc
-
-prototype void vp9_d45_predictor_32x32 "uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left"
-specialize vp9_d45_predictor_32x32 $ssse3_x86inc
-
-prototype void vp9_d63_predictor_32x32 "uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left"
-specialize vp9_d63_predictor_32x32 $ssse3_x86inc
-
-prototype void vp9_h_predictor_32x32 "uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left"
-specialize vp9_h_predictor_32x32 $ssse3_x86inc neon
-
-prototype void vp9_d117_predictor_32x32 "uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left"
-specialize vp9_d117_predictor_32x32
-
-prototype void vp9_d135_predictor_32x32 "uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left"
-specialize vp9_d135_predictor_32x32
-
-prototype void vp9_d153_predictor_32x32 "uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left"
-specialize vp9_d153_predictor_32x32
-
-prototype void vp9_v_predictor_32x32 "uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left"
-specialize vp9_v_predictor_32x32 $sse2_x86inc neon
-
-prototype void vp9_tm_predictor_32x32 "uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left"
-specialize vp9_tm_predictor_32x32 $sse2_x86_64 neon
-
-prototype void vp9_dc_predictor_32x32 "uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left"
-specialize vp9_dc_predictor_32x32 $sse2_x86inc
-
-prototype void vp9_dc_top_predictor_32x32 "uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left"
-specialize vp9_dc_top_predictor_32x32
-
-prototype void vp9_dc_left_predictor_32x32 "uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left"
-specialize vp9_dc_left_predictor_32x32
-
-prototype void vp9_dc_128_predictor_32x32 "uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left"
-specialize vp9_dc_128_predictor_32x32
-
-#
-# Loopfilter
-#
-prototype void vp9_lpf_vertical_16 "uint8_t *s, int pitch, const uint8_t *blimit, const uint8_t *limit, const uint8_t *thresh"
-specialize vp9_lpf_vertical_16 sse2 neon dspr2
-
-prototype void vp9_lpf_vertical_16_dual "uint8_t *s, int pitch, const uint8_t *blimit, const uint8_t *limit, const uint8_t *thresh"
-specialize vp9_lpf_vertical_16_dual sse2 neon dspr2
-
-prototype void vp9_lpf_vertical_8 "uint8_t *s, int pitch, const uint8_t *blimit, const uint8_t *limit, const uint8_t *thresh, int count"
-specialize vp9_lpf_vertical_8 sse2 neon dspr2
-
-prototype void vp9_lpf_vertical_8_dual "uint8_t *s, int pitch, const uint8_t *blimit0, const uint8_t *limit0, const uint8_t *thresh0, const uint8_t *blimit1, const uint8_t *limit1, const uint8_t *thresh1"
-specialize vp9_lpf_vertical_8_dual sse2 neon dspr2
-
-prototype void vp9_lpf_vertical_4 "uint8_t *s, int pitch, const uint8_t *blimit, const uint8_t *limit, const uint8_t *thresh, int count"
-specialize vp9_lpf_vertical_4 mmx neon dspr2
-
-prototype void vp9_lpf_vertical_4_dual "uint8_t *s, int pitch, const uint8_t *blimit0, const uint8_t *limit0, const uint8_t *thresh0, const uint8_t *blimit1, const uint8_t *limit1, const uint8_t *thresh1"
-specialize vp9_lpf_vertical_4_dual sse2 neon dspr2
-
-prototype void vp9_lpf_horizontal_16 "uint8_t *s, int pitch, const uint8_t *blimit, const uint8_t *limit, const uint8_t *thresh, int count"
-specialize vp9_lpf_horizontal_16 sse2 avx2 neon dspr2
-
-prototype void vp9_lpf_horizontal_8 "uint8_t *s, int pitch, const uint8_t *blimit, const uint8_t *limit, const uint8_t *thresh, int count"
-specialize vp9_lpf_horizontal_8 sse2 neon dspr2
-
-prototype void vp9_lpf_horizontal_8_dual "uint8_t *s, int pitch, const uint8_t *blimit0, const uint8_t *limit0, const uint8_t *thresh0, const uint8_t *blimit1, const uint8_t *limit1, const uint8_t *thresh1"
-specialize vp9_lpf_horizontal_8_dual sse2 neon dspr2
-
-prototype void vp9_lpf_horizontal_4 "uint8_t *s, int pitch, const uint8_t *blimit, const uint8_t *limit, const uint8_t *thresh, int count"
-specialize vp9_lpf_horizontal_4 mmx neon dspr2
-
-prototype void vp9_lpf_horizontal_4_dual "uint8_t *s, int pitch, const uint8_t *blimit0, const uint8_t *limit0, const uint8_t *thresh0, const uint8_t *blimit1, const uint8_t *limit1, const uint8_t *thresh1"
-specialize vp9_lpf_horizontal_4_dual sse2 neon dspr2
-
-#
-# post proc
-#
-if [ "$CONFIG_VP9_POSTPROC" = "yes" ]; then
-prototype void vp9_mbpost_proc_down "uint8_t *dst, int pitch, int rows, int cols, int flimit"
-specialize vp9_mbpost_proc_down mmx sse2
-vp9_mbpost_proc_down_sse2=vp9_mbpost_proc_down_xmm
-
-prototype void vp9_mbpost_proc_across_ip "uint8_t *src, int pitch, int rows, int cols, int flimit"
-specialize vp9_mbpost_proc_across_ip sse2
-vp9_mbpost_proc_across_ip_sse2=vp9_mbpost_proc_across_ip_xmm
-
-prototype void vp9_post_proc_down_and_across "const uint8_t *src_ptr, uint8_t *dst_ptr, int src_pixels_per_line, int dst_pixels_per_line, int rows, int cols, int flimit"
-specialize vp9_post_proc_down_and_across mmx sse2
-vp9_post_proc_down_and_across_sse2=vp9_post_proc_down_and_across_xmm
-
-prototype void vp9_plane_add_noise "uint8_t *Start, char *noise, char blackclamp[16], char whiteclamp[16], char bothclamp[16], unsigned int Width, unsigned int Height, int Pitch"
-specialize vp9_plane_add_noise mmx sse2
-vp9_plane_add_noise_sse2=vp9_plane_add_noise_wmt
-fi
-
-prototype void vp9_blend_mb_inner "uint8_t *y, uint8_t *u, uint8_t *v, int y1, int u1, int v1, int alpha, int stride"
-specialize vp9_blend_mb_inner
-
-prototype void vp9_blend_mb_outer "uint8_t *y, uint8_t *u, uint8_t *v, int y1, int u1, int v1, int alpha, int stride"
-specialize vp9_blend_mb_outer
-
-prototype void vp9_blend_b "uint8_t *y, uint8_t *u, uint8_t *v, int y1, int u1, int v1, int alpha, int stride"
-specialize vp9_blend_b
-
-#
-# Sub Pixel Filters
-#
-prototype void vp9_convolve_copy "const uint8_t *src, ptrdiff_t src_stride, uint8_t *dst, ptrdiff_t dst_stride, const int16_t *filter_x, int x_step_q4, const int16_t *filter_y, int y_step_q4, int w, int h"
-specialize vp9_convolve_copy $sse2_x86inc neon dspr2
-
-prototype void vp9_convolve_avg "const uint8_t *src, ptrdiff_t src_stride, uint8_t *dst, ptrdiff_t dst_stride, const int16_t *filter_x, int x_step_q4, const int16_t *filter_y, int y_step_q4, int w, int h"
-specialize vp9_convolve_avg $sse2_x86inc neon dspr2
-
-prototype void vp9_convolve8 "const uint8_t *src, ptrdiff_t src_stride, uint8_t *dst, ptrdiff_t dst_stride, const int16_t *filter_x, int x_step_q4, const int16_t *filter_y, int y_step_q4, int w, int h"
-specialize vp9_convolve8 sse2 ssse3 avx2 neon dspr2
-
-prototype void vp9_convolve8_horiz "const uint8_t *src, ptrdiff_t src_stride, uint8_t *dst, ptrdiff_t dst_stride, const int16_t *filter_x, int x_step_q4, const int16_t *filter_y, int y_step_q4, int w, int h"
-specialize vp9_convolve8_horiz sse2 ssse3 avx2 neon dspr2
-
-prototype void vp9_convolve8_vert "const uint8_t *src, ptrdiff_t src_stride, uint8_t *dst, ptrdiff_t dst_stride, const int16_t *filter_x, int x_step_q4, const int16_t *filter_y, int y_step_q4, int w, int h"
-specialize vp9_convolve8_vert sse2 ssse3 avx2 neon dspr2
-
-prototype void vp9_convolve8_avg "const uint8_t *src, ptrdiff_t src_stride, uint8_t *dst, ptrdiff_t dst_stride, const int16_t *filter_x, int x_step_q4, const int16_t *filter_y, int y_step_q4, int w, int h"
-specialize vp9_convolve8_avg sse2 ssse3 neon dspr2
-
-prototype void vp9_convolve8_avg_horiz "const uint8_t *src, ptrdiff_t src_stride, uint8_t *dst, ptrdiff_t dst_stride, const int16_t *filter_x, int x_step_q4, const int16_t *filter_y, int y_step_q4, int w, int h"
-specialize vp9_convolve8_avg_horiz sse2 ssse3 neon dspr2
-
-prototype void vp9_convolve8_avg_vert "const uint8_t *src, ptrdiff_t src_stride, uint8_t *dst, ptrdiff_t dst_stride, const int16_t *filter_x, int x_step_q4, const int16_t *filter_y, int y_step_q4, int w, int h"
-specialize vp9_convolve8_avg_vert sse2 ssse3 neon dspr2
-
-#
-# dct
-#
-prototype void vp9_idct4x4_1_add "const int16_t *input, uint8_t *dest, int dest_stride"
-specialize vp9_idct4x4_1_add sse2 neon dspr2
-
-prototype void vp9_idct4x4_16_add "const int16_t *input, uint8_t *dest, int dest_stride"
-specialize vp9_idct4x4_16_add sse2 neon dspr2
-
-prototype void vp9_idct8x8_1_add "const int16_t *input, uint8_t *dest, int dest_stride"
-specialize vp9_idct8x8_1_add sse2 neon dspr2
-
-prototype void vp9_idct8x8_64_add "const int16_t *input, uint8_t *dest, int dest_stride"
-specialize vp9_idct8x8_64_add sse2 neon dspr2
-
-prototype void vp9_idct8x8_10_add "const int16_t *input, uint8_t *dest, int dest_stride"
-specialize vp9_idct8x8_10_add sse2 neon dspr2
-
-prototype void vp9_idct16x16_1_add "const int16_t *input, uint8_t *dest, int dest_stride"
-specialize vp9_idct16x16_1_add sse2 neon dspr2
-
-prototype void vp9_idct16x16_256_add "const int16_t *input, uint8_t *dest, int dest_stride"
-specialize vp9_idct16x16_256_add sse2 neon dspr2
-
-prototype void vp9_idct16x16_10_add "const int16_t *input, uint8_t *dest, int dest_stride"
-specialize vp9_idct16x16_10_add sse2 neon dspr2
-
-prototype void vp9_idct32x32_1024_add "const int16_t *input, uint8_t *dest, int dest_stride"
-specialize vp9_idct32x32_1024_add sse2 neon dspr2
-
-prototype void vp9_idct32x32_34_add "const int16_t *input, uint8_t *dest, int dest_stride"
-specialize vp9_idct32x32_34_add sse2 neon dspr2
-vp9_idct32x32_34_add_neon=vp9_idct32x32_1024_add_neon
-
-prototype void vp9_idct32x32_1_add "const int16_t *input, uint8_t *dest, int dest_stride"
-specialize vp9_idct32x32_1_add sse2 neon dspr2
-
-prototype void vp9_iht4x4_16_add "const int16_t *input, uint8_t *dest, int dest_stride, int tx_type"
-specialize vp9_iht4x4_16_add sse2 neon dspr2
-
-prototype void vp9_iht8x8_64_add "const int16_t *input, uint8_t *dest, int dest_stride, int tx_type"
-specialize vp9_iht8x8_64_add sse2 neon dspr2
-
-prototype void vp9_iht16x16_256_add "const int16_t *input, uint8_t *output, int pitch, int tx_type"
-specialize vp9_iht16x16_256_add sse2 dspr2
-
-# dct and add
-
-prototype void vp9_iwht4x4_1_add "const int16_t *input, uint8_t *dest, int dest_stride"
-specialize vp9_iwht4x4_1_add
-
-prototype void vp9_iwht4x4_16_add "const int16_t *input, uint8_t *dest, int dest_stride"
-specialize vp9_iwht4x4_16_add
-
-#
-# Encoder functions below this point.
-#
-if [ "$CONFIG_VP9_ENCODER" = "yes" ]; then
-
-
-# variance
-prototype unsigned int vp9_variance32x16 "const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse"
-specialize vp9_variance32x16 $sse2_x86inc $avx2_x86inc
-
-prototype unsigned int vp9_variance16x32 "const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse"
-specialize vp9_variance16x32 $sse2_x86inc
-
-prototype unsigned int vp9_variance64x32 "const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse"
-specialize vp9_variance64x32 $sse2_x86inc $avx2_x86inc
-
-prototype unsigned int vp9_variance32x64 "const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse"
-specialize vp9_variance32x64 $sse2_x86inc
-
-prototype unsigned int vp9_variance32x32 "const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse"
-specialize vp9_variance32x32 $sse2_x86inc $avx2_x86inc
-
-prototype unsigned int vp9_variance64x64 "const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse"
-specialize vp9_variance64x64 $sse2_x86inc $avx2_x86inc
-
-prototype unsigned int vp9_variance16x16 "const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse"
-specialize vp9_variance16x16 mmx $sse2_x86inc $avx2_x86inc
-
-prototype unsigned int vp9_variance16x8 "const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse"
-specialize vp9_variance16x8 mmx $sse2_x86inc
-
-prototype unsigned int vp9_variance8x16 "const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse"
-specialize vp9_variance8x16 mmx $sse2_x86inc
-
-prototype unsigned int vp9_variance8x8 "const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse"
-specialize vp9_variance8x8 mmx $sse2_x86inc
-
-prototype void vp9_get_sse_sum_8x8 "const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse, int *sum"
-specialize vp9_get_sse_sum_8x8 sse2
-vp9_get_sse_sum_8x8_sse2=vp9_get8x8var_sse2
-
-prototype unsigned int vp9_variance8x4 "const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse"
-specialize vp9_variance8x4 $sse2_x86inc
-
-prototype unsigned int vp9_variance4x8 "const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse"
-specialize vp9_variance4x8 $sse2_x86inc
-
-prototype unsigned int vp9_variance4x4 "const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse"
-specialize vp9_variance4x4 mmx $sse2_x86inc
-
-prototype unsigned int vp9_sub_pixel_variance64x64 "const uint8_t *src_ptr, int source_stride, int xoffset, int yoffset, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse"
-specialize vp9_sub_pixel_variance64x64 $sse2_x86inc $ssse3_x86inc avx2
-
-prototype unsigned int vp9_sub_pixel_avg_variance64x64 "const uint8_t *src_ptr, int source_stride, int xoffset, int yoffset, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse, const uint8_t *second_pred"
-specialize vp9_sub_pixel_avg_variance64x64 $sse2_x86inc $ssse3_x86inc avx2
-
-prototype unsigned int vp9_sub_pixel_variance32x64 "const uint8_t *src_ptr, int source_stride, int xoffset, int yoffset, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse"
-specialize vp9_sub_pixel_variance32x64 $sse2_x86inc $ssse3_x86inc
-
-prototype unsigned int vp9_sub_pixel_avg_variance32x64 "const uint8_t *src_ptr, int source_stride, int xoffset, int yoffset, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse, const uint8_t *second_pred"
-specialize vp9_sub_pixel_avg_variance32x64 $sse2_x86inc $ssse3_x86inc
-
-prototype unsigned int vp9_sub_pixel_variance64x32 "const uint8_t *src_ptr, int source_stride, int xoffset, int yoffset, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse"
-specialize vp9_sub_pixel_variance64x32 $sse2_x86inc $ssse3_x86inc
-
-prototype unsigned int vp9_sub_pixel_avg_variance64x32 "const uint8_t *src_ptr, int source_stride, int xoffset, int yoffset, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse, const uint8_t *second_pred"
-specialize vp9_sub_pixel_avg_variance64x32 $sse2_x86inc $ssse3_x86inc
-
-prototype unsigned int vp9_sub_pixel_variance32x16 "const uint8_t *src_ptr, int source_stride, int xoffset, int yoffset, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse"
-specialize vp9_sub_pixel_variance32x16 $sse2_x86inc $ssse3_x86inc
-
-prototype unsigned int vp9_sub_pixel_avg_variance32x16 "const uint8_t *src_ptr, int source_stride, int xoffset, int yoffset, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse, const uint8_t *second_pred"
-specialize vp9_sub_pixel_avg_variance32x16 $sse2_x86inc $ssse3_x86inc
-
-prototype unsigned int vp9_sub_pixel_variance16x32 "const uint8_t *src_ptr, int source_stride, int xoffset, int yoffset, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse"
-specialize vp9_sub_pixel_variance16x32 $sse2_x86inc $ssse3_x86inc
-
-prototype unsigned int vp9_sub_pixel_avg_variance16x32 "const uint8_t *src_ptr, int source_stride, int xoffset, int yoffset, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse, const uint8_t *second_pred"
-specialize vp9_sub_pixel_avg_variance16x32 $sse2_x86inc $ssse3_x86inc
-
-prototype unsigned int vp9_sub_pixel_variance32x32 "const uint8_t *src_ptr, int source_stride, int xoffset, int yoffset, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse"
-specialize vp9_sub_pixel_variance32x32 $sse2_x86inc $ssse3_x86inc avx2
-
-prototype unsigned int vp9_sub_pixel_avg_variance32x32 "const uint8_t *src_ptr, int source_stride, int xoffset, int yoffset, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse, const uint8_t *second_pred"
-specialize vp9_sub_pixel_avg_variance32x32 $sse2_x86inc $ssse3_x86inc avx2
-
-prototype unsigned int vp9_sub_pixel_variance16x16 "const uint8_t *src_ptr, int source_stride, int xoffset, int yoffset, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse"
-specialize vp9_sub_pixel_variance16x16 $sse2_x86inc $ssse3_x86inc
-
-prototype unsigned int vp9_sub_pixel_avg_variance16x16 "const uint8_t *src_ptr, int source_stride, int xoffset, int yoffset, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse, const uint8_t *second_pred"
-specialize vp9_sub_pixel_avg_variance16x16 $sse2_x86inc $ssse3_x86inc
-
-prototype unsigned int vp9_sub_pixel_variance8x16 "const uint8_t *src_ptr, int source_stride, int xoffset, int yoffset, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse"
-specialize vp9_sub_pixel_variance8x16 $sse2_x86inc $ssse3_x86inc
-
-prototype unsigned int vp9_sub_pixel_avg_variance8x16 "const uint8_t *src_ptr, int source_stride, int xoffset, int yoffset, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse, const uint8_t *second_pred"
-specialize vp9_sub_pixel_avg_variance8x16 $sse2_x86inc $ssse3_x86inc
-
-prototype unsigned int vp9_sub_pixel_variance16x8 "const uint8_t *src_ptr, int source_stride, int xoffset, int yoffset, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse"
-specialize vp9_sub_pixel_variance16x8 $sse2_x86inc $ssse3_x86inc
-
-prototype unsigned int vp9_sub_pixel_avg_variance16x8 "const uint8_t *src_ptr, int source_stride, int xoffset, int yoffset, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse, const uint8_t *second_pred"
-specialize vp9_sub_pixel_avg_variance16x8 $sse2_x86inc $ssse3_x86inc
-
-prototype unsigned int vp9_sub_pixel_variance8x8 "const uint8_t *src_ptr, int source_stride, int xoffset, int yoffset, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse"
-specialize vp9_sub_pixel_variance8x8 $sse2_x86inc $ssse3_x86inc
-
-prototype unsigned int vp9_sub_pixel_avg_variance8x8 "const uint8_t *src_ptr, int source_stride, int xoffset, int yoffset, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse, const uint8_t *second_pred"
-specialize vp9_sub_pixel_avg_variance8x8 $sse2_x86inc $ssse3_x86inc
-
-# TODO(jingning): need to convert 8x4/4x8 functions into mmx/sse form
-prototype unsigned int vp9_sub_pixel_variance8x4 "const uint8_t *src_ptr, int source_stride, int xoffset, int yoffset, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse"
-specialize vp9_sub_pixel_variance8x4 $sse2_x86inc $ssse3_x86inc
-
-prototype unsigned int vp9_sub_pixel_avg_variance8x4 "const uint8_t *src_ptr, int source_stride, int xoffset, int yoffset, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse, const uint8_t *second_pred"
-specialize vp9_sub_pixel_avg_variance8x4 $sse2_x86inc $ssse3_x86inc
-
-prototype unsigned int vp9_sub_pixel_variance4x8 "const uint8_t *src_ptr, int source_stride, int xoffset, int yoffset, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse"
-specialize vp9_sub_pixel_variance4x8 $sse_x86inc $ssse3_x86inc
-
-prototype unsigned int vp9_sub_pixel_avg_variance4x8 "const uint8_t *src_ptr, int source_stride, int xoffset, int yoffset, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse, const uint8_t *second_pred"
-specialize vp9_sub_pixel_avg_variance4x8 $sse_x86inc $ssse3_x86inc
-
-prototype unsigned int vp9_sub_pixel_variance4x4 "const uint8_t *src_ptr, int source_stride, int xoffset, int yoffset, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse"
-specialize vp9_sub_pixel_variance4x4 $sse_x86inc $ssse3_x86inc
-#vp9_sub_pixel_variance4x4_sse2=vp9_sub_pixel_variance4x4_wmt
-
-prototype unsigned int vp9_sub_pixel_avg_variance4x4 "const uint8_t *src_ptr, int source_stride, int xoffset, int yoffset, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse, const uint8_t *second_pred"
-specialize vp9_sub_pixel_avg_variance4x4 $sse_x86inc $ssse3_x86inc
-
-prototype unsigned int vp9_sad64x64 "const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int max_sad"
-specialize vp9_sad64x64 $sse2_x86inc
-
-prototype unsigned int vp9_sad32x64 "const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int max_sad"
-specialize vp9_sad32x64 $sse2_x86inc
-
-prototype unsigned int vp9_sad64x32 "const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int max_sad"
-specialize vp9_sad64x32 $sse2_x86inc
-
-prototype unsigned int vp9_sad32x16 "const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int max_sad"
-specialize vp9_sad32x16 $sse2_x86inc
-
-prototype unsigned int vp9_sad16x32 "const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int max_sad"
-specialize vp9_sad16x32 $sse2_x86inc
-
-prototype unsigned int vp9_sad32x32 "const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int max_sad"
-specialize vp9_sad32x32 $sse2_x86inc
-
-prototype unsigned int vp9_sad16x16 "const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int max_sad"
-specialize vp9_sad16x16 mmx $sse2_x86inc
-
-prototype unsigned int vp9_sad16x8 "const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int max_sad"
-specialize vp9_sad16x8 mmx $sse2_x86inc
-
-prototype unsigned int vp9_sad8x16 "const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int max_sad"
-specialize vp9_sad8x16 mmx $sse2_x86inc
-
-prototype unsigned int vp9_sad8x8 "const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int max_sad"
-specialize vp9_sad8x8 mmx $sse2_x86inc
-
-prototype unsigned int vp9_sad8x4 "const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int max_sad"
-specialize vp9_sad8x4 $sse2_x86inc
-
-prototype unsigned int vp9_sad4x8 "const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int max_sad"
-specialize vp9_sad4x8 $sse_x86inc
-
-prototype unsigned int vp9_sad4x4 "const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int max_sad"
-specialize vp9_sad4x4 mmx $sse_x86inc
-
-prototype unsigned int vp9_sad64x64_avg "const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, const uint8_t *second_pred, unsigned int max_sad"
-specialize vp9_sad64x64_avg $sse2_x86inc
-
-prototype unsigned int vp9_sad32x64_avg "const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, const uint8_t *second_pred, unsigned int max_sad"
-specialize vp9_sad32x64_avg $sse2_x86inc
-
-prototype unsigned int vp9_sad64x32_avg "const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, const uint8_t *second_pred, unsigned int max_sad"
-specialize vp9_sad64x32_avg $sse2_x86inc
-
-prototype unsigned int vp9_sad32x16_avg "const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, const uint8_t *second_pred, unsigned int max_sad"
-specialize vp9_sad32x16_avg $sse2_x86inc
-
-prototype unsigned int vp9_sad16x32_avg "const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, const uint8_t *second_pred, unsigned int max_sad"
-specialize vp9_sad16x32_avg $sse2_x86inc
-
-prototype unsigned int vp9_sad32x32_avg "const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, const uint8_t *second_pred, unsigned int max_sad"
-specialize vp9_sad32x32_avg $sse2_x86inc
-
-prototype unsigned int vp9_sad16x16_avg "const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, const uint8_t *second_pred, unsigned int max_sad"
-specialize vp9_sad16x16_avg $sse2_x86inc
-
-prototype unsigned int vp9_sad16x8_avg "const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, const uint8_t *second_pred, unsigned int max_sad"
-specialize vp9_sad16x8_avg $sse2_x86inc
-
-prototype unsigned int vp9_sad8x16_avg "const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, const uint8_t *second_pred, unsigned int max_sad"
-specialize vp9_sad8x16_avg $sse2_x86inc
-
-prototype unsigned int vp9_sad8x8_avg "const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, const uint8_t *second_pred, unsigned int max_sad"
-specialize vp9_sad8x8_avg $sse2_x86inc
-
-prototype unsigned int vp9_sad8x4_avg "const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, const uint8_t *second_pred, unsigned int max_sad"
-specialize vp9_sad8x4_avg $sse2_x86inc
-
-prototype unsigned int vp9_sad4x8_avg "const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, const uint8_t *second_pred, unsigned int max_sad"
-specialize vp9_sad4x8_avg $sse_x86inc
-
-prototype unsigned int vp9_sad4x4_avg "const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, const uint8_t *second_pred, unsigned int max_sad"
-specialize vp9_sad4x4_avg $sse_x86inc
-
-prototype unsigned int vp9_variance_halfpixvar16x16_h "const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse"
-specialize vp9_variance_halfpixvar16x16_h $sse2_x86inc
-
-prototype unsigned int vp9_variance_halfpixvar16x16_v "const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse"
-specialize vp9_variance_halfpixvar16x16_v $sse2_x86inc
-
-prototype unsigned int vp9_variance_halfpixvar16x16_hv "const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse"
-specialize vp9_variance_halfpixvar16x16_hv $sse2_x86inc
-
-prototype unsigned int vp9_variance_halfpixvar64x64_h "const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse"
-specialize vp9_variance_halfpixvar64x64_h
-
-prototype unsigned int vp9_variance_halfpixvar64x64_v "const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse"
-specialize vp9_variance_halfpixvar64x64_v
-
-prototype unsigned int vp9_variance_halfpixvar64x64_hv "const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse"
-specialize vp9_variance_halfpixvar64x64_hv
-
-prototype unsigned int vp9_variance_halfpixvar32x32_h "const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse"
-specialize vp9_variance_halfpixvar32x32_h
-
-prototype unsigned int vp9_variance_halfpixvar32x32_v "const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse"
-specialize vp9_variance_halfpixvar32x32_v
-
-prototype unsigned int vp9_variance_halfpixvar32x32_hv "const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse"
-specialize vp9_variance_halfpixvar32x32_hv
-
-prototype void vp9_sad64x64x3 "const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sad_array"
-specialize vp9_sad64x64x3
-
-prototype void vp9_sad32x32x3 "const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sad_array"
-specialize vp9_sad32x32x3
-
-prototype void vp9_sad16x16x3 "const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sad_array"
-specialize vp9_sad16x16x3 sse3 ssse3
-
-prototype void vp9_sad16x8x3 "const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sad_array"
-specialize vp9_sad16x8x3 sse3 ssse3
-
-prototype void vp9_sad8x16x3 "const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sad_array"
-specialize vp9_sad8x16x3 sse3
-
-prototype void vp9_sad8x8x3 "const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sad_array"
-specialize vp9_sad8x8x3 sse3
-
-prototype void vp9_sad4x4x3 "const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sad_array"
-specialize vp9_sad4x4x3 sse3
-
-prototype void vp9_sad64x64x8 "const uint8_t *src_ptr, int src_stride, const uint8_t *ref_ptr, int ref_stride, uint32_t *sad_array"
-specialize vp9_sad64x64x8
-
-prototype void vp9_sad32x32x8 "const uint8_t *src_ptr, int src_stride, const uint8_t *ref_ptr, int ref_stride, uint32_t *sad_array"
-specialize vp9_sad32x32x8
-
-prototype void vp9_sad16x16x8 "const uint8_t *src_ptr, int src_stride, const uint8_t *ref_ptr, int ref_stride, uint32_t *sad_array"
-specialize vp9_sad16x16x8 sse4
-
-prototype void vp9_sad16x8x8 "const uint8_t *src_ptr, int src_stride, const uint8_t *ref_ptr, int ref_stride, uint32_t *sad_array"
-specialize vp9_sad16x8x8 sse4
-
-prototype void vp9_sad8x16x8 "const uint8_t *src_ptr, int src_stride, const uint8_t *ref_ptr, int ref_stride, uint32_t *sad_array"
-specialize vp9_sad8x16x8 sse4
-
-prototype void vp9_sad8x8x8 "const uint8_t *src_ptr, int src_stride, const uint8_t *ref_ptr, int ref_stride, uint32_t *sad_array"
-specialize vp9_sad8x8x8 sse4
-
-prototype void vp9_sad8x4x8 "const uint8_t *src_ptr, int src_stride, const uint8_t *ref_ptr, int ref_stride, uint32_t *sad_array"
-specialize vp9_sad8x4x8
-
-prototype void vp9_sad4x8x8 "const uint8_t *src_ptr, int src_stride, const uint8_t *ref_ptr, int ref_stride, uint32_t *sad_array"
-specialize vp9_sad4x8x8
-
-prototype void vp9_sad4x4x8 "const uint8_t *src_ptr, int src_stride, const uint8_t *ref_ptr, int ref_stride, uint32_t *sad_array"
-specialize vp9_sad4x4x8 sse4
-
-prototype void vp9_sad64x64x4d "const uint8_t *src_ptr, int src_stride, const uint8_t* const ref_ptr[], int ref_stride, unsigned int *sad_array"
-specialize vp9_sad64x64x4d sse2
-
-prototype void vp9_sad32x64x4d "const uint8_t *src_ptr, int src_stride, const uint8_t* const ref_ptr[], int ref_stride, unsigned int *sad_array"
-specialize vp9_sad32x64x4d sse2
-
-prototype void vp9_sad64x32x4d "const uint8_t *src_ptr, int src_stride, const uint8_t* const ref_ptr[], int ref_stride, unsigned int *sad_array"
-specialize vp9_sad64x32x4d sse2
-
-prototype void vp9_sad32x16x4d "const uint8_t *src_ptr, int src_stride, const uint8_t* const ref_ptr[], int ref_stride, unsigned int *sad_array"
-specialize vp9_sad32x16x4d sse2
-
-prototype void vp9_sad16x32x4d "const uint8_t *src_ptr, int src_stride, const uint8_t* const ref_ptr[], int ref_stride, unsigned int *sad_array"
-specialize vp9_sad16x32x4d sse2
-
-prototype void vp9_sad32x32x4d "const uint8_t *src_ptr, int src_stride, const uint8_t* const ref_ptr[], int ref_stride, unsigned int *sad_array"
-specialize vp9_sad32x32x4d sse2
-
-prototype void vp9_sad16x16x4d "const uint8_t *src_ptr, int src_stride, const uint8_t* const ref_ptr[], int ref_stride, unsigned int *sad_array"
-specialize vp9_sad16x16x4d sse2
-
-prototype void vp9_sad16x8x4d "const uint8_t *src_ptr, int src_stride, const uint8_t* const ref_ptr[], int ref_stride, unsigned int *sad_array"
-specialize vp9_sad16x8x4d sse2
-
-prototype void vp9_sad8x16x4d "const uint8_t *src_ptr, int src_stride, const uint8_t* const ref_ptr[], int ref_stride, unsigned int *sad_array"
-specialize vp9_sad8x16x4d sse2
-
-prototype void vp9_sad8x8x4d "const uint8_t *src_ptr, int src_stride, const uint8_t* const ref_ptr[], int ref_stride, unsigned int *sad_array"
-specialize vp9_sad8x8x4d sse2
-
-# TODO(jingning): need to convert these 4x8/8x4 functions into sse2 form
-prototype void vp9_sad8x4x4d "const uint8_t *src_ptr, int src_stride, const uint8_t* const ref_ptr[], int ref_stride, unsigned int *sad_array"
-specialize vp9_sad8x4x4d sse2
-
-prototype void vp9_sad4x8x4d "const uint8_t *src_ptr, int src_stride, const uint8_t* const ref_ptr[], int ref_stride, unsigned int *sad_array"
-specialize vp9_sad4x8x4d sse
-
-prototype void vp9_sad4x4x4d "const uint8_t *src_ptr, int src_stride, const uint8_t* const ref_ptr[], int ref_stride, unsigned int *sad_array"
-specialize vp9_sad4x4x4d sse
-
-#prototype unsigned int vp9_sub_pixel_mse16x16 "const uint8_t *src_ptr, int src_pixels_per_line, int xoffset, int yoffset, const uint8_t *dst_ptr, int dst_pixels_per_line, unsigned int *sse"
-#specialize vp9_sub_pixel_mse16x16 sse2 mmx
-
-prototype unsigned int vp9_mse16x16 "const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int recon_stride, unsigned int *sse"
-specialize vp9_mse16x16 mmx $sse2_x86inc $avx2_x86inc
-
-prototype unsigned int vp9_mse8x16 "const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int recon_stride, unsigned int *sse"
-specialize vp9_mse8x16
-
-prototype unsigned int vp9_mse16x8 "const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int recon_stride, unsigned int *sse"
-specialize vp9_mse16x8
-
-prototype unsigned int vp9_mse8x8 "const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int recon_stride, unsigned int *sse"
-specialize vp9_mse8x8
-
-prototype unsigned int vp9_sub_pixel_mse64x64 "const uint8_t *src_ptr, int source_stride, int xoffset, int yoffset, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse"
-specialize vp9_sub_pixel_mse64x64
-
-prototype unsigned int vp9_sub_pixel_mse32x32 "const uint8_t *src_ptr, int source_stride, int xoffset, int yoffset, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse"
-specialize vp9_sub_pixel_mse32x32
-
-prototype unsigned int vp9_get_mb_ss "const int16_t *"
-specialize vp9_get_mb_ss mmx sse2
-# ENCODEMB INVOKE
-
-prototype int64_t vp9_block_error "const int16_t *coeff, const int16_t *dqcoeff, intptr_t block_size, int64_t *ssz"
-specialize vp9_block_error $sse2_x86inc
-
-prototype void vp9_subtract_block "int rows, int cols, int16_t *diff_ptr, ptrdiff_t diff_stride, const uint8_t *src_ptr, ptrdiff_t src_stride, const uint8_t *pred_ptr, ptrdiff_t pred_stride"
-specialize vp9_subtract_block $sse2_x86inc
-
-prototype void vp9_quantize_b "const int16_t *coeff_ptr, intptr_t n_coeffs, int skip_block, const int16_t *zbin_ptr, const int16_t *round_ptr, const int16_t *quant_ptr, const int16_t *quant_shift_ptr, int16_t *qcoeff_ptr, int16_t *dqcoeff_ptr, const int16_t *dequant_ptr, int zbin_oq_value, uint16_t *eob_ptr, const int16_t *scan, const int16_t *iscan"
-specialize vp9_quantize_b $ssse3_x86_64
-
-prototype void vp9_quantize_b_32x32 "const int16_t *coeff_ptr, intptr_t n_coeffs, int skip_block, const int16_t *zbin_ptr, const int16_t *round_ptr, const int16_t *quant_ptr, const int16_t *quant_shift_ptr, int16_t *qcoeff_ptr, int16_t *dqcoeff_ptr, const int16_t *dequant_ptr, int zbin_oq_value, uint16_t *eob_ptr, const int16_t *scan, const int16_t *iscan"
-specialize vp9_quantize_b_32x32 $ssse3_x86_64
-
-#
-# Structured Similarity (SSIM)
-#
-if [ "$CONFIG_INTERNAL_STATS" = "yes" ]; then
- prototype void vp9_ssim_parms_8x8 "uint8_t *s, int sp, uint8_t *r, int rp, unsigned long *sum_s, unsigned long *sum_r, unsigned long *sum_sq_s, unsigned long *sum_sq_r, unsigned long *sum_sxr"
- specialize vp9_ssim_parms_8x8 $sse2_x86_64
-
- prototype void vp9_ssim_parms_16x16 "uint8_t *s, int sp, uint8_t *r, int rp, unsigned long *sum_s, unsigned long *sum_r, unsigned long *sum_sq_s, unsigned long *sum_sq_r, unsigned long *sum_sxr"
- specialize vp9_ssim_parms_16x16 $sse2_x86_64
-fi
-
-# fdct functions
-prototype void vp9_fht4x4 "const int16_t *input, int16_t *output, int stride, int tx_type"
-specialize vp9_fht4x4 sse2 avx2
-
-prototype void vp9_fht8x8 "const int16_t *input, int16_t *output, int stride, int tx_type"
-specialize vp9_fht8x8 sse2 avx2
-
-prototype void vp9_fht16x16 "const int16_t *input, int16_t *output, int stride, int tx_type"
-specialize vp9_fht16x16 sse2 avx2
-
-prototype void vp9_fwht4x4 "const int16_t *input, int16_t *output, int stride"
-specialize vp9_fwht4x4
-
-prototype void vp9_fdct4x4 "const int16_t *input, int16_t *output, int stride"
-specialize vp9_fdct4x4 sse2 avx2
-
-prototype void vp9_fdct8x8 "const int16_t *input, int16_t *output, int stride"
-specialize vp9_fdct8x8 sse2 avx2
-
-prototype void vp9_fdct16x16 "const int16_t *input, int16_t *output, int stride"
-specialize vp9_fdct16x16 sse2 avx2
-
-prototype void vp9_fdct32x32 "const int16_t *input, int16_t *output, int stride"
-specialize vp9_fdct32x32 sse2 avx2
-
-prototype void vp9_fdct32x32_rd "const int16_t *input, int16_t *output, int stride"
-specialize vp9_fdct32x32_rd sse2 avx2
-
-#
-# Motion search
-#
-prototype int vp9_full_search_sad "const struct macroblock *x, const struct mv *ref_mv, int sad_per_bit, int distance, const struct vp9_variance_vtable *fn_ptr, DEC_MVCOSTS, const struct mv *center_mv, struct mv *best_mv"
-specialize vp9_full_search_sad sse3 sse4_1
-vp9_full_search_sad_sse3=vp9_full_search_sadx3
-vp9_full_search_sad_sse4_1=vp9_full_search_sadx8
-
-prototype int vp9_refining_search_sad "const struct macroblock *x, struct mv *ref_mv, int sad_per_bit, int distance, const struct vp9_variance_vtable *fn_ptr, DEC_MVCOSTS, const struct mv *center_mv"
-specialize vp9_refining_search_sad sse3
-vp9_refining_search_sad_sse3=vp9_refining_search_sadx4
-
-prototype int vp9_diamond_search_sad "const struct macroblock *x, struct mv *ref_mv, struct mv *best_mv, int search_param, int sad_per_bit, int *num00, const struct vp9_variance_vtable *fn_ptr, DEC_MVCOSTS, const struct mv *center_mv"
-specialize vp9_diamond_search_sad sse3
-vp9_diamond_search_sad_sse3=vp9_diamond_search_sadx4
-
-prototype int vp9_full_range_search "const struct macroblock *x, struct mv *ref_mv, struct mv *best_mv, int search_param, int sad_per_bit, int *num00, const struct vp9_variance_vtable *fn_ptr, DEC_MVCOSTS, const struct mv *center_mv"
-specialize vp9_full_range_search
-
-prototype void vp9_temporal_filter_apply "uint8_t *frame1, unsigned int stride, uint8_t *frame2, unsigned int block_size, int strength, int filter_weight, unsigned int *accumulator, uint16_t *count"
-specialize vp9_temporal_filter_apply sse2
-
-fi
-# end encoder functions
VP9_COMMON_SRCS-yes += common/vp9_reconinter.h
VP9_COMMON_SRCS-yes += common/vp9_reconintra.h
VP9_COMMON_SRCS-yes += common/vp9_rtcd.c
-VP9_COMMON_SRCS-yes += common/vp9_rtcd_defs.sh
+VP9_COMMON_SRCS-yes += common/vp9_rtcd_defs.pl
VP9_COMMON_SRCS-yes += common/vp9_scale.h
VP9_COMMON_SRCS-yes += common/vp9_scale.c
VP9_COMMON_SRCS-yes += common/vp9_seg_common.h
VP9_COMMON_SRCS-$(HAVE_NEON) += common/arm/neon/vp9_save_reg_neon$(ASM)
VP9_COMMON_SRCS-$(HAVE_NEON) += common/arm/neon/vp9_reconintra_neon$(ASM)
-$(eval $(call rtcd_h_template,vp9_rtcd,vp9/common/vp9_rtcd_defs.sh))
+$(eval $(call rtcd_h_template,vp9_rtcd,vp9/common/vp9_rtcd_defs.pl))
SCALE_SRCS-$(CONFIG_SPATIAL_RESAMPLING) += generic/gen_scalers.c
SCALE_SRCS-yes += vpx_scale_asm_offsets.c
SCALE_SRCS-yes += vpx_scale_rtcd.c
-SCALE_SRCS-yes += vpx_scale_rtcd.sh
+SCALE_SRCS-yes += vpx_scale_rtcd.pl
#neon
SCALE_SRCS-$(HAVE_NEON) += arm/neon/vp8_vpxyv12_copyframe_func_neon$(ASM)
$(eval $(call asm_offsets_template,\
vpx_scale_asm_offsets.asm, vpx_scale/vpx_scale_asm_offsets.c))
-$(eval $(call rtcd_h_template,vpx_scale_rtcd,vpx_scale/vpx_scale_rtcd.sh))
+$(eval $(call rtcd_h_template,vpx_scale_rtcd,vpx_scale/vpx_scale_rtcd.pl))
--- /dev/null
+sub vpx_scale_forward_decls() {
+print <<EOF
+struct yv12_buffer_config;
+EOF
+}
+forward_decls qw/vpx_scale_forward_decls/;
+
+# Scaler functions
+if (vpx_config("CONFIG_SPATIAL_RESAMPLING") eq "yes") {
+ add_proto qw/void vp8_horizontal_line_5_4_scale/, "const unsigned char *source, unsigned int source_width, unsigned char *dest, unsigned int dest_width";
+ add_proto qw/void vp8_vertical_band_5_4_scale/, "unsigned char *source, unsigned int src_pitch, unsigned char *dest, unsigned int dest_pitch, unsigned int dest_width";
+ add_proto qw/void vp8_horizontal_line_5_3_scale/, "const unsigned char *source, unsigned int source_width, unsigned char *dest, unsigned int dest_width";
+ add_proto qw/void vp8_vertical_band_5_3_scale/, "unsigned char *source, unsigned int src_pitch, unsigned char *dest, unsigned int dest_pitch, unsigned int dest_width";
+ add_proto qw/void vp8_horizontal_line_2_1_scale/, "const unsigned char *source, unsigned int source_width, unsigned char *dest, unsigned int dest_width";
+ add_proto qw/void vp8_vertical_band_2_1_scale/, "unsigned char *source, unsigned int src_pitch, unsigned char *dest, unsigned int dest_pitch, unsigned int dest_width";
+ add_proto qw/void vp8_vertical_band_2_1_scale_i/, "unsigned char *source, unsigned int src_pitch, unsigned char *dest, unsigned int dest_pitch, unsigned int dest_width";
+}
+
+add_proto qw/void vp8_yv12_extend_frame_borders/, "struct yv12_buffer_config *ybf";
+specialize qw/vp8_yv12_extend_frame_borders neon/;
+
+add_proto qw/void vp8_yv12_copy_frame/, "const struct yv12_buffer_config *src_ybc, struct yv12_buffer_config *dst_ybc";
+specialize qw/vp8_yv12_copy_frame neon/;
+
+add_proto qw/void vpx_yv12_copy_y/, "const struct yv12_buffer_config *src_ybc, struct yv12_buffer_config *dst_ybc";
+specialize qw/vpx_yv12_copy_y neon/;
+
+if (vpx_config("CONFIG_VP9") eq "yes") {
+ add_proto qw/void vp9_extend_frame_borders/, "struct yv12_buffer_config *ybf";
+ specialize qw/vp9_extend_frame_borders dspr2/;
+
+ add_proto qw/void vp9_extend_frame_inner_borders/, "struct yv12_buffer_config *ybf";
+ specialize qw/vp9_extend_frame_inner_borders dspr2/;
+}
+1;
+++ /dev/null
-vpx_scale_forward_decls() {
-cat <<EOF
-struct yv12_buffer_config;
-EOF
-}
-forward_decls vpx_scale_forward_decls
-
-# Scaler functions
-if [ "$CONFIG_SPATIAL_RESAMPLING" = "yes" ]; then
- prototype void vp8_horizontal_line_5_4_scale "const unsigned char *source, unsigned int source_width, unsigned char *dest, unsigned int dest_width"
- prototype void vp8_vertical_band_5_4_scale "unsigned char *source, unsigned int src_pitch, unsigned char *dest, unsigned int dest_pitch, unsigned int dest_width"
- prototype void vp8_horizontal_line_5_3_scale "const unsigned char *source, unsigned int source_width, unsigned char *dest, unsigned int dest_width"
- prototype void vp8_vertical_band_5_3_scale "unsigned char *source, unsigned int src_pitch, unsigned char *dest, unsigned int dest_pitch, unsigned int dest_width"
- prototype void vp8_horizontal_line_2_1_scale "const unsigned char *source, unsigned int source_width, unsigned char *dest, unsigned int dest_width"
- prototype void vp8_vertical_band_2_1_scale "unsigned char *source, unsigned int src_pitch, unsigned char *dest, unsigned int dest_pitch, unsigned int dest_width"
- prototype void vp8_vertical_band_2_1_scale_i "unsigned char *source, unsigned int src_pitch, unsigned char *dest, unsigned int dest_pitch, unsigned int dest_width"
-fi
-
-prototype void vp8_yv12_extend_frame_borders "struct yv12_buffer_config *ybf"
-specialize vp8_yv12_extend_frame_borders neon
-
-prototype void vp8_yv12_copy_frame "const struct yv12_buffer_config *src_ybc, struct yv12_buffer_config *dst_ybc"
-specialize vp8_yv12_copy_frame neon
-
-prototype void vpx_yv12_copy_y "const struct yv12_buffer_config *src_ybc, struct yv12_buffer_config *dst_ybc"
-specialize vpx_yv12_copy_y neon
-
-if [ "$CONFIG_VP9" = "yes" ]; then
- prototype void vp9_extend_frame_borders "struct yv12_buffer_config *ybf"
- specialize vp9_extend_frame_borders dspr2
-
- prototype void vp9_extend_frame_inner_borders "struct yv12_buffer_config *ybf"
- specialize vp9_extend_frame_inner_borders dspr2
-fi