Add runtime CPU detection support for ARM.

author Timothy B. Terriberry <tterribe@xiph.org>

Wed, 20 Oct 2010 22:39:11 +0000 (15:39 -0700)

committer Johann <johannkoenig@google.com>

Mon, 25 Oct 2010 13:23:29 +0000 (09:23 -0400)
author Timothy B. Terriberry <tterribe@xiph.org>
Wed, 20 Oct 2010 22:39:11 +0000 (15:39 -0700)
committer Johann <johannkoenig@google.com>
Mon, 25 Oct 2010 13:23:29 +0000 (09:23 -0400)
diff --git a/libs.mk b/libs.mk

index 4beaa50..9ded394 100644 (file)
--- a/libs.mk
+++ b/libs.mk
@@ -93,6 +93,7 @@ CODEC_SRCS-$(BUILD_LIBVPX) += vpx_ports/x86.h
  CODEC_SRCS-$(BUILD_LIBVPX) += vpx_ports/x86_abi_support.asm
  CODEC_SRCS-$(BUILD_LIBVPX) += vpx_ports/x86_cpuid.c
  endif
+CODEC_SRCS-$(ARCH_ARM) += vpx_ports/arm_cpudetect.c
  CODEC_SRCS-$(ARCH_ARM) += $(BUILD_PFX)vpx_config.asm
  CODEC_EXPORTS-$(BUILD_LIBVPX) += vpx/exports_com
  CODEC_EXPORTS-$(CONFIG_ENCODERS) += vpx/exports_enc
diff --git a/vp8/common/arm/arm_systemdependent.c b/vp8/common/arm/arm_systemdependent.c

new file mode 100644 (file)

index 0000000..fe62fae
--- /dev/null
+++ b/vp8/common/arm/arm_systemdependent.c
@@ -0,0 +1,134 @@
+/*
+ *  Copyright (c) 2010 The WebM project authors. All Rights Reserved.
+ *
+ *  Use of this source code is governed by a BSD-style license
+ *  that can be found in the LICENSE file in the root of the source
+ *  tree. An additional intellectual property rights grant can be found
+ *  in the file PATENTS.  All contributing project authors may
+ *  be found in the AUTHORS file in the root of the source tree.
+ */
+
+
+#include "vpx_ports/config.h"
+#include "vpx_ports/arm.h"
+#include "g_common.h"
+#include "pragmas.h"
+#include "subpixel.h"
+#include "loopfilter.h"
+#include "recon.h"
+#include "idct.h"
+#include "onyxc_int.h"
+
+extern void (*vp8_build_intra_predictors_mby_ptr)(MACROBLOCKD *x);
+extern void vp8_build_intra_predictors_mby(MACROBLOCKD *x);
+extern void vp8_build_intra_predictors_mby_neon(MACROBLOCKD *x);
+
+extern void (*vp8_build_intra_predictors_mby_s_ptr)(MACROBLOCKD *x);
+extern void vp8_build_intra_predictors_mby_s(MACROBLOCKD *x);
+extern void vp8_build_intra_predictors_mby_s_neon(MACROBLOCKD *x);
+
+/* Installs ARM-optimized routines: fills ctx->rtcd in runtime-CPU-detect
+ * builds (gated on the arm_cpu_caps() capability bits) and selects the
+ * global intra-predictor function pointers for ARMv6 media / NEON. */
+void vp8_arch_arm_common_init(VP8_COMMON *ctx)
+{
+#if CONFIG_RUNTIME_CPU_DETECT
+    VP8_COMMON_RTCD *rtcd = &ctx->rtcd;
+    int flags = arm_cpu_caps();
+    rtcd->flags = flags;
+
+    /* Override defaults with the fastest versions; later blocks (NEON) win. */
+#if HAVE_ARMV6
+    if (flags & HAS_MEDIA)
+    {
+        rtcd->subpix.sixtap16x16   = vp8_sixtap_predict16x16_armv6;
+        rtcd->subpix.sixtap8x8     = vp8_sixtap_predict8x8_armv6;
+        rtcd->subpix.sixtap8x4     = vp8_sixtap_predict8x4_armv6;
+        rtcd->subpix.sixtap4x4     = vp8_sixtap_predict_armv6;
+        rtcd->subpix.bilinear16x16 = vp8_bilinear_predict16x16_armv6;
+        rtcd->subpix.bilinear8x8   = vp8_bilinear_predict8x8_armv6;
+        rtcd->subpix.bilinear8x4   = vp8_bilinear_predict8x4_armv6;
+        rtcd->subpix.bilinear4x4   = vp8_bilinear_predict4x4_armv6;
+
+        rtcd->idct.idct1        = vp8_short_idct4x4llm_1_v6;
+        rtcd->idct.idct16       = vp8_short_idct4x4llm_v6_dual;
+        rtcd->idct.iwalsh1      = vp8_short_inv_walsh4x4_1_v6;
+        rtcd->idct.iwalsh16     = vp8_short_inv_walsh4x4_v6;
+
+        rtcd->loopfilter.normal_mb_v = vp8_loop_filter_mbv_armv6;
+        rtcd->loopfilter.normal_b_v  = vp8_loop_filter_bv_armv6;
+        rtcd->loopfilter.normal_mb_h = vp8_loop_filter_mbh_armv6;
+        rtcd->loopfilter.normal_b_h  = vp8_loop_filter_bh_armv6;
+        rtcd->loopfilter.simple_mb_v = vp8_loop_filter_mbvs_armv6;
+        rtcd->loopfilter.simple_b_v  = vp8_loop_filter_bvs_armv6;
+        rtcd->loopfilter.simple_mb_h = vp8_loop_filter_mbhs_armv6;
+        rtcd->loopfilter.simple_b_h  = vp8_loop_filter_bhs_armv6;
+
+        rtcd->recon.copy16x16   = vp8_copy_mem16x16_v6;
+        rtcd->recon.copy8x8     = vp8_copy_mem8x8_v6;
+        rtcd->recon.copy8x4     = vp8_copy_mem8x4_v6;
+        rtcd->recon.recon       = vp8_recon_b_armv6;
+        rtcd->recon.recon2      = vp8_recon2b_armv6;
+        rtcd->recon.recon4      = vp8_recon4b_armv6;
+    }
+#endif /* HAVE_ARMV6 */
+
+#if HAVE_ARMV7
+    if (flags & HAS_NEON)
+    {
+        rtcd->subpix.sixtap16x16   = vp8_sixtap_predict16x16_neon;
+        rtcd->subpix.sixtap8x8     = vp8_sixtap_predict8x8_neon;
+        rtcd->subpix.sixtap8x4     = vp8_sixtap_predict8x4_neon;
+        rtcd->subpix.sixtap4x4     = vp8_sixtap_predict_neon;
+        rtcd->subpix.bilinear16x16 = vp8_bilinear_predict16x16_neon;
+        rtcd->subpix.bilinear8x8   = vp8_bilinear_predict8x8_neon;
+        rtcd->subpix.bilinear8x4   = vp8_bilinear_predict8x4_neon;
+        rtcd->subpix.bilinear4x4   = vp8_bilinear_predict4x4_neon;
+
+        rtcd->idct.idct1        = vp8_short_idct4x4llm_1_neon;
+        rtcd->idct.idct16       = vp8_short_idct4x4llm_neon;
+        rtcd->idct.iwalsh1      = vp8_short_inv_walsh4x4_1_neon;
+        rtcd->idct.iwalsh16     = vp8_short_inv_walsh4x4_neon;
+
+        rtcd->loopfilter.normal_mb_v = vp8_loop_filter_mbv_neon;
+        rtcd->loopfilter.normal_b_v  = vp8_loop_filter_bv_neon;
+        rtcd->loopfilter.normal_mb_h = vp8_loop_filter_mbh_neon;
+        rtcd->loopfilter.normal_b_h  = vp8_loop_filter_bh_neon;
+        rtcd->loopfilter.simple_mb_v = vp8_loop_filter_mbvs_neon;
+        rtcd->loopfilter.simple_b_v  = vp8_loop_filter_bvs_neon;
+        rtcd->loopfilter.simple_mb_h = vp8_loop_filter_mbhs_neon;
+        rtcd->loopfilter.simple_b_h  = vp8_loop_filter_bhs_neon;
+
+        rtcd->recon.copy16x16   = vp8_copy_mem16x16_neon;
+        rtcd->recon.copy8x8     = vp8_copy_mem8x8_neon;
+        rtcd->recon.copy8x4     = vp8_copy_mem8x4_neon;
+        rtcd->recon.recon       = vp8_recon_b_neon;
+        rtcd->recon.recon2      = vp8_recon2b_neon;
+        rtcd->recon.recon4      = vp8_recon4b_neon;
+    }
+#endif /* HAVE_ARMV7 */
+
+#endif /* CONFIG_RUNTIME_CPU_DETECT */
+
+#if HAVE_ARMV6
+#if CONFIG_RUNTIME_CPU_DETECT
+    if (flags & HAS_MEDIA)
+#endif /* CONFIG_RUNTIME_CPU_DETECT: without it the block below always runs */
+    {
+        vp8_build_intra_predictors_mby_ptr = vp8_build_intra_predictors_mby;
+        vp8_build_intra_predictors_mby_s_ptr = vp8_build_intra_predictors_mby_s;
+    }
+#endif /* HAVE_ARMV6 */
+
+#if HAVE_ARMV7
+#if CONFIG_RUNTIME_CPU_DETECT
+    if (flags & HAS_NEON)
+#endif /* CONFIG_RUNTIME_CPU_DETECT: without it the block below always runs */
+    {
+        vp8_build_intra_predictors_mby_ptr =
+         vp8_build_intra_predictors_mby_neon;
+        vp8_build_intra_predictors_mby_s_ptr =
+         vp8_build_intra_predictors_mby_s_neon;
+    }
+#endif /* HAVE_ARMV7 */
+}
diff --git a/vp8/common/arm/idct_arm.h b/vp8/common/arm/idct_arm.h

index f28d7f6..8b8d179 100644 (file)
--- a/vp8/common/arm/idct_arm.h
+++ b/vp8/common/arm/idct_arm.h
@@ -19,6 +19,7 @@ extern prototype_idct_scalar_add(vp8_dc_only_idct_add_v6);
  extern prototype_second_order(vp8_short_inv_walsh4x4_1_v6);
  extern prototype_second_order(vp8_short_inv_walsh4x4_v6);
  
+#if !CONFIG_RUNTIME_CPU_DETECT
  #undef  vp8_idct_idct1
  #define vp8_idct_idct1 vp8_short_idct4x4llm_1_v6
  
@@ -34,6 +35,7 @@ extern prototype_second_order(vp8_short_inv_walsh4x4_v6);
  #undef  vp8_idct_iwalsh16
  #define vp8_idct_iwalsh16 vp8_short_inv_walsh4x4_v6
  #endif
+#endif
  
  #if HAVE_ARMV7
  extern prototype_idct(vp8_short_idct4x4llm_1_neon);
@@ -42,6 +44,7 @@ extern prototype_idct_scalar_add(vp8_dc_only_idct_add_neon);
  extern prototype_second_order(vp8_short_inv_walsh4x4_1_neon);
  extern prototype_second_order(vp8_short_inv_walsh4x4_neon);
  
+#if !CONFIG_RUNTIME_CPU_DETECT
  #undef  vp8_idct_idct1
  #define vp8_idct_idct1 vp8_short_idct4x4llm_1_neon
  
@@ -57,5 +60,6 @@ extern prototype_second_order(vp8_short_inv_walsh4x4_neon);
  #undef  vp8_idct_iwalsh16
  #define vp8_idct_iwalsh16 vp8_short_inv_walsh4x4_neon
  #endif
+#endif
  
  #endif
diff --git a/vp8/common/arm/loopfilter_arm.h b/vp8/common/arm/loopfilter_arm.h

index 6c3628a..cd62207 100644 (file)
--- a/vp8/common/arm/loopfilter_arm.h
+++ b/vp8/common/arm/loopfilter_arm.h
@@ -22,6 +22,7 @@ extern prototype_loopfilter_block(vp8_loop_filter_bvs_armv6);
  extern prototype_loopfilter_block(vp8_loop_filter_mbhs_armv6);
  extern prototype_loopfilter_block(vp8_loop_filter_bhs_armv6);
  
+#if !CONFIG_RUNTIME_CPU_DETECT
  #undef  vp8_lf_normal_mb_v
  #define vp8_lf_normal_mb_v vp8_loop_filter_mbv_armv6
  
@@ -46,6 +47,7 @@ extern prototype_loopfilter_block(vp8_loop_filter_bhs_armv6);
  #undef  vp8_lf_simple_b_h
  #define vp8_lf_simple_b_h vp8_loop_filter_bhs_armv6
  #endif
+#endif
  
  #if HAVE_ARMV7
  extern prototype_loopfilter_block(vp8_loop_filter_mbv_neon);
@@ -57,6 +59,7 @@ extern prototype_loopfilter_block(vp8_loop_filter_bvs_neon);
  extern prototype_loopfilter_block(vp8_loop_filter_mbhs_neon);
  extern prototype_loopfilter_block(vp8_loop_filter_bhs_neon);
  
+#if !CONFIG_RUNTIME_CPU_DETECT
  #undef  vp8_lf_normal_mb_v
  #define vp8_lf_normal_mb_v vp8_loop_filter_mbv_neon
  
@@ -81,5 +84,6 @@ extern prototype_loopfilter_block(vp8_loop_filter_bhs_neon);
  #undef  vp8_lf_simple_b_h
  #define vp8_lf_simple_b_h vp8_loop_filter_bhs_neon
  #endif
+#endif
  
  #endif
diff --git a/vp8/common/arm/recon_arm.h b/vp8/common/arm/recon_arm.h

index 18855a3..c30f6dc 100644 (file)
--- a/vp8/common/arm/recon_arm.h
+++ b/vp8/common/arm/recon_arm.h
@@ -21,6 +21,7 @@ extern prototype_copy_block(vp8_copy_mem8x8_v6);
  extern prototype_copy_block(vp8_copy_mem8x4_v6);
  extern prototype_copy_block(vp8_copy_mem16x16_v6);
  
+#if !CONFIG_RUNTIME_CPU_DETECT
  #undef  vp8_recon_recon
  #define vp8_recon_recon vp8_recon_b_armv6
  
@@ -39,6 +40,7 @@ extern prototype_copy_block(vp8_copy_mem16x16_v6);
  #undef  vp8_recon_copy16x16
  #define vp8_recon_copy16x16 vp8_copy_mem16x16_v6
  #endif
+#endif
  
  #if HAVE_ARMV7
  extern prototype_recon_block(vp8_recon_b_neon);
@@ -49,6 +51,7 @@ extern prototype_copy_block(vp8_copy_mem8x8_neon);
  extern prototype_copy_block(vp8_copy_mem8x4_neon);
  extern prototype_copy_block(vp8_copy_mem16x16_neon);
  
+#if !CONFIG_RUNTIME_CPU_DETECT
  #undef  vp8_recon_recon
  #define vp8_recon_recon vp8_recon_b_neon
  
@@ -67,5 +70,6 @@ extern prototype_copy_block(vp8_copy_mem16x16_neon);
  #undef  vp8_recon_copy16x16
  #define vp8_recon_copy16x16 vp8_copy_mem16x16_neon
  #endif
+#endif
  
  #endif
diff --git a/vp8/common/arm/subpixel_arm.h b/vp8/common/arm/subpixel_arm.h

index 53600e5..6288538 100644 (file)
--- a/vp8/common/arm/subpixel_arm.h
+++ b/vp8/common/arm/subpixel_arm.h
@@ -22,6 +22,7 @@ extern prototype_subpixel_predict(vp8_bilinear_predict8x8_armv6);
  extern prototype_subpixel_predict(vp8_bilinear_predict8x4_armv6);
  extern prototype_subpixel_predict(vp8_bilinear_predict4x4_armv6);
  
+#if !CONFIG_RUNTIME_CPU_DETECT
  #undef  vp8_subpix_sixtap16x16
  #define vp8_subpix_sixtap16x16 vp8_sixtap_predict16x16_armv6
  
@@ -46,6 +47,7 @@ extern prototype_subpixel_predict(vp8_bilinear_predict4x4_armv6);
  #undef  vp8_subpix_bilinear4x4
  #define vp8_subpix_bilinear4x4 vp8_bilinear_predict4x4_armv6
  #endif
+#endif
  
  #if HAVE_ARMV7
  extern prototype_subpixel_predict(vp8_sixtap_predict16x16_neon);
@@ -57,6 +59,7 @@ extern prototype_subpixel_predict(vp8_bilinear_predict8x8_neon);
  extern prototype_subpixel_predict(vp8_bilinear_predict8x4_neon);
  extern prototype_subpixel_predict(vp8_bilinear_predict4x4_neon);
  
+#if !CONFIG_RUNTIME_CPU_DETECT
  #undef  vp8_subpix_sixtap16x16
  #define vp8_subpix_sixtap16x16 vp8_sixtap_predict16x16_neon
  
@@ -81,5 +84,6 @@ extern prototype_subpixel_predict(vp8_bilinear_predict4x4_neon);
  #undef  vp8_subpix_bilinear4x4
  #define vp8_subpix_bilinear4x4 vp8_bilinear_predict4x4_neon
  #endif
+#endif
  
  #endif
diff --git a/vp8/common/arm/systemdependent.c b/vp8/common/arm/systemdependent.c

deleted file mode 100644 (file)

index 1eed97e..0000000
--- a/vp8/common/arm/systemdependent.c
+++ /dev/null
@@ -1,149 +0,0 @@
-/*
- *  Copyright (c) 2010 The WebM project authors. All Rights Reserved.
- *
- *  Use of this source code is governed by a BSD-style license
- *  that can be found in the LICENSE file in the root of the source
- *  tree. An additional intellectual property rights grant can be found
- *  in the file PATENTS.  All contributing project authors may
- *  be found in the AUTHORS file in the root of the source tree.
- */
-
-
-#include "vpx_ports/config.h"
-#include "g_common.h"
-#include "pragmas.h"
-#include "subpixel.h"
-#include "loopfilter.h"
-#include "recon.h"
-#include "idct.h"
-#include "onyxc_int.h"
-
-void (*vp8_build_intra_predictors_mby_ptr)(MACROBLOCKD *x);
-extern void vp8_build_intra_predictors_mby(MACROBLOCKD *x);
-extern void vp8_build_intra_predictors_mby_neon(MACROBLOCKD *x);
-
-void (*vp8_build_intra_predictors_mby_s_ptr)(MACROBLOCKD *x);
-extern void vp8_build_intra_predictors_mby_s(MACROBLOCKD *x);
-extern void vp8_build_intra_predictors_mby_s_neon(MACROBLOCKD *x);
-
-void vp8_machine_specific_config(VP8_COMMON *ctx)
-{
-#if CONFIG_RUNTIME_CPU_DETECT
-    VP8_COMMON_RTCD *rtcd = &ctx->rtcd;
-
-#if HAVE_ARMV7
-    rtcd->subpix.sixtap16x16   = vp8_sixtap_predict16x16_neon;
-    rtcd->subpix.sixtap8x8     = vp8_sixtap_predict8x8_neon;
-    rtcd->subpix.sixtap8x4     = vp8_sixtap_predict8x4_neon;
-    rtcd->subpix.sixtap4x4     = vp8_sixtap_predict_neon;
-    rtcd->subpix.bilinear16x16 = vp8_bilinear_predict16x16_neon;
-    rtcd->subpix.bilinear8x8   = vp8_bilinear_predict8x8_neon;
-    rtcd->subpix.bilinear8x4   = vp8_bilinear_predict8x4_neon;
-    rtcd->subpix.bilinear4x4   = vp8_bilinear_predict4x4_neon;
-
-    rtcd->idct.idct1        = vp8_short_idct4x4llm_1_neon;
-    rtcd->idct.idct16       = vp8_short_idct4x4llm_neon;
-    rtcd->idct.iwalsh1      = vp8_short_inv_walsh4x4_1_neon;
-    rtcd->idct.iwalsh16     = vp8_short_inv_walsh4x4_neon;
-
-    rtcd->loopfilter.normal_mb_v = vp8_loop_filter_mbv_neon;
-    rtcd->loopfilter.normal_b_v  = vp8_loop_filter_bv_neon;
-    rtcd->loopfilter.normal_mb_h = vp8_loop_filter_mbh_neon;
-    rtcd->loopfilter.normal_b_h  = vp8_loop_filter_bh_neon;
-    rtcd->loopfilter.simple_mb_v = vp8_loop_filter_mbvs_neon;
-    rtcd->loopfilter.simple_b_v  = vp8_loop_filter_bvs_neon;
-    rtcd->loopfilter.simple_mb_h = vp8_loop_filter_mbhs_neon;
-    rtcd->loopfilter.simple_b_h  = vp8_loop_filter_bhs_neon;
-
-    rtcd->recon.copy16x16   = vp8_copy_mem16x16_neon;
-    rtcd->recon.copy8x8     = vp8_copy_mem8x8_neon;
-    rtcd->recon.copy8x4     = vp8_copy_mem8x4_neon;
-    rtcd->recon.recon       = vp8_recon_b_neon;
-    rtcd->recon.recon2      = vp8_recon2b_neon;
-    rtcd->recon.recon4      = vp8_recon4b_neon;
-#elif HAVE_ARMV6
-
-    rtcd->subpix.sixtap16x16   = vp8_sixtap_predict16x16_armv6;
-    rtcd->subpix.sixtap8x8     = vp8_sixtap_predict8x8_armv6;
-    rtcd->subpix.sixtap8x4     = vp8_sixtap_predict8x4_armv6;
-    rtcd->subpix.sixtap4x4     = vp8_sixtap_predict_armv6;
-    rtcd->subpix.bilinear16x16 = vp8_bilinear_predict16x16_armv6;
-    rtcd->subpix.bilinear8x8   = vp8_bilinear_predict8x8_armv6;
-    rtcd->subpix.bilinear8x4   = vp8_bilinear_predict8x4_armv6;
-    rtcd->subpix.bilinear4x4   = vp8_bilinear_predict4x4_armv6;
-
-    rtcd->idct.idct1        = vp8_short_idct4x4llm_1_v6;
-    rtcd->idct.idct16       = vp8_short_idct4x4llm_v6_dual;
-    rtcd->idct.iwalsh1      = vp8_short_inv_walsh4x4_1_armv6;
-    rtcd->idct.iwalsh16     = vp8_short_inv_walsh4x4_armv6;
-
-    rtcd->loopfilter.normal_mb_v = vp8_loop_filter_mbv_armv6;
-    rtcd->loopfilter.normal_b_v  = vp8_loop_filter_bv_armv6;
-    rtcd->loopfilter.normal_mb_h = vp8_loop_filter_mbh_armv6;
-    rtcd->loopfilter.normal_b_h  = vp8_loop_filter_bh_armv6;
-    rtcd->loopfilter.simple_mb_v = vp8_loop_filter_mbvs_armv6;
-    rtcd->loopfilter.simple_b_v  = vp8_loop_filter_bvs_armv6;
-    rtcd->loopfilter.simple_mb_h = vp8_loop_filter_mbhs_armv6;
-    rtcd->loopfilter.simple_b_h  = vp8_loop_filter_bhs_armv6;
-
-    rtcd->recon.copy16x16   = vp8_copy_mem16x16_v6;
-    rtcd->recon.copy8x8     = vp8_copy_mem8x8_v6;
-    rtcd->recon.copy8x4     = vp8_copy_mem8x4_v6;
-    rtcd->recon.recon       = vp8_recon_b_armv6;
-    rtcd->recon.recon2      = vp8_recon2b_armv6;
-    rtcd->recon.recon4      = vp8_recon4b_armv6;
-#else
-//pure c
-    rtcd->idct.idct1        = vp8_short_idct4x4llm_1_c;
-    rtcd->idct.idct16       = vp8_short_idct4x4llm_c;
-    rtcd->idct.idct1_scalar = vp8_dc_only_idct_c;
-    rtcd->idct.iwalsh1      = vp8_short_inv_walsh4x4_1_c;
-    rtcd->idct.iwalsh16     = vp8_short_inv_walsh4x4_c;
-
-    rtcd->recon.copy16x16   = vp8_copy_mem16x16_c;
-    rtcd->recon.copy8x8     = vp8_copy_mem8x8_c;
-    rtcd->recon.copy8x4     = vp8_copy_mem8x4_c;
-    rtcd->recon.recon      = vp8_recon_b_c;
-    rtcd->recon.recon2      = vp8_recon2b_c;
-    rtcd->recon.recon4     = vp8_recon4b_c;
-
-    rtcd->subpix.sixtap16x16   = vp8_sixtap_predict16x16_c;
-    rtcd->subpix.sixtap8x8     = vp8_sixtap_predict8x8_c;
-    rtcd->subpix.sixtap8x4     = vp8_sixtap_predict8x4_c;
-    rtcd->subpix.sixtap4x4     = vp8_sixtap_predict_c;
-    rtcd->subpix.bilinear16x16 = vp8_bilinear_predict16x16_c;
-    rtcd->subpix.bilinear8x8   = vp8_bilinear_predict8x8_c;
-    rtcd->subpix.bilinear8x4   = vp8_bilinear_predict8x4_c;
-    rtcd->subpix.bilinear4x4   = vp8_bilinear_predict4x4_c;
-
-    rtcd->loopfilter.normal_mb_v = vp8_loop_filter_mbv_c;
-    rtcd->loopfilter.normal_b_v  = vp8_loop_filter_bv_c;
-    rtcd->loopfilter.normal_mb_h = vp8_loop_filter_mbh_c;
-    rtcd->loopfilter.normal_b_h  = vp8_loop_filter_bh_c;
-    rtcd->loopfilter.simple_mb_v = vp8_loop_filter_mbvs_c;
-    rtcd->loopfilter.simple_b_v  = vp8_loop_filter_bvs_c;
-    rtcd->loopfilter.simple_mb_h = vp8_loop_filter_mbhs_c;
-    rtcd->loopfilter.simple_b_h  = vp8_loop_filter_bhs_c;
-#endif
-
-#if CONFIG_POSTPROC || (CONFIG_VP8_ENCODER && CONFIG_PSNR)
-    rtcd->postproc.down        = vp8_mbpost_proc_down_c;
-    rtcd->postproc.across      = vp8_mbpost_proc_across_ip_c;
-    rtcd->postproc.downacross  = vp8_post_proc_down_and_across_c;
-    rtcd->postproc.addnoise    = vp8_plane_add_noise_c;
-#endif
-#endif
-
-#if HAVE_ARMV7
-    vp8_build_intra_predictors_mby_ptr = vp8_build_intra_predictors_mby_neon;
-    vp8_build_intra_predictors_mby_s_ptr = vp8_build_intra_predictors_mby_s_neon;
-#elif HAVE_ARMV6
-    vp8_build_intra_predictors_mby_ptr = vp8_build_intra_predictors_mby;
-    vp8_build_intra_predictors_mby_s_ptr = vp8_build_intra_predictors_mby_s;
-#else
-    vp8_build_intra_predictors_mby_ptr = vp8_build_intra_predictors_mby;
-    vp8_build_intra_predictors_mby_s_ptr = vp8_build_intra_predictors_mby_s;
-
-#endif
-
-}
diff --git a/vp8/common/generic/systemdependent.c b/vp8/common/generic/systemdependent.c

index c04e31f..0ef375e 100644 (file)
--- a/vp8/common/generic/systemdependent.c
+++ b/vp8/common/generic/systemdependent.c
@@ -18,6 +18,7 @@
  #include "onyxc_int.h"
  
  extern void vp8_arch_x86_common_init(VP8_COMMON *ctx);
+extern void vp8_arch_arm_common_init(VP8_COMMON *ctx);
  
  void (*vp8_build_intra_predictors_mby_ptr)(MACROBLOCKD *x);
  extern void vp8_build_intra_predictors_mby(MACROBLOCKD *x);
@@ -77,4 +78,8 @@ void vp8_machine_specific_config(VP8_COMMON *ctx)
      vp8_arch_x86_common_init(ctx);
  #endif
  
+#if ARCH_ARM
+    vp8_arch_arm_common_init(ctx);
+#endif
+
  }
diff --git a/vp8/common/onyxc_int.h b/vp8/common/onyxc_int.h

index 4966002..d12143d 100644 (file)
--- a/vp8/common/onyxc_int.h
+++ b/vp8/common/onyxc_int.h
@@ -74,6 +74,7 @@ typedef struct VP8_COMMON_RTCD
      vp8_subpix_rtcd_vtable_t      subpix;
      vp8_loopfilter_rtcd_vtable_t  loopfilter;
      vp8_postproc_rtcd_vtable_t    postproc;
+    int                           flags;
  #else
      int unused;
  #endif
diff --git a/vp8/decoder/arm/arm_dsystemdependent.c b/vp8/decoder/arm/arm_dsystemdependent.c

new file mode 100644 (file)

index 0000000..77cff47
--- /dev/null
+++ b/vp8/decoder/arm/arm_dsystemdependent.c
@@ -0,0 +1,66 @@
+/*
+ *  Copyright (c) 2010 The WebM project authors. All Rights Reserved.
+ *
+ *  Use of this source code is governed by a BSD-style license
+ *  that can be found in the LICENSE file in the root of the source
+ *  tree. An additional intellectual property rights grant can be found
+ *  in the file PATENTS.  All contributing project authors may
+ *  be found in the AUTHORS file in the root of the source tree.
+ */
+
+
+#include "vpx_ports/config.h"
+#include "vpx_ports/arm.h"
+#include "blockd.h"
+#include "pragmas.h"
+#include "postproc.h"
+#include "dboolhuff.h"
+#include "dequantize.h"
+#include "onyxd_int.h"
+
+/* Selects ARM dequantization routines for the decoder in runtime-CPU-detect
+ * builds, keyed on the capability bits read from pbi->common.rtcd.flags
+ * (written by vp8_arch_arm_common_init); otherwise the body is empty. */
+void vp8_arch_arm_decode_init(VP8D_COMP *pbi)
+{
+#if CONFIG_RUNTIME_CPU_DETECT
+    int flags = pbi->common.rtcd.flags;
+
+#if HAVE_ARMV6
+    if (flags & HAS_MEDIA)
+    {
+        pbi->dequant.block               = vp8_dequantize_b_v6;
+        pbi->dequant.idct_add            = vp8_dequant_idct_add_v6;
+        pbi->dequant.dc_idct_add         = vp8_dequant_dc_idct_add_v6;
+        pbi->dequant.dc_idct_add_y_block = vp8_dequant_dc_idct_add_y_block_v6;
+        pbi->dequant.idct_add_y_block    = vp8_dequant_idct_add_y_block_v6;
+        pbi->dequant.idct_add_uv_block   = vp8_dequant_idct_add_uv_block_v6;
+#if 0 //For use with RTCD, when implemented
+        pbi->dboolhuff.start             = vp8dx_start_decode_c;
+        pbi->dboolhuff.fill              = vp8dx_bool_decoder_fill_c;
+        pbi->dboolhuff.debool            = vp8dx_decode_bool_c;
+        pbi->dboolhuff.devalue           = vp8dx_decode_value_c;
+#endif /* 0 */
+    }
+#endif /* HAVE_ARMV6 */
+
+#if HAVE_ARMV7
+    if (flags & HAS_NEON)
+    {
+        pbi->dequant.block               = vp8_dequantize_b_neon;
+        pbi->dequant.idct_add            = vp8_dequant_idct_add_neon;
+        /*This is not used: NEON always dequants two blocks at once.
+        pbi->dequant.dc_idct_add         = vp8_dequant_dc_idct_add_neon;*/
+        pbi->dequant.dc_idct_add_y_block = vp8_dequant_dc_idct_add_y_block_neon;
+        pbi->dequant.idct_add_y_block    = vp8_dequant_idct_add_y_block_neon;
+        pbi->dequant.idct_add_uv_block   = vp8_dequant_idct_add_uv_block_neon;
+#if 0 //For use with RTCD, when implemented
+        pbi->dboolhuff.start             = vp8dx_start_decode_c;
+        pbi->dboolhuff.fill              = vp8dx_bool_decoder_fill_c;
+        pbi->dboolhuff.debool            = vp8dx_decode_bool_c;
+        pbi->dboolhuff.devalue           = vp8dx_decode_value_c;
+#endif /* 0 */
+    }
+#endif /* HAVE_ARMV7 */
+#endif /* CONFIG_RUNTIME_CPU_DETECT */
+}
diff --git a/vp8/decoder/arm/dequantize_arm.h b/vp8/decoder/arm/dequantize_arm.h

index 40151e0..b7d800d 100644 (file)
--- a/vp8/decoder/arm/dequantize_arm.h
+++ b/vp8/decoder/arm/dequantize_arm.h
@@ -20,6 +20,7 @@ extern prototype_dequant_dc_idct_add_y_block(vp8_dequant_dc_idct_add_y_block_v6)
  extern prototype_dequant_idct_add_y_block(vp8_dequant_idct_add_y_block_v6);
  extern prototype_dequant_idct_add_uv_block(vp8_dequant_idct_add_uv_block_v6);
  
+#if !CONFIG_RUNTIME_CPU_DETECT
  #undef  vp8_dequant_block
  #define vp8_dequant_block vp8_dequantize_b_v6
  
@@ -38,6 +39,7 @@ extern prototype_dequant_idct_add_uv_block(vp8_dequant_idct_add_uv_block_v6);
  #undef vp8_dequant_idct_add_uv_block
  #define vp8_dequant_idct_add_uv_block vp8_dequant_idct_add_uv_block_v6
  #endif
+#endif
  
  #if HAVE_ARMV7
  extern prototype_dequant_block(vp8_dequantize_b_neon);
@@ -47,6 +49,7 @@ extern prototype_dequant_dc_idct_add_y_block(vp8_dequant_dc_idct_add_y_block_neo
  extern prototype_dequant_idct_add_y_block(vp8_dequant_idct_add_y_block_neon);
  extern prototype_dequant_idct_add_uv_block(vp8_dequant_idct_add_uv_block_neon);
  
+#if !CONFIG_RUNTIME_CPU_DETECT
  #undef  vp8_dequant_block
  #define vp8_dequant_block vp8_dequantize_b_neon
  
@@ -65,5 +68,6 @@ extern prototype_dequant_idct_add_uv_block(vp8_dequant_idct_add_uv_block_neon);
  #undef vp8_dequant_idct_add_uv_block
  #define vp8_dequant_idct_add_uv_block vp8_dequant_idct_add_uv_block_neon
  #endif
+#endif
  
  #endif
diff --git a/vp8/decoder/arm/dsystemdependent.c b/vp8/decoder/arm/dsystemdependent.c

deleted file mode 100644 (file)

index 9dcf7b6..0000000
--- a/vp8/decoder/arm/dsystemdependent.c
+++ /dev/null
@@ -1,39 +0,0 @@
-/*
- *  Copyright (c) 2010 The WebM project authors. All Rights Reserved.
- *
- *  Use of this source code is governed by a BSD-style license
- *  that can be found in the LICENSE file in the root of the source
- *  tree. An additional intellectual property rights grant can be found
- *  in the file PATENTS.  All contributing project authors may
- *  be found in the AUTHORS file in the root of the source tree.
- */
-
-
-#include "vpx_ports/config.h"
-#include "blockd.h"
-#include "pragmas.h"
-#include "postproc.h"
-#include "dboolhuff.h"
-#include "dequantize.h"
-#include "onyxd_int.h"
-
-void vp8_dmachine_specific_config(VP8D_COMP *pbi)
-{
-#if CONFIG_RUNTIME_CPU_DETECT
-    pbi->mb.rtcd         = &pbi->common.rtcd;
-#if HAVE_ARMV7
-    pbi->dequant.block   = vp8_dequantize_b_neon;
-    pbi->dboolhuff.start = vp8dx_start_decode_c;
-    pbi->dboolhuff.fill  = vp8dx_bool_decoder_fill_c;
-    pbi->dboolhuff.debool = vp8dx_decode_bool_c;
-    pbi->dboolhuff.devalue = vp8dx_decode_value_c;
-
-#elif HAVE_ARMV6
-    pbi->dequant.block   = vp8_dequantize_b_v6;
-    pbi->dboolhuff.start = vp8dx_start_decode_c;
-    pbi->dboolhuff.fill  = vp8dx_bool_decoder_fill_c;
-    pbi->dboolhuff.debool = vp8dx_decode_bool_c;
-    pbi->dboolhuff.devalue = vp8dx_decode_value_c;
-#endif
-#endif
-}
diff --git a/vp8/decoder/generic/dsystemdependent.c b/vp8/decoder/generic/dsystemdependent.c

index 60f2af5..84de7af 100644 (file)
--- a/vp8/decoder/generic/dsystemdependent.c
+++ b/vp8/decoder/generic/dsystemdependent.c
@@ -14,6 +14,7 @@
  #include "onyxd_int.h"
  
  extern void vp8_arch_x86_decode_init(VP8D_COMP *pbi);
+extern void vp8_arch_arm_decode_init(VP8D_COMP *pbi);
  
  void vp8_dmachine_specific_config(VP8D_COMP *pbi)
  {
@@ -37,4 +38,8 @@ void vp8_dmachine_specific_config(VP8D_COMP *pbi)
  #if ARCH_X86 || ARCH_X86_64
      vp8_arch_x86_decode_init(pbi);
  #endif
+
+#if ARCH_ARM
+    vp8_arch_arm_decode_init(pbi);
+#endif
  }
diff --git a/vp8/decoder/onyxd_if.c b/vp8/decoder/onyxd_if.c

index 884c38d..b5a6e3e 100644 (file)
--- a/vp8/decoder/onyxd_if.c
+++ b/vp8/decoder/onyxd_if.c
@@ -30,6 +30,9 @@
  #include "systemdependent.h"
  #include "vpx_ports/vpx_timer.h"
  #include "detokenize.h"
+#if ARCH_ARM
+#include "vpx_ports/arm.h"
+#endif
  
  extern void vp8_init_loop_filter(VP8_COMMON *cm);
  extern void vp8cx_init_de_quantizer(VP8D_COMP *pbi);
@@ -224,7 +227,6 @@ int vp8dx_set_reference(VP8D_PTR ptr, VP8_REFFRAME ref_frame_flag, YV12_BUFFER_C
  #if HAVE_ARMV7
  extern void vp8_push_neon(INT64 *store);
  extern void vp8_pop_neon(INT64 *store);
-static INT64 dx_store_reg[8];
  #endif
  
  static int get_free_fb (VP8_COMMON *cm)
@@ -312,6 +314,9 @@ static int swap_frame_buffers (VP8_COMMON *cm)
  
  int vp8dx_receive_compressed_data(VP8D_PTR ptr, unsigned long size, const unsigned char *source, INT64 time_stamp)
  {
+#if HAVE_ARMV7
+    INT64 dx_store_reg[8];
+#endif
      VP8D_COMP *pbi = (VP8D_COMP *) ptr;
      VP8_COMMON *cm = &pbi->common;
      int retcode = 0;
@@ -327,10 +332,27 @@ int vp8dx_receive_compressed_data(VP8D_PTR ptr, unsigned long size, const unsign
  
      pbi->common.error.error_code = VPX_CODEC_OK;
  
+#if HAVE_ARMV7
+#if CONFIG_RUNTIME_CPU_DETECT
+    if (cm->rtcd.flags & HAS_NEON)
+#endif
+    {
+        vp8_push_neon(dx_store_reg);
+    }
+#endif
+
      cm->new_fb_idx = get_free_fb (cm);
  
      if (setjmp(pbi->common.error.jmp))
      {
+#if HAVE_ARMV7
+#if CONFIG_RUNTIME_CPU_DETECT
+        if (cm->rtcd.flags & HAS_NEON)
+#endif
+        {
+            vp8_pop_neon(dx_store_reg);
+        }
+#endif
          pbi->common.error.setjmp = 0;
          if (cm->fb_idx_ref_cnt[cm->new_fb_idx] > 0)
            cm->fb_idx_ref_cnt[cm->new_fb_idx]--;
@@ -339,10 +361,6 @@ int vp8dx_receive_compressed_data(VP8D_PTR ptr, unsigned long size, const unsign
  
      pbi->common.error.setjmp = 1;
  
-#if HAVE_ARMV7
-    vp8_push_neon(dx_store_reg);
-#endif
-
      vpx_usec_timer_start(&timer);
  
      //cm->current_video_frame++;
@@ -354,7 +372,12 @@ int vp8dx_receive_compressed_data(VP8D_PTR ptr, unsigned long size, const unsign
      if (retcode < 0)
      {
  #if HAVE_ARMV7
-        vp8_pop_neon(dx_store_reg);
+#if CONFIG_RUNTIME_CPU_DETECT
+        if (cm->rtcd.flags & HAS_NEON)
+#endif
+        {
+            vp8_pop_neon(dx_store_reg);
+        }
  #endif
          pbi->common.error.error_code = VPX_CODEC_ERROR;
          pbi->common.error.setjmp = 0;
@@ -367,6 +390,14 @@ int vp8dx_receive_compressed_data(VP8D_PTR ptr, unsigned long size, const unsign
      {
          if (swap_frame_buffers (cm))
          {
+#if HAVE_ARMV7
+#if CONFIG_RUNTIME_CPU_DETECT
+            if (cm->rtcd.flags & HAS_NEON)
+#endif
+            {
+                vp8_pop_neon(dx_store_reg);
+            }
+#endif
              pbi->common.error.error_code = VPX_CODEC_ERROR;
              pbi->common.error.setjmp = 0;
              return -1;
@@ -375,6 +406,14 @@ int vp8dx_receive_compressed_data(VP8D_PTR ptr, unsigned long size, const unsign
      {
          if (swap_frame_buffers (cm))
          {
+#if HAVE_ARMV7
+#if CONFIG_RUNTIME_CPU_DETECT
+            if (cm->rtcd.flags & HAS_NEON)
+#endif
+            {
+                vp8_pop_neon(dx_store_reg);
+            }
+#endif
              pbi->common.error.error_code = VPX_CODEC_ERROR;
              pbi->common.error.setjmp = 0;
              return -1;
@@ -455,7 +494,12 @@ int vp8dx_receive_compressed_data(VP8D_PTR ptr, unsigned long size, const unsign
  #endif
  
  #if HAVE_ARMV7
-    vp8_pop_neon(dx_store_reg);
+#if CONFIG_RUNTIME_CPU_DETECT
+    if (cm->rtcd.flags & HAS_NEON)
+#endif
+    {
+        vp8_pop_neon(dx_store_reg);
+    }
  #endif
      pbi->common.error.setjmp = 0;
      return retcode;
diff --git a/vp8/encoder/arm/arm_csystemdependent.c b/vp8/encoder/arm/arm_csystemdependent.c

new file mode 100644 (file)

index 0000000..8736fcf
--- /dev/null
+++ b/vp8/encoder/arm/arm_csystemdependent.c
@@ -0,0 +1,136 @@
+/*
+ *  Copyright (c) 2010 The WebM project authors. All Rights Reserved.
+ *
+ *  Use of this source code is governed by a BSD-style license
+ *  that can be found in the LICENSE file in the root of the source
+ *  tree. An additional intellectual property rights grant can be found
+ *  in the file PATENTS.  All contributing project authors may
+ *  be found in the AUTHORS file in the root of the source tree.
+ */
+
+
+#include "vpx_ports/config.h"
+#include "vpx_ports/arm.h"
+#include "variance.h"
+#include "onyx_int.h"
+
+extern void (*vp8_yv12_copy_partial_frame_ptr)(YV12_BUFFER_CONFIG *src_ybc, YV12_BUFFER_CONFIG *dst_ybc, int Fraction);
+extern void vp8_yv12_copy_partial_frame(YV12_BUFFER_CONFIG *src_ybc, YV12_BUFFER_CONFIG *dst_ybc, int Fraction);
+extern void vpxyv12_copy_partial_frame_neon(YV12_BUFFER_CONFIG *src_ybc, YV12_BUFFER_CONFIG *dst_ybc, int Fraction);
+/* Installs the ARM encoder overrides for cpi: RTCD table entries (runtime detection only) and the NEON partial-frame copy. */
+void vp8_arch_arm_encoder_init(VP8_COMP *cpi)
+{
+#if CONFIG_RUNTIME_CPU_DETECT
+    int flags = cpi->common.rtcd.flags;
+    int has_edsp = flags & HAS_EDSP;
+    int has_media = flags & HAS_MEDIA;
+    int has_neon = flags & HAS_NEON;
+    /* Only entries assigned below are overridden; the commented-out ones are left untouched by this function. */
+#if HAVE_ARMV6
+    if (has_media)
+    {
+        /*cpi->rtcd.variance.sad16x16              = vp8_sad16x16_c;
+        cpi->rtcd.variance.sad16x8               = vp8_sad16x8_c;
+        cpi->rtcd.variance.sad8x16               = vp8_sad8x16_c;
+        cpi->rtcd.variance.sad8x8                = vp8_sad8x8_c;
+        cpi->rtcd.variance.sad4x4                = vp8_sad4x4_c;*/
+
+        /*cpi->rtcd.variance.var4x4                = vp8_variance4x4_c;
+        cpi->rtcd.variance.var8x8                = vp8_variance8x8_c;
+        cpi->rtcd.variance.var8x16               = vp8_variance8x16_c;
+        cpi->rtcd.variance.var16x8               = vp8_variance16x8_c;
+        cpi->rtcd.variance.var16x16              = vp8_variance16x16_c;*/
+
+        /*cpi->rtcd.variance.subpixvar4x4          = vp8_sub_pixel_variance4x4_c;
+        cpi->rtcd.variance.subpixvar8x8          = vp8_sub_pixel_variance8x8_c;
+        cpi->rtcd.variance.subpixvar8x16         = vp8_sub_pixel_variance8x16_c;
+        cpi->rtcd.variance.subpixvar16x8         = vp8_sub_pixel_variance16x8_c;
+        cpi->rtcd.variance.subpixvar16x16        = vp8_sub_pixel_variance16x16_c;*/
+
+        /*cpi->rtcd.variance.mse16x16              = vp8_mse16x16_c;
+        cpi->rtcd.variance.getmbss               = vp8_get_mb_ss_c;*/
+
+        /*cpi->rtcd.variance.get16x16prederror     = vp8_get16x16pred_error_c;
+        cpi->rtcd.variance.get8x8var             = vp8_get8x8var_c;
+        cpi->rtcd.variance.get16x16var           = vp8_get16x16var_c;;
+        cpi->rtcd.variance.get4x4sse_cs          = vp8_get4x4sse_cs_c;*/
+
+        /*cpi->rtcd.fdct.short4x4                  = vp8_short_fdct4x4_c;
+        cpi->rtcd.fdct.short8x4                  = vp8_short_fdct8x4_c;
+        cpi->rtcd.fdct.fast4x4                   = vp8_fast_fdct4x4_c;
+        cpi->rtcd.fdct.fast8x4                   = vp8_fast_fdct8x4_c;*/
+        cpi->rtcd.fdct.walsh_short4x4            = vp8_short_walsh4x4_armv6;
+
+        /*cpi->rtcd.encodemb.berr                  = vp8_block_error_c;
+        cpi->rtcd.encodemb.mberr                 = vp8_mbblock_error_c;
+        cpi->rtcd.encodemb.mbuverr               = vp8_mbuverror_c;
+        cpi->rtcd.encodemb.subb                  = vp8_subtract_b_c;
+        cpi->rtcd.encodemb.submby                = vp8_subtract_mby_c;
+        cpi->rtcd.encodemb.submbuv               = vp8_subtract_mbuv_c;*/
+
+        /*cpi->rtcd.quantize.quantb                = vp8_regular_quantize_b;
+        cpi->rtcd.quantize.fastquantb            = vp8_fast_quantize_b_c;*/
+    }
+#endif
+
+#if HAVE_ARMV7
+    if (has_neon)
+    {
+        cpi->rtcd.variance.sad16x16              = vp8_sad16x16_neon;
+        cpi->rtcd.variance.sad16x8               = vp8_sad16x8_neon;
+        cpi->rtcd.variance.sad8x16               = vp8_sad8x16_neon;
+        cpi->rtcd.variance.sad8x8                = vp8_sad8x8_neon;
+        cpi->rtcd.variance.sad4x4                = vp8_sad4x4_neon;
+
+        /*cpi->rtcd.variance.var4x4                = vp8_variance4x4_c;*/
+        cpi->rtcd.variance.var8x8                = vp8_variance8x8_neon;
+        cpi->rtcd.variance.var8x16               = vp8_variance8x16_neon;
+        cpi->rtcd.variance.var16x8               = vp8_variance16x8_neon;
+        cpi->rtcd.variance.var16x16              = vp8_variance16x16_neon;
+
+        /*cpi->rtcd.variance.subpixvar4x4          = vp8_sub_pixel_variance4x4_c;*/
+        cpi->rtcd.variance.subpixvar8x8          = vp8_sub_pixel_variance8x8_neon;
+        /*cpi->rtcd.variance.subpixvar8x16         = vp8_sub_pixel_variance8x16_c;
+        cpi->rtcd.variance.subpixvar16x8         = vp8_sub_pixel_variance16x8_c;*/
+        cpi->rtcd.variance.subpixvar16x16        = vp8_sub_pixel_variance16x16_neon;
+
+        cpi->rtcd.variance.mse16x16              = vp8_mse16x16_neon;
+        /*cpi->rtcd.variance.getmbss               = vp8_get_mb_ss_c;*/
+
+        cpi->rtcd.variance.get16x16prederror     = vp8_get16x16pred_error_neon;
+        /*cpi->rtcd.variance.get8x8var             = vp8_get8x8var_c;
+        cpi->rtcd.variance.get16x16var           = vp8_get16x16var_c;*/
+        cpi->rtcd.variance.get4x4sse_cs          = vp8_get4x4sse_cs_neon;
+
+        cpi->rtcd.fdct.short4x4                  = vp8_short_fdct4x4_neon;
+        cpi->rtcd.fdct.short8x4                  = vp8_short_fdct8x4_neon;
+        cpi->rtcd.fdct.fast4x4                   = vp8_fast_fdct4x4_neon;
+        cpi->rtcd.fdct.fast8x4                   = vp8_fast_fdct8x4_neon;
+        cpi->rtcd.fdct.walsh_short4x4            = vp8_short_walsh4x4_neon;
+
+        /*cpi->rtcd.encodemb.berr                  = vp8_block_error_c;
+        cpi->rtcd.encodemb.mberr                 = vp8_mbblock_error_c;
+        cpi->rtcd.encodemb.mbuverr               = vp8_mbuverror_c;*/
+        cpi->rtcd.encodemb.subb                  = vp8_subtract_b_neon;
+        cpi->rtcd.encodemb.submby                = vp8_subtract_mby_neon;
+        cpi->rtcd.encodemb.submbuv               = vp8_subtract_mbuv_neon;
+
+        /*cpi->rtcd.quantize.quantb                = vp8_regular_quantize_b;
+        cpi->rtcd.quantize.fastquantb            = vp8_fast_quantize_b_c;*/
+        /* The neon quantizer has not been updated to match the new exact
+         * quantizer introduced in commit e04e2935
+         */
+        /*cpi->rtcd.quantize.fastquantb            = vp8_fast_quantize_b_neon;*/
+    }
+#endif /* HAVE_ARMV7 */
+#endif /* CONFIG_RUNTIME_CPU_DETECT */
+    /* Kept outside the runtime-detect guard so static (non-RTCD) ARMv7 builds still get the NEON copy. */
+#if HAVE_ARMV7
+#if CONFIG_RUNTIME_CPU_DETECT
+    if (has_neon)
+#endif
+    {
+        vp8_yv12_copy_partial_frame_ptr = vpxyv12_copy_partial_frame_neon;
+    }
+#endif
+}
diff --git a/vp8/encoder/arm/neon/boolhuff_armv7.asm b/vp8/encoder/arm/armv5te/boolhuff_armv5te.asm

similarity index 95%

rename from vp8/encoder/arm/neon/boolhuff_armv7.asm

rename to vp8/encoder/arm/armv5te/boolhuff_armv5te.asm

index 9c4823c..e78dc33 100644 (file)
--- a/vp8/encoder/arm/neon/boolhuff_armv7.asm
+++ b/vp8/encoder/arm/armv5te/boolhuff_armv5te.asm
@@ -205,17 +205,10 @@ token_count_lt_zero_se
      ldr     r5, [r0, #vp8_writer_range]
      ldr     r3, [r0, #vp8_writer_count]
  
-    ; reverse the stream of bits to be packed.  Normally
-    ; the most significant bit is peeled off and compared
-    ; in the form of (v >> --n) & 1.  ARM architecture has
-    ; the ability to set a flag based on the value of the
-    ; bit shifted off the bottom of the register.  To make
-    ; that happen the bitstream is reversed.
-    rbit    r11, r1
      rsb     r4, r10, #32                 ; 32-n
  
      ; v is kept in r1 during the token pack loop
-    lsr     r1, r11, r4                 ; v >>= 32 - n
+    lsl     r1, r1, r4                  ; r1 = v << 32 - n
  
  encode_value_loop
      sub     r7, r5, #1                  ; range-1
@@ -223,7 +216,7 @@ encode_value_loop
      ; Decisions are made based on the bit value shifted
      ; off of v, so set a flag here based on this.
      ; This value is refered to as "bb"
-    lsrs    r1, r1, #1                  ; bit = v >> n
+    lsls    r1, r1, #1                  ; bit = MSB of v, shifted out into carry
      mov     r4, r7, lsl #7              ; ((range-1) * 128)
  
      mov     r7, #1
diff --git a/vp8/encoder/arm/neon/vp8_packtokens_armv7.asm b/vp8/encoder/arm/armv5te/vp8_packtokens_armv5.asm

similarity index 93%

rename from vp8/encoder/arm/neon/vp8_packtokens_armv7.asm

rename to vp8/encoder/arm/armv5te/vp8_packtokens_armv5.asm

index c19ac82..3233d2a 100644 (file)
--- a/vp8/encoder/arm/neon/vp8_packtokens_armv7.asm
+++ b/vp8/encoder/arm/armv5te/vp8_packtokens_armv5.asm
@@ -9,7 +9,7 @@
  ;
  
  
-    EXPORT |vp8cx_pack_tokens_armv7|
+    EXPORT |vp8cx_pack_tokens_armv5|
  
      INCLUDE vpx_vp8_enc_asm_offsets.asm
  
@@ -25,7 +25,7 @@
  ; r3 vp8_coef_encodings
  ; s0 vp8_extra_bits
  ; s1 vp8_coef_tree
-|vp8cx_pack_tokens_armv7| PROC
+|vp8cx_pack_tokens_armv5| PROC
      push    {r4-r11, lr}
  
      ; Add size of xcount * sizeof (TOKENEXTRA) to get stop
@@ -57,18 +57,11 @@ while_p_lt_stop
      movne   lr, #2                      ; i = 2
      subne   r8, r8, #1                  ; --n
  
-    ; reverse the stream of bits to be packed.  Normally
-    ; the most significant bit is peeled off and compared
-    ; in the form of (v >> --n) & 1.  ARM architecture has
-    ; the ability to set a flag based on the value of the
-    ; bit shifted off the bottom of the register.  To make
-    ; that happen the bitstream is reversed.
-    rbit    r12, r6
      rsb     r4, r8, #32                 ; 32-n
      ldr     r10, [sp, #52]              ; vp8_coef_tree
  
      ; v is kept in r12 during the token pack loop
-    lsr     r12, r12, r4                ; v >>= 32 - n
+    lsl     r12, r6, r4                ; r12 = v << 32 - n
  
  ; loop start
  token_loop
@@ -78,7 +71,7 @@ token_loop
      ; Decisions are made based on the bit value shifted
      ; off of v, so set a flag here based on this.
      ; This value is refered to as "bb"
-    lsrs    r12, r12, #1                ; bb = v >> n
+    lsls    r12, r12, #1                ; bb = MSB of v, shifted out into carry
      mul     r4, r4, r7                  ; ((range-1) * pp[i>>1]))
  
      ; bb can only be 0 or 1.  So only execute this statement
@@ -172,16 +165,15 @@ token_count_lt_zero
      ldr     r10, [r12, #vp8_extra_bit_struct_tree]
      str     r10, [sp, #4]               ; b->tree
  
-    rbit    r12, r7                     ; reverse v
      rsb     r4, r8, #32
-    lsr     r12, r12, r4
+    lsl     r12, r7, r4
  
      mov     lr, #0                      ; i = 0
  
  extra_bits_loop
      ldrb    r4, [r9, lr, asr #1]            ; pp[i>>1]
      sub     r7, r5, #1                  ; range-1
-    lsrs    r12, r12, #1                ; v >> n
+    lsls    r12, r12, #1                ; bb = MSB of v, shifted out into carry
      mul     r4, r4, r7                  ; (range-1) * pp[i>>1]
      addcs   lr, lr, #1                  ; i + bb
  
diff --git a/vp8/encoder/arm/neon/vp8_packtokens_mbrow_armv7.asm b/vp8/encoder/arm/armv5te/vp8_packtokens_mbrow_armv5.asm

similarity index 94%

rename from vp8/encoder/arm/neon/vp8_packtokens_mbrow_armv7.asm

rename to vp8/encoder/arm/armv5te/vp8_packtokens_mbrow_armv5.asm

index 0756455..a9b552a 100644 (file)
--- a/vp8/encoder/arm/neon/vp8_packtokens_mbrow_armv7.asm
+++ b/vp8/encoder/arm/armv5te/vp8_packtokens_mbrow_armv5.asm
@@ -9,7 +9,7 @@
  ;
  
  
-    EXPORT |vp8cx_pack_mb_row_tokens_armv7|
+    EXPORT |vp8cx_pack_mb_row_tokens_armv5|
  
      INCLUDE vpx_vp8_enc_asm_offsets.asm
  
@@ -25,7 +25,7 @@
  ; r3 vp8_extra_bits
  ; s0 vp8_coef_tree
  
-|vp8cx_pack_mb_row_tokens_armv7| PROC
+|vp8cx_pack_mb_row_tokens_armv5| PROC
      push    {r4-r11, lr}
      sub     sp, sp, #24
  
@@ -78,18 +78,11 @@ while_p_lt_stop
      movne   lr, #2                      ; i = 2
      subne   r8, r8, #1                  ; --n
  
-    ; reverse the stream of bits to be packed.  Normally
-    ; the most significant bit is peeled off and compared
-    ; in the form of (v >> --n) & 1.  ARM architecture has
-    ; the ability to set a flag based on the value of the
-    ; bit shifted off the bottom of the register.  To make
-    ; that happen the bitstream is reversed.
-    rbit    r12, r6
      rsb     r4, r8, #32                 ; 32-n
      ldr     r10, [sp, #60]              ; vp8_coef_tree
  
      ; v is kept in r12 during the token pack loop
-    lsr     r12, r12, r4                ; v >>= 32 - n
+    lsl     r12, r6, r4                 ; r12 = v << 32 - n
  
  ; loop start
  token_loop
@@ -99,7 +92,7 @@ token_loop
      ; Decisions are made based on the bit value shifted
      ; off of v, so set a flag here based on this.
      ; This value is refered to as "bb"
-    lsrs    r12, r12, #1                ; bb = v >> n
+    lsls    r12, r12, #1                ; bb = MSB of v, shifted out into carry
      mul     r4, r4, r7                  ; ((range-1) * pp[i>>1]))
  
      ; bb can only be 0 or 1.  So only execute this statement
@@ -193,16 +186,15 @@ token_count_lt_zero
      ldr     r10, [r12, #vp8_extra_bit_struct_tree]
      str     r10, [sp, #4]               ; b->tree
  
-    rbit    r12, r7                     ; reverse v
      rsb     r4, r8, #32
-    lsr     r12, r12, r4
+    lsl     r12, r7, r4
  
      mov     lr, #0                      ; i = 0
  
  extra_bits_loop
      ldrb    r4, [r9, lr, asr #1]            ; pp[i>>1]
      sub     r7, r5, #1                  ; range-1
-    lsrs    r12, r12, #1                ; v >> n
+    lsls    r12, r12, #1                ; bb = MSB of v, shifted out into carry
      mul     r4, r4, r7                  ; (range-1) * pp[i>>1]
      addcs   lr, lr, #1                  ; i + bb
  
diff --git a/vp8/encoder/arm/neon/vp8_packtokens_partitions_armv7.asm b/vp8/encoder/arm/armv5te/vp8_packtokens_partitions_armv5.asm

similarity index 95%

rename from vp8/encoder/arm/neon/vp8_packtokens_partitions_armv7.asm

rename to vp8/encoder/arm/armv5te/vp8_packtokens_partitions_armv5.asm

index 10a3d98..0835164 100644 (file)
--- a/vp8/encoder/arm/neon/vp8_packtokens_partitions_armv7.asm
+++ b/vp8/encoder/arm/armv5te/vp8_packtokens_partitions_armv5.asm
@@ -9,7 +9,7 @@
  ;
  
  
-    EXPORT |vp8cx_pack_tokens_into_partitions_armv7|
+    EXPORT |vp8cx_pack_tokens_into_partitions_armv5|
  
      INCLUDE vpx_vp8_enc_asm_offsets.asm
  
@@ -27,7 +27,7 @@
  ; s1 vp8_extra_bits,
  ; s2 const vp8_tree_index *,
  
-|vp8cx_pack_tokens_into_partitions_armv7| PROC
+|vp8cx_pack_tokens_into_partitions_armv5| PROC
      push    {r4-r11, lr}
      sub     sp, sp, #44
  
@@ -106,18 +106,11 @@ while_p_lt_stop
      movne   lr, #2                      ; i = 2
      subne   r8, r8, #1                  ; --n
  
-    ; reverse the stream of bits to be packed.  Normally
-    ; the most significant bit is peeled off and compared
-    ; in the form of (v >> --n) & 1.  ARM architecture has
-    ; the ability to set a flag based on the value of the
-    ; bit shifted off the bottom of the register.  To make
-    ; that happen the bitstream is reversed.
-    rbit    r12, r6
      rsb     r4, r8, #32                 ; 32-n
      ldr     r10, [sp, #88]              ; vp8_coef_tree
  
      ; v is kept in r12 during the token pack loop
-    lsr     r12, r12, r4                ; v >>= 32 - n
+    lsl     r12, r6, r4                ; r12 = v << 32 - n
  
  ; loop start
  token_loop
@@ -127,7 +120,7 @@ token_loop
      ; Decisions are made based on the bit value shifted
      ; off of v, so set a flag here based on this.
      ; This value is refered to as "bb"
-    lsrs    r12, r12, #1                ; bb = v >> n
+    lsls    r12, r12, #1                ; bb = MSB of v, shifted out into carry
      mul     r4, r4, r7                  ; ((range-1) * pp[i>>1]))
  
      ; bb can only be 0 or 1.  So only execute this statement
@@ -221,16 +214,15 @@ token_count_lt_zero
      ldr     r10, [r12, #vp8_extra_bit_struct_tree]
      str     r10, [sp, #4]               ; b->tree
  
-    rbit    r12, r7                     ; reverse v
      rsb     r4, r8, #32
-    lsr     r12, r12, r4
+    lsl     r12, r7, r4
  
      mov     lr, #0                      ; i = 0
  
  extra_bits_loop
      ldrb    r4, [r9, lr, asr #1]        ; pp[i>>1]
      sub     r7, r5, #1                  ; range-1
-    lsrs    r12, r12, #1                ; v >> n
+    lsls    r12, r12, #1                ; bb = MSB of v, shifted out into carry
      mul     r4, r4, r7                  ; (range-1) * pp[i>>1]
      addcs   lr, lr, #1                  ; i + bb
  
diff --git a/vp8/encoder/arm/csystemdependent.c b/vp8/encoder/arm/csystemdependent.c

deleted file mode 100644 (file)

index 8d70d63..0000000
--- a/vp8/encoder/arm/csystemdependent.c
+++ /dev/null
@@ -1,164 +0,0 @@
-/*
- *  Copyright (c) 2010 The WebM project authors. All Rights Reserved.
- *
- *  Use of this source code is governed by a BSD-style license
- *  that can be found in the LICENSE file in the root of the source
- *  tree. An additional intellectual property rights grant can be found
- *  in the file PATENTS.  All contributing project authors may
- *  be found in the AUTHORS file in the root of the source tree.
- */
-
-
-#include "vpx_ports/config.h"
-#include "variance.h"
-#include "onyx_int.h"
-
-void (*vp8_yv12_copy_partial_frame_ptr)(YV12_BUFFER_CONFIG *src_ybc, YV12_BUFFER_CONFIG *dst_ybc, int Fraction);
-extern void vp8_yv12_copy_partial_frame(YV12_BUFFER_CONFIG *src_ybc, YV12_BUFFER_CONFIG *dst_ybc, int Fraction);
-extern void vpxyv12_copy_partial_frame_neon(YV12_BUFFER_CONFIG *src_ybc, YV12_BUFFER_CONFIG *dst_ybc, int Fraction);
-
-void vp8_cmachine_specific_config(VP8_COMP *cpi)
-{
-#if CONFIG_RUNTIME_CPU_DETECT
-    cpi->rtcd.common                         = &cpi->common.rtcd;
-
-#if HAVE_ARMV7
-    cpi->rtcd.variance.sad16x16              = vp8_sad16x16_neon;
-    cpi->rtcd.variance.sad16x8               = vp8_sad16x8_neon;
-    cpi->rtcd.variance.sad8x16               = vp8_sad8x16_neon;
-    cpi->rtcd.variance.sad8x8                = vp8_sad8x8_neon;
-    cpi->rtcd.variance.sad4x4                = vp8_sad4x4_neon;
-
-    cpi->rtcd.variance.var4x4                = vp8_variance4x4_c;
-    cpi->rtcd.variance.var8x8                = vp8_variance8x8_neon;
-    cpi->rtcd.variance.var8x16               = vp8_variance8x16_neon;
-    cpi->rtcd.variance.var16x8               = vp8_variance16x8_neon;
-    cpi->rtcd.variance.var16x16              = vp8_variance16x16_neon;
-
-    cpi->rtcd.variance.subpixvar4x4          = vp8_sub_pixel_variance4x4_c;
-    cpi->rtcd.variance.subpixvar8x8          = vp8_sub_pixel_variance8x8_neon;
-    cpi->rtcd.variance.subpixvar8x16         = vp8_sub_pixel_variance8x16_c;
-    cpi->rtcd.variance.subpixvar16x8         = vp8_sub_pixel_variance16x8_c;
-    cpi->rtcd.variance.subpixvar16x16        = vp8_sub_pixel_variance16x16_neon;
-
-    cpi->rtcd.variance.mse16x16              = vp8_mse16x16_neon;
-    cpi->rtcd.variance.getmbss               = vp8_get_mb_ss_c;
-
-    cpi->rtcd.variance.get16x16prederror     = vp8_get16x16pred_error_neon;
-    cpi->rtcd.variance.get8x8var             = vp8_get8x8var_c;
-    cpi->rtcd.variance.get16x16var           = vp8_get16x16var_c;;
-    cpi->rtcd.variance.get4x4sse_cs          = vp8_get4x4sse_cs_neon;
-
-    cpi->rtcd.fdct.short4x4                  = vp8_short_fdct4x4_neon;
-    cpi->rtcd.fdct.short8x4                  = vp8_short_fdct8x4_neon;
-    cpi->rtcd.fdct.fast4x4                   = vp8_fast_fdct4x4_neon;
-    cpi->rtcd.fdct.fast8x4                   = vp8_fast_fdct8x4_neon;
-    cpi->rtcd.fdct.walsh_short4x4            = vp8_short_walsh4x4_neon;
-
-    cpi->rtcd.encodemb.berr                  = vp8_block_error_c;
-    cpi->rtcd.encodemb.mberr                 = vp8_mbblock_error_c;
-    cpi->rtcd.encodemb.mbuverr               = vp8_mbuverror_c;
-    cpi->rtcd.encodemb.subb                  = vp8_subtract_b_neon;
-    cpi->rtcd.encodemb.submby                = vp8_subtract_mby_neon;
-    cpi->rtcd.encodemb.submbuv               = vp8_subtract_mbuv_neon;
-
-    cpi->rtcd.quantize.quantb                = vp8_regular_quantize_b;
-    cpi->rtcd.quantize.fastquantb            = vp8_fast_quantize_b_c;
-    /* The neon quantizer has not been updated to match the new exact
-     * quantizer introduced in commit e04e2935
-     */
-    /*cpi->rtcd.quantize.fastquantb            = vp8_fast_quantize_b_neon;*/
-#elif HAVE_ARMV6
-    cpi->rtcd.variance.sad16x16              = vp8_sad16x16_c;
-    cpi->rtcd.variance.sad16x8               = vp8_sad16x8_c;
-    cpi->rtcd.variance.sad8x16               = vp8_sad8x16_c;
-    cpi->rtcd.variance.sad8x8                = vp8_sad8x8_c;
-    cpi->rtcd.variance.sad4x4                = vp8_sad4x4_c;
-
-    cpi->rtcd.variance.var4x4                = vp8_variance4x4_c;
-    cpi->rtcd.variance.var8x8                = vp8_variance8x8_c;
-    cpi->rtcd.variance.var8x16               = vp8_variance8x16_c;
-    cpi->rtcd.variance.var16x8               = vp8_variance16x8_c;
-    cpi->rtcd.variance.var16x16              = vp8_variance16x16_c;
-
-    cpi->rtcd.variance.subpixvar4x4          = vp8_sub_pixel_variance4x4_c;
-    cpi->rtcd.variance.subpixvar8x8          = vp8_sub_pixel_variance8x8_c;
-    cpi->rtcd.variance.subpixvar8x16         = vp8_sub_pixel_variance8x16_c;
-    cpi->rtcd.variance.subpixvar16x8         = vp8_sub_pixel_variance16x8_c;
-    cpi->rtcd.variance.subpixvar16x16        = vp8_sub_pixel_variance16x16_c;
-
-    cpi->rtcd.variance.mse16x16              = vp8_mse16x16_c;
-    cpi->rtcd.variance.getmbss               = vp8_get_mb_ss_c;
-
-    cpi->rtcd.variance.get16x16prederror     = vp8_get16x16pred_error_c;
-    cpi->rtcd.variance.get8x8var             = vp8_get8x8var_c;
-    cpi->rtcd.variance.get16x16var           = vp8_get16x16var_c;;
-    cpi->rtcd.variance.get4x4sse_cs          = vp8_get4x4sse_cs_c;
-
-    cpi->rtcd.fdct.short4x4                  = vp8_short_fdct4x4_c;
-    cpi->rtcd.fdct.short8x4                  = vp8_short_fdct8x4_c;
-    cpi->rtcd.fdct.fast4x4                   = vp8_fast_fdct4x4_c;
-    cpi->rtcd.fdct.fast8x4                   = vp8_fast_fdct8x4_c;
-    cpi->rtcd.fdct.walsh_short4x4            = vp8_short_walsh4x4_armv6;
-
-    cpi->rtcd.encodemb.berr                  = vp8_block_error_c;
-    cpi->rtcd.encodemb.mberr                 = vp8_mbblock_error_c;
-    cpi->rtcd.encodemb.mbuverr               = vp8_mbuverror_c;
-    cpi->rtcd.encodemb.subb                  = vp8_subtract_b_c;
-    cpi->rtcd.encodemb.submby                = vp8_subtract_mby_c;
-    cpi->rtcd.encodemb.submbuv               = vp8_subtract_mbuv_c;
-
-    cpi->rtcd.quantize.quantb                = vp8_regular_quantize_b;
-    cpi->rtcd.quantize.fastquantb            = vp8_fast_quantize_b_c;
-#else
-    //pure c
-    cpi->rtcd.variance.sad16x16              = vp8_sad16x16_c;
-    cpi->rtcd.variance.sad16x8               = vp8_sad16x8_c;
-    cpi->rtcd.variance.sad8x16               = vp8_sad8x16_c;
-    cpi->rtcd.variance.sad8x8                = vp8_sad8x8_c;
-    cpi->rtcd.variance.sad4x4                = vp8_sad4x4_c;
-
-    cpi->rtcd.variance.var4x4                = vp8_variance4x4_c;
-    cpi->rtcd.variance.var8x8                = vp8_variance8x8_c;
-    cpi->rtcd.variance.var8x16               = vp8_variance8x16_c;
-    cpi->rtcd.variance.var16x8               = vp8_variance16x8_c;
-    cpi->rtcd.variance.var16x16              = vp8_variance16x16_c;
-
-    cpi->rtcd.variance.subpixvar4x4          = vp8_sub_pixel_variance4x4_c;
-    cpi->rtcd.variance.subpixvar8x8          = vp8_sub_pixel_variance8x8_c;
-    cpi->rtcd.variance.subpixvar8x16         = vp8_sub_pixel_variance8x16_c;
-    cpi->rtcd.variance.subpixvar16x8         = vp8_sub_pixel_variance16x8_c;
-    cpi->rtcd.variance.subpixvar16x16        = vp8_sub_pixel_variance16x16_c;
-
-    cpi->rtcd.variance.mse16x16              = vp8_mse16x16_c;
-    cpi->rtcd.variance.getmbss               = vp8_get_mb_ss_c;
-
-    cpi->rtcd.variance.get16x16prederror     = vp8_get16x16pred_error_c;
-    cpi->rtcd.variance.get8x8var             = vp8_get8x8var_c;
-    cpi->rtcd.variance.get16x16var           = vp8_get16x16var_c;;
-    cpi->rtcd.variance.get4x4sse_cs          = vp8_get4x4sse_cs_c;
-
-    cpi->rtcd.fdct.short4x4                  = vp8_short_fdct4x4_c;
-    cpi->rtcd.fdct.short8x4                  = vp8_short_fdct8x4_c;
-    cpi->rtcd.fdct.fast4x4                   = vp8_fast_fdct4x4_c;
-    cpi->rtcd.fdct.fast8x4                   = vp8_fast_fdct8x4_c;
-    cpi->rtcd.fdct.walsh_short4x4            = vp8_short_walsh4x4_c;
-
-    cpi->rtcd.encodemb.berr                  = vp8_block_error_c;
-    cpi->rtcd.encodemb.mberr                 = vp8_mbblock_error_c;
-    cpi->rtcd.encodemb.mbuverr               = vp8_mbuverror_c;
-    cpi->rtcd.encodemb.subb                  = vp8_subtract_b_c;
-    cpi->rtcd.encodemb.submby                = vp8_subtract_mby_c;
-    cpi->rtcd.encodemb.submbuv               = vp8_subtract_mbuv_c;
-
-    cpi->rtcd.quantize.quantb                = vp8_regular_quantize_b;
-    cpi->rtcd.quantize.fastquantb            = vp8_fast_quantize_b_c;
-#endif
-#endif
-
-#if HAVE_ARMV7
-    vp8_yv12_copy_partial_frame_ptr = vpxyv12_copy_partial_frame_neon;
-#else
-    vp8_yv12_copy_partial_frame_ptr = vp8_yv12_copy_partial_frame;
-#endif
-}
diff --git a/vp8/encoder/arm/dct_arm.h b/vp8/encoder/arm/dct_arm.h

index 774599b..41fa5d1 100644 (file)
--- a/vp8/encoder/arm/dct_arm.h
+++ b/vp8/encoder/arm/dct_arm.h
@@ -15,9 +15,11 @@
  #if HAVE_ARMV6
  extern prototype_fdct(vp8_short_walsh4x4_armv6);
  
+#if !CONFIG_RUNTIME_CPU_DETECT
  #undef  vp8_fdct_walsh_short4x4
  #define vp8_fdct_walsh_short4x4 vp8_short_walsh4x4_armv6
  #endif
+#endif
  
  #if HAVE_ARMV7
  extern prototype_fdct(vp8_short_fdct4x4_neon);
@@ -26,6 +28,7 @@ extern prototype_fdct(vp8_fast_fdct4x4_neon);
  extern prototype_fdct(vp8_fast_fdct8x4_neon);
  extern prototype_fdct(vp8_short_walsh4x4_neon);
  
+#if !CONFIG_RUNTIME_CPU_DETECT
  #undef  vp8_fdct_short4x4
  #define vp8_fdct_short4x4 vp8_short_fdct4x4_neon
  
@@ -40,6 +43,7 @@ extern prototype_fdct(vp8_short_walsh4x4_neon);
  
  #undef  vp8_fdct_walsh_short4x4
  #define vp8_fdct_walsh_short4x4 vp8_short_walsh4x4_neon
+#endif
  
  #endif
  
diff --git a/vp8/encoder/arm/encodemb_arm.h b/vp8/encoder/arm/encodemb_arm.h

index eb69943..8fe4537 100644 (file)
--- a/vp8/encoder/arm/encodemb_arm.h
+++ b/vp8/encoder/arm/encodemb_arm.h
@@ -30,6 +30,7 @@ extern prototype_submbuv(vp8_subtract_mbuv_neon);
  //#undef  vp8_encodemb_mbuverr
  //#define vp8_encodemb_mbuverr vp8_mbuverror_c
  
+#if !CONFIG_RUNTIME_CPU_DETECT
  #undef  vp8_encodemb_subb
  #define vp8_encodemb_subb vp8_subtract_b_neon
  
@@ -38,6 +39,7 @@ extern prototype_submbuv(vp8_subtract_mbuv_neon);
  
  #undef  vp8_encodemb_submbuv
  #define vp8_encodemb_submbuv vp8_subtract_mbuv_neon
+#endif
  
  #endif
  
diff --git a/vp8/encoder/arm/variance_arm.h b/vp8/encoder/arm/variance_arm.h

index 859e43f..fb9dd5a 100644 (file)
--- a/vp8/encoder/arm/variance_arm.h
+++ b/vp8/encoder/arm/variance_arm.h
@@ -38,6 +38,7 @@ extern prototype_sad(vp8_get16x16pred_error_neon);
  //extern prototype_variance2(vp8_get16x16var_c);
  extern prototype_sad(vp8_get4x4sse_cs_neon);
  
+#if !CONFIG_RUNTIME_CPU_DETECT
  #undef  vp8_variance_sad4x4
  #define vp8_variance_sad4x4 vp8_sad4x4_neon
  
@@ -100,6 +101,7 @@ extern prototype_sad(vp8_get4x4sse_cs_neon);
  
  #undef  vp8_variance_get4x4sse_cs
  #define vp8_variance_get4x4sse_cs vp8_get4x4sse_cs_neon
+#endif
  
  #endif
  
diff --git a/vp8/encoder/bitstream.h b/vp8/encoder/bitstream.h

index 5596313..f5d148e 100644 (file)
--- a/vp8/encoder/bitstream.h
+++ b/vp8/encoder/bitstream.h
@@ -12,25 +12,25 @@
  #ifndef __INC_BITSTREAM_H
  #define __INC_BITSTREAM_H
  
-#if HAVE_ARMV7
-void vp8cx_pack_tokens_armv7(vp8_writer *w, const TOKENEXTRA *p, int xcount,
+#if HAVE_ARMV5TE
+void vp8cx_pack_tokens_armv5(vp8_writer *w, const TOKENEXTRA *p, int xcount,
                               vp8_token *,
                               vp8_extra_bit_struct *,
                               const vp8_tree_index *);
-void vp8cx_pack_tokens_into_partitions_armv7(VP8_COMP *, unsigned char *, int , int *,
+void vp8cx_pack_tokens_into_partitions_armv5(VP8_COMP *, unsigned char *, int , int *,
          vp8_token *,
          vp8_extra_bit_struct *,
          const vp8_tree_index *);
-void vp8cx_pack_mb_row_tokens_armv7(VP8_COMP *cpi, vp8_writer *w,
+void vp8cx_pack_mb_row_tokens_armv5(VP8_COMP *cpi, vp8_writer *w,
                                      vp8_token *,
                                      vp8_extra_bit_struct *,
                                      const vp8_tree_index *);
  # define pack_tokens(a,b,c)                  \
-    vp8cx_pack_tokens_armv7(a,b,c,vp8_coef_encodings,vp8_extra_bits,vp8_coef_tree)
+    vp8cx_pack_tokens_armv5(a,b,c,vp8_coef_encodings,vp8_extra_bits,vp8_coef_tree)
  # define pack_tokens_into_partitions(a,b,c,d)  \
-    vp8cx_pack_tokens_into_partitions_armv7(a,b,c,d,vp8_coef_encodings,vp8_extra_bits,vp8_coef_tree)
+    vp8cx_pack_tokens_into_partitions_armv5(a,b,c,d,vp8_coef_encodings,vp8_extra_bits,vp8_coef_tree)
  # define pack_mb_row_tokens(a,b)               \
-    vp8cx_pack_mb_row_tokens_armv7(a,b,vp8_coef_encodings,vp8_extra_bits,vp8_coef_tree)
+    vp8cx_pack_mb_row_tokens_armv5(a,b,vp8_coef_encodings,vp8_extra_bits,vp8_coef_tree)
  #else
  # define pack_tokens(a,b,c)                  pack_tokens_c(a,b,c)
  # define pack_tokens_into_partitions(a,b,c,d)  pack_tokens_into_partitions_c(a,b,c,d)
diff --git a/vp8/encoder/generic/csystemdependent.c b/vp8/encoder/generic/csystemdependent.c

index 1acb73d..520b08f 100644 (file)
--- a/vp8/encoder/generic/csystemdependent.c
+++ b/vp8/encoder/generic/csystemdependent.c
@@ -15,6 +15,7 @@
  
  
  void vp8_arch_x86_encoder_init(VP8_COMP *cpi);
+void vp8_arch_arm_encoder_init(VP8_COMP *cpi);
  
  
  void (*vp8_fast_quantize_b)(BLOCK *b, BLOCKD *d);
@@ -94,4 +95,8 @@ void vp8_cmachine_specific_config(VP8_COMP *cpi)
      vp8_arch_x86_encoder_init(cpi);
  #endif
  
+#if ARCH_ARM
+    vp8_arch_arm_encoder_init(cpi);
+#endif
+
  }
diff --git a/vp8/encoder/onyx_if.c b/vp8/encoder/onyx_if.c

index 53d68be..7e1583d 100644 (file)
--- a/vp8/encoder/onyx_if.c
+++ b/vp8/encoder/onyx_if.c
@@ -31,6 +31,9 @@
  #include "vpx_ports/vpx_timer.h"
  #include "vpxerrors.h"
  #include "temporal_filter.h"
+#if ARCH_ARM
+#include "vpx_ports/arm.h"
+#endif
  
  #include <math.h>
  #include <stdio.h>
@@ -2106,8 +2109,8 @@ VP8_PTR vp8_create_compressor(VP8_CONFIG *oxcf)
      CHECK_MEM_ERROR(cpi->rdtok, vpx_calloc(256 * 3 / 2, sizeof(TOKENEXTRA)));
      CHECK_MEM_ERROR(cpi->mb.ss, vpx_calloc(sizeof(search_site), (MAX_MVSEARCH_STEPS * 8) + 1));
  
-    vp8_cmachine_specific_config(cpi);
      vp8_create_common(&cpi->common);
+    vp8_cmachine_specific_config(cpi);
  
      vp8_init_config((VP8_PTR)cpi, oxcf);
  
@@ -2852,9 +2855,20 @@ static void scale_and_extend_source(YV12_BUFFER_CONFIG *sd, VP8_COMP *cpi)
      {
          //vp8_yv12_copy_frame_ptr(sd, &cpi->scaled_source);
  #if HAVE_ARMV7
-        vp8_yv12_copy_src_frame_func_neon(sd, &cpi->scaled_source);
-#else
-        vp8_yv12_copy_frame_ptr(sd, &cpi->scaled_source);
+#if CONFIG_RUNTIME_CPU_DETECT
+        if (cm->rtcd.flags & HAS_NEON)
+#endif
+        {
+            vp8_yv12_copy_src_frame_func_neon(sd, &cpi->scaled_source);
+        }
+#if CONFIG_RUNTIME_CPU_DETECT
+        else
+#endif
+#endif
+#if !HAVE_ARMV7 || CONFIG_RUNTIME_CPU_DETECT
+        {
+            vp8_yv12_copy_frame_ptr(sd, &cpi->scaled_source);
+        }
  #endif
  
          cpi->Source = &cpi->scaled_source;
@@ -4624,10 +4638,10 @@ static void Pass2Encode(VP8_COMP *cpi, unsigned long *size, unsigned char *dest,
  #if HAVE_ARMV7
  extern void vp8_push_neon(INT64 *store);
  extern void vp8_pop_neon(INT64 *store);
-static INT64 store_reg[8];
  #endif
  int vp8_receive_raw_frame(VP8_PTR ptr, unsigned int frame_flags, YV12_BUFFER_CONFIG *sd, INT64 time_stamp, INT64 end_time)
  {
+    INT64 store_reg[8];
      VP8_COMP *cpi = (VP8_COMP *) ptr;
      VP8_COMMON *cm = &cpi->common;
      struct vpx_usec_timer  timer;
@@ -4636,7 +4650,12 @@ int vp8_receive_raw_frame(VP8_PTR ptr, unsigned int frame_flags, YV12_BUFFER_CON
          return -1;
  
  #if HAVE_ARMV7
-    vp8_push_neon(store_reg);
+#if CONFIG_RUNTIME_CPU_DETECT
+    if (cm->rtcd.flags & HAS_NEON)
+#endif
+    {
+        vp8_push_neon(store_reg);
+    }
  #endif
  
      vpx_usec_timer_start(&timer);
@@ -4645,7 +4664,12 @@ int vp8_receive_raw_frame(VP8_PTR ptr, unsigned int frame_flags, YV12_BUFFER_CON
      if (cpi->source_buffer_count != 0 && cpi->source_buffer_count >= cpi->oxcf.lag_in_frames)
      {
  #if HAVE_ARMV7
-        vp8_pop_neon(store_reg);
+#if CONFIG_RUNTIME_CPU_DETECT
+        if (cm->rtcd.flags & HAS_NEON)
+#endif
+        {
+            vp8_pop_neon(store_reg);
+        }
  #endif
          return -1;
      }
@@ -4686,9 +4710,20 @@ int vp8_receive_raw_frame(VP8_PTR ptr, unsigned int frame_flags, YV12_BUFFER_CON
          s->source_time_stamp = time_stamp;
          s->source_frame_flags = frame_flags;
  #if HAVE_ARMV7
-        vp8_yv12_copy_src_frame_func_neon(sd, &s->source_buffer);
-#else
-        vp8_yv12_copy_frame_ptr(sd, &s->source_buffer);
+#if CONFIG_RUNTIME_CPU_DETECT
+        if (cm->rtcd.flags & HAS_NEON)
+#endif
+        {
+            vp8_yv12_copy_src_frame_func_neon(sd, &s->source_buffer);
+        }
+#if CONFIG_RUNTIME_CPU_DETECT
+        else
+#endif
+#endif
+#if !HAVE_ARMV7 || CONFIG_RUNTIME_CPU_DETECT
+        {
+            vp8_yv12_copy_frame_ptr(sd, &s->source_buffer);
+        }
  #endif
          cpi->source_buffer_count = 1;
      }
@@ -4697,14 +4732,19 @@ int vp8_receive_raw_frame(VP8_PTR ptr, unsigned int frame_flags, YV12_BUFFER_CON
      cpi->time_receive_data += vpx_usec_timer_elapsed(&timer);
  
  #if HAVE_ARMV7
-    vp8_pop_neon(store_reg);
+#if CONFIG_RUNTIME_CPU_DETECT
+    if (cm->rtcd.flags & HAS_NEON)
+#endif
+    {
+        vp8_pop_neon(store_reg);
+    }
  #endif
  
      return 0;
  }
  int vp8_get_compressed_data(VP8_PTR ptr, unsigned int *frame_flags, unsigned long *size, unsigned char *dest, INT64 *time_stamp, INT64 *time_end, int flush)
  {
-
+    INT64 store_reg[8];
      VP8_COMP *cpi = (VP8_COMP *) ptr;
      VP8_COMMON *cm = &cpi->common;
      struct vpx_usec_timer  tsctimer;
@@ -4715,7 +4755,12 @@ int vp8_get_compressed_data(VP8_PTR ptr, unsigned int *frame_flags, unsigned lon
          return -1;
  
  #if HAVE_ARMV7
-    vp8_push_neon(store_reg);
+#if CONFIG_RUNTIME_CPU_DETECT
+    if (cm->rtcd.flags & HAS_NEON)
+#endif
+    {
+        vp8_push_neon(store_reg);
+    }
  #endif
  
      vpx_usec_timer_start(&cmptimer);
@@ -4867,7 +4912,12 @@ int vp8_get_compressed_data(VP8_PTR ptr, unsigned int *frame_flags, unsigned lon
  #endif
  
  #if HAVE_ARMV7
-        vp8_pop_neon(store_reg);
+#if CONFIG_RUNTIME_CPU_DETECT
+        if (cm->rtcd.flags & HAS_NEON)
+#endif
+        {
+            vp8_pop_neon(store_reg);
+        }
  #endif
          return -1;
      }
@@ -4910,7 +4960,12 @@ int vp8_get_compressed_data(VP8_PTR ptr, unsigned int *frame_flags, unsigned lon
      if (!cpi)
      {
  #if HAVE_ARMV7
-        vp8_pop_neon(store_reg);
+#if CONFIG_RUNTIME_CPU_DETECT
+        if (cm->rtcd.flags & HAS_NEON)
+#endif
+        {
+            vp8_pop_neon(store_reg);
+        }
  #endif
          return 0;
      }
@@ -5099,7 +5154,12 @@ int vp8_get_compressed_data(VP8_PTR ptr, unsigned int *frame_flags, unsigned lon
  #endif
  
  #if HAVE_ARMV7
-    vp8_pop_neon(store_reg);
+#if CONFIG_RUNTIME_CPU_DETECT
+    if (cm->rtcd.flags & HAS_NEON)
+#endif
+    {
+        vp8_pop_neon(store_reg);
+    }
  #endif
  
      return 0;
diff --git a/vp8/encoder/picklpf.c b/vp8/encoder/picklpf.c

index 79e07db..09e8b54 100644 (file)
--- a/vp8/encoder/picklpf.c
+++ b/vp8/encoder/picklpf.c
@@ -16,6 +16,9 @@
  #include "vpx_scale/yv12extend.h"
  #include "vpx_scale/vpxscale.h"
  #include "alloccommon.h"
+#if ARCH_ARM
+#include "vpx_ports/arm.h"
+#endif
  
  extern void vp8_loop_filter_frame(VP8_COMMON *cm,    MACROBLOCKD *mbd,  int filt_val);
  extern void vp8_loop_filter_frame_yonly(VP8_COMMON *cm,    MACROBLOCKD *mbd,  int filt_val, int sharpness_lvl);
@@ -306,9 +309,20 @@ void vp8cx_pick_filter_level(YV12_BUFFER_CONFIG *sd, VP8_COMP *cpi)
  
      //  Make a copy of the unfiltered / processed recon buffer
  #if HAVE_ARMV7
-    vp8_yv12_copy_frame_yonly_no_extend_frame_borders_neon(cm->frame_to_show, &cpi->last_frame_uf);
-#else
-    vp8_yv12_copy_frame_ptr(cm->frame_to_show, &cpi->last_frame_uf);
+#if CONFIG_RUNTIME_CPU_DETECT
+    if (cm->rtcd.flags & HAS_NEON)
+#endif
+    {
+        vp8_yv12_copy_frame_yonly_no_extend_frame_borders_neon(cm->frame_to_show, &cpi->last_frame_uf);
+    }
+#if CONFIG_RUNTIME_CPU_DETECT
+    else
+#endif
+#endif
+#if !HAVE_ARMV7 || CONFIG_RUNTIME_CPU_DETECT
+    {
+        vp8_yv12_copy_frame_ptr(cm->frame_to_show, &cpi->last_frame_uf);
+    }
  #endif
  
      if (cm->frame_type == KEY_FRAME)
@@ -343,9 +357,20 @@ void vp8cx_pick_filter_level(YV12_BUFFER_CONFIG *sd, VP8_COMP *cpi)
  
      //  Re-instate the unfiltered frame
  #if HAVE_ARMV7
-    vp8_yv12_copy_frame_yonly_no_extend_frame_borders_neon(&cpi->last_frame_uf, cm->frame_to_show);
-#else
-    vp8_yv12_copy_frame_yonly_ptr(&cpi->last_frame_uf, cm->frame_to_show);
+#if CONFIG_RUNTIME_CPU_DETECT
+    if (cm->rtcd.flags & HAS_NEON)
+#endif
+    {
+        vp8_yv12_copy_frame_yonly_no_extend_frame_borders_neon(&cpi->last_frame_uf, cm->frame_to_show);
+    }
+#if CONFIG_RUNTIME_CPU_DETECT
+    else
+#endif
+#endif
+#if !HAVE_ARMV7 || CONFIG_RUNTIME_CPU_DETECT
+    {
+        vp8_yv12_copy_frame_yonly_ptr(&cpi->last_frame_uf, cm->frame_to_show);
+    }
  #endif
  
      while (filter_step > 0)
@@ -372,9 +397,20 @@ void vp8cx_pick_filter_level(YV12_BUFFER_CONFIG *sd, VP8_COMP *cpi)
  
              //  Re-instate the unfiltered frame
  #if HAVE_ARMV7
-            vp8_yv12_copy_frame_yonly_no_extend_frame_borders_neon(&cpi->last_frame_uf, cm->frame_to_show);
-#else
-            vp8_yv12_copy_frame_yonly_ptr(&cpi->last_frame_uf, cm->frame_to_show);
+#if CONFIG_RUNTIME_CPU_DETECT
+            if (cm->rtcd.flags & HAS_NEON)
+#endif
+            {
+                vp8_yv12_copy_frame_yonly_no_extend_frame_borders_neon(&cpi->last_frame_uf, cm->frame_to_show);
+            }
+#if CONFIG_RUNTIME_CPU_DETECT
+            else
+#endif
+#endif
+#if !HAVE_ARMV7 || CONFIG_RUNTIME_CPU_DETECT
+            {
+                vp8_yv12_copy_frame_yonly_ptr(&cpi->last_frame_uf, cm->frame_to_show);
+            }
  #endif
  
              // If value is close to the best so far then bias towards a lower loop filter value.
@@ -401,9 +437,20 @@ void vp8cx_pick_filter_level(YV12_BUFFER_CONFIG *sd, VP8_COMP *cpi)
  
              //  Re-instate the unfiltered frame
  #if HAVE_ARMV7
-            vp8_yv12_copy_frame_yonly_no_extend_frame_borders_neon(&cpi->last_frame_uf, cm->frame_to_show);
-#else
-            vp8_yv12_copy_frame_yonly_ptr(&cpi->last_frame_uf, cm->frame_to_show);
+#if CONFIG_RUNTIME_CPU_DETECT
+            if (cm->rtcd.flags & HAS_NEON)
+#endif
+            {
+                vp8_yv12_copy_frame_yonly_no_extend_frame_borders_neon(&cpi->last_frame_uf, cm->frame_to_show);
+            }
+#if CONFIG_RUNTIME_CPU_DETECT
+            else
+#endif
+#endif
+#if !HAVE_ARMV7 || CONFIG_RUNTIME_CPU_DETECT
+            {
+                vp8_yv12_copy_frame_yonly_ptr(&cpi->last_frame_uf, cm->frame_to_show);
+            }
  #endif
  
              // Was it better than the previous best?
diff --git a/vp8/vp8_common.mk b/vp8/vp8_common.mk

index ecca18a..3b5aaa5 100644 (file)
--- a/vp8/vp8_common.mk
+++ b/vp8/vp8_common.mk
@@ -112,6 +112,8 @@ VP8_COMMON_SRCS-$(HAVE_MMX) += common/x86/postproc_mmx.asm
  VP8_COMMON_SRCS-$(HAVE_SSE2) += common/x86/postproc_sse2.asm
  endif
  
+VP8_COMMON_SRCS-$(ARCH_ARM)  += common/arm/arm_systemdependent.c
+
  # common (c)
  VP8_COMMON_SRCS-$(HAVE_ARMV6)  += common/arm/bilinearfilter_arm.c
  VP8_COMMON_SRCS-$(HAVE_ARMV6)  += common/arm/filter_arm.c
@@ -119,15 +121,8 @@ VP8_COMMON_SRCS-$(HAVE_ARMV6)  += common/arm/loopfilter_arm.c
  VP8_COMMON_SRCS-$(HAVE_ARMV6)  += common/arm/recon_arm.c
  VP8_COMMON_SRCS-$(HAVE_ARMV6)  += common/arm/reconintra4x4_arm.c
  VP8_COMMON_SRCS-$(HAVE_ARMV6)  += common/arm/reconintra_arm.c
-VP8_COMMON_SRCS-$(HAVE_ARMV6)  += common/arm/systemdependent.c
  VP8_COMMON_SRCS-$(HAVE_ARMV6)  += common/arm/vpx_asm_offsets.c
  
-VP8_COMMON_SRCS_REMOVE-$(HAVE_ARMV6)  += common/filter_c.c
-VP8_COMMON_SRCS_REMOVE-$(HAVE_ARMV6)  += common/idctllm.c
-VP8_COMMON_SRCS_REMOVE-$(HAVE_ARMV6)  += common/recon.c
-VP8_COMMON_SRCS_REMOVE-$(HAVE_ARMV6)  += common/reconintra4x4.c
-VP8_COMMON_SRCS_REMOVE-$(HAVE_ARMV6)  += common/generic/systemdependent.c
-
  # common (armv6)
  VP8_COMMON_SRCS-$(HAVE_ARMV6)  += common/arm/armv6/bilinearfilter_v6$(ASM)
  VP8_COMMON_SRCS-$(HAVE_ARMV6)  += common/arm/armv6/copymem8x4_v6$(ASM)
diff --git a/vp8/vp8cx_arm.mk b/vp8/vp8cx_arm.mk

index 1424bd1..d126faf 100644 (file)
--- a/vp8/vp8cx_arm.mk
+++ b/vp8/vp8cx_arm.mk
@@ -13,17 +13,22 @@
  
  #File list for arm
  # encoder
-VP8_CX_SRCS-$(HAVE_ARMV6)  += encoder/arm/csystemdependent.c
+VP8_CX_SRCS-$(ARCH_ARM)  += encoder/arm/arm_csystemdependent.c
  
  VP8_CX_SRCS-$(HAVE_ARMV7)  += encoder/arm/encodemb_arm.c
  VP8_CX_SRCS-$(HAVE_ARMV7)  += encoder/arm/quantize_arm.c
  VP8_CX_SRCS-$(HAVE_ARMV7)  += encoder/arm/picklpf_arm.c
-VP8_CX_SRCS-$(HAVE_ARMV7)  += encoder/arm/boolhuff_arm.c
+VP8_CX_SRCS-$(HAVE_ARMV5TE) += encoder/arm/boolhuff_arm.c
  VP8_CX_SRCS-$(HAVE_ARMV7)  += encoder/arm/mcomp_arm.c
  
-VP8_CX_SRCS_REMOVE-$(HAVE_ARMV6)  += encoder/generic/csystemdependent.c
-VP8_CX_SRCS_REMOVE-$(HAVE_ARMV7)  += encoder/boolhuff.c
-VP8_CX_SRCS_REMOVE-$(HAVE_ARMV7)  += encoder/mcomp.c
+VP8_CX_SRCS_REMOVE-$(HAVE_ARMV5TE)  += encoder/boolhuff.c
+
+#File list for armv5te
+# encoder
+VP8_CX_SRCS-$(HAVE_ARMV5TE)  += encoder/arm/armv5te/boolhuff_armv5te$(ASM)
+VP8_CX_SRCS-$(HAVE_ARMV5TE)  += encoder/arm/armv5te/vp8_packtokens_armv5$(ASM)
+VP8_CX_SRCS-$(HAVE_ARMV5TE)  += encoder/arm/armv5te/vp8_packtokens_mbrow_armv5$(ASM)
+VP8_CX_SRCS-$(HAVE_ARMV5TE)  += encoder/arm/armv5te/vp8_packtokens_partitions_armv5$(ASM)
  
  #File list for armv6
  # encoder
@@ -44,10 +49,6 @@ VP8_CX_SRCS-$(HAVE_ARMV7)  += encoder/arm/neon/vp8_subpixelvariance8x8_neon$(ASM
  VP8_CX_SRCS-$(HAVE_ARMV7)  += encoder/arm/neon/vp8_subpixelvariance16x16_neon$(ASM)
  VP8_CX_SRCS-$(HAVE_ARMV7)  += encoder/arm/neon/vp8_subpixelvariance16x16s_neon$(ASM)
  VP8_CX_SRCS-$(HAVE_ARMV7)  += encoder/arm/neon/vp8_memcpy_neon$(ASM)
-VP8_CX_SRCS-$(HAVE_ARMV7)  += encoder/arm/neon/vp8_packtokens_armv7$(ASM)
-VP8_CX_SRCS-$(HAVE_ARMV7)  += encoder/arm/neon/vp8_packtokens_mbrow_armv7$(ASM)
-VP8_CX_SRCS-$(HAVE_ARMV7)  += encoder/arm/neon/vp8_packtokens_partitions_armv7$(ASM)
-VP8_CX_SRCS-$(HAVE_ARMV7)  += encoder/arm/neon/boolhuff_armv7$(ASM)
  VP8_CX_SRCS-$(HAVE_ARMV7)  += encoder/arm/neon/vp8_shortwalsh4x4_neon$(ASM)
  
  VP8_CX_SRCS-$(HAVE_ARMV7)  += encoder/arm/vpx_vp8_enc_asm_offsets.c
diff --git a/vp8/vp8dx_arm.mk b/vp8/vp8dx_arm.mk

index ae0610c..0803a9c 100644 (file)
--- a/vp8/vp8dx_arm.mk
+++ b/vp8/vp8dx_arm.mk
@@ -11,11 +11,9 @@
  
  #VP8_DX_SRCS list is modified according to different platforms.
  
+VP8_DX_SRCS-$(ARCH_ARM)  += decoder/arm/arm_dsystemdependent.c
+
  VP8_DX_SRCS-$(HAVE_ARMV6)  += decoder/arm/dequantize_arm.c
-VP8_DX_SRCS-$(HAVE_ARMV6)  += decoder/arm/dsystemdependent.c
-VP8_DX_SRCS_REMOVE-$(HAVE_ARMV6)  += decoder/generic/dsystemdependent.c
-VP8_DX_SRCS_REMOVE-$(HAVE_ARMV6)  += decoder/dequantize.c
-VP8_DX_SRCS_REMOVE-$(HAVE_ARMV6)  += decoder/idct_blk.c
  VP8_DX_SRCS-$(CONFIG_ARM_ASM_DETOK)  += decoder/arm/detokenize$(ASM)
  
  #File list for armv6
diff --git a/vpx_ports/arm.h b/vpx_ports/arm.h

new file mode 100644 (file)

index 0000000..81af1f1
--- /dev/null
+++ b/vpx_ports/arm.h
@@ -0,0 +1,27 @@
+/*
+ *  Copyright (c) 2010 The WebM project authors. All Rights Reserved.
+ *
+ *  Use of this source code is governed by a BSD-style license
+ *  that can be found in the LICENSE file in the root of the source
+ *  tree. An additional intellectual property rights grant can be found
+ *  in the file PATENTS.  All contributing project authors may
+ *  be found in the AUTHORS file in the root of the source tree.
+ */
+
+
+#ifndef VPX_PORTS_ARM_H
+#define VPX_PORTS_ARM_H
+#include <stdlib.h>
+#include "config.h"
+
+/* Capability bits returned by arm_cpu_caps(). They are distinct bits and are
+ * OR-ed together into a single int.
+ */
+
+/*ARMv5TE "Enhanced DSP" instructions.*/
+#define HAS_EDSP  0x01
+/*ARMv6 "Parallel" or "Media" instructions.*/
+#define HAS_MEDIA 0x02
+/*ARMv7 optional NEON instructions.*/
+#define HAS_NEON  0x04
+
+/* Returns a bitmask of the HAS_* flags above describing the instruction set
+ * extensions that may be used on the current CPU.
+ *
+ * The implementation (arm_cpudetect.c) honours two environment variables:
+ *   VPX_SIMD_CAPS       if set and non-empty, its numeric value is returned
+ *                       as-is instead of performing any detection.
+ *   VPX_SIMD_CAPS_MASK  if set and non-empty, its numeric value is AND-ed
+ *                       with the detected flags.
+ */
+int arm_cpu_caps(void);
+
+#endif
+
diff --git a/vpx_ports/arm_cpudetect.c b/vpx_ports/arm_cpudetect.c

new file mode 100644 (file)

index 0000000..4109924
--- /dev/null
+++ b/vpx_ports/arm_cpudetect.c
@@ -0,0 +1,190 @@
+/*
+ *  Copyright (c) 2010 The WebM project authors. All Rights Reserved.
+ *
+ *  Use of this source code is governed by a BSD-style license
+ *  that can be found in the LICENSE file in the root of the source
+ *  tree. An additional intellectual property rights grant can be found
+ *  in the file PATENTS.  All contributing project authors may
+ *  be found in the AUTHORS file in the root of the source tree.
+ */
+
+#include <stdlib.h>
+#include <string.h>
+#include "arm.h"
+
+/* Looks for an explicit capability override in the VPX_SIMD_CAPS environment
+ * variable.
+ *
+ * flags: out parameter; receives the parsed value (strtol, base auto-detected
+ *        from the prefix) when the variable is set and non-empty, 0 otherwise.
+ *
+ * Returns 0 when an override was found, -1 when it was not.
+ */
+static int arm_cpu_env_flags(int *flags)
+{
+    const char *caps_str = getenv("VPX_SIMD_CAPS");
+
+    /* Unset and empty are treated alike: no override. */
+    if (caps_str == NULL || *caps_str == '\0')
+    {
+        *flags = 0;
+        return -1;
+    }
+
+    *flags = (int)strtol(caps_str, NULL, 0);
+    return 0;
+}
+
+/* Returns the capability mask requested through the VPX_SIMD_CAPS_MASK
+ * environment variable, or ~0 (every bit allowed) when the variable is unset
+ * or empty. The value is parsed with strtol using base auto-detection.
+ */
+static int arm_cpu_env_mask(void)
+{
+    const char *mask_str = getenv("VPX_SIMD_CAPS_MASK");
+
+    if (mask_str != NULL && *mask_str != '\0')
+    {
+        return (int)strtol(mask_str, NULL, 0);
+    }
+
+    return ~0;
+}
+
+
+#if defined(_MSC_VER)
+/*For GetExceptionCode() and EXCEPTION_ILLEGAL_INSTRUCTION.*/
+#define WIN32_LEAN_AND_MEAN
+#define WIN32_EXTRA_LEAN
+#include <windows.h>
+
+/* MSVC implementation of arm_cpu_caps().
+ *
+ * If arm_cpu_env_flags() reports an override, that value is returned
+ * unchanged. Otherwise each extension that is both compiled in (HAVE_*) and
+ * permitted by arm_cpu_env_mask() is probed by executing one representative
+ * instruction inside a __try block; an EXCEPTION_ILLEGAL_INSTRUCTION means
+ * the extension is not available.
+ *
+ * Returns a bitmask of HAS_EDSP / HAS_MEDIA / HAS_NEON.
+ */
+int arm_cpu_caps(void)
+{
+    int flags;
+    int mask;
+    if (!arm_cpu_env_flags(&flags))
+    {
+        return flags;
+    }
+    mask = arm_cpu_env_mask();
+    /* MSVC has no inline __asm support for ARM, but it does let you __emit
+     *  instructions via their assembled hex code.
+     * All of these instructions should be essentially nops.
+     */
+    /* The HAVE_* macros are tested by value, not with defined(): the rest of
+     *  the code base uses "#if HAVE_ARMV7", so they are presumably always
+     *  defined (to 0 or 1) and defined() could never exclude a probe.
+     * Each probe has its own guard so none depends on another being enabled.
+     */
+#if HAVE_ARMV5TE
+    if (mask & HAS_EDSP)
+    {
+        __try
+        {
+            /*PLD [r13]*/
+            __emit(0xF5DDF000);
+            flags |= HAS_EDSP;
+        }
+        __except(GetExceptionCode() == EXCEPTION_ILLEGAL_INSTRUCTION)
+        {
+            /*Ignore exception.*/
+        }
+    }
+#endif
+#if HAVE_ARMV6
+    if (mask & HAS_MEDIA)
+    {
+        __try
+        {
+            /*SHADD8 r3,r3,r3*/
+            __emit(0xE6333F93);
+            flags |= HAS_MEDIA;
+        }
+        __except(GetExceptionCode() == EXCEPTION_ILLEGAL_INSTRUCTION)
+        {
+            /*Ignore exception.*/
+        }
+    }
+#endif
+#if HAVE_ARMV7
+    if (mask & HAS_NEON)
+    {
+        __try
+        {
+            /*VORR q0,q0,q0*/
+            __emit(0xF2200150);
+            flags |= HAS_NEON;
+        }
+        __except(GetExceptionCode() == EXCEPTION_ILLEGAL_INSTRUCTION)
+        {
+            /*Ignore exception.*/
+        }
+    }
+#endif
+    return flags & mask;
+}
+
+#elif defined(__linux__)
+#include <stdio.h>
+
+/* Linux implementation of arm_cpu_caps().
+ *
+ * If arm_cpu_env_flags() reports an override, that value is returned
+ * unchanged. Otherwise /proc/cpuinfo is scanned line by line:
+ *   - a "Features" line containing the word "edsp" / "neon" sets
+ *     HAS_EDSP / HAS_NEON;
+ *   - a "CPU architecture:" line whose number is >= 6 sets HAS_MEDIA.
+ * Only extensions that are compiled in (HAVE_*) are looked for. If the file
+ * cannot be opened no flags are set.
+ *
+ * Returns the detected flags AND-ed with arm_cpu_env_mask().
+ */
+int arm_cpu_caps(void)
+{
+    FILE *fin;
+    int flags;
+    int mask;
+    if (!arm_cpu_env_flags(&flags))
+    {
+        return flags;
+    }
+    mask = arm_cpu_env_mask();
+    /* Reading /proc/self/auxv would be easier, but that doesn't work reliably
+     *  on Android.
+     * This also means that detection will fail in Scratchbox.
+     */
+    fin = fopen("/proc/cpuinfo","r");
+    if(fin != NULL)
+    {
+        /* 512 should be enough for anybody (it's even enough for all the flags
+         * that x86 has accumulated... so far).
+         */
+        char buf[512];
+        while (fgets(buf, sizeof(buf), fin) != NULL)
+        {
+            /* strncmp (not memcmp) is used for the prefix tests so that a
+             *  line shorter than the prefix is never read past its
+             *  terminating NUL into stale or uninitialized buffer bytes.
+             * The HAVE_* macros are tested by value, matching the
+             *  "#if HAVE_ARMV7" usage elsewhere in the code base.
+             */
+#if HAVE_ARMV5TE || HAVE_ARMV7
+            if (strncmp(buf, "Features", 8) == 0)
+            {
+                char *p;
+#if HAVE_ARMV5TE
+                p = strstr(buf, " edsp");
+                /* Require a delimiter so a longer word is not matched. */
+                if (p != NULL && (p[5] == ' ' || p[5] == '\n'))
+                {
+                    flags |= HAS_EDSP;
+                }
+#endif
+#if HAVE_ARMV7
+                p = strstr(buf, " neon");
+                if (p != NULL && (p[5] == ' ' || p[5] == '\n'))
+                {
+                    flags |= HAS_NEON;
+                }
+#endif
+            }
+#endif
+#if HAVE_ARMV6
+            if (strncmp(buf, "CPU architecture:", 17) == 0)
+            {
+                int version;
+                version = atoi(buf+17);
+                if (version >= 6)
+                {
+                    flags |= HAS_MEDIA;
+                }
+            }
+#endif
+        }
+        fclose(fin);
+    }
+    return flags & mask;
+}
+
+#elif !CONFIG_RUNTIME_CPU_DETECT
+
+/* Fallback implementation of arm_cpu_caps() used when runtime CPU detection
+ * is disabled and no platform-specific probe applies.
+ *
+ * If arm_cpu_env_flags() reports an override, that value is returned
+ * unchanged. Otherwise every extension the library was built for (HAVE_*)
+ * is assumed to be present.
+ *
+ * Returns the resulting flags AND-ed with arm_cpu_env_mask().
+ */
+int arm_cpu_caps(void)
+{
+    int flags;
+    int mask;
+    if (!arm_cpu_env_flags(&flags))
+    {
+        return flags;
+    }
+    mask = arm_cpu_env_mask();
+    /* Test the HAVE_* macros by value: the rest of the code base uses
+     *  "#if HAVE_ARMV7", so they are presumably always defined (to 0 or 1)
+     *  and a defined() test would report every extension unconditionally.
+     */
+#if HAVE_ARMV5TE
+    flags |= HAS_EDSP;
+#endif
+#if HAVE_ARMV6
+    flags |= HAS_MEDIA;
+#endif
+#if HAVE_ARMV7
+    flags |= HAS_NEON;
+#endif
+    return flags & mask;
+}
+
+#else
+#error "--enable-runtime-cpu-detect selected, but no CPU detection method " \
+ "available for your platform. Reconfigure without --enable-runtime-cpu-detect."
+#endif
diff --git a/vpx_scale/arm/scalesystemdependant.c b/vpx_scale/arm/scalesystemdependant.c

index 1e8bcb8..fee76ff 100644 (file)
--- a/vpx_scale/arm/scalesystemdependant.c
+++ b/vpx_scale/arm/scalesystemdependant.c
@@ -10,6 +10,7 @@
  
  
  #include "vpx_ports/config.h"
+#include "vpx_ports/arm.h"
  #include "vpx_scale/vpxscale.h"
  
  
@@ -47,6 +48,9 @@ extern void vp8_yv12_copy_frame_neon(YV12_BUFFER_CONFIG *src_ybc, YV12_BUFFER_CO
   ****************************************************************************/
  void vp8_scale_machine_specific_config()
  {
+#if HAVE_ARMV7 && CONFIG_RUNTIME_CPU_DETECT
+    int flags;
+#endif
      /*
      vp8_horizontal_line_1_2_scale        = horizontal_line_1_2_scale_armv4;
      vp8_vertical_band_1_2_scale          = vertical_band_1_2_scale_armv4;
@@ -73,14 +77,20 @@ void vp8_scale_machine_specific_config()
      vp8_horizontal_line_5_4_scale         = vp8cx_horizontal_line_5_4_scale_c;
      */
  
-#if HAVE_ARMV7
-    vp8_yv12_extend_frame_borders_ptr      = vp8_yv12_extend_frame_borders_neon;
-    vp8_yv12_copy_frame_yonly_ptr          = vp8_yv12_copy_frame_yonly_neon;
-    vp8_yv12_copy_frame_ptr               = vp8_yv12_copy_frame_neon;
-#else
+#if !HAVE_ARMV7 || CONFIG_RUNTIME_CPU_DETECT
      vp8_yv12_extend_frame_borders_ptr      = vp8_yv12_extend_frame_borders;
      vp8_yv12_copy_frame_yonly_ptr          = vp8_yv12_copy_frame_yonly;
      vp8_yv12_copy_frame_ptr           = vp8_yv12_copy_frame;
  #endif
-
+#if HAVE_ARMV7
+#if CONFIG_RUNTIME_CPU_DETECT
+    flags = arm_cpu_caps();
+    if (flags & HAS_NEON)
+#endif
+    {
+        vp8_yv12_extend_frame_borders_ptr = vp8_yv12_extend_frame_borders_neon;
+        vp8_yv12_copy_frame_yonly_ptr     = vp8_yv12_copy_frame_yonly_neon;
+        vp8_yv12_copy_frame_ptr           = vp8_yv12_copy_frame_neon;
+    }
+#endif
  }
author	Timothy B. Terriberry <tterribe@xiph.org>
	Wed, 20 Oct 2010 22:39:11 +0000 (15:39 -0700)
committer	Johann <johannkoenig@google.com>
	Mon, 25 Oct 2010 13:23:29 +0000 (09:23 -0400)
libs.mk		patch \| blob \| history
vp8/common/arm/arm_systemdependent.c	[new file with mode: 0644]	patch \| blob
vp8/common/arm/idct_arm.h		patch \| blob \| history
vp8/common/arm/loopfilter_arm.h		patch \| blob \| history
vp8/common/arm/recon_arm.h		patch \| blob \| history
vp8/common/arm/subpixel_arm.h		patch \| blob \| history
vp8/common/arm/systemdependent.c	[deleted file]	patch \| blob \| history
vp8/common/generic/systemdependent.c		patch \| blob \| history
vp8/common/onyxc_int.h		patch \| blob \| history
vp8/decoder/arm/arm_dsystemdependent.c	[new file with mode: 0644]	patch \| blob
vp8/decoder/arm/dequantize_arm.h		patch \| blob \| history
vp8/decoder/arm/dsystemdependent.c	[deleted file]	patch \| blob \| history
vp8/decoder/generic/dsystemdependent.c		patch \| blob \| history
vp8/decoder/onyxd_if.c		patch \| blob \| history
vp8/encoder/arm/arm_csystemdependent.c	[new file with mode: 0644]	patch \| blob
vp8/encoder/arm/armv5te/boolhuff_armv5te.asm	[moved from vp8/encoder/arm/neon/boolhuff_armv7.asm with 95% similarity]	patch \| blob \| history
vp8/encoder/arm/armv5te/vp8_packtokens_armv5.asm	[moved from vp8/encoder/arm/neon/vp8_packtokens_armv7.asm with 93% similarity]	patch \| blob \| history
vp8/encoder/arm/armv5te/vp8_packtokens_mbrow_armv5.asm	[moved from vp8/encoder/arm/neon/vp8_packtokens_mbrow_armv7.asm with 94% similarity]	patch \| blob \| history
vp8/encoder/arm/armv5te/vp8_packtokens_partitions_armv5.asm	[moved from vp8/encoder/arm/neon/vp8_packtokens_partitions_armv7.asm with 95% similarity]	patch \| blob \| history
vp8/encoder/arm/csystemdependent.c	[deleted file]	patch \| blob \| history
vp8/encoder/arm/dct_arm.h		patch \| blob \| history
vp8/encoder/arm/encodemb_arm.h		patch \| blob \| history
vp8/encoder/arm/variance_arm.h		patch \| blob \| history
vp8/encoder/bitstream.h		patch \| blob \| history
vp8/encoder/generic/csystemdependent.c		patch \| blob \| history
vp8/encoder/onyx_if.c		patch \| blob \| history
vp8/encoder/picklpf.c		patch \| blob \| history
vp8/vp8_common.mk		patch \| blob \| history
vp8/vp8cx_arm.mk		patch \| blob \| history
vp8/vp8dx_arm.mk		patch \| blob \| history
vpx_ports/arm.h	[new file with mode: 0644]	patch \| blob
vpx_ports/arm_cpudetect.c	[new file with mode: 0644]	patch \| blob
vpx_scale/arm/scalesystemdependant.c		patch \| blob \| history