From 807acf17baf7ba839ecc0038cdaee5d17964e612 Mon Sep 17 00:00:00 2001 From: John Koleszar Date: Fri, 2 Nov 2012 15:39:14 -0700 Subject: [PATCH] vpx_ports: merge with master Change-Id: I25c067326153455abe1a79f8f44f70b87350e655 --- vpx_ports/arm.h | 2 +- vpx_ports/arm_cpudetect.c | 107 ++++++++++++++++++++++++++---------------- vpx_ports/asm_offsets.h | 6 +-- vpx_ports/emms.asm | 6 +-- vpx_ports/mem.h | 2 +- vpx_ports/mem_ops.h | 12 ++--- vpx_ports/mem_ops_aligned.h | 26 +++++----- vpx_ports/vpx_timer.h | 11 ++--- vpx_ports/vpxtypes.h | 7 +-- vpx_ports/x86.h | 42 ++++++++++++++++- vpx_ports/x86_abi_support.asm | 50 +++++++++++++++++++- 11 files changed, 189 insertions(+), 82 deletions(-) diff --git a/vpx_ports/arm.h b/vpx_ports/arm.h index 81af1f1..525a764 100644 --- a/vpx_ports/arm.h +++ b/vpx_ports/arm.h @@ -12,7 +12,7 @@ #ifndef VPX_PORTS_ARM_H #define VPX_PORTS_ARM_H #include -#include "config.h" +#include "vpx_config.h" /*ARMv5TE "Enhanced DSP" instructions.*/ #define HAS_EDSP 0x01 diff --git a/vpx_ports/arm_cpudetect.c b/vpx_ports/arm_cpudetect.c index 7581159..f36d46d 100644 --- a/vpx_ports/arm_cpudetect.c +++ b/vpx_ports/arm_cpudetect.c @@ -29,8 +29,31 @@ static int arm_cpu_env_mask(void) { return env && *env ? (int)strtol(env, NULL, 0) : ~0; } +#if !CONFIG_RUNTIME_CPU_DETECT -#if defined(_MSC_VER) +int arm_cpu_caps(void) { + /* This function should actually be a no-op. There is no way to adjust any of + * these because the RTCD tables do not exist: the functions are called + * statically */ + int flags; + int mask; + if (!arm_cpu_env_flags(&flags)) { + return flags; + } + mask = arm_cpu_env_mask(); +#if HAVE_EDSP + flags |= HAS_EDSP; +#endif /* HAVE_EDSP */ +#if HAVE_MEDIA + flags |= HAS_MEDIA; +#endif /* HAVE_MEDIA */ +#if HAVE_NEON + flags |= HAS_NEON; +#endif /* HAVE_NEON */ + return flags & mask; +} + +#elif defined(_MSC_VER) /* end !CONFIG_RUNTIME_CPU_DETECT */ /*For GetExceptionCode() and EXCEPTION_ILLEGAL_INSTRUCTION.*/ #define WIN32_LEAN_AND_MEAN #define WIN32_EXTRA_LEAN @@ -47,7 +70,7 @@ int arm_cpu_caps(void) { * instructions via their assembled hex code. * All of these instructions should be essentially nops. */ -#if defined(HAVE_ARMV5TE) +#if HAVE_EDSP if (mask & HAS_EDSP) { __try { /*PLD [r13]*/ @@ -57,7 +80,7 @@ int arm_cpu_caps(void) { /*Ignore exception.*/ } } -#if defined(HAVE_ARMV6) +#if HAVE_MEDIA if (mask & HAS_MEDIA) __try { /*SHADD8 r3,r3,r3*/ @@ -67,7 +90,7 @@ int arm_cpu_caps(void) { /*Ignore exception.*/ } } -#if defined(HAVE_ARMV7) +#if HAVE_NEON if (mask &HAS_NEON) { __try { /*VORR q0,q0,q0*/ @@ -77,13 +100,39 @@ if (mask &HAS_NEON) { /*Ignore exception.*/ } } -#endif -#endif -#endif +#endif /* HAVE_NEON */ +#endif /* HAVE_MEDIA */ +#endif /* HAVE_EDSP */ return flags & mask; } -#elif defined(__linux__) +#elif defined(__ANDROID__) /* end _MSC_VER */ +#include + +int arm_cpu_caps(void) { + int flags; + int mask; + uint64_t features; + if (!arm_cpu_env_flags(&flags)) { + return flags; + } + mask = arm_cpu_env_mask(); + features = android_getCpuFeatures(); + +#if HAVE_EDSP + flags |= HAS_EDSP; +#endif /* HAVE_EDSP */ +#if HAVE_MEDIA + flags |= HAS_MEDIA; +#endif /* HAVE_MEDIA */ +#if HAVE_NEON + if (features & ANDROID_CPU_ARM_FEATURE_NEON) + flags |= HAS_NEON; +#endif /* HAVE_NEON */ + return flags & mask; +} + +#elif defined(__linux__) /* end __ANDROID__ */ #include int arm_cpu_caps(void) { @@ -105,24 +154,24 @@ int arm_cpu_caps(void) { */ char buf[512]; while (fgets(buf, 511, fin) != NULL) { -#if defined(HAVE_ARMV5TE) || defined(HAVE_ARMV7) +#if HAVE_EDSP || HAVE_NEON if (memcmp(buf, "Features", 8) == 0) { char *p; -#if defined(HAVE_ARMV5TE) +#if HAVE_EDSP p = strstr(buf, " edsp"); if (p != NULL && (p[5] == ' ' || p[5] == '\n')) { flags |= HAS_EDSP; } -#if defined(HAVE_ARMV7) +#if HAVE_NEON p = strstr(buf, " neon"); if (p != NULL && (p[5] == ' ' || p[5] == '\n')) { flags |= HAS_NEON; } -#endif -#endif +#endif /* HAVE_NEON */ +#endif /* HAVE_EDSP */ } -#endif -#if defined(HAVE_ARMV6) +#endif /* HAVE_EDSP || HAVE_NEON */ +#if HAVE_MEDIA if (memcmp(buf, "CPU architecture:", 17) == 0) { int version; version = atoi(buf + 17); @@ -130,35 +179,13 @@ int arm_cpu_caps(void) { flags |= HAS_MEDIA; } } -#endif +#endif /* HAVE_MEDIA */ } fclose(fin); } return flags & mask; } - -#elif !CONFIG_RUNTIME_CPU_DETECT - -int arm_cpu_caps(void) { - int flags; - int mask; - if (!arm_cpu_env_flags(&flags)) { - return flags; - } - mask = arm_cpu_env_mask(); -#if defined(HAVE_ARMV5TE) - flags |= HAS_EDSP; -#endif -#if defined(HAVE_ARMV6) - flags |= HAS_MEDIA; -#endif -#if defined(HAVE_ARMV7) - flags |= HAS_NEON; -#endif - return flags & mask; -} - -#else +#else /* end __linux__ */ #error "--enable-runtime-cpu-detect selected, but no CPU detection method " \ -"available for your platform. Reconfigure without --enable-runtime-cpu-detect." +"available for your platform. Reconfigure with --disable-runtime-cpu-detect." #endif diff --git a/vpx_ports/asm_offsets.h b/vpx_ports/asm_offsets.h index 6752463..d3a3e5a 100644 --- a/vpx_ports/asm_offsets.h +++ b/vpx_ports/asm_offsets.h @@ -18,12 +18,12 @@ static void assert_##name(void) UNUSED;\ static void assert_##name(void) {switch(0){case 0:case !!(cond):;}} -#if defined(INLINE_ASM) && INLINE_ASM -#define DEFINE(sym, val) asm("\n" #sym " EQU %0" : : "i" (val)); +#if INLINE_ASM +#define DEFINE(sym, val) asm("\n" #sym " EQU %0" : : "i" (val)) #define BEGIN int main(void) { #define END return 0; } #else -#define DEFINE(sym, val) int sym = val; +#define DEFINE(sym, val) const int sym = val #define BEGIN #define END #endif diff --git a/vpx_ports/emms.asm b/vpx_ports/emms.asm index 306e235..efad1a5 100644 --- a/vpx_ports/emms.asm +++ b/vpx_ports/emms.asm @@ -12,14 +12,14 @@ %include "vpx_ports/x86_abi_support.asm" section .text - global sym(vpx_reset_mmx_state) +global sym(vpx_reset_mmx_state) PRIVATE sym(vpx_reset_mmx_state): emms ret %ifidn __OUTPUT_FORMAT__,x64 -global sym(vpx_winx64_fldcw) +global sym(vpx_winx64_fldcw) PRIVATE sym(vpx_winx64_fldcw): sub rsp, 8 mov [rsp], rcx ; win x64 specific @@ -28,7 +28,7 @@ sym(vpx_winx64_fldcw): ret -global sym(vpx_winx64_fstcw) +global sym(vpx_winx64_fstcw) PRIVATE sym(vpx_winx64_fstcw): sub rsp, 8 fstcw [rsp] diff --git a/vpx_ports/mem.h b/vpx_ports/mem.h index c68b908..b130da8 100644 --- a/vpx_ports/mem.h +++ b/vpx_ports/mem.h @@ -14,7 +14,7 @@ #include "vpx_config.h" #include "vpx/vpx_integer.h" -#if defined(__GNUC__) && __GNUC__ +#if (defined(__GNUC__) && __GNUC__) || defined(__SUNPRO_C) #define DECLARE_ALIGNED(n,typ,val) typ val __attribute__ ((aligned (n))) #elif defined(_MSC_VER) #define DECLARE_ALIGNED(n,typ,val) __declspec(align(n)) typ val diff --git a/vpx_ports/mem_ops.h b/vpx_ports/mem_ops.h index 038f293..2d44a3a 100644 --- a/vpx_ports/mem_ops.h +++ b/vpx_ports/mem_ops.h @@ -139,27 +139,27 @@ static unsigned MEM_VALUE_T mem_get_le32(const void *vmem) { #undef mem_get_sbe16 #define mem_get_sbe16 mem_ops_wrap_symbol(mem_get_sbe16) -mem_get_s_generic(be, 16); +mem_get_s_generic(be, 16) #undef mem_get_sbe24 #define mem_get_sbe24 mem_ops_wrap_symbol(mem_get_sbe24) -mem_get_s_generic(be, 24); +mem_get_s_generic(be, 24) #undef mem_get_sbe32 #define mem_get_sbe32 mem_ops_wrap_symbol(mem_get_sbe32) -mem_get_s_generic(be, 32); +mem_get_s_generic(be, 32) #undef mem_get_sle16 #define mem_get_sle16 mem_ops_wrap_symbol(mem_get_sle16) -mem_get_s_generic(le, 16); +mem_get_s_generic(le, 16) #undef mem_get_sle24 #define mem_get_sle24 mem_ops_wrap_symbol(mem_get_sle24) -mem_get_s_generic(le, 24); +mem_get_s_generic(le, 24) #undef mem_get_sle32 #define mem_get_sle32 mem_ops_wrap_symbol(mem_get_sle32) -mem_get_s_generic(le, 32); +mem_get_s_generic(le, 32) #undef mem_put_be16 #define mem_put_be16 mem_ops_wrap_symbol(mem_put_be16) diff --git a/vpx_ports/mem_ops_aligned.h b/vpx_ports/mem_ops_aligned.h index de1b6d4..0100300 100644 --- a/vpx_ports/mem_ops_aligned.h +++ b/vpx_ports/mem_ops_aligned.h @@ -80,7 +80,7 @@ *mem = (uint##sz##_t)raw;\ } -#include "config.h" +#include "vpx_config.h" #if CONFIG_BIG_ENDIAN #define mem_get_be_aligned_generic(sz) mem_get_ne_aligned_generic(be,sz) #define mem_get_sbe_aligned_generic(sz) mem_get_sne_aligned_generic(be,sz) @@ -99,51 +99,51 @@ #undef mem_get_be16_aligned #define mem_get_be16_aligned mem_ops_wrap_symbol(mem_get_be16_aligned) -mem_get_be_aligned_generic(16); +mem_get_be_aligned_generic(16) #undef mem_get_be32_aligned #define mem_get_be32_aligned mem_ops_wrap_symbol(mem_get_be32_aligned) -mem_get_be_aligned_generic(32); +mem_get_be_aligned_generic(32) #undef mem_get_le16_aligned #define mem_get_le16_aligned mem_ops_wrap_symbol(mem_get_le16_aligned) -mem_get_le_aligned_generic(16); +mem_get_le_aligned_generic(16) #undef mem_get_le32_aligned #define mem_get_le32_aligned mem_ops_wrap_symbol(mem_get_le32_aligned) -mem_get_le_aligned_generic(32); +mem_get_le_aligned_generic(32) #undef mem_get_sbe16_aligned #define mem_get_sbe16_aligned mem_ops_wrap_symbol(mem_get_sbe16_aligned) -mem_get_sbe_aligned_generic(16); +mem_get_sbe_aligned_generic(16) #undef mem_get_sbe32_aligned #define mem_get_sbe32_aligned mem_ops_wrap_symbol(mem_get_sbe32_aligned) -mem_get_sbe_aligned_generic(32); +mem_get_sbe_aligned_generic(32) #undef mem_get_sle16_aligned #define mem_get_sle16_aligned mem_ops_wrap_symbol(mem_get_sle16_aligned) -mem_get_sle_aligned_generic(16); +mem_get_sle_aligned_generic(16) #undef mem_get_sle32_aligned #define mem_get_sle32_aligned mem_ops_wrap_symbol(mem_get_sle32_aligned) -mem_get_sle_aligned_generic(32); +mem_get_sle_aligned_generic(32) #undef mem_put_be16_aligned #define mem_put_be16_aligned mem_ops_wrap_symbol(mem_put_be16_aligned) -mem_put_be_aligned_generic(16); +mem_put_be_aligned_generic(16) #undef mem_put_be32_aligned #define mem_put_be32_aligned mem_ops_wrap_symbol(mem_put_be32_aligned) -mem_put_be_aligned_generic(32); +mem_put_be_aligned_generic(32) #undef mem_put_le16_aligned #define mem_put_le16_aligned mem_ops_wrap_symbol(mem_put_le16_aligned) -mem_put_le_aligned_generic(16); +mem_put_le_aligned_generic(16) #undef mem_put_le32_aligned #define mem_put_le32_aligned mem_ops_wrap_symbol(mem_put_le32_aligned) -mem_put_le_aligned_generic(32); +mem_put_le_aligned_generic(32) #undef mem_get_ne_aligned_generic #undef mem_get_se_aligned_generic diff --git a/vpx_ports/vpx_timer.h b/vpx_ports/vpx_timer.h index ae7a996..cdad9ef 100644 --- a/vpx_ports/vpx_timer.h +++ b/vpx_ports/vpx_timer.h @@ -11,6 +11,7 @@ #ifndef VPX_TIMER_H #define VPX_TIMER_H +#include "vpx/vpx_integer.h" #if CONFIG_OS_SUPPORT @@ -72,22 +73,20 @@ vpx_usec_timer_mark(struct vpx_usec_timer *t) { } -static long +static int64_t vpx_usec_timer_elapsed(struct vpx_usec_timer *t) { #if defined(_WIN32) LARGE_INTEGER freq, diff; diff.QuadPart = t->end.QuadPart - t->begin.QuadPart; - if (QueryPerformanceFrequency(&freq) && diff.QuadPart < freq.QuadPart) - return (long)(diff.QuadPart * 1000000 / freq.QuadPart); - - return 1000000; + QueryPerformanceFrequency(&freq); + return diff.QuadPart * 1000000 / freq.QuadPart; #else struct timeval diff; timersub(&t->end, &t->begin, &diff); - return diff.tv_sec ? 1000000 : diff.tv_usec; + return diff.tv_sec * 1000000 + diff.tv_usec; #endif } diff --git a/vpx_ports/vpxtypes.h b/vpx_ports/vpxtypes.h index 9dce15e..4365213 100644 --- a/vpx_ports/vpxtypes.h +++ b/vpx_ports/vpxtypes.h @@ -12,7 +12,7 @@ #ifndef __VPXTYPES_H__ #define __VPXTYPES_H__ -#include "vpx_ports/config.h" +#include "vpx_config.h" // #include #ifdef _MSC_VER @@ -96,11 +96,6 @@ typedef unsigned __int64 vpxu64; # define PRId64 "lld" # define VPX64 PRId64 typedef long vpxs64; -#elif defined(__SYMBIAN32__) -# undef PRId64 -# define PRId64 "u" -# define VPX64 PRId64 -typedef unsigned int vpxs64; #else # error "64 bit integer type undefined for this platform!" #endif diff --git a/vpx_ports/x86.h b/vpx_ports/x86.h index 5aeaf9d..f883772 100644 --- a/vpx_ports/x86.h +++ b/vpx_ports/x86.h @@ -12,7 +12,7 @@ #ifndef VPX_PORTS_X86_H #define VPX_PORTS_X86_H #include -#include "config.h" +#include "vpx_config.h" typedef enum { VPX_CPU_UNKNOWN = -1, @@ -49,6 +49,26 @@ typedef enum { : "=a" (ax), "=D" (bx), "=c" (cx), "=d" (dx) \ : "a" (func)); #endif +#elif defined(__SUNPRO_C) || defined(__SUNPRO_CC) +#if ARCH_X86_64 +#define cpuid(func,ax,bx,cx,dx)\ + asm volatile (\ + "xchg %rsi, %rbx \n\t" \ + "cpuid \n\t" \ + "movl %ebx, %edi \n\t" \ + "xchg %rsi, %rbx \n\t" \ + : "=a" (ax), "=D" (bx), "=c" (cx), "=d" (dx) \ + : "a" (func)); +#else +#define cpuid(func,ax,bx,cx,dx)\ + asm volatile (\ + "pushl %ebx \n\t" \ + "cpuid \n\t" \ + "movl %ebx, %edi \n\t" \ + "popl %ebx \n\t" \ + : "=a" (ax), "=D" (bx), "=c" (cx), "=d" (dx) \ + : "a" (func)); +#endif #else #if ARCH_X86_64 void __cpuid(int CPUInfo[4], int info_type); @@ -133,9 +153,13 @@ x86_readtsc(void) { unsigned int tsc; __asm__ __volatile__("rdtsc\n\t":"=a"(tsc):); return tsc; +#elif defined(__SUNPRO_C) || defined(__SUNPRO_CC) + unsigned int tsc; + asm volatile("rdtsc\n\t":"=a"(tsc):); + return tsc; #else #if ARCH_X86_64 - return __rdtsc(); + return (unsigned int)__rdtsc(); #else __asm rdtsc; #endif @@ -146,6 +170,9 @@ x86_readtsc(void) { #if defined(__GNUC__) && __GNUC__ #define x86_pause_hint()\ __asm__ __volatile__ ("pause \n\t") +#elif defined(__SUNPRO_C) || defined(__SUNPRO_CC) +#define x86_pause_hint()\ + asm volatile ("pause \n\t") #else #if ARCH_X86_64 #define x86_pause_hint()\ @@ -167,6 +194,17 @@ x87_get_control_word(void) { __asm__ __volatile__("fstcw %0\n\t":"=m"( *&mode):); return mode; } +#elif defined(__SUNPRO_C) || defined(__SUNPRO_CC) +static void +x87_set_control_word(unsigned short mode) { + asm volatile("fldcw %0" : : "m"( *&mode)); +} +static unsigned short +x87_get_control_word(void) { + unsigned short mode; + asm volatile("fstcw %0\n\t":"=m"( *&mode):); + return mode; +} #elif ARCH_X86_64 /* No fldcw intrinsics on Windows x64, punt to external asm */ extern void vpx_winx64_fldcw(unsigned short mode); diff --git a/vpx_ports/x86_abi_support.asm b/vpx_ports/x86_abi_support.asm index 7382a91..0c9fe37 100644 --- a/vpx_ports/x86_abi_support.asm +++ b/vpx_ports/x86_abi_support.asm @@ -22,6 +22,8 @@ %define ABI_IS_32BIT 1 %elifidn __OUTPUT_FORMAT__,win32 %define ABI_IS_32BIT 1 +%elifidn __OUTPUT_FORMAT__,aout +%define ABI_IS_32BIT 1 %else %define ABI_IS_32BIT 0 %endif @@ -86,12 +88,41 @@ %define sym(x) x %elifidn __OUTPUT_FORMAT__,elf64 %define sym(x) x +%elifidn __OUTPUT_FORMAT__,elfx32 +%define sym(x) x %elifidn __OUTPUT_FORMAT__,x64 %define sym(x) x %else %define sym(x) _ %+ x %endif +; PRIVATE +; Macro for the attribute to hide a global symbol for the target ABI. +; This is only active if CHROMIUM is defined. +; +; Chromium doesn't like exported global symbols due to symbol clashing with +; plugins among other things. +; +; Requires Chromium's patched copy of yasm: +; http://src.chromium.org/viewvc/chrome?view=rev&revision=73761 +; http://www.tortall.net/projects/yasm/ticket/236 +; +%ifdef CHROMIUM + %ifidn __OUTPUT_FORMAT__,elf32 + %define PRIVATE :hidden + %elifidn __OUTPUT_FORMAT__,elf64 + %define PRIVATE :hidden + %elifidn __OUTPUT_FORMAT__,elfx32 + %define PRIVATE :hidden + %elifidn __OUTPUT_FORMAT__,x64 + %define PRIVATE + %else + %define PRIVATE :private_extern + %endif +%else + %define PRIVATE +%endif + ; arg() ; Return the address specification of the given argument ; @@ -179,7 +210,16 @@ %endmacro %endif %endif - %define HIDDEN_DATA(x) x + + %ifdef CHROMIUM + %ifidn __OUTPUT_FORMAT__,macho32 + %define HIDDEN_DATA(x) x:private_extern + %else + %define HIDDEN_DATA(x) x + %endif + %else + %define HIDDEN_DATA(x) x + %endif %else %macro GET_GOT 1 %endmacro @@ -187,6 +227,9 @@ %ifidn __OUTPUT_FORMAT__,elf64 %define WRT_PLT wrt ..plt %define HIDDEN_DATA(x) x:data hidden + %elifidn __OUTPUT_FORMAT__,elfx32 + %define WRT_PLT wrt ..plt + %define HIDDEN_DATA(x) x:data hidden %else %define HIDDEN_DATA(x) x %endif @@ -314,6 +357,8 @@ %macro SECTION_RODATA 0 section .text %endmacro +%elifidn __OUTPUT_FORMAT__,aout +%define SECTION_RODATA section .data %else %define SECTION_RODATA section .rodata %endif @@ -326,5 +371,8 @@ section .text %elifidn __OUTPUT_FORMAT__,elf64 section .note.GNU-stack noalloc noexec nowrite progbits section .text +%elifidn __OUTPUT_FORMAT__,elfx32 +section .note.GNU-stack noalloc noexec nowrite progbits +section .text %endif -- 2.7.4