__yCbCrToRGB_16s16s_P3P3_t yCbCrToRGB_16s16s_P3P3;
__RGBToYCbCr_16s16s_P3P3_t RGBToYCbCr_16s16s_P3P3;
__RGBToRGB_16s8u_P3AC4R_t RGBToRGB_16s8u_P3AC4R;
-
- /* internal use for CPU flags and such. */
- void *hints;
} primitives_t;
#ifdef __cplusplus
/* Prototypes for the externally-visible entrypoints. */
FREERDP_API void primitives_init(void);
FREERDP_API primitives_t *primitives_get(void);
-FREERDP_API UINT32 primitives_get_flags(
- const primitives_t *prims);
-FREERDP_API void primitives_flags_str(
- const primitives_t *prims,
- char *str,
- size_t len);
FREERDP_API void primitives_deinit(void);
#ifdef __cplusplus
-----------------\r
As the need arises, new optimizations can be added to the library,\r
including NEON, AVX, and perhaps OpenCL or other SIMD implementations.\r
-The initialization routine is free to do any quick run-time test to\r
-determine which features are available before hooking the operation's\r
-function pointer, or it can simply look at the processor features list\r
-from the hints passed to the initialization routine.\r
+The CPU feature detection is done in winpr/sysinfo.\r
\r
\r
Adding Entrypoints\r
The template functions can frequently be used to extend the\r
operations without writing a lot of new code.\r
\r
-\r
-Flags\r
------\r
-The entrypoint primitives_get_flags() returns a bitfield of processor flags\r
-(as defined in primitives.h) and primitives_flag_str() returns a string\r
-related to those processor flags, for debugging and information. The\r
-bitfield can be used elsewhere in the code as needed.\r
-\r
-\r
Cache Management\r
----------------\r
I haven't found a lot of speed improvement by attempting prefetch, and\r
/* ------------------------------------------------------------------------- */
void primitives_init_add(
- const primitives_hints_t *hints,
primitives_t *prims)
{
prims->add_16s = general_add_16s;
- primitives_init_add_opt(hints, prims);
+ primitives_init_add_opt(prims);
}
/* ------------------------------------------------------------------------- */
pstatus_t general_add_16s(const INT16 *pSrc1, const INT16 *pSrc2, INT16 *pDst, INT32 len);
-void primitives_init_add_opt(const primitives_hints_t *hints, primitives_t *prims);
+void primitives_init_add_opt(primitives_t *prims);
#endif /* !__PRIM_ADD_H_INCLUDED__ */
#include <freerdp/types.h>
#include <freerdp/primitives.h>
+#include <winpr/sysinfo.h>
#ifdef WITH_SSE2
#include <emmintrin.h>
/* ------------------------------------------------------------------------- */
void primitives_init_add_opt(
- const primitives_hints_t *hints,
primitives_t *prims)
{
#ifdef WITH_IPP
prims->add_16s = (__add_16s_t) ippsAdd_16s;
#elif defined(WITH_SSE2)
- if ((hints->x86_flags & PRIM_X86_SSE2_AVAILABLE)
- && (hints->x86_flags & PRIM_X86_SSE3_AVAILABLE)) /* for LDDQU */
+ if (IsProcessorFeaturePresent(PF_XMMI64_INSTRUCTIONS_AVAILABLE)
+ && IsProcessorFeaturePresent(PF_SSE3_INSTRUCTIONS_AVAILABLE)) /* for LDDQU */
{
prims->add_16s = sse3_add_16s;
}
#endif
}
-
-
}
/* ------------------------------------------------------------------------- */
-void primitives_init_alphaComp(const primitives_hints_t* hints, primitives_t* prims)
+void primitives_init_alphaComp(primitives_t* prims)
{
prims->alphaComp_argb = general_alphaComp_argb;
- primitives_init_alphaComp_opt(hints, prims);
+ primitives_init_alphaComp_opt(prims);
}
/* ------------------------------------------------------------------------- */
pstatus_t general_alphaComp_argb(const BYTE *pSrc1, INT32 src1Step, const BYTE *pSrc2, INT32 src2Step, BYTE *pDst, INT32 dstStep, INT32 width, INT32 height);
-void primitives_init_alphaComp_opt(const primitives_hints_t* hints, primitives_t* prims);
+void primitives_init_alphaComp_opt(primitives_t* prims);
#endif /* !__PRIM_ALPHACOMP_H_INCLUDED__ */
#include <freerdp/types.h>
#include <freerdp/primitives.h>
+#include <winpr/sysinfo.h>
#ifdef WITH_SSE2
#include <emmintrin.h>
#endif
/* ------------------------------------------------------------------------- */
-void primitives_init_alphaComp_opt(const primitives_hints_t* hints, primitives_t* prims)
+void primitives_init_alphaComp_opt(primitives_t* prims)
{
#ifdef WITH_IPP
prims->alphaComp_argb = ipp_alphaComp_argb;
#elif defined(WITH_SSE2)
- if ((hints->x86_flags & PRIM_X86_SSE2_AVAILABLE)
- && (hints->x86_flags & PRIM_X86_SSE3_AVAILABLE)) /* for LDDQU */
+ if (IsProcessorFeaturePresent(PF_XMMI64_INSTRUCTIONS_AVAILABLE)
+ && IsProcessorFeaturePresent(PF_SSE3_INSTRUCTIONS_AVAILABLE)) /* for LDDQU */
{
prims->alphaComp_argb = sse2_alphaComp_argb;
}
/* ------------------------------------------------------------------------- */
void primitives_init_andor(
- const primitives_hints_t *hints,
primitives_t *prims)
{
/* Start with the default. */
prims->andC_32u = general_andC_32u;
prims->orC_32u = general_orC_32u;
- primitives_init_andor_opt(hints, prims);
+ primitives_init_andor_opt(prims);
}
/* ------------------------------------------------------------------------- */
pstatus_t general_andC_32u(const UINT32 *pSrc, UINT32 val, UINT32 *pDst, INT32 len);
pstatus_t general_orC_32u(const UINT32 *pSrc, UINT32 val, UINT32 *pDst, INT32 len);
-void primitives_init_andor_opt(const primitives_hints_t *hints, primitives_t *prims);
+void primitives_init_andor_opt(primitives_t *prims);
#endif /* !__PRIM_ANDOR_H_INCLUDED__ */
#include <freerdp/types.h>
#include <freerdp/primitives.h>
+#include <winpr/sysinfo.h>
#ifdef WITH_SSE2
#include <emmintrin.h>
/* ------------------------------------------------------------------------- */
-void primitives_init_andor_opt(const primitives_hints_t *hints, primitives_t *prims)
+void primitives_init_andor_opt(primitives_t *prims)
{
#if defined(WITH_IPP)
prims->andC_32u = (__andC_32u_t) ippsAndC_32u;
prims->orC_32u = (__orC_32u_t) ippsOrC_32u;
#elif defined(WITH_SSE2)
- if ((hints->x86_flags & PRIM_X86_SSE2_AVAILABLE)
- && (hints->x86_flags & PRIM_X86_SSE3_AVAILABLE))
+ if (IsProcessorFeaturePresent(PF_XMMI64_INSTRUCTIONS_AVAILABLE)
+ && IsProcessorFeaturePresent(PF_SSE3_INSTRUCTIONS_AVAILABLE))
{
prims->andC_32u = sse3_andC_32u;
prims->orC_32u = sse3_orC_32u;
}
/* ------------------------------------------------------------------------- */
-void primitives_init_colors(const primitives_hints_t* hints, primitives_t* prims)
+void primitives_init_colors(primitives_t* prims)
{
prims->RGBToRGB_16s8u_P3AC4R = general_RGBToRGB_16s8u_P3AC4R;
prims->yCbCrToRGB_16s16s_P3P3 = general_yCbCrToRGB_16s16s_P3P3;
prims->RGBToYCbCr_16s16s_P3P3 = general_RGBToYCbCr_16s16s_P3P3;
- primitives_init_colors_opt(hints, prims);
+ primitives_init_colors_opt(prims);
}
/* ------------------------------------------------------------------------- */
pstatus_t general_RGBToYCbCr_16s16s_P3P3(const INT16 *pSrc[3], INT32 srcStep, INT16 *pDst[3], INT32 dstStep, const prim_size_t *roi);
pstatus_t general_RGBToRGB_16s8u_P3AC4R(const INT16 *pSrc[3], int srcStep, BYTE *pDst, int dstStep, const prim_size_t *roi);
-void primitives_init_colors_opt(const primitives_hints_t* hints, primitives_t* prims);
+void primitives_init_colors_opt(primitives_t* prims);
#endif /* !__PRIM_COLORS_H_INCLUDED__ */
#include <freerdp/types.h>
#include <freerdp/primitives.h>
+#include <winpr/sysinfo.h>
#ifdef WITH_SSE2
#include <emmintrin.h>
*/
/* ------------------------------------------------------------------------- */
-void primitives_init_colors_opt(const primitives_hints_t* hints, primitives_t* prims)
+void primitives_init_colors_opt(primitives_t* prims)
{
#if defined(WITH_SSE2)
- if (hints->x86_flags & PRIM_X86_SSE2_AVAILABLE)
+ if (IsProcessorFeaturePresent(PF_XMMI64_INSTRUCTIONS_AVAILABLE))
{
prims->RGBToRGB_16s8u_P3AC4R = sse2_RGBToRGB_16s8u_P3AC4R;
prims->yCbCrToRGB_16s16s_P3P3 = sse2_yCbCrToRGB_16s16s_P3P3;
prims->RGBToYCbCr_16s16s_P3P3 = sse2_RGBToYCbCr_16s16s_P3P3;
}
#elif defined(WITH_NEON)
- if (hints->arm_flags & PRIM_ARM_NEON_AVAILABLE)
+ if (IsProcessorFeaturePresent(PF_ARM_NEON_INSTRUCTIONS_AVAILABLE))
{
prims->yCbCrToRGB_16s16s_P3P3 = neon_yCbCrToRGB_16s16s_P3P3;
}
/* ------------------------------------------------------------------------- */
void primitives_init_copy(
- const primitives_hints_t *hints,
primitives_t *prims)
{
/* Start with the default. */
? _mm_lddqu_si128((__m128i *) (_ptr_)) \
: _mm_load_si128((__m128i *) (_ptr_)))
-/* This structure can (eventually) be used to provide hints to the
- * initialization routines, e.g. whether SSE2 or NEON or IPP instructions
- * or calls are available.
- */
-typedef struct
-{
- UINT32 x86_flags;
- UINT32 arm_flags;
-} primitives_hints_t;
-
/* Function prototypes for all the init/deinit routines. */
extern void primitives_init_copy(
- const primitives_hints_t *hints,
primitives_t *prims);
extern void primitives_deinit_copy(
primitives_t *prims);
extern void primitives_init_set(
- const primitives_hints_t *hints,
primitives_t *prims);
extern void primitives_deinit_set(
primitives_t *prims);
extern void primitives_init_add(
- const primitives_hints_t *hints,
primitives_t *prims);
extern void primitives_deinit_add(
primitives_t *prims);
extern void primitives_init_andor(
- const primitives_hints_t *hints,
primitives_t *prims);
extern void primitives_deinit_andor(
primitives_t *prims);
extern void primitives_init_shift(
- const primitives_hints_t *hints,
primitives_t *prims);
extern void primitives_deinit_shift(
primitives_t *prims);
extern void primitives_init_sign(
- const primitives_hints_t *hints,
primitives_t *prims);
extern void primitives_deinit_sign(
primitives_t *prims);
extern void primitives_init_alphaComp(
- const primitives_hints_t *hints,
primitives_t *prims);
extern void primitives_deinit_alphaComp(
primitives_t *prims);
extern void primitives_init_colors(
- const primitives_hints_t *hints,
primitives_t *prims);
extern void primitives_deinit_colors(
primitives_t *prims);
/* ------------------------------------------------------------------------- */
void primitives_init_set(
- const primitives_hints_t *hints,
primitives_t *prims)
{
/* Start with the default. */
prims->set_32u = general_set_32u;
prims->zero = general_zero;
- primitives_init_set_opt(hints, prims);
+ primitives_init_set_opt(prims);
}
/* ------------------------------------------------------------------------- */
pstatus_t general_set_32u(UINT32 val, UINT32 *pDst, INT32 len);
-void primitives_init_set_opt(const primitives_hints_t *hints, primitives_t *prims);
+void primitives_init_set_opt(primitives_t *prims);
#endif /* !__PRIM_SET_H_INCLUDED__ */
#include <string.h>
#include <freerdp/types.h>
#include <freerdp/primitives.h>
+#include <winpr/sysinfo.h>
#ifdef WITH_SSE2
# include <emmintrin.h>
#endif
/* ------------------------------------------------------------------------- */
-void primitives_init_set_opt(const primitives_hints_t *hints, primitives_t *prims)
+void primitives_init_set_opt(primitives_t *prims)
{
/* Pick tuned versions if possible. */
#ifdef WITH_IPP
prims->set_32u = (__set_32u_t) ipp_wrapper_set_32u;
prims->zero = (__zero_t) ippsZero_8u;
#elif defined(WITH_SSE2)
- if (hints->x86_flags & PRIM_X86_SSE2_AVAILABLE)
+ if (IsProcessorFeaturePresent(PF_XMMI64_INSTRUCTIONS_AVAILABLE))
{
prims->set_8u = sse2_set_8u;
prims->set_32s = sse2_set_32s;
/* ------------------------------------------------------------------------- */
void primitives_init_shift(
- const primitives_hints_t *hints,
primitives_t *prims)
{
/* Start with the default. */
prims->shiftC_16s = general_shiftC_16s;
prims->shiftC_16u = general_shiftC_16u;
- primitives_init_shift_opt(hints, prims);
+ primitives_init_shift_opt(prims);
}
/* ------------------------------------------------------------------------- */
pstatus_t general_shiftC_16s(const INT16 *pSrc, INT32 val, INT16 *pDst, INT32 len);
pstatus_t general_shiftC_16u(const UINT16 *pSrc, INT32 val, UINT16 *pDst, INT32 len);
-void primitives_init_shift_opt(const primitives_hints_t *hints, primitives_t *prims);
+void primitives_init_shift_opt(primitives_t *prims);
#endif /* !__PRIM_SHIFT_H_INCLUDED__ */
#include <freerdp/types.h>
#include <freerdp/primitives.h>
+#include <winpr/sysinfo.h>
#ifdef WITH_SSE2
#include <emmintrin.h>
*/
/* ------------------------------------------------------------------------- */
-void primitives_init_shift_opt(const primitives_hints_t *hints, primitives_t *prims)
+void primitives_init_shift_opt(primitives_t *prims)
{
#if defined(WITH_IPP)
prims->lShiftC_16s = (__lShiftC_16s_t) ippsLShiftC_16s;
prims->lShiftC_16u = (__lShiftC_16u_t) ippsLShiftC_16u;
prims->rShiftC_16u = (__rShiftC_16u_t) ippsRShiftC_16u;
#elif defined(WITH_SSE2)
- if ((hints->x86_flags & PRIM_X86_SSE2_AVAILABLE)
- && (hints->x86_flags & PRIM_X86_SSE3_AVAILABLE))
+ if (IsProcessorFeaturePresent(PF_XMMI64_INSTRUCTIONS_AVAILABLE)
+ && IsProcessorFeaturePresent(PF_SSE3_INSTRUCTIONS_AVAILABLE))
{
prims->lShiftC_16s = sse2_lShiftC_16s;
prims->rShiftC_16s = sse2_rShiftC_16s;
/* ------------------------------------------------------------------------- */
void primitives_init_sign(
- const primitives_hints_t *hints,
primitives_t *prims)
{
/* Start with the default. */
prims->sign_16s = general_sign_16s;
- primitives_init_sign_opt(hints, prims);
+ primitives_init_sign_opt(prims);
}
/* ------------------------------------------------------------------------- */
pstatus_t general_sign_16s(const INT16 *pSrc, INT16 *pDst, INT32 len);
-void primitives_init_sign_opt(const primitives_hints_t *hints, primitives_t *prims);
+void primitives_init_sign_opt(primitives_t *prims);
#endif /* !__PRIM_SIGN_H_INCLUDED__ */
#include <freerdp/types.h>
#include <freerdp/primitives.h>
+#include <winpr/sysinfo.h>
#ifdef WITH_SSE2
#include <emmintrin.h>
#endif /* WITH_SSE2 */
/* ------------------------------------------------------------------------- */
-void primitives_init_sign_opt(const primitives_hints_t *hints, primitives_t *prims)
+/* Hook the tuned sign_16s implementation into prims when the CPU supports it. */
+void primitives_init_sign_opt(primitives_t *prims)
{
/* Pick tuned versions if possible. */
/* I didn't spot an IPP version of this. */
#if defined(WITH_SSE2)
- if ((hints->x86_flags & PRIM_X86_SSSE3_AVAILABLE)
- && (hints->x86_flags & PRIM_X86_SSE3_AVAILABLE))
+ /* ssse3_sign_16s was gated on SSSE3 before this change and must stay so;
+  * PF_EX_SSSE3 is an extended flag, hence the Ex query. */
+ if (IsProcessorFeaturePresentEx(PF_EX_SSSE3)
+ && IsProcessorFeaturePresent(PF_SSE3_INSTRUCTIONS_AVAILABLE))
{
prims->sign_16s = ssse3_sign_16s;
}
#include <string.h>
#include <stdlib.h>
-#include <winpr/platform.h>
-
#include <freerdp/primitives.h>
#include "prim_internal.h"
-#ifdef __ANDROID__
-#include "cpu-features.h"
-#endif
-
/* Singleton pointer used throughout the program when requested. */
static primitives_t* pPrimitives = NULL;
-#define D_BIT_MMX (1<<23)
-#define D_BIT_SSE (1<<25)
-#define D_BIT_SSE2 (1<<26)
-#define D_BIT_3DN (1<<30)
-#define C_BIT_SSE3 (1<<0)
-#define C_BIT_3DNP (1<<8)
-#define C_BIT_SSSE3 (1<<9)
-#define C_BIT_SSE41 (1<<19)
-#define C_BIT_SSE42 (1<<20)
-#define C_BIT_XGETBV (1<<27)
-#define C_BIT_AVX (1<<28)
-#define C_BITS_AVX (C_BIT_XGETBV|C_BIT_AVX)
-#define E_BIT_XMM (1<<1)
-#define E_BIT_YMM (1<<2)
-#define E_BITS_AVX (E_BIT_XMM|E_BIT_YMM)
-#define C_BIT_FMA (1<<11)
-#define C_BIT_AVX_AES (1<<24)
-
-/* If x86 */
-#if defined(_M_IX86_AMD64)
-
-/* If GCC */
-#ifdef __GNUC__
-
-#ifdef __AVX__
-#define xgetbv(_func_, _lo_, _hi_) \
- __asm__ __volatile__ ("xgetbv" : "=a" (_lo_), "=d" (_hi_) : "c" (_func_))
-#endif
-
-static void cpuid(
- unsigned info,
- unsigned *eax,
- unsigned *ebx,
- unsigned *ecx,
- unsigned *edx)
-{
- *eax = *ebx = *ecx = *edx = 0;
-
- __asm volatile
- (
- /* The EBX (or RBX register on x86_64) is used for the PIC base address
- * and must not be corrupted by our inline assembly.
- */
-#ifdef _M_IX86
- "mov %%ebx, %%esi;"
- "cpuid;"
- "xchg %%ebx, %%esi;"
-#else
- "mov %%rbx, %%rsi;"
- "cpuid;"
- "xchg %%rbx, %%rsi;"
-#endif
- : "=a" (*eax), "=S" (*ebx), "=c" (*ecx), "=d" (*edx)
- : "0" (info)
- );
-}
-
-static void set_hints(primitives_hints_t* hints)
-{
- unsigned a, b, c, d;
-
- cpuid(1, &a, &b, &c, &d);
-
- if (d & D_BIT_MMX)
- hints->x86_flags |= PRIM_X86_MMX_AVAILABLE;
- if (d & D_BIT_SSE)
- hints->x86_flags |= PRIM_X86_SSE_AVAILABLE;
- if (d & D_BIT_SSE2)
- hints->x86_flags |= PRIM_X86_SSE2_AVAILABLE;
- if (d & D_BIT_3DN)
- hints->x86_flags |= PRIM_X86_3DNOW_AVAILABLE;
- if (c & C_BIT_3DNP)
- hints->x86_flags |= PRIM_X86_3DNOW_PREFETCH_AVAILABLE;
- if (c & C_BIT_SSE3)
- hints->x86_flags |= PRIM_X86_SSE3_AVAILABLE;
- if (c & C_BIT_SSSE3)
- hints->x86_flags |= PRIM_X86_SSSE3_AVAILABLE;
- if (c & C_BIT_SSE41)
- hints->x86_flags |= PRIM_X86_SSE41_AVAILABLE;
- if (c & C_BIT_SSE42)
- hints->x86_flags |= PRIM_X86_SSE42_AVAILABLE;
-
-#ifdef __AVX__
- if ((c & C_BITS_AVX) == C_BITS_AVX)
- {
- int e, f;
- xgetbv(0, e, f);
-
- if ((e & E_BITS_AVX) == E_BITS_AVX)
- {
- hints->x86_flags |= PRIM_X86_AVX_AVAILABLE;
-
- if (c & C_BIT_FMA)
- hints->x86_flags |= PRIM_X86_FMA_AVAILABLE;
- if (c & C_BIT_AVX_AES)
- hints->x86_flags |= PRIM_X86_AVX_AES_AVAILABLE;
- }
- }
- /* TODO: AVX2: set eax=7, ecx=0, cpuid, check ebx-bit5 */
-#endif
-}
-
-#else
-
-static void set_hints(primitives_hints_t* hints)
-{
- /* x86 non-GCC: TODO */
-}
-
-#endif /* __GNUC__ */
-
-/* ------------------------------------------------------------------------- */
-
-#elif defined(_M_ARM)
-
-static UINT32 getNeonSupport(void)
-{
-#ifdef __ANDROID__
- if (android_getCpuFamily() != ANDROID_CPU_FAMILY_ARM) return 0;
-
- UINT64 features = android_getCpuFeatures();
-
- if ((features & ANDROID_CPU_ARM_FEATURE_ARMv7))
- {
- if (features & ANDROID_CPU_ARM_FEATURE_NEON)
- {
- return PRIM_ARM_NEON_AVAILABLE;
- }
- }
-#elif defined(__APPLE)
- /* assume NEON support on iOS devices */
- return PRIM_ARM_NEON_AVAILABLE;
-#endif
- return 0;
-}
-
-static void set_hints(primitives_hints_t* hints)
-{
- /* ARM: TODO */
- hints->arm_flags |= getNeonSupport();
-}
-
-#else
-static void set_hints(
- primitives_hints_t *hints)
-{
-}
-#endif /* x86 else ARM else */
-
/* ------------------------------------------------------------------------- */
void primitives_init(void)
{
- primitives_hints_t* hints;
-
if (pPrimitives == NULL)
{
pPrimitives = calloc(1, sizeof(primitives_t));
return;
}
- hints = calloc(1, sizeof(primitives_hints_t));
- set_hints(hints);
- pPrimitives->hints = (void *) hints;
-
/* Now call each section's initialization routine. */
- primitives_init_add(hints, pPrimitives);
- primitives_init_andor(hints, pPrimitives);
- primitives_init_alphaComp(hints, pPrimitives);
- primitives_init_copy(hints, pPrimitives);
- primitives_init_set(hints, pPrimitives);
- primitives_init_shift(hints, pPrimitives);
- primitives_init_sign(hints, pPrimitives);
- primitives_init_colors(hints, pPrimitives);
+ primitives_init_add(pPrimitives);
+ primitives_init_andor(pPrimitives);
+ primitives_init_alphaComp(pPrimitives);
+ primitives_init_copy(pPrimitives);
+ primitives_init_set(pPrimitives);
+ primitives_init_shift(pPrimitives);
+ primitives_init_sign(pPrimitives);
+ primitives_init_colors(pPrimitives);
}
/* ------------------------------------------------------------------------- */
}
/* ------------------------------------------------------------------------- */
-UINT32 primitives_get_flags(const primitives_t* prims)
-{
- primitives_hints_t* hints = (primitives_hints_t*) (prims->hints);
-
-#if defined(_M_IX86_AMD64)
- return hints->x86_flags;
-#elif defined(_M_ARM)
- return hints->arm_flags;
-#else
- return 0;
-#endif
-}
-
-/* ------------------------------------------------------------------------- */
-
-typedef struct
-{
- UINT32 flag;
- const char *str;
-} flagpair_t;
-
-static const flagpair_t x86_flags[] =
-{
- { PRIM_X86_MMX_AVAILABLE, "MMX" },
- { PRIM_X86_3DNOW_AVAILABLE, "3DNow" },
- { PRIM_X86_3DNOW_PREFETCH_AVAILABLE, "3DNow-PF" },
- { PRIM_X86_SSE_AVAILABLE, "SSE" },
- { PRIM_X86_SSE2_AVAILABLE, "SSE2" },
- { PRIM_X86_SSE3_AVAILABLE, "SSE3" },
- { PRIM_X86_SSSE3_AVAILABLE, "SSSE3" },
- { PRIM_X86_SSE41_AVAILABLE, "SSE4.1" },
- { PRIM_X86_SSE42_AVAILABLE, "SSE4.2" },
- { PRIM_X86_AVX_AVAILABLE, "AVX" },
- { PRIM_X86_FMA_AVAILABLE, "FMA" },
- { PRIM_X86_AVX_AES_AVAILABLE, "AVX-AES" },
- { PRIM_X86_AVX2_AVAILABLE, "AVX2" },
-};
-
-static const flagpair_t arm_flags[] =
-{
- { PRIM_ARM_VFP1_AVAILABLE, "VFP1" },
- { PRIM_ARM_VFP2_AVAILABLE, "VFP2" },
- { PRIM_ARM_VFP3_AVAILABLE, "VFP3" },
- { PRIM_ARM_VFP4_AVAILABLE, "VFP4" },
- { PRIM_ARM_FPA_AVAILABLE, "FPA" },
- { PRIM_ARM_FPE_AVAILABLE, "FPE" },
- { PRIM_ARM_IWMMXT_AVAILABLE, "IWMMXT" },
- { PRIM_ARM_NEON_AVAILABLE, "NEON" },
-};
-
-void primitives_flags_str(const primitives_t* prims, char* str, size_t len)
-{
- int i;
- primitives_hints_t* hints;
-
- *str = '\0';
- --len; /* for the '/0' */
-
- hints = (primitives_hints_t*) (prims->hints);
-
- for (i = 0; i < sizeof(x86_flags) / sizeof(flagpair_t); ++i)
- {
- if (hints->x86_flags & x86_flags[i].flag)
- {
- int slen = strlen(x86_flags[i].str) + 1;
-
- if (len < slen)
- break;
-
- if (*str != '\0')
- strcat(str, " ");
-
- strcat(str, x86_flags[i].str);
- len -= slen;
- }
- }
-
- for (i = 0; i < sizeof(arm_flags) / sizeof(flagpair_t); ++i)
- {
- if (hints->arm_flags & arm_flags[i].flag)
- {
- int slen = strlen(arm_flags[i].str) + 1;
-
- if (len < slen)
- break;
-
- if (*str != '\0')
- strcat(str, " ");
-
- strcat(str, arm_flags[i].str);
- len -= slen;
- }
- }
-}
-
-/* ------------------------------------------------------------------------- */
void primitives_deinit(void)
{
if (pPrimitives == NULL)
primitives_deinit_sign(pPrimitives);
primitives_deinit_colors(pPrimitives);
- if (pPrimitives->hints != NULL)
- free((void*) (pPrimitives->hints));
-
free((void*) pPrimitives);
pPrimitives = NULL;
}
test_set.c
test_shift.c
test_sign.c
- ../prim_add.c
- ../prim_andor.c
- ../prim_alphaComp.c
- ../prim_colors.c
- ../prim_copy.c
- ../prim_set.c
- ../prim_shift.c
- ../prim_sign.c
- ../prim_add_opt.c
- ../prim_alphaComp_opt.c
- ../prim_andor_opt.c
- ../prim_colors_opt.c
- ../prim_set_opt.c
- ../prim_shift_opt.c
- ../prim_sign_opt.c
- ../primitives.c
)
set(PRIMITIVE_TEST_HEADERS
measure.h
prim_test.h
- ../prim_internal.h
)
set(PRIMITIVE_TEST_SRCS
set_property(SOURCE ${PRIMITIVE_TEST_CFILES} PROPERTY COMPILE_FLAGS ${OPTFLAGS})
-target_link_libraries(prim_test rt)
+target_link_libraries(prim_test freerdp-primitives rt winpr-sysinfo)
if(NOT TESTING_OUTPUT_DIRECTORY)
set(TESTING_OUTPUT_DIRECTORY .)
endif()
#include <sys/types.h>
#include <sys/stat.h>
#include <fcntl.h>
+#include <winpr/platform.h>
+#include <winpr/sysinfo.h>
#ifdef HAVE_UNISTD_H
#include <unistd.h>
int test_sizes[] = { 8, 16, 32, 64, 128, 256, 512, 1024, 2048, 4096 };
int Quiet = 0;
+
+
+/* ------------------------------------------------------------------------- */
+typedef struct /* one CPU feature: query identifier plus the name printed for it */
+{
+ UINT32 flag; /* identifier handed to the processor-feature query */
+ const char *str; /* short name appended to the hints string */
+} flagpair_t;
+
+/* Features queried with IsProcessorFeaturePresent(), per target architecture.
+ * The braces stay outside the conditionals so the initializer is balanced
+ * on every target, matching flags_extended. */
+static const flagpair_t flags[] =
+{
+#ifdef _M_IX86_AMD64
+	{ PF_MMX_INSTRUCTIONS_AVAILABLE, "MMX" },
+	{ PF_3DNOW_INSTRUCTIONS_AVAILABLE, "3DNow" },
+	{ PF_XMMI_INSTRUCTIONS_AVAILABLE, "SSE" },
+	{ PF_XMMI64_INSTRUCTIONS_AVAILABLE, "SSE2" },
+	{ PF_SSE3_INSTRUCTIONS_AVAILABLE, "SSE3" },
+#elif defined(_M_ARM)
+	{ PF_ARM_VFP3, "VFP3" },
+	{ PF_ARM_INTEL_WMMX, "IWMMXT" },
+	{ PF_ARM_NEON_INSTRUCTIONS_AVAILABLE, "NEON" },
+#endif
+};
+
+static const flagpair_t flags_extended[] = /* extended (PF_EX_*) feature identifiers and their display names */
+{
+#ifdef _M_IX86_AMD64
+ { PF_EX_3DNOW_PREFETCH, "3DNow-PF" },
+ { PF_EX_SSSE3, "SSSE3" },
+ { PF_EX_SSE41, "SSE4.1" },
+ { PF_EX_SSE42, "SSE4.2" },
+ { PF_EX_AVX, "AVX" },
+ { PF_EX_FMA, "FMA" },
+ { PF_EX_AVX_AES, "AVX-AES" },
+ { PF_EX_AVX2, "AVX2" },
+#elif defined(_M_ARM)
+ { PF_EX_ARM_VFP1, "VFP1"},
+ { PF_EX_ARM_VFP4, "VFP4" },
+#endif
+};
+
+/*
+ * Append name to str, preceded by a single space when str is not empty.
+ * *remaining is the room left in str, not counting the terminator; each
+ * name is charged its length plus one for the separator.
+ * Returns 0 and leaves str untouched when the name does not fit, else 1.
+ */
+static int flags_str_append(char* str, size_t* remaining, const char* name)
+{
+	size_t needed = strlen(name) + 1;
+
+	if (*remaining < needed)
+		return 0;
+
+	if (*str != '\0')
+		strcat(str, " ");
+
+	strcat(str, name);
+	*remaining -= needed;
+	return 1;
+}
+
+/*
+ * Write a space-separated list of the names of all available CPU features
+ * from the flags and flags_extended tables into str.
+ *
+ * str: destination buffer; NUL-terminated on return when len > 0.
+ * len: size of str in bytes, including room for the terminator.
+ *
+ * Each table is walked until a name no longer fits; names are never cut
+ * in the middle.
+ */
+void primitives_flags_str(char* str, size_t len)
+{
+	size_t i;
+
+	/* Nothing can be written, not even the terminator. */
+	if (str == NULL || len == 0)
+		return;
+
+	*str = '\0';
+	--len; /* for the '\0' */
+
+	for (i = 0; i < sizeof(flags) / sizeof(flagpair_t); ++i)
+	{
+		if (!IsProcessorFeaturePresent(flags[i].flag))
+			continue;
+
+		if (!flags_str_append(str, &len, flags[i].str))
+			break;
+	}
+
+	for (i = 0; i < sizeof(flags_extended) / sizeof(flagpair_t); ++i)
+	{
+		/* PF_EX_* identifiers belong to the extended query. */
+		if (!IsProcessorFeaturePresentEx(flags_extended[i].flag))
+			continue;
+
+		if (!flags_str_append(str, &len, flags_extended[i].str))
+			break;
+	}
+}
+
/* ------------------------------------------------------------------------- */
static void get_random_data_lrand(
void *buffer,
int main(int argc, char** argv)
{
int i;
- char hints[256];
+ char hints[1024];
UINT32 testSet = 0;
UINT32 testTypes = 0;
int results = SUCCESS;
primitives_init();
- primitives_flags_str(primitives_get(), hints, sizeof(hints));
+ primitives_flags_str(hints, sizeof(hints));
printf("Hints: %s\n", hints);
/* COPY */
#include <stdio.h>
#include <freerdp/primitives.h>
+#include <winpr/platform.h>
#ifdef WITH_IPP
#include <ipps.h>
} \
} while (0)
-#if defined(i386) && defined(WITH_SSE2)
-#define DO_SSE_MEASUREMENTS(_funcSSE_, _prework_) \
+#if (defined(_M_IX86_AMD64) && defined(WITH_SSE2)) || (defined(arm) && defined(WITH_NEON))
+#define DO_OPT_MEASUREMENTS(_funcOpt_, _prework_) \
do { \
for (s=0; s<num_sizes; ++s) \
{ \
_prework_; \
iter = iterations/size; \
sprintf(label, "%s-%s-%-4d", SIMD_TYPE, oplabel, size); \
- MEASURE_TIMED(label, iter, test_time, resultSSENeon[s], \
- _funcSSE_); \
+ MEASURE_TIMED(label, iter, test_time, resultOpt[s], \
+ _funcOpt_); \
} \
} while (0)
#else
-#define DO_SSE_MEASUREMENTS(_funcSSE_, _prework_)
+#define DO_OPT_MEASUREMENTS(_funcSSE_, _prework_)
#endif
-#if defined(armel) && defined(INCLUDE_NEON_MEASUREMENTS)
-#define DO_NEON_MEASUREMENTS(_funcNeon_, _prework_) \
- do { \
- for (s=0; s<num_sizes; ++s) \
- { \
- int iter; \
- char label[256]; \
- int size = size_array[s]; \
- _prework_; \
- iter = iterations/size; \
- sprintf(label, "%s-%s-%-4d", SIMD_TYPE, oplabel, size); \
- MEASURE_TIMED(label, iter, test_time, resultSSENeon[s], \
- _funcNeon_); \
- } \
- } while (0)
-#else
-#define DO_NEON_MEASUREMENTS(_funcNeon_, _prework_)
-#endif
-
-#if defined(i386) && defined(WITH_IPP)
+#if defined(_M_IX86_AMD64) && defined(WITH_IPP)
#define DO_IPP_MEASUREMENTS(_funcIPP_, _prework_) \
do { \
for (s=0; s<num_sizes; ++s) \
#define STD_SPEED_TEST( \
_name_, _srctype_, _dsttype_, _prework_, \
_doNormal_, _funcNormal_, \
- _doSSE_, _funcSSE_, _flagsSSE_, \
- _doNeon_, _funcNeon_, _flagsNeon_, \
+ _doOpt_, _funcOpt_, _flagOpt_, _flagExt_, \
_doIPP_, _funcIPP_) \
static void _name_( \
const char *oplabel, const char *type, \
int iterations, float test_time) \
{ \
int s; \
- float *resultNormal, *resultSSENeon, *resultIPP; \
- UINT32 pflags = primitives_get_flags(primitives_get()); \
+ float *resultNormal, *resultOpt, *resultIPP; \
resultNormal = (float *) calloc(num_sizes, sizeof(float)); \
- resultSSENeon = (float *) calloc(num_sizes, sizeof(float)); \
+ resultOpt = (float *) calloc(num_sizes, sizeof(float)); \
resultIPP = (float *) calloc(num_sizes, sizeof(float)); \
printf("******************** %s %s ******************\n", \
oplabel, type); \
if (_doNormal_) { DO_NORMAL_MEASUREMENTS(_funcNormal_, _prework_); } \
- if (_doSSE_) { \
- if ((pflags & (_flagsSSE_)) == (_flagsSSE_)) \
+ if (_doOpt_) \
+ { \
+ if (_flagExt_) \
{ \
- DO_SSE_MEASUREMENTS(_funcSSE_, _prework_); \
+ if (IsProcessorFeaturePresentEx(_flagOpt_)) \
+ { \
+ DO_OPT_MEASUREMENTS(_funcOpt_, _prework_); \
+ } \
} \
- } \
- if (_doNeon_) { \
- if ((pflags & (_flagsNeon_)) == (_flagsNeon_)) \
+ else \
{ \
- DO_NEON_MEASUREMENTS(_funcNeon_, _prework_); \
+ if (IsProcessorFeaturePresent(_flagOpt_)) \
+ { \
+ DO_OPT_MEASUREMENTS(_funcOpt_, _prework_); \
+ } \
} \
} \
if (_doIPP_) { DO_IPP_MEASUREMENTS(_funcIPP_, _prework_); } \
strcpy(sN, "N/A"); strcpy(sSN, "N/A"); strcpy(sSNp, "N/A"); \
strcpy(sIPP, "N/A"); strcpy(sIPPp, "N/A"); \
if (resultNormal[s] > 0.0) _floatprint(resultNormal[s], sN); \
- if (resultSSENeon[s] > 0.0) \
+ if (resultOpt[s] > 0.0) \
{ \
- _floatprint(resultSSENeon[s], sSN); \
+ _floatprint(resultOpt[s], sSN); \
if (resultNormal[s] > 0.0) \
{ \
sprintf(sSNp, "%d%%", \
- (int) (resultSSENeon[s] / resultNormal[s] * 100.0 + 0.5)); \
+ (int) (resultOpt[s] / resultNormal[s] * 100.0 + 0.5)); \
} \
} \
if (resultIPP[s] > 0.0) \
printf("%8d: %15s %15s %5s %15s %5s\n", \
size_array[s], sN, sSN, sSNp, sIPP, sIPPp); \
} \
- free(resultNormal); free(resultSSENeon); free(resultIPP); \
+ free(resultNormal); free(resultOpt); free(resultIPP); \
}
#endif // !__PRIMTEST_H_INCLUDED__
#include "config.h"
#endif
+#include <winpr/sysinfo.h>
#include "prim_test.h"
#define FUNC_TEST_SIZE 65536
int failed = 0;
int i;
char testStr[256];
- UINT32 pflags = primitives_get_flags(primitives_get());
testStr[0] = '\0';
get_random_data(src1, sizeof(src1));
memset(d2, 0, sizeof(d2));
general_add_16s(src1+1, src2+1, d1+1, FUNC_TEST_SIZE);
#ifdef _M_IX86_AMD64
- if (pflags & PRIM_X86_SSE3_AVAILABLE)
+ if(IsProcessorFeaturePresent(PF_SSE3_INSTRUCTIONS_AVAILABLE))
{
strcat(testStr, " SSE3");
/* Aligned */
/* ------------------------------------------------------------------------- */
STD_SPEED_TEST(add16s_speed_test, INT16, INT16, dst=dst,
TRUE, general_add_16s(src1, src2, dst, size),
- TRUE, sse3_add_16s(src1, src2, dst, size), PRIM_X86_SSE3_AVAILABLE,
- FALSE, dst=dst, 0,
+ TRUE, sse3_add_16s(src1, src2, dst, size), PF_SSE3_INSTRUCTIONS_AVAILABLE, FALSE,
TRUE, ippsAdd_16s(src1, src2, dst, size));
int test_add16s_speed(void)
#ifdef HAVE_CONFIG_H
#include "config.h"
#endif
+#include <winpr/sysinfo.h>
#include "prim_test.h"
UINT32 ALIGN(dst2u[DST_WIDTH*DST_HEIGHT+1]);
UINT32 ALIGN(dst3[DST_WIDTH*DST_HEIGHT]);
int error = 0;
- UINT32 pflags = primitives_get_flags(primitives_get());
char testStr[256];
UINT32 *ptr;
int i, x, y;
(const BYTE *) src2, 4*SRC2_WIDTH,
(BYTE *) dst1, 4*DST_WIDTH, TEST_WIDTH, TEST_HEIGHT);
#ifdef _M_IX86_AMD64
- if (pflags & PRIM_X86_SSE2_AVAILABLE)
+ if (IsProcessorFeaturePresent(PF_XMMI64_INSTRUCTIONS_AVAILABLE))
{
strcat(testStr, " SSE2");
sse2_alphaComp_argb((const BYTE *) src1, 4*SRC1_WIDTH,
error = 1;
}
#ifdef _M_IX86_AMD64
- if (pflags & PRIM_X86_SSE2_AVAILABLE)
+ if (IsProcessorFeaturePresent(PF_XMMI64_INSTRUCTIONS_AVAILABLE))
{
UINT32 c2 = *PIXEL(dst2a, 4*DST_WIDTH, x, y);
if (colordist(c0, c2) > TOLERANCE)
TRUE, general_alphaComp_argb(src1, bytes, src2, bytes, dst, bytes,
size, size),
TRUE, sse2_alphaComp_argb(src1, bytes, src2, bytes, dst, bytes,
- size, size), PRIM_X86_SSE2_AVAILABLE,
- FALSE, dst=dst, 0,
+ size, size), PF_XMMI64_INSTRUCTIONS_AVAILABLE, FALSE,
TRUE, ipp_alphaComp_argb(src1, bytes, src2, bytes, dst, bytes,
size, size));
#ifdef HAVE_CONFIG_H
#include "config.h"
#endif
+#include <winpr/sysinfo.h>
#include "prim_test.h"
UINT32 ALIGN(src[FUNC_TEST_SIZE+3]), ALIGN(dst[FUNC_TEST_SIZE+3]);
int failed = 0;
int i;
- UINT32 pflags = primitives_get_flags(primitives_get());
char testStr[256];
testStr[0] = '\0';
}
}
#ifdef _M_IX86_AMD64
- if (pflags & PRIM_X86_SSE3_AVAILABLE)
+ if (IsProcessorFeaturePresent(PF_SSE3_INSTRUCTIONS_AVAILABLE))
{
strcat(testStr, " SSE3");
/* Aligned */
/* ------------------------------------------------------------------------- */
STD_SPEED_TEST(andC_32u_speed_test, UINT32, UINT32, dst=dst,
TRUE, general_andC_32u(src1, constant, dst, size),
- TRUE, sse3_andC_32u(src1, constant, dst, size), PRIM_X86_SSE3_AVAILABLE,
- FALSE, dst=dst, 0,
+ TRUE, sse3_andC_32u(src1, constant, dst, size), PF_SSE3_INSTRUCTIONS_AVAILABLE, FALSE,
TRUE, ippsAndC_32u(src1, constant, dst, size))
int test_and_32u_speed(void)
UINT32 ALIGN(src[FUNC_TEST_SIZE+3]), ALIGN(dst[FUNC_TEST_SIZE+3]);
int failed = 0;
int i;
- UINT32 pflags = primitives_get_flags(primitives_get());
char testStr[256];
testStr[0] = '\0';
}
}
#ifdef _M_IX86_AMD64
- if (pflags & PRIM_X86_SSE3_AVAILABLE)
+ if(IsProcessorFeaturePresent(PF_SSE3_INSTRUCTIONS_AVAILABLE))
{
strcat(testStr, " SSE3");
/* Aligned */
/* ------------------------------------------------------------------------- */
STD_SPEED_TEST(orC_32u_speed_test, UINT32, UINT32, dst=dst,
TRUE, general_orC_32u(src1, constant, dst, size),
- TRUE, sse3_orC_32u(src1, constant, dst, size), PRIM_X86_SSE3_AVAILABLE,
- FALSE, dst=dst, 0,
+ TRUE, sse3_orC_32u(src1, constant, dst, size), PF_SSE3_INSTRUCTIONS_AVAILABLE, FALSE,
TRUE, ippsOrC_32u(src1, constant, dst, size))
int test_or_32u_speed(void)
#include "config.h"
#endif
+#include <winpr/sysinfo.h>
#include "prim_test.h"
static const int RGB_TRIAL_ITERATIONS = 1000;
UINT32 ALIGN(out1[4096]), ALIGN(out2[4096]);
int i;
int failed = 0;
- UINT32 pflags = primitives_get_flags(primitives_get());
char testStr[256];
INT16 *ptrs[3];
prim_size_t roi = { 64, 64 };
general_RGBToRGB_16s8u_P3AC4R((const INT16 **) ptrs, 64*2,
(BYTE *) out1, 64*4, &roi);
#ifdef _M_IX86_AMD64
- if (pflags & PRIM_X86_SSE2_AVAILABLE)
+ if (IsProcessorFeaturePresent(PF_XMMI64_INSTRUCTIONS_AVAILABLE))
{
strcat(testStr, " SSE2");
sse2_RGBToRGB_16s8u_P3AC4R((const INT16 **) ptrs, 64*2,
(const INT16 **) src1, 64*2, (BYTE *) dst, 64*4, &roi64x64),
TRUE, sse2_RGBToRGB_16s8u_P3AC4R(
(const INT16 **) src1, 64*2, (BYTE *) dst, 64*4, &roi64x64),
- PRIM_X86_SSE2_AVAILABLE,
- FALSE, dst=dst, 0,
+ PF_XMMI64_INSTRUCTIONS_AVAILABLE, FALSE,
FALSE, dst=dst);
int test_RGBToRGB_16s8u_P3AC4R_speed(void)
INT16 ALIGN(r2[4096]), ALIGN(g2[4096]), ALIGN(b2[4096]);
int i;
int failed = 0;
- UINT32 pflags = primitives_get_flags(primitives_get());
char testStr[256];
const INT16 *in[3];
INT16 *out1[3];
general_yCbCrToRGB_16s16s_P3P3(in, 64*2, out1, 64*2, &roi);
#ifdef _M_IX86_AMD64
- if (pflags & PRIM_X86_SSE2_AVAILABLE)
+ if (IsProcessorFeaturePresent(PF_XMMI64_INSTRUCTIONS_AVAILABLE))
{
strcat(testStr, " SSE2");
sse2_yCbCrToRGB_16s16s_P3P3(in, 64*2, out2, 64*2, &roi);
ycbcr_to_rgb_speed, INT16*, INT16*, dst=dst,
TRUE, general_yCbCrToRGB_16s16s_P3P3(src1, 64*2, dst, 64*2, &roi64x64),
TRUE, sse2_yCbCrToRGB_16s16s_P3P3(src1, 64*2, dst, 64*2, &roi64x64),
- PRIM_X86_SSE2_AVAILABLE,
- FALSE, dst=dst, 0,
+ PF_XMMI64_INSTRUCTIONS_AVAILABLE, FALSE,
FALSE, dst=dst);
int test_yCbCrToRGB_16s16s_P3P3_speed(void)
#include "config.h"
#endif
+#include <winpr/sysinfo.h>
#include "prim_test.h"
static const int MEMCPY_PRETEST_ITERATIONS = 1000000;
/* ------------------------------------------------------------------------- */
STD_SPEED_TEST(copy8u_speed_test, BYTE, BYTE, dst=dst,
TRUE, memcpy(dst, src1, size),
- FALSE, NULL, 0,
- FALSE, NULL, 0,
+ FALSE, NULL, 0, FALSE,
TRUE, ippsCopy_8u(src1, dst, size));
int test_copy8u_speed(void)
#include "config.h"
#endif
+#include <winpr/sysinfo.h>
#include "prim_test.h"
static const int MEMSET8_PRETEST_ITERATIONS = 100000000;
int failed = 0;
int off;
char testStr[256];
- UINT32 pflags = primitives_get_flags(primitives_get());
testStr[0] = '\0';
#ifdef _M_IX86_AMD64
/* Test SSE under various alignments */
- if (pflags & PRIM_X86_SSE2_AVAILABLE)
+ if (IsProcessorFeaturePresent(PF_XMMI64_INSTRUCTIONS_AVAILABLE))
{
strcat(testStr, " SSE2");
for (off=0; off<16; ++off)
/* ------------------------------------------------------------------------- */
STD_SPEED_TEST(set8u_speed_test, BYTE, BYTE, dst=dst,
TRUE, memset(dst, constant, size),
- FALSE, NULL, 0,
- FALSE, NULL, 0,
+ FALSE, NULL, 0, FALSE,
TRUE, ippsSet_8u(constant, dst, size));
int test_set8u_speed(void)
/* ------------------------------------------------------------------------- */
int test_set32s_func(void)
{
- primitives_t* prims = primitives_get();
INT32 ALIGN(dest[512]);
int failed = 0;
int off;
char testStr[256];
- UINT32 pflags = primitives_get_flags(prims);
testStr[0] = '\0';
#ifdef _M_IX86_AMD64
/* Test SSE under various alignments */
- if (pflags & PRIM_X86_SSE2_AVAILABLE)
+ if (IsProcessorFeaturePresent(PF_XMMI64_INSTRUCTIONS_AVAILABLE))
{
strcat(testStr, " SSE2");
for (off=0; off<16; ++off) {
/* ------------------------------------------------------------------------- */
int test_set32u_func(void)
{
- primitives_t* prims = primitives_get();
UINT32 ALIGN(dest[512]);
int failed = 0;
int off;
char testStr[256];
- UINT32 pflags = primitives_get_flags(prims);
testStr[0] = '\0';
#ifdef _M_IX86_AMD64
/* Test SSE under various alignments */
- if (pflags & PRIM_X86_SSE2_AVAILABLE)
+ if (IsProcessorFeaturePresent(PF_XMMI64_INSTRUCTIONS_AVAILABLE))
{
strcat(testStr, " SSE2");
for (off=0; off<16; ++off) {
/* ------------------------------------------------------------------------- */
STD_SPEED_TEST(set32u_speed_test, UINT32, UINT32, dst=dst,
TRUE, memset32u_naive(constant, dst, size),
- TRUE, sse2_set_32u(constant, dst, size), PRIM_X86_SSE2_AVAILABLE,
- FALSE, dst=dst, 0,
+ TRUE, sse2_set_32u(constant, dst, size), PF_XMMI64_INSTRUCTIONS_AVAILABLE, FALSE,
TRUE, ipp_wrapper_set_32u(constant, dst, size));
int test_set32u_speed(void)
/* ------------------------------------------------------------------------- */
STD_SPEED_TEST(set32s_speed_test, INT32, INT32, dst=dst,
TRUE, memset32s_naive(constant, dst, size),
- TRUE, sse2_set_32s(constant, dst, size), PRIM_X86_SSE2_AVAILABLE,
- FALSE, dst=dst, 0,
+ TRUE, sse2_set_32s(constant, dst, size), PF_XMMI64_INSTRUCTIONS_AVAILABLE, FALSE,
TRUE, ippsSet_32s(constant, dst, size));
int test_set32s_speed(void)
#include "config.h"
#endif
+#include <winpr/sysinfo.h>
#include "prim_test.h"
#define FUNC_TEST_SIZE 65536
ALIGN(d1[FUNC_TEST_SIZE+3]), ALIGN(d2[FUNC_TEST_SIZE+3]); \
int failed = 0; \
int i; \
- UINT32 pflags = primitives_get_flags(primitives_get()); \
char testStr[256]; \
testStr[0] = '\0'; \
get_random_data(src, sizeof(src)); \
_f1_(src+1, 3, d1+1, FUNC_TEST_SIZE); \
- if (pflags & PRIM_X86_SSE3_AVAILABLE) \
+ if (IsProcessorFeaturePresent(PF_SSE3_INSTRUCTIONS_AVAILABLE)) \
{ \
strcat(testStr, " SSE3"); \
/* Aligned */ \
/* ========================================================================= */
STD_SPEED_TEST(speed_lShift_16s, INT16, INT16, dst=dst,
TRUE, general_lShiftC_16s(src1, constant, dst, size),
- TRUE, sse2_lShiftC_16s(src1, constant, dst, size), PRIM_X86_SSE2_AVAILABLE,
- FALSE, dst=dst, 0,
+ TRUE, sse2_lShiftC_16s(src1, constant, dst, size), PF_XMMI64_INSTRUCTIONS_AVAILABLE, FALSE,
TRUE, ippsLShiftC_16s(src1, constant, dst, size));
STD_SPEED_TEST(speed_lShift_16u, UINT16, UINT16, dst=dst,
TRUE, general_lShiftC_16u(src1, constant, dst, size),
- TRUE, sse2_lShiftC_16u(src1, constant, dst, size), PRIM_X86_SSE2_AVAILABLE,
- FALSE, dst=dst, 0,
+ TRUE, sse2_lShiftC_16u(src1, constant, dst, size), PF_XMMI64_INSTRUCTIONS_AVAILABLE, FALSE,
TRUE, ippsLShiftC_16u(src1, constant, dst, size));
STD_SPEED_TEST(speed_rShift_16s, INT16, INT16, dst=dst,
TRUE, general_rShiftC_16s(src1, constant, dst, size),
- TRUE, sse2_rShiftC_16s(src1, constant, dst, size), PRIM_X86_SSE2_AVAILABLE,
- FALSE, dst=dst, 0,
+ TRUE, sse2_rShiftC_16s(src1, constant, dst, size), PF_XMMI64_INSTRUCTIONS_AVAILABLE, FALSE,
TRUE, ippsRShiftC_16s(src1, constant, dst, size));
STD_SPEED_TEST(speed_rShift_16u, UINT16, UINT16, dst=dst,
TRUE, general_rShiftC_16u(src1, constant, dst, size),
- TRUE, sse2_rShiftC_16u(src1, constant, dst, size), PRIM_X86_SSE2_AVAILABLE,
- FALSE, dst=dst, 0,
+ TRUE, sse2_rShiftC_16u(src1, constant, dst, size), PF_XMMI64_INSTRUCTIONS_AVAILABLE, FALSE,
TRUE, ippsRShiftC_16u(src1, constant, dst, size));
/* ------------------------------------------------------------------------- */
#include "config.h"
#endif
+#include <winpr/sysinfo.h>
#include "prim_test.h"
static const int SIGN_PRETEST_ITERATIONS = 100000;
INT16 ALIGN(src[65535]), ALIGN(d1[65535]), ALIGN(d2[65535]);
int failed = 0;
int i;
- UINT32 pflags = primitives_get_flags(primitives_get());
char testStr[256];
/* Test when we can reach 16-byte alignment */
get_random_data(src, sizeof(src));
general_sign_16s(src+1, d1+1, 65535);
#ifdef _M_IX86_AMD64
- if (pflags & PRIM_X86_SSSE3_AVAILABLE)
+ if (IsProcessorFeaturePresentEx(PF_EX_SSSE3))
{
strcat(testStr, " SSSE3");
ssse3_sign_16s(src+1, d2+1, 65535);
get_random_data(src, sizeof(src));
general_sign_16s(src+1, d1+2, 65535);
#ifdef _M_IX86_AMD64
- if (pflags & PRIM_X86_SSSE3_AVAILABLE)
+ if (IsProcessorFeaturePresentEx(PF_EX_SSSE3))
{
ssse3_sign_16s(src+1, d2+2, 65535);
for (i=2; i<65535; ++i)
/* ------------------------------------------------------------------------- */
STD_SPEED_TEST(sign16s_speed_test, INT16, INT16, dst=dst,
TRUE, general_sign_16s(src1, dst, size),
- TRUE, ssse3_sign_16s(src1, dst, size), PRIM_X86_SSSE3_AVAILABLE,
- FALSE, dst=dst, 0,
+ TRUE, ssse3_sign_16s(src1, dst, size), PF_EX_SSSE3, TRUE,
FALSE, dst=dst);
int test_sign16s_speed(void)