primitives: make use of winpr's processor feature detection
author Bernhard Miklautz <bmiklautz@thinstuff.at>
Wed, 27 Feb 2013 14:58:06 +0000 (15:58 +0100)
committer Bernhard Miklautz <bmiklautz@thinstuff.at>
Fri, 1 Mar 2013 08:02:14 +0000 (09:02 +0100)
- Removed the cpu flag detection
- Fixed and updated the tests

37 files changed:
include/freerdp/primitives.h
libfreerdp/primitives/README.txt
libfreerdp/primitives/prim_add.c
libfreerdp/primitives/prim_add.h
libfreerdp/primitives/prim_add_opt.c
libfreerdp/primitives/prim_alphaComp.c
libfreerdp/primitives/prim_alphaComp.h
libfreerdp/primitives/prim_alphaComp_opt.c
libfreerdp/primitives/prim_andor.c
libfreerdp/primitives/prim_andor.h
libfreerdp/primitives/prim_andor_opt.c
libfreerdp/primitives/prim_colors.c
libfreerdp/primitives/prim_colors.h
libfreerdp/primitives/prim_colors_opt.c
libfreerdp/primitives/prim_copy.c
libfreerdp/primitives/prim_internal.h
libfreerdp/primitives/prim_set.c
libfreerdp/primitives/prim_set.h
libfreerdp/primitives/prim_set_opt.c
libfreerdp/primitives/prim_shift.c
libfreerdp/primitives/prim_shift.h
libfreerdp/primitives/prim_shift_opt.c
libfreerdp/primitives/prim_sign.c
libfreerdp/primitives/prim_sign.h
libfreerdp/primitives/prim_sign_opt.c
libfreerdp/primitives/primitives.c
libfreerdp/primitives/test/CMakeLists.txt
libfreerdp/primitives/test/prim_test.c
libfreerdp/primitives/test/prim_test.h
libfreerdp/primitives/test/test_add.c
libfreerdp/primitives/test/test_alphaComp.c
libfreerdp/primitives/test/test_andor.c
libfreerdp/primitives/test/test_colors.c
libfreerdp/primitives/test/test_copy.c
libfreerdp/primitives/test/test_set.c
libfreerdp/primitives/test/test_shift.c
libfreerdp/primitives/test/test_sign.c

index 8bcd14c..24bac47 100644 (file)
@@ -190,9 +190,6 @@ typedef struct
        __yCbCrToRGB_16s16s_P3P3_t yCbCrToRGB_16s16s_P3P3;
        __RGBToYCbCr_16s16s_P3P3_t RGBToYCbCr_16s16s_P3P3;
        __RGBToRGB_16s8u_P3AC4R_t RGBToRGB_16s8u_P3AC4R;
-
-       /* internal use for CPU flags and such. */
-       void *hints;
 } primitives_t;
 
 #ifdef __cplusplus
@@ -202,12 +199,6 @@ extern "C" {
 /* Prototypes for the externally-visible entrypoints. */
 FREERDP_API void primitives_init(void);
 FREERDP_API primitives_t *primitives_get(void);
-FREERDP_API UINT32 primitives_get_flags(
-       const primitives_t *prims);
-FREERDP_API void primitives_flags_str(
-       const primitives_t *prims,
-       char *str,
-       size_t len);
 FREERDP_API void primitives_deinit(void);
 
 #ifdef __cplusplus
index 369102c..81c7e97 100644 (file)
@@ -62,10 +62,7 @@ New Optimizations
 -----------------\r
 As the need arises, new optimizations can be added to the library,\r
 including NEON, AVX, and perhaps OpenCL or other SIMD implementations.\r
-The initialization routine is free to do any quick run-time test to\r
-determine which features are available before hooking the operation's\r
-function pointer, or it can simply look at the processor features list\r
-from the hints passed to the initialization routine.\r
+The CPU feature detection is done in winpr/sysinfo.\r
 \r
 \r
 Adding Entrypoints\r
@@ -85,15 +82,6 @@ be added.
      The template functions can frequently be used to extend the\r
      operations without writing a lot of new code.\r
 \r
-\r
-Flags\r
------\r
-The entrypoint primitives_get_flags() returns a bitfield of processor flags\r
-(as defined in primitives.h) and primitives_flag_str() returns a string\r
-related to those processor flags, for debugging and information.  The\r
-bitfield can be used elsewhere in the code as needed.\r
-\r
-\r
 Cache Management\r
 ----------------\r
 I haven't found a lot of speed improvement by attempting prefetch, and\r
index 258bcc6..4d5525b 100644 (file)
@@ -46,12 +46,11 @@ pstatus_t general_add_16s(
 
 /* ------------------------------------------------------------------------- */
 void primitives_init_add(
-       const primitives_hints_t *hints,
        primitives_t *prims)
 {
        prims->add_16s = general_add_16s;
 
-       primitives_init_add_opt(hints, prims);
+       primitives_init_add_opt(prims);
 }
 
 /* ------------------------------------------------------------------------- */
index 4ad4602..f1e143c 100644 (file)
@@ -24,7 +24,7 @@
 
 pstatus_t general_add_16s(const INT16 *pSrc1, const INT16 *pSrc2, INT16 *pDst, INT32 len);
 
-void primitives_init_add_opt(const primitives_hints_t *hints, primitives_t *prims);
+void primitives_init_add_opt(primitives_t *prims);
 
 #endif /* !__PRIM_ADD_H_INCLUDED__ */
 
index 2de0b8f..88a4fbc 100644 (file)
@@ -20,6 +20,7 @@
 
 #include <freerdp/types.h>
 #include <freerdp/primitives.h>
+#include <winpr/sysinfo.h>
 
 #ifdef WITH_SSE2
 #include <emmintrin.h>
@@ -45,18 +46,15 @@ SSE3_SSD_ROUTINE(sse3_add_16s, INT16, general_add_16s,
 
 /* ------------------------------------------------------------------------- */
 void primitives_init_add_opt(
-       const primitives_hints_t *hints,
        primitives_t *prims)
 {
 #ifdef WITH_IPP
        prims->add_16s = (__add_16s_t) ippsAdd_16s;
 #elif defined(WITH_SSE2)
-       if ((hints->x86_flags & PRIM_X86_SSE2_AVAILABLE)
-                       && (hints->x86_flags & PRIM_X86_SSE3_AVAILABLE))        /* for LDDQU */
+       if (IsProcessorFeaturePresent(PF_XMMI64_INSTRUCTIONS_AVAILABLE)
+                       && IsProcessorFeaturePresent(PF_SSE3_INSTRUCTIONS_AVAILABLE))   /* for LDDQU */
        {
                prims->add_16s = sse3_add_16s;
        }
 #endif
 }
-
-
index 24f9167..e48ce9d 100644 (file)
@@ -102,11 +102,11 @@ pstatus_t general_alphaComp_argb(
 }
 
 /* ------------------------------------------------------------------------- */
-void primitives_init_alphaComp(const primitives_hints_t* hints, primitives_t* prims)
+void primitives_init_alphaComp(primitives_t* prims)
 {
        prims->alphaComp_argb = general_alphaComp_argb;
 
-       primitives_init_alphaComp_opt(hints, prims);
+       primitives_init_alphaComp_opt(prims);
 }
 
 /* ------------------------------------------------------------------------- */
index 5059116..a163357 100644 (file)
@@ -24,7 +24,7 @@
 
 pstatus_t general_alphaComp_argb(const BYTE *pSrc1, INT32 src1Step, const BYTE *pSrc2, INT32 src2Step, BYTE *pDst, INT32 dstStep, INT32 width, INT32 height);
 
-void primitives_init_alphaComp_opt(const primitives_hints_t* hints, primitives_t* prims);
+void primitives_init_alphaComp_opt(primitives_t* prims);
 
 #endif /* !__PRIM_ALPHACOMP_H_INCLUDED__ */
 
index 5550fcb..52e33fb 100644 (file)
@@ -26,6 +26,7 @@
 
 #include <freerdp/types.h>
 #include <freerdp/primitives.h>
+#include <winpr/sysinfo.h>
 
 #ifdef WITH_SSE2
 #include <emmintrin.h>
@@ -210,13 +211,13 @@ pstatus_t ipp_alphaComp_argb(
 #endif
 
 /* ------------------------------------------------------------------------- */
-void primitives_init_alphaComp_opt(const primitives_hints_t* hints, primitives_t* prims)
+void primitives_init_alphaComp_opt(primitives_t* prims)
 {
 #ifdef WITH_IPP
        prims->alphaComp_argb = ipp_alphaComp_argb;
 #elif defined(WITH_SSE2)
-       if ((hints->x86_flags & PRIM_X86_SSE2_AVAILABLE)
-                       && (hints->x86_flags & PRIM_X86_SSE3_AVAILABLE))        /* for LDDQU */
+       if (IsProcessorFeaturePresent(PF_XMMI64_INSTRUCTIONS_AVAILABLE)
+                       && IsProcessorFeaturePresent(PF_SSE3_INSTRUCTIONS_AVAILABLE))    /* for LDDQU */
        {
                prims->alphaComp_argb = sse2_alphaComp_argb;
        }
index 0b8092f..4c1923f 100644 (file)
@@ -61,14 +61,13 @@ pstatus_t general_orC_32u(
 
 /* ------------------------------------------------------------------------- */
 void primitives_init_andor(
-       const primitives_hints_t *hints,
        primitives_t *prims)
 {
        /* Start with the default. */
        prims->andC_32u = general_andC_32u;
        prims->orC_32u  = general_orC_32u;
 
-       primitives_init_andor_opt(hints, prims);
+       primitives_init_andor_opt(prims);
 }
 
 /* ------------------------------------------------------------------------- */
index 6a2e7ac..9762f22 100644 (file)
@@ -25,7 +25,7 @@
 pstatus_t general_andC_32u(const UINT32 *pSrc, UINT32 val, UINT32 *pDst, INT32 len);
 pstatus_t general_orC_32u(const UINT32 *pSrc, UINT32 val, UINT32 *pDst, INT32 len);
 
-void primitives_init_andor_opt(const primitives_hints_t *hints,        primitives_t *prims);
+void primitives_init_andor_opt(primitives_t *prims);
 
 #endif /* !__PRIM_ANDOR_H_INCLUDED__ */
 
index e0ce1ea..8d74f30 100644 (file)
@@ -19,6 +19,7 @@
 
 #include <freerdp/types.h>
 #include <freerdp/primitives.h>
+#include <winpr/sysinfo.h>
 
 #ifdef WITH_SSE2
 #include <emmintrin.h>
@@ -45,14 +46,14 @@ SSE3_SCD_PRE_ROUTINE(sse3_orC_32u, UINT32, general_orC_32u,
 
 
 /* ------------------------------------------------------------------------- */
-void primitives_init_andor_opt(const primitives_hints_t *hints,        primitives_t *prims)
+void primitives_init_andor_opt(primitives_t *prims)
 {
 #if defined(WITH_IPP)
        prims->andC_32u = (__andC_32u_t) ippsAndC_32u;
        prims->orC_32u  = (__orC_32u_t) ippsOrC_32u;
 #elif defined(WITH_SSE2)
-       if ((hints->x86_flags & PRIM_X86_SSE2_AVAILABLE)
-                       && (hints->x86_flags & PRIM_X86_SSE3_AVAILABLE))
+       if (IsProcessorFeaturePresent(PF_XMMI64_INSTRUCTIONS_AVAILABLE)
+                       && IsProcessorFeaturePresent(PF_SSE3_INSTRUCTIONS_AVAILABLE))
        {
                prims->andC_32u = sse3_andC_32u;
                prims->orC_32u  = sse3_orC_32u;
index 179e569..aae98e2 100644 (file)
@@ -215,13 +215,13 @@ pstatus_t general_RGBToRGB_16s8u_P3AC4R(
 }
 
 /* ------------------------------------------------------------------------- */
-void primitives_init_colors(const primitives_hints_t* hints, primitives_t* prims)
+void primitives_init_colors(primitives_t* prims)
 {
        prims->RGBToRGB_16s8u_P3AC4R  = general_RGBToRGB_16s8u_P3AC4R;
        prims->yCbCrToRGB_16s16s_P3P3 = general_yCbCrToRGB_16s16s_P3P3;
        prims->RGBToYCbCr_16s16s_P3P3 = general_RGBToYCbCr_16s16s_P3P3;
 
-       primitives_init_colors_opt(hints, prims);
+       primitives_init_colors_opt(prims);
 }
 
 /* ------------------------------------------------------------------------- */
index 70f4785..15b76d9 100644 (file)
@@ -26,7 +26,7 @@ pstatus_t general_yCbCrToRGB_16s16s_P3P3(const INT16 *pSrc[3], INT32 srcStep, IN
 pstatus_t general_RGBToYCbCr_16s16s_P3P3(const INT16 *pSrc[3], INT32 srcStep, INT16 *pDst[3], INT32 dstStep, const prim_size_t *roi);
 pstatus_t general_RGBToRGB_16s8u_P3AC4R(const INT16 *pSrc[3], int srcStep, BYTE *pDst, int dstStep, const prim_size_t *roi);
 
-void primitives_init_colors_opt(const primitives_hints_t* hints, primitives_t* prims);
+void primitives_init_colors_opt(primitives_t* prims);
 
 #endif /* !__PRIM_COLORS_H_INCLUDED__ */
 
index cfc8741..3dcd889 100644 (file)
@@ -23,6 +23,7 @@
 
 #include <freerdp/types.h>
 #include <freerdp/primitives.h>
+#include <winpr/sysinfo.h>
 
 #ifdef WITH_SSE2
 #include <emmintrin.h>
@@ -542,17 +543,17 @@ pstatus_t neon_yCbCrToRGB_16s16s_P3P3(
  */
 
 /* ------------------------------------------------------------------------- */
-void primitives_init_colors_opt(const primitives_hints_t* hints, primitives_t* prims)
+void primitives_init_colors_opt(primitives_t* prims)
 {
 #if defined(WITH_SSE2)
-       if (hints->x86_flags & PRIM_X86_SSE2_AVAILABLE)
+       if (IsProcessorFeaturePresent(PF_XMMI64_INSTRUCTIONS_AVAILABLE))
        {
                prims->RGBToRGB_16s8u_P3AC4R  = sse2_RGBToRGB_16s8u_P3AC4R;
                prims->yCbCrToRGB_16s16s_P3P3 = sse2_yCbCrToRGB_16s16s_P3P3;
                prims->RGBToYCbCr_16s16s_P3P3 = sse2_RGBToYCbCr_16s16s_P3P3;
        }
 #elif defined(WITH_NEON)
-       if (hints->arm_flags & PRIM_ARM_NEON_AVAILABLE)
+       if (IsProcessorFeaturePresent(PF_ARM_NEON_INSTRUCTIONS_AVAILABLE))
        {
                prims->yCbCrToRGB_16s16s_P3P3 = neon_yCbCrToRGB_16s16s_P3P3;
        }
index 4198f2d..95755a0 100644 (file)
@@ -148,7 +148,6 @@ static pstatus_t ippiCopy_8u_AC4r(
 
 /* ------------------------------------------------------------------------- */
 void primitives_init_copy(
-       const primitives_hints_t *hints,
        primitives_t *prims)
 {
        /* Start with the default. */
index 001ab85..06418fb 100644 (file)
                ? _mm_lddqu_si128((__m128i *) (_ptr_)) \
                : _mm_load_si128((__m128i *) (_ptr_)))
 
-/* This structure can (eventually) be used to provide hints to the
- * initialization routines, e.g. whether SSE2 or NEON or IPP instructions
- * or calls are available.
- */
-typedef struct
-{
-       UINT32 x86_flags;
-       UINT32 arm_flags;
-} primitives_hints_t;
-
 /* Function prototypes for all the init/deinit routines. */
 extern void primitives_init_copy(
-       const primitives_hints_t *hints,
        primitives_t *prims);
 extern void primitives_deinit_copy(
        primitives_t *prims);
 
 extern void primitives_init_set(
-       const primitives_hints_t *hints,
        primitives_t *prims);
 extern void primitives_deinit_set(
        primitives_t *prims);
 
 extern void primitives_init_add(
-       const primitives_hints_t *hints,
        primitives_t *prims);
 extern void primitives_deinit_add(
        primitives_t *prims);
 
 extern void primitives_init_andor(
-       const primitives_hints_t *hints,
        primitives_t *prims);
 extern void primitives_deinit_andor(
        primitives_t *prims);
 
 extern void primitives_init_shift(
-       const primitives_hints_t *hints,
        primitives_t *prims);
 extern void primitives_deinit_shift(
        primitives_t *prims);
 
 extern void primitives_init_sign(
-       const primitives_hints_t *hints,
        primitives_t *prims);
 extern void primitives_deinit_sign(
        primitives_t *prims);
 
 extern void primitives_init_alphaComp(
-       const primitives_hints_t *hints,
        primitives_t *prims);
 extern void primitives_deinit_alphaComp(
        primitives_t *prims);
 
 extern void primitives_init_colors(
-       const primitives_hints_t *hints,
        primitives_t *prims);
 extern void primitives_deinit_colors(
        primitives_t *prims);
index 9176c87..967df7b 100644 (file)
@@ -111,7 +111,6 @@ pstatus_t general_set_32u(
 
 /* ------------------------------------------------------------------------- */
 void primitives_init_set(
-       const primitives_hints_t *hints,
        primitives_t *prims)
 {
        /* Start with the default. */
@@ -120,7 +119,7 @@ void primitives_init_set(
        prims->set_32u = general_set_32u;
        prims->zero = general_zero;
 
-       primitives_init_set_opt(hints, prims);
+       primitives_init_set_opt(prims);
 }
 
 /* ------------------------------------------------------------------------- */
index e4504dc..0e2be1e 100644 (file)
@@ -28,7 +28,7 @@ pstatus_t general_set_32s(INT32 val, INT32 *pDst, INT32 len);
 pstatus_t general_set_32u(UINT32 val, UINT32 *pDst, INT32 len);
 
 
-void primitives_init_set_opt(const primitives_hints_t *hints, primitives_t *prims);
+void primitives_init_set_opt(primitives_t *prims);
 
 #endif /* !__PRIM_SET_H_INCLUDED__ */
 
index 0523434..08b0f7e 100644 (file)
@@ -21,6 +21,7 @@
 #include <string.h>
 #include <freerdp/types.h>
 #include <freerdp/primitives.h>
+#include <winpr/sysinfo.h>
 
 #ifdef WITH_SSE2
 # include <emmintrin.h>
@@ -198,7 +199,7 @@ pstatus_t ipp_wrapper_set_32u(
 #endif
 
 /* ------------------------------------------------------------------------- */
-void primitives_init_set_opt(const primitives_hints_t *hints, primitives_t *prims)
+void primitives_init_set_opt(primitives_t *prims)
 {
        /* Pick tuned versions if possible. */
 #ifdef WITH_IPP
@@ -207,7 +208,7 @@ void primitives_init_set_opt(const primitives_hints_t *hints, primitives_t *prim
        prims->set_32u = (__set_32u_t) ipp_wrapper_set_32u;
        prims->zero = (__zero_t) ippsZero_8u;
 #elif defined(WITH_SSE2)
-       if (hints->x86_flags & PRIM_X86_SSE2_AVAILABLE)
+       if (IsProcessorFeaturePresent(PF_XMMI64_INSTRUCTIONS_AVAILABLE))
        {
                prims->set_8u  = sse2_set_8u;
                prims->set_32s = sse2_set_32s;
index bd26dc0..e89b2e0 100644 (file)
@@ -104,7 +104,6 @@ pstatus_t general_shiftC_16u(
 
 /* ------------------------------------------------------------------------- */
 void primitives_init_shift(
-       const primitives_hints_t *hints,
        primitives_t *prims)
 {
        /* Start with the default. */
@@ -117,7 +116,7 @@ void primitives_init_shift(
        prims->shiftC_16s  = general_shiftC_16s;
        prims->shiftC_16u  = general_shiftC_16u;
 
-       primitives_init_shift_opt(hints, prims);
+       primitives_init_shift_opt(prims);
 }
 
 /* ------------------------------------------------------------------------- */
index cad0540..a26a503 100644 (file)
@@ -29,7 +29,7 @@ pstatus_t general_rShiftC_16u(const UINT16 *pSrc, INT32 val, UINT16 *pDst, INT32
 pstatus_t general_shiftC_16s(const INT16 *pSrc, INT32 val, INT16 *pDst, INT32 len);
 pstatus_t general_shiftC_16u(const UINT16 *pSrc, INT32 val, UINT16 *pDst, INT32 len);
 
-void primitives_init_shift_opt(const primitives_hints_t *hints,        primitives_t *prims);
+void primitives_init_shift_opt(primitives_t *prims);
 
 #endif /* !__PRIM_SHIFT_H_INCLUDED__ */
 
index 0e57da2..9cdb33d 100644 (file)
@@ -19,6 +19,7 @@
 
 #include <freerdp/types.h>
 #include <freerdp/primitives.h>
+#include <winpr/sysinfo.h>
 
 #ifdef WITH_SSE2
 #include <emmintrin.h>
@@ -58,7 +59,7 @@ SSE3_SCD_ROUTINE(sse2_rShiftC_16u, UINT16, general_rShiftC_16u,
  */
 
 /* ------------------------------------------------------------------------- */
-void primitives_init_shift_opt(const primitives_hints_t *hints,        primitives_t *prims)
+void primitives_init_shift_opt(primitives_t *prims)
 {
 #if defined(WITH_IPP)
        prims->lShiftC_16s = (__lShiftC_16s_t) ippsLShiftC_16s;
@@ -66,8 +67,8 @@ void primitives_init_shift_opt(const primitives_hints_t *hints,       primitives_t *pr
        prims->lShiftC_16u = (__lShiftC_16u_t) ippsLShiftC_16u;
        prims->rShiftC_16u = (__rShiftC_16u_t) ippsRShiftC_16u;
 #elif defined(WITH_SSE2)
-       if ((hints->x86_flags & PRIM_X86_SSE2_AVAILABLE)
-                       && (hints->x86_flags & PRIM_X86_SSE3_AVAILABLE))
+       if (IsProcessorFeaturePresent(PF_XMMI64_INSTRUCTIONS_AVAILABLE)
+                       && IsProcessorFeaturePresent(PF_SSE3_INSTRUCTIONS_AVAILABLE))
        {
                prims->lShiftC_16s = sse2_lShiftC_16s;
                prims->rShiftC_16s = sse2_rShiftC_16s;
index d7d2eb0..8b2bfa9 100644 (file)
@@ -42,13 +42,12 @@ pstatus_t general_sign_16s(
 
 /* ------------------------------------------------------------------------- */
 void primitives_init_sign(
-       const primitives_hints_t *hints,
        primitives_t *prims)
 {
        /* Start with the default. */
        prims->sign_16s = general_sign_16s;
 
-       primitives_init_sign_opt(hints, prims);
+       primitives_init_sign_opt(prims);
 }
 
 /* ------------------------------------------------------------------------- */
index 3592990..f43eca2 100644 (file)
@@ -24,7 +24,7 @@
 
 pstatus_t general_sign_16s(const INT16 *pSrc, INT16 *pDst, INT32 len);
 
-void primitives_init_sign_opt(const primitives_hints_t *hints, primitives_t *prims);
+void primitives_init_sign_opt(primitives_t *prims);
 
 #endif /* !__PRIM_SIGN_H_INCLUDED__ */
 
index 81842b9..643a75b 100644 (file)
@@ -19,6 +19,7 @@
 
 #include <freerdp/types.h>
 #include <freerdp/primitives.h>
+#include <winpr/sysinfo.h>
 
 #ifdef WITH_SSE2
 #include <emmintrin.h>
@@ -134,13 +135,13 @@ pstatus_t ssse3_sign_16s(
 #endif /* WITH_SSE2 */
 
 /* ------------------------------------------------------------------------- */
-void primitives_init_sign_opt(const primitives_hints_t *hints, primitives_t *prims)
+void primitives_init_sign_opt(primitives_t *prims)
 {
        /* Pick tuned versions if possible. */
        /* I didn't spot an IPP version of this. */
 #if defined(WITH_SSE2)
-       if ((hints->x86_flags & PRIM_X86_SSSE3_AVAILABLE)
-                       && (hints->x86_flags & PRIM_X86_SSE3_AVAILABLE))
+       if (IsProcessorFeaturePresentEx(PF_EX_SSSE3)    /* keep the SSSE3 gate the old code had */
+                       && IsProcessorFeaturePresent(PF_SSE3_INSTRUCTIONS_AVAILABLE))
        {
                prims->sign_16s  = ssse3_sign_16s;
        }
index 245a6ce..2bb05d6 100644 (file)
 #include <string.h>
 #include <stdlib.h>
 
-#include <winpr/platform.h>
-
 #include <freerdp/primitives.h>
 
 #include "prim_internal.h"
 
-#ifdef __ANDROID__
-#include "cpu-features.h"
-#endif
-
 /* Singleton pointer used throughout the program when requested. */
 static primitives_t* pPrimitives = NULL;
 
-#define D_BIT_MMX              (1<<23)
-#define D_BIT_SSE              (1<<25)
-#define D_BIT_SSE2             (1<<26)
-#define D_BIT_3DN              (1<<30)
-#define C_BIT_SSE3             (1<<0)
-#define C_BIT_3DNP             (1<<8)
-#define C_BIT_SSSE3            (1<<9)
-#define C_BIT_SSE41            (1<<19)
-#define C_BIT_SSE42            (1<<20)
-#define C_BIT_XGETBV           (1<<27)
-#define C_BIT_AVX              (1<<28)
-#define C_BITS_AVX             (C_BIT_XGETBV|C_BIT_AVX)
-#define E_BIT_XMM              (1<<1)
-#define E_BIT_YMM              (1<<2)
-#define E_BITS_AVX             (E_BIT_XMM|E_BIT_YMM)
-#define C_BIT_FMA              (1<<11)
-#define C_BIT_AVX_AES          (1<<24)
-
-/* If x86 */
-#if defined(_M_IX86_AMD64)
-
-/* If GCC */
-#ifdef __GNUC__
-
-#ifdef __AVX__
-#define xgetbv(_func_, _lo_, _hi_) \
-       __asm__ __volatile__ ("xgetbv" : "=a" (_lo_), "=d" (_hi_) : "c" (_func_))
-#endif
-
-static void cpuid(
-       unsigned info, 
-       unsigned *eax, 
-       unsigned *ebx, 
-       unsigned *ecx, 
-       unsigned *edx)
-{
-       *eax = *ebx = *ecx = *edx = 0;
-
-       __asm volatile
-       (
-               /* The EBX (or RBX register on x86_64) is used for the PIC base address
-                * and must not be corrupted by our inline assembly.
-                */
-#ifdef _M_IX86
-               "mov %%ebx, %%esi;"
-               "cpuid;"
-               "xchg %%ebx, %%esi;"
-#else
-               "mov %%rbx, %%rsi;"
-               "cpuid;"
-               "xchg %%rbx, %%rsi;"
-#endif
-               : "=a" (*eax), "=S" (*ebx), "=c" (*ecx), "=d" (*edx)
-               : "0" (info)
-       );
-}
-
-static void set_hints(primitives_hints_t* hints)
-{
-       unsigned a, b, c, d;
-
-       cpuid(1, &a, &b, &c, &d);
-
-       if (d & D_BIT_MMX)
-               hints->x86_flags |= PRIM_X86_MMX_AVAILABLE;
-       if (d & D_BIT_SSE)
-               hints->x86_flags |= PRIM_X86_SSE_AVAILABLE;
-       if (d & D_BIT_SSE2)
-               hints->x86_flags |= PRIM_X86_SSE2_AVAILABLE;
-       if (d & D_BIT_3DN)
-               hints->x86_flags |= PRIM_X86_3DNOW_AVAILABLE;
-       if (c & C_BIT_3DNP)
-               hints->x86_flags |= PRIM_X86_3DNOW_PREFETCH_AVAILABLE;
-       if (c & C_BIT_SSE3)
-               hints->x86_flags |= PRIM_X86_SSE3_AVAILABLE;
-       if (c & C_BIT_SSSE3)
-               hints->x86_flags |= PRIM_X86_SSSE3_AVAILABLE;
-       if (c & C_BIT_SSE41)
-               hints->x86_flags |= PRIM_X86_SSE41_AVAILABLE;
-       if (c & C_BIT_SSE42)
-               hints->x86_flags |= PRIM_X86_SSE42_AVAILABLE;
-
-#ifdef __AVX__
-       if ((c & C_BITS_AVX) == C_BITS_AVX)
-       {
-               int e, f;
-               xgetbv(0, e, f);
-
-               if ((e & E_BITS_AVX) == E_BITS_AVX)
-               {
-                       hints->x86_flags |= PRIM_X86_AVX_AVAILABLE;
-
-                       if (c & C_BIT_FMA)
-                               hints->x86_flags |= PRIM_X86_FMA_AVAILABLE;
-                       if (c & C_BIT_AVX_AES)
-                               hints->x86_flags |= PRIM_X86_AVX_AES_AVAILABLE;
-               }
-       }
-       /* TODO: AVX2: set eax=7, ecx=0, cpuid, check ebx-bit5 */
-#endif
-}
-
-#else
-
-static void set_hints(primitives_hints_t* hints)
-{
-       /* x86 non-GCC:  TODO */
-}
-
-#endif /* __GNUC__ */
-
-/* ------------------------------------------------------------------------- */
-
-#elif defined(_M_ARM)
-
-static UINT32 getNeonSupport(void)
-{
-#ifdef __ANDROID__
-       if (android_getCpuFamily() != ANDROID_CPU_FAMILY_ARM) return 0;
-
-       UINT64 features = android_getCpuFeatures();
-
-       if ((features & ANDROID_CPU_ARM_FEATURE_ARMv7))
-       {
-               if (features & ANDROID_CPU_ARM_FEATURE_NEON)
-               {
-                       return PRIM_ARM_NEON_AVAILABLE;
-               }
-       }
-#elif defined(__APPLE)
-       /* assume NEON support on iOS devices */
-       return PRIM_ARM_NEON_AVAILABLE;
-#endif
-       return 0;
-}
-
-static void set_hints(primitives_hints_t* hints)
-{
-       /* ARM:  TODO */
-       hints->arm_flags |= getNeonSupport();
-}
-
-#else
-static void set_hints(
-       primitives_hints_t *hints)
-{
-}
-#endif /* x86 else ARM else */
-
 /* ------------------------------------------------------------------------- */
 void primitives_init(void)
 {
-       primitives_hints_t* hints;
-
        if (pPrimitives == NULL)
        {
                pPrimitives = calloc(1, sizeof(primitives_t));
@@ -197,19 +40,15 @@ void primitives_init(void)
                        return;
        }
 
-       hints = calloc(1, sizeof(primitives_hints_t));
-       set_hints(hints);
-       pPrimitives->hints = (void *) hints;
-
        /* Now call each section's initialization routine. */
-       primitives_init_add(hints, pPrimitives);
-       primitives_init_andor(hints, pPrimitives);
-       primitives_init_alphaComp(hints, pPrimitives);
-       primitives_init_copy(hints, pPrimitives);
-       primitives_init_set(hints, pPrimitives);
-       primitives_init_shift(hints, pPrimitives);
-       primitives_init_sign(hints, pPrimitives);
-       primitives_init_colors(hints, pPrimitives);
+       primitives_init_add(pPrimitives);
+       primitives_init_andor(pPrimitives);
+       primitives_init_alphaComp(pPrimitives);
+       primitives_init_copy(pPrimitives);
+       primitives_init_set(pPrimitives);
+       primitives_init_shift(pPrimitives);
+       primitives_init_sign(pPrimitives);
+       primitives_init_colors(pPrimitives);
 }
 
 /* ------------------------------------------------------------------------- */
@@ -222,102 +61,6 @@ primitives_t* primitives_get(void)
 }
 
 /* ------------------------------------------------------------------------- */
-UINT32 primitives_get_flags(const primitives_t* prims)
-{
-       primitives_hints_t* hints = (primitives_hints_t*) (prims->hints);
-
-#if defined(_M_IX86_AMD64)
-       return hints->x86_flags;
-#elif defined(_M_ARM)
-       return hints->arm_flags;
-#else
-       return 0;
-#endif
-}
-
-/* ------------------------------------------------------------------------- */
-
-typedef struct
-{
-       UINT32  flag;
-       const char *str;
-} flagpair_t;
-
-static const flagpair_t x86_flags[] =
-{
-       { PRIM_X86_MMX_AVAILABLE,                               "MMX" },
-       { PRIM_X86_3DNOW_AVAILABLE,                             "3DNow" },
-       { PRIM_X86_3DNOW_PREFETCH_AVAILABLE,    "3DNow-PF" },
-       { PRIM_X86_SSE_AVAILABLE,                               "SSE" },
-       { PRIM_X86_SSE2_AVAILABLE,                              "SSE2" },
-       { PRIM_X86_SSE3_AVAILABLE,                              "SSE3" },
-       { PRIM_X86_SSSE3_AVAILABLE,                             "SSSE3" },
-       { PRIM_X86_SSE41_AVAILABLE,                             "SSE4.1" },
-       { PRIM_X86_SSE42_AVAILABLE,                             "SSE4.2" },
-       { PRIM_X86_AVX_AVAILABLE,                               "AVX" },
-       { PRIM_X86_FMA_AVAILABLE,                               "FMA" },
-       { PRIM_X86_AVX_AES_AVAILABLE,                   "AVX-AES" },
-       { PRIM_X86_AVX2_AVAILABLE,                              "AVX2" },
-};
-
-static const flagpair_t arm_flags[] =
-{
-       { PRIM_ARM_VFP1_AVAILABLE,                              "VFP1" },
-       { PRIM_ARM_VFP2_AVAILABLE,                              "VFP2" },
-       { PRIM_ARM_VFP3_AVAILABLE,                              "VFP3" },
-       { PRIM_ARM_VFP4_AVAILABLE,                              "VFP4" },
-       { PRIM_ARM_FPA_AVAILABLE,                               "FPA" },
-       { PRIM_ARM_FPE_AVAILABLE,                               "FPE" },
-       { PRIM_ARM_IWMMXT_AVAILABLE,                    "IWMMXT" },
-       { PRIM_ARM_NEON_AVAILABLE,                              "NEON" },
-};
-
-void primitives_flags_str(const primitives_t* prims, char* str, size_t len)
-{
-       int i;
-       primitives_hints_t* hints;
-
-       *str = '\0';
-       --len;  /* for the '/0' */
-
-       hints = (primitives_hints_t*) (prims->hints);
-
-       for (i = 0; i < sizeof(x86_flags) / sizeof(flagpair_t); ++i)
-       {
-               if (hints->x86_flags & x86_flags[i].flag)
-               {
-                       int slen = strlen(x86_flags[i].str) + 1;
-
-                       if (len < slen)
-                               break;
-
-                       if (*str != '\0')
-                               strcat(str, " ");
-
-                       strcat(str, x86_flags[i].str);
-                       len -= slen;
-               }
-       }
-
-       for (i = 0; i < sizeof(arm_flags) / sizeof(flagpair_t); ++i)
-       {
-               if (hints->arm_flags & arm_flags[i].flag)
-               {
-                       int slen = strlen(arm_flags[i].str) + 1;
-
-                       if (len < slen)
-                               break;
-
-                       if (*str != '\0')
-                               strcat(str, " ");
-
-                       strcat(str, arm_flags[i].str);
-                       len -= slen;
-               }
-       }
-}
-
-/* ------------------------------------------------------------------------- */
 void primitives_deinit(void)
 {
        if (pPrimitives == NULL)
@@ -333,9 +76,6 @@ void primitives_deinit(void)
        primitives_deinit_sign(pPrimitives);
        primitives_deinit_colors(pPrimitives);
 
-       if (pPrimitives->hints != NULL)
-               free((void*) (pPrimitives->hints));
-
        free((void*) pPrimitives);
        pPrimitives = NULL;
 }
index 7030ac7..7383291 100644 (file)
@@ -31,28 +31,11 @@ set(PRIMITIVE_TEST_CFILES
        test_set.c
        test_shift.c
        test_sign.c
-       ../prim_add.c
-       ../prim_andor.c
-       ../prim_alphaComp.c
-       ../prim_colors.c
-       ../prim_copy.c
-       ../prim_set.c
-       ../prim_shift.c
-       ../prim_sign.c
-       ../prim_add_opt.c
-       ../prim_alphaComp_opt.c
-       ../prim_andor_opt.c
-       ../prim_colors_opt.c
-       ../prim_set_opt.c
-       ../prim_shift_opt.c
-       ../prim_sign_opt.c
-       ../primitives.c
     )
 
 set(PRIMITIVE_TEST_HEADERS
        measure.h
        prim_test.h
-       ../prim_internal.h
 )
 
 set(PRIMITIVE_TEST_SRCS
@@ -138,7 +121,7 @@ endif()
 
 set_property(SOURCE ${PRIMITIVE_TEST_CFILES} PROPERTY COMPILE_FLAGS ${OPTFLAGS})
 
-target_link_libraries(prim_test rt)
+target_link_libraries(prim_test freerdp-primitives rt winpr-sysinfo)
 if(NOT TESTING_OUTPUT_DIRECTORY)
        set(TESTING_OUTPUT_DIRECTORY .)
 endif()
index 172d6ff..7e88efb 100644 (file)
@@ -21,6 +21,8 @@
 #include <sys/types.h>
 #include <sys/stat.h>
 #include <fcntl.h>
+#include <winpr/platform.h>
+#include <winpr/sysinfo.h>
 
 #ifdef HAVE_UNISTD_H
 #include <unistd.h>
 int test_sizes[] = { 8, 16, 32, 64, 128, 256, 512, 1024, 2048, 4096 };
 int Quiet = 0;
 
+
+
+/* ------------------------------------------------------------------------- */
+/* Associates a processor-feature identifier with the label printed for it. */
+typedef struct
+{
+       UINT32  flag;           /* identifier handed to the processor feature query */
+       const char *str;        /* name appended to the flags string when present */
+} flagpair_t;
+
+/*
+ * Base processor features reported by primitives_flags_str(), tested with
+ * IsProcessorFeaturePresent().  The opening brace is kept outside the
+ * architecture conditionals so the initializer is well-formed on every
+ * architecture, not only on x86.
+ */
+static const flagpair_t flags[] =
+{
+#ifdef _M_IX86_AMD64
+       { PF_MMX_INSTRUCTIONS_AVAILABLE,                "MMX" },
+       { PF_3DNOW_INSTRUCTIONS_AVAILABLE,              "3DNow" },
+       { PF_XMMI_INSTRUCTIONS_AVAILABLE,               "SSE" },
+       { PF_XMMI64_INSTRUCTIONS_AVAILABLE,             "SSE2" },
+       { PF_SSE3_INSTRUCTIONS_AVAILABLE,               "SSE3" },
+#elif defined(_M_ARM)
+       { PF_ARM_VFP3,                                  "VFP3" },
+       { PF_ARM_INTEL_WMMX,                            "IWMMXT" },
+       { PF_ARM_NEON_INSTRUCTIONS_AVAILABLE,           "NEON" },
+#endif
+};
+
+/*
+ * Extended (PF_EX_*) processor features and their printable names.
+ * NOTE(review): other code in this change tests PF_EX_* identifiers with
+ * IsProcessorFeaturePresentEx() - confirm every consumer of this table does too.
+ */
+static const flagpair_t flags_extended[] =
+{
+#ifdef _M_IX86_AMD64
+       { PF_EX_3DNOW_PREFETCH,     "3DNow-PF" },
+       { PF_EX_SSSE3,                          "SSSE3" },
+       { PF_EX_SSE41,                          "SSE4.1" },
+       { PF_EX_SSE42,                          "SSE4.2" },
+       { PF_EX_AVX,                            "AVX" },
+       { PF_EX_FMA,                            "FMA" },
+       { PF_EX_AVX_AES,                        "AVX-AES" },
+       { PF_EX_AVX2,                           "AVX2" },
+#elif defined(_M_ARM)
+       { PF_EX_ARM_VFP1,                       "VFP1"},
+       { PF_EX_ARM_VFP4,                       "VFP4" },
+#endif
+};
+
+/*
+ * Appends name to the NUL-terminated string str, preceded by a single space
+ * when str is not empty.  *remaining is the number of characters that may
+ * still be written (terminator excluded); each name is charged its length
+ * plus one for the separator.  Returns 1 on success, 0 if name does not fit
+ * (str and *remaining are then left untouched).
+ */
+static int primitives_flags_append(char* str, size_t* remaining, const char* name)
+{
+       size_t needed = strlen(name) + 1;
+
+       if (*remaining < needed)
+               return 0;
+
+       if (*str != '\0')
+               strcat(str, " ");
+
+       strcat(str, name);
+       *remaining -= needed;
+       return 1;
+}
+
+/*
+ * Writes a space-separated list of the processor features detected on the
+ * running CPU into str.
+ *
+ * str  destination buffer; always NUL-terminated on return when len > 0
+ * len  size of str in bytes, including room for the terminator
+ *
+ * Base features are tested with IsProcessorFeaturePresent(), extended
+ * (PF_EX_*) features with IsProcessorFeaturePresentEx().  Within each table,
+ * listing stops at the first name that no longer fits.
+ */
+void primitives_flags_str(char* str, size_t len)
+{
+       size_t i;
+
+       /* Nothing can be written; also avoids wrapping len around below. */
+       if (str == NULL || len == 0)
+               return;
+
+       *str = '\0';
+       --len;  /* reserve room for the terminating '\0' */
+
+       for (i = 0; i < sizeof(flags) / sizeof(flags[0]); ++i)
+       {
+               if (!IsProcessorFeaturePresent(flags[i].flag))
+                       continue;
+
+               if (!primitives_flags_append(str, &len, flags[i].str))
+                       break;
+       }
+
+       for (i = 0; i < sizeof(flags_extended) / sizeof(flags_extended[0]); ++i)
+       {
+               /* PF_EX_* identifiers must go through the Ex query. */
+               if (!IsProcessorFeaturePresentEx(flags_extended[i].flag))
+                       continue;
+
+               if (!primitives_flags_append(str, &len, flags_extended[i].str))
+                       break;
+       }
+}
+
 /* ------------------------------------------------------------------------- */
 static void get_random_data_lrand(
     void *buffer,
@@ -198,7 +282,7 @@ static const test_t testTypeList[] =
 int main(int argc, char** argv)
 {
        int i;
-       char hints[256];
+       char hints[1024];
        UINT32 testSet = 0;
        UINT32 testTypes = 0;
        int results = SUCCESS;
@@ -253,7 +337,7 @@ int main(int argc, char** argv)
 
        primitives_init();
 
-       primitives_flags_str(primitives_get(), hints, sizeof(hints));
+       primitives_flags_str(hints, sizeof(hints));
        printf("Hints: %s\n", hints);
 
        /* COPY */
index 9c4d3d8..fa61025 100644 (file)
@@ -29,6 +29,7 @@
 #include <stdio.h>
 
 #include <freerdp/primitives.h>
+#include <winpr/platform.h>
 
 #ifdef WITH_IPP
 #include <ipps.h>
@@ -121,8 +122,8 @@ extern int test_or_32u_speed(void);
                } \
        } while (0)
 
-#if defined(i386) && defined(WITH_SSE2)
-#define DO_SSE_MEASUREMENTS(_funcSSE_, _prework_) \
+#if (defined(_M_IX86_AMD64) && defined(WITH_SSE2)) || (defined(arm) && defined(WITH_NEON))
+#define DO_OPT_MEASUREMENTS(_funcOpt_, _prework_) \
        do { \
                for (s=0; s<num_sizes; ++s) \
                { \
@@ -132,34 +133,15 @@ extern int test_or_32u_speed(void);
                        _prework_; \
                        iter = iterations/size; \
                        sprintf(label, "%s-%s-%-4d", SIMD_TYPE, oplabel, size); \
-                       MEASURE_TIMED(label, iter, test_time, resultSSENeon[s],  \
-                               _funcSSE_); \
+                       MEASURE_TIMED(label, iter, test_time, resultOpt[s],  \
+                               _funcOpt_); \
                } \
        } while (0)
 #else
-#define DO_SSE_MEASUREMENTS(_funcSSE_, _prework_)
+#define DO_OPT_MEASUREMENTS(_funcSSE_, _prework_)
 #endif
 
-#if defined(armel) && defined(INCLUDE_NEON_MEASUREMENTS)
-#define DO_NEON_MEASUREMENTS(_funcNeon_, _prework_) \
-       do { \
-               for (s=0; s<num_sizes; ++s) \
-               { \
-                       int iter; \
-                       char label[256]; \
-                       int size = size_array[s]; \
-                       _prework_; \
-                       iter = iterations/size; \
-                       sprintf(label, "%s-%s-%-4d", SIMD_TYPE, oplabel, size); \
-                       MEASURE_TIMED(label, iter, test_time, resultSSENeon[s],  \
-                               _funcNeon_); \
-               } \
-       } while (0)
-#else
-#define DO_NEON_MEASUREMENTS(_funcNeon_, _prework_)
-#endif
-
-#if defined(i386) && defined(WITH_IPP)
+#if defined(_M_IX86_AMD64) && defined(WITH_IPP)
 #define DO_IPP_MEASUREMENTS(_funcIPP_, _prework_) \
        do { \
                for (s=0; s<num_sizes; ++s) \
@@ -182,8 +164,7 @@ extern int test_or_32u_speed(void);
 #define STD_SPEED_TEST( \
        _name_, _srctype_, _dsttype_, _prework_, \
        _doNormal_, _funcNormal_, \
-       _doSSE_,    _funcSSE_,  _flagsSSE_, \
-       _doNeon_,   _funcNeon_, _flagsNeon_, \
+       _doOpt_,    _funcOpt_,  _flagOpt_, _flagExt_, \
        _doIPP_,    _funcIPP_) \
 static void _name_( \
        const char *oplabel, const char *type, \
@@ -193,24 +174,28 @@ static void _name_( \
        int iterations, float test_time) \
 { \
        int s; \
-       float *resultNormal, *resultSSENeon, *resultIPP; \
-       UINT32 pflags = primitives_get_flags(primitives_get()); \
+       float *resultNormal, *resultOpt, *resultIPP; \
        resultNormal = (float *) calloc(num_sizes, sizeof(float)); \
-       resultSSENeon = (float *) calloc(num_sizes, sizeof(float)); \
+       resultOpt = (float *) calloc(num_sizes, sizeof(float)); \
        resultIPP = (float *) calloc(num_sizes, sizeof(float)); \
        printf("******************** %s %s ******************\n",  \
                oplabel, type); \
        if (_doNormal_) { DO_NORMAL_MEASUREMENTS(_funcNormal_, _prework_); } \
-       if (_doSSE_) { \
-               if ((pflags & (_flagsSSE_)) == (_flagsSSE_)) \
+       if (_doOpt_)  \
+       { \
+               if (_flagExt_) \
                { \
-                       DO_SSE_MEASUREMENTS(_funcSSE_, _prework_); \
+                       if (IsProcessorFeaturePresentEx(_flagOpt_)) \
+                       { \
+                               DO_OPT_MEASUREMENTS(_funcOpt_, _prework_); \
+                       } \
                } \
-       } \
-       if (_doNeon_) { \
-               if ((pflags & (_flagsNeon_)) == (_flagsNeon_)) \
+               else \
                { \
-                       DO_NEON_MEASUREMENTS(_funcNeon_, _prework_); \
+                       if (IsProcessorFeaturePresent(_flagOpt_)) \
+                       { \
+                               DO_OPT_MEASUREMENTS(_funcOpt_, _prework_); \
+                       } \
                } \
        } \
        if (_doIPP_)    { DO_IPP_MEASUREMENTS(_funcIPP_, _prework_); } \
@@ -223,13 +208,13 @@ static void _name_( \
                strcpy(sN, "N/A"); strcpy(sSN, "N/A"); strcpy(sSNp, "N/A"); \
                strcpy(sIPP, "N/A"); strcpy(sIPPp, "N/A"); \
                if (resultNormal[s] > 0.0) _floatprint(resultNormal[s], sN); \
-               if (resultSSENeon[s] > 0.0) \
+               if (resultOpt[s] > 0.0) \
                { \
-                       _floatprint(resultSSENeon[s], sSN); \
+                       _floatprint(resultOpt[s], sSN); \
                        if (resultNormal[s] > 0.0) \
                        { \
                                sprintf(sSNp, "%d%%", \
-                                       (int) (resultSSENeon[s] / resultNormal[s] * 100.0 + 0.5)); \
+                                       (int) (resultOpt[s] / resultNormal[s] * 100.0 + 0.5)); \
                        } \
                } \
                if (resultIPP[s] > 0.0) \
@@ -244,7 +229,7 @@ static void _name_( \
                printf("%8d: %15s %15s %5s %15s %5s\n",  \
                        size_array[s], sN, sSN, sSNp, sIPP, sIPPp); \
        } \
-       free(resultNormal); free(resultSSENeon);  free(resultIPP); \
+       free(resultNormal); free(resultOpt);  free(resultIPP); \
 }
 
 #endif // !__PRIMTEST_H_INCLUDED__
index bcdd643..083c379 100644 (file)
@@ -16,6 +16,7 @@
 #include "config.h"
 #endif
 
+#include <winpr/sysinfo.h>
 #include "prim_test.h"
 
 #define FUNC_TEST_SIZE 65536
@@ -35,7 +36,6 @@ int test_add16s_func(void)
        int failed = 0;
        int i;
        char testStr[256];
-       UINT32 pflags = primitives_get_flags(primitives_get());
 
        testStr[0] = '\0';
        get_random_data(src1, sizeof(src1));
@@ -44,7 +44,7 @@ int test_add16s_func(void)
        memset(d2, 0, sizeof(d2));
        general_add_16s(src1+1, src2+1, d1+1, FUNC_TEST_SIZE);
 #ifdef _M_IX86_AMD64
-       if (pflags & PRIM_X86_SSE3_AVAILABLE)
+       if(IsProcessorFeaturePresent(PF_SSE3_INSTRUCTIONS_AVAILABLE))
        {
                strcat(testStr, " SSE3");
                /* Aligned */
@@ -91,8 +91,7 @@ int test_add16s_func(void)
 /* ------------------------------------------------------------------------- */
 STD_SPEED_TEST(add16s_speed_test, INT16, INT16, dst=dst,
        TRUE, general_add_16s(src1, src2, dst, size),
-       TRUE, sse3_add_16s(src1, src2, dst, size), PRIM_X86_SSE3_AVAILABLE,
-       FALSE, dst=dst, 0,
+       TRUE, sse3_add_16s(src1, src2, dst, size), PF_SSE3_INSTRUCTIONS_AVAILABLE, FALSE,
        TRUE, ippsAdd_16s(src1, src2, dst, size));
 
 int test_add16s_speed(void)
index a39157a..2d8285f 100644 (file)
@@ -15,6 +15,7 @@
 #ifdef HAVE_CONFIG_H
 #include "config.h"
 #endif
+#include <winpr/sysinfo.h>
 
 #include "prim_test.h"
 
@@ -110,7 +111,6 @@ int test_alphaComp_func(void)
        UINT32 ALIGN(dst2u[DST_WIDTH*DST_HEIGHT+1]);
        UINT32 ALIGN(dst3[DST_WIDTH*DST_HEIGHT]);
        int error = 0;
-       UINT32 pflags = primitives_get_flags(primitives_get());
        char testStr[256];
        UINT32 *ptr;
        int i, x, y;
@@ -133,7 +133,7 @@ int test_alphaComp_func(void)
                (const BYTE *) src2, 4*SRC2_WIDTH,
                (BYTE *) dst1, 4*DST_WIDTH, TEST_WIDTH, TEST_HEIGHT);
 #ifdef _M_IX86_AMD64
-       if (pflags & PRIM_X86_SSE2_AVAILABLE)
+       if (IsProcessorFeaturePresent(PF_XMMI64_INSTRUCTIONS_AVAILABLE))
        {
                strcat(testStr, " SSE2");
                sse2_alphaComp_argb((const BYTE *) src1, 4*SRC1_WIDTH, 
@@ -166,7 +166,7 @@ int test_alphaComp_func(void)
                                error = 1;
                        }
 #ifdef _M_IX86_AMD64
-                       if (pflags & PRIM_X86_SSE2_AVAILABLE)
+                       if (IsProcessorFeaturePresent(PF_XMMI64_INSTRUCTIONS_AVAILABLE))
                        {
                                UINT32 c2 = *PIXEL(dst2a, 4*DST_WIDTH, x, y);
                                if (colordist(c0, c2) > TOLERANCE)
@@ -207,8 +207,7 @@ STD_SPEED_TEST(alphaComp_speed, BYTE, BYTE, int bytes = size*4,
        TRUE, general_alphaComp_argb(src1, bytes, src2, bytes, dst, bytes,
                size, size),
        TRUE, sse2_alphaComp_argb(src1, bytes, src2, bytes, dst, bytes,
-               size, size), PRIM_X86_SSE2_AVAILABLE,
-       FALSE, dst=dst, 0,
+               size, size), PF_XMMI64_INSTRUCTIONS_AVAILABLE, FALSE,
        TRUE, ipp_alphaComp_argb(src1, bytes, src2, bytes, dst, bytes,
                size, size));
 
index 54e1ead..6e8b3d8 100644 (file)
@@ -15,6 +15,7 @@
 #ifdef HAVE_CONFIG_H
 #include "config.h"
 #endif
+#include <winpr/sysinfo.h>
 
 #include "prim_test.h"
 
@@ -39,7 +40,6 @@ int test_and_32u_func(void)
        UINT32 ALIGN(src[FUNC_TEST_SIZE+3]), ALIGN(dst[FUNC_TEST_SIZE+3]);
        int failed = 0;
        int i;
-       UINT32 pflags = primitives_get_flags(primitives_get());
        char testStr[256];
 
        testStr[0] = '\0';
@@ -56,7 +56,7 @@ int test_and_32u_func(void)
                }
        }
 #ifdef _M_IX86_AMD64
-       if (pflags & PRIM_X86_SSE3_AVAILABLE)
+       if (IsProcessorFeaturePresent(PF_SSE3_INSTRUCTIONS_AVAILABLE))
        {
                strcat(testStr, " SSE3");
                /* Aligned */
@@ -92,8 +92,7 @@ int test_and_32u_func(void)
 /* ------------------------------------------------------------------------- */
 STD_SPEED_TEST(andC_32u_speed_test, UINT32, UINT32, dst=dst,
        TRUE, general_andC_32u(src1, constant, dst, size),
-       TRUE, sse3_andC_32u(src1, constant, dst, size), PRIM_X86_SSE3_AVAILABLE,
-       FALSE, dst=dst, 0,
+       TRUE, sse3_andC_32u(src1, constant, dst, size), PF_SSE3_INSTRUCTIONS_AVAILABLE, FALSE,
        TRUE, ippsAndC_32u(src1, constant, dst, size))
 
 int test_and_32u_speed(void)
@@ -113,7 +112,6 @@ int test_or_32u_func(void)
        UINT32 ALIGN(src[FUNC_TEST_SIZE+3]), ALIGN(dst[FUNC_TEST_SIZE+3]);
        int failed = 0;
        int i;
-       UINT32 pflags = primitives_get_flags(primitives_get());
        char testStr[256];
 
        testStr[0] = '\0';
@@ -130,7 +128,7 @@ int test_or_32u_func(void)
                }
        }
 #ifdef _M_IX86_AMD64
-       if (pflags & PRIM_X86_SSE3_AVAILABLE)
+       if(IsProcessorFeaturePresent(PF_SSE3_INSTRUCTIONS_AVAILABLE))
        {
                strcat(testStr, " SSE3");
                /* Aligned */
@@ -166,8 +164,7 @@ int test_or_32u_func(void)
 /* ------------------------------------------------------------------------- */
 STD_SPEED_TEST(orC_32u_speed_test, UINT32, UINT32, dst=dst,
        TRUE, general_orC_32u(src1, constant, dst, size),
-       TRUE, sse3_orC_32u(src1, constant, dst, size), PRIM_X86_SSE3_AVAILABLE,
-       FALSE, dst=dst, 0,
+       TRUE, sse3_orC_32u(src1, constant, dst, size), PF_SSE3_INSTRUCTIONS_AVAILABLE, FALSE,
        TRUE, ippsOrC_32u(src1, constant, dst, size))
 
 int test_or_32u_speed(void)
index d86d76e..e5192c0 100644 (file)
@@ -16,6 +16,7 @@
 #include "config.h"
 #endif
 
+#include <winpr/sysinfo.h>
 #include "prim_test.h"
 
 static const int RGB_TRIAL_ITERATIONS = 1000;
@@ -38,7 +39,6 @@ int test_RGBToRGB_16s8u_P3AC4R_func(void)
        UINT32 ALIGN(out1[4096]), ALIGN(out2[4096]);
        int i;
        int failed = 0;
-       UINT32 pflags = primitives_get_flags(primitives_get());
        char testStr[256];
        INT16 *ptrs[3];
        prim_size_t roi = { 64, 64 };
@@ -62,7 +62,7 @@ int test_RGBToRGB_16s8u_P3AC4R_func(void)
        general_RGBToRGB_16s8u_P3AC4R((const INT16 **) ptrs, 64*2,
                (BYTE *) out1, 64*4, &roi);
 #ifdef _M_IX86_AMD64
-       if (pflags & PRIM_X86_SSE2_AVAILABLE)
+       if (IsProcessorFeaturePresent(PF_XMMI64_INSTRUCTIONS_AVAILABLE))
        {
                strcat(testStr, " SSE2");
                sse2_RGBToRGB_16s8u_P3AC4R((const INT16 **) ptrs, 64*2,
@@ -90,8 +90,7 @@ STD_SPEED_TEST(
                (const INT16 **) src1, 64*2, (BYTE *) dst, 64*4, &roi64x64),
        TRUE, sse2_RGBToRGB_16s8u_P3AC4R(
                (const INT16 **) src1, 64*2, (BYTE *) dst, 64*4, &roi64x64),
-               PRIM_X86_SSE2_AVAILABLE,
-       FALSE, dst=dst, 0,
+               PF_XMMI64_INSTRUCTIONS_AVAILABLE, FALSE,
        FALSE, dst=dst);
 
 int test_RGBToRGB_16s8u_P3AC4R_speed(void)
@@ -131,7 +130,6 @@ int test_yCbCrToRGB_16s16s_P3P3_func(void)
        INT16 ALIGN(r2[4096]), ALIGN(g2[4096]), ALIGN(b2[4096]);
        int i;
        int failed = 0;
-       UINT32 pflags = primitives_get_flags(primitives_get());
        char testStr[256];
        const INT16 *in[3];
        INT16 *out1[3];
@@ -168,7 +166,7 @@ int test_yCbCrToRGB_16s16s_P3P3_func(void)
 
        general_yCbCrToRGB_16s16s_P3P3(in, 64*2, out1, 64*2, &roi);
 #ifdef _M_IX86_AMD64
-       if (pflags & PRIM_X86_SSE2_AVAILABLE)
+       if (IsProcessorFeaturePresent(PF_XMMI64_INSTRUCTIONS_AVAILABLE))
        {
                strcat(testStr, " SSE2");
                sse2_yCbCrToRGB_16s16s_P3P3(in, 64*2, out2, 64*2, &roi);
@@ -193,8 +191,7 @@ STD_SPEED_TEST(
        ycbcr_to_rgb_speed, INT16*, INT16*, dst=dst,
        TRUE, general_yCbCrToRGB_16s16s_P3P3(src1, 64*2, dst, 64*2, &roi64x64),
        TRUE, sse2_yCbCrToRGB_16s16s_P3P3(src1, 64*2, dst, 64*2, &roi64x64),
-               PRIM_X86_SSE2_AVAILABLE,
-       FALSE, dst=dst, 0,
+               PF_XMMI64_INSTRUCTIONS_AVAILABLE, FALSE,
        FALSE, dst=dst);
 
 int test_yCbCrToRGB_16s16s_P3P3_speed(void)
index d92af53..5f2ed91 100644 (file)
@@ -16,6 +16,7 @@
 #include "config.h"
 #endif
 
+#include <winpr/sysinfo.h>
 #include "prim_test.h"
 
 static const int MEMCPY_PRETEST_ITERATIONS = 1000000;
@@ -70,8 +71,7 @@ int test_copy8u_func(void)
 /* ------------------------------------------------------------------------- */
 STD_SPEED_TEST(copy8u_speed_test, BYTE, BYTE, dst=dst,
        TRUE, memcpy(dst, src1, size),
-       FALSE, NULL, 0,
-       FALSE, NULL, 0,
+       FALSE, NULL, 0, FALSE,
        TRUE, ippsCopy_8u(src1, dst, size));
 
 int test_copy8u_speed(void)
index 597f766..0343674 100644 (file)
@@ -16,6 +16,7 @@
 #include "config.h"
 #endif
 
+#include <winpr/sysinfo.h>
 #include "prim_test.h"
 
 static const int MEMSET8_PRETEST_ITERATIONS = 100000000;
@@ -40,12 +41,11 @@ int test_set8u_func(void)
        int failed = 0;
        int off;
        char testStr[256];
-       UINT32 pflags = primitives_get_flags(primitives_get());
        testStr[0] = '\0';
 
 #ifdef _M_IX86_AMD64
        /* Test SSE under various alignments */
-       if (pflags & PRIM_X86_SSE2_AVAILABLE)
+       if (IsProcessorFeaturePresent(PF_XMMI64_INSTRUCTIONS_AVAILABLE))
        {
                strcat(testStr, " SSE2");
                for (off=0; off<16; ++off)
@@ -101,8 +101,7 @@ int test_set8u_func(void)
 /* ------------------------------------------------------------------------- */
 STD_SPEED_TEST(set8u_speed_test, BYTE, BYTE, dst=dst,
        TRUE, memset(dst, constant, size),
-       FALSE, NULL, 0,
-       FALSE, NULL, 0,
+       FALSE, NULL, 0, FALSE,
        TRUE, ippsSet_8u(constant, dst, size));
 
 int test_set8u_speed(void)
@@ -116,17 +115,15 @@ int test_set8u_speed(void)
 /* ------------------------------------------------------------------------- */
 int test_set32s_func(void)
 {
-       primitives_t* prims = primitives_get();
        INT32 ALIGN(dest[512]);
        int failed = 0;
        int off;
        char testStr[256];
-       UINT32 pflags = primitives_get_flags(prims);
        testStr[0] = '\0';
 
 #ifdef _M_IX86_AMD64
        /* Test SSE under various alignments */
-       if (pflags & PRIM_X86_SSE2_AVAILABLE)
+       if (IsProcessorFeaturePresent(PF_XMMI64_INSTRUCTIONS_AVAILABLE))
        {
                strcat(testStr, " SSE2");
                for (off=0; off<16; ++off) {
@@ -179,17 +176,15 @@ int test_set32s_func(void)
 /* ------------------------------------------------------------------------- */
 int test_set32u_func(void)
 {
-       primitives_t* prims = primitives_get();
        UINT32 ALIGN(dest[512]);
        int failed = 0;
        int off;
        char testStr[256];
-       UINT32 pflags = primitives_get_flags(prims);
        testStr[0] = '\0';
 
 #ifdef _M_IX86_AMD64
        /* Test SSE under various alignments */
-       if (pflags & PRIM_X86_SSE2_AVAILABLE)
+       if (IsProcessorFeaturePresent(PF_XMMI64_INSTRUCTIONS_AVAILABLE))
        {
                strcat(testStr, " SSE2");
                for (off=0; off<16; ++off) {
@@ -251,8 +246,7 @@ static inline void memset32u_naive(
 /* ------------------------------------------------------------------------- */
 STD_SPEED_TEST(set32u_speed_test, UINT32, UINT32, dst=dst,
        TRUE, memset32u_naive(constant, dst, size),
-       TRUE, sse2_set_32u(constant, dst, size), PRIM_X86_SSE2_AVAILABLE,
-       FALSE, dst=dst, 0,
+       TRUE, sse2_set_32u(constant, dst, size), PF_XMMI64_INSTRUCTIONS_AVAILABLE, FALSE,
        TRUE, ipp_wrapper_set_32u(constant, dst, size));
 
 int test_set32u_speed(void)
@@ -280,8 +274,7 @@ static inline void memset32s_naive(
 /* ------------------------------------------------------------------------- */
 STD_SPEED_TEST(set32s_speed_test, INT32, INT32, dst=dst,
        TRUE, memset32s_naive(constant, dst, size),
-       TRUE, sse2_set_32s(constant, dst, size), PRIM_X86_SSE2_AVAILABLE,
-       FALSE, dst=dst, 0,
+       TRUE, sse2_set_32s(constant, dst, size), PF_XMMI64_INSTRUCTIONS_AVAILABLE, FALSE,
        TRUE, ippsSet_32s(constant, dst, size));
 
 int test_set32s_speed(void)
index 1b8a5c1..d72407e 100644 (file)
@@ -16,6 +16,7 @@
 #include "config.h"
 #endif
 
+#include <winpr/sysinfo.h>
 #include "prim_test.h"
 
 #define FUNC_TEST_SIZE 65536
@@ -55,12 +56,11 @@ int _name_(void) \
                ALIGN(d1[FUNC_TEST_SIZE+3]), ALIGN(d2[FUNC_TEST_SIZE+3]); \
        int failed = 0; \
        int i; \
-       UINT32 pflags = primitives_get_flags(primitives_get()); \
        char testStr[256]; \
        testStr[0] = '\0'; \
        get_random_data(src, sizeof(src)); \
        _f1_(src+1, 3, d1+1, FUNC_TEST_SIZE); \
-       if (pflags & PRIM_X86_SSE3_AVAILABLE) \
+       if (IsProcessorFeaturePresent(PF_SSE3_INSTRUCTIONS_AVAILABLE)) \
        { \
                strcat(testStr, " SSE3"); \
                /* Aligned */ \
@@ -109,23 +109,19 @@ SHIFT_TEST_FUNC(test_rShift_16u_func, UINT16, "rshift_16u", general_rShiftC_16u,
 /* ========================================================================= */
 STD_SPEED_TEST(speed_lShift_16s, INT16, INT16, dst=dst,
     TRUE, general_lShiftC_16s(src1, constant, dst, size),
-       TRUE, sse2_lShiftC_16s(src1, constant, dst, size), PRIM_X86_SSE2_AVAILABLE,
-       FALSE, dst=dst, 0,
+       TRUE, sse2_lShiftC_16s(src1, constant, dst, size), PF_XMMI64_INSTRUCTIONS_AVAILABLE, FALSE,
        TRUE, ippsLShiftC_16s(src1, constant, dst, size));
 STD_SPEED_TEST(speed_lShift_16u, UINT16, UINT16, dst=dst,
     TRUE, general_lShiftC_16u(src1, constant, dst, size),
-       TRUE, sse2_lShiftC_16u(src1, constant, dst, size), PRIM_X86_SSE2_AVAILABLE,
-       FALSE, dst=dst, 0,
+       TRUE, sse2_lShiftC_16u(src1, constant, dst, size), PF_XMMI64_INSTRUCTIONS_AVAILABLE, FALSE,
        TRUE, ippsLShiftC_16u(src1, constant, dst, size));
 STD_SPEED_TEST(speed_rShift_16s, INT16, INT16, dst=dst,
     TRUE, general_rShiftC_16s(src1, constant, dst, size),
-       TRUE, sse2_rShiftC_16s(src1, constant, dst, size), PRIM_X86_SSE2_AVAILABLE,
-       FALSE, dst=dst, 0,
+       TRUE, sse2_rShiftC_16s(src1, constant, dst, size), PF_XMMI64_INSTRUCTIONS_AVAILABLE, FALSE,
        TRUE, ippsRShiftC_16s(src1, constant, dst, size));
 STD_SPEED_TEST(speed_rShift_16u, UINT16, UINT16, dst=dst,
     TRUE, general_rShiftC_16u(src1, constant, dst, size),
-       TRUE, sse2_rShiftC_16u(src1, constant, dst, size), PRIM_X86_SSE2_AVAILABLE,
-       FALSE, dst=dst, 0,
+       TRUE, sse2_rShiftC_16u(src1, constant, dst, size), PF_XMMI64_INSTRUCTIONS_AVAILABLE, FALSE,
        TRUE, ippsRShiftC_16u(src1, constant, dst, size));
 
 /* ------------------------------------------------------------------------- */
index 0123035..99f5a60 100644 (file)
@@ -16,6 +16,7 @@
 #include "config.h"
 #endif
 
+#include <winpr/sysinfo.h>
 #include "prim_test.h"
 
 static const int SIGN_PRETEST_ITERATIONS = 100000;
@@ -30,7 +31,6 @@ int test_sign16s_func(void)
        INT16 ALIGN(src[65535]), ALIGN(d1[65535]), ALIGN(d2[65535]);
        int failed = 0;
        int i;
-       UINT32 pflags = primitives_get_flags(primitives_get());
        char testStr[256];
 
        /* Test when we can reach 16-byte alignment */
@@ -38,7 +38,7 @@ int test_sign16s_func(void)
        get_random_data(src, sizeof(src));
        general_sign_16s(src+1, d1+1, 65535);
 #ifdef _M_IX86_AMD64
-       if (pflags & PRIM_X86_SSSE3_AVAILABLE)
+       if (IsProcessorFeaturePresentEx(PF_EX_SSSE3))
        {
                strcat(testStr, " SSSE3");
                ssse3_sign_16s(src+1, d2+1, 65535);
@@ -58,7 +58,7 @@ int test_sign16s_func(void)
        get_random_data(src, sizeof(src));
        general_sign_16s(src+1, d1+2, 65535);
 #ifdef _M_IX86_AMD64
-       if (pflags & PRIM_X86_SSSE3_AVAILABLE)
+       if (IsProcessorFeaturePresentEx(PF_EX_SSSE3))
        {
                ssse3_sign_16s(src+1, d2+2, 65535);
                for (i=2; i<65535; ++i)
@@ -79,8 +79,7 @@ int test_sign16s_func(void)
 /* ------------------------------------------------------------------------- */
 STD_SPEED_TEST(sign16s_speed_test, INT16, INT16, dst=dst,
        TRUE, general_sign_16s(src1, dst, size),
-       TRUE, ssse3_sign_16s(src1, dst, size), PRIM_X86_SSSE3_AVAILABLE,
-       FALSE, dst=dst, 0,
+       TRUE, ssse3_sign_16s(src1, dst, size), PF_EX_SSSE3, TRUE,
        FALSE, dst=dst);
 
 int test_sign16s_speed(void)