meson - add checks/options for mmx, sse3, neon, altivec
authorCarsten Haitzler (Rasterman) <raster@rasterman.com>
Wed, 7 Nov 2018 18:01:48 +0000 (18:01 +0000)
committerSangHyeon Jade Lee <sh10233.lee@samsung.com>
Tue, 20 Nov 2018 06:56:20 +0000 (15:56 +0900)
so we can build our assembly fast-paths again.... - also clean up the
code a bit to match...

meson.build
meson_options.txt
src/lib/evas/common/evas_cpu.c
src/lib/evas/common/meson.build
src/lib/evas/include/evas_blend_ops.h
src/lib/evas/include/evas_common_private.h
src/lib/evas/meson.build

index dcef2ea..a89b899 100644 (file)
@@ -1,7 +1,7 @@
 project('efl', ['c','cpp'],
   version: '1.21.99',
   default_options : ['buildtype=plain'],
-  meson_version : '>=0.46'
+  meson_version : '>=0.47'
 )
 
 if target_machine.system() == 'darwin'
@@ -110,6 +110,59 @@ if compiler.compiles(code, args : '-lc', name : 'environ check') == true
   config_h.set10('HAVE_ENVIRON', true)
 endif
 
+## or should this be target_machine?
+cpu_mmx = false
+cpu_sse3 = false
+cpu_neon = false
+cpu_neon_intrinsics = false
+cpu_altivec = false
+evas_opt_c_args = [ ]
+machine_c_args = [ ]
+compiler = meson.get_compiler('c')
+if host_machine.cpu_family() == 'x86' or host_machine.cpu_family() == 'x86_64'
+  if compiler.check_header('immintrin.h') == true
+    if (get_option('cpu-mmx') == true)
+      config_h.set10('BUILD_MMX', true)
+      cpu_mmx = true
+      message('x86 build - MMX enabled')
+      if (get_option('cpu-sse3') == true)
+        config_h.set10('BUILD_SSE3', true)
+        evas_opt_c_args += [ '-msse3' ]
+        cpu_sse3 = true
+        message('x86 build - SSE3 enabled')
+      endif
+    endif
+  endif
+elif host_machine.cpu_family() == 'arm'
+  if compiler.check_header('arm_neon.h') == true
+    if (get_option('cpu-neon') == true)
+      config_h.set10('BUILD_NEON', true)
+      machine_c_args += ['-mfpu=neon', '-ftree-vectorize']
+      cpu_neon = true
+      message('ARM build - NEON enabled')
+    endif
+  endif
+elif host_machine.cpu_family() == 'aarch64'
+  if compiler.check_header('arm_neon.h') == true
+    if (get_option('cpu-neon') == true)
+      config_h.set10('BUILD_NEON', true)
+      config_h.set10('BUILD_NEON_INTRINSICS', true)
+      machine_c_args += ['-ftree-vectorize']
+      cpu_neon = true
+      cpu_neon_intrinsics = true
+      message('ARM64 build - NEON + intrinsics enabled')
+    endif
+  endif
+elif host_machine.cpu_family() == 'ppc' or host_machine.cpu_family() == 'ppc64'
+  if compiler.check_header('altivec.h') == true
+    if (get_option('cpu-akltivec') == true)
+      config_h.set10('BUILD_ALTIVEC', true)
+      machine_c_args += [ '-maltivec' ]
+      cpu_altivec = true
+      message('PPC/POWER build - ALTIVEC enabled')
+    endif
+  endif
+endif
 
 config_dir = [include_directories('.')]
 eolian_include_directories = []
@@ -231,7 +284,8 @@ foreach package : subprojects
   package_version_name = '-'.join(package_name.split('_')) + '-' + version_major
   package_c_args = [
     '-DPACKAGE_DATA_DIR="'+ join_paths(dir_data, package_name)+'"',
-    '-DNEED_RUN_IN_TREE=1'
+    '-DNEED_RUN_IN_TREE=1',
+    machine_c_args
   ]
   automatic_pkgfile = true
   if package[1].length() == 0 or get_option(package[1][0])
index ad42f31..94d3007 100644 (file)
@@ -49,7 +49,7 @@ option('drm',
 option('cocoa',
   type : 'boolean',
   value : false,
-  description : 'Flag for handling drm support in efl'
+  description : 'Flag for handling apple cocoa support in efl'
 )
 
 option('physics',
@@ -327,3 +327,28 @@ option('mono',
   value: false,
   description: 'Flag for handling c# bindings'
 )
+
+option('cpu-mmx',
+  type: 'boolean',
+  value: true,
+  description: 'Build MMX support when building for intel'
+)
+
+option('cpu-sse3',
+  type: 'boolean',
+  value: true,
+  description: 'Build SSE3 support when building for intel'
+)
+
+option('cpu-neon',
+  type: 'boolean',
+  value: true,
+  description: 'Build NEON support when building for ARM'
+)
+
+option('cpu-altivec',
+  type: 'boolean',
+  value: true,
+  description: 'Build ALTIVEC support when building for PPC/POWER'
+)
+
index 6283d97..5551775 100644 (file)
@@ -1,8 +1,8 @@
 #include "evas_common_private.h"
 #ifdef BUILD_NEON
-#ifdef BUILD_NEON_INTRINSICS
-#include <arm_neon.h>
-#endif
+# ifdef BUILD_NEON_INTRINSICS
+#  include <arm_neon.h>
+# endif
 #endif
 
 static int cpu_feature_mask = 0;
@@ -15,20 +15,22 @@ static int cpu_feature_mask = 0;
 #  endif
 # endif
 #endif
+
 #ifdef __SPARC__
 #   define NEED_FEATURE_TEST
 #endif
+
 #if defined(__ARM_ARCH__)
 # ifdef BUILD_NEON
-#   define NEED_FEATURE_TEST
+#  define NEED_FEATURE_TEST
 # endif
 #endif
 
 #ifdef NEED_FEATURE_TEST
-#if defined (HAVE_STRUCT_SIGACTION) && defined (HAVE_SIGLONGJMP)
-#include <signal.h>
-#include <setjmp.h>
-#include <errno.h>
+# ifdef HAVE_SIGLONGJMP
+#  include <signal.h>
+#  include <setjmp.h>
+#  include <errno.h>
 
 static sigjmp_buf detect_buf;
 static void evas_common_cpu_catch_ill(int sig);
@@ -45,30 +47,30 @@ evas_common_cpu_catch_segv(int sig EINA_UNUSED)
 {
    siglongjmp(detect_buf, 1);
 }
-#endif
+# endif
 
-#ifdef BUILD_ALTIVEC
+# ifdef BUILD_ALTIVEC
 void
 evas_common_cpu_altivec_test(void)
 {
-#ifdef __POWERPC__
-#ifdef __VEC__
+#  ifdef __POWERPC__
+#   ifdef __VEC__
    vector unsigned int zero;
 
    zero = vec_splat_u32(0);
-#endif /* __VEC__ */
-#endif /* __POWERPC__ */
+#   endif /* __VEC__ */
+#  endif /* __POWERPC__ */
 }
-#endif /* BUILD_ALTIVEC */
+# endif /* BUILD_ALTIVEC */
 
 void
 evas_common_cpu_neon_test(void)
 {
 //#if defined(__ARM_ARCH__) && (__ARM_ARCH__ >= 70)
-#ifdef BUILD_NEON
-#ifdef BUILD_NEON_INTRINSICS
+# ifdef BUILD_NEON
+#  ifdef BUILD_NEON_INTRINSICS
    volatile uint32x4_t temp = vdupq_n_u32(0x1);
-#else
+#  else
    asm volatile (
                ".fpu neon           \n\t"
                  "vqadd.u8 d0, d1, d0\n"
@@ -77,18 +79,32 @@ evas_common_cpu_neon_test(void)
                  : /* Clobbered */
                  "d0", "d1"
                  );
-#endif
-#endif
+#  endif
+# endif
 //#endif
 }
 
 void
 evas_common_cpu_vis_test(void)
 {
-#ifdef __SPARC__
-#endif /* __SPARC__ */
+# ifdef __SPARC__
+# endif /* __SPARC__ */
 }
 #endif /* NEED_FEATURE_TEST */
+
+
+#if defined(BUILD_MMX) || defined(BUILD_SSE3)
+# define NEED_CPU_CHECK
+#endif
+
+#ifdef NEED_FEATURE_TEST
+# ifndef HAVE_SIGLONGJMP
+#  undef NEED_CPU_CHECK
+#  define NEED_CPU_CHECK
+# endif
+#endif
+
+#ifdef NEED_CPU_CHECK
 static Eina_Bool
 _cpu_check(Eina_Cpu_Features f)
 {
@@ -97,13 +113,13 @@ _cpu_check(Eina_Cpu_Features f)
    features = eina_cpu_features_get();
    return (features & f) == f;
 }
-
+#endif
 
 #ifdef NEED_FEATURE_TEST
 int
 evas_common_cpu_feature_test(void (*feature)(void))
 {
-#if defined (HAVE_STRUCT_SIGACTION) && defined (HAVE_SIGLONGJMP)
+# ifdef HAVE_SIGLONGJMP
    int enabled = 1;
    struct sigaction act, oact, oact2;
 
@@ -119,9 +135,9 @@ evas_common_cpu_feature_test(void (*feature)(void))
 
    if (sigsetjmp(detect_buf, 1))
      {
-       sigaction(SIGILL, &oact, NULL);
-       sigaction(SIGSEGV, &oact2, NULL);
-       return 0;
+        sigaction(SIGILL, &oact, NULL);
+        sigaction(SIGSEGV, &oact2, NULL);
+        return 0;
      }
 
    feature();
@@ -129,16 +145,28 @@ evas_common_cpu_feature_test(void (*feature)(void))
    sigaction(SIGILL, &oact, NULL);
    sigaction(SIGSEGV, &oact2, NULL);
    return enabled;
-#else
+# else
+#  ifdef BUILD_MMX
    if (feature == evas_common_cpu_mmx_test)
      return _cpu_check(EINA_CPU_MMX);
    /* no mmx2 support in eina */
    if (feature == evas_common_cpu_sse_test)
      return _cpu_check(EINA_CPU_SSE);
+#  endif
+#  ifdef BUILD_SSE3
    if (feature == evas_common_cpu_sse3_test)
      return _cpu_check(EINA_CPU_SSE3);
+#  endif
+#  ifdef BUILD_ALTIVEC
+   if (feature == evas_common_cpu_altivec_test)
+     return _cpu_check(CPU_FEATURE_ALTIVEC);
+#  endif
+#  ifdef BUILD_NEON
+   if (feature == evas_common_cpu_neon_test)
+     return _cpu_check(EINA_CPU_NEON);
+#  endif
    return 0;
-#endif
+# endif
 }
 #endif
 
@@ -169,6 +197,7 @@ evas_common_cpu_init(void)
      cpu_feature_mask |= _cpu_check(EINA_CPU_SSE3) * CPU_FEATURE_SSE3;
 # endif /* BUILD_SSE3 */
 #endif /* BUILD_MMX */
+
 #ifdef BUILD_ALTIVEC
 # ifdef __POWERPC__
 #  ifdef __VEC__
@@ -183,6 +212,7 @@ evas_common_cpu_init(void)
 #  endif /* __VEC__ */
 # endif /* __POWERPC__ */
 #endif /* BUILD_ALTIVEC */
+
 #ifdef __SPARC__
    if (getenv("EVAS_CPU_NO_VIS"))
      cpu_feature_mask &= ~CPU_FEATURE_VIS;
@@ -193,6 +223,7 @@ evas_common_cpu_init(void)
         evas_common_cpu_end_opt();
      }
 #endif /* __SPARC__ */
+
 #if defined(__ARM_ARCH__)
 # ifdef BUILD_NEON
    if (getenv("EVAS_CPU_NO_NEON"))
@@ -200,17 +231,24 @@ evas_common_cpu_init(void)
    else
      {
         /* On linux eina_cpu sets this up with getauxval() */
-#if defined(HAVE_SYS_AUXV_H) && defined(HAVE_ASM_HWCAP_H) && defined(__arm__) && defined(__linux__)
+#  if defined(HAVE_SYS_AUXV_H) && defined(HAVE_ASM_HWCAP_H) && defined(__arm__) && defined(__linux__)
         cpu_feature_mask |= CPU_FEATURE_NEON *
           !!(eina_cpu_features_get() & EINA_CPU_NEON);
-#else
+#  else
         cpu_feature_mask |= CPU_FEATURE_NEON *
           evas_common_cpu_feature_test(evas_common_cpu_neon_test);
         evas_common_cpu_end_opt();
-#endif
+#  endif
      }
 # endif
 #endif
+
+#if defined(__aarch64__)
+   if (getenv("EVAS_CPU_NO_NEON"))
+     cpu_feature_mask &= ~CPU_FEATURE_NEON;
+   else
+     cpu_feature_mask |= CPU_FEATURE_NEON;
+#endif
 }
 
 int
index 05fa244..e2df18b 100644 (file)
@@ -82,6 +82,18 @@ evas_src += files([
   'region.h'
 ])
 
+if cpu_sse3 == true
+  evas_src_opt +=  files([
+    'evas_op_blend/op_blend_master_sse3.c'
+  ])
+endif
+
+if cpu_neon == true and cpu_neon_intrinsics == false
+  evas_src_opt +=  files([
+    'evas_op_copy/op_copy_neon.S'
+  ])
+endif
+
 #subdir('evas_op_blend')
 #subdir('evas_op_sub')
 subdir('language')
index aad724a..e66d571 100644 (file)
@@ -258,6 +258,10 @@ static __m128i RGB_MASK_SSE3;
 
 static __m128i ALPHA_SSE3;
 
+#ifndef EFL_ALWAYS_INLINE
+# define EFL_ALWAYS_INLINE inline
+#endif
+
 static EFL_ALWAYS_INLINE __m128i
 mul_256_sse3(__m128i a, __m128i c) {
 
index 2598bbe..050df28 100755 (executable)
@@ -1,9 +1,9 @@
 #ifndef EVAS_COMMON_H
 #define EVAS_COMMON_H
 
-#ifdef HAVE_CONFIG_H
+//#ifdef HAVE_CONFIG_H
 #include "config.h"  /* so that EAPI in Evas.h is correctly defined */
-#endif
+//#endif
 
 #ifdef STDC_HEADERS
 # include <stdlib.h>
index 0dfc02c..adea8b2 100644 (file)
@@ -108,6 +108,8 @@ evas_src = [
    'main.c'
 ]
 
+evas_src_opt = [ ]
+
 evas_src += vg_common_src
 
 evas_deps += dependency('freetype2')
@@ -177,9 +179,25 @@ evas_pre = declare_dependency(
   dependencies: [eina, eo, ector, emile, evas_deps, m],
 )
 
+evas_link = [ ]
+
+if cpu_sse3 == true or cpu_neon == true and cpu_neon_intrinsics == false
+  evas_opt = static_library('evas_opt',
+    sources: evas_src_opt,
+    include_directories:
+      [ include_directories('../../..') ] +
+      evas_include_directories +
+      [vg_common_inc_dir],
+    c_args: evas_opt_c_args,
+    dependencies: [eina, eo, ector, emile, evas_deps, m],
+  )
+  evas_link += [ evas_opt ]
+endif
+
 evas_pre_lib_dep = declare_dependency(
   include_directories: evas_include_directories + [vg_common_inc_dir],
   sources : [evas_src, pub_eo_file_target],
+  link_with: evas_link,
   dependencies: [evas_deps, m, draw, valgrind, libunibreak]
 )