config_h.set10('THORVG_AVX_VECTOR_SUPPORT', true)
endif
+# Define THORVG_NEON_VECTOR_SUPPORT as 1 in the generated config when the
+# 'vectors' option selects 'neon'; sources test this macro to enable NEON paths.
+if get_option('vectors').contains('neon') == true
+ config_h.set10('THORVG_NEON_VECTOR_SUPPORT', true)
+endif
+
if get_option('bindings').contains('capi') == true
config_h.set10('THORVG_CAPI_BINDING_SUPPORT', true)
endif
description: 'Enable File Savers in thorvg')
option('vectors',
- type: 'array',
- choices: ['', 'avx'],
- value: [''],
+ type: 'combo',
+ choices: ['', 'avx', 'neon'],
+ value: '',
description: 'Enable CPU Vectorization(SIMD) in thorvg')
option('bindings',
#include <immintrin.h>
#endif
+#ifdef THORVG_NEON_VECTOR_SUPPORT
+ #include <arm_neon.h>
+#endif
+
#if 0
#include <sys/time.h>
static double timeStamp()
static inline void rasterRGBA32(uint32_t *dst, uint32_t val, uint32_t offset, int32_t len)
{
-#ifdef THORVG_AVX_VECTOR_SUPPORT
+#if defined(THORVG_AVX_VECTOR_SUPPORT)
//1. calculate how many iterations we need to cover length
uint32_t iterations = len / 8;
uint32_t avxFilled = iterations * 8;
dst+= avxFilled;
while (leftovers--) *dst++ = val;
+#elif defined(THORVG_NEON_VECTOR_SUPPORT)
+    //1. calculate how many 4-pixel (128-bit) stores are needed to cover length
+    uint32_t iterations = len / 4;
+    uint32_t neonFilled = iterations * 4;
+
+    //2. broadcast the pixel value to all four lanes
+    //   (use the intrinsic rather than a brace initializer: NEON vector types
+    //    are opaque and list-initialization is a compiler extension)
+    dst += offset;
+    uint32x4_t vectorVal = vdupq_n_u32(val);
+
+    //3. fill four pixels per store
+    for (uint32_t i = 0; i < iterations; ++i) {
+        vst1q_u32(dst, vectorVal);
+        dst += 4;
+    }
+
+    //4. fill the remaining (fewer than four) pixels one by one
+    int32_t leftovers = len - neonFilled;
+    while (leftovers--) *dst++ = val;
#else
dst += offset;
while (len--) *dst++ = val;
if get_option('vectors').contains('avx')
compiler_flags += ['-mavx']
endif
+ if get_option('vectors').contains('neon') and host_machine.cpu_family() == 'arm'
+     # -mfpu is a 32-bit ARM (AArch32) option only. AArch64 toolchains reject
+     # it as unrecognized and provide Advanced SIMD (NEON) by default, so no
+     # extra flag is needed there.
+     compiler_flags += ['-mfpu=neon-vfpv4']
+ endif
if get_option('b_sanitize') == 'none'
compiler_flags += ['-fno-exceptions', '-fno-rtti',
'-fno-unwind-tables' , '-fno-asynchronous-unwind-tables',