#include "SkTypes.h"
-namespace SkOpts {
- extern void (*memset16)(uint16_t[], uint16_t, int);
- extern void (*memset32)(uint32_t[], uint32_t, int);
-}
-
-///////////////////////////////////////////////////////////////////////////////
-
-// Inlining heuristics were determined by using perf.skia.org and bench/MemsetBench.cpp.
-// When using MSVC, inline is better >= 1K and worse <= 100. The Nexus Player was the opposite.
-// Otherwise, when NEON or SSE is available to GCC or Clang, they can handle it best.
-// See https://code.google.com/p/chromium/issues/detail?id=516426#c15 for more details.
-// See also skia:4316; it might be a good idea to use rep stosw/stosd here.
-#define INLINE_IF(cond) if (cond) { while (count --> 0) { *buffer++ = value; } return; }
-
/** Similar to memset(), but it assigns a 16bit value into the buffer.
Each of the elements buffer[0] through buffer[count - 1] is set to value.
With the plain-loop implementation, a count of zero or less writes nothing.
@param buffer The memory to have value copied into it. No bounds check is
performed, so the caller must supply room for at least count elements.
@param value The 16bit value to be copied into buffer
@param count The number of times value should be copied into the buffer
(an element count, not a byte count).
*/
static inline void sk_memset16(uint16_t buffer[], uint16_t value, int count) {
-#if defined(_MSC_VER)
- INLINE_IF(count > 300)
-#elif defined(SK_BUILD_FOR_ANDROID) && defined(SK_CPU_X86)
- INLINE_IF(count < 300)
-#elif defined(SK_ARM_HAS_NEON) || SK_CPU_SSE_LEVEL >= SK_CPU_SSE_LEVEL_SSE2
- INLINE_IF(true)
-#else
- INLINE_IF(count <= 10)
-#endif
- SkOpts::memset16(buffer, value, count);
+ for (int i = 0; i < count; i++) {
+ buffer[i] = value;
+ }
}
/** Similar to memset(), but it assigns a 32bit value into the buffer.
Each of the elements buffer[0] through buffer[count - 1] is set to value.
With the plain-loop implementation, a count of zero or less writes nothing.
@param buffer The memory to have value copied into it. No bounds check is
performed, so the caller must supply room for at least count elements.
@param value The 32bit value to be copied into buffer
@param count The number of times value should be copied into the buffer
(an element count, not a byte count).
*/
static inline void sk_memset32(uint32_t buffer[], uint32_t value, int count) {
-#if defined(_MSC_VER)
- INLINE_IF(count > 300)
-#elif defined(SK_BUILD_FOR_ANDROID) && defined(SK_CPU_X86)
- INLINE_IF(count < 300)
-#elif defined(SK_ARM_HAS_NEON) || SK_CPU_SSE_LEVEL >= SK_CPU_SSE_LEVEL_SSE2
- INLINE_IF(true)
-#else
- INLINE_IF(count <= 10)
-#endif
- SkOpts::memset32(buffer, value, count);
+ for (int i = 0; i < count; i++) {
+ buffer[i] = value;
+ }
}
-#undef INLINE_IF
-
///////////////////////////////////////////////////////////////////////////////
#define kMaxBytesInUTF8Sequence 4
#include "SkMorphologyImageFilter_opts.h"
#include "SkSwizzler_opts.h"
#include "SkTextureCompressor_opts.h"
-#include "SkUtils_opts.h"
#include "SkXfermode_opts.h"
namespace SK_OPTS_NS {
// If our global compile options are set high enough, these defaults might even be
// CPU-specialized, e.g. a typical x86-64 machine might start with SSE2 defaults.
// They'll still get a chance to be replaced with even better ones, e.g. using SSE4.1.
- decltype(memset16) memset16 = sk_default::memset16;
- decltype(memset32) memset32 = sk_default::memset32;
decltype(create_xfermode) create_xfermode = sk_default::create_xfermode;
decltype(color_cube_filter_span) color_cube_filter_span = sk_default::color_cube_filter_span;
// Declare function pointers here...
- // See SkUtils.h
- extern void (*memset16)(uint16_t[], uint16_t, int);
- extern void (*memset32)(uint32_t[], uint32_t, int);
-
// May return nullptr if we haven't specialized the given Mode.
extern SkXfermode* (*create_xfermode)(const ProcCoeff&, SkXfermode::Mode);
#include "SkMorphologyImageFilter_opts.h"
#include "SkSwizzler_opts.h"
#include "SkTextureCompressor_opts.h"
-#include "SkUtils_opts.h"
#include "SkXfermode_opts.h"
namespace SkOpts {
void Init_neon() {
- memset16 = sk_neon::memset16;
- memset32 = sk_neon::memset32;
create_xfermode = sk_neon::create_xfermode;
box_blur_xx = sk_neon::box_blur_xx;
+++ /dev/null
-/*
- * Copyright 2015 Google Inc.
- *
- * Use of this source code is governed by a BSD-style license that can be
- * found in the LICENSE file.
- */
-
-#ifndef SkUtils_opts_DEFINED
-#define SkUtils_opts_DEFINED
-
-namespace SK_OPTS_NS {
-
-#if SK_CPU_SSE_LEVEL >= SK_CPU_SSE_LEVEL_SSE2
-
-static void memset16(uint16_t* dst, uint16_t val, int n) {
- auto dst8 = (__m128i*)dst;
- auto val8 = _mm_set1_epi16(val);
- for ( ; n >= 8; n -= 8) {
- _mm_storeu_si128(dst8++, val8);
- }
- dst = (uint16_t*)dst8;
- if (n & 4) {
- _mm_storel_epi64((__m128i*)dst, val8);
- dst += 4;
- }
- if (n & 2) {
- *(uint32_t*)dst = _mm_cvtsi128_si32(val8);
- dst += 2;
- }
- if (n & 1) {
- *dst = val;
- }
-}
-
-static void memset32(uint32_t* dst, uint32_t val, int n) {
- auto dst4 = (__m128i*)dst;
- auto val4 = _mm_set1_epi32(val);
- for ( ; n >= 4; n -= 4) {
- _mm_storeu_si128(dst4++, val4);
- }
- dst = (uint32_t*)dst4;
- if (n & 2) {
- _mm_storel_epi64((__m128i*)dst, val4);
- dst += 2;
- }
- if (n & 1) {
- *dst = val;
- }
-}
-
-#elif defined(SK_ARM_HAS_NEON)
-
-static void memset16(uint16_t* dst, uint16_t value, int n) {
- uint16x8_t v8 = vdupq_n_u16(value);
- uint16x8x4_t v32 = {{ v8, v8, v8, v8 }};
-
- while (n >= 32) {
- vst4q_u16(dst, v32); // This swizzles, but we don't care: all lanes are the same, value.
- dst += 32;
- n -= 32;
- }
- switch (n / 8) {
- case 3: vst1q_u16(dst, v8); dst += 8;
- case 2: vst1q_u16(dst, v8); dst += 8;
- case 1: vst1q_u16(dst, v8); dst += 8;
- }
- if (n & 4) {
- vst1_u16(dst, vget_low_u16(v8));
- dst += 4;
- }
- switch (n & 3) {
- case 3: *dst++ = value;
- case 2: *dst++ = value;
- case 1: *dst = value;
- }
-}
-
-static void memset32(uint32_t* dst, uint32_t value, int n) {
- uint32x4_t v4 = vdupq_n_u32(value);
- uint32x4x4_t v16 = {{ v4, v4, v4, v4 }};
-
- while (n >= 16) {
- vst4q_u32(dst, v16); // This swizzles, but we don't care: all lanes are the same, value.
- dst += 16;
- n -= 16;
- }
- switch (n / 4) {
- case 3: vst1q_u32(dst, v4); dst += 4;
- case 2: vst1q_u32(dst, v4); dst += 4;
- case 1: vst1q_u32(dst, v4); dst += 4;
- }
- if (n & 2) {
- vst1_u32(dst, vget_low_u32(v4));
- dst += 2;
- }
- if (n & 1) {
- *dst = value;
- }
-}
-
-#else // Neither NEON nor SSE2.
-
-static void memset16(uint16_t* dst, uint16_t val, int n) { while (n --> 0) { *dst++ = val; } }
-static void memset32(uint32_t* dst, uint32_t val, int n) { while (n --> 0) { *dst++ = val; } }
-
-#endif
-
-} // namespace SK_OPTS_NS
-
-#endif//SkUtils_opts_DEFINED