From 66f3b1e66b4cbdf801aff369740926775e1e0f7a Mon Sep 17 00:00:00 2001
From: Michal Szczecinski
Date: Thu, 12 Aug 2021 16:02:40 +0200
Subject: [PATCH] sw common: Added neon translucent rect API implementation.

Changes:
Added neonRasterTranslucentRect implementation.

Rendering was tested on 32 lottie files.
Without neon ~ 18.1 FPS was measured.
With neon ~ 20.1 FPS was measured.
---
 src/lib/sw_engine/tvgSwRaster.cpp   | 21 +++++++++------------
 src/lib/sw_engine/tvgSwRasterC.h    | 18 ++++++++++++++++++
 src/lib/sw_engine/tvgSwRasterNeon.h | 40 ++++++++++++++++++++++++++++++++++++++++
 3 files changed, 67 insertions(+), 12 deletions(-)

diff --git a/src/lib/sw_engine/tvgSwRaster.cpp b/src/lib/sw_engine/tvgSwRaster.cpp
index 8498561..d42a6d3 100644
--- a/src/lib/sw_engine/tvgSwRaster.cpp
+++ b/src/lib/sw_engine/tvgSwRaster.cpp
@@ -120,18 +120,15 @@ static uint32_t _applyBilinearInterpolation(const uint32_t *img, uint32_t w, uin
 
 static bool _translucentRect(SwSurface* surface, const SwBBox& region, uint32_t color)
 {
-    auto buffer = surface->buffer + (region.min.y * surface->stride) + region.min.x;
-    auto h = static_cast<uint32_t>(region.max.y - region.min.y);
-    auto w = static_cast<uint32_t>(region.max.x - region.min.x);
-    auto ialpha = 255 - surface->blender.alpha(color);
-
-    for (uint32_t y = 0; y < h; ++y) {
-        auto dst = &buffer[y * surface->stride];
-        for (uint32_t x = 0; x < w; ++x) {
-            dst[x] = color + ALPHA_BLEND(dst[x], ialpha);
-        }
-    }
-    return true;
+    //Dispatch to the implementation selected at build time.
+#if defined(THORVG_AVX_VECTOR_SUPPORT)
+    //The AVX build uses the C implementation for this path.
+    return cRasterTranslucentRect(surface, region, color);
+#elif defined(THORVG_NEON_VECTOR_SUPPORT)
+    return neonRasterTranslucentRect(surface, region, color);
+#else
+    return cRasterTranslucentRect(surface, region, color);
+#endif
 }
 
 
diff --git a/src/lib/sw_engine/tvgSwRasterC.h b/src/lib/sw_engine/tvgSwRasterC.h
index c638a19..b634edf 100644
--- a/src/lib/sw_engine/tvgSwRasterC.h
+++ b/src/lib/sw_engine/tvgSwRasterC.h
@@ -47,4 +47,22 @@ static inline bool cRasterTranslucentRle(SwSurface* surface, const SwRleData* rl
         ++span;
     }
     return true;
+}
+
+//Scalar implementation: blends 'color' over every pixel in rows
+//[region.min.y, region.max.y) and columns [region.min.x, region.max.x).
+static inline bool cRasterTranslucentRect(SwSurface* surface, const SwBBox& region, uint32_t color)
+{
+    auto buffer = surface->buffer + (region.min.y * surface->stride) + region.min.x;
+    auto h = static_cast<uint32_t>(region.max.y - region.min.y);
+    auto w = static_cast<uint32_t>(region.max.x - region.min.x);
+    auto ialpha = 255 - surface->blender.alpha(color);
+
+    for (uint32_t y = 0; y < h; ++y) {
+        auto dst = &buffer[y * surface->stride];
+        for (uint32_t x = 0; x < w; ++x) {
+            dst[x] = color + ALPHA_BLEND(dst[x], ialpha);
+        }
+    }
+    return true;
 }
\ No newline at end of file
diff --git a/src/lib/sw_engine/tvgSwRasterNeon.h b/src/lib/sw_engine/tvgSwRasterNeon.h
index 2d32caa..a1c1b0f 100644
--- a/src/lib/sw_engine/tvgSwRasterNeon.h
+++ b/src/lib/sw_engine/tvgSwRasterNeon.h
@@ -88,4 +88,44 @@ static inline bool neonRasterTranslucentRle(SwSurface* surface, const SwRleData*
     return true;
 }
 
+
+/*
+ * NEON variant of the translucent rect fill: blends 'color' over every pixel in
+ * rows [region.min.y, region.max.y) and columns [region.min.x, region.max.x).
+ * Pixels are processed two at a time through 64-bit vectors; a single pixel at
+ * either end of a row is handled with the scalar ALPHA_BLEND. Always returns true.
+ */
+static inline bool neonRasterTranslucentRect(SwSurface* surface, const SwBBox& region, uint32_t color)
+{
+    uint32_t *buffer = surface->buffer + (region.min.y * surface->stride) + region.min.x;
+    uint32_t h = (uint32_t)(region.max.y - region.min.y);
+    uint32_t w = (uint32_t)(region.max.x - region.min.x);
+
+    //Nothing to blend; this also keeps (w - head) below from wrapping around.
+    if (w == 0) return true;
+
+    uint32_t ialpha = 255 - surface->blender.alpha(color);
+
+    uint8x8_t vColor = vreinterpret_u8_u32(vdup_n_u32(color));
+    uint8x8_t vIalpha = vdup_n_u8((uint8_t) ialpha);
+
+    for (uint32_t y = 0; y < h; ++y) {
+        uint32_t *dst = &buffer[y * surface->stride];
+
+        //Peel one leading pixel when the row does not start on an 8-byte boundary.
+        //uintptr_t keeps the full address on 64-bit targets.
+        uint32_t head = ((((uintptr_t) dst) & 0x7) != 0) ? 1 : 0;
+        if (head) *dst = color + ALPHA_BLEND(*dst, ialpha);
+
+        //Blend the remaining pixels in pairs, never reaching past dst[w - 1].
+        uint8x8_t *vDst = (uint8x8_t*)(dst + head);
+        uint32_t pairs = (w - head) / 2;
+        for (uint32_t x = 0; x < pairs; ++x) vDst[x] = vadd_u8(vColor, ALPHA_BLEND_NEON(vDst[x], vIalpha));
+
+        //An odd remainder is exactly the last pixel of the row.
+        if ((w - head) % 2) dst[w - 1] = color + ALPHA_BLEND(dst[w - 1], ialpha);
+    }
+    return true;
+}
+
 #endif
\ No newline at end of file
-- 
2.7.4