From d0e83c25adf66b3f094939e7345882a62ccfa1f4 Mon Sep 17 00:00:00 2001 From: Michal Szczecinski Date: Wed, 11 Aug 2021 16:23:04 +0200 Subject: [PATCH] sw_common neon: Fix issue with unaligned memory. Neon operations should be performed on memory aligned to 8 bytes. This commit fixes this issue and increases the stability of the neon rasterizer. Change-Id: I348d18137dcd973d26948916758e1e523f565503 --- src/lib/sw_engine/tvgSwRasterNeon.h | 28 +++++++++++++++++----------- 1 file changed, 17 insertions(+), 11 deletions(-) diff --git a/src/lib/sw_engine/tvgSwRasterNeon.h b/src/lib/sw_engine/tvgSwRasterNeon.h index 0ca8304..2d32caa 100644 --- a/src/lib/sw_engine/tvgSwRasterNeon.h +++ b/src/lib/sw_engine/tvgSwRasterNeon.h @@ -53,33 +53,39 @@ static inline bool neonRasterTranslucentRle(SwSurface* surface, const SwRleData* { auto span = rle->spans; uint32_t src; + uint8x8_t *vDst = NULL; for (uint32_t i = 0; i < rle->size; ++i) { auto dst = &surface->buffer[span->y * surface->stride + span->x]; + uint32_t align = 0; - uint8x8_t *vDst = (uint8x8_t*) dst; + if ((((uint32_t) dst) & 0x7) != 0) { + vDst = (uint8x8_t*)(dst+1); + align = 1; + } else { + vDst = (uint8x8_t*) dst; + } if (span->coverage < 255) src = ALPHA_BLEND(color, span->coverage); else src = color; auto ialpha = 255 - surface->blender.alpha(src); uint8x8_t vSrc = (uint8x8_t) vdup_n_u32(src); - uint8x8_t vIalpha = (uint8x8_t) vdup_n_u32(ialpha); + uint8x8_t vIalpha = vdup_n_u8((uint8_t) ialpha); - uint32_t iterations = span->len / 2; - uint32_t left = span->len % 2; + uint32_t iterations = (span->len - align) / 2; + uint32_t left = (span->len - align) % 2; - for (uint32_t x = 0; x < iterations; x+=2) { - vDst[x] = vadd_u8(vSrc, ALPHA_BLEND_NEON(vDst[x], vIalpha)); - } + //Fill not aligned byte + if (align) *dst = src + ALPHA_BLEND(*dst, ialpha); + + for (uint32_t x = 0; x < iterations; ++x) vDst[x] = vadd_u8(vSrc, ALPHA_BLEND_NEON(vDst[x], vIalpha)); + + if (left) dst[span->len - 1] = src + ALPHA_BLEND(dst[span->len - 1], 
ialpha); - if (left) { - dst[span->len] = src + ALPHA_BLEND(dst[span->len], ialpha); - } ++span; } return true; } - #endif \ No newline at end of file -- 2.7.4