src/lib/sw_engine/tvgSwRasterNeon.h

   1 /*
   2  * Copyright (c) 2021 Samsung Electronics Co., Ltd. All rights reserved.
   3
   4  * Permission is hereby granted, free of charge, to any person obtaining a copy
   5  * of this software and associated documentation files (the "Software"), to deal
   6  * in the Software without restriction, including without limitation the rights
   7  * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
   8  * copies of the Software, and to permit persons to whom the Software is
   9  * furnished to do so, subject to the following conditions:
  10
  11  * The above copyright notice and this permission notice shall be included in all
  12  * copies or substantial portions of the Software.
  13
  14  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
  15  * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
  16  * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
  17  * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
  18  * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
  19  * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
  20  * SOFTWARE.
  21  */
  22
  23 #ifdef THORVG_NEON_VECTOR_SUPPORT
  24
  25 #include <arm_neon.h>
  26
  27 static inline uint8x8_t ALPHA_BLEND_NEON(uint8x8_t c, uint8x8_t a)
  28 {
  29         uint16x8_t t = vmull_u8(c, a);
  30         return vshrn_n_u16(t, 8);
  31 }
  32
  33
  34 static inline void neonRasterRGBA32(uint32_t *dst, uint32_t val, uint32_t offset, int32_t len)
  35 {
  36     uint32_t iterations = len / 4;
  37     uint32_t neonFilled = iterations * 4;
  38
  39     dst += offset;
  40     uint32x4_t vectorVal = {val, val, val, val};
  41
  42     for (uint32_t i = 0; i < iterations; ++i) {
  43         vst1q_u32(dst, vectorVal);
  44         dst += 4;
  45     }
  46
  47     int32_t leftovers = len - neonFilled;
  48     while (leftovers--) *dst++ = val;
  49 }
  50
  51
  52 static inline bool neonRasterTranslucentRle(SwSurface* surface, const SwRleData* rle, uint32_t color)
  53 {
  54     auto span = rle->spans;
  55     uint32_t src;
  56     uint8x8_t *vDst = NULL;
  57
  58     for (uint32_t i = 0; i < rle->size; ++i) {
  59         auto dst = &surface->buffer[span->y * surface->stride + span->x];
  60         uint32_t align = 0;
  61
  62         if ((((uint32_t) dst) & 0x7) != 0) {
  63             vDst = (uint8x8_t*)(dst+1);
  64             align = 1;
  65         } else {
  66             vDst = (uint8x8_t*) dst;
  67         }
  68
  69         if (span->coverage < 255) src = ALPHA_BLEND(color, span->coverage);
  70         else src = color;
  71         auto ialpha = 255 - surface->blender.alpha(src);
  72
  73         uint8x8_t vSrc = (uint8x8_t) vdup_n_u32(src);
  74         uint8x8_t vIalpha = vdup_n_u8((uint8_t) ialpha);
  75
  76         uint32_t iterations = (span->len - align) / 2;
  77         uint32_t left = (span->len - align) % 2;
  78
  79         //Fill not aligned byte
  80         if (align) *dst = src + ALPHA_BLEND(*dst, ialpha);
  81
  82         for (uint32_t x = 0; x < iterations; ++x) vDst[x] = vadd_u8(vSrc, ALPHA_BLEND_NEON(vDst[x], vIalpha));
  83
  84         if (left) dst[span->len - 1] = src + ALPHA_BLEND(dst[span->len - 1], ialpha);
  85
  86         ++span;
  87     }
  88     return true;
  89 }
  90
  91 #endif