src/lib/sw_engine/tvgSwRasterNeon.h

   1 /*
   2  * Copyright (c) 2021 - 2023 the ThorVG project. All rights reserved.
   3
   4  * Permission is hereby granted, free of charge, to any person obtaining a copy
   5  * of this software and associated documentation files (the "Software"), to deal
   6  * in the Software without restriction, including without limitation the rights
   7  * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
   8  * copies of the Software, and to permit persons to whom the Software is
   9  * furnished to do so, subject to the following conditions:
  10
  11  * The above copyright notice and this permission notice shall be included in all
  12  * copies or substantial portions of the Software.
  13
  14  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
  15  * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
  16  * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
  17  * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
  18  * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
  19  * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
  20  * SOFTWARE.
  21  */
  22
  23 #ifdef THORVG_NEON_VECTOR_SUPPORT
  24
  25 #include <arm_neon.h>
  26
  /*
   * Per-lane alpha scaling of eight 8-bit channels.
   *
   * Each lane of `c` is multiplied by the matching lane of `a` into a 16-bit
   * product, and the upper byte of that product is returned: (c * a) >> 8.
   * Because the shift divides by 256 rather than 255, a == 255 yields a value
   * one below `c` for any non-zero `c`.
   */
  static inline uint8x8_t ALPHA_BLEND(uint8x8_t c, uint8x8_t a)
  {
      //widening multiply (8 bit x 8 bit -> 16 bit), then narrow back keeping the high byte
      return vshrn_n_u16(vmull_u8(c, a), 8);
  }
  32
  33
  /*
   * Fills `len` consecutive 32-bit values with `val`, starting at dst[offset].
   *
   * The bulk of the run is written four values at a time with a 128-bit store;
   * the remaining 0..3 values are written one by one.
   *
   * A zero or negative `len` is a no-op. Without that guard a negative length
   * would be converted to a huge unsigned count and write far past the buffer.
   */
  static void neonRasterRGBA32(uint32_t *dst, uint32_t val, uint32_t offset, int32_t len)
  {
      if (len <= 0) return;

      //len is known to be positive here, so all further counting can be unsigned
      uint32_t count = (uint32_t) len;
      uint32_t iterations = count / 4;
      uint32_t leftovers = count % 4;

      dst += offset;
      uint32x4_t vectorVal = vdupq_n_u32(val);

      for (uint32_t i = 0; i < iterations; ++i) {
          vst1q_u32(dst, vectorVal);
          dst += 4;
      }

      //scalar tail for the values that do not fill a whole vector
      for (uint32_t i = 0; i < leftovers; ++i) dst[i] = val;
  }
  50
  51
  /*
   * Blends a solid color over every span of `rle` on `surface`.
   *
   * For each span the color is first scaled by the span coverage (when the
   * coverage is below 255), then each destination pixel becomes
   * src + ALPHA_BLEND(dst, 255 - alpha(src)). Pixels are processed two at a
   * time through 64-bit NEON vectors, with single-pixel scalar handling for an
   * unaligned head and an odd tail.
   *
   * The scalar ALPHA_BLEND overload and _alpha() are not defined in this header;
   * they are expected to be provided by the including translation unit.
   *
   * Always returns true.
   */
  static bool neonRasterTranslucentRle(SwSurface* surface, const SwRleData* rle, uint32_t color)
  {
      auto span = rle->spans;

      for (uint32_t i = 0; i < rle->size; ++i, ++span) {
          //an empty span covers no pixel; skip it so the unaligned-head path below never touches dst[0]
          if (span->len == 0) continue;

          uint32_t src = color;
          if (span->coverage < 255) src = ALPHA_BLEND(color, span->coverage);

          auto dst = &surface->buffer[span->y * surface->stride + span->x];
          auto ialpha = 255 - _alpha(src);

          //The vector loop accesses memory through a uint8x8_t pointer, so it must start on an
          //8-byte boundary. uintptr_t keeps the full address on 64-bit targets.
          //NOTE(review): stepping one pixel to reach alignment assumes 4-byte pixels - confirm.
          uint8x8_t* vDst = nullptr;
          uint32_t align;

          if ((((uintptr_t) dst) & 0x7) != 0) {
              //blend the unaligned leading pixel on its own
              *dst = src + ALPHA_BLEND(*dst, ialpha);
              vDst = (uint8x8_t*) (dst + 1);
              align = 1;
          } else {
              vDst = (uint8x8_t*) dst;
              align = 0;
          }

          uint8x8_t vSrc = vreinterpret_u8_u32(vdup_n_u32(src));
          uint8x8_t vIalpha = vdup_n_u8((uint8_t) ialpha);

          //span->len >= 1 and align <= 1, so this cannot wrap
          uint32_t remaining = span->len - align;

          //two pixels per iteration
          for (uint32_t x = 0; x < remaining / 2; ++x)
              vDst[x] = vadd_u8(vSrc, ALPHA_BLEND(vDst[x], vIalpha));

          //an odd count leaves the last pixel of the span for the scalar path
          if (remaining % 2 != 0) dst[span->len - 1] = src + ALPHA_BLEND(dst[span->len - 1], ialpha);
      }
      return true;
  }
  89
  90
  /*
   * Blends a solid color over the rectangle `region` of `surface`.
   *
   * Every destination pixel becomes color + ALPHA_BLEND(dst, 255 - alpha(color)).
   * Each row is processed two pixels at a time through 64-bit NEON vectors,
   * with single-pixel scalar handling for an unaligned head and an odd tail.
   *
   * The scalar ALPHA_BLEND overload and _alpha() are not defined in this header;
   * they are expected to be provided by the including translation unit.
   *
   * An empty or inverted region is a no-op. Always returns true.
   */
  static bool neonRasterTranslucentRect(SwSurface* surface, const SwBBox& region, uint32_t color)
  {
      //Nothing to blend. Bail out before the unsigned extents below can wrap, which would
      //otherwise send the vector loop far past the row.
      if (region.max.x <= region.min.x || region.max.y <= region.min.y) return true;

      auto buffer = surface->buffer + (region.min.y * surface->stride) + region.min.x;
      auto h = static_cast<uint32_t>(region.max.y - region.min.y);
      auto w = static_cast<uint32_t>(region.max.x - region.min.x);
      auto ialpha = 255 - _alpha(color);

      auto vColor = vreinterpret_u8_u32(vdup_n_u32(color));
      auto vIalpha = vdup_n_u8((uint8_t) ialpha);

      for (uint32_t y = 0; y < h; ++y) {
          auto dst = &buffer[y * surface->stride];

          //The vector loop accesses memory through a uint8x8_t pointer, so it must start on an
          //8-byte boundary. uintptr_t keeps the full address on 64-bit targets.
          //NOTE(review): stepping one pixel to reach alignment assumes 4-byte pixels - confirm.
          uint8x8_t* vDst = nullptr;
          uint32_t align;

          if ((((uintptr_t) dst) & 0x7) != 0) {
              //blend the unaligned leading pixel on its own
              *dst = color + ALPHA_BLEND(*dst, ialpha);
              vDst = (uint8x8_t*) (dst + 1);
              align = 1;
          } else {
              vDst = (uint8x8_t*) dst;
              align = 0;
          }

          //w >= 1 and align <= 1, so this cannot wrap
          uint32_t remaining = w - align;

          //two pixels per iteration
          for (uint32_t x = 0; x < remaining / 2; ++x)
              vDst[x] = vadd_u8(vColor, ALPHA_BLEND(vDst[x], vIalpha));

          //an odd count leaves the last pixel of the row for the scalar path
          if (remaining % 2 != 0) dst[w - 1] = color + ALPHA_BLEND(dst[w - 1], ialpha);
      }
      return true;
  }
 125
 126 #endif