#include "tvgCommon.h"
+#ifdef THORVG_AVX_VECTOR_SUPPORT
+ #include <immintrin.h>
+#endif
+
#if 0
#include <sys/time.h>
static double timeStamp()
}
-static inline void COLOR_SET(uint32_t *dst, uint32_t val, uint32_t len)
-{
- while (len--) *dst++ = val;
-}
-
-
int64_t mathMultiply(int64_t a, int64_t b);
int64_t mathDivide(int64_t a, int64_t b);
int64_t mathMulDiv(int64_t a, int64_t b, int64_t c);
bool fillGenColorTable(SwFill* fill, const Fill* fdata, const Matrix* transform, bool ctable);
void fillReset(SwFill* fill);
void fillFree(SwFill* fill);
-void fillFetchLinear(const SwFill* fill, uint32_t* dst, uint32_t y, uint32_t x, uint32_t len);
+void fillFetchLinear(const SwFill* fill, uint32_t* dst, uint32_t y, uint32_t x, uint32_t offset, uint32_t len);  //NOTE(review): offset is presumably the pixel index inside dst where output starts (callers pass 0 for scratch buffers and span->x for a row-start pointer) - confirm against the definition
void fillFetchRadial(const SwFill* fill, uint32_t* dst, uint32_t y, uint32_t x, uint32_t len);
SwRleData* rleRender(const SwOutline* outline, const SwBBox& bbox, const SwSize& clip, bool antiAlias);
bool rasterStroke(Surface& surface, SwShape& shape, uint8_t r, uint8_t g, uint8_t b, uint8_t a);
bool rasterClear(Surface& surface);
+/*
+ * Fills `len` consecutive 32-bit pixels with `val`, starting at dst[offset].
+ *
+ * dst:    base pointer (callers in this file pass the start of a surface row)
+ * val:    pixel value written to every element
+ * offset: index of the first pixel to write, relative to dst
+ * len:    number of pixels to write; values <= 0 write nothing
+ *
+ * When THORVG_AVX_VECTOR_SUPPORT is defined the bulk of the span is written
+ * 8 pixels at a time; otherwise a plain scalar loop is used.
+ */
+inline void rasterARGB32(uint32_t *dst, uint32_t val, uint32_t offset, int32_t len)
+{
+    //Nothing to draw. Also keeps a negative length from running the loops below.
+    if (len <= 0) return;
+
+    dst += offset;
+
+#ifdef THORVG_AVX_VECTOR_SUPPORT
+    //Scalar head: pixels needed to reach the next 32-byte boundary.
+    //This is derived from the real address rather than from `offset`, because
+    //neither the buffer base nor the row stride is guaranteed to be a multiple of 32 bytes.
+    auto misalign = static_cast<int32_t>((reinterpret_cast<uintptr_t>(dst) & 31) / sizeof(uint32_t));
+    int32_t head = (8 - misalign) % 8;
+    if (head > len) head = len;
+    len -= head;
+    while (head-- > 0) *dst++ = val;
+
+    //Vectorization: 8 pixels per store.
+    //The unaligned store form is used so that a dst which is not even 4-byte aligned cannot fault.
+    const __m256i avxVal = _mm256_set1_epi32(static_cast<int>(val));
+    for (; len > 7; len -= 8, dst += 8) {
+        _mm256_storeu_si256(reinterpret_cast<__m256i*>(dst), avxVal);
+    }
+#endif
+
+    //Pack Leftovers (or the whole span when AVX support is disabled)
+    while (len-- > 0) *dst++ = val;
+}
+
+
#endif /* _TVG_SW_COMMON_H_ */
#include "tvgSwCommon.h"
-
/************************************************************************/
/* Internal Class Implementation */
/************************************************************************/
//Fills the rectangle covering columns [region.min.x, region.max.x) and rows
//[region.min.y, region.max.y) of the surface with a single color, one row per
//rasterARGB32() call. Performs no bounds checking of region against the surface
//and always returns true.
static bool _rasterSolidRect(Surface& surface, const SwBBox& region, uint32_t color)
{
- auto buffer = surface.buffer + (region.min.y * surface.stride) + region.min.x;
- auto h = static_cast<uint32_t>(region.max.y - region.min.y);
+ auto buffer = surface.buffer + (region.min.y * surface.stride);  //start of the first row of the region; the x offset is applied by rasterARGB32()
auto w = static_cast<uint32_t>(region.max.x - region.min.x);
+ auto h = static_cast<uint32_t>(region.max.y - region.min.y);
for (uint32_t y = 0; y < h; ++y) {
- auto dst = &buffer[y * surface.stride];
- COLOR_SET(dst, color, w);
+ rasterARGB32(buffer + y * surface.stride, color, region.min.x, w);  //row start + x offset, w pixels; NOTE(review): stride is presumably counted in pixels, not bytes - confirm
}
return true;
}
auto span = rle->spans;
for (uint32_t i = 0; i < rle->size; ++i) {
- auto dst = &surface.buffer[span->y * surface.stride + span->x];
if (span->coverage == 255) {
- COLOR_SET(dst, color, span->len);
+ rasterARGB32(surface.buffer + span->y * surface.stride, color, span->x, span->len);
} else {
+ auto dst = &surface.buffer[span->y * surface.stride + span->x];
auto src = COLOR_ALPHA_BLEND(color, span->coverage);
auto ialpha = 255 - span->coverage;
for (uint32_t i = 0; i < span->len; ++i) {
for (uint32_t y = 0; y < h; ++y) {
auto dst = &buffer[y * surface.stride];
- fillFetchLinear(fill, tmpBuf, region.min.y + y, region.min.x, w);
+ fillFetchLinear(fill, tmpBuf, region.min.y + y, region.min.x, 0, w);
for (uint32_t x = 0; x < w; ++x) {
dst[x] = tmpBuf[x] + COLOR_ALPHA_BLEND(dst[x], 255 - COLOR_ALPHA(tmpBuf[x]));
}
//Opaque Gradient
} else {
for (uint32_t y = 0; y < h; ++y) {
- auto dst = &buffer[y * surface.stride];
- fillFetchLinear(fill, dst, region.min.y + y, region.min.x, w);
+ fillFetchLinear(fill, buffer + y * surface.stride, region.min.y + y, region.min.x, 0, w);
}
}
return true;
if (fill->translucent) {
for (uint32_t i = 0; i < rle->size; ++i) {
auto dst = &surface.buffer[span->y * surface.stride + span->x];
- fillFetchLinear(fill, buf, span->y, span->x, span->len);
+ fillFetchLinear(fill, buf, span->y, span->x, 0, span->len);
if (span->coverage == 255) {
for (uint32_t i = 0; i < span->len; ++i) {
dst[i] = buf[i] + COLOR_ALPHA_BLEND(dst[i], 255 - COLOR_ALPHA(buf[i]));
//Opaque Gradient
} else {
for (uint32_t i = 0; i < rle->size; ++i) {
- auto dst = &surface.buffer[span->y * surface.stride + span->x];
if (span->coverage == 255) {
- fillFetchLinear(fill, dst, span->y, span->x, span->len);
+ fillFetchLinear(fill, surface.buffer + span->y * surface.stride, span->y, span->x, span->x, span->len);
} else {
- fillFetchLinear(fill, buf, span->y, span->x, span->len);
+ auto dst = &surface.buffer[span->y * surface.stride + span->x];
+ fillFetchLinear(fill, buf, span->y, span->x, 0, span->len);
auto ialpha = 255 - span->coverage;
for (uint32_t i = 0; i < span->len; ++i) {
dst[i] = COLOR_ALPHA_BLEND(buf[i], span->coverage) + COLOR_ALPHA_BLEND(dst[i], ialpha);
if (!surface.buffer || surface.stride <= 0 || surface.w <= 0 || surface.h <= 0) return false;
if (surface.w == surface.stride) {
- COLOR_SET(surface.buffer, 0xff000000, surface.w * surface.h);
+ rasterARGB32(surface.buffer, 0x00000000, 0, surface.w * surface.h);
} else {
for (uint32_t i = 0; i < surface.h; i++) {
- COLOR_SET(surface.buffer + surface.stride * i, 0xff000000, surface.w);
+ rasterARGB32(surface.buffer + surface.stride * i, 0x00000000, 0, surface.w);
}
}
return true;