From 8dc22a5d9a00df46a16f8c9b4e366630d3170b5a Mon Sep 17 00:00:00 2001 From: Subhransu Mohanty Date: Thu, 23 Jul 2020 15:31:16 +0900 Subject: [PATCH] vector: rendering refactor. - replaced implementation from qt. - refactor render function table. - ran clang-format --- src/vector/CMakeLists.txt | 2 +- src/vector/meson.build | 2 +- src/vector/vdrawhelper.cpp | 413 ++++++------------ src/vector/vdrawhelper.h | 207 +++++---- ...onfunctions.cpp => vdrawhelper_common.cpp} | 99 +++-- src/vector/vdrawhelper_neon.cpp | 7 +- src/vector/vdrawhelper_sse2.cpp | 302 ++----------- src/vector/vglobal.h | 3 +- src/vector/vpainter.cpp | 2 +- 9 files changed, 337 insertions(+), 700 deletions(-) rename src/vector/{vcompositionfunctions.cpp => vdrawhelper_common.cpp} (55%) diff --git a/src/vector/CMakeLists.txt b/src/vector/CMakeLists.txt index 7993ec7..3ae96e6 100644 --- a/src/vector/CMakeLists.txt +++ b/src/vector/CMakeLists.txt @@ -10,7 +10,7 @@ target_sources(rlottie "${CMAKE_CURRENT_LIST_DIR}/vbrush.cpp" "${CMAKE_CURRENT_LIST_DIR}/vbitmap.cpp" "${CMAKE_CURRENT_LIST_DIR}/vpainter.cpp" - "${CMAKE_CURRENT_LIST_DIR}/vcompositionfunctions.cpp" + "${CMAKE_CURRENT_LIST_DIR}/vdrawhelper_common.cpp" "${CMAKE_CURRENT_LIST_DIR}/vdrawhelper.cpp" "${CMAKE_CURRENT_LIST_DIR}/vdrawhelper_sse2.cpp" "${CMAKE_CURRENT_LIST_DIR}/vdrawhelper_neon.cpp" diff --git a/src/vector/meson.build b/src/vector/meson.build index cb74d5a..1526a9c 100644 --- a/src/vector/meson.build +++ b/src/vector/meson.build @@ -8,7 +8,7 @@ source_file = [ 'vbrush.cpp', 'vbitmap.cpp', 'vpainter.cpp', - 'vcompositionfunctions.cpp', + 'vdrawhelper_common.cpp', 'vdrawhelper.cpp', 'vdrawhelper_sse2.cpp', 'vdrawhelper_neon.cpp', diff --git a/src/vector/vdrawhelper.cpp b/src/vector/vdrawhelper.cpp index afd44e5..e983d6c 100644 --- a/src/vector/vdrawhelper.cpp +++ b/src/vector/vdrawhelper.cpp @@ -56,6 +56,17 @@ #include #include #include +#include + +static RenderFuncTable RenderTable; + +void VTextureData::setClip(const VRect &clip) 
+{ + left = clip.left(); + top = clip.top(); + right = std::min(clip.right(), int(width())) - 1; + bottom = std::min(clip.bottom(), int(height())) - 1; +} class VGradientCache { public: @@ -76,7 +87,8 @@ public: VCacheData info; const VGradientStops &stops = gradient.mStops; for (uint i = 0; i < stops.size() && i <= 2; i++) - hash_val += VCacheKey(stops[i].second.premulARGB() * gradient.alpha()); + hash_val += + VCacheKey(stops[i].second.premulARGB() * gradient.alpha()); { std::lock_guard guard(mMutex); @@ -112,10 +124,10 @@ public: } static VGradientCache &instance() - { - static VGradientCache CACHE; - return CACHE; - } + { + static VGradientCache CACHE; + return CACHE; + } protected: uint maxCacheSize() const { return 60; } @@ -147,7 +159,7 @@ bool VGradientCache::generateGradientColorTable(const VGradientStops &stops, uint32_t *colorTable, int size) { int dist, idist, pos = 0; - size_t i; + size_t i; bool alpha = false; size_t stopCount = stops.size(); const VGradientStop *curr, *next, *start; @@ -182,7 +194,7 @@ bool VGradientCache::generateGradientColorTable(const VGradientStops &stops, dist = (int)(255 * t); idist = 255 - dist; colorTable[pos] = - INTERPOLATE_PIXEL_255(curColor, idist, nextColor, dist); + interpolate_pixel(curColor, idist, nextColor, dist); ++pos; fpos += incr; } @@ -201,7 +213,7 @@ void VRasterBuffer::clear() memset(mBuffer, 0, mHeight * mBytesPerLine); } -VBitmap::Format VRasterBuffer::prepare(VBitmap *image) +VBitmap::Format VRasterBuffer::prepare(const VBitmap *image) { mBuffer = image->data(); mWidth = image->width(); @@ -222,12 +234,6 @@ void VSpanData::init(VRasterBuffer *image) mUnclippedBlendFunc = nullptr; } -extern CompositionFunction COMP_functionForMode_C[]; -extern CompositionFunctionSolid COMP_functionForModeSolid_C[]; -static const CompositionFunction * functionForMode = COMP_functionForMode_C; -static const CompositionFunctionSolid *functionForModeSolid = - COMP_functionForModeSolid_C; - /* * Gradient Draw routines * @@ 
-505,8 +511,7 @@ void fetch_radial_gradient(uint32_t *buffer, const Operator *op, } } -static inline Operator getOperator(const VSpanData *data, const VRle::Span *, - size_t) +static inline Operator getOperator(const VSpanData *data) { Operator op; bool solidSource = false; @@ -532,69 +537,60 @@ static inline Operator getOperator(const VSpanData *data, const VRle::Span *, } op.mode = data->mBlendMode; - if (op.mode == BlendMode::SrcOver && solidSource) - op.mode = BlendMode::Src; + if (op.mode == BlendMode::SrcOver && solidSource) op.mode = BlendMode::Src; - op.funcSolid = functionForModeSolid[uint(op.mode)]; - op.func = functionForMode[uint(op.mode)]; + op.funcSolid = RenderTable.color(op.mode); + op.func = RenderTable.src(op.mode); return op; } -static void blendColorARGB(size_t count, const VRle::Span *spans, - void *userData) +static void blend_color(size_t size, const VRle::Span *array, void *userData) { VSpanData *data = (VSpanData *)(userData); - Operator op = getOperator(data, spans, count); + Operator op = getOperator(data); const uint color = data->mSolid; - if (op.mode == BlendMode::Src) { - // inline for performance - while (count--) { - uint *target = data->buffer(spans->x, spans->y); - if (spans->coverage == 255) { - memfill32(target, color, spans->len); - } else { - uint c = BYTE_MUL(color, spans->coverage); - int ialpha = 255 - spans->coverage; - for (int i = 0; i < spans->len; ++i) - target[i] = c + BYTE_MUL(target[i], ialpha); - } - ++spans; - } - return; + for (size_t i = 0 ; i < size; ++i) { + const auto &span = array[i]; + op.funcSolid(data->buffer(span.x, span.y), span.len, color, span.coverage); } +} - while (count--) { - uint *target = data->buffer(spans->x, spans->y); - op.funcSolid(target, spans->len, color, spans->coverage); - ++spans; +// Signature of Process Object +// void Process(uint* scratchBuffer, size_t x, size_t y, size_t len, uchar cov) +template +static inline void process_in_chunk(const VRle::Span *array, size_t size, + Process process) +{ 
+ std::array buf; + for (size_t i = 0; i < size; i++) { + const auto &span = array[i]; + size_t len = span.len; + size_t x = span.x; + while (len) { + auto l = std::min(len, buf.size()); + process(buf.data(), x, span.y, l, span.coverage); + x += l; + len -= l; + } } } -#define BLEND_GRADIENT_BUFFER_SIZE 2048 -static void blendGradientARGB(size_t count, const VRle::Span *spans, - void *userData) +static void blend_gradient(size_t size, const VRle::Span *array, + void *userData) { VSpanData *data = (VSpanData *)(userData); - Operator op = getOperator(data, spans, count); - - unsigned int buffer[BLEND_GRADIENT_BUFFER_SIZE]; + Operator op = getOperator(data); if (!op.srcFetch) return; - while (count--) { - uint *target = data->buffer(spans->x, spans->y); - int length = spans->len; - while (length) { - int l = std::min(length, BLEND_GRADIENT_BUFFER_SIZE); - op.srcFetch(buffer, &op, data, spans->y, spans->x, l); - op.func(target, buffer, l, spans->coverage); - target += l; - length -= l; - } - ++spans; - } + process_in_chunk( + array, size, + [&](uint *scratch, size_t x, size_t y, size_t len, uchar cov) { + op.srcFetch(scratch, &op, data, y, x, len); + op.func(data->buffer(x, y), len, scratch, cov); + }); } template @@ -603,156 +599,82 @@ constexpr const T &clamp(const T &v, const T &lo, const T &hi) return v < lo ? lo : hi < v ? 
hi : v; } -static const int buffer_size = 1024; -static const int fixed_scale = 1 << 16; -static void blend_transformed_argb(size_t count, const VRle::Span *spans, - void *userData) +static constexpr inline uchar alpha_mul(uchar a, uchar b) +{ + return ((a * b) >> 8); +} + +static void blend_image_xform(size_t size, const VRle::Span *array, + void *userData) { - VSpanData *data = reinterpret_cast(userData); - if (data->mBitmap.format != VBitmap::Format::ARGB32_Premultiplied && - data->mBitmap.format != VBitmap::Format::ARGB32) { + const auto data = reinterpret_cast(userData); + const auto &src = data->texture(); + + if (src.format() != VBitmap::Format::ARGB32_Premultiplied && + src.format() != VBitmap::Format::ARGB32) { //@TODO other formats not yet handled. return; } - Operator op = getOperator(data, spans, count); - uint buffer[buffer_size]; - - const int image_x1 = data->mBitmap.x1; - const int image_y1 = data->mBitmap.y1; - const int image_x2 = data->mBitmap.x2 - 1; - const int image_y2 = data->mBitmap.y2 - 1; - - if (data->fast_matrix) { - // The increment pr x in the scanline - int fdx = (int)(data->m11 * fixed_scale); - int fdy = (int)(data->m12 * fixed_scale); - - while (count--) { - uint *target = data->buffer(spans->x, spans->y); - - const float cx = spans->x + float(0.5); - const float cy = spans->y + float(0.5); - - int x = - int((data->m21 * cy + data->m11 * cx + data->dx) * fixed_scale); - int y = - int((data->m22 * cy + data->m12 * cx + data->dy) * fixed_scale); - - int length = spans->len; - const int coverage = - (spans->coverage * data->mBitmap.const_alpha) >> 8; - while (length) { - int l = std::min(length, buffer_size); - const uint *end = buffer + l; - uint * b = buffer; - while (b < end) { - int px = clamp(x >> 16, image_x1, image_x2); - int py = clamp(y >> 16, image_y1, image_y2); - *b = reinterpret_cast( - data->mBitmap.scanLine(py))[px]; - - x += fdx; - y += fdy; - ++b; - } - op.func(target, buffer, l, coverage); - target += l; - length -= 
l; + Operator op = getOperator(data); + + process_in_chunk( + array, size, + [&](uint *scratch, size_t x, size_t y, size_t len, uchar cov) { + const auto coverage = (cov * src.alpha()) >> 8; + const float xfactor = y * data->m21 + data->dx + data->m11; + const float yfactor = y * data->m22 + data->dy + data->m12; + for (size_t i = 0; i < len; i++) { + const float fx = (x + i) * data->m11 + xfactor; + const float fy = (x + i) * data->m12 + yfactor; + const int px = clamp(int(fx), src.left, src.right); + const int py = clamp(int(fy), src.top, src.bottom); + scratch[i] = src.pixel(px, py); } - ++spans; - } - } else { - const float fdx = data->m11; - const float fdy = data->m12; - const float fdw = data->m13; - while (count--) { - uint *target = data->buffer(spans->x, spans->y); - - const float cx = spans->x + float(0.5); - const float cy = spans->y + float(0.5); - - float x = data->m21 * cy + data->m11 * cx + data->dx; - float y = data->m22 * cy + data->m12 * cx + data->dy; - float w = data->m23 * cy + data->m13 * cx + data->m33; - - int length = spans->len; - const int coverage = - (spans->coverage * data->mBitmap.const_alpha) >> 8; - while (length) { - int l = std::min(length, buffer_size); - const uint *end = buffer + l; - uint * b = buffer; - while (b < end) { - const float iw = w == 0 ? 
1 : 1 / w; - const float tx = x * iw; - const float ty = y * iw; - const int px = - clamp(int(tx) - (tx < 0), image_x1, image_x2); - const int py = - clamp(int(ty) - (ty < 0), image_y1, image_y2); - - *b = reinterpret_cast( - data->mBitmap.scanLine(py))[px]; - x += fdx; - y += fdy; - w += fdw; - - ++b; - } - op.func(target, buffer, l, coverage); - target += l; - length -= l; - } - ++spans; - } - } + op.func(data->buffer(x, y), len, scratch, coverage); + }); } -static void blend_untransformed_argb(size_t count, const VRle::Span *spans, - void *userData) +static void blend_image(size_t size, const VRle::Span *array, void *userData) { - VSpanData *data = reinterpret_cast(userData); - if (data->mBitmap.format != VBitmap::Format::ARGB32_Premultiplied && - data->mBitmap.format != VBitmap::Format::ARGB32) { + const auto data = reinterpret_cast(userData); + const auto &src = data->texture(); + + if (src.format() != VBitmap::Format::ARGB32_Premultiplied && + src.format() != VBitmap::Format::ARGB32) { //@TODO other formats not yet handled. 
return; } - Operator op = getOperator(data, spans, count); - - const int image_width = data->mBitmap.width; - const int image_height = data->mBitmap.height; - - int xoff = int(data->dx); - int yoff = int(data->dy); - - while (count--) { - int x = spans->x; - int length = spans->len; - int sx = xoff + x; - int sy = yoff + spans->y; - if (sy >= 0 && sy < image_height && sx < image_width) { - if (sx < 0) { - x -= sx; - length += sx; - sx = 0; - } - if (sx + length > image_width) length = image_width - sx; - if (length > 0) { - const int coverage = - (spans->coverage * data->mBitmap.const_alpha) >> 8; - const uint *src = (const uint *)data->mBitmap.scanLine(sy) + sx; - uint * dest = data->buffer(x, spans->y); - op.func(dest, src, length, coverage); - } + Operator op = getOperator(data); + + for (size_t i = 0; i < size; i++) { + const auto &span = array[i]; + int x = span.x; + int length = span.len; + int sx = x + int(data->dx); + int sy = span.y + int(data->dy); + + // nothing to copy. + if (sy < 0 || sy >= int(src.height()) || sx >= int(src.width()) || + (sx + length) <= 0) + continue; + + // intersecting left edge of image + if (sx < 0) { + x -= sx; + length += sx; + sx = 0; } - ++spans; + // intersecting right edge of image + if (sx + length > int(src.width())) length = src.width() - sx; + + op.func(data->buffer(x, span.y), length, src.pixelRef(sx, sy), + alpha_mul(span.coverage, src.alpha())); } } -void VSpanData::setup(const VBrush &brush, BlendMode /*mode*/, - int /*alpha*/) +void VSpanData::setup(const VBrush &brush, BlendMode /*mode*/, int /*alpha*/) { transformType = VMatrix::MatrixType::None; @@ -794,9 +716,8 @@ void VSpanData::setup(const VBrush &brush, BlendMode /*mode*/, } case VBrush::Type::Texture: { mType = VSpanData::Type::Texture; - initTexture( - &brush.mTexture->mBitmap, brush.mTexture->mAlpha, VBitmapData::Plain, - brush.mTexture->mBitmap.rect()); + initTexture(&brush.mTexture->mBitmap, brush.mTexture->mAlpha, + brush.mTexture->mBitmap.rect()); 
setupMatrix(brush.mTexture->mMatrix); break; } @@ -828,23 +749,12 @@ void VSpanData::setupMatrix(const VMatrix &matrix) } void VSpanData::initTexture(const VBitmap *bitmap, int alpha, - VBitmapData::Type type, const VRect &sourceRect) + const VRect &sourceRect) { mType = VSpanData::Type::Texture; - - mBitmap.imageData = bitmap->data(); - mBitmap.width = int(bitmap->width()); - mBitmap.height = int(bitmap->height()); - mBitmap.bytesPerLine = bitmap->stride(); - mBitmap.format = bitmap->format(); - mBitmap.x1 = sourceRect.x(); - mBitmap.y1 = sourceRect.y(); - mBitmap.x2 = std::min(mBitmap.x1 + sourceRect.width(), mBitmap.width); - mBitmap.y2 = std::min(mBitmap.y1 + sourceRect.height(), mBitmap.height); - - mBitmap.const_alpha = alpha; - mBitmap.type = type; - + mTexture.prepare(bitmap); + mTexture.setClip(sourceRect); + mTexture.setAlpha(alpha); updateSpanFunc(); } @@ -855,19 +765,19 @@ void VSpanData::updateSpanFunc() mUnclippedBlendFunc = nullptr; break; case VSpanData::Type::Solid: - mUnclippedBlendFunc = &blendColorARGB; + mUnclippedBlendFunc = &blend_color; break; case VSpanData::Type::LinearGradient: case VSpanData::Type::RadialGradient: { - mUnclippedBlendFunc = &blendGradientARGB; + mUnclippedBlendFunc = &blend_gradient; break; } case VSpanData::Type::Texture: { //@TODO update proper image function. if (transformType <= VMatrix::MatrixType::Translate) { - mUnclippedBlendFunc = &blend_untransformed_argb; + mUnclippedBlendFunc = &blend_image; } else { - mUnclippedBlendFunc = &blend_transformed_argb; + mUnclippedBlendFunc = &blend_image_xform; } break; } @@ -877,77 +787,10 @@ void VSpanData::updateSpanFunc() #if !defined(__SSE2__) && !defined(__ARM_NEON__) void memfill32(uint32_t *dest, uint32_t value, int length) { - int n; - - if (length <= 0) return; - - // Cute hack to align future memcopy operation - // and do unroll the loop a bit. Not sure it is - // the most efficient, but will do for now. 
- n = (length + 7) / 8; - switch (length & 0x07) { - case 0: - do { - *dest++ = value; - VECTOR_FALLTHROUGH; - case 7: - *dest++ = value; - VECTOR_FALLTHROUGH; - case 6: - *dest++ = value; - VECTOR_FALLTHROUGH; - case 5: - *dest++ = value; - VECTOR_FALLTHROUGH; - case 4: - *dest++ = value; - VECTOR_FALLTHROUGH; - case 3: - *dest++ = value; - VECTOR_FALLTHROUGH; - case 2: - *dest++ = value; - VECTOR_FALLTHROUGH; - case 1: - *dest++ = value; - } while (--n > 0); + // let compiler do the auto vectorization. + for (int i = 0 ; i < length; i++) { + *dest++ = value; } } #endif -void vInitDrawhelperFunctions() -{ - vInitBlendFunctions(); - -#if defined(__ARM_NEON__) - // update fast path for NEON - extern void Vcomp_func_solid_SourceOver_neon( - uint32_t * dest, int length, uint32_t color, uint32_t const_alpha); - - COMP_functionForModeSolid_C[uint(BlendMode::SrcOver)] = - Vcomp_func_solid_SourceOver_neon; -#endif - -#if defined(__SSE2__) - // update fast path for SSE2 - extern void Vcomp_func_solid_SourceOver_sse2( - uint32_t * dest, int length, uint32_t color, uint32_t const_alpha); - extern void Vcomp_func_solid_Source_sse2( - uint32_t * dest, int length, uint32_t color, uint32_t const_alpha); - extern void Vcomp_func_Source_sse2(uint32_t * dest, const uint32_t *src, - int length, uint32_t const_alpha); - extern void Vcomp_func_SourceOver_sse2(uint32_t * dest, const uint32_t *src, - int length, uint32_t const_alpha); - - COMP_functionForModeSolid_C[uint(BlendMode::Src)] = - Vcomp_func_solid_Source_sse2; - COMP_functionForModeSolid_C[uint(BlendMode::SrcOver)] = - Vcomp_func_solid_SourceOver_sse2; - - COMP_functionForMode_C[uint(BlendMode::Src)] = Vcomp_func_Source_sse2; - // COMP_functionForMode_C[uint(BlendMode::SrcOver)] = - // Vcomp_func_SourceOver_sse2; -#endif -} - -V_CONSTRUCTOR_FUNCTION(vInitDrawhelperFunctions) diff --git a/src/vector/vdrawhelper.h b/src/vector/vdrawhelper.h index a2c7335..b815009 100644 --- a/src/vector/vdrawhelper.h +++ 
b/src/vector/vdrawhelper.h @@ -1,26 +1,26 @@ -/* +/* * Copyright (c) 2018 Samsung Electronics Co., Ltd. All rights reserved. - * + * * This library is free software; you can redistribute it and/or * modify it under the terms of the GNU Lesser General Public * License as published by the Free Software Foundation; either * version 2.1 of the License, or (at your option) any later version. - * + * * This library is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU * Lesser General Public License for more details. - * + * * You should have received a copy of the GNU Lesser General Public * License along with this library; if not, write to the Free Software - * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA */ #ifndef VDRAWHELPER_H #define VDRAWHELPER_H -#include #include +#include #include "assert.h" #include "vbitmap.h" #include "vbrush.h" @@ -32,10 +32,54 @@ V_USE_NAMESPACE struct VSpanData; struct Operator; -typedef void (*CompositionFunctionSolid)(uint32_t *dest, int length, - uint32_t color, uint32_t const_alpha); -typedef void (*CompositionFunction)(uint32_t *dest, const uint32_t *src, - int length, uint32_t const_alpha); +struct RenderFunc +{ + using Color = void (*)(uint32_t *dest, int length, uint32_t color, uint32_t alpha); + using Src = void (*)(uint32_t *dest, int length, const uint32_t *src, uint32_t alpha); + enum class Type { + Invalid, + Color, + Src, + }; + RenderFunc() = default; + RenderFunc(Type t, Color f):type_(t), color_(f){assert(t == Type::Color);} + RenderFunc(Type t, Src f):type_(t), src_(f){ assert(t == Type::Src);} + + Type type_{Type::Invalid}; + union { + Color color_; + Src src_; + }; +}; + +class RenderFuncTable +{ +public: + RenderFuncTable(); + RenderFunc::Color color(BlendMode mode) const + { + 
return colorTable[uint32_t(mode)].color_; + } + RenderFunc::Src src(BlendMode mode) const + { + return srcTable[uint32_t(mode)].src_; + } +private: + void neon(); + void sse(); + void updateColor(BlendMode mode, RenderFunc::Color f) + { + colorTable[uint32_t(mode)] = {RenderFunc::Type::Color, f}; + } + void updateSrc(BlendMode mode, RenderFunc::Src f) + { + srcTable[uint32_t(mode)] = {RenderFunc::Type::Src, f}; + } +private: + std::array colorTable; + std::array srcTable; +}; + typedef void (*SourceFetchProc)(uint32_t *buffer, const Operator *o, const VSpanData *data, int y, int x, int length); @@ -62,10 +106,10 @@ struct RadialGradientValues { }; struct Operator { - BlendMode mode; - SourceFetchProc srcFetch; - CompositionFunctionSolid funcSolid; - CompositionFunction func; + BlendMode mode; + SourceFetchProc srcFetch; + RenderFunc::Color funcSolid; + RenderFunc::Src func; union { LinearGradientValues linear; RadialGradientValues radial; @@ -74,7 +118,7 @@ struct Operator { class VRasterBuffer { public: - VBitmap::Format prepare(VBitmap *image); + VBitmap::Format prepare(const VBitmap *image); void clear(); void resetBuffer(int val = 0); @@ -85,27 +129,32 @@ public: assert(size_t(y) < mHeight); return mBuffer + y * mBytesPerLine; } + uint32_t *pixelRef(int x, int y) const + { + return (uint32_t *)(mBuffer + y * mBytesPerLine + x * mBytesPerPixel); + } - size_t width() const { return mWidth; } - size_t height() const { return mHeight; } - size_t bytesPerLine() const { return mBytesPerLine; } - size_t bytesPerPixel() const { return mBytesPerPixel; } + size_t width() const { return mWidth; } + size_t height() const { return mHeight; } + size_t bytesPerLine() const { return mBytesPerLine; } + size_t bytesPerPixel() const { return mBytesPerPixel; } + VBitmap::Format format() const { return mFormat; } - VBitmap::Format mFormat{VBitmap::Format::ARGB32_Premultiplied}; private: - size_t mWidth{0}; - size_t mHeight{0}; - size_t mBytesPerLine{0}; - size_t mBytesPerPixel{0}; 
- uchar *mBuffer{nullptr}; + VBitmap::Format mFormat{VBitmap::Format::ARGB32_Premultiplied}; + size_t mWidth{0}; + size_t mHeight{0}; + size_t mBytesPerLine{0}; + size_t mBytesPerPixel{0}; + mutable uchar * mBuffer{nullptr}; }; struct VGradientData { VGradient::Spread mSpread; - struct Linear{ + struct Linear { float x1, y1, x2, y2; }; - struct Radial{ + struct Radial { float cx, cy, fx, fy, cradius, fradius; }; union { @@ -116,31 +165,21 @@ struct VGradientData { bool mColorTableAlpha; }; -struct VBitmapData -{ - const uchar *imageData; - const uchar *scanLine(int y) const { return imageData + y*bytesPerLine; } - - int width; - int height; +struct VTextureData : public VRasterBuffer { + uint32_t pixel(int x, int y) const { return *pixelRef(x, y); }; + uchar alpha() const { return mAlpha; } + void setAlpha(uchar alpha) { mAlpha = alpha; } + void setClip(const VRect &clip); // clip rect - int x1; - int y1; - int x2; - int y2; - uint bytesPerLine; - VBitmap::Format format; - bool hasAlpha; - enum Type { - Plain, - Tiled - }; - Type type; - int const_alpha; + int left; + int right; + int top; + int bottom; + bool hasAlpha; + uchar mAlpha; }; -struct VColorTable -{ +struct VColorTable { uint32_t buffer32[VGradient::colorTableSize]; bool alpha{true}; }; @@ -148,12 +187,11 @@ struct VColorTable struct VSpanData { enum class Type { None, Solid, LinearGradient, RadialGradient, Texture }; - void updateSpanFunc(); - void init(VRasterBuffer *image); - void setup(const VBrush & brush, - BlendMode mode = BlendMode::SrcOver, - int alpha = 255); - void setupMatrix(const VMatrix &matrix); + void updateSpanFunc(); + void init(VRasterBuffer *image); + void setup(const VBrush &brush, BlendMode mode = BlendMode::SrcOver, + int alpha = 255); + void setupMatrix(const VMatrix &matrix); VRect clipRect() const { @@ -168,31 +206,28 @@ struct VSpanData { uint *buffer(int x, int y) const { - return (uint *)(mRasterBuffer->scanLine(y + mOffset.y())) + x + mOffset.x(); + return 
mRasterBuffer->pixelRef(x + mOffset.x(), y + mOffset.y()); } - void initTexture(const VBitmap *image, int alpha, VBitmapData::Type type, const VRect &sourceRect); - - BlendMode mBlendMode{BlendMode::SrcOver}; - VRasterBuffer * mRasterBuffer; - ProcessRleSpan mBlendFunc; - ProcessRleSpan mUnclippedBlendFunc; - VSpanData::Type mType; - std::shared_ptr mColorTable{nullptr}; - VPoint mOffset; // offset to the subsurface - VSize mDrawableSize;// suburface size - union { - uint32_t mSolid; - VGradientData mGradient; - VBitmapData mBitmap; - }; + void initTexture(const VBitmap *image, int alpha, const VRect &sourceRect); + const VTextureData &texture() const { return mTexture; } + + BlendMode mBlendMode{BlendMode::SrcOver}; + VRasterBuffer * mRasterBuffer; + ProcessRleSpan mBlendFunc; + ProcessRleSpan mUnclippedBlendFunc; + VSpanData::Type mType; + std::shared_ptr mColorTable{nullptr}; + VPoint mOffset; // offset to the subsurface + VSize mDrawableSize; // subsurface size + uint32_t mSolid; + VGradientData mGradient; + VTextureData mTexture; + float m11, m12, m13, m21, m22, m23, m33, dx, dy; // inverse xform matrix - bool fast_matrix{true}; - VMatrix::MatrixType transformType{VMatrix::MatrixType::None}; + bool fast_matrix{true}; + VMatrix::MatrixType transformType{VMatrix::MatrixType::None}; }; -void vInitDrawhelperFunctions(); -extern void vInitBlendFunctions(); - #define BYTE_MUL(c, a) \ ((((((c) >> 8) & 0x00ff00ff) * (a)) & 0xff00ff00) + \ (((((c)&0x00ff00ff) * (a)) >> 8) & 0x00ff00ff)) @@ -217,7 +252,7 @@ inline constexpr int vAlpha(uint32_t c) return c >> 24; } -static inline uint INTERPOLATE_PIXEL_255(uint x, uint a, uint y, uint b) +static inline uint32_t interpolate_pixel(uint x, uint a, uint y, uint b) { uint t = (x & 0xff00ff) * a + (y & 0xff00ff) * b; t >>= 8; @@ -228,22 +263,4 @@ static inline uint INTERPOLATE_PIXEL_255(uint x, uint a, uint y, uint b) return x; } -#define LOOP_ALIGNED_U1_A4(DEST, LENGTH, UOP, A4OP) \ - { \ - while ((uintptr_t)DEST & 0xF && 
LENGTH) \ - UOP \ - \ - while (LENGTH) \ - { \ - switch (LENGTH) { \ - case 3: \ - case 2: \ - case 1: \ - UOP break; \ - default: \ - A4OP break; \ - } \ - } \ - } - #endif // QDRAWHELPER_P_H diff --git a/src/vector/vcompositionfunctions.cpp b/src/vector/vdrawhelper_common.cpp similarity index 55% rename from src/vector/vcompositionfunctions.cpp rename to src/vector/vdrawhelper_common.cpp index 04c9cc2..d87069e 100644 --- a/src/vector/vcompositionfunctions.cpp +++ b/src/vector/vdrawhelper_common.cpp @@ -16,22 +16,23 @@ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA */ +#include #include "vdrawhelper.h" /* - result = s - dest = s * ca + d * cia +result = s +dest = s * ca + d * cia */ -static void comp_func_solid_Source(uint32_t *dest, int length, uint32_t color, - uint32_t const_alpha) +static void color_Source(uint32_t *dest, int length, uint32_t color, + uint32_t alpha) { int ialpha, i; - if (const_alpha == 255) { + if (alpha == 255) { memfill32(dest, color, length); } else { - ialpha = 255 - const_alpha; - color = BYTE_MUL(color, const_alpha); + ialpha = 255 - alpha; + color = BYTE_MUL(color, alpha); for (i = 0; i < length; ++i) dest[i] = color + BYTE_MUL(dest[i], ialpha); } @@ -45,13 +46,12 @@ static void comp_func_solid_Source(uint32_t *dest, int length, uint32_t color, = s * ca + d * (1 - sa*ca) = s' + d ( 1 - s'a) */ -static void comp_func_solid_SourceOver(uint32_t *dest, int length, - uint32_t color, - uint32_t const_alpha) +static void color_SourceOver(uint32_t *dest, int length, uint32_t color, + uint32_t alpha) { int ialpha, i; - if (const_alpha != 255) color = BYTE_MUL(color, const_alpha); + if (alpha != 255) color = BYTE_MUL(color, alpha); ialpha = 255 - vAlpha(color); for (i = 0; i < length; ++i) dest[i] = color + BYTE_MUL(dest[i], ialpha); } @@ -61,12 +61,12 @@ static void comp_func_solid_SourceOver(uint32_t *dest, int length, dest = d * sa * ca + d * cia = d * (sa * ca + cia) */ -static void 
comp_func_solid_DestinationIn(uint *dest, int length, uint color, - uint const_alpha) +static void color_DestinationIn(uint *dest, int length, uint color, + uint alpha) { uint a = vAlpha(color); - if (const_alpha != 255) { - a = BYTE_MUL(a, const_alpha) + 255 - const_alpha; + if (alpha != 255) { + a = BYTE_MUL(a, alpha) + 255 - alpha; } for (int i = 0; i < length; ++i) { dest[i] = BYTE_MUL(dest[i], a); @@ -78,26 +78,26 @@ static void comp_func_solid_DestinationIn(uint *dest, int length, uint color, dest = d * sia * ca + d * cia = d * (sia * ca + cia) */ -static void comp_func_solid_DestinationOut(uint *dest, int length, uint color, - uint const_alpha) +static void color_DestinationOut(uint *dest, int length, uint color, + uint alpha) { uint a = vAlpha(~color); - if (const_alpha != 255) a = BYTE_MUL(a, const_alpha) + 255 - const_alpha; + if (alpha != 255) a = BYTE_MUL(a, alpha) + 255 - alpha; for (int i = 0; i < length; ++i) { dest[i] = BYTE_MUL(dest[i], a); } } -static void comp_func_Source(uint32_t *dest, const uint32_t *src, int length, - uint32_t const_alpha) +static void src_Source(uint32_t *dest, int length, const uint32_t *src, + uint32_t alpha) { - if (const_alpha == 255) { + if (alpha == 255) { memcpy(dest, src, size_t(length) * sizeof(uint)); } else { - uint ialpha = 255 - const_alpha; + uint ialpha = 255 - alpha; for (int i = 0; i < length; ++i) { dest[i] = - INTERPOLATE_PIXEL_255(src[i], const_alpha, dest[i], ialpha); + interpolate_pixel(src[i], alpha, dest[i], ialpha); } } } @@ -105,13 +105,12 @@ static void comp_func_Source(uint32_t *dest, const uint32_t *src, int length, /* s' = s * ca * d' = s' + d (1 - s'a) */ -static void comp_func_SourceOver(uint32_t *dest, const uint32_t *src, - int length, - uint32_t const_alpha) +static void src_SourceOver(uint32_t *dest, int length, const uint32_t *src, + uint32_t alpha) { uint s, sia; - if (const_alpha == 255) { + if (alpha == 255) { for (int i = 0; i < length; ++i) { s = src[i]; if (s >= 0xff000000) @@ 
-126,51 +125,61 @@ static void comp_func_SourceOver(uint32_t *dest, const uint32_t *src, * dest = source' + dest ( 1- source'a) */ for (int i = 0; i < length; ++i) { - s = BYTE_MUL(src[i], const_alpha); + s = BYTE_MUL(src[i], alpha); sia = vAlpha(~s); dest[i] = s + BYTE_MUL(dest[i], sia); } } } -static void comp_func_DestinationIn(uint *dest, const uint *src, int length, - uint const_alpha) +static void src_DestinationIn(uint *dest, int length, const uint *src, + uint alpha) { - if (const_alpha == 255) { + if (alpha == 255) { for (int i = 0; i < length; ++i) { dest[i] = BYTE_MUL(dest[i], vAlpha(src[i])); } } else { - uint cia = 255 - const_alpha; + uint cia = 255 - alpha; for (int i = 0; i < length; ++i) { - uint a = BYTE_MUL(vAlpha(src[i]), const_alpha) + cia; + uint a = BYTE_MUL(vAlpha(src[i]), alpha) + cia; dest[i] = BYTE_MUL(dest[i], a); } } } -static void comp_func_DestinationOut(uint *dest, const uint *src, int length, - uint const_alpha) +static void src_DestinationOut(uint *dest, int length, const uint *src, + uint alpha) { - if (const_alpha == 255) { + if (alpha == 255) { for (int i = 0; i < length; ++i) { dest[i] = BYTE_MUL(dest[i], vAlpha(~src[i])); } } else { - uint cia = 255 - const_alpha; + uint cia = 255 - alpha; for (int i = 0; i < length; ++i) { - uint sia = BYTE_MUL(vAlpha(~src[i]), const_alpha) + cia; + uint sia = BYTE_MUL(vAlpha(~src[i]), alpha) + cia; dest[i] = BYTE_MUL(dest[i], sia); } } } -CompositionFunctionSolid COMP_functionForModeSolid_C[] = { - comp_func_solid_Source, comp_func_solid_SourceOver, - comp_func_solid_DestinationIn, comp_func_solid_DestinationOut}; +RenderFuncTable::RenderFuncTable() +{ + updateColor(BlendMode::Src, color_Source); + updateColor(BlendMode::SrcOver, color_SourceOver); + updateColor(BlendMode::DestIn, color_DestinationIn); + updateColor(BlendMode::DestOut, color_DestinationOut); -CompositionFunction COMP_functionForMode_C[] = { - comp_func_Source, comp_func_SourceOver, comp_func_DestinationIn, - 
comp_func_DestinationOut}; + updateSrc(BlendMode::Src, src_Source); + updateSrc(BlendMode::SrcOver, src_SourceOver); + updateSrc(BlendMode::DestIn, src_DestinationIn); + updateSrc(BlendMode::DestOut, src_DestinationOut); -void vInitBlendFunctions() {} +#if defined(__ARM_NEON__) + neon(); +#endif +#if defined(__SSE2__) + sse(); +#endif +} diff --git a/src/vector/vdrawhelper_neon.cpp b/src/vector/vdrawhelper_neon.cpp index 99fd34f..681eabb 100644 --- a/src/vector/vdrawhelper_neon.cpp +++ b/src/vector/vdrawhelper_neon.cpp @@ -17,7 +17,7 @@ void memfill32(uint32_t *dest, uint32_t value, int length) pixman_composite_src_n_8888_asm_neon(length, 1, dest, length, value); } -void Vcomp_func_solid_SourceOver_neon(uint32_t *dest, int length, +static void color_SourceOver(uint32_t *dest, int length, uint32_t color, uint32_t const_alpha) { @@ -25,4 +25,9 @@ void Vcomp_func_solid_SourceOver_neon(uint32_t *dest, int length, pixman_composite_over_n_8888_asm_neon(length, 1, dest, length, color); } + +void RenderFuncTable::neon() +{ + updateColor(BlendMode::Src , color_SourceOver); +} #endif diff --git a/src/vector/vdrawhelper_sse2.cpp b/src/vector/vdrawhelper_sse2.cpp index 6d05127..fd9b711 100644 --- a/src/vector/vdrawhelper_sse2.cpp +++ b/src/vector/vdrawhelper_sse2.cpp @@ -1,10 +1,10 @@ #if defined(__SSE2__) -#include "vdrawhelper.h" - +#include #include /* for SSE2 intrinsics */ #include /* for _mm_shuffle_pi16 and _MM_SHUFFLE */ +#include "vdrawhelper.h" // Each 32bits components of alphaChannel must be in the form 0x00AA00AA inline static __m128i v4_byte_mul_sse2(__m128i c, __m128i a) { @@ -27,13 +27,6 @@ inline static __m128i v4_byte_mul_sse2(__m128i c, __m128i a) return _mm_add_epi32(v_ag, v_rb); } -static inline __m128i v4_ialpha_sse2(__m128i c) -{ - __m128i a = _mm_srli_epi32(c, 24); - - return _mm_sub_epi32(_mm_set1_epi32(0xff), a); -} - static inline __m128i v4_interpolate_color_sse2(__m128i a, __m128i c0, __m128i c1) { @@ -104,17 +97,29 @@ static inline __m128i 
v4_interpolate_color_sse2(__m128i a, __m128i c0, // Multiply src color with const_alpha #define V4_ALPHA_MULTIPLY v_src = v4_byte_mul_sse2(v_src, v_alpha); -// dest = src + dest * sia -#define V4_COMP_OP_SRC_OVER \ - __m128i v_sia = v4_ialpha_sse2(v_src); \ - v_sia = _mm_add_epi32(v_sia, _mm_slli_epi32(v_sia, 16)); \ - v_dest = v4_byte_mul_sse2(v_dest, v_sia); \ - v_src = _mm_add_epi32(v_src, v_dest); // dest = src + dest * sia #define V4_COMP_OP_SRC \ v_src = v4_interpolate_color_sse2(v_alpha, v_src, v_dest); +#define LOOP_ALIGNED_U1_A4(DEST, LENGTH, UOP, A4OP) \ + { \ + while ((uintptr_t)DEST & 0xF && LENGTH) \ + UOP \ + \ + while (LENGTH) \ + { \ + switch (LENGTH) { \ + case 3: \ + case 2: \ + case 1: \ + UOP break; \ + default: \ + A4OP break; \ + } \ + } \ + } + void memfill32(uint32_t* dest, uint32_t value, int length) { __m128i vector_data = _mm_set_epi32(value, value, value, value); @@ -171,7 +176,7 @@ void memfill32(uint32_t* dest, uint32_t value, int length) } // dest = color + (dest * alpha) -inline static void comp_func_helper_sse2(uint32_t* dest, int length, +inline static void copy_helper_sse2(uint32_t* dest, int length, uint32_t color, uint32_t alpha) { const __m128i v_color = _mm_set1_epi32(color); @@ -196,7 +201,7 @@ inline static void comp_func_helper_sse2(uint32_t* dest, int length, }) } -void Vcomp_func_solid_Source_sse2(uint32_t* dest, int length, uint32_t color, +static void color_Source(uint32_t* dest, int length, uint32_t color, uint32_t const_alpha) { if (const_alpha == 255) { @@ -206,11 +211,11 @@ void Vcomp_func_solid_Source_sse2(uint32_t* dest, int length, uint32_t color, ialpha = 255 - const_alpha; color = BYTE_MUL(color, const_alpha); - comp_func_helper_sse2(dest, length, color, ialpha); + copy_helper_sse2(dest, length, color, ialpha); } } -void Vcomp_func_solid_SourceOver_sse2(uint32_t* dest, int length, +static void color_SourceOver(uint32_t* dest, int length, uint32_t color, uint32_t const_alpha) { @@ -218,10 +223,10 @@ void 
Vcomp_func_solid_SourceOver_sse2(uint32_t* dest, int length, if (const_alpha != 255) color = BYTE_MUL(color, const_alpha); ialpha = 255 - vAlpha(color); - comp_func_helper_sse2(dest, length, color, ialpha); + copy_helper_sse2(dest, length, color, ialpha); } -void Vcomp_func_Source_sse2(uint32_t* dest, const uint32_t* src, int length, +static void src_Source(uint32_t* dest, int length, const uint32_t* src, uint32_t const_alpha) { int ialpha; @@ -233,8 +238,8 @@ void Vcomp_func_Source_sse2(uint32_t* dest, const uint32_t* src, int length, LOOP_ALIGNED_U1_A4(dest, length, { /* UOP */ - *dest = INTERPOLATE_PIXEL_255(*src, const_alpha, - *dest, ialpha); + *dest = interpolate_pixel(*src, const_alpha, + *dest, ialpha); dest++; src++; length--; @@ -245,255 +250,12 @@ void Vcomp_func_Source_sse2(uint32_t* dest, const uint32_t* src, int length, } } -void comp_func_SourceOver_sse2_1(uint32_t* dest, const uint32_t* src, - int length, uint32_t const_alpha) -{ - uint32_t s, sia; - - if (const_alpha == 255) { - LOOP_ALIGNED_U1_A4(dest, length, - { /* UOP */ - s = *src; - sia = vAlpha(~s); - *dest = s + BYTE_MUL(*dest, sia); - dest++; - src++; - length--; - }, - {/* A4OP */ - V4_FETCH_SRC_DEST V4_COMP_OP_SRC_OVER V4_STORE_DEST - V4_SRC_DEST_LEN_INC}) - } else { - __m128i v_alpha = _mm_set1_epi32(const_alpha); - LOOP_ALIGNED_U1_A4( - dest, length, - { /* UOP */ - s = BYTE_MUL(*src, const_alpha); - sia = vAlpha(~s); - *dest = s + BYTE_MUL(*dest, sia); - dest++; - src++; - length--; - }, - {/* A4OP */ - V4_FETCH_SRC_DEST V4_ALPHA_MULTIPLY V4_COMP_OP_SRC_OVER - V4_STORE_DEST V4_SRC_DEST_LEN_INC}) - } -} - -// Pixman implementation -#define force_inline inline - -static force_inline __m128i unpack_32_1x128(uint32_t data) -{ - return _mm_unpacklo_epi8(_mm_cvtsi32_si128(data), _mm_setzero_si128()); -} - -static force_inline void unpack_128_2x128(__m128i data, __m128i* data_lo, - __m128i* data_hi) -{ - *data_lo = _mm_unpacklo_epi8(data, _mm_setzero_si128()); - *data_hi = 
_mm_unpackhi_epi8(data, _mm_setzero_si128()); -} - -static force_inline uint32_t pack_1x128_32(__m128i data) -{ - return _mm_cvtsi128_si32(_mm_packus_epi16(data, _mm_setzero_si128())); -} - -static force_inline __m128i pack_2x128_128(__m128i lo, __m128i hi) +void RenderFuncTable::sse() { - return _mm_packus_epi16(lo, hi); -} + updateColor(BlendMode::Src , color_Source); + updateColor(BlendMode::SrcOver , color_SourceOver); -/* load 4 pixels from a 16-byte boundary aligned address */ -static force_inline __m128i load_128_aligned(__m128i* src) -{ - return _mm_load_si128(src); -} - -/* load 4 pixels from a unaligned address */ -static force_inline __m128i load_128_unaligned(const __m128i* src) -{ - return _mm_loadu_si128(src); -} - -/* save 4 pixels on a 16-byte boundary aligned address */ -static force_inline void save_128_aligned(__m128i* dst, __m128i data) -{ - _mm_store_si128(dst, data); -} - -static force_inline int is_opaque(__m128i x) -{ - __m128i ffs = _mm_cmpeq_epi8(x, x); - - return (_mm_movemask_epi8(_mm_cmpeq_epi8(x, ffs)) & 0x8888) == 0x8888; -} - -static force_inline int is_zero(__m128i x) -{ - return _mm_movemask_epi8(_mm_cmpeq_epi8(x, _mm_setzero_si128())) == 0xffff; -} - -static force_inline __m128i expand_alpha_1x128(__m128i data) -{ - return _mm_shufflehi_epi16( - _mm_shufflelo_epi16(data, _MM_SHUFFLE(3, 3, 3, 3)), - _MM_SHUFFLE(3, 3, 3, 3)); -} - -static force_inline __m128i create_mask_16_128(uint16_t mask) -{ - return _mm_set1_epi16(mask); -} - -static __m128i mask_0080 = create_mask_16_128(0x0080); -static __m128i mask_00ff = create_mask_16_128(0x00ff); -static __m128i mask_0101 = create_mask_16_128(0x0101); - -static force_inline __m128i negate_1x128(__m128i data) -{ - return _mm_xor_si128(data, mask_00ff); -} - -static force_inline void negate_2x128(__m128i data_lo, __m128i data_hi, - __m128i* neg_lo, __m128i* neg_hi) -{ - *neg_lo = _mm_xor_si128(data_lo, mask_00ff); - *neg_hi = _mm_xor_si128(data_hi, mask_00ff); -} - -static force_inline 
__m128i pix_multiply_1x128(__m128i data, __m128i alpha) -{ - return _mm_mulhi_epu16( - _mm_adds_epu16(_mm_mullo_epi16(data, alpha), mask_0080), mask_0101); -} - -static force_inline void pix_multiply_2x128(__m128i* data_lo, __m128i* data_hi, - __m128i* alpha_lo, - __m128i* alpha_hi, __m128i* ret_lo, - __m128i* ret_hi) -{ - __m128i lo, hi; - - lo = _mm_mullo_epi16(*data_lo, *alpha_lo); - hi = _mm_mullo_epi16(*data_hi, *alpha_hi); - lo = _mm_adds_epu16(lo, mask_0080); - hi = _mm_adds_epu16(hi, mask_0080); - *ret_lo = _mm_mulhi_epu16(lo, mask_0101); - *ret_hi = _mm_mulhi_epu16(hi, mask_0101); -} - -static force_inline __m128i over_1x128(__m128i src, __m128i alpha, __m128i dst) -{ - return _mm_adds_epu8(src, pix_multiply_1x128(dst, negate_1x128(alpha))); -} - -static force_inline void expand_alpha_2x128(__m128i data_lo, __m128i data_hi, - __m128i* alpha_lo, - __m128i* alpha_hi) -{ - __m128i lo, hi; - - lo = _mm_shufflelo_epi16(data_lo, _MM_SHUFFLE(3, 3, 3, 3)); - hi = _mm_shufflelo_epi16(data_hi, _MM_SHUFFLE(3, 3, 3, 3)); - - *alpha_lo = _mm_shufflehi_epi16(lo, _MM_SHUFFLE(3, 3, 3, 3)); - *alpha_hi = _mm_shufflehi_epi16(hi, _MM_SHUFFLE(3, 3, 3, 3)); -} - -static force_inline void over_2x128(__m128i* src_lo, __m128i* src_hi, - __m128i* alpha_lo, __m128i* alpha_hi, - __m128i* dst_lo, __m128i* dst_hi) -{ - __m128i t1, t2; - - negate_2x128(*alpha_lo, *alpha_hi, &t1, &t2); - - pix_multiply_2x128(dst_lo, dst_hi, &t1, &t2, dst_lo, dst_hi); - - *dst_lo = _mm_adds_epu8(*src_lo, *dst_lo); - *dst_hi = _mm_adds_epu8(*src_hi, *dst_hi); -} - -static force_inline uint32_t core_combine_over_u_pixel_sse2(uint32_t src, - uint32_t dst) -{ - uint8_t a; - __m128i xmms; - - a = src >> 24; - - if (a == 0xff) { - return src; - } else if (src) { - xmms = unpack_32_1x128(src); - return pack_1x128_32( - over_1x128(xmms, expand_alpha_1x128(xmms), unpack_32_1x128(dst))); - } - - return dst; -} - -// static force_inline void -// core_combine_over_u_sse2_no_mask (uint32_t * pd, -// const uint32_t* 
ps, -// int w) -void Vcomp_func_SourceOver_sse2(uint32_t* pd, const uint32_t* ps, int w, - uint32_t) -{ - uint32_t s, d; - - /* Align dst on a 16-byte boundary */ - while (w && ((uintptr_t)pd & 15)) { - d = *pd; - s = *ps; - - if (s) *pd = core_combine_over_u_pixel_sse2(s, d); - pd++; - ps++; - w--; - } - - while (w >= 4) { - __m128i src; - __m128i src_hi, src_lo, dst_hi, dst_lo; - __m128i alpha_hi, alpha_lo; - - src = load_128_unaligned((__m128i*)ps); - - if (!is_zero(src)) { - if (is_opaque(src)) { - save_128_aligned((__m128i*)pd, src); - } else { - __m128i dst = load_128_aligned((__m128i*)pd); - - unpack_128_2x128(src, &src_lo, &src_hi); - unpack_128_2x128(dst, &dst_lo, &dst_hi); - - expand_alpha_2x128(src_lo, src_hi, &alpha_lo, &alpha_hi); - over_2x128(&src_lo, &src_hi, &alpha_lo, &alpha_hi, &dst_lo, - &dst_hi); - - save_128_aligned((__m128i*)pd, pack_2x128_128(dst_lo, dst_hi)); - } - } - - ps += 4; - pd += 4; - w -= 4; - } - while (w) { - d = *pd; - s = *ps; - - if (s) *pd = core_combine_over_u_pixel_sse2(s, d); - pd++; - ps++; - - w--; - } + updateSrc(BlendMode::Src , src_Source); } #endif diff --git a/src/vector/vglobal.h b/src/vector/vglobal.h index 6b41fe7..bd5a6d7 100644 --- a/src/vector/vglobal.h +++ b/src/vector/vglobal.h @@ -274,7 +274,8 @@ enum class BlendMode { Src, SrcOver, DestIn, - DestOut + DestOut, + Last, }; #ifndef V_CONSTRUCTOR_FUNCTION diff --git a/src/vector/vpainter.cpp b/src/vector/vpainter.cpp index edf70bd..55f2c6e 100644 --- a/src/vector/vpainter.cpp +++ b/src/vector/vpainter.cpp @@ -78,7 +78,7 @@ void VPainter::drawBitmapUntransform(const VRect & target, const VRect & source, uint8_t const_alpha) { - mSpanData.initTexture(&bitmap, const_alpha, VBitmapData::Plain, source); + mSpanData.initTexture(&bitmap, const_alpha, source); if (!mSpanData.mUnclippedBlendFunc) return; mSpanData.dx = float(-target.x()); mSpanData.dy = float(-target.y()); -- 2.34.1