platform/graphics/filters/SourceAlpha.cpp
platform/graphics/filters/SourceGraphic.cpp
- platform/graphics/filters/arm/FECompositeArithmeticNEON.cpp
platform/graphics/filters/arm/FECompositeArithmeticNEON.h
platform/graphics/filters/arm/FEGaussianBlurNEON.cpp
platform/graphics/filters/arm/FEGaussianBlurNEON.h
+2012-05-17 Zoltan Herczeg <zherczeg@webkit.org>
+
+ NEON intrinsic should be used with arithmetic mode of composite filter
+ https://bugs.webkit.org/show_bug.cgi?id=86622
+
+ Reviewed by Nikolas Zimmermann.
+
+ Rewrite hand written assembly code to increase portability and readibility
+ of the code. Remove the unnecessary FECompositeArithmeticNEON.cpp from the
+ project
+
+ Existing tests cover this issue.
+
+ * CMakeLists.txt:
+ * GNUmakefile.list.am:
+ * Target.pri:
+ * WebCore.gypi:
+ * WebCore.vcproj/WebCore.vcproj:
+ * WebCore.xcodeproj/project.pbxproj:
+ * platform/graphics/filters/FEComposite.cpp:
+ (WebCore):
+ (WebCore::computeArithmeticPixels):
+ (WebCore::arithmeticSoftware):
+ (WebCore::FEComposite::platformArithmeticSoftware):
+ * platform/graphics/filters/FEComposite.h:
+ * platform/graphics/filters/arm/FECompositeArithmeticNEON.cpp: Removed.
+ * platform/graphics/filters/arm/FECompositeArithmeticNEON.h:
+ (WebCore):
+ (WebCore::FEComposite::computeArithmeticPixelsNeon):
+ (WebCore::FEComposite::platformArithmeticNeon):
+
2012-05-17 Takashi Sakamoto <tasak@google.com>
showNodePath will be useful for debugging purpose.
Source/WebCore/platform/graphics/filters/SourceGraphic.h \
Source/WebCore/platform/graphics/filters/SpotLightSource.cpp \
Source/WebCore/platform/graphics/filters/SpotLightSource.h \
- Source/WebCore/platform/graphics/filters/arm/FECompositeArithmeticNEON.cpp \
Source/WebCore/platform/graphics/filters/arm/FECompositeArithmeticNEON.h \
Source/WebCore/platform/graphics/filters/arm/FEGaussianBlurNEON.cpp \
Source/WebCore/platform/graphics/filters/arm/FEGaussianBlurNEON.h \
platform/graphics/filters/SpotLightSource.cpp \
platform/graphics/filters/SourceAlpha.cpp \
platform/graphics/filters/SourceGraphic.cpp \
- platform/graphics/filters/arm/FECompositeArithmeticNEON.cpp \
platform/graphics/filters/arm/FELightingNEON.cpp \
platform/graphics/filters/arm/FEGaussianBlurNEON.cpp \
}
'platform/graphics/filters/SourceGraphic.h',
'platform/graphics/filters/SpotLightSource.cpp',
'platform/graphics/filters/SpotLightSource.h',
- 'platform/graphics/filters/arm/FECompositeArithmeticNEON.cpp',
'platform/graphics/filters/arm/FECompositeArithmeticNEON.h',
'platform/graphics/filters/arm/FEGaussianBlurNEON.cpp',
'platform/graphics/filters/arm/FEGaussianBlurNEON.h',
>
</File>
<File
- RelativePath="..\platform\graphics\filters\arm\FECompositeArithmeticNEON.cpp"
- >
- </File>
- <File
RelativePath="..\platform\graphics\filters\arm\FECompositeArithmeticNEON.h"
>
</File>
49E912AC0EFAC906009D0CAF /* AnimationList.cpp in Sources */ = {isa = PBXBuildFile; fileRef = 49E912A70EFAC906009D0CAF /* AnimationList.cpp */; };
49E912AD0EFAC906009D0CAF /* AnimationList.h in Headers */ = {isa = PBXBuildFile; fileRef = 49E912A80EFAC906009D0CAF /* AnimationList.h */; settings = {ATTRIBUTES = (Private, ); }; };
49E912AE0EFAC906009D0CAF /* TimingFunction.h in Headers */ = {isa = PBXBuildFile; fileRef = 49E912A90EFAC906009D0CAF /* TimingFunction.h */; settings = {ATTRIBUTES = (Private, ); }; };
- 49ECEB671499790D00CDD3A4 /* FECompositeArithmeticNEON.cpp in Sources */ = {isa = PBXBuildFile; fileRef = 49ECEB5D1499790D00CDD3A4 /* FECompositeArithmeticNEON.cpp */; };
49ECEB681499790D00CDD3A4 /* FECompositeArithmeticNEON.h in Headers */ = {isa = PBXBuildFile; fileRef = 49ECEB5E1499790D00CDD3A4 /* FECompositeArithmeticNEON.h */; };
49ECEB691499790D00CDD3A4 /* FEGaussianBlurNEON.cpp in Sources */ = {isa = PBXBuildFile; fileRef = 49ECEB5F1499790D00CDD3A4 /* FEGaussianBlurNEON.cpp */; };
49ECEB6A1499790D00CDD3A4 /* FEGaussianBlurNEON.h in Headers */ = {isa = PBXBuildFile; fileRef = 49ECEB601499790D00CDD3A4 /* FEGaussianBlurNEON.h */; };
49E912A70EFAC906009D0CAF /* AnimationList.cpp */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.cpp; name = AnimationList.cpp; path = animation/AnimationList.cpp; sourceTree = "<group>"; };
49E912A80EFAC906009D0CAF /* AnimationList.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; name = AnimationList.h; path = animation/AnimationList.h; sourceTree = "<group>"; };
49E912A90EFAC906009D0CAF /* TimingFunction.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; name = TimingFunction.h; path = animation/TimingFunction.h; sourceTree = "<group>"; };
- 49ECEB5D1499790D00CDD3A4 /* FECompositeArithmeticNEON.cpp */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.cpp; path = FECompositeArithmeticNEON.cpp; sourceTree = "<group>"; };
49ECEB5E1499790D00CDD3A4 /* FECompositeArithmeticNEON.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; path = FECompositeArithmeticNEON.h; sourceTree = "<group>"; };
49ECEB5F1499790D00CDD3A4 /* FEGaussianBlurNEON.cpp */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.cpp; path = FEGaussianBlurNEON.cpp; sourceTree = "<group>"; };
49ECEB601499790D00CDD3A4 /* FEGaussianBlurNEON.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; path = FEGaussianBlurNEON.h; sourceTree = "<group>"; };
49ECEB5C1499790D00CDD3A4 /* arm */ = {
isa = PBXGroup;
children = (
- 49ECEB5D1499790D00CDD3A4 /* FECompositeArithmeticNEON.cpp */,
49ECEB5E1499790D00CDD3A4 /* FECompositeArithmeticNEON.h */,
49ECEB5F1499790D00CDD3A4 /* FEGaussianBlurNEON.cpp */,
49ECEB601499790D00CDD3A4 /* FEGaussianBlurNEON.h */,
A75E8B8A0E1DE2D6007F2481 /* FEColorMatrix.cpp in Sources */,
A75E8B8C0E1DE2D6007F2481 /* FEComponentTransfer.cpp in Sources */,
A75E8B8E0E1DE2D6007F2481 /* FEComposite.cpp in Sources */,
- 49ECEB671499790D00CDD3A4 /* FECompositeArithmeticNEON.cpp in Sources */,
84730D781248F0B300D3A9C9 /* FEConvolveMatrix.cpp in Sources */,
50D403C714768C9400D30BB5 /* FECustomFilter.cpp in Sources */,
84730D7A1248F0B300D3A9C9 /* FEDiffuseLighting.cpp in Sources */,
forceValidPreMultipliedPixels();
}
-template <int b1, int b2, int b3, int b4>
+template <int b1, int b4>
static inline void computeArithmeticPixels(unsigned char* source, unsigned char* destination, int pixelArrayLength,
float k1, float k2, float k3, float k4)
{
while (--pixelArrayLength >= 0) {
unsigned char i1 = *source;
unsigned char i2 = *destination;
- float result = 0;
+ float result = k2 * i1 + k3 * i2;
if (b1)
result += scaledK1 * i1 * i2;
- if (b2)
- result += k2 * i1;
- if (b3)
- result += k3 * i2;
if (b4)
result += scaledK4;
{
if (!k4) {
if (!k1) {
- computeArithmeticPixels<0, 1, 1, 0>(source, destination, pixelArrayLength, k1, k2, k3, k4);
+ computeArithmeticPixels<0, 0>(source, destination, pixelArrayLength, k1, k2, k3, k4);
return;
}
- computeArithmeticPixels<1, 1, 1, 0>(source, destination, pixelArrayLength, k1, k2, k3, k4);
+ computeArithmeticPixels<1, 0>(source, destination, pixelArrayLength, k1, k2, k3, k4);
return;
}
if (!k1) {
- computeArithmeticPixels<0, 1, 1, 1>(source, destination, pixelArrayLength, k1, k2, k3, k4);
+ computeArithmeticPixels<0, 1>(source, destination, pixelArrayLength, k1, k2, k3, k4);
return;
}
- computeArithmeticPixels<1, 1, 1, 1>(source, destination, pixelArrayLength, k1, k2, k3, k4);
+ computeArithmeticPixels<1, 1>(source, destination, pixelArrayLength, k1, k2, k3, k4);
}
inline void FEComposite::platformArithmeticSoftware(Uint8ClampedArray* source, Uint8ClampedArray* destination,
int length = source->length();
ASSERT(length == static_cast<int>(destination->length()));
// The selection here eventually should happen dynamically.
-#if CPU(ARM_NEON) && CPU(ARM_TRADITIONAL) && COMPILER(GCC)
+#if HAVE(ARM_NEON_INTRINSICS)
ASSERT(!(length & 0x3));
- float coefficients[4] = { k1, k2, k3, k4 };
- platformArithmeticNeon(source->data(), destination->data(), length, coefficients);
+ platformArithmeticNeon(source->data(), destination->data(), length, k1, k2, k3, k4);
#else
arithmeticSoftware(source->data(), destination->data(), length, k1, k2, k3, k4);
#endif
private:
FEComposite(Filter*, const CompositeOperationType&, float, float, float, float);
- inline void platformArithmeticSoftware(Uint8ClampedArray* source, Uint8ClampedArray* destination, float k1, float k2, float k3, float k4);
- inline void platformArithmeticNeon(unsigned char* source, unsigned char* destination, unsigned pixelArrayLength, float* kArray);
+ inline void platformArithmeticSoftware(Uint8ClampedArray* source, Uint8ClampedArray* destination,
+ float k1, float k2, float k3, float k4);
+ template <int b1, int b4>
+ static inline void computeArithmeticPixelsNeon(unsigned char* source, unsigned char* destination,
+ unsigned pixelArrayLength, float k1, float k2, float k3, float k4);
+ static inline void platformArithmeticNeon(unsigned char* source, unsigned char* destination,
+ unsigned pixelArrayLength, float k1, float k2, float k3, float k4);
CompositeOperationType m_type;
float m_k1;
+++ /dev/null
-/*
- * Copyright (C) 2011 University of Szeged
- * Copyright (C) 2011 Felician Marton
- *
- * Redistribution and use in source and binary forms, with or without
- * modification, are permitted provided that the following conditions
- * are met:
- * 1. Redistributions of source code must retain the above copyright
- * notice, this list of conditions and the following disclaimer.
- * 2. Redistributions in binary form must reproduce the above copyright
- * notice, this list of conditions and the following disclaimer in the
- * documentation and/or other materials provided with the distribution.
- *
- * THIS SOFTWARE IS PROVIDED BY UNIVERSITY OF SZEGED ``AS IS'' AND ANY
- * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
- * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
- * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL UNIVERSITY OF SZEGED OR
- * CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
- * EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
- * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
- * PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
- * OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
- * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
- * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
- */
-
-#include "config.h"
-
-#if ENABLE(FILTERS)
-#include "FECompositeArithmeticNEON.h"
-
-#if CPU(ARM_NEON) && CPU(ARM_TRADITIONAL) && COMPILER(GCC)
-
-namespace WebCore {
-
-#define ASSTRING(str) #str
-#define TOSTRING(value) ASSTRING(value)
-
-#define NL "\n"
-
-#define SOURCE_R "r0"
-#define DEST_R "r1"
-#define END_R "r2"
-#define K_R "r3"
-#define NEXTPIXEL_R "r12"
-
-#define TEMP_Q "q0"
-#define TEMP_D0 "d0"
-#define TEMP_D00 "d0[0]"
-#define TEMP_D01 "d0[1]"
-#define TEMP_D10 "d1[0]"
-#define TEMP_D11 "d1[1]"
-#define PIXEL1_Q "q1"
-#define PIXEL1_D0 "d2"
-#define PIXEL1_D00 "d2[0]"
-#define PIXEL2_Q "q2"
-#define PIXEL2_D0 "d4"
-#define PIXEL2_D00 "d4[0]"
-#define BYTEMAX_Q "q3"
-#define K1_Q "q8"
-#define K2_Q "q9"
-#define K3_Q "q10"
-#define K4_Q "q11"
-
-asm ( // NOLINT
-".globl " TOSTRING(neonDrawCompositeArithmetic) NL
-TOSTRING(neonDrawCompositeArithmetic) ":" NL
- "cmp " END_R ", #0" NL
- "bxeq lr" NL
- // Set the end of the source register.
- "add " END_R ", " SOURCE_R ", " END_R NL
-
- "vld1.f32 {" TEMP_Q "}, [" K_R "]" NL
- "ldr " K_R ", [" K_R "]" NL
- "vdup.f32 " K1_Q ", " TEMP_D00 NL
- "vdup.f32 " K2_Q ", " TEMP_D01 NL
- "vdup.f32 " K3_Q ", " TEMP_D10 NL
- "vdup.f32 " K4_Q ", " TEMP_D11 NL
-
- "vmov.i32 " BYTEMAX_Q ", #0xFF" NL
- "vcvt.f32.u32 " TEMP_Q ", " BYTEMAX_Q NL
- "vmul.f32 " K4_Q ", " K4_Q ", " TEMP_Q NL
-
- "mov " NEXTPIXEL_R ", #4" NL
- "cmp " K_R ", #0" NL
- "beq .arithmeticK1IsZero" NL
-
- "vrecpe.f32 " TEMP_Q ", " TEMP_Q NL
- "vmul.f32 " K1_Q ", " K1_Q ", " TEMP_Q NL
-
-".arithmeticK1IsNonZero:" NL
-
- "vld1.u32 " PIXEL1_D00 ", [ " SOURCE_R "], " NEXTPIXEL_R NL
- "vld1.u32 " PIXEL2_D00 ", [" DEST_R "]" NL
-
- "vmovl.u8 " PIXEL1_Q ", " PIXEL1_D0 NL
- "vmovl.u16 " PIXEL1_Q ", " PIXEL1_D0 NL
- "vcvt.f32.u32 " PIXEL1_Q ", " PIXEL1_Q NL
- "vmovl.u8 " PIXEL2_Q ", " PIXEL2_D0 NL
- "vmovl.u16 " PIXEL2_Q ", " PIXEL2_D0 NL
- "vcvt.f32.u32 " PIXEL2_Q ", " PIXEL2_Q NL
-
- "vmul.f32 " TEMP_Q ", " PIXEL1_Q ", " PIXEL2_Q NL
- "vmul.f32 " TEMP_Q ", " TEMP_Q ", " K1_Q NL
- "vmla.f32 " TEMP_Q ", " PIXEL1_Q ", " K2_Q NL
- "vmla.f32 " TEMP_Q ", " PIXEL2_Q ", " K3_Q NL
- "vadd.f32 " TEMP_Q ", " K4_Q NL
-
- // Convert result to uint so negative values are converted to zero.
- "vcvt.u32.f32 " TEMP_Q ", " TEMP_Q NL
- "vmin.u32 " TEMP_Q ", " TEMP_Q ", " BYTEMAX_Q NL
- "vmovn.u32 " TEMP_D0 ", " TEMP_Q NL
- "vmovn.u16 " TEMP_D0 ", " TEMP_Q NL
-
- "vst1.u32 " TEMP_D00 ", [" DEST_R "], " NEXTPIXEL_R NL
-
- "cmp " SOURCE_R ", " END_R NL
- "bcc .arithmeticK1IsNonZero" NL
- "bx lr" NL
-
-".arithmeticK1IsZero:" NL
-
- "vld1.u32 " PIXEL1_D00 ", [ " SOURCE_R "], " NEXTPIXEL_R NL
- "vld1.u32 " PIXEL2_D00 ", [" DEST_R "]" NL
-
- "vmovl.u8 " PIXEL1_Q ", " PIXEL1_D0 NL
- "vmovl.u16 " PIXEL1_Q ", " PIXEL1_D0 NL
- "vcvt.f32.u32 " PIXEL1_Q ", " PIXEL1_Q NL
- "vmovl.u8 " PIXEL2_Q ", " PIXEL2_D0 NL
- "vmovl.u16 " PIXEL2_Q ", " PIXEL2_D0 NL
- "vcvt.f32.u32 " PIXEL2_Q ", " PIXEL2_Q NL
-
- "vmul.f32 " TEMP_Q ", " PIXEL1_Q ", " K2_Q NL
- "vmla.f32 " TEMP_Q ", " PIXEL2_Q ", " K3_Q NL
- "vadd.f32 " TEMP_Q ", " K4_Q NL
-
- // Convert result to uint so negative values are converted to zero.
- "vcvt.u32.f32 " TEMP_Q ", " TEMP_Q NL
- "vmin.u32 " TEMP_Q ", " TEMP_Q ", " BYTEMAX_Q NL
- "vmovn.u32 " TEMP_D0 ", " TEMP_Q NL
- "vmovn.u16 " TEMP_D0 ", " TEMP_Q NL
-
- "vst1.u32 " TEMP_D00 ", [" DEST_R "], " NEXTPIXEL_R NL
-
- "cmp " SOURCE_R ", " END_R NL
- "bcc .arithmeticK1IsZero" NL
- "bx lr" NL
-); // NOLINT
-
-} // namespace WebCore
-
-#endif // CPU(ARM_NEON) && COMPILER(GCC)
-
-#endif // ENABLE(FILTERS)
-
#include <wtf/Platform.h>
-#if ENABLE(FILTERS)
-#if CPU(ARM_NEON) && CPU(ARM_TRADITIONAL) && COMPILER(GCC)
+#if ENABLE(FILTERS) && HAVE(ARM_NEON_INTRINSICS)
#include "FEComposite.h"
+#include <arm_neon.h>
namespace WebCore {
-extern "C" {
-void neonDrawCompositeArithmetic(unsigned char* source, unsigned char* destination, unsigned pixelArrayLength, float* coefficients);
+template <int b1, int b4>
+inline void FEComposite::computeArithmeticPixelsNeon(unsigned char* source, unsigned char* destination,
+ unsigned pixelArrayLength, float k1, float k2, float k3, float k4)
+{
+ float32x4_t k1x4 = vdupq_n_f32(k1 / 255);
+ float32x4_t k2x4 = vdupq_n_f32(k2);
+ float32x4_t k3x4 = vdupq_n_f32(k3);
+ float32x4_t k4x4 = vdupq_n_f32(k4 * 255);
+ uint32x4_t max255 = vdupq_n_u32(255);
+
+ uint32_t* sourcePixel = reinterpret_cast<uint32_t*>(source);
+ uint32_t* destinationPixel = reinterpret_cast<uint32_t*>(destination);
+ uint32_t* destinationEndPixel = destinationPixel + (pixelArrayLength >> 2);
+
+ while (destinationPixel < destinationEndPixel) {
+ uint32x2_t temporary1 = vset_lane_u32(*sourcePixel, temporary1, 0);
+ uint16x4_t temporary2 = vget_low_u16(vmovl_u8(vreinterpret_u8_u32(temporary1)));
+ float32x4_t sourcePixelAsFloat = vcvtq_f32_u32(vmovl_u16(temporary2));
+
+ temporary1 = vset_lane_u32(*destinationPixel, temporary1, 0);
+ temporary2 = vget_low_u16(vmovl_u8(vreinterpret_u8_u32(temporary1)));
+ float32x4_t destinationPixelAsFloat = vcvtq_f32_u32(vmovl_u16(temporary2));
+
+ float32x4_t result = vmulq_f32(sourcePixelAsFloat, k2x4);
+ result = vmlaq_f32(result, destinationPixelAsFloat, k3x4);
+ if (b1)
+ result = vmlaq_f32(result, vmulq_f32(sourcePixelAsFloat, destinationPixelAsFloat), k1x4);
+ if (b4)
+ result = vaddq_f32(result, k4x4);
+
+ // Convert result to uint so negative values are converted to zero.
+ uint16x4_t temporary3 = vmovn_u32(vminq_u32(vcvtq_u32_f32(result), max255));
+ uint8x8_t temporary4 = vmovn_u16(vcombine_u16(temporary3, temporary3));
+ *destinationPixel++ = vget_lane_u32(vreinterpret_u32_u8(temporary4), 0);
+ ++sourcePixel;
+ }
}
-inline void FEComposite::platformArithmeticNeon(unsigned char* source, unsigned char* destination, unsigned pixelArrayLength, float* coefficients)
+inline void FEComposite::platformArithmeticNeon(unsigned char* source, unsigned char* destination,
+ unsigned pixelArrayLength, float k1, float k2, float k3, float k4)
{
- neonDrawCompositeArithmetic(source, destination, pixelArrayLength, coefficients);
+ if (!k4) {
+ if (!k1) {
+ computeArithmeticPixelsNeon<0, 0>(source, destination, pixelArrayLength, k1, k2, k3, k4);
+ return;
+ }
+
+ computeArithmeticPixelsNeon<1, 0>(source, destination, pixelArrayLength, k1, k2, k3, k4);
+ return;
+ }
+
+ if (!k1) {
+ computeArithmeticPixelsNeon<0, 1>(source, destination, pixelArrayLength, k1, k2, k3, k4);
+ return;
+ }
+ computeArithmeticPixelsNeon<1, 1>(source, destination, pixelArrayLength, k1, k2, k3, k4);
}
} // namespace WebCore
-#endif // CPU(ARM_NEON) && COMPILER(GCC)
-#endif // ENABLE(FILTERS)
+#endif // ENABLE(FILTERS) && HAVE(ARM_NEON_INTRINSICS)
#endif // FECompositeArithmeticNEON_h