NEON intrinsic should be used with arithmetic mode of composite filter

author zherczeg@webkit.org <zherczeg@webkit.org@268f45cc-cd09-0410-ab3c-d52691b4dbfc>

Thu, 17 May 2012 09:12:54 +0000 (09:12 +0000)

committer zherczeg@webkit.org <zherczeg@webkit.org@268f45cc-cd09-0410-ab3c-d52691b4dbfc>

Thu, 17 May 2012 09:12:54 +0000 (09:12 +0000)
author zherczeg@webkit.org <zherczeg@webkit.org@268f45cc-cd09-0410-ab3c-d52691b4dbfc>
Thu, 17 May 2012 09:12:54 +0000 (09:12 +0000)
committer zherczeg@webkit.org <zherczeg@webkit.org@268f45cc-cd09-0410-ab3c-d52691b4dbfc>
Thu, 17 May 2012 09:12:54 +0000 (09:12 +0000)
diff --git a/Source/WebCore/CMakeLists.txt b/Source/WebCore/CMakeLists.txt

index 9de4866..b200757 100644 (file)
--- a/Source/WebCore/CMakeLists.txt
+++ b/Source/WebCore/CMakeLists.txt
@@ -1213,7 +1213,6 @@ SET(WebCore_SOURCES
      platform/graphics/filters/SourceAlpha.cpp
      platform/graphics/filters/SourceGraphic.cpp
  
-    platform/graphics/filters/arm/FECompositeArithmeticNEON.cpp
      platform/graphics/filters/arm/FECompositeArithmeticNEON.h
      platform/graphics/filters/arm/FEGaussianBlurNEON.cpp
      platform/graphics/filters/arm/FEGaussianBlurNEON.h
diff --git a/Source/WebCore/ChangeLog b/Source/WebCore/ChangeLog

index 6da4b40..def731f 100644 (file)
--- a/Source/WebCore/ChangeLog
+++ b/Source/WebCore/ChangeLog
@@ -1,3 +1,34 @@
+2012-05-17  Zoltan Herczeg  <zherczeg@webkit.org>
+
+        NEON intrinsic should be used with arithmetic mode of composite filter
+        https://bugs.webkit.org/show_bug.cgi?id=86622
+
+        Reviewed by Nikolas Zimmermann.
+
+        Rewrite hand written assembly code to increase portability and readibility
+        of the code. Remove the unnecessary FECompositeArithmeticNEON.cpp from the
+        project
+
+        Existing tests cover this issue.
+
+        * CMakeLists.txt:
+        * GNUmakefile.list.am:
+        * Target.pri:
+        * WebCore.gypi:
+        * WebCore.vcproj/WebCore.vcproj:
+        * WebCore.xcodeproj/project.pbxproj:
+        * platform/graphics/filters/FEComposite.cpp:
+        (WebCore):
+        (WebCore::computeArithmeticPixels):
+        (WebCore::arithmeticSoftware):
+        (WebCore::FEComposite::platformArithmeticSoftware):
+        * platform/graphics/filters/FEComposite.h:
+        * platform/graphics/filters/arm/FECompositeArithmeticNEON.cpp: Removed.
+        * platform/graphics/filters/arm/FECompositeArithmeticNEON.h:
+        (WebCore):
+        (WebCore::FEComposite::computeArithmeticPixelsNeon):
+        (WebCore::FEComposite::platformArithmeticNeon):
+
  2012-05-17  Takashi Sakamoto  <tasak@google.com>
  
          showNodePath will be useful for debugging purpose.
diff --git a/Source/WebCore/GNUmakefile.list.am b/Source/WebCore/GNUmakefile.list.am

index 9544867..60b4c8e 100644 (file)
--- a/Source/WebCore/GNUmakefile.list.am
+++ b/Source/WebCore/GNUmakefile.list.am
@@ -3214,7 +3214,6 @@ webcore_sources += \
         Source/WebCore/platform/graphics/filters/SourceGraphic.h \
         Source/WebCore/platform/graphics/filters/SpotLightSource.cpp \
         Source/WebCore/platform/graphics/filters/SpotLightSource.h \
-       Source/WebCore/platform/graphics/filters/arm/FECompositeArithmeticNEON.cpp \
         Source/WebCore/platform/graphics/filters/arm/FECompositeArithmeticNEON.h \
         Source/WebCore/platform/graphics/filters/arm/FEGaussianBlurNEON.cpp \
         Source/WebCore/platform/graphics/filters/arm/FEGaussianBlurNEON.h \
diff --git a/Source/WebCore/Target.pri b/Source/WebCore/Target.pri

index 7bd6022..cf32210 100644 (file)
--- a/Source/WebCore/Target.pri
+++ b/Source/WebCore/Target.pri
@@ -3405,7 +3405,6 @@ contains(DEFINES, ENABLE_FILTERS=1) {
          platform/graphics/filters/SpotLightSource.cpp \
          platform/graphics/filters/SourceAlpha.cpp \
          platform/graphics/filters/SourceGraphic.cpp \
-        platform/graphics/filters/arm/FECompositeArithmeticNEON.cpp \
          platform/graphics/filters/arm/FELightingNEON.cpp \
          platform/graphics/filters/arm/FEGaussianBlurNEON.cpp \
  }
diff --git a/Source/WebCore/WebCore.gypi b/Source/WebCore/WebCore.gypi

index e0ee239..9974efc 100644 (file)
--- a/Source/WebCore/WebCore.gypi
+++ b/Source/WebCore/WebCore.gypi
@@ -3832,7 +3832,6 @@
              'platform/graphics/filters/SourceGraphic.h',
              'platform/graphics/filters/SpotLightSource.cpp',
              'platform/graphics/filters/SpotLightSource.h',
-            'platform/graphics/filters/arm/FECompositeArithmeticNEON.cpp',
              'platform/graphics/filters/arm/FECompositeArithmeticNEON.h',
              'platform/graphics/filters/arm/FEGaussianBlurNEON.cpp',
              'platform/graphics/filters/arm/FEGaussianBlurNEON.h',
diff --git a/Source/WebCore/WebCore.vcproj/WebCore.vcproj b/Source/WebCore/WebCore.vcproj/WebCore.vcproj

index b92068b..0fe7b98 100755 (executable)
--- a/Source/WebCore/WebCore.vcproj/WebCore.vcproj
+++ b/Source/WebCore/WebCore.vcproj/WebCore.vcproj
@@ -31228,10 +31228,6 @@
                                                 >
                                         </File>
                                         <File
-                                               RelativePath="..\platform\graphics\filters\arm\FECompositeArithmeticNEON.cpp"
-                                               >
-                                       </File>
-                                       <File
                                                 RelativePath="..\platform\graphics\filters\arm\FECompositeArithmeticNEON.h"
                                                 >
                                         </File>
diff --git a/Source/WebCore/WebCore.xcodeproj/project.pbxproj b/Source/WebCore/WebCore.xcodeproj/project.pbxproj

index a930b03..902ed7c 100644 (file)
--- a/Source/WebCore/WebCore.xcodeproj/project.pbxproj
+++ b/Source/WebCore/WebCore.xcodeproj/project.pbxproj
@@ -1320,7 +1320,6 @@
                 49E912AC0EFAC906009D0CAF /* AnimationList.cpp in Sources */ = {isa = PBXBuildFile; fileRef = 49E912A70EFAC906009D0CAF /* AnimationList.cpp */; };
                 49E912AD0EFAC906009D0CAF /* AnimationList.h in Headers */ = {isa = PBXBuildFile; fileRef = 49E912A80EFAC906009D0CAF /* AnimationList.h */; settings = {ATTRIBUTES = (Private, ); }; };
                 49E912AE0EFAC906009D0CAF /* TimingFunction.h in Headers */ = {isa = PBXBuildFile; fileRef = 49E912A90EFAC906009D0CAF /* TimingFunction.h */; settings = {ATTRIBUTES = (Private, ); }; };
-               49ECEB671499790D00CDD3A4 /* FECompositeArithmeticNEON.cpp in Sources */ = {isa = PBXBuildFile; fileRef = 49ECEB5D1499790D00CDD3A4 /* FECompositeArithmeticNEON.cpp */; };
                 49ECEB681499790D00CDD3A4 /* FECompositeArithmeticNEON.h in Headers */ = {isa = PBXBuildFile; fileRef = 49ECEB5E1499790D00CDD3A4 /* FECompositeArithmeticNEON.h */; };
                 49ECEB691499790D00CDD3A4 /* FEGaussianBlurNEON.cpp in Sources */ = {isa = PBXBuildFile; fileRef = 49ECEB5F1499790D00CDD3A4 /* FEGaussianBlurNEON.cpp */; };
                 49ECEB6A1499790D00CDD3A4 /* FEGaussianBlurNEON.h in Headers */ = {isa = PBXBuildFile; fileRef = 49ECEB601499790D00CDD3A4 /* FEGaussianBlurNEON.h */; };
@@ -8325,7 +8324,6 @@
                 49E912A70EFAC906009D0CAF /* AnimationList.cpp */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.cpp; name = AnimationList.cpp; path = animation/AnimationList.cpp; sourceTree = "<group>"; };
                 49E912A80EFAC906009D0CAF /* AnimationList.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; name = AnimationList.h; path = animation/AnimationList.h; sourceTree = "<group>"; };
                 49E912A90EFAC906009D0CAF /* TimingFunction.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; name = TimingFunction.h; path = animation/TimingFunction.h; sourceTree = "<group>"; };
-               49ECEB5D1499790D00CDD3A4 /* FECompositeArithmeticNEON.cpp */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.cpp; path = FECompositeArithmeticNEON.cpp; sourceTree = "<group>"; };
                 49ECEB5E1499790D00CDD3A4 /* FECompositeArithmeticNEON.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; path = FECompositeArithmeticNEON.h; sourceTree = "<group>"; };
                 49ECEB5F1499790D00CDD3A4 /* FEGaussianBlurNEON.cpp */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.cpp; path = FEGaussianBlurNEON.cpp; sourceTree = "<group>"; };
                 49ECEB601499790D00CDD3A4 /* FEGaussianBlurNEON.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; path = FEGaussianBlurNEON.h; sourceTree = "<group>"; };
@@ -14960,7 +14958,6 @@
                 49ECEB5C1499790D00CDD3A4 /* arm */ = {
                         isa = PBXGroup;
                         children = (
-                               49ECEB5D1499790D00CDD3A4 /* FECompositeArithmeticNEON.cpp */,
                                 49ECEB5E1499790D00CDD3A4 /* FECompositeArithmeticNEON.h */,
                                 49ECEB5F1499790D00CDD3A4 /* FEGaussianBlurNEON.cpp */,
                                 49ECEB601499790D00CDD3A4 /* FEGaussianBlurNEON.h */,
@@ -25958,7 +25955,6 @@
                                 A75E8B8A0E1DE2D6007F2481 /* FEColorMatrix.cpp in Sources */,
                                 A75E8B8C0E1DE2D6007F2481 /* FEComponentTransfer.cpp in Sources */,
                                 A75E8B8E0E1DE2D6007F2481 /* FEComposite.cpp in Sources */,
-                               49ECEB671499790D00CDD3A4 /* FECompositeArithmeticNEON.cpp in Sources */,
                                 84730D781248F0B300D3A9C9 /* FEConvolveMatrix.cpp in Sources */,
                                 50D403C714768C9400D30BB5 /* FECustomFilter.cpp in Sources */,
                                 84730D7A1248F0B300D3A9C9 /* FEDiffuseLighting.cpp in Sources */,
diff --git a/Source/WebCore/platform/graphics/filters/FEComposite.cpp b/Source/WebCore/platform/graphics/filters/FEComposite.cpp

index 970b0d2..ddfa1e4 100644 (file)
--- a/Source/WebCore/platform/graphics/filters/FEComposite.cpp
+++ b/Source/WebCore/platform/graphics/filters/FEComposite.cpp
@@ -124,7 +124,7 @@ void FEComposite::correctFilterResultIfNeeded()
      forceValidPreMultipliedPixels();
  }
  
-template <int b1, int b2, int b3, int b4>
+template <int b1, int b4>
  static inline void computeArithmeticPixels(unsigned char* source, unsigned char* destination, int pixelArrayLength,
                                      float k1, float k2, float k3, float k4)
  {
@@ -138,13 +138,9 @@ static inline void computeArithmeticPixels(unsigned char* source, unsigned char*
      while (--pixelArrayLength >= 0) {
          unsigned char i1 = *source;
          unsigned char i2 = *destination;
-        float result = 0;
+        float result = k2 * i1 + k3 * i2;
          if (b1)
              result += scaledK1 * i1 * i2;
-        if (b2)
-            result += k2 * i1;
-        if (b3)
-            result += k3 * i2;
          if (b4)
              result += scaledK4;
  
@@ -164,19 +160,19 @@ static inline void arithmeticSoftware(unsigned char* source, unsigned char* dest
  {
      if (!k4) {
          if (!k1) {
-            computeArithmeticPixels<0, 1, 1, 0>(source, destination, pixelArrayLength, k1, k2, k3, k4);
+            computeArithmeticPixels<0, 0>(source, destination, pixelArrayLength, k1, k2, k3, k4);
              return;
          }
  
-        computeArithmeticPixels<1, 1, 1, 0>(source, destination, pixelArrayLength, k1, k2, k3, k4);
+        computeArithmeticPixels<1, 0>(source, destination, pixelArrayLength, k1, k2, k3, k4);
          return;
      }
  
      if (!k1) {
-        computeArithmeticPixels<0, 1, 1, 1>(source, destination, pixelArrayLength, k1, k2, k3, k4);
+        computeArithmeticPixels<0, 1>(source, destination, pixelArrayLength, k1, k2, k3, k4);
          return;
      }
-    computeArithmeticPixels<1, 1, 1, 1>(source, destination, pixelArrayLength, k1, k2, k3, k4);
+    computeArithmeticPixels<1, 1>(source, destination, pixelArrayLength, k1, k2, k3, k4);
  }
  
  inline void FEComposite::platformArithmeticSoftware(Uint8ClampedArray* source, Uint8ClampedArray* destination,
@@ -185,10 +181,9 @@ inline void FEComposite::platformArithmeticSoftware(Uint8ClampedArray* source, U
      int length = source->length();
      ASSERT(length == static_cast<int>(destination->length()));
      // The selection here eventually should happen dynamically.
-#if CPU(ARM_NEON) && CPU(ARM_TRADITIONAL) && COMPILER(GCC)
+#if HAVE(ARM_NEON_INTRINSICS)
      ASSERT(!(length & 0x3));
-    float coefficients[4]  = { k1, k2, k3, k4 };
-    platformArithmeticNeon(source->data(), destination->data(), length, coefficients);
+    platformArithmeticNeon(source->data(), destination->data(), length, k1, k2, k3, k4);
  #else
      arithmeticSoftware(source->data(), destination->data(), length, k1, k2, k3, k4);
  #endif
diff --git a/Source/WebCore/platform/graphics/filters/FEComposite.h b/Source/WebCore/platform/graphics/filters/FEComposite.h

index 42695f6..60fded1 100644 (file)
--- a/Source/WebCore/platform/graphics/filters/FEComposite.h
+++ b/Source/WebCore/platform/graphics/filters/FEComposite.h
@@ -74,8 +74,13 @@ protected:
  private:
      FEComposite(Filter*, const CompositeOperationType&, float, float, float, float);
  
-    inline void platformArithmeticSoftware(Uint8ClampedArray* source, Uint8ClampedArray* destination, float k1, float k2, float k3, float k4);
-    inline void platformArithmeticNeon(unsigned char* source, unsigned  char* destination, unsigned pixelArrayLength, float* kArray);
+    inline void platformArithmeticSoftware(Uint8ClampedArray* source, Uint8ClampedArray* destination,
+        float k1, float k2, float k3, float k4);
+    template <int b1, int b4>
+    static inline void computeArithmeticPixelsNeon(unsigned char* source, unsigned  char* destination,
+        unsigned pixelArrayLength, float k1, float k2, float k3, float k4);
+    static inline void platformArithmeticNeon(unsigned char* source, unsigned  char* destination,
+        unsigned pixelArrayLength, float k1, float k2, float k3, float k4);
  
      CompositeOperationType m_type;
      float m_k1;
diff --git a/Source/WebCore/platform/graphics/filters/arm/FECompositeArithmeticNEON.cpp b/Source/WebCore/platform/graphics/filters/arm/FECompositeArithmeticNEON.cpp

deleted file mode 100644 (file)

index 57e0cf0..0000000
--- a/Source/WebCore/platform/graphics/filters/arm/FECompositeArithmeticNEON.cpp
+++ /dev/null
@@ -1,155 +0,0 @@
-/*
- * Copyright (C) 2011 University of Szeged
- * Copyright (C) 2011 Felician Marton
- *
- * Redistribution and use in source and binary forms, with or without
- * modification, are permitted provided that the following conditions
- * are met:
- * 1. Redistributions of source code must retain the above copyright
- *    notice, this list of conditions and the following disclaimer.
- * 2. Redistributions in binary form must reproduce the above copyright
- *    notice, this list of conditions and the following disclaimer in the
- *    documentation and/or other materials provided with the distribution.
- *
- * THIS SOFTWARE IS PROVIDED BY UNIVERSITY OF SZEGED ``AS IS'' AND ANY
- * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
- * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
- * PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL UNIVERSITY OF SZEGED OR
- * CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
- * EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
- * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
- * PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
- * OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
- * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
- * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
- */
-
-#include "config.h"
-
-#if ENABLE(FILTERS)
-#include "FECompositeArithmeticNEON.h"
-
-#if CPU(ARM_NEON) && CPU(ARM_TRADITIONAL) && COMPILER(GCC)
-
-namespace WebCore {
-
-#define ASSTRING(str) #str
-#define TOSTRING(value) ASSTRING(value)
-
-#define NL "\n"
-
-#define SOURCE_R     "r0"
-#define DEST_R       "r1"
-#define END_R        "r2"
-#define K_R          "r3"
-#define NEXTPIXEL_R  "r12"
-
-#define TEMP_Q       "q0"
-#define TEMP_D0      "d0"
-#define TEMP_D00     "d0[0]"
-#define TEMP_D01     "d0[1]"
-#define TEMP_D10     "d1[0]"
-#define TEMP_D11     "d1[1]"
-#define PIXEL1_Q     "q1"
-#define PIXEL1_D0    "d2"
-#define PIXEL1_D00   "d2[0]"
-#define PIXEL2_Q     "q2"
-#define PIXEL2_D0    "d4"
-#define PIXEL2_D00   "d4[0]"
-#define BYTEMAX_Q    "q3"
-#define K1_Q         "q8"
-#define K2_Q         "q9"
-#define K3_Q         "q10"
-#define K4_Q         "q11"
-
-asm ( // NOLINT
-".globl " TOSTRING(neonDrawCompositeArithmetic) NL
-TOSTRING(neonDrawCompositeArithmetic) ":" NL
-    "cmp " END_R ", #0" NL
-    "bxeq lr"  NL
-    // Set the end of the source register.
-    "add " END_R ", " SOURCE_R ", " END_R NL
-
-    "vld1.f32 {" TEMP_Q "}, [" K_R "]" NL
-    "ldr " K_R ", [" K_R "]" NL
-    "vdup.f32 " K1_Q ", " TEMP_D00 NL
-    "vdup.f32 " K2_Q ", " TEMP_D01 NL
-    "vdup.f32 " K3_Q ", " TEMP_D10 NL
-    "vdup.f32 " K4_Q ", " TEMP_D11 NL
-
-    "vmov.i32 " BYTEMAX_Q ", #0xFF" NL
-    "vcvt.f32.u32 " TEMP_Q ", " BYTEMAX_Q NL
-    "vmul.f32 " K4_Q ", " K4_Q ", " TEMP_Q NL
-
-    "mov " NEXTPIXEL_R ", #4" NL
-    "cmp " K_R ", #0" NL
-    "beq .arithmeticK1IsZero" NL
-
-    "vrecpe.f32 " TEMP_Q ", " TEMP_Q NL
-    "vmul.f32 " K1_Q ", " K1_Q ", " TEMP_Q NL
-
-".arithmeticK1IsNonZero:" NL
-
-    "vld1.u32 " PIXEL1_D00 ", [ " SOURCE_R "], " NEXTPIXEL_R NL
-    "vld1.u32 " PIXEL2_D00 ", [" DEST_R "]" NL
-
-    "vmovl.u8 " PIXEL1_Q ", " PIXEL1_D0 NL
-    "vmovl.u16 " PIXEL1_Q ", " PIXEL1_D0 NL
-    "vcvt.f32.u32 " PIXEL1_Q ", " PIXEL1_Q NL
-    "vmovl.u8 " PIXEL2_Q ", " PIXEL2_D0 NL
-    "vmovl.u16 " PIXEL2_Q ", " PIXEL2_D0 NL
-    "vcvt.f32.u32 " PIXEL2_Q ", " PIXEL2_Q NL
-
-    "vmul.f32 " TEMP_Q ", " PIXEL1_Q ", " PIXEL2_Q NL
-    "vmul.f32 " TEMP_Q ", " TEMP_Q ", " K1_Q NL
-    "vmla.f32 " TEMP_Q ", " PIXEL1_Q ", " K2_Q NL
-    "vmla.f32 " TEMP_Q ", " PIXEL2_Q ", " K3_Q NL
-    "vadd.f32 " TEMP_Q ", " K4_Q NL
-
-    // Convert result to uint so negative values are converted to zero.
-    "vcvt.u32.f32 " TEMP_Q ", " TEMP_Q NL
-    "vmin.u32 " TEMP_Q ", " TEMP_Q ", " BYTEMAX_Q NL
-    "vmovn.u32 " TEMP_D0 ", " TEMP_Q NL
-    "vmovn.u16 " TEMP_D0 ", " TEMP_Q NL
-
-    "vst1.u32 " TEMP_D00 ", [" DEST_R "], " NEXTPIXEL_R NL
-
-    "cmp " SOURCE_R ", " END_R NL
-    "bcc .arithmeticK1IsNonZero" NL
-    "bx lr" NL
-
-".arithmeticK1IsZero:" NL
-
-    "vld1.u32 " PIXEL1_D00 ", [ " SOURCE_R "], " NEXTPIXEL_R NL
-    "vld1.u32 " PIXEL2_D00 ", [" DEST_R "]" NL
-
-    "vmovl.u8 " PIXEL1_Q ", " PIXEL1_D0 NL
-    "vmovl.u16 " PIXEL1_Q ", " PIXEL1_D0 NL
-    "vcvt.f32.u32 " PIXEL1_Q ", " PIXEL1_Q NL
-    "vmovl.u8 " PIXEL2_Q ", " PIXEL2_D0 NL
-    "vmovl.u16 " PIXEL2_Q ", " PIXEL2_D0 NL
-    "vcvt.f32.u32 " PIXEL2_Q ", " PIXEL2_Q NL
-
-    "vmul.f32 " TEMP_Q ", " PIXEL1_Q ", " K2_Q NL
-    "vmla.f32 " TEMP_Q ", " PIXEL2_Q ", " K3_Q NL
-    "vadd.f32 " TEMP_Q ", " K4_Q NL
-
-    // Convert result to uint so negative values are converted to zero.
-    "vcvt.u32.f32 " TEMP_Q ", " TEMP_Q NL
-    "vmin.u32 " TEMP_Q ", " TEMP_Q ", " BYTEMAX_Q NL
-    "vmovn.u32 " TEMP_D0 ", " TEMP_Q NL
-    "vmovn.u16 " TEMP_D0 ", " TEMP_Q NL
-
-    "vst1.u32 " TEMP_D00 ", [" DEST_R "], " NEXTPIXEL_R NL
-
-    "cmp " SOURCE_R ", " END_R NL
-    "bcc .arithmeticK1IsZero" NL
-    "bx lr" NL
-); // NOLINT
-
-} // namespace WebCore
-
-#endif // CPU(ARM_NEON) && COMPILER(GCC)
-
-#endif // ENABLE(FILTERS)
-
diff --git a/Source/WebCore/platform/graphics/filters/arm/FECompositeArithmeticNEON.h b/Source/WebCore/platform/graphics/filters/arm/FECompositeArithmeticNEON.h

index 64cec1d..50f34b7 100644 (file)
--- a/Source/WebCore/platform/graphics/filters/arm/FECompositeArithmeticNEON.h
+++ b/Source/WebCore/platform/graphics/filters/arm/FECompositeArithmeticNEON.h
@@ -29,25 +29,73 @@
  
  #include <wtf/Platform.h>
  
-#if ENABLE(FILTERS)
-#if CPU(ARM_NEON) && CPU(ARM_TRADITIONAL) && COMPILER(GCC)
+#if ENABLE(FILTERS) && HAVE(ARM_NEON_INTRINSICS)
  
  #include "FEComposite.h"
+#include <arm_neon.h>
  
  namespace WebCore {
  
-extern "C" {
-void neonDrawCompositeArithmetic(unsigned char* source, unsigned char* destination, unsigned pixelArrayLength, float* coefficients);
+template <int b1, int b4>
+inline void FEComposite::computeArithmeticPixelsNeon(unsigned char* source, unsigned char* destination,
+    unsigned pixelArrayLength, float k1, float k2, float k3, float k4)
+{
+    float32x4_t k1x4 = vdupq_n_f32(k1 / 255);
+    float32x4_t k2x4 = vdupq_n_f32(k2);
+    float32x4_t k3x4 = vdupq_n_f32(k3);
+    float32x4_t k4x4 = vdupq_n_f32(k4 * 255);
+    uint32x4_t max255 = vdupq_n_u32(255);
+
+    uint32_t* sourcePixel = reinterpret_cast<uint32_t*>(source);
+    uint32_t* destinationPixel = reinterpret_cast<uint32_t*>(destination);
+    uint32_t* destinationEndPixel = destinationPixel + (pixelArrayLength >> 2);
+
+    while (destinationPixel < destinationEndPixel) {
+        uint32x2_t temporary1 = vset_lane_u32(*sourcePixel, temporary1, 0);
+        uint16x4_t temporary2 = vget_low_u16(vmovl_u8(vreinterpret_u8_u32(temporary1)));
+        float32x4_t sourcePixelAsFloat = vcvtq_f32_u32(vmovl_u16(temporary2));
+
+        temporary1 = vset_lane_u32(*destinationPixel, temporary1, 0);
+        temporary2 = vget_low_u16(vmovl_u8(vreinterpret_u8_u32(temporary1)));
+        float32x4_t destinationPixelAsFloat = vcvtq_f32_u32(vmovl_u16(temporary2));
+
+        float32x4_t result = vmulq_f32(sourcePixelAsFloat, k2x4);
+        result = vmlaq_f32(result, destinationPixelAsFloat, k3x4);
+        if (b1)
+            result = vmlaq_f32(result, vmulq_f32(sourcePixelAsFloat, destinationPixelAsFloat), k1x4);
+        if (b4)
+            result = vaddq_f32(result, k4x4);
+
+        // Convert result to uint so negative values are converted to zero.
+        uint16x4_t temporary3 = vmovn_u32(vminq_u32(vcvtq_u32_f32(result), max255));
+        uint8x8_t temporary4 = vmovn_u16(vcombine_u16(temporary3, temporary3));
+        *destinationPixel++ = vget_lane_u32(vreinterpret_u32_u8(temporary4), 0);
+        ++sourcePixel;
+    }
  }
  
-inline void FEComposite::platformArithmeticNeon(unsigned char* source, unsigned char* destination, unsigned pixelArrayLength, float* coefficients)
+inline void FEComposite::platformArithmeticNeon(unsigned char* source, unsigned char* destination,
+    unsigned pixelArrayLength, float k1, float k2, float k3, float k4)
  {
-    neonDrawCompositeArithmetic(source, destination, pixelArrayLength, coefficients);
+    if (!k4) {
+        if (!k1) {
+            computeArithmeticPixelsNeon<0, 0>(source, destination, pixelArrayLength, k1, k2, k3, k4);
+            return;
+        }
+
+        computeArithmeticPixelsNeon<1, 0>(source, destination, pixelArrayLength, k1, k2, k3, k4);
+        return;
+    }
+
+    if (!k1) {
+        computeArithmeticPixelsNeon<0, 1>(source, destination, pixelArrayLength, k1, k2, k3, k4);
+        return;
+    }
+    computeArithmeticPixelsNeon<1, 1>(source, destination, pixelArrayLength, k1, k2, k3, k4);
  }
  
  } // namespace WebCore
  
-#endif // CPU(ARM_NEON) && COMPILER(GCC)
-#endif // ENABLE(FILTERS)
+#endif // ENABLE(FILTERS) && HAVE(ARM_NEON_INTRINSICS)
  
  #endif // FECompositeArithmeticNEON_h
author	zherczeg@webkit.org <zherczeg@webkit.org@268f45cc-cd09-0410-ab3c-d52691b4dbfc>
	Thu, 17 May 2012 09:12:54 +0000 (09:12 +0000)
committer	zherczeg@webkit.org <zherczeg@webkit.org@268f45cc-cd09-0410-ab3c-d52691b4dbfc>
	Thu, 17 May 2012 09:12:54 +0000 (09:12 +0000)
Source/WebCore/CMakeLists.txt		patch \| blob \| history
Source/WebCore/ChangeLog		patch \| blob \| history
Source/WebCore/GNUmakefile.list.am		patch \| blob \| history
Source/WebCore/Target.pri		patch \| blob \| history
Source/WebCore/WebCore.gypi		patch \| blob \| history
Source/WebCore/WebCore.vcproj/WebCore.vcproj		patch \| blob \| history
Source/WebCore/WebCore.xcodeproj/project.pbxproj		patch \| blob \| history
Source/WebCore/platform/graphics/filters/FEComposite.cpp		patch \| blob \| history
Source/WebCore/platform/graphics/filters/FEComposite.h		patch \| blob \| history
Source/WebCore/platform/graphics/filters/arm/FECompositeArithmeticNEON.cpp	[deleted file]	patch \| blob \| history
Source/WebCore/platform/graphics/filters/arm/FECompositeArithmeticNEON.h		patch \| blob \| history