src/third_party/WebKit/Source/platform/graphics/cpu/arm/filters/FECompositeArithmeticNEON.h

   1 /*
   2  * Copyright (C) 2011 University of Szeged
   3  * Copyright (C) 2011 Felician Marton
   4  *
   5  * Redistribution and use in source and binary forms, with or without
   6  * modification, are permitted provided that the following conditions
   7  * are met:
   8  * 1. Redistributions of source code must retain the above copyright
   9  *    notice, this list of conditions and the following disclaimer.
  10  * 2. Redistributions in binary form must reproduce the above copyright
  11  *    notice, this list of conditions and the following disclaimer in the
  12  *    documentation and/or other materials provided with the distribution.
  13  *
  14  * THIS SOFTWARE IS PROVIDED BY UNIVERSITY OF SZEGED ``AS IS'' AND ANY
  15  * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
  16  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
  17  * PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL UNIVERSITY OF SZEGED OR
  18  * CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
  19  * EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
  20  * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
  21  * PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
  22  * OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
  23  * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
  24  * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
  25  */
  26
  27 #ifndef FECompositeArithmeticNEON_h
  28 #define FECompositeArithmeticNEON_h
  29
  30 #if HAVE(ARM_NEON_INTRINSICS)
  31
  32 #include "platform/graphics/filters/FEComposite.h"
  33 #include <arm_neon.h>
  34
  35 namespace blink {
  36
  37 template <int b1, int b4>
  38 inline void FEComposite::computeArithmeticPixelsNeon(unsigned char* source, unsigned char* destination,
  39     unsigned pixelArrayLength, float k1, float k2, float k3, float k4)
  40 {
  41     float32x4_t k1x4 = vdupq_n_f32(k1 / 255);
  42     float32x4_t k2x4 = vdupq_n_f32(k2);
  43     float32x4_t k3x4 = vdupq_n_f32(k3);
  44     float32x4_t k4x4 = vdupq_n_f32(k4 * 255);
  45     uint32x4_t max255 = vdupq_n_u32(255);
  46
  47     uint32_t* sourcePixel = reinterpret_cast<uint32_t*>(source);
  48     uint32_t* destinationPixel = reinterpret_cast<uint32_t*>(destination);
  49     uint32_t* destinationEndPixel = destinationPixel + (pixelArrayLength >> 2);
  50
  51     while (destinationPixel < destinationEndPixel) {
  52         uint32x2_t temporary1 = vset_lane_u32(*sourcePixel, temporary1, 0);
  53         uint16x4_t temporary2 = vget_low_u16(vmovl_u8(vreinterpret_u8_u32(temporary1)));
  54         float32x4_t sourcePixelAsFloat = vcvtq_f32_u32(vmovl_u16(temporary2));
  55
  56         temporary1 = vset_lane_u32(*destinationPixel, temporary1, 0);
  57         temporary2 = vget_low_u16(vmovl_u8(vreinterpret_u8_u32(temporary1)));
  58         float32x4_t destinationPixelAsFloat = vcvtq_f32_u32(vmovl_u16(temporary2));
  59
  60         float32x4_t result = vmulq_f32(sourcePixelAsFloat, k2x4);
  61         result = vmlaq_f32(result, destinationPixelAsFloat, k3x4);
  62         if (b1)
  63             result = vmlaq_f32(result, vmulq_f32(sourcePixelAsFloat, destinationPixelAsFloat), k1x4);
  64         if (b4)
  65             result = vaddq_f32(result, k4x4);
  66
  67         // Convert result to uint so negative values are converted to zero.
  68         uint16x4_t temporary3 = vmovn_u32(vminq_u32(vcvtq_u32_f32(result), max255));
  69         uint8x8_t temporary4 = vmovn_u16(vcombine_u16(temporary3, temporary3));
  70         *destinationPixel++ = vget_lane_u32(vreinterpret_u32_u8(temporary4), 0);
  71         ++sourcePixel;
  72     }
  73 }
  74
  75 inline void FEComposite::platformArithmeticNeon(unsigned char* source, unsigned char* destination,
  76     unsigned pixelArrayLength, float k1, float k2, float k3, float k4)
  77 {
  78     if (!k4) {
  79         if (!k1) {
  80             computeArithmeticPixelsNeon<0, 0>(source, destination, pixelArrayLength, k1, k2, k3, k4);
  81             return;
  82         }
  83
  84         computeArithmeticPixelsNeon<1, 0>(source, destination, pixelArrayLength, k1, k2, k3, k4);
  85         return;
  86     }
  87
  88     if (!k1) {
  89         computeArithmeticPixelsNeon<0, 1>(source, destination, pixelArrayLength, k1, k2, k3, k4);
  90         return;
  91     }
  92     computeArithmeticPixelsNeon<1, 1>(source, destination, pixelArrayLength, k1, k2, k3, k4);
  93 }
  94
  95 } // namespace blink
  96
  97 #endif // HAVE(ARM_NEON_INTRINSICS)
  98
  99 #endif // FECompositeArithmeticNEON_h