dali/internal/common/math.cpp

   1 /*
   2  * Copyright (c) 2021 Samsung Electronics Co., Ltd.
   3  *
   4  * Licensed under the Apache License, Version 2.0 (the "License");
   5  * you may not use this file except in compliance with the License.
   6  * You may obtain a copy of the License at
   7  *
   8  * http://www.apache.org/licenses/LICENSE-2.0
   9  *
  10  * Unless required by applicable law or agreed to in writing, software
  11  * distributed under the License is distributed on an "AS IS" BASIS,
  12  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
  13  * See the License for the specific language governing permissions and
  14  * limitations under the License.
  15  *
  16  */
  17
  18 // CLASS HEADER
  19 #include <dali/internal/common/math.h>
  20
  21 // EXTERNAL INCLUDES
  22 #include <cmath>
  23
  24 // INTERNAL INCLUDES
  25 #include <dali/internal/render/common/performance-monitor.h>
  26 #include <dali/public-api/common/constants.h>
  27 #include <dali/public-api/math/matrix.h>
  28 #include <dali/public-api/math/vector2.h>
  29
  30 void Dali::Internal::TransformVector3(Vec3 result, const Mat4 m, const Vec3 v)
  31 {
  32 #ifndef __ARM_NEON__
  33
  34   result[0] = v[0] * m[0] + v[1] * m[4] + v[2] * m[8];
  35   result[1] = v[0] * m[1] + v[1] * m[5] + v[2] * m[9];
  36   result[2] = v[0] * m[2] + v[1] * m[6] + v[2] * m[10];
  37
  38 #else
  39
  40   Vec4 temp = {v[0], v[1], v[2], 0.0f};
  41   Vec4 tempResult;
  42
  43   asm volatile(
  44     "VLD1.F32   {q0}, [%1]     \n\t" //Load "temp" from memory to register q0
  45     "VLD1.F32   {q1}, [%0]!    \n\t" //Load first row of the matrix from memory to register q1
  46     "VMUL.F32   q2, q1, d0[0]  \n\t" //q2 = (m[0..3] * v.x)
  47     "VLD1.F32   {q1}, [%0]!    \n\t" //Load second row of the matrix from memory
  48     "VMLA.F32   q2, q1, d0[1]  \n\t" //q2 = (m[0..3] * v.x) + (m[4..7] * v.y)
  49     "VLD1.F32   {q1}, [%0]!    \n\t" //Load third row of the matrix from memory
  50     "VMLA.F32   q2, q1, d1[0]  \n\t" //q2 = (m[0..3] * v.x) + (m[4..7] * v.y) + (m[8...11] * v.z)
  51     "VST1.F32   {q2}, [%2]     \n\t" //Write the result back to memory
  52     :
  53     : "r"(m), "r"(temp), "r"(tempResult)
  54     : "q0", "q1", "q2", "memory");
  55
  56   result[0] = tempResult[0];
  57   result[1] = tempResult[1];
  58   result[2] = tempResult[2];
  59
  60 #endif
  61 }
  62
  63 Dali::Vector2 Dali::Internal::Transform2D(const Dali::Matrix& matrix, const float x, const float y)
  64 {
  65   MATH_INCREASE_BY(PerformanceMonitor::FLOAT_POINT_MULTIPLY, 4);
  66
  67   const float* matrixArray(matrix.AsFloat());
  68
  69   // The following optimizations are applied:
  70   // Matrix[8 -> 11] are optimized out.
  71   // Matrix[12 -> 15] are always multiplied by 1.
  72   // z & w results (if we were doing a transformation to a Vector4) are unneeded and so not calculated.
  73   // As we always multiply by component, we do not store the coordinates in a Vector2 to avoid creation.
  74   // Note: For this reason the NEON SIMD version is no faster than the Dali::Matrix '*' Vector4 operator, and therefore not used.
  75   return Dali::Vector2(x * matrixArray[0] + y * matrixArray[4] + matrixArray[12], x * matrixArray[1] + y * matrixArray[5] + matrixArray[13]);
  76 }
  77
  78 float Dali::Internal::Length(const Vec3 v)
  79 {
  80   return sqrtf(v[0] * v[0] + v[1] * v[1] + v[2] * v[2]);
  81 }