/*
 * Copyright (c) 2017 Samsung Electronics Co., Ltd.
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 * http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 *
 */
19 #include <dali/internal/common/math.h>
25 #include <dali/internal/render/common/performance-monitor.h>
26 #include <dali/public-api/common/constants.h>
27 #include <dali/public-api/math/vector2.h>
28 #include <dali/public-api/math/matrix.h>
30 void Dali::Internal::TransformVector3( Vec3 result, const Mat4 m, const Vec3 v )
34 result[0] = v[0] * m[0] + v[1] * m[4] + v[2] * m[8];
35 result[1] = v[0] * m[1] + v[1] * m[5] + v[2] * m[9];
36 result[2] = v[0] * m[2] + v[1] * m[6] + v[2] * m[10];
40 Vec4 temp = { v[0], v[1], v[2], 0.0f };
43 asm volatile ( "VLD1.F32 {q0}, [%1] \n\t" //Load "temp" from memory to register q0
44 "VLD1.F32 {q1}, [%0]! \n\t" //Load first row of the matrix from memory to register q1
45 "VMUL.F32 q2, q1, d0[0] \n\t" //q2 = (m[0..3] * v.x)
46 "VLD1.F32 {q1}, [%0]! \n\t" //Load second row of the matrix from memory
47 "VMLA.F32 q2, q1, d0[1] \n\t" //q2 = (m[0..3] * v.x) + (m[4..7] * v.y)
48 "VLD1.F32 {q1}, [%0]! \n\t" //Load third row of the matrix from memory
49 "VMLA.F32 q2, q1, d1[0] \n\t" //q2 = (m[0..3] * v.x) + (m[4..7] * v.y) + (m[8...11] * v.z)
50 "VST1.F32 {q2}, [%2] \n\t" //Write the result back to memory
52 : "r"(m), "r"(temp), "r"(tempResult)
53 : "q0", "q1", "q2", "memory" );
55 result[0] = tempResult[0];
56 result[1] = tempResult[1];
57 result[2] = tempResult[2];
62 Dali::Vector2 Dali::Internal::Transform2D( const Dali::Matrix& matrix, const float x, const float y )
64 MATH_INCREASE_BY( PerformanceMonitor::FLOAT_POINT_MULTIPLY, 4 );
66 const float* matrixArray( matrix.AsFloat() );
68 // The following optimizations are applied:
69 // Matrix[8 -> 11] are optimized out.
70 // Matrix[12 -> 15] are always multiplied by 1.
71 // z & w results (if we were doing a transformation to a Vector4) are unneeded and so not calculated.
72 // As we always multiply by component, we do not store the coordinates in a Vector2 to avoid creation.
73 // Note: For this reason the NEON SIMD version is no faster than the Dali::Matrix '*' Vector4 operator, and therefore not used.
74 return Dali::Vector2( x * matrixArray[0] + y * matrixArray[4] + matrixArray[12], x * matrixArray[1] + y * matrixArray[5] + matrixArray[13] );
77 float Dali::Internal::Length( const Vec3 v )
79 return sqrtf(v[0]*v[0] + v[1]*v[1] + v[2]*v[2]);