dali/public-api/math/matrix.cpp

   1 /*
   2  * Copyright (c) 2020 Samsung Electronics Co., Ltd.
   3  *
   4  * Licensed under the Apache License, Version 2.0 (the "License");
   5  * you may not use this file except in compliance with the License.
   6  * You may obtain a copy of the License at
   7  *
   8  * http://www.apache.org/licenses/LICENSE-2.0
   9  *
  10  * Unless required by applicable law or agreed to in writing, software
  11  * distributed under the License is distributed on an "AS IS" BASIS,
  12  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
  13  * See the License for the specific language governing permissions and
  14  * limitations under the License.
  15  *
  16  */
  17
  18 // CLASS HEADERS
  19 #include <dali/public-api/math/matrix.h>
  20
  21 // EXTERNAL INCLUDES
  22 #include <cmath>
  23 #include <cstdint> // uint32_t
  24 #include <cstring> // memcpy
  25 #include <ostream>
  26
  27 // INTERNAL INCLUDES
  28 #include <dali/public-api/common/dali-common.h>
  29 #include <dali/public-api/math/vector3.h>
  30 #include <dali/public-api/math/vector4.h>
  31 #include <dali/public-api/math/quaternion.h>
  32 #include <dali/public-api/math/math-utils.h>
  33 #include <dali/internal/render/common/performance-monitor.h>
  34
  35 namespace
  36 {
  37 const float ROTATION_EPSILON = 0.003f; // Deliberately large
  38
  39 const uint32_t NUM_BYTES_IN_ROW_OF_3( 3 * sizeof( float ) );
  40 const uint32_t NUM_BYTES_IN_ROW( 4 * sizeof( float ) );
  41 const uint32_t NUM_BYTES_IN_MATRIX( 16 * sizeof( float ) );
  42 const uint32_t ROW1_OFFSET( 4 );
  43 const uint32_t ROW2_OFFSET( 8 );
  44 const uint32_t ROW3_OFFSET( 12 );
  45
  46 /**
  47  * Helper to convert to Quaternion to float16 array
  48  */
  49 void Convert( float*& m, const Dali::Quaternion& rotation )
  50 {
  51   const float xx = rotation.mVector.x * rotation.mVector.x;
  52   const float yy = rotation.mVector.y * rotation.mVector.y;
  53   const float zz = rotation.mVector.z * rotation.mVector.z;
  54   const float xy = rotation.mVector.x * rotation.mVector.y;
  55   const float xz = rotation.mVector.x * rotation.mVector.z;
  56   const float wx = rotation.mVector.w * rotation.mVector.x;
  57   const float wy = rotation.mVector.w * rotation.mVector.y;
  58   const float wz = rotation.mVector.w * rotation.mVector.z;
  59   const float yz = rotation.mVector.y * rotation.mVector.z;
  60
  61   m[0] = 1.0f - 2.0f * (yy + zz);
  62   m[1] =        2.0f * (xy + wz);
  63   m[2] =        2.0f * (xz - wy);
  64   m[3] = 0.0f;
  65
  66   m[4] =        2.0f * (xy - wz);
  67   m[5] = 1.0f - 2.0f * (xx + zz);
  68   m[6] =        2.0f * (yz + wx);
  69   m[7] = 0.0f;
  70
  71   m[8] =        2.0f * (xz + wy);
  72   m[9] =        2.0f * (yz - wx);
  73   m[10]= 1.0f - 2.0f * (xx + yy);
  74   m[11]= 0.0f;
  75
  76   m[12]= 0.0f;
  77   m[13]= 0.0f;
  78   m[14]= 0.0f;
  79   m[15]= 1.0f;
  80 }
  81 }
  82
  83 namespace Dali
  84 {
  85
  86 using Internal::PerformanceMonitor;
  87
  88 const float identityArray[] = {1.0f, 0.0f, 0.0f, 0.0f,
  89                                0.0f, 1.0f, 0.0f, 0.0f,
  90                                0.0f, 0.0f, 1.0f, 0.0f,
  91                                0.0f, 0.0f, 0.0f, 1.0f};
  92
  93 const Matrix Matrix::IDENTITY(identityArray);
  94
  95 Matrix::Matrix()
  96 {
  97   memset( mMatrix, 0, NUM_BYTES_IN_MATRIX );
  98 }
  99
 100 Matrix::Matrix( bool initialize )
 101 {
 102   if( initialize )
 103   {
 104     memset( mMatrix, 0, NUM_BYTES_IN_MATRIX );
 105   }
 106 }
 107
 108 Matrix::Matrix(const float* array)
 109 {
 110   memcpy( mMatrix, array, NUM_BYTES_IN_MATRIX );
 111 }
 112
 113 Matrix::Matrix( const Quaternion& rotation )
 114 {
 115   MATH_INCREASE_BY(PerformanceMonitor::FLOAT_POINT_MULTIPLY,18);
 116
 117   float* matrixPtr = &mMatrix[0];
 118   Convert( matrixPtr, rotation );
 119 }
 120
 121 Matrix::Matrix( const Matrix& matrix )
 122 {
 123   memcpy( mMatrix, matrix.mMatrix, NUM_BYTES_IN_MATRIX );
 124 }
 125
 126 Matrix& Matrix::operator=( const Matrix& matrix )
 127 {
 128   // no point copying if self assigning
 129   if( this != &matrix )
 130   {
 131     memcpy( mMatrix, matrix.mMatrix, NUM_BYTES_IN_MATRIX );
 132   }
 133   return *this;
 134 }
 135
 136 Matrix::Matrix( Matrix&& matrix )
 137 {
 138   memcpy( mMatrix, matrix.mMatrix, NUM_BYTES_IN_MATRIX );
 139 }
 140
 141 Matrix& Matrix::operator=( Matrix&& matrix )
 142 {
 143   if( this != &matrix )
 144   {
 145     memcpy( mMatrix, matrix.mMatrix, NUM_BYTES_IN_MATRIX );
 146   }
 147   return *this;
 148 }
 149
 150 void Matrix::InvertTransform(Matrix& result) const
 151 {
 152   MATH_INCREASE_BY(PerformanceMonitor::FLOAT_POINT_MULTIPLY,12);
 153
 154   float* m1 = result.AsFloat();
 155
 156   DALI_ASSERT_ALWAYS( EqualsZero( mMatrix[3] ) && EqualsZero( mMatrix[7] ) && EqualsZero( mMatrix[11] ) && Equals( mMatrix[15], 1.0f ) && "Must be a transform matrix" );
 157
 158   m1[0] = mMatrix[0];
 159   m1[1] = mMatrix[4];
 160   m1[2] = mMatrix[8];
 161   m1[3] = 0.0f;
 162
 163   m1[4] = mMatrix[1];
 164   m1[5] = mMatrix[5];
 165   m1[6] = mMatrix[9];
 166   m1[7] = 0.0f;
 167
 168   m1[8] = mMatrix[2];
 169   m1[9] = mMatrix[6];
 170   m1[10] = mMatrix[10];
 171   m1[11] = 0.0f;
 172
 173   m1[12] = -( ( mMatrix[0] * mMatrix[12] ) + ( mMatrix[1] * mMatrix[13] ) + ( mMatrix[2] * mMatrix[14] ) + ( mMatrix[3] * mMatrix[15] ) );
 174   m1[13] = -( ( mMatrix[4] * mMatrix[12] ) + ( mMatrix[5] * mMatrix[13] ) + ( mMatrix[6] * mMatrix[14] ) + ( mMatrix[7] * mMatrix[15] ) );
 175   m1[14] = -( ( mMatrix[8] * mMatrix[12] ) + ( mMatrix[9] * mMatrix[13] ) + ( mMatrix[10] * mMatrix[14] ) + ( mMatrix[11] * mMatrix[15] ) );
 176   m1[15] = 1.0f;
 177 }
 178
 179 static bool InvertMatrix(const float* m, float* out)
 180 {
 181   float inv[16];
 182
 183   MATH_INCREASE_BY(PerformanceMonitor::FLOAT_POINT_MULTIPLY,192);  // 12 x 16 multiples
 184
 185   inv[0] =   m[5]*m[10]*m[15] - m[5]*m[11]*m[14] - m[9]*m[6]*m[15] + m[9]*m[7]*m[14] + m[13]*m[6]*m[11] - m[13]*m[7]*m[10];
 186   inv[4] =  -m[4]*m[10]*m[15] + m[4]*m[11]*m[14] + m[8]*m[6]*m[15] - m[8]*m[7]*m[14] - m[12]*m[6]*m[11] + m[12]*m[7]*m[10];
 187   inv[8] =   m[4]*m[9]*m[15] - m[4]*m[11]*m[13] - m[8]*m[5]*m[15] + m[8]*m[7]*m[13] + m[12]*m[5]*m[11] - m[12]*m[7]*m[9];
 188   inv[12] = -m[4]*m[9]*m[14] + m[4]*m[10]*m[13] + m[8]*m[5]*m[14] - m[8]*m[6]*m[13] - m[12]*m[5]*m[10] + m[12]*m[6]*m[9];
 189   inv[1] =  -m[1]*m[10]*m[15] + m[1]*m[11]*m[14] + m[9]*m[2]*m[15] - m[9]*m[3]*m[14] - m[13]*m[2]*m[11] + m[13]*m[3]*m[10];
 190   inv[5] =   m[0]*m[10]*m[15] - m[0]*m[11]*m[14] - m[8]*m[2]*m[15] + m[8]*m[3]*m[14] + m[12]*m[2]*m[11] - m[12]*m[3]*m[10];
 191   inv[9] =  -m[0]*m[9]*m[15] + m[0]*m[11]*m[13] + m[8]*m[1]*m[15] - m[8]*m[3]*m[13] - m[12]*m[1]*m[11] + m[12]*m[3]*m[9];
 192   inv[13] =  m[0]*m[9]*m[14] - m[0]*m[10]*m[13] - m[8]*m[1]*m[14] + m[8]*m[2]*m[13] + m[12]*m[1]*m[10] - m[12]*m[2]*m[9];
 193   inv[2] =   m[1]*m[6]*m[15] - m[1]*m[7]*m[14] - m[5]*m[2]*m[15] + m[5]*m[3]*m[14] + m[13]*m[2]*m[7] - m[13]*m[3]*m[6];
 194   inv[6] =  -m[0]*m[6]*m[15] + m[0]*m[7]*m[14] + m[4]*m[2]*m[15] - m[4]*m[3]*m[14] - m[12]*m[2]*m[7] + m[12]*m[3]*m[6];
 195   inv[10] =  m[0]*m[5]*m[15] - m[0]*m[7]*m[13] - m[4]*m[1]*m[15] + m[4]*m[3]*m[13] + m[12]*m[1]*m[7] - m[12]*m[3]*m[5];
 196   inv[14] = -m[0]*m[5]*m[14] + m[0]*m[6]*m[13] + m[4]*m[1]*m[14] - m[4]*m[2]*m[13] - m[12]*m[1]*m[6] + m[12]*m[2]*m[5];
 197   inv[3] =  -m[1]*m[6]*m[11] + m[1]*m[7]*m[10] + m[5]*m[2]*m[11] - m[5]*m[3]*m[10] - m[9]*m[2]*m[7] + m[9]*m[3]*m[6];
 198   inv[7] =   m[0]*m[6]*m[11] - m[0]*m[7]*m[10] - m[4]*m[2]*m[11] + m[4]*m[3]*m[10] + m[8]*m[2]*m[7] - m[8]*m[3]*m[6];
 199   inv[11] = -m[0]*m[5]*m[11] + m[0]*m[7]*m[9] + m[4]*m[1]*m[11] - m[4]*m[3]*m[9] - m[8]*m[1]*m[7] + m[8]*m[3]*m[5];
 200   inv[15] =  m[0]*m[5]*m[10] - m[0]*m[6]*m[9] - m[4]*m[1]*m[10] + m[4]*m[2]*m[9] + m[8]*m[1]*m[6] - m[8]*m[2]*m[5];
 201
 202   float det = m[0]*inv[0] + m[1]*inv[4] + m[2]*inv[8] + m[3]*inv[12];
 203
 204   // In the case where the determinant is exactly zero, the matrix is non-invertible
 205   if ( EqualsZero( det ) )
 206   {
 207     return false;
 208   }
 209
 210   det = 1.0f / det;
 211
 212   for( int32_t i = 0; i < 16; i++)
 213   {
 214     out[i] = inv[i] * det;
 215   }
 216
 217   return true;
 218 }
 219
 220 bool Matrix::Invert()
 221 {
 222   Matrix temp(*this);
 223
 224   return InvertMatrix(temp.AsFloat(), mMatrix);
 225 }
 226
 227 void Matrix::Transpose()
 228 {
 229   float temp = mMatrix[1];
 230   mMatrix[1] = mMatrix[4];
 231   mMatrix[4] = temp;
 232
 233   temp = mMatrix[2];
 234   mMatrix[2] = mMatrix[8];
 235   mMatrix[8] = temp;
 236
 237   temp = mMatrix[3];
 238   mMatrix[3] = mMatrix[12];
 239   mMatrix[12] = temp;
 240
 241   temp = mMatrix[6];
 242   mMatrix[6] = mMatrix[9];
 243   mMatrix[9] = temp;
 244
 245   temp = mMatrix[7];
 246   mMatrix[7] = mMatrix[13];
 247   mMatrix[13] = temp;
 248
 249   temp = mMatrix[11];
 250   mMatrix[11] = mMatrix[14];
 251   mMatrix[14] = temp;
 252 }
 253
 254 void Matrix::SetIdentity()
 255 {
 256   memcpy( mMatrix, identityArray, NUM_BYTES_IN_MATRIX );
 257 }
 258
 259 void Matrix::SetIdentityAndScale( const Vector3& scale )
 260 {
 261   // initialize to zeros
 262   memset( mMatrix, 0, NUM_BYTES_IN_MATRIX );
 263
 264   // just apply scale on the diagonal
 265   mMatrix[0]  = scale.x;
 266   mMatrix[5]  = scale.y;
 267   mMatrix[10] = scale.z;
 268   mMatrix[15] = 1.0f;
 269 }
 270
 271 void Matrix::SetTranslation(const Vector4& translation)
 272 {
 273   memcpy( mMatrix + ROW3_OFFSET, &translation, NUM_BYTES_IN_ROW );
 274 }
 275 void Matrix::SetTranslation(const Vector3& other)
 276 {
 277   memcpy( mMatrix + ROW3_OFFSET, &other, NUM_BYTES_IN_ROW_OF_3 );
 278   mMatrix[15] = 1.0f;
 279 }
 280
 281 void Matrix::Multiply( Matrix& result, const Matrix& lhs, const Matrix& rhs )
 282 {
 283   MATH_INCREASE_COUNTER(PerformanceMonitor::MATRIX_MULTIPLYS);
 284   MATH_INCREASE_BY(PerformanceMonitor::FLOAT_POINT_MULTIPLY,64); // 64 = 16*4
 285
 286   float* temp = result.AsFloat();
 287   const float* rhsPtr  = rhs.AsFloat();
 288   const float* lhsPtr = lhs.AsFloat();
 289
 290 #ifndef  __ARM_NEON__
 291
 292   for( int32_t i=0; i < 4; i++ )
 293   {
 294     // i<<2 gives the first vector / column
 295     int32_t loc = i<<2;
 296     int32_t loc1 = loc + 1;
 297     int32_t loc2 = loc + 2;
 298     int32_t loc3 = loc + 3;
 299     float value0 = lhsPtr[loc];
 300     float value1 = lhsPtr[loc1];
 301     float value2 = lhsPtr[loc2];
 302     float value3 = lhsPtr[loc3];
 303     temp[loc]  = (value0 * rhsPtr[0]) +
 304                  (value1 * rhsPtr[4]) +
 305                  (value2 * rhsPtr[8]) +
 306                  (value3 * rhsPtr[12]);
 307
 308     temp[loc1] = (value0 * rhsPtr[1]) +
 309                  (value1 * rhsPtr[5]) +
 310                  (value2 * rhsPtr[9]) +
 311                  (value3 * rhsPtr[13]);
 312
 313     temp[loc2] = (value0 * rhsPtr[2]) +
 314                  (value1 * rhsPtr[6]) +
 315                  (value2 * rhsPtr[10])+
 316                  (value3 * rhsPtr[14]);
 317
 318     temp[loc3] = (value0 * rhsPtr[3]) +
 319                  (value1 * rhsPtr[7]) +
 320                  (value2 * rhsPtr[11])+
 321                  (value3 * rhsPtr[15]);
 322   }
 323
 324 #else
 325
 326   // 64 32bit registers,
 327   // aliased to
 328   // d = 64 bit double-word d0 -d31
 329   // q =128 bit quad-word   q0 -q15  (enough to handle a column of 4 floats in a matrix)
 330   // e.g. q0 = d0 and d1
 331
 332   // load and stores interleaved as NEON can load and store while calculating
 333   asm volatile ( "VLDM         %1,  {q0-q3}        \n\t"   // load matrix 1 (lhsPtr) q[0..q3]
 334                  "VLDM         %0,  {q8-q11}       \n\t"   // load matrix 2 (rhsPtr) q[q8-q11]
 335                  "VMUL.F32     q12, q8, d0[0]      \n\t"   // column 0 = rhsPtr[0..3] * lhsPtr[0..3]
 336                  "VMUL.F32     q13, q8, d2[0]      \n\t"   // column 1 = rhsPtr[0..3] * lhsPtr[4..7]
 337                  "VMUL.F32     q14, q8, d4[0]      \n\t"   // column 2 = rhsPtr[0..3] * lhsPtr[8..11]
 338                  "VMUL.F32     q15, q8, d6[0]      \n\t"   // column 3 = rhsPtr[0..3] * lhsPtr[12..15]
 339
 340                  "VMLA.F32     q12, q9, d0[1]      \n\t"   // column 0 += rhsPtr[4..7] * lhsPtr[0..3]
 341                  "VMLA.F32     q13, q9, d2[1]      \n\t"   // column 1 += rhsPtr[4..7] * lhsPtr[4..7]
 342                  "VMLA.F32     q14, q9, d4[1]      \n\t"   // column 2 += rhsPtr[4..7] * lhsPtr[8..11]
 343                  "VMLA.F32     q15, q9, d6[1]      \n\t"   // column 3 += rhsPtr[4..7] * lhsPtr[12..15]
 344
 345                  "VMLA.F32     q12, q10, d1[0]     \n\t"   // column 0 += rhsPtr[8..11] * lhsPtr[0..3]
 346                  "VMLA.F32     q13, q10, d3[0]     \n\t"   // column 1 += rhsPtr[8..11] * lhsPtr[4..7]
 347                  "VMLA.F32     q14, q10, d5[0]     \n\t"   // column 2 += rhsPtr[8..11] * lhsPtr[8..11]
 348                  "VMLA.F32     q15, q10, d7[0]     \n\t"   // column 3 += rhsPtr[8..11] * lhsPtr[12..15]
 349
 350                  "VMLA.F32     q12, q11, d1[1]     \n\t"   // column 0 += rhsPtr[12..15] * lhsPtr[0..3]
 351                  "VMLA.F32     q13, q11, d3[1]     \n\t"   // column 1 += rhsPtr[12..15] * lhsPtr[4..7]
 352                  "VMLA.F32     q14, q11, d5[1]     \n\t"   // column 2 += rhsPtr[12..15] * lhsPtr[8..11]
 353                  "VMLA.F32     q15, q11, d7[1]     \n\t"   // column 3 += rhsPtr[12..15] * lhsPtr[12..15]
 354                  "VSTM         %2,  {q12-q15}      \n\t"   // store entire output matrix.
 355                  : "+r"(rhsPtr), "+r"(lhsPtr), "+r"(temp)
 356                  :
 357                  : "q0", "q1", "q2", "q3", "q8", "q9", "q10", "q11", "q12", "q13", "q14", "q15", "memory" );
 358
 359 #endif
 360 }
 361
 362 void Matrix::Multiply( Matrix& result, const Matrix& lhs, const Quaternion& rhs )
 363 {
 364   MATH_INCREASE_COUNTER(PerformanceMonitor::MATRIX_MULTIPLYS);
 365   MATH_INCREASE_BY(PerformanceMonitor::FLOAT_POINT_MULTIPLY,54); // 54 = 36+18
 366
 367   float matrix[16];
 368   float* rhsPtr = &matrix[0];
 369   Convert( rhsPtr, rhs );
 370
 371   // quaternion contains just rotation so it really only needs 3x3 matrix
 372
 373   float* temp = result.AsFloat();
 374   const float* lhsPtr = lhs.AsFloat();
 375
 376 #ifndef  __ARM_NEON__
 377
 378   for( int32_t i=0; i < 4; i++ )
 379   {
 380     // i<<2 gives the first vector / column
 381     int32_t loc = i<<2;
 382     int32_t loc1 = loc + 1;
 383     int32_t loc2 = loc + 2;
 384     int32_t loc3 = loc + 3;
 385     float value0 = lhsPtr[loc];
 386     float value1 = lhsPtr[loc1];
 387     float value2 = lhsPtr[loc2];
 388     float value3 = lhsPtr[loc3];
 389     temp[loc]  = (value0 * rhsPtr[0]) +
 390                  (value1 * rhsPtr[4]) +
 391                  (value2 * rhsPtr[8]) +
 392                  (0.0f); //value3 * rhsPtr[12] is 0.0f
 393
 394     temp[loc1] = (value0 * rhsPtr[1]) +
 395                  (value1 * rhsPtr[5]) +
 396                  (value2 * rhsPtr[9]) +
 397                  (0.0f); //value3 * rhsPtr[13] is 0.0f
 398
 399     temp[loc2] = (value0 * rhsPtr[2]) +
 400                  (value1 * rhsPtr[6]) +
 401                  (value2 * rhsPtr[10])+
 402                  (0.0f); //value3 * rhsPtr[14] is 0.0f
 403
 404     temp[loc3] = (0.0f) + //value0 * rhsPtr[3] is 0.0f
 405                  (0.0f) + //value1 * rhsPtr[7] is 0.0f
 406                  (0.0f) + //value2 * rhsPtr[11] is 0.0f
 407                  (value3); // rhsPtr[15] is 1.0f
 408   }
 409
 410 #else
 411
 412   // 64 32bit registers,
 413   // aliased to
 414   // d = 64 bit double-word d0 -d31
 415   // q =128 bit quad-word   q0 -q15  (enough to handle a column of 4 floats in a matrix)
 416   // e.g. q0 = d0 and d1
 417   // load and stores interleaved as NEON can load and store while calculating
 418   asm volatile ( "VLDM         %1,   {q4-q6}       \n\t" // load matrix 1 (lhsPtr)
 419                  "VLD1.F32     {q7}, [%2]!         \n\t" // load matrix 2 (rhsPtr) [0..3]
 420                  "VMUL.F32     q0,   q7,   d8[0]   \n\t" // column 0 = rhsPtr[0..3] * lhsPtr[0..3]
 421                  "VMUL.F32     q1,   q7,   d10[0]  \n\t" // column 1 = rhsPtr[0..3] * lhsPtr[4..7]
 422                  "VMUL.F32     q2,   q7,   d12[0]  \n\t" // column 2 = rhsPtr[0..3] * lhsPtr[8..11]
 423                  "VLD1.F32     {q7}, [%2]!         \n\t" // load matrix 2 (rhsPtr) [4..7]
 424                  "VMLA.F32     q0,   q7,   d8[1]   \n\t" // column 0+= rhsPtr[4..7] * lhsPtr[0..3]
 425                  "VMLA.F32     q1,   q7,   d10[1]  \n\t" // column 1+= rhsPtr[4..7] * lhsPtr[4..7]
 426                  "VMLA.F32     q2,   q7,   d12[1]  \n\t" // column 2+= rhsPtr[4..7] * lhsPtr[8..11]
 427                  "VLD1.F32     {q7}, [%2]!         \n\t" // load matrix 2 (rhsPtr) [8..11]
 428                  "VMLA.F32     q0,   q7,   d9[0]   \n\t" // column 0+= rhsPtr[8..11] * lhsPtr[0..3]
 429                  "VMLA.F32     q1,   q7,   d11[0]  \n\t" // column 1+= rhsPtr[8..11] * lhsPtr[4..7]
 430                  "VMLA.F32     q2,   q7,   d13[0]  \n\t" // column 2+= rhsPtr[8..11] * lhsPtr[8..11]
 431                  "VSTM         %0,   {q0-q2}       \n\t" // store entire output matrix.
 432                  :
 433                  : "r"(temp), "r"(lhsPtr), "r" (rhsPtr)
 434                  : "%r0", "%q0", "%q1", "%q2", "%q4", "%q5", "%q6", "%q7", "memory" );
 435
 436   temp[ 12 ] = 0.0f;
 437   temp[ 13 ] = 0.0f;
 438   temp[ 14 ] = 0.0f;
 439   temp[ 15 ] = 1.0f;
 440 #endif
 441 }
 442
 443 Vector4 Matrix::operator*(const Vector4& rhs) const
 444 {
 445   MATH_INCREASE_BY(PerformanceMonitor::FLOAT_POINT_MULTIPLY,16);
 446
 447   Vector4 temp;
 448
 449 #ifndef  __ARM_NEON__
 450
 451   temp.x = rhs.x * mMatrix[0] + rhs.y * mMatrix[4] + rhs.z * mMatrix[8]  +  rhs.w * mMatrix[12];
 452   temp.y = rhs.x * mMatrix[1] + rhs.y * mMatrix[5] + rhs.z * mMatrix[9]  +  rhs.w * mMatrix[13];
 453   temp.z = rhs.x * mMatrix[2] + rhs.y * mMatrix[6] + rhs.z * mMatrix[10] +  rhs.w * mMatrix[14];
 454   temp.w = rhs.x * mMatrix[3] + rhs.y * mMatrix[7] + rhs.z * mMatrix[11] +  rhs.w * mMatrix[15];
 455
 456 #else
 457
 458   // 64 32bit registers,
 459   // aliased to
 460   // d = 64 bit double-word d0 -d31
 461   // q =128 bit quad-word   q0 -q15  (enough to handle a column of 4 floats in a matrix)
 462   // e.g. q0 = d0 and d1
 463   // load and stores interleaved as NEON can load and store while calculating
 464   asm volatile ( "VLD1.F32     {q0}, [%1]        \n\t"   //q0 = rhs
 465                  "VLD1.F32     {q9}, [%0]!       \n\t"
 466                  "VMUL.F32     q10,  q9,   d0[0] \n\t"
 467                  "VLD1.F32     {q9}, [%0]!       \n\t"
 468                  "VMLA.F32     q10,  q9,   d0[1] \n\t"   //q10 = mMatrix[0..3] * rhs + mMatrix[4..7] * rhs
 469                  "VLD1.F32     {q9}, [%0]!       \n\t"
 470                  "VMUL.F32     q11,  q9,   d1[0] \n\t"
 471                  "VLD1.F32     {q9}, [%0]!       \n\t"
 472                  "VMLA.F32     q11,  q9,   d1[1] \n\t"   //q11 = mMatrix[8..11] * rhs + mMatrix[12..15] * rhs
 473                  "VADD.F32     q10,  q10,  q11   \n\t"
 474                  "VST1.F32     {q10},[%2]        \n\t"   //temp = q10 + q11
 475                  :
 476                  : "r"(mMatrix), "r"(&rhs), "r"(&temp)
 477                  : "q0", "q9", "q10", "q11", "memory" );
 478 #endif
 479   return temp;
 480 }
 481
 482 bool Matrix::operator==(const Matrix& rhs) const
 483 {
 484   return (
 485   ( fabsf( mMatrix[0] - rhs.mMatrix[0] ) <= GetRangedEpsilon( mMatrix[0], rhs.mMatrix[0] ) ) &&
 486   ( fabsf( mMatrix[1] - rhs.mMatrix[1] ) <= GetRangedEpsilon( mMatrix[1], rhs.mMatrix[1] ) ) &&
 487   ( fabsf( mMatrix[2] - rhs.mMatrix[2] ) <= GetRangedEpsilon( mMatrix[2], rhs.mMatrix[2] ) ) &&
 488   ( fabsf( mMatrix[3] - rhs.mMatrix[3] ) <= GetRangedEpsilon( mMatrix[3], rhs.mMatrix[3] ) ) &&
 489   ( fabsf( mMatrix[4] - rhs.mMatrix[4] ) <= GetRangedEpsilon( mMatrix[4], rhs.mMatrix[4] ) ) &&
 490   ( fabsf( mMatrix[5] - rhs.mMatrix[5] ) <= GetRangedEpsilon( mMatrix[5], rhs.mMatrix[5] ) ) &&
 491   ( fabsf( mMatrix[6] - rhs.mMatrix[6] ) <= GetRangedEpsilon( mMatrix[6], rhs.mMatrix[6] ) ) &&
 492   ( fabsf( mMatrix[7] - rhs.mMatrix[7] ) <= GetRangedEpsilon( mMatrix[7], rhs.mMatrix[7] ) ) &&
 493   ( fabsf( mMatrix[8] - rhs.mMatrix[8] ) <= GetRangedEpsilon( mMatrix[8], rhs.mMatrix[8] ) ) &&
 494   ( fabsf( mMatrix[9] - rhs.mMatrix[9] ) <= GetRangedEpsilon( mMatrix[9], rhs.mMatrix[9] ) ) &&
 495   ( fabsf( mMatrix[10] - rhs.mMatrix[10] ) <= GetRangedEpsilon( mMatrix[10], rhs.mMatrix[10] ) ) &&
 496   ( fabsf( mMatrix[11] - rhs.mMatrix[11] ) <= GetRangedEpsilon( mMatrix[11], rhs.mMatrix[11] ) ) &&
 497   ( fabsf( mMatrix[12] - rhs.mMatrix[12] ) <= GetRangedEpsilon( mMatrix[12], rhs.mMatrix[12] ) ) &&
 498   ( fabsf( mMatrix[13] - rhs.mMatrix[13] ) <= GetRangedEpsilon( mMatrix[13], rhs.mMatrix[13] ) ) &&
 499   ( fabsf( mMatrix[14] - rhs.mMatrix[14] ) <= GetRangedEpsilon( mMatrix[14], rhs.mMatrix[14] ) ) &&
 500   ( fabsf( mMatrix[15] - rhs.mMatrix[15] ) <= GetRangedEpsilon( mMatrix[15], rhs.mMatrix[15] ) ) );
 501 }
 502
 503 bool Matrix::operator!=(const Matrix& rhs) const
 504 {
 505   if (*this == rhs)
 506   {
 507     return false;
 508   }
 509
 510   return true;
 511 }
 512
 513 void Matrix::OrthoNormalize()
 514 {
 515   Vector4 vector0(GetXAxis());
 516   Vector4 vector1(GetYAxis());
 517   Vector4 vector2(GetZAxis());
 518
 519   vector0.Normalize();
 520   vector1.Normalize();
 521   vector2 = vector0.Cross( vector1 );
 522   vector1 = vector2.Cross( vector0 );
 523
 524   memcpy( mMatrix, &vector0, NUM_BYTES_IN_ROW );
 525   memcpy( mMatrix + ROW1_OFFSET, &vector1, NUM_BYTES_IN_ROW );
 526   memcpy( mMatrix + ROW2_OFFSET, &vector2, NUM_BYTES_IN_ROW );
 527 }
 528
 529 Vector3 Matrix::GetXAxis() const
 530 {
 531   return Vector3(mMatrix[0], mMatrix[1], mMatrix[2]);
 532 }
 533
 534 Vector3 Matrix::GetYAxis() const
 535 {
 536   return Vector3(mMatrix[4], mMatrix[5], mMatrix[6]);
 537 }
 538
 539 Vector3 Matrix::GetZAxis() const
 540 {
 541   return Vector3(mMatrix[8], mMatrix[9], mMatrix[10]);
 542 }
 543
 544 void Matrix::SetXAxis(const Vector3& axis)
 545 {
 546   mMatrix[0] = axis.x;
 547   mMatrix[1] = axis.y;
 548   mMatrix[2] = axis.z;
 549 }
 550
 551 void Matrix::SetYAxis(const Vector3& axis)
 552 {
 553   mMatrix[4] = axis.x;
 554   mMatrix[5] = axis.y;
 555   mMatrix[6] = axis.z;
 556 }
 557
 558 void Matrix::SetZAxis(const Vector3& axis)
 559 {
 560   mMatrix[8] = axis.x;
 561   mMatrix[9] = axis.y;
 562   mMatrix[10] = axis.z;
 563 }
 564
 565 void Matrix::SetTransformComponents(const Vector3&    scale,
 566                                     const Quaternion& rotation,
 567                                     const Vector3&    translation )
 568 {
 569   if( rotation.IsIdentity() )
 570   {
 571     mMatrix[0] = scale.x;
 572     mMatrix[1] = 0.0f;
 573     mMatrix[2] = 0.0f;
 574     mMatrix[3] = 0.0f;
 575
 576     mMatrix[4] = 0.0f;
 577     mMatrix[5] = scale.y;
 578     mMatrix[6] = 0.0f;
 579     mMatrix[7] = 0.0f;
 580
 581     mMatrix[8] = 0.0f;
 582     mMatrix[9] = 0.0f;
 583     mMatrix[10]= scale.z;
 584     mMatrix[11]= 0.0f;
 585   }
 586   else
 587   {
 588     MATH_INCREASE_COUNTER(PerformanceMonitor::MATRIX_MULTIPLYS);
 589     MATH_INCREASE_BY(PerformanceMonitor::FLOAT_POINT_MULTIPLY,27); // 27 = 9+18
 590
 591     const float xx = rotation.mVector.x * rotation.mVector.x;
 592     const float yy = rotation.mVector.y * rotation.mVector.y;
 593     const float zz = rotation.mVector.z * rotation.mVector.z;
 594     const float xy = rotation.mVector.x * rotation.mVector.y;
 595     const float xz = rotation.mVector.x * rotation.mVector.z;
 596     const float wx = rotation.mVector.w * rotation.mVector.x;
 597     const float wy = rotation.mVector.w * rotation.mVector.y;
 598     const float wz = rotation.mVector.w * rotation.mVector.z;
 599     const float yz = rotation.mVector.y * rotation.mVector.z;
 600
 601     mMatrix[0] = (scale.x * (1.0f - 2.0f * (yy + zz)));
 602     mMatrix[1] = (scale.x * (       2.0f * (xy + wz)));
 603     mMatrix[2] = (scale.x * (       2.0f * (xz - wy)));
 604     mMatrix[3] = 0.0f;
 605
 606     mMatrix[4] = (scale.y * (       2.0f * (xy - wz)));
 607     mMatrix[5] = (scale.y * (1.0f - 2.0f * (xx + zz)));
 608     mMatrix[6] = (scale.y * (       2.0f * (yz + wx)));
 609     mMatrix[7] = 0.0f;
 610
 611     mMatrix[8] = (scale.z * (       2.0f * (xz + wy)));
 612     mMatrix[9] = (scale.z * (       2.0f * (yz - wx)));
 613     mMatrix[10]= (scale.z * (1.0f - 2.0f * (xx + yy)));
 614     mMatrix[11]= 0.0f;
 615   }
 616   // apply translation
 617   mMatrix[12] = translation.x;
 618   mMatrix[13] = translation.y;
 619   mMatrix[14] = translation.z;
 620   mMatrix[15] = 1.0f;
 621 }
 622
 623 void Matrix::SetInverseTransformComponents(const Vector3&    scale,
 624                                            const Quaternion& rotation,
 625                                            const Vector3&    translation )
 626 {
 627   Vector3 inverseTranslation = -translation;
 628   Vector3 inverseScale( 1.0f/scale.x, 1.0f/scale.y, 1.0f/scale.z);
 629   Quaternion inverseRotation(rotation);
 630   bool isRotated = ! inverseRotation.IsIdentity();
 631
 632   // Order of application is translation, rotation, scale.
 633   // Ensure translation is relative to scale & rotation:
 634
 635   if( isRotated )
 636   {
 637     inverseRotation.Invert();
 638     inverseTranslation = inverseRotation.Rotate(inverseTranslation);
 639   }
 640
 641   inverseTranslation *= inverseScale;
 642
 643   if( isRotated )
 644   {
 645     MATH_INCREASE_COUNTER(PerformanceMonitor::MATRIX_MULTIPLYS);
 646     MATH_INCREASE_BY(PerformanceMonitor::FLOAT_POINT_MULTIPLY,27); // 27 = 9+18
 647
 648     const float xx = inverseRotation.mVector.x * inverseRotation.mVector.x;
 649     const float yy = inverseRotation.mVector.y * inverseRotation.mVector.y;
 650     const float zz = inverseRotation.mVector.z * inverseRotation.mVector.z;
 651     const float xy = inverseRotation.mVector.x * inverseRotation.mVector.y;
 652     const float xz = inverseRotation.mVector.x * inverseRotation.mVector.z;
 653     const float wx = inverseRotation.mVector.w * inverseRotation.mVector.x;
 654     const float wy = inverseRotation.mVector.w * inverseRotation.mVector.y;
 655     const float wz = inverseRotation.mVector.w * inverseRotation.mVector.z;
 656     const float yz = inverseRotation.mVector.y * inverseRotation.mVector.z;
 657
 658     mMatrix[0] = (inverseScale.x * (1.0f - 2.0f * (yy + zz)));
 659     mMatrix[1] = (inverseScale.y * (2.0f * (xy + wz)));
 660     mMatrix[2] = (inverseScale.z * (2.0f * (xz - wy)));
 661     mMatrix[3] = 0.0f;
 662
 663     mMatrix[4] = (inverseScale.x * (2.0f * (xy - wz)));
 664     mMatrix[5] = (inverseScale.y * (1.0f - 2.0f * (xx + zz)));
 665     mMatrix[6] = (inverseScale.z * (2.0f * (yz + wx)));
 666     mMatrix[7] = 0.0f;
 667
 668     mMatrix[8] = (inverseScale.x * (2.0f * (xz + wy)));
 669     mMatrix[9] = (inverseScale.y * (2.0f * (yz - wx)));
 670     mMatrix[10]= (inverseScale.z * (1.0f - 2.0f * (xx + yy)));
 671     mMatrix[11]= 0.0f;
 672   }
 673   else
 674   {
 675     mMatrix[0] = inverseScale.x;
 676     mMatrix[1] = 0.0f;
 677     mMatrix[2] = 0.0f;
 678     mMatrix[3] = 0.0f;
 679
 680     mMatrix[4] = 0.0f;
 681     mMatrix[5] = inverseScale.y;
 682     mMatrix[6] = 0.0f;
 683     mMatrix[7] = 0.0f;
 684
 685     mMatrix[8] = 0.0f;
 686     mMatrix[9] = 0.0f;
 687     mMatrix[10]= inverseScale.z;
 688     mMatrix[11]= 0.0f;
 689   }
 690
 691   // apply translation
 692   mMatrix[12] = inverseTranslation.x;
 693   mMatrix[13] = inverseTranslation.y;
 694   mMatrix[14] = inverseTranslation.z;
 695   mMatrix[15] = 1.0f;
 696 }
 697
 698 void Matrix::SetInverseTransformComponents(const Vector3&    xAxis,
 699                                            const Vector3&    yAxis,
 700                                            const Vector3&    zAxis,
 701                                            const Vector3&    translation )
 702 {
 703   // x, y, z axis parameters represent a orthonormal basis with no scaling, i.e. a rotation matrix.
 704   // Invert rotation by transposing in place
 705
 706   // Order of application is translation, rotation
 707
 708   mMatrix[0]  = xAxis.x;
 709   mMatrix[1]  = yAxis.x;
 710   mMatrix[2]  = zAxis.x;
 711   mMatrix[3]  = 0.0f;
 712
 713   mMatrix[4]  = xAxis.y;
 714   mMatrix[5]  = yAxis.y;
 715   mMatrix[6]  = zAxis.y;
 716   mMatrix[7]  = 0.0f;
 717
 718   mMatrix[8]  = xAxis.z;
 719   mMatrix[9]  = yAxis.z;
 720   mMatrix[10] = zAxis.z;
 721   mMatrix[11] = 0.0f;
 722   mMatrix[12] = 0.0f;
 723   mMatrix[13] = 0.0f;
 724   mMatrix[14] = 0.0f;
 725   mMatrix[15] = 1.0f;
 726
 727   // Ensure translation is relative to scale & rotation:
 728
 729   Vector4 inverseTranslation( -translation.x, -translation.y, -translation.z, 1.0f);
 730   inverseTranslation = *this * inverseTranslation; // Rotate inverse translation
 731   inverseTranslation.w = 1.0f;
 732   SetTranslation(inverseTranslation);
 733 }
 734
 735
 736 void Matrix::GetTransformComponents(Vector3&     position,
 737                                     Quaternion&  rotation,
 738                                     Vector3&     scale) const
 739 {
 740   position = GetTranslation3();
 741
 742   // Derive scale from axis lengths.
 743   Vector3 theScale(GetXAxis().Length(), GetYAxis().Length(), GetZAxis().Length());
 744   scale = theScale;
 745
 746   if( ! ( fabs(theScale.x - Vector3::ONE.x) < ROTATION_EPSILON &&
 747           fabs(theScale.y - Vector3::ONE.y) < ROTATION_EPSILON &&
 748           fabs(theScale.z - Vector3::ONE.z) < ROTATION_EPSILON ) )
 749   {
 750     MATH_INCREASE_COUNTER(PerformanceMonitor::MATRIX_MULTIPLYS);
 751     MATH_INCREASE_BY(PerformanceMonitor::FLOAT_POINT_MULTIPLY,9);
 752
 753     // Non-identity scale is embedded into rotation matrix. Remove it first:
 754     Matrix m(*this);
 755     Vector3 inverseScale(1.0f/theScale.x, 1.0f/theScale.y, 1.0f/theScale.z);
 756     m.mMatrix[0] *= inverseScale.x;
 757     m.mMatrix[1] *= inverseScale.x;
 758     m.mMatrix[2] *= inverseScale.x;
 759     m.mMatrix[4] *= inverseScale.y;
 760     m.mMatrix[5] *= inverseScale.y;
 761     m.mMatrix[6] *= inverseScale.y;
 762     m.mMatrix[8] *= inverseScale.z;
 763     m.mMatrix[9] *= inverseScale.z;
 764     m.mMatrix[10] *= inverseScale.z;
 765
 766     Quaternion theRotation(m);
 767
 768     // If the imaginary components are close to zero, then use null quaternion instead.
 769     if( fabs(theRotation.mVector.x) < ROTATION_EPSILON &&
 770         fabs(theRotation.mVector.y) < ROTATION_EPSILON &&
 771         fabs(theRotation.mVector.z) < ROTATION_EPSILON )
 772     {
 773       theRotation = Quaternion();
 774     }
 775     rotation = theRotation;
 776   }
 777   else
 778   {
 779     Quaternion theRotation(*this);
 780
 781     // If the imaginary components are close to zero, then use null quaternion instead.
 782     if( fabs(theRotation.mVector.x) < ROTATION_EPSILON &&
 783         fabs(theRotation.mVector.y) < ROTATION_EPSILON &&
 784         fabs(theRotation.mVector.z) < ROTATION_EPSILON )
 785     {
 786       theRotation = Quaternion();
 787     }
 788     rotation = theRotation;
 789   }
 790 }
 791
 792
 793
 794 std::ostream& operator<< (std::ostream& o, const Matrix& matrix)
 795 {
 796   return o << "[ " << matrix.mMatrix[0]  << ", " << matrix.mMatrix[1]  << ", " << matrix.mMatrix[2]  << ", " << matrix.mMatrix[3]  << ", "
 797                    << matrix.mMatrix[4]  << ", " << matrix.mMatrix[5]  << ", " << matrix.mMatrix[6]  << ", " << matrix.mMatrix[7]  << ", "
 798                    << matrix.mMatrix[8]  << ", " << matrix.mMatrix[9]  << ", " << matrix.mMatrix[10] << ", " << matrix.mMatrix[11] << ", "
 799                    << matrix.mMatrix[12] << ", " << matrix.mMatrix[13] << ", " << matrix.mMatrix[14] << ", " << matrix.mMatrix[15] << " ]";
 800 }
 801
 802 } // namespace Dali