dali/public-api/math/matrix.cpp

   1 /*
   2  * Copyright (c) 2014 Samsung Electronics Co., Ltd.
   3  *
   4  * Licensed under the Apache License, Version 2.0 (the "License");
   5  * you may not use this file except in compliance with the License.
   6  * You may obtain a copy of the License at
   7  *
   8  * http://www.apache.org/licenses/LICENSE-2.0
   9  *
  10  * Unless required by applicable law or agreed to in writing, software
  11  * distributed under the License is distributed on an "AS IS" BASIS,
  12  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
  13  * See the License for the specific language governing permissions and
  14  * limitations under the License.
  15  *
  16  */
  17
  18 // CLASS HEADERS
  19 #include <dali/public-api/math/matrix.h>
  20
  21 // EXTERNAL INCLUDES
  22 #include <math.h>
  23 #include <string.h>
  24
  25 // INTERNAL INCLUDES
  26 #include <dali/public-api/common/dali-common.h>
  27 #include <dali/public-api/math/vector3.h>
  28 #include <dali/public-api/math/vector4.h>
  29 #include <dali/public-api/math/quaternion.h>
  30 #include <dali/public-api/math/math-utils.h>
  31 #include <dali/internal/render/common/performance-monitor.h>
  32
  33 namespace
  34 {
  35 const float ROTATION_EPSILON = 0.003f; // Deliberately large
  36
  37 const size_t NUM_BYTES_IN_ROW_OF_3( 3 * sizeof( float ) );
  38 const size_t NUM_BYTES_IN_ROW( 4 * sizeof( float ) );
  39 const size_t NUM_BYTES_IN_MATRIX( 16 * sizeof( float ) );
  40 const size_t ROW1_OFFSET( 4 );
  41 const size_t ROW2_OFFSET( 8 );
  42 const size_t ROW3_OFFSET( 12 );
  43
  44 /**
  45  * Helper to convert to Quaternion to float16 array
  46  */
  47 void Convert( float*& m, const Dali::Quaternion& rotation )
  48 {
  49   const float xx = rotation.mVector.x * rotation.mVector.x;
  50   const float yy = rotation.mVector.y * rotation.mVector.y;
  51   const float zz = rotation.mVector.z * rotation.mVector.z;
  52   const float xy = rotation.mVector.x * rotation.mVector.y;
  53   const float xz = rotation.mVector.x * rotation.mVector.z;
  54   const float wx = rotation.mVector.w * rotation.mVector.x;
  55   const float wy = rotation.mVector.w * rotation.mVector.y;
  56   const float wz = rotation.mVector.w * rotation.mVector.z;
  57   const float yz = rotation.mVector.y * rotation.mVector.z;
  58
  59   m[0] = 1.0f - 2.0f * (yy + zz);
  60   m[1] =        2.0f * (xy + wz);
  61   m[2] =        2.0f * (xz - wy);
  62   m[3] = 0.0f;
  63
  64   m[4] =        2.0f * (xy - wz);
  65   m[5] = 1.0f - 2.0f * (xx + zz);
  66   m[6] =        2.0f * (yz + wx);
  67   m[7] = 0.0f;
  68
  69   m[8] =        2.0f * (xz + wy);
  70   m[9] =        2.0f * (yz - wx);
  71   m[10]= 1.0f - 2.0f * (xx + yy);
  72   m[11]= 0.0f;
  73
  74   m[12]= 0.0f;
  75   m[13]= 0.0f;
  76   m[14]= 0.0f;
  77   m[15]= 1.0f;
  78 }
  79 }
  80
  81 namespace Dali
  82 {
  83
  84 using Internal::PerformanceMonitor;
  85
  86 const float identityArray[] = {1.0f, 0.0f, 0.0f, 0.0f,
  87                                0.0f, 1.0f, 0.0f, 0.0f,
  88                                0.0f, 0.0f, 1.0f, 0.0f,
  89                                0.0f, 0.0f, 0.0f, 1.0f};
  90
  91 const Matrix Matrix::IDENTITY(identityArray);
  92
  93 Matrix::Matrix()
  94 {
  95   memset( mMatrix, 0, NUM_BYTES_IN_MATRIX );
  96 }
  97
  98 Matrix::Matrix( bool initialize )
  99 {
 100   if( initialize )
 101   {
 102     memset( mMatrix, 0, NUM_BYTES_IN_MATRIX );
 103   }
 104 }
 105
 106 Matrix::Matrix(const float* array)
 107 {
 108   memcpy( mMatrix, array, NUM_BYTES_IN_MATRIX );
 109 }
 110
 111 Matrix::Matrix( const Quaternion& rotation )
 112 {
 113   MATH_INCREASE_BY(PerformanceMonitor::FLOAT_POINT_MULTIPLY,18);
 114
 115   float* matrixPtr = &mMatrix[0];
 116   Convert( matrixPtr, rotation );
 117 }
 118
 119 Matrix::Matrix( const Matrix& matrix )
 120 {
 121   memcpy( mMatrix, matrix.mMatrix, NUM_BYTES_IN_MATRIX );
 122 }
 123
 124 Matrix& Matrix::operator=( const Matrix& matrix )
 125 {
 126   // no point copying if self assigning
 127   if( this != &matrix )
 128   {
 129     memcpy( mMatrix, matrix.mMatrix, NUM_BYTES_IN_MATRIX );
 130   }
 131   return *this;
 132 }
 133
 134 void Matrix::InvertTransform(Matrix& result) const
 135 {
 136   MATH_INCREASE_BY(PerformanceMonitor::FLOAT_POINT_MULTIPLY,12);
 137
 138   float* m1 = result.AsFloat();
 139
 140   DALI_ASSERT_ALWAYS( EqualsZero( mMatrix[3] ) && EqualsZero( mMatrix[7] ) && EqualsZero( mMatrix[11] ) && Equals( mMatrix[15], 1.0f ) && "Must be a transform matrix" );
 141
 142   m1[0] = mMatrix[0];
 143   m1[1] = mMatrix[4];
 144   m1[2] = mMatrix[8];
 145   m1[3] = 0.0f;
 146
 147   m1[4] = mMatrix[1];
 148   m1[5] = mMatrix[5];
 149   m1[6] = mMatrix[9];
 150   m1[7] = 0.0f;
 151
 152   m1[8] = mMatrix[2];
 153   m1[9] = mMatrix[6];
 154   m1[10] = mMatrix[10];
 155   m1[11] = 0.0f;
 156
 157   m1[12] = -( ( mMatrix[0] * mMatrix[12] ) + ( mMatrix[1] * mMatrix[13] ) + ( mMatrix[2] * mMatrix[14] ) + ( mMatrix[3] * mMatrix[15] ) );
 158   m1[13] = -( ( mMatrix[4] * mMatrix[12] ) + ( mMatrix[5] * mMatrix[13] ) + ( mMatrix[6] * mMatrix[14] ) + ( mMatrix[7] * mMatrix[15] ) );
 159   m1[14] = -( ( mMatrix[8] * mMatrix[12] ) + ( mMatrix[9] * mMatrix[13] ) + ( mMatrix[10] * mMatrix[14] ) + ( mMatrix[11] * mMatrix[15] ) );
 160   m1[15] = 1.0f;
 161 }
 162
 163 static bool InvertMatrix(const float* m, float* out)
 164 {
 165   float inv[16];
 166
 167   MATH_INCREASE_BY(PerformanceMonitor::FLOAT_POINT_MULTIPLY,192);  // 12 x 16 multiples
 168
 169   inv[0] =   m[5]*m[10]*m[15] - m[5]*m[11]*m[14] - m[9]*m[6]*m[15] + m[9]*m[7]*m[14] + m[13]*m[6]*m[11] - m[13]*m[7]*m[10];
 170   inv[4] =  -m[4]*m[10]*m[15] + m[4]*m[11]*m[14] + m[8]*m[6]*m[15] - m[8]*m[7]*m[14] - m[12]*m[6]*m[11] + m[12]*m[7]*m[10];
 171   inv[8] =   m[4]*m[9]*m[15] - m[4]*m[11]*m[13] - m[8]*m[5]*m[15] + m[8]*m[7]*m[13] + m[12]*m[5]*m[11] - m[12]*m[7]*m[9];
 172   inv[12] = -m[4]*m[9]*m[14] + m[4]*m[10]*m[13] + m[8]*m[5]*m[14] - m[8]*m[6]*m[13] - m[12]*m[5]*m[10] + m[12]*m[6]*m[9];
 173   inv[1] =  -m[1]*m[10]*m[15] + m[1]*m[11]*m[14] + m[9]*m[2]*m[15] - m[9]*m[3]*m[14] - m[13]*m[2]*m[11] + m[13]*m[3]*m[10];
 174   inv[5] =   m[0]*m[10]*m[15] - m[0]*m[11]*m[14] - m[8]*m[2]*m[15] + m[8]*m[3]*m[14] + m[12]*m[2]*m[11] - m[12]*m[3]*m[10];
 175   inv[9] =  -m[0]*m[9]*m[15] + m[0]*m[11]*m[13] + m[8]*m[1]*m[15] - m[8]*m[3]*m[13] - m[12]*m[1]*m[11] + m[12]*m[3]*m[9];
 176   inv[13] =  m[0]*m[9]*m[14] - m[0]*m[10]*m[13] - m[8]*m[1]*m[14] + m[8]*m[2]*m[13] + m[12]*m[1]*m[10] - m[12]*m[2]*m[9];
 177   inv[2] =   m[1]*m[6]*m[15] - m[1]*m[7]*m[14] - m[5]*m[2]*m[15] + m[5]*m[3]*m[14] + m[13]*m[2]*m[7] - m[13]*m[3]*m[6];
 178   inv[6] =  -m[0]*m[6]*m[15] + m[0]*m[7]*m[14] + m[4]*m[2]*m[15] - m[4]*m[3]*m[14] - m[12]*m[2]*m[7] + m[12]*m[3]*m[6];
 179   inv[10] =  m[0]*m[5]*m[15] - m[0]*m[7]*m[13] - m[4]*m[1]*m[15] + m[4]*m[3]*m[13] + m[12]*m[1]*m[7] - m[12]*m[3]*m[5];
 180   inv[14] = -m[0]*m[5]*m[14] + m[0]*m[6]*m[13] + m[4]*m[1]*m[14] - m[4]*m[2]*m[13] - m[12]*m[1]*m[6] + m[12]*m[2]*m[5];
 181   inv[3] =  -m[1]*m[6]*m[11] + m[1]*m[7]*m[10] + m[5]*m[2]*m[11] - m[5]*m[3]*m[10] - m[9]*m[2]*m[7] + m[9]*m[3]*m[6];
 182   inv[7] =   m[0]*m[6]*m[11] - m[0]*m[7]*m[10] - m[4]*m[2]*m[11] + m[4]*m[3]*m[10] + m[8]*m[2]*m[7] - m[8]*m[3]*m[6];
 183   inv[11] = -m[0]*m[5]*m[11] + m[0]*m[7]*m[9] + m[4]*m[1]*m[11] - m[4]*m[3]*m[9] - m[8]*m[1]*m[7] + m[8]*m[3]*m[5];
 184   inv[15] =  m[0]*m[5]*m[10] - m[0]*m[6]*m[9] - m[4]*m[1]*m[10] + m[4]*m[2]*m[9] + m[8]*m[1]*m[6] - m[8]*m[2]*m[5];
 185
 186   float det = m[0]*inv[0] + m[1]*inv[4] + m[2]*inv[8] + m[3]*inv[12];
 187
 188   // In the case where the determinant is exactly zero, the matrix is non-invertible
 189   if ( EqualsZero( det ) )
 190   {
 191     return false;
 192   }
 193
 194   det = 1.0 / det;
 195
 196   for (int i = 0; i < 16; i++)
 197   {
 198     out[i] = inv[i] * det;
 199   }
 200
 201   return true;
 202 }
 203
 204 bool Matrix::Invert()
 205 {
 206   Matrix temp(*this);
 207
 208   return InvertMatrix(temp.AsFloat(), mMatrix);
 209 }
 210
 211 void Matrix::Transpose()
 212 {
 213   float temp = mMatrix[1];
 214   mMatrix[1] = mMatrix[4];
 215   mMatrix[4] = temp;
 216
 217   temp = mMatrix[2];
 218   mMatrix[2] = mMatrix[8];
 219   mMatrix[8] = temp;
 220
 221   temp = mMatrix[3];
 222   mMatrix[3] = mMatrix[12];
 223   mMatrix[12] = temp;
 224
 225   temp = mMatrix[6];
 226   mMatrix[6] = mMatrix[9];
 227   mMatrix[9] = temp;
 228
 229   temp = mMatrix[7];
 230   mMatrix[7] = mMatrix[13];
 231   mMatrix[13] = temp;
 232
 233   temp = mMatrix[11];
 234   mMatrix[11] = mMatrix[14];
 235   mMatrix[14] = temp;
 236 }
 237
 238 void Matrix::SetIdentity()
 239 {
 240   memcpy( mMatrix, identityArray, NUM_BYTES_IN_MATRIX );
 241 }
 242
 243 void Matrix::SetIdentityAndScale( const Vector3& scale )
 244 {
 245   // initialize to zeros
 246   memset( mMatrix, 0, NUM_BYTES_IN_MATRIX );
 247
 248   // just apply scale on the diagonal
 249   mMatrix[0]  = scale.x;
 250   mMatrix[5]  = scale.y;
 251   mMatrix[10] = scale.z;
 252   mMatrix[15] = 1.0f;
 253 }
 254
 255 void Matrix::SetTranslation(const Vector4& translation)
 256 {
 257   memcpy( mMatrix + ROW3_OFFSET, &translation, NUM_BYTES_IN_ROW );
 258 }
 259 void Matrix::SetTranslation(const Vector3& other)
 260 {
 261   memcpy( mMatrix + ROW3_OFFSET, &other, NUM_BYTES_IN_ROW_OF_3 );
 262   mMatrix[15] = 1.0f;
 263 }
 264
 265 void Matrix::Multiply( Matrix& result, const Matrix& lhs, const Matrix& rhs )
 266 {
 267   MATH_INCREASE_COUNTER(PerformanceMonitor::MATRIX_MULTIPLYS);
 268   MATH_INCREASE_BY(PerformanceMonitor::FLOAT_POINT_MULTIPLY,64); // 64 = 16*4
 269
 270   float* temp = result.AsFloat();
 271   const float* rhsPtr  = rhs.AsFloat();
 272   const float* lhsPtr = lhs.AsFloat();
 273
 274 #ifndef  __ARM_NEON__
 275
 276   for( int i=0; i < 4; i++ )
 277   {
 278     // i<<2 gives the first vector / column
 279     int loc = i<<2;
 280     int loc1 = loc + 1;
 281     int loc2 = loc + 2;
 282     int loc3 = loc + 3;
 283     float value0 = lhsPtr[loc];
 284     float value1 = lhsPtr[loc1];
 285     float value2 = lhsPtr[loc2];
 286     float value3 = lhsPtr[loc3];
 287     temp[loc]  = (value0 * rhsPtr[0]) +
 288                  (value1 * rhsPtr[4]) +
 289                  (value2 * rhsPtr[8]) +
 290                  (value3 * rhsPtr[12]);
 291
 292     temp[loc1] = (value0 * rhsPtr[1]) +
 293                  (value1 * rhsPtr[5]) +
 294                  (value2 * rhsPtr[9]) +
 295                  (value3 * rhsPtr[13]);
 296
 297     temp[loc2] = (value0 * rhsPtr[2]) +
 298                  (value1 * rhsPtr[6]) +
 299                  (value2 * rhsPtr[10])+
 300                  (value3 * rhsPtr[14]);
 301
 302     temp[loc3] = (value0 * rhsPtr[3]) +
 303                  (value1 * rhsPtr[7]) +
 304                  (value2 * rhsPtr[11])+
 305                  (value3 * rhsPtr[15]);
 306   }
 307
 308 #else
 309
 310   // 64 32bit registers,
 311   // aliased to
 312   // d = 64 bit double-word d0 -d31
 313   // q =128 bit quad-word   q0 -q15  (enough to handle a column of 4 floats in a matrix)
 314   // e.g. q0 = d0 and d1
 315
 316   // load and stores interleaved as NEON can load and store while calculating
 317   asm volatile ( "VLDM         %1,  {q0-q3}        \n\t"   // load matrix 1 (lhsPtr) q[0..q3]
 318                  "VLDM         %0,  {q8-q11}       \n\t"   // load matrix 2 (rhsPtr) q[q8-q11]
 319                  "VMUL.F32     q12, q8, d0[0]      \n\t"   // column 0 = rhsPtr[0..3] * lhsPtr[0..3]
 320                  "VMUL.F32     q13, q8, d2[0]      \n\t"   // column 1 = rhsPtr[0..3] * lhsPtr[4..7]
 321                  "VMUL.F32     q14, q8, d4[0]      \n\t"   // column 2 = rhsPtr[0..3] * lhsPtr[8..11]
 322                  "VMUL.F32     q15, q8, d6[0]      \n\t"   // column 3 = rhsPtr[0..3] * lhsPtr[12..15]
 323
 324                  "VMLA.F32     q12, q9, d0[1]      \n\t"   // column 0 += rhsPtr[4..7] * lhsPtr[0..3]
 325                  "VMLA.F32     q13, q9, d2[1]      \n\t"   // column 1 += rhsPtr[4..7] * lhsPtr[4..7]
 326                  "VMLA.F32     q14, q9, d4[1]      \n\t"   // column 2 += rhsPtr[4..7] * lhsPtr[8..11]
 327                  "VMLA.F32     q15, q9, d6[1]      \n\t"   // column 3 += rhsPtr[4..7] * lhsPtr[12..15]
 328
 329                  "VMLA.F32     q12, q10, d1[0]     \n\t"   // column 0 += rhsPtr[8..11] * lhsPtr[0..3]
 330                  "VMLA.F32     q13, q10, d3[0]     \n\t"   // column 1 += rhsPtr[8..11] * lhsPtr[4..7]
 331                  "VMLA.F32     q14, q10, d5[0]     \n\t"   // column 2 += rhsPtr[8..11] * lhsPtr[8..11]
 332                  "VMLA.F32     q15, q10, d7[0]     \n\t"   // column 3 += rhsPtr[8..11] * lhsPtr[12..15]
 333
 334                  "VMLA.F32     q12, q11, d1[1]     \n\t"   // column 0 += rhsPtr[12..15] * lhsPtr[0..3]
 335                  "VMLA.F32     q13, q11, d3[1]     \n\t"   // column 1 += rhsPtr[12..15] * lhsPtr[4..7]
 336                  "VMLA.F32     q14, q11, d5[1]     \n\t"   // column 2 += rhsPtr[12..15] * lhsPtr[8..11]
 337                  "VMLA.F32     q15, q11, d7[1]     \n\t"   // column 3 += rhsPtr[12..15] * lhsPtr[12..15]
 338                  "VSTM         %2,  {q12-q15}      \n\t"   // store entire output matrix.
 339                  : "+r"(rhsPtr), "+r"(lhsPtr), "+r"(temp)
 340                  :
 341                  : "q0", "q1", "q2", "q3", "q8", "q9", "q10", "q11", "q12", "q13", "q14", "q15", "memory" );
 342
 343 #endif
 344 }
 345
 346 void Matrix::Multiply( Matrix& result, const Matrix& lhs, const Quaternion& rhs )
 347 {
 348   MATH_INCREASE_COUNTER(PerformanceMonitor::MATRIX_MULTIPLYS);
 349   MATH_INCREASE_BY(PerformanceMonitor::FLOAT_POINT_MULTIPLY,54); // 54 = 36+18
 350
 351   float matrix[16];
 352   float* rhsPtr = &matrix[0];
 353   Convert( rhsPtr, rhs );
 354
 355   // quaternion contains just rotation so it really only needs 3x3 matrix
 356
 357   float* temp = result.AsFloat();
 358   const float* lhsPtr = lhs.AsFloat();
 359
 360 #ifndef  __ARM_NEON__
 361
 362   for( int i=0; i < 4; i++ )
 363   {
 364     // i<<2 gives the first vector / column
 365     int loc = i<<2;
 366     int loc1 = loc + 1;
 367     int loc2 = loc + 2;
 368     int loc3 = loc + 3;
 369     float value0 = lhsPtr[loc];
 370     float value1 = lhsPtr[loc1];
 371     float value2 = lhsPtr[loc2];
 372     float value3 = lhsPtr[loc3];
 373     temp[loc]  = (value0 * rhsPtr[0]) +
 374                  (value1 * rhsPtr[4]) +
 375                  (value2 * rhsPtr[8]) +
 376                  (0.0f); //value3 * rhsPtr[12] is 0.0f
 377
 378     temp[loc1] = (value0 * rhsPtr[1]) +
 379                  (value1 * rhsPtr[5]) +
 380                  (value2 * rhsPtr[9]) +
 381                  (0.0f); //value3 * rhsPtr[13] is 0.0f
 382
 383     temp[loc2] = (value0 * rhsPtr[2]) +
 384                  (value1 * rhsPtr[6]) +
 385                  (value2 * rhsPtr[10])+
 386                  (0.0f); //value3 * rhsPtr[14] is 0.0f
 387
 388     temp[loc3] = (0.0f) + //value0 * rhsPtr[3] is 0.0f
 389                  (0.0f) + //value1 * rhsPtr[7] is 0.0f
 390                  (0.0f) + //value2 * rhsPtr[11] is 0.0f
 391                  (value3); // rhsPtr[15] is 1.0f
 392   }
 393
 394 #else
 395
 396   // 64 32bit registers,
 397   // aliased to
 398   // d = 64 bit double-word d0 -d31
 399   // q =128 bit quad-word   q0 -q15  (enough to handle a column of 4 floats in a matrix)
 400   // e.g. q0 = d0 and d1
 401   // load and stores interleaved as NEON can load and store while calculating
 402   asm volatile ( "VLDM         %1,   {q4-q6}       \n\t" // load matrix 1 (lhsPtr)
 403                  "VLD1.F32     {q7}, [%2]!         \n\t" // load matrix 2 (rhsPtr) [0..3]
 404                  "VMUL.F32     q0,   q7,   d8[0]   \n\t" // column 0 = rhsPtr[0..3] * lhsPtr[0..3]
 405                  "VMUL.F32     q1,   q7,   d10[0]  \n\t" // column 1 = rhsPtr[0..3] * lhsPtr[4..7]
 406                  "VMUL.F32     q2,   q7,   d12[0]  \n\t" // column 2 = rhsPtr[0..3] * lhsPtr[8..11]
 407                  "VLD1.F32     {q7}, [%2]!         \n\t" // load matrix 2 (rhsPtr) [4..7]
 408                  "VMLA.F32     q0,   q7,   d8[1]   \n\t" // column 0+= rhsPtr[4..7] * lhsPtr[0..3]
 409                  "VMLA.F32     q1,   q7,   d10[1]  \n\t" // column 1+= rhsPtr[4..7] * lhsPtr[4..7]
 410                  "VMLA.F32     q2,   q7,   d12[1]  \n\t" // column 2+= rhsPtr[4..7] * lhsPtr[8..11]
 411                  "VLD1.F32     {q7}, [%2]!         \n\t" // load matrix 2 (rhsPtr) [8..11]
 412                  "VMLA.F32     q0,   q7,   d9[0]   \n\t" // column 0+= rhsPtr[8..11] * lhsPtr[0..3]
 413                  "VMLA.F32     q1,   q7,   d11[0]  \n\t" // column 1+= rhsPtr[8..11] * lhsPtr[4..7]
 414                  "VMLA.F32     q2,   q7,   d13[0]  \n\t" // column 2+= rhsPtr[8..11] * lhsPtr[8..11]
 415                  "VSTM         %0,   {q0-q2}       \n\t" // store entire output matrix.
 416                  :
 417                  : "r"(temp), "r"(lhsPtr), "r" (rhsPtr)
 418                  : "%r0", "%q0", "%q1", "%q2", "%q4", "%q5", "%q6", "%q7", "memory" );
 419
 420   temp[ 12 ] = 0.0f;
 421   temp[ 13 ] = 0.0f;
 422   temp[ 14 ] = 0.0f;
 423   temp[ 15 ] = 1.0f;
 424 #endif
 425 }
 426
 427 Vector4 Matrix::operator*(const Vector4& rhs) const
 428 {
 429   MATH_INCREASE_BY(PerformanceMonitor::FLOAT_POINT_MULTIPLY,16);
 430
 431   Vector4 temp;
 432
 433 #ifndef  __ARM_NEON__
 434
 435   temp.x = rhs.x * mMatrix[0] + rhs.y * mMatrix[4] + rhs.z * mMatrix[8]  +  rhs.w * mMatrix[12];
 436   temp.y = rhs.x * mMatrix[1] + rhs.y * mMatrix[5] + rhs.z * mMatrix[9]  +  rhs.w * mMatrix[13];
 437   temp.z = rhs.x * mMatrix[2] + rhs.y * mMatrix[6] + rhs.z * mMatrix[10] +  rhs.w * mMatrix[14];
 438   temp.w = rhs.x * mMatrix[3] + rhs.y * mMatrix[7] + rhs.z * mMatrix[11] +  rhs.w * mMatrix[15];
 439
 440 #else
 441
 442   // 64 32bit registers,
 443   // aliased to
 444   // d = 64 bit double-word d0 -d31
 445   // q =128 bit quad-word   q0 -q15  (enough to handle a column of 4 floats in a matrix)
 446   // e.g. q0 = d0 and d1
 447   // load and stores interleaved as NEON can load and store while calculating
 448   asm volatile ( "VLD1.F32     {q0}, [%1]        \n\t"   //q0 = rhs
 449                  "VLD1.F32     {q9}, [%0]!       \n\t"
 450                  "VMUL.F32     q10,  q9,   d0[0] \n\t"
 451                  "VLD1.F32     {q9}, [%0]!       \n\t"
 452                  "VMLA.F32     q10,  q9,   d0[1] \n\t"   //q10 = mMatrix[0..3] * rhs + mMatrix[4..7] * rhs
 453                  "VLD1.F32     {q9}, [%0]!       \n\t"
 454                  "VMUL.F32     q11,  q9,   d1[0] \n\t"
 455                  "VLD1.F32     {q9}, [%0]!       \n\t"
 456                  "VMLA.F32     q11,  q9,   d1[1] \n\t"   //q11 = mMatrix[8..11] * rhs + mMatrix[12..15] * rhs
 457                  "VADD.F32     q10,  q10,  q11   \n\t"
 458                  "VST1.F32     {q10},[%2]        \n\t"   //temp = q10 + q11
 459                  :
 460                  : "r"(mMatrix), "r"(&rhs), "r"(&temp)
 461                  : "q0", "q9", "q10", "q11", "memory" );
 462 #endif
 463   return temp;
 464 }
 465
 466 bool Matrix::operator==(const Matrix& rhs) const
 467 {
 468   return (
 469   ( fabsf( mMatrix[0] - rhs.mMatrix[0] ) <= GetRangedEpsilon( mMatrix[0], rhs.mMatrix[0] ) ) &&
 470   ( fabsf( mMatrix[1] - rhs.mMatrix[1] ) <= GetRangedEpsilon( mMatrix[1], rhs.mMatrix[1] ) ) &&
 471   ( fabsf( mMatrix[2] - rhs.mMatrix[2] ) <= GetRangedEpsilon( mMatrix[2], rhs.mMatrix[2] ) ) &&
 472   ( fabsf( mMatrix[3] - rhs.mMatrix[3] ) <= GetRangedEpsilon( mMatrix[3], rhs.mMatrix[3] ) ) &&
 473   ( fabsf( mMatrix[4] - rhs.mMatrix[4] ) <= GetRangedEpsilon( mMatrix[4], rhs.mMatrix[4] ) ) &&
 474   ( fabsf( mMatrix[5] - rhs.mMatrix[5] ) <= GetRangedEpsilon( mMatrix[5], rhs.mMatrix[5] ) ) &&
 475   ( fabsf( mMatrix[6] - rhs.mMatrix[6] ) <= GetRangedEpsilon( mMatrix[6], rhs.mMatrix[6] ) ) &&
 476   ( fabsf( mMatrix[7] - rhs.mMatrix[7] ) <= GetRangedEpsilon( mMatrix[7], rhs.mMatrix[7] ) ) &&
 477   ( fabsf( mMatrix[8] - rhs.mMatrix[8] ) <= GetRangedEpsilon( mMatrix[8], rhs.mMatrix[8] ) ) &&
 478   ( fabsf( mMatrix[9] - rhs.mMatrix[9] ) <= GetRangedEpsilon( mMatrix[9], rhs.mMatrix[9] ) ) &&
 479   ( fabsf( mMatrix[10] - rhs.mMatrix[10] ) <= GetRangedEpsilon( mMatrix[10], rhs.mMatrix[10] ) ) &&
 480   ( fabsf( mMatrix[11] - rhs.mMatrix[11] ) <= GetRangedEpsilon( mMatrix[11], rhs.mMatrix[11] ) ) &&
 481   ( fabsf( mMatrix[12] - rhs.mMatrix[12] ) <= GetRangedEpsilon( mMatrix[12], rhs.mMatrix[12] ) ) &&
 482   ( fabsf( mMatrix[13] - rhs.mMatrix[13] ) <= GetRangedEpsilon( mMatrix[13], rhs.mMatrix[13] ) ) &&
 483   ( fabsf( mMatrix[14] - rhs.mMatrix[14] ) <= GetRangedEpsilon( mMatrix[14], rhs.mMatrix[14] ) ) &&
 484   ( fabsf( mMatrix[15] - rhs.mMatrix[15] ) <= GetRangedEpsilon( mMatrix[15], rhs.mMatrix[15] ) ) );
 485 }
 486
 487 bool Matrix::operator!=(const Matrix& rhs) const
 488 {
 489   if (*this == rhs)
 490   {
 491     return false;
 492   }
 493
 494   return true;
 495 }
 496
 497 void Matrix::OrthoNormalize()
 498 {
 499   Vector4 vector0(GetXAxis());
 500   Vector4 vector1(GetYAxis());
 501   Vector4 vector2(GetZAxis());
 502
 503   vector0.Normalize();
 504   vector1.Normalize();
 505   vector2 = vector0.Cross( vector1 );
 506   vector1 = vector2.Cross( vector0 );
 507
 508   memcpy( mMatrix, &vector0, NUM_BYTES_IN_ROW );
 509   memcpy( mMatrix + ROW1_OFFSET, &vector1, NUM_BYTES_IN_ROW );
 510   memcpy( mMatrix + ROW2_OFFSET, &vector2, NUM_BYTES_IN_ROW );
 511 }
 512
 513 Vector3 Matrix::GetXAxis() const
 514 {
 515   return Vector3(mMatrix[0], mMatrix[1], mMatrix[2]);
 516 }
 517
 518 Vector3 Matrix::GetYAxis() const
 519 {
 520   return Vector3(mMatrix[4], mMatrix[5], mMatrix[6]);
 521 }
 522
 523 Vector3 Matrix::GetZAxis() const
 524 {
 525   return Vector3(mMatrix[8], mMatrix[9], mMatrix[10]);
 526 }
 527
 528 void Matrix::SetXAxis(const Vector3& axis)
 529 {
 530   mMatrix[0] = axis.x;
 531   mMatrix[1] = axis.y;
 532   mMatrix[2] = axis.z;
 533 }
 534
 535 void Matrix::SetYAxis(const Vector3& axis)
 536 {
 537   mMatrix[4] = axis.x;
 538   mMatrix[5] = axis.y;
 539   mMatrix[6] = axis.z;
 540 }
 541
 542 void Matrix::SetZAxis(const Vector3& axis)
 543 {
 544   mMatrix[8] = axis.x;
 545   mMatrix[9] = axis.y;
 546   mMatrix[10] = axis.z;
 547 }
 548
 549 void Matrix::SetTransformComponents(const Vector3&    scale,
 550                                     const Quaternion& rotation,
 551                                     const Vector3&    translation )
 552 {
 553   if( rotation.IsIdentity() )
 554   {
 555     mMatrix[0] = scale.x;
 556     mMatrix[1] = 0.0f;
 557     mMatrix[2] = 0.0f;
 558     mMatrix[3] = 0.0f;
 559
 560     mMatrix[4] = 0.0f;
 561     mMatrix[5] = scale.y;
 562     mMatrix[6] = 0.0f;
 563     mMatrix[7] = 0.0f;
 564
 565     mMatrix[8] = 0.0f;
 566     mMatrix[9] = 0.0f;
 567     mMatrix[10]= scale.z;
 568     mMatrix[11]= 0.0f;
 569   }
 570   else
 571   {
 572     MATH_INCREASE_COUNTER(PerformanceMonitor::MATRIX_MULTIPLYS);
 573     MATH_INCREASE_BY(PerformanceMonitor::FLOAT_POINT_MULTIPLY,27); // 27 = 9+18
 574
 575     const float xx = rotation.mVector.x * rotation.mVector.x;
 576     const float yy = rotation.mVector.y * rotation.mVector.y;
 577     const float zz = rotation.mVector.z * rotation.mVector.z;
 578     const float xy = rotation.mVector.x * rotation.mVector.y;
 579     const float xz = rotation.mVector.x * rotation.mVector.z;
 580     const float wx = rotation.mVector.w * rotation.mVector.x;
 581     const float wy = rotation.mVector.w * rotation.mVector.y;
 582     const float wz = rotation.mVector.w * rotation.mVector.z;
 583     const float yz = rotation.mVector.y * rotation.mVector.z;
 584
 585     mMatrix[0] = (scale.x * (1.0f - 2.0f * (yy + zz)));
 586     mMatrix[1] = (scale.x * (       2.0f * (xy + wz)));
 587     mMatrix[2] = (scale.x * (       2.0f * (xz - wy)));
 588     mMatrix[3] = 0.0f;
 589
 590     mMatrix[4] = (scale.y * (       2.0f * (xy - wz)));
 591     mMatrix[5] = (scale.y * (1.0f - 2.0f * (xx + zz)));
 592     mMatrix[6] = (scale.y * (       2.0f * (yz + wx)));
 593     mMatrix[7] = 0.0f;
 594
 595     mMatrix[8] = (scale.z * (       2.0f * (xz + wy)));
 596     mMatrix[9] = (scale.z * (       2.0f * (yz - wx)));
 597     mMatrix[10]= (scale.z * (1.0f - 2.0f * (xx + yy)));
 598     mMatrix[11]= 0.0f;
 599   }
 600   // apply translation
 601   mMatrix[12] = translation.x;
 602   mMatrix[13] = translation.y;
 603   mMatrix[14] = translation.z;
 604   mMatrix[15] = 1.0f;
 605 }
 606
 607 void Matrix::SetInverseTransformComponents(const Vector3&    scale,
 608                                            const Quaternion& rotation,
 609                                            const Vector3&    translation )
 610 {
 611   Vector3 inverseTranslation = -translation;
 612   Vector3 inverseScale( 1.0f/scale.x, 1.0f/scale.y, 1.0f/scale.z);
 613   Quaternion inverseRotation(rotation);
 614   bool isRotated = ! inverseRotation.IsIdentity();
 615
 616   // Order of application is translation, rotation, scale.
 617   // Ensure translation is relative to scale & rotation:
 618
 619   if( isRotated )
 620   {
 621     inverseRotation.Invert();
 622     inverseTranslation = inverseRotation.Rotate(inverseTranslation);
 623   }
 624
 625   inverseTranslation *= inverseScale;
 626
 627   if( isRotated )
 628   {
 629     MATH_INCREASE_COUNTER(PerformanceMonitor::MATRIX_MULTIPLYS);
 630     MATH_INCREASE_BY(PerformanceMonitor::FLOAT_POINT_MULTIPLY,27); // 27 = 9+18
 631
 632     const float xx = inverseRotation.mVector.x * inverseRotation.mVector.x;
 633     const float yy = inverseRotation.mVector.y * inverseRotation.mVector.y;
 634     const float zz = inverseRotation.mVector.z * inverseRotation.mVector.z;
 635     const float xy = inverseRotation.mVector.x * inverseRotation.mVector.y;
 636     const float xz = inverseRotation.mVector.x * inverseRotation.mVector.z;
 637     const float wx = inverseRotation.mVector.w * inverseRotation.mVector.x;
 638     const float wy = inverseRotation.mVector.w * inverseRotation.mVector.y;
 639     const float wz = inverseRotation.mVector.w * inverseRotation.mVector.z;
 640     const float yz = inverseRotation.mVector.y * inverseRotation.mVector.z;
 641
 642     mMatrix[0] = (inverseScale.x * (1.0f - 2.0f * (yy + zz)));
 643     mMatrix[1] = (inverseScale.y * (2.0f * (xy + wz)));
 644     mMatrix[2] = (inverseScale.z * (2.0f * (xz - wy)));
 645     mMatrix[3] = 0.0f;
 646
 647     mMatrix[4] = (inverseScale.x * (2.0f * (xy - wz)));
 648     mMatrix[5] = (inverseScale.y * (1.0f - 2.0f * (xx + zz)));
 649     mMatrix[6] = (inverseScale.z * (2.0f * (yz + wx)));
 650     mMatrix[7] = 0.0f;
 651
 652     mMatrix[8] = (inverseScale.x * (2.0f * (xz + wy)));
 653     mMatrix[9] = (inverseScale.y * (2.0f * (yz - wx)));
 654     mMatrix[10]= (inverseScale.z * (1.0f - 2.0f * (xx + yy)));
 655     mMatrix[11]= 0.0f;
 656   }
 657   else
 658   {
 659     mMatrix[0] = inverseScale.x;
 660     mMatrix[1] = 0.0f;
 661     mMatrix[2] = 0.0f;
 662     mMatrix[3] = 0.0f;
 663
 664     mMatrix[4] = 0.0f;
 665     mMatrix[5] = inverseScale.y;
 666     mMatrix[6] = 0.0f;
 667     mMatrix[7] = 0.0f;
 668
 669     mMatrix[8] = 0.0f;
 670     mMatrix[9] = 0.0f;
 671     mMatrix[10]= inverseScale.z;
 672     mMatrix[11]= 0.0f;
 673   }
 674
 675   // apply translation
 676   mMatrix[12] = inverseTranslation.x;
 677   mMatrix[13] = inverseTranslation.y;
 678   mMatrix[14] = inverseTranslation.z;
 679   mMatrix[15] = 1.0f;
 680 }
 681
 682 void Matrix::SetInverseTransformComponents(const Vector3&    xAxis,
 683                                            const Vector3&    yAxis,
 684                                            const Vector3&    zAxis,
 685                                            const Vector3&    translation )
 686 {
 687   // x, y, z axis parameters represent a orthonormal basis with no scaling, i.e. a rotation matrix.
 688   // Invert rotation by transposing in place
 689
 690   // Order of application is translation, rotation
 691
 692   mMatrix[0]  = xAxis.x;
 693   mMatrix[1]  = yAxis.x;
 694   mMatrix[2]  = zAxis.x;
 695   mMatrix[3]  = 0.0f;
 696
 697   mMatrix[4]  = xAxis.y;
 698   mMatrix[5]  = yAxis.y;
 699   mMatrix[6]  = zAxis.y;
 700   mMatrix[7]  = 0.0f;
 701
 702   mMatrix[8]  = xAxis.z;
 703   mMatrix[9]  = yAxis.z;
 704   mMatrix[10] = zAxis.z;
 705   mMatrix[11] = 0.0f;
 706   mMatrix[12] = 0.0f;
 707   mMatrix[13] = 0.0f;
 708   mMatrix[14] = 0.0f;
 709   mMatrix[15] = 1.0f;
 710
 711   // Ensure translation is relative to scale & rotation:
 712
 713   Vector4 inverseTranslation( -translation.x, -translation.y, -translation.z, 1.0f);
 714   inverseTranslation = *this * inverseTranslation; // Rotate inverse translation
 715   inverseTranslation.w = 1.0f;
 716   SetTranslation(inverseTranslation);
 717 }
 718
 719
 720 void Matrix::GetTransformComponents(Vector3&     position,
 721                                     Quaternion&  rotation,
 722                                     Vector3&     scale) const
 723 {
 724   position = GetTranslation3();
 725
 726   // Derive scale from axis lengths.
 727   Vector3 theScale(GetXAxis().Length(), GetYAxis().Length(), GetZAxis().Length());
 728   scale = theScale;
 729
 730   if( ! ( fabs(theScale.x - Vector3::ONE.x) < ROTATION_EPSILON &&
 731           fabs(theScale.y - Vector3::ONE.y) < ROTATION_EPSILON &&
 732           fabs(theScale.z - Vector3::ONE.z) < ROTATION_EPSILON ) )
 733   {
 734     MATH_INCREASE_COUNTER(PerformanceMonitor::MATRIX_MULTIPLYS);
 735     MATH_INCREASE_BY(PerformanceMonitor::FLOAT_POINT_MULTIPLY,9);
 736
 737     // Non-identity scale is embedded into rotation matrix. Remove it first:
 738     Matrix m(*this);
 739     Vector3 inverseScale(1.0f/theScale.x, 1.0f/theScale.y, 1.0f/theScale.z);
 740     m.mMatrix[0] *= inverseScale.x;
 741     m.mMatrix[1] *= inverseScale.x;
 742     m.mMatrix[2] *= inverseScale.x;
 743     m.mMatrix[4] *= inverseScale.y;
 744     m.mMatrix[5] *= inverseScale.y;
 745     m.mMatrix[6] *= inverseScale.y;
 746     m.mMatrix[8] *= inverseScale.z;
 747     m.mMatrix[9] *= inverseScale.z;
 748     m.mMatrix[10] *= inverseScale.z;
 749
 750     Quaternion theRotation(m);
 751
 752     // If the imaginary components are close to zero, then use null quaternion instead.
 753     if( fabs(theRotation.mVector.x) < ROTATION_EPSILON &&
 754         fabs(theRotation.mVector.y) < ROTATION_EPSILON &&
 755         fabs(theRotation.mVector.z) < ROTATION_EPSILON )
 756     {
 757       theRotation = Quaternion();
 758     }
 759     rotation = theRotation;
 760   }
 761   else
 762   {
 763     Quaternion theRotation(*this);
 764
 765     // If the imaginary components are close to zero, then use null quaternion instead.
 766     if( fabs(theRotation.mVector.x) < ROTATION_EPSILON &&
 767         fabs(theRotation.mVector.y) < ROTATION_EPSILON &&
 768         fabs(theRotation.mVector.z) < ROTATION_EPSILON )
 769     {
 770       theRotation = Quaternion();
 771     }
 772     rotation = theRotation;
 773   }
 774 }
 775
 776
 777
 778 std::ostream& operator<< (std::ostream& o, const Matrix& matrix)
 779 {
 780   return o << "[ [" << matrix.mMatrix[0] << ", " << matrix.mMatrix[1] << ", " << matrix.mMatrix[2]  << ", " << matrix.mMatrix[3] << "], "
 781              << "[" << matrix.mMatrix[4] << ", " << matrix.mMatrix[5] << ", " << matrix.mMatrix[6]  << ", " << matrix.mMatrix[7] << "], "
 782              << "[" << matrix.mMatrix[8] << ", " << matrix.mMatrix[9] << ", " << matrix.mMatrix[10] << ", " << matrix.mMatrix[11] << "], "
 783              << "[" << matrix.mMatrix[12] << ", " << matrix.mMatrix[13] << ", " << matrix.mMatrix[14] << ", " << matrix.mMatrix[15] << "] ]";
 784 }
 785
 786 } // namespace Dali