dali/internal/common/matrix-utils.cpp

   1 /*
   2  * Copyright (c) 2022 Samsung Electronics Co., Ltd.
   3  *
   4  * Licensed under the Apache License, Version 2.0 (the "License");
   5  * you may not use this file except in compliance with the License.
   6  * You may obtain a copy of the License at
   7  *
   8  * http://www.apache.org/licenses/LICENSE-2.0
   9  *
  10  * Unless required by applicable law or agreed to in writing, software
  11  * distributed under the License is distributed on an "AS IS" BASIS,
  12  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
  13  * See the License for the specific language governing permissions and
  14  * limitations under the License.
  15  *
  16  */
  17
  18 // CLASS HEADERS
  19 #include <dali/internal/common/matrix-utils.h>
  20
  21 // EXTERNAL INCLUDES
  22 #include <cstdint> // uint32_t
  23 #include <cstring> // memcpy
  24
  25 // INTERNAL INCLUDE
  26 #include <dali/internal/render/common/performance-monitor.h>
  27 #include <dali/public-api/math/matrix.h>
  28 #include <dali/public-api/math/matrix3.h>
  29 #include <dali/public-api/math/quaternion.h>
  30
  31 namespace
  32 {
  33 const uint32_t NUM_BYTES_IN_MATRIX(16 * sizeof(float));
  34 const uint32_t NUM_BYTES_IN_MATRIX3(9 * sizeof(float));
  35
  36 } // namespace
  37
  38 namespace Dali::Internal
  39 {
  40 using Internal::PerformanceMonitor;
  41
  42 namespace MatrixUtils
  43 {
  44 // Dali::Quaternion
  45
  46 void ConvertQuaternion(float*& result, const Dali::Quaternion& rotation)
  47 {
  48   MATH_INCREASE_COUNTER(PerformanceMonitor::QUATERNION_TO_MATRIX);
  49
  50   const float xx = rotation.mVector.x * rotation.mVector.x;
  51   const float yy = rotation.mVector.y * rotation.mVector.y;
  52   const float zz = rotation.mVector.z * rotation.mVector.z;
  53   const float xy = rotation.mVector.x * rotation.mVector.y;
  54   const float xz = rotation.mVector.x * rotation.mVector.z;
  55   const float wx = rotation.mVector.w * rotation.mVector.x;
  56   const float wy = rotation.mVector.w * rotation.mVector.y;
  57   const float wz = rotation.mVector.w * rotation.mVector.z;
  58   const float yz = rotation.mVector.y * rotation.mVector.z;
  59
  60   // clang-format off
  61   result[0] = 1.0f - 2.0f * (yy + zz);
  62   result[1] =        2.0f * (xy + wz);
  63   result[2] =        2.0f * (xz - wy);
  64   result[3] = 0.0f;
  65
  66   result[4] =        2.0f * (xy - wz);
  67   result[5] = 1.0f - 2.0f * (xx + zz);
  68   result[6] =        2.0f * (yz + wx);
  69   result[7] = 0.0f;
  70
  71   result[8] =        2.0f * (xz + wy);
  72   result[9] =        2.0f * (yz - wx);
  73   result[10]= 1.0f - 2.0f * (xx + yy);
  74   result[11]= 0.0f;
  75
  76   result[12]= 0.0f;
  77   result[13]= 0.0f;
  78   result[14]= 0.0f;
  79   result[15]= 1.0f;
  80   // clang-format on
  81 }
  82
  83 // Dali::Matrix
  84
  85 void Multiply(Dali::Matrix& result, const Dali::Matrix& lhs, const Dali::Matrix& rhs)
  86 {
  87   MATH_INCREASE_COUNTER(PerformanceMonitor::MATRIX_MULTIPLYS);
  88   MATH_INCREASE_BY(PerformanceMonitor::FLOAT_POINT_MULTIPLY, 64); // 64 = 16*4
  89
  90   float*       temp   = result.AsFloat();
  91   const float* rhsPtr = rhs.AsFloat();
  92   const float* lhsPtr = lhs.AsFloat();
  93
  94 #ifndef __ARM_NEON__
  95
  96   for(int32_t i = 0; i < 4; i++)
  97   {
  98     // i<<2 gives the first vector / column
  99     const int32_t loc0 = i << 2;
 100     const int32_t loc1 = loc0 + 1;
 101     const int32_t loc2 = loc0 + 2;
 102     const int32_t loc3 = loc0 + 3;
 103
 104     const float value0 = lhsPtr[loc0];
 105     const float value1 = lhsPtr[loc1];
 106     const float value2 = lhsPtr[loc2];
 107     const float value3 = lhsPtr[loc3];
 108
 109     temp[loc0] = (value0 * rhsPtr[0]) +
 110                  (value1 * rhsPtr[4]) +
 111                  (value2 * rhsPtr[8]) +
 112                  (value3 * rhsPtr[12]);
 113
 114     temp[loc1] = (value0 * rhsPtr[1]) +
 115                  (value1 * rhsPtr[5]) +
 116                  (value2 * rhsPtr[9]) +
 117                  (value3 * rhsPtr[13]);
 118
 119     temp[loc2] = (value0 * rhsPtr[2]) +
 120                  (value1 * rhsPtr[6]) +
 121                  (value2 * rhsPtr[10]) +
 122                  (value3 * rhsPtr[14]);
 123
 124     temp[loc3] = (value0 * rhsPtr[3]) +
 125                  (value1 * rhsPtr[7]) +
 126                  (value2 * rhsPtr[11]) +
 127                  (value3 * rhsPtr[15]);
 128   }
 129
 130 #else
 131
 132   // 64 32bit registers,
 133   // aliased to
 134   // d = 64 bit double-word d0 -d31
 135   // q =128 bit quad-word   q0 -q15  (enough to handle a column of 4 floats in a matrix)
 136   // e.g. q0 = d0 and d1
 137
 138   // load and stores interleaved as NEON can load and store while calculating
 139   asm volatile(
 140     "VLDM         %1,  {q0-q3}        \n\t" // load matrix 1 (lhsPtr) q[0..q3]
 141     "VLDM         %0,  {q8-q11}       \n\t" // load matrix 2 (rhsPtr) q[q8-q11]
 142     "VMUL.F32     q12, q8, d0[0]      \n\t" // column 0 = rhsPtr[0..3] * lhsPtr[0..3]
 143     "VMUL.F32     q13, q8, d2[0]      \n\t" // column 1 = rhsPtr[0..3] * lhsPtr[4..7]
 144     "VMUL.F32     q14, q8, d4[0]      \n\t" // column 2 = rhsPtr[0..3] * lhsPtr[8..11]
 145     "VMUL.F32     q15, q8, d6[0]      \n\t" // column 3 = rhsPtr[0..3] * lhsPtr[12..15]
 146
 147     "VMLA.F32     q12, q9, d0[1]      \n\t" // column 0 += rhsPtr[4..7] * lhsPtr[0..3]
 148     "VMLA.F32     q13, q9, d2[1]      \n\t" // column 1 += rhsPtr[4..7] * lhsPtr[4..7]
 149     "VMLA.F32     q14, q9, d4[1]      \n\t" // column 2 += rhsPtr[4..7] * lhsPtr[8..11]
 150     "VMLA.F32     q15, q9, d6[1]      \n\t" // column 3 += rhsPtr[4..7] * lhsPtr[12..15]
 151
 152     "VMLA.F32     q12, q10, d1[0]     \n\t" // column 0 += rhsPtr[8..11] * lhsPtr[0..3]
 153     "VMLA.F32     q13, q10, d3[0]     \n\t" // column 1 += rhsPtr[8..11] * lhsPtr[4..7]
 154     "VMLA.F32     q14, q10, d5[0]     \n\t" // column 2 += rhsPtr[8..11] * lhsPtr[8..11]
 155     "VMLA.F32     q15, q10, d7[0]     \n\t" // column 3 += rhsPtr[8..11] * lhsPtr[12..15]
 156
 157     "VMLA.F32     q12, q11, d1[1]     \n\t" // column 0 += rhsPtr[12..15] * lhsPtr[0..3]
 158     "VMLA.F32     q13, q11, d3[1]     \n\t" // column 1 += rhsPtr[12..15] * lhsPtr[4..7]
 159     "VMLA.F32     q14, q11, d5[1]     \n\t" // column 2 += rhsPtr[12..15] * lhsPtr[8..11]
 160     "VMLA.F32     q15, q11, d7[1]     \n\t" // column 3 += rhsPtr[12..15] * lhsPtr[12..15]
 161     "VSTM         %2,  {q12-q15}      \n\t" // store entire output matrix.
 162     : "+r"(rhsPtr), "+r"(lhsPtr), "+r"(temp)
 163     :
 164     : "q0", "q1", "q2", "q3", "q8", "q9", "q10", "q11", "q12", "q13", "q14", "q15", "memory");
 165
 166 #endif
 167 }
 168
 169 void Multiply(Dali::Matrix& result, const Dali::Matrix& lhs, const Dali::Quaternion& rhs)
 170 {
 171   MATH_INCREASE_COUNTER(PerformanceMonitor::MATRIX_MULTIPLYS);
 172   MATH_INCREASE_BY(PerformanceMonitor::FLOAT_POINT_MULTIPLY, 54); // 54 = 36+18
 173
 174   float  matrix[16];
 175   float* rhsPtr = &matrix[0];
 176   ConvertQuaternion(rhsPtr, rhs);
 177
 178   // quaternion contains just rotation so it really only needs 3x3 matrix
 179
 180   float*       temp   = result.AsFloat();
 181   const float* lhsPtr = lhs.AsFloat();
 182
 183 #ifndef __ARM_NEON__
 184
 185   for(int32_t i = 0; i < 4; i++)
 186   {
 187     // i<<2 gives the first vector / column
 188     const int32_t loc0 = i << 2;
 189     const int32_t loc1 = loc0 + 1;
 190     const int32_t loc2 = loc0 + 2;
 191     const int32_t loc3 = loc0 + 3;
 192
 193     const float value0 = lhsPtr[loc0];
 194     const float value1 = lhsPtr[loc1];
 195     const float value2 = lhsPtr[loc2];
 196     const float value3 = lhsPtr[loc3];
 197
 198     temp[loc0] = (value0 * rhsPtr[0]) +
 199                  (value1 * rhsPtr[4]) +
 200                  (value2 * rhsPtr[8]) +
 201                  (0.0f); //value3 * rhsPtr[12] is 0.0f
 202
 203     temp[loc1] = (value0 * rhsPtr[1]) +
 204                  (value1 * rhsPtr[5]) +
 205                  (value2 * rhsPtr[9]) +
 206                  (0.0f); //value3 * rhsPtr[13] is 0.0f
 207
 208     temp[loc2] = (value0 * rhsPtr[2]) +
 209                  (value1 * rhsPtr[6]) +
 210                  (value2 * rhsPtr[10]) +
 211                  (0.0f); //value3 * rhsPtr[14] is 0.0f
 212
 213     temp[loc3] = (0.0f) +  //value0 * rhsPtr[3] is 0.0f
 214                  (0.0f) +  //value1 * rhsPtr[7] is 0.0f
 215                  (0.0f) +  //value2 * rhsPtr[11] is 0.0f
 216                  (value3); // rhsPtr[15] is 1.0f
 217   }
 218
 219 #else
 220
 221   // 64 32bit registers,
 222   // aliased to
 223   // d = 64 bit double-word d0 -d31
 224   // q =128 bit quad-word   q0 -q15  (enough to handle a column of 4 floats in a matrix)
 225   // e.g. q0 = d0 and d1
 226   // load and stores interleaved as NEON can load and store while calculating
 227   asm volatile(
 228     "VLDM         %1,   {q4-q6}       \n\t" // load matrix 1 (lhsPtr)
 229     "VLD1.F32     {q7}, [%2]!         \n\t" // load matrix 2 (rhsPtr) [0..3]
 230     "VMUL.F32     q0,   q7,   d8[0]   \n\t" // column 0 = rhsPtr[0..3] * lhsPtr[0..3]
 231     "VMUL.F32     q1,   q7,   d10[0]  \n\t" // column 1 = rhsPtr[0..3] * lhsPtr[4..7]
 232     "VMUL.F32     q2,   q7,   d12[0]  \n\t" // column 2 = rhsPtr[0..3] * lhsPtr[8..11]
 233     "VLD1.F32     {q7}, [%2]!         \n\t" // load matrix 2 (rhsPtr) [4..7]
 234     "VMLA.F32     q0,   q7,   d8[1]   \n\t" // column 0+= rhsPtr[4..7] * lhsPtr[0..3]
 235     "VMLA.F32     q1,   q7,   d10[1]  \n\t" // column 1+= rhsPtr[4..7] * lhsPtr[4..7]
 236     "VMLA.F32     q2,   q7,   d12[1]  \n\t" // column 2+= rhsPtr[4..7] * lhsPtr[8..11]
 237     "VLD1.F32     {q7}, [%2]!         \n\t" // load matrix 2 (rhsPtr) [8..11]
 238     "VMLA.F32     q0,   q7,   d9[0]   \n\t" // column 0+= rhsPtr[8..11] * lhsPtr[0..3]
 239     "VMLA.F32     q1,   q7,   d11[0]  \n\t" // column 1+= rhsPtr[8..11] * lhsPtr[4..7]
 240     "VMLA.F32     q2,   q7,   d13[0]  \n\t" // column 2+= rhsPtr[8..11] * lhsPtr[8..11]
 241     "VSTM         %0,   {q0-q2}       \n\t" // store entire output matrix.
 242     :
 243     : "r"(temp), "r"(lhsPtr), "r"(rhsPtr)
 244     : "%r0", "%q0", "%q1", "%q2", "%q4", "%q5", "%q6", "%q7", "memory");
 245
 246   temp[12] = 0.0f;
 247   temp[13] = 0.0f;
 248   temp[14] = 0.0f;
 249   temp[15] = 1.0f;
 250 #endif
 251 }
 252
 253 void MultiplyProjectionMatrix(Dali::Matrix& result, const Dali::Matrix& lhs, const Dali::Matrix& projection)
 254 {
 255   // TODO : Implement with NEON.
 256   // Current NEON code is copy of Multiply.
 257
 258   MATH_INCREASE_COUNTER(PerformanceMonitor::MATRIX_MULTIPLYS);
 259   MATH_INCREASE_BY(PerformanceMonitor::FLOAT_POINT_MULTIPLY, 32); // 32 = 8*4
 260
 261   float*       temp   = result.AsFloat();
 262   const float* rhsPtr = projection.AsFloat();
 263   const float* lhsPtr = lhs.AsFloat();
 264
 265 #ifndef __ARM_NEON__
 266
 267   // We only use rhsPtr's 0, 1, 4, 5, 10, 11, 14, 15 index.
 268   const float rhs0  = rhsPtr[0];
 269   const float rhs1  = rhsPtr[1];
 270   const float rhs4  = rhsPtr[4];
 271   const float rhs5  = rhsPtr[5];
 272   const float rhs10 = rhsPtr[10];
 273   const float rhs11 = rhsPtr[11];
 274   const float rhs14 = rhsPtr[14];
 275   const float rhs15 = rhsPtr[15];
 276
 277   for(int32_t i = 0; i < 4; i++)
 278   {
 279     // i<<2 gives the first vector / column
 280     const int32_t loc0 = i << 2;
 281     const int32_t loc1 = loc0 + 1;
 282     const int32_t loc2 = loc0 + 2;
 283     const int32_t loc3 = loc0 + 3;
 284
 285     const float value0 = lhsPtr[loc0];
 286     const float value1 = lhsPtr[loc1];
 287     const float value2 = lhsPtr[loc2];
 288     const float value3 = lhsPtr[loc3];
 289
 290     temp[loc0] = (value0 * rhs0) + (value1 * rhs4);
 291     temp[loc1] = (value0 * rhs1) + (value1 * rhs5);
 292     temp[loc2] = (value2 * rhs10) + (value3 * rhs14);
 293     temp[loc3] = (value2 * rhs11) + (value3 * rhs15);
 294   }
 295
 296 #else
 297
 298   // 64 32bit registers,
 299   // aliased to
 300   // d = 64 bit double-word d0 -d31
 301   // q =128 bit quad-word   q0 -q15  (enough to handle a column of 4 floats in a matrix)
 302   // e.g. q0 = d0 and d1
 303
 304   // load and stores interleaved as NEON can load and store while calculating
 305   asm volatile(
 306     "VLDM         %1,  {q0-q3}        \n\t" // load matrix 1 (lhsPtr) q[0..q3]
 307     "VLDM         %0,  {q8-q11}       \n\t" // load matrix 2 (rhsPtr) q[q8-q11]
 308     "VMUL.F32     q12, q8, d0[0]      \n\t" // column 0 = rhsPtr[0..3] * lhsPtr[0..3]
 309     "VMUL.F32     q13, q8, d2[0]      \n\t" // column 1 = rhsPtr[0..3] * lhsPtr[4..7]
 310     "VMUL.F32     q14, q8, d4[0]      \n\t" // column 2 = rhsPtr[0..3] * lhsPtr[8..11]
 311     "VMUL.F32     q15, q8, d6[0]      \n\t" // column 3 = rhsPtr[0..3] * lhsPtr[12..15]
 312
 313     "VMLA.F32     q12, q9, d0[1]      \n\t" // column 0 += rhsPtr[4..7] * lhsPtr[0..3]
 314     "VMLA.F32     q13, q9, d2[1]      \n\t" // column 1 += rhsPtr[4..7] * lhsPtr[4..7]
 315     "VMLA.F32     q14, q9, d4[1]      \n\t" // column 2 += rhsPtr[4..7] * lhsPtr[8..11]
 316     "VMLA.F32     q15, q9, d6[1]      \n\t" // column 3 += rhsPtr[4..7] * lhsPtr[12..15]
 317
 318     "VMLA.F32     q12, q10, d1[0]     \n\t" // column 0 += rhsPtr[8..11] * lhsPtr[0..3]
 319     "VMLA.F32     q13, q10, d3[0]     \n\t" // column 1 += rhsPtr[8..11] * lhsPtr[4..7]
 320     "VMLA.F32     q14, q10, d5[0]     \n\t" // column 2 += rhsPtr[8..11] * lhsPtr[8..11]
 321     "VMLA.F32     q15, q10, d7[0]     \n\t" // column 3 += rhsPtr[8..11] * lhsPtr[12..15]
 322
 323     "VMLA.F32     q12, q11, d1[1]     \n\t" // column 0 += rhsPtr[12..15] * lhsPtr[0..3]
 324     "VMLA.F32     q13, q11, d3[1]     \n\t" // column 1 += rhsPtr[12..15] * lhsPtr[4..7]
 325     "VMLA.F32     q14, q11, d5[1]     \n\t" // column 2 += rhsPtr[12..15] * lhsPtr[8..11]
 326     "VMLA.F32     q15, q11, d7[1]     \n\t" // column 3 += rhsPtr[12..15] * lhsPtr[12..15]
 327     "VSTM         %2,  {q12-q15}      \n\t" // store entire output matrix.
 328     : "+r"(rhsPtr), "+r"(lhsPtr), "+r"(temp)
 329     :
 330     : "q0", "q1", "q2", "q3", "q8", "q9", "q10", "q11", "q12", "q13", "q14", "q15", "memory");
 331
 332 #endif
 333 }
 334
 335 void MultiplyAssign(Dali::Matrix& result, const Dali::Matrix& rhs)
 336 {
 337   MATH_INCREASE_COUNTER(PerformanceMonitor::MATRIX_MULTIPLYS);
 338   MATH_INCREASE_BY(PerformanceMonitor::FLOAT_POINT_MULTIPLY, 64); // 64 = 16*4
 339
 340   // TODO : Implement with NEON.
 341
 342   float*       lhsPtr = result.AsFloat();
 343   const float* rhsPtr = rhs.AsFloat();
 344   float*       temp   = nullptr;
 345
 346   if(lhsPtr == rhsPtr)
 347   {
 348     // If rhs is same matrix with result, we need to copy temperal vaules.
 349     temp = static_cast<float*>(malloc(NUM_BYTES_IN_MATRIX));
 350     memcpy(temp, rhsPtr, NUM_BYTES_IN_MATRIX);
 351     rhsPtr = temp;
 352   }
 353
 354   // Calculate and store as row major.
 355   for(int32_t i = 0; i < 4; i++)
 356   {
 357     const int32_t loc0 = i;
 358     const int32_t loc1 = loc0 | 4;
 359     const int32_t loc2 = loc0 | 8;
 360     const int32_t loc3 = loc0 | 12;
 361
 362     const float value0 = lhsPtr[loc0];
 363     const float value1 = lhsPtr[loc1];
 364     const float value2 = lhsPtr[loc2];
 365     const float value3 = lhsPtr[loc3];
 366
 367     lhsPtr[loc0] = (value0 * rhsPtr[0]) +
 368                    (value1 * rhsPtr[1]) +
 369                    (value2 * rhsPtr[2]) +
 370                    (value3 * rhsPtr[3]);
 371
 372     lhsPtr[loc1] = (value0 * rhsPtr[4]) +
 373                    (value1 * rhsPtr[5]) +
 374                    (value2 * rhsPtr[6]) +
 375                    (value3 * rhsPtr[7]);
 376
 377     lhsPtr[loc2] = (value0 * rhsPtr[8]) +
 378                    (value1 * rhsPtr[9]) +
 379                    (value2 * rhsPtr[10]) +
 380                    (value3 * rhsPtr[11]);
 381
 382     lhsPtr[loc3] = (value0 * rhsPtr[12]) +
 383                    (value1 * rhsPtr[13]) +
 384                    (value2 * rhsPtr[14]) +
 385                    (value3 * rhsPtr[15]);
 386   }
 387
 388   if(temp)
 389   {
 390     // If we allocate temperal memory, we should free it.
 391     free(temp);
 392   }
 393 }
 394
 395 // Dali::Matrix3
 396
 397 void Multiply(Dali::Matrix3& result, const Dali::Matrix3& lhs, const Dali::Matrix3& rhs)
 398 {
 399   float*       temp   = result.AsFloat();
 400   const float* rhsPtr = rhs.AsFloat();
 401   const float* lhsPtr = lhs.AsFloat();
 402
 403   for(int32_t i = 0; i < 3; i++)
 404   {
 405     const int32_t loc0 = i * 3;
 406     const int32_t loc1 = loc0 + 1;
 407     const int32_t loc2 = loc0 + 2;
 408
 409     const float value0 = lhsPtr[loc0];
 410     const float value1 = lhsPtr[loc1];
 411     const float value2 = lhsPtr[loc2];
 412
 413     temp[loc0] = (value0 * rhsPtr[0]) +
 414                  (value1 * rhsPtr[3]) +
 415                  (value2 * rhsPtr[6]);
 416
 417     temp[loc1] = (value0 * rhsPtr[1]) +
 418                  (value1 * rhsPtr[4]) +
 419                  (value2 * rhsPtr[7]);
 420
 421     temp[loc2] = (value0 * rhsPtr[2]) +
 422                  (value1 * rhsPtr[5]) +
 423                  (value2 * rhsPtr[8]);
 424   }
 425 }
 426
 427 void MultiplyAssign(Dali::Matrix3& result, const Dali::Matrix3& rhs)
 428 {
 429   float*       lhsPtr = result.AsFloat();
 430   const float* rhsPtr = rhs.AsFloat();
 431   float*       temp   = nullptr;
 432
 433   if(lhsPtr == rhsPtr)
 434   {
 435     // If rhs is same matrix with result, we need to copy temperal vaules.
 436     temp = static_cast<float*>(malloc(NUM_BYTES_IN_MATRIX3));
 437     memcpy(temp, rhsPtr, NUM_BYTES_IN_MATRIX3);
 438     rhsPtr = temp;
 439   }
 440
 441   // Calculate and store as row major.
 442   for(int32_t i = 0; i < 3; i++)
 443   {
 444     const int32_t loc0 = i;
 445     const int32_t loc1 = loc0 + 3;
 446     const int32_t loc2 = loc0 + 6;
 447
 448     const float value0 = lhsPtr[loc0];
 449     const float value1 = lhsPtr[loc1];
 450     const float value2 = lhsPtr[loc2];
 451
 452     lhsPtr[loc0] = (value0 * rhsPtr[0]) +
 453                    (value1 * rhsPtr[1]) +
 454                    (value2 * rhsPtr[2]);
 455
 456     lhsPtr[loc1] = (value0 * rhsPtr[3]) +
 457                    (value1 * rhsPtr[4]) +
 458                    (value2 * rhsPtr[5]);
 459
 460     lhsPtr[loc2] = (value0 * rhsPtr[6]) +
 461                    (value1 * rhsPtr[7]) +
 462                    (value2 * rhsPtr[8]);
 463   }
 464
 465   if(temp)
 466   {
 467     // If we allocate temperal memory, we should free it.
 468     free(temp);
 469   }
 470 }
 471
 472 } // namespace MatrixUtils
 473 } // namespace Dali::Internal