dali/internal/common/matrix-utils.cpp

   1 /*
   2  * Copyright (c) 2023 Samsung Electronics Co., Ltd.
   3  *
   4  * Licensed under the Apache License, Version 2.0 (the "License");
   5  * you may not use this file except in compliance with the License.
   6  * You may obtain a copy of the License at
   7  *
   8  * http://www.apache.org/licenses/LICENSE-2.0
   9  *
  10  * Unless required by applicable law or agreed to in writing, software
  11  * distributed under the License is distributed on an "AS IS" BASIS,
  12  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
  13  * See the License for the specific language governing permissions and
  14  * limitations under the License.
  15  *
  16  */
  17
  18 // CLASS HEADERS
  19 #include <dali/internal/common/matrix-utils.h>
  20
  21 // EXTERNAL INCLUDES
  22 #include <cstdint> // uint32_t
  23 #include <cstring> // memcpy
  24
  25 // INTERNAL INCLUDE
  26 #include <dali/internal/render/common/performance-monitor.h>
  27 #include <dali/public-api/math/matrix.h>
  28 #include <dali/public-api/math/matrix3.h>
  29 #include <dali/public-api/math/quaternion.h>
  30
  31 namespace
  32 {
  33 const uint32_t NUM_BYTES_IN_MATRIX(16 * sizeof(float));
  34 const uint32_t NUM_BYTES_IN_MATRIX3(9 * sizeof(float));
  35
  36 } // namespace
  37
  38 namespace Dali::Internal
  39 {
  40 using Internal::PerformanceMonitor;
  41
  42 namespace MatrixUtils
  43 {
  44 // Dali::Quaternion
  45
  46 void ConvertQuaternion(float*& result, const Dali::Quaternion& rotation)
  47 {
  48   MATH_INCREASE_COUNTER(PerformanceMonitor::QUATERNION_TO_MATRIX);
  49
  50   const float xx = rotation.mVector.x * rotation.mVector.x;
  51   const float yy = rotation.mVector.y * rotation.mVector.y;
  52   const float zz = rotation.mVector.z * rotation.mVector.z;
  53   const float xy = rotation.mVector.x * rotation.mVector.y;
  54   const float xz = rotation.mVector.x * rotation.mVector.z;
  55   const float wx = rotation.mVector.w * rotation.mVector.x;
  56   const float wy = rotation.mVector.w * rotation.mVector.y;
  57   const float wz = rotation.mVector.w * rotation.mVector.z;
  58   const float yz = rotation.mVector.y * rotation.mVector.z;
  59
  60   // clang-format off
  61   result[0] = 1.0f - 2.0f * (yy + zz);
  62   result[1] =        2.0f * (xy + wz);
  63   result[2] =        2.0f * (xz - wy);
  64   result[3] = 0.0f;
  65
  66   result[4] =        2.0f * (xy - wz);
  67   result[5] = 1.0f - 2.0f * (xx + zz);
  68   result[6] =        2.0f * (yz + wx);
  69   result[7] = 0.0f;
  70
  71   result[8] =        2.0f * (xz + wy);
  72   result[9] =        2.0f * (yz - wx);
  73   result[10]= 1.0f - 2.0f * (xx + yy);
  74   result[11]= 0.0f;
  75
  76   result[12]= 0.0f;
  77   result[13]= 0.0f;
  78   result[14]= 0.0f;
  79   result[15]= 1.0f;
  80   // clang-format on
  81 }
  82
  83 // Dali::Matrix
  84
  85 void Multiply(Dali::Matrix& result, const Dali::Matrix& lhs, const Dali::Matrix& rhs)
  86 {
  87   MATH_INCREASE_COUNTER(PerformanceMonitor::MATRIX_MULTIPLYS);
  88   MATH_INCREASE_BY(PerformanceMonitor::FLOAT_POINT_MULTIPLY, 64); // 64 = 16*4
  89
  90   float*       temp   = result.AsFloat();
  91   const float* rhsPtr = rhs.AsFloat();
  92   const float* lhsPtr = lhs.AsFloat();
  93
  94 #ifndef __ARM_NEON__
  95
  96   for(int32_t i = 0; i < 4; i++)
  97   {
  98     // i<<2 gives the first vector / column
  99     const int32_t loc0 = i << 2;
 100     const int32_t loc1 = loc0 + 1;
 101     const int32_t loc2 = loc0 + 2;
 102     const int32_t loc3 = loc0 + 3;
 103
 104     const float value0 = lhsPtr[loc0];
 105     const float value1 = lhsPtr[loc1];
 106     const float value2 = lhsPtr[loc2];
 107     const float value3 = lhsPtr[loc3];
 108
 109     temp[loc0] = (value0 * rhsPtr[0]) +
 110                  (value1 * rhsPtr[4]) +
 111                  (value2 * rhsPtr[8]) +
 112                  (value3 * rhsPtr[12]);
 113
 114     temp[loc1] = (value0 * rhsPtr[1]) +
 115                  (value1 * rhsPtr[5]) +
 116                  (value2 * rhsPtr[9]) +
 117                  (value3 * rhsPtr[13]);
 118
 119     temp[loc2] = (value0 * rhsPtr[2]) +
 120                  (value1 * rhsPtr[6]) +
 121                  (value2 * rhsPtr[10]) +
 122                  (value3 * rhsPtr[14]);
 123
 124     temp[loc3] = (value0 * rhsPtr[3]) +
 125                  (value1 * rhsPtr[7]) +
 126                  (value2 * rhsPtr[11]) +
 127                  (value3 * rhsPtr[15]);
 128   }
 129
 130 #else
 131
 132   // 64 32bit registers,
 133   // aliased to
 134   // d = 64 bit double-word d0 -d31
 135   // q =128 bit quad-word   q0 -q15  (enough to handle a column of 4 floats in a matrix)
 136   // e.g. q0 = d0 and d1
 137
 138   // load and stores interleaved as NEON can load and store while calculating
 139   asm volatile(
 140     "VLDM         %1,  {q0-q3}        \n\t" // load matrix 1 (lhsPtr) q[0..q3]
 141     "VLDM         %0,  {q8-q11}       \n\t" // load matrix 2 (rhsPtr) q[q8-q11]
 142     "VMUL.F32     q12, q8, d0[0]      \n\t" // column 0 = rhsPtr[0..3] * lhsPtr[0..3]
 143     "VMUL.F32     q13, q8, d2[0]      \n\t" // column 1 = rhsPtr[0..3] * lhsPtr[4..7]
 144     "VMUL.F32     q14, q8, d4[0]      \n\t" // column 2 = rhsPtr[0..3] * lhsPtr[8..11]
 145     "VMUL.F32     q15, q8, d6[0]      \n\t" // column 3 = rhsPtr[0..3] * lhsPtr[12..15]
 146
 147     "VMLA.F32     q12, q9, d0[1]      \n\t" // column 0 += rhsPtr[4..7] * lhsPtr[0..3]
 148     "VMLA.F32     q13, q9, d2[1]      \n\t" // column 1 += rhsPtr[4..7] * lhsPtr[4..7]
 149     "VMLA.F32     q14, q9, d4[1]      \n\t" // column 2 += rhsPtr[4..7] * lhsPtr[8..11]
 150     "VMLA.F32     q15, q9, d6[1]      \n\t" // column 3 += rhsPtr[4..7] * lhsPtr[12..15]
 151
 152     "VMLA.F32     q12, q10, d1[0]     \n\t" // column 0 += rhsPtr[8..11] * lhsPtr[0..3]
 153     "VMLA.F32     q13, q10, d3[0]     \n\t" // column 1 += rhsPtr[8..11] * lhsPtr[4..7]
 154     "VMLA.F32     q14, q10, d5[0]     \n\t" // column 2 += rhsPtr[8..11] * lhsPtr[8..11]
 155     "VMLA.F32     q15, q10, d7[0]     \n\t" // column 3 += rhsPtr[8..11] * lhsPtr[12..15]
 156
 157     "VMLA.F32     q12, q11, d1[1]     \n\t" // column 0 += rhsPtr[12..15] * lhsPtr[0..3]
 158     "VMLA.F32     q13, q11, d3[1]     \n\t" // column 1 += rhsPtr[12..15] * lhsPtr[4..7]
 159     "VMLA.F32     q14, q11, d5[1]     \n\t" // column 2 += rhsPtr[12..15] * lhsPtr[8..11]
 160     "VMLA.F32     q15, q11, d7[1]     \n\t" // column 3 += rhsPtr[12..15] * lhsPtr[12..15]
 161     "VSTM         %2,  {q12-q15}      \n\t" // store entire output matrix.
 162     : "+r"(rhsPtr), "+r"(lhsPtr), "+r"(temp)
 163     :
 164     : "q0", "q1", "q2", "q3", "q8", "q9", "q10", "q11", "q12", "q13", "q14", "q15", "memory");
 165
 166 #endif
 167 }
 168
 169 void Multiply(Dali::Matrix& result, const Dali::Matrix& lhs, const Dali::Quaternion& rhs)
 170 {
 171   MATH_INCREASE_COUNTER(PerformanceMonitor::MATRIX_MULTIPLYS);
 172   MATH_INCREASE_BY(PerformanceMonitor::FLOAT_POINT_MULTIPLY, 54); // 54 = 36+18
 173
 174   float  matrix[16];
 175   float* rhsPtr = &matrix[0];
 176   ConvertQuaternion(rhsPtr, rhs);
 177
 178   // quaternion contains just rotation so it really only needs 3x3 matrix
 179
 180   float*       temp   = result.AsFloat();
 181   const float* lhsPtr = lhs.AsFloat();
 182
 183 #ifndef __ARM_NEON__
 184
 185   for(int32_t i = 0; i < 4; i++)
 186   {
 187     // i<<2 gives the first vector / column
 188     const int32_t loc0 = i << 2;
 189     const int32_t loc1 = loc0 + 1;
 190     const int32_t loc2 = loc0 + 2;
 191     const int32_t loc3 = loc0 + 3;
 192
 193     const float value0 = lhsPtr[loc0];
 194     const float value1 = lhsPtr[loc1];
 195     const float value2 = lhsPtr[loc2];
 196     const float value3 = lhsPtr[loc3];
 197
 198     temp[loc0] = (value0 * rhsPtr[0]) +
 199                  (value1 * rhsPtr[4]) +
 200                  (value2 * rhsPtr[8]) +
 201                  (0.0f); //value3 * rhsPtr[12] is 0.0f
 202
 203     temp[loc1] = (value0 * rhsPtr[1]) +
 204                  (value1 * rhsPtr[5]) +
 205                  (value2 * rhsPtr[9]) +
 206                  (0.0f); //value3 * rhsPtr[13] is 0.0f
 207
 208     temp[loc2] = (value0 * rhsPtr[2]) +
 209                  (value1 * rhsPtr[6]) +
 210                  (value2 * rhsPtr[10]) +
 211                  (0.0f); //value3 * rhsPtr[14] is 0.0f
 212
 213     temp[loc3] = (0.0f) +  //value0 * rhsPtr[3] is 0.0f
 214                  (0.0f) +  //value1 * rhsPtr[7] is 0.0f
 215                  (0.0f) +  //value2 * rhsPtr[11] is 0.0f
 216                  (value3); // rhsPtr[15] is 1.0f
 217   }
 218
 219 #else
 220
 221   // 64 32bit registers,
 222   // aliased to
 223   // d = 64 bit double-word d0 -d31
 224   // q =128 bit quad-word   q0 -q15  (enough to handle a column of 4 floats in a matrix)
 225   // e.g. q0 = d0 and d1
 226   // load and stores interleaved as NEON can load and store while calculating
 227   asm volatile(
 228     "VLDM         %1,   {q4-q6}       \n\t" // load matrix 1 (lhsPtr)
 229     "VLD1.F32     {q7}, [%2]!         \n\t" // load matrix 2 (rhsPtr) [0..3]
 230     "VMUL.F32     q0,   q7,   d8[0]   \n\t" // column 0 = rhsPtr[0..3] * lhsPtr[0..3]
 231     "VMUL.F32     q1,   q7,   d10[0]  \n\t" // column 1 = rhsPtr[0..3] * lhsPtr[4..7]
 232     "VMUL.F32     q2,   q7,   d12[0]  \n\t" // column 2 = rhsPtr[0..3] * lhsPtr[8..11]
 233     "VLD1.F32     {q7}, [%2]!         \n\t" // load matrix 2 (rhsPtr) [4..7]
 234     "VMLA.F32     q0,   q7,   d8[1]   \n\t" // column 0+= rhsPtr[4..7] * lhsPtr[0..3]
 235     "VMLA.F32     q1,   q7,   d10[1]  \n\t" // column 1+= rhsPtr[4..7] * lhsPtr[4..7]
 236     "VMLA.F32     q2,   q7,   d12[1]  \n\t" // column 2+= rhsPtr[4..7] * lhsPtr[8..11]
 237     "VLD1.F32     {q7}, [%2]!         \n\t" // load matrix 2 (rhsPtr) [8..11]
 238     "VMLA.F32     q0,   q7,   d9[0]   \n\t" // column 0+= rhsPtr[8..11] * lhsPtr[0..3]
 239     "VMLA.F32     q1,   q7,   d11[0]  \n\t" // column 1+= rhsPtr[8..11] * lhsPtr[4..7]
 240     "VMLA.F32     q2,   q7,   d13[0]  \n\t" // column 2+= rhsPtr[8..11] * lhsPtr[8..11]
 241     "VSTM         %0,   {q0-q2}       \n\t" // store entire output matrix.
 242     :
 243     : "r"(temp), "r"(lhsPtr), "r"(rhsPtr)
 244     : "%r0", "%q0", "%q1", "%q2", "%q4", "%q5", "%q6", "%q7", "memory");
 245
 246   temp[12] = 0.0f;
 247   temp[13] = 0.0f;
 248   temp[14] = 0.0f;
 249   temp[15] = 1.0f;
 250 #endif
 251 }
 252
 253 void MultiplyProjectionMatrix(Dali::Matrix& result, const Dali::Matrix& lhs, const Dali::Matrix& projection)
 254 {
 255   // TODO : Implement with NEON.
 256   // Current NEON code is copy of Multiply.
 257
 258   MATH_INCREASE_COUNTER(PerformanceMonitor::MATRIX_MULTIPLYS);
 259   MATH_INCREASE_BY(PerformanceMonitor::FLOAT_POINT_MULTIPLY, 40); // 40 = 10*4
 260
 261   float*       temp   = result.AsFloat();
 262   const float* rhsPtr = projection.AsFloat();
 263   const float* lhsPtr = lhs.AsFloat();
 264
 265 #ifndef __ARM_NEON__
 266
 267   // We only use rhsPtr's 0, 1, 2, 4, 5, 6, 10, 11, 14, 15 index.
 268   const float rhs0  = rhsPtr[0];
 269   const float rhs1  = rhsPtr[1];
 270   const float rhs2  = rhsPtr[2];
 271   const float rhs4  = rhsPtr[4];
 272   const float rhs5  = rhsPtr[5];
 273   const float rhs6  = rhsPtr[6];
 274   const float rhs10 = rhsPtr[10];
 275   const float rhs11 = rhsPtr[11];
 276   const float rhs14 = rhsPtr[14];
 277   const float rhs15 = rhsPtr[15];
 278
 279   for(int32_t i = 0; i < 4; i++)
 280   {
 281     // i<<2 gives the first vector / column
 282     const int32_t loc0 = i << 2;
 283     const int32_t loc1 = loc0 + 1;
 284     const int32_t loc2 = loc0 + 2;
 285     const int32_t loc3 = loc0 + 3;
 286
 287     const float value0 = lhsPtr[loc0];
 288     const float value1 = lhsPtr[loc1];
 289     const float value2 = lhsPtr[loc2];
 290     const float value3 = lhsPtr[loc3];
 291
 292     temp[loc0] = (value0 * rhs0) + (value1 * rhs4);
 293     temp[loc1] = (value0 * rhs1) + (value1 * rhs5);
 294     temp[loc2] = (value0 * rhs2) + (value1 * rhs6) + (value2 * rhs10) + (value3 * rhs14);
 295     temp[loc3] = (value2 * rhs11) + (value3 * rhs15);
 296   }
 297
 298 #else
 299
 300   // 64 32bit registers,
 301   // aliased to
 302   // d = 64 bit double-word d0 -d31
 303   // q =128 bit quad-word   q0 -q15  (enough to handle a column of 4 floats in a matrix)
 304   // e.g. q0 = d0 and d1
 305
 306   // load and stores interleaved as NEON can load and store while calculating
 307   asm volatile(
 308     "VLDM         %1,  {q0-q3}        \n\t" // load matrix 1 (lhsPtr) q[0..q3]
 309     "VLDM         %0,  {q8-q11}       \n\t" // load matrix 2 (rhsPtr) q[q8-q11]
 310     "VMUL.F32     q12, q8, d0[0]      \n\t" // column 0 = rhsPtr[0..3] * lhsPtr[0..3]
 311     "VMUL.F32     q13, q8, d2[0]      \n\t" // column 1 = rhsPtr[0..3] * lhsPtr[4..7]
 312     "VMUL.F32     q14, q8, d4[0]      \n\t" // column 2 = rhsPtr[0..3] * lhsPtr[8..11]
 313     "VMUL.F32     q15, q8, d6[0]      \n\t" // column 3 = rhsPtr[0..3] * lhsPtr[12..15]
 314
 315     "VMLA.F32     q12, q9, d0[1]      \n\t" // column 0 += rhsPtr[4..7] * lhsPtr[0..3]
 316     "VMLA.F32     q13, q9, d2[1]      \n\t" // column 1 += rhsPtr[4..7] * lhsPtr[4..7]
 317     "VMLA.F32     q14, q9, d4[1]      \n\t" // column 2 += rhsPtr[4..7] * lhsPtr[8..11]
 318     "VMLA.F32     q15, q9, d6[1]      \n\t" // column 3 += rhsPtr[4..7] * lhsPtr[12..15]
 319
 320     "VMLA.F32     q12, q10, d1[0]     \n\t" // column 0 += rhsPtr[8..11] * lhsPtr[0..3]
 321     "VMLA.F32     q13, q10, d3[0]     \n\t" // column 1 += rhsPtr[8..11] * lhsPtr[4..7]
 322     "VMLA.F32     q14, q10, d5[0]     \n\t" // column 2 += rhsPtr[8..11] * lhsPtr[8..11]
 323     "VMLA.F32     q15, q10, d7[0]     \n\t" // column 3 += rhsPtr[8..11] * lhsPtr[12..15]
 324
 325     "VMLA.F32     q12, q11, d1[1]     \n\t" // column 0 += rhsPtr[12..15] * lhsPtr[0..3]
 326     "VMLA.F32     q13, q11, d3[1]     \n\t" // column 1 += rhsPtr[12..15] * lhsPtr[4..7]
 327     "VMLA.F32     q14, q11, d5[1]     \n\t" // column 2 += rhsPtr[12..15] * lhsPtr[8..11]
 328     "VMLA.F32     q15, q11, d7[1]     \n\t" // column 3 += rhsPtr[12..15] * lhsPtr[12..15]
 329     "VSTM         %2,  {q12-q15}      \n\t" // store entire output matrix.
 330     : "+r"(rhsPtr), "+r"(lhsPtr), "+r"(temp)
 331     :
 332     : "q0", "q1", "q2", "q3", "q8", "q9", "q10", "q11", "q12", "q13", "q14", "q15", "memory");
 333
 334 #endif
 335 }
 336
 337 void MultiplyAssign(Dali::Matrix& result, const Dali::Matrix& rhs)
 338 {
 339   MATH_INCREASE_COUNTER(PerformanceMonitor::MATRIX_MULTIPLYS);
 340   MATH_INCREASE_BY(PerformanceMonitor::FLOAT_POINT_MULTIPLY, 64); // 64 = 16*4
 341
 342   // TODO : Implement with NEON.
 343
 344   float*       lhsPtr = result.AsFloat();
 345   const float* rhsPtr = rhs.AsFloat();
 346   float*       temp   = nullptr;
 347
 348   if(lhsPtr == rhsPtr)
 349   {
 350     // If rhs is same matrix with result, we need to copy temperal vaules.
 351     temp = static_cast<float*>(malloc(NUM_BYTES_IN_MATRIX));
 352     memcpy(temp, rhsPtr, NUM_BYTES_IN_MATRIX);
 353     rhsPtr = temp;
 354   }
 355
 356   // Calculate and store as row major.
 357   for(int32_t i = 0; i < 4; i++)
 358   {
 359     const int32_t loc0 = i;
 360     const int32_t loc1 = loc0 | 4;
 361     const int32_t loc2 = loc0 | 8;
 362     const int32_t loc3 = loc0 | 12;
 363
 364     const float value0 = lhsPtr[loc0];
 365     const float value1 = lhsPtr[loc1];
 366     const float value2 = lhsPtr[loc2];
 367     const float value3 = lhsPtr[loc3];
 368
 369     lhsPtr[loc0] = (value0 * rhsPtr[0]) +
 370                    (value1 * rhsPtr[1]) +
 371                    (value2 * rhsPtr[2]) +
 372                    (value3 * rhsPtr[3]);
 373
 374     lhsPtr[loc1] = (value0 * rhsPtr[4]) +
 375                    (value1 * rhsPtr[5]) +
 376                    (value2 * rhsPtr[6]) +
 377                    (value3 * rhsPtr[7]);
 378
 379     lhsPtr[loc2] = (value0 * rhsPtr[8]) +
 380                    (value1 * rhsPtr[9]) +
 381                    (value2 * rhsPtr[10]) +
 382                    (value3 * rhsPtr[11]);
 383
 384     lhsPtr[loc3] = (value0 * rhsPtr[12]) +
 385                    (value1 * rhsPtr[13]) +
 386                    (value2 * rhsPtr[14]) +
 387                    (value3 * rhsPtr[15]);
 388   }
 389
 390   if(temp)
 391   {
 392     // If we allocate temperal memory, we should free it.
 393     free(temp);
 394   }
 395 }
 396
 397 // Dali::Matrix3
 398
 399 void Multiply(Dali::Matrix3& result, const Dali::Matrix3& lhs, const Dali::Matrix3& rhs)
 400 {
 401   float*       temp   = result.AsFloat();
 402   const float* rhsPtr = rhs.AsFloat();
 403   const float* lhsPtr = lhs.AsFloat();
 404
 405   for(int32_t i = 0; i < 3; i++)
 406   {
 407     const int32_t loc0 = i * 3;
 408     const int32_t loc1 = loc0 + 1;
 409     const int32_t loc2 = loc0 + 2;
 410
 411     const float value0 = lhsPtr[loc0];
 412     const float value1 = lhsPtr[loc1];
 413     const float value2 = lhsPtr[loc2];
 414
 415     temp[loc0] = (value0 * rhsPtr[0]) +
 416                  (value1 * rhsPtr[3]) +
 417                  (value2 * rhsPtr[6]);
 418
 419     temp[loc1] = (value0 * rhsPtr[1]) +
 420                  (value1 * rhsPtr[4]) +
 421                  (value2 * rhsPtr[7]);
 422
 423     temp[loc2] = (value0 * rhsPtr[2]) +
 424                  (value1 * rhsPtr[5]) +
 425                  (value2 * rhsPtr[8]);
 426   }
 427 }
 428
 429 void MultiplyAssign(Dali::Matrix3& result, const Dali::Matrix3& rhs)
 430 {
 431   float*       lhsPtr = result.AsFloat();
 432   const float* rhsPtr = rhs.AsFloat();
 433   float*       temp   = nullptr;
 434
 435   if(lhsPtr == rhsPtr)
 436   {
 437     // If rhs is same matrix with result, we need to copy temperal vaules.
 438     temp = static_cast<float*>(malloc(NUM_BYTES_IN_MATRIX3));
 439     memcpy(temp, rhsPtr, NUM_BYTES_IN_MATRIX3);
 440     rhsPtr = temp;
 441   }
 442
 443   // Calculate and store as row major.
 444   for(int32_t i = 0; i < 3; i++)
 445   {
 446     const int32_t loc0 = i;
 447     const int32_t loc1 = loc0 + 3;
 448     const int32_t loc2 = loc0 + 6;
 449
 450     const float value0 = lhsPtr[loc0];
 451     const float value1 = lhsPtr[loc1];
 452     const float value2 = lhsPtr[loc2];
 453
 454     lhsPtr[loc0] = (value0 * rhsPtr[0]) +
 455                    (value1 * rhsPtr[1]) +
 456                    (value2 * rhsPtr[2]);
 457
 458     lhsPtr[loc1] = (value0 * rhsPtr[3]) +
 459                    (value1 * rhsPtr[4]) +
 460                    (value2 * rhsPtr[5]);
 461
 462     lhsPtr[loc2] = (value0 * rhsPtr[6]) +
 463                    (value1 * rhsPtr[7]) +
 464                    (value2 * rhsPtr[8]);
 465   }
 466
 467   if(temp)
 468   {
 469     // If we allocate temperal memory, we should free it.
 470     free(temp);
 471   }
 472 }
 473
 474 } // namespace MatrixUtils
 475 } // namespace Dali::Internal