From: Eunki, Hong <eunkiki.hong@samsung.com>
Date: Tue, 13 May 2025 08:22:15 +0000 (+0900)
Subject: Minor optimization for matrix operations
X-Git-Tag: accepted/tizen/unified/20250515.075542~1^2~1
X-Git-Url: http://review.tizen.org/git/?a=commitdiff_plain;h=refs%2Fchanges%2F35%2F324135%2F7;p=platform%2Fcore%2Fuifw%2Fdali-core.git

Minor optimization for matrix operations

1. Make GetScale() faster by computing the axis lengths without creating Vector3 objects
2. Add new API GetScaleXY() for dirty rect calculation
3. Optimize bounding sphere calculation logic

Change-Id: Ifde5a2f0a1e3056c9443eff3be5bf8bc13561631
Signed-off-by: Eunki, Hong <eunkiki.hong@samsung.com>
---

diff --git a/automated-tests/src/dali/utc-Dali-Matrix.cpp b/automated-tests/src/dali/utc-Dali-Matrix.cpp
index da8e2652f..f16d360fc 100644
--- a/automated-tests/src/dali/utc-Dali-Matrix.cpp
+++ b/automated-tests/src/dali/utc-Dali-Matrix.cpp
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2023 Samsung Electronics Co., Ltd.
+ * Copyright (c) 2025 Samsung Electronics Co., Ltd.
  *
  * Licensed under the Apache License, Version 2.0 (the "License");
  * you may not use this file except in compliance with the License.
@@ -415,6 +415,7 @@ int UtcDaliMatrixGetScale(void)
       for(float z = 0.0f; z < 2.0f; z += 0.1f)
       {
         Vector3 vScale(x, y, z);
+        Vector2 vScaleXY(x, y);
 
         for(float angle = 5.0f; angle <= 360.0f; angle += 15.0f)
         {
@@ -427,9 +428,11 @@ int UtcDaliMatrixGetScale(void)
           Matrix m1(false);
           m1.SetTransformComponents(vScale, rotation1, position1);
 
-          Vector3 scale2 = m1.GetScale();
+          Vector3 scale2   = m1.GetScale();
+          Vector2 scale2XY = m1.GetScaleXY();
 
           DALI_TEST_EQUALS(vScale, scale2, 0.001, TEST_LOCATION);
+          DALI_TEST_EQUALS(vScaleXY, scale2XY, 0.001, TEST_LOCATION);
         }
       }
     }
diff --git a/automated-tests/src/dali/utc-Dali-Vector4.cpp b/automated-tests/src/dali/utc-Dali-Vector4.cpp
index cd96beac9..fec963121 100644
--- a/automated-tests/src/dali/utc-Dali-Vector4.cpp
+++ b/automated-tests/src/dali/utc-Dali-Vector4.cpp
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2020 Samsung Electronics Co., Ltd.
+ * Copyright (c) 2025 Samsung Electronics Co., Ltd.
  *
  * Licensed under the Apache License, Version 2.0 (the "License");
  * you may not use this file except in compliance with the License.
@@ -87,6 +87,17 @@ int UtcDaliVector4Constructor05P(void)
   END_TEST;
 }
 
+int UtcDaliVector4Constructor06P(void)
+{
+  const Vector3 xyz(1.0f, 2.0f, 3.0f); // Supplies the x, y and z components
+  const Vector4 result(xyz, 4.0f);     // Constructor under test: (Vector3, w)
+  DALI_TEST_EQUALS(result.x, 1.0f, TEST_LOCATION);
+  DALI_TEST_EQUALS(result.y, 2.0f, TEST_LOCATION);
+  DALI_TEST_EQUALS(result.z, 3.0f, TEST_LOCATION);
+  DALI_TEST_EQUALS(result.w, 4.0f, TEST_LOCATION);
+  END_TEST;
+}
+
 int UtcDaliVector4CopyConstructor(void)
 {
   TestApplication application;
diff --git a/dali/internal/common/math.cpp b/dali/internal/common/math.cpp
index 24cec736e..639c5f70a 100644
--- a/dali/internal/common/math.cpp
+++ b/dali/internal/common/math.cpp
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2021 Samsung Electronics Co., Ltd.
+ * Copyright (c) 2025 Samsung Electronics Co., Ltd.
  *
  * Licensed under the Apache License, Version 2.0 (the "License");
  * you may not use this file except in compliance with the License.
@@ -41,14 +41,14 @@ void Dali::Internal::TransformVector3(Vec3 result, const Mat4 m, const Vec3 v)
   Vec4 tempResult;
 
   asm volatile(
-    "VLD1.F32   {q0}, [%1]     \n\t" //Load "temp" from memory to register q0
-    "VLD1.F32   {q1}, [%0]!    \n\t" //Load first row of the matrix from memory to register q1
-    "VMUL.F32   q2, q1, d0[0]  \n\t" //q2 = (m[0..3] * v.x)
-    "VLD1.F32   {q1}, [%0]!    \n\t" //Load second row of the matrix from memory
-    "VMLA.F32   q2, q1, d0[1]  \n\t" //q2 = (m[0..3] * v.x) + (m[4..7] * v.y)
-    "VLD1.F32   {q1}, [%0]!    \n\t" //Load third row of the matrix from memory
-    "VMLA.F32   q2, q1, d1[0]  \n\t" //q2 = (m[0..3] * v.x) + (m[4..7] * v.y) + (m[8...11] * v.z)
-    "VST1.F32   {q2}, [%2]     \n\t" //Write the result back to memory
+    "VLD1.F32   {q0}, [%1]     \n\t" // Load "temp" from memory to register q0
+    "VLD1.F32   {q1}, [%0]!    \n\t" // Load first row of the matrix from memory to register q1
+    "VMUL.F32   q2, q1, d0[0]  \n\t" // q2 = (m[0..3] * v.x)
+    "VLD1.F32   {q1}, [%0]!    \n\t" // Load second row of the matrix from memory
+    "VMLA.F32   q2, q1, d0[1]  \n\t" // q2 = (m[0..3] * v.x) + (m[4..7] * v.y)
+    "VLD1.F32   {q1}, [%0]!    \n\t" // Load third row of the matrix from memory
+    "VMLA.F32   q2, q1, d1[0]  \n\t" // q2 = (m[0..3] * v.x) + (m[4..7] * v.y) + (m[8...11] * v.z)
+    "VST1.F32   {q2}, [%2]     \n\t" // Write the result back to memory
     :
     : "r"(m), "r"(temp), "r"(tempResult)
     : "q0", "q1", "q2", "memory");
@@ -79,3 +79,8 @@ float Dali::Internal::Length(const Vec3 v)
 {
   return sqrtf(v[0] * v[0] + v[1] * v[1] + v[2] * v[2]);
 }
+
+float Dali::Internal::TransformFloat(const Mat4 m, const float x)
+{
+  return sqrtf(m[0] * m[0] + m[1] * m[1] + m[2] * m[2]) * x; // Length of m * (x, 0, 0): length of (m[0], m[1], m[2]) scaled by x
+}
diff --git a/dali/internal/common/math.h b/dali/internal/common/math.h
index c3dd17782..e10c067a6 100644
--- a/dali/internal/common/math.h
+++ b/dali/internal/common/math.h
@@ -2,7 +2,7 @@
 #define DALI_INTERNAL_MATH_H
 
 /*
- * Copyright (c) 2021 Samsung Electronics Co., Ltd.
+ * Copyright (c) 2025 Samsung Electronics Co., Ltd.
  *
  * Licensed under the Apache License, Version 2.0 (the "License");
  * you may not use this file except in compliance with the License.
@@ -57,6 +57,15 @@ Vector2 Transform2D(const Matrix& matrix, const float x, const float y);
  */
 float Length(const Vec3 v);
 
+/**
+ * @brief Applies a transformation matrix to a Vector3(x, 0, 0) and gets the length of the resulting vector
+ *
+ * @param[in] m The transformation matrix
+ * @param[in] x The x component of the vector to transform
+ * @return      The length of the transformed vector
+ */
+float TransformFloat(const Mat4 m, const float x);
+
 } // namespace Internal
 
 } // namespace Dali
diff --git a/dali/internal/update/manager/render-instruction-processor.cpp b/dali/internal/update/manager/render-instruction-processor.cpp
index 52270a056..05202ec1c 100644
--- a/dali/internal/update/manager/render-instruction-processor.cpp
+++ b/dali/internal/update/manager/render-instruction-processor.cpp
@@ -212,13 +212,13 @@ inline void AddRendererToRenderList(BufferIndex               updateBufferIndex,
       node->UpdatePartialRenderingData(updateBufferIndex, isLayer3d);
 
       const Vector4& nodeUpdateArea = nodePartialRenderingData.mNodeInfomations.updatedPositionSize;
-      const Vector3& nodeScale      = nodePartialRenderingData.mNodeInfomations.modelMatrix.GetScale();
+      const Vector2& nodeScaleXY    = nodePartialRenderingData.mNodeInfomations.modelMatrix.GetScaleXY();
 
       nodePartialRenderingDataUpdateChecked = true;
 
-      const Vector3& size = Vector3(nodeUpdateArea.z, nodeUpdateArea.w, 0.0f) * nodeScale;
+      const Vector2& sizeXY = Vector2(nodeUpdateArea.z * nodeScaleXY.x, nodeUpdateArea.w * nodeScaleXY.y);
 
-      if(size.LengthSquared() > Math::MACHINE_EPSILON_1000)
+      if(sizeXY.LengthSquared() > Math::MACHINE_EPSILON_1000)
       {
         MatrixUtils::MultiplyTransformMatrix(nodeModelViewMatrix, nodePartialRenderingData.mNodeInfomations.modelMatrix, viewMatrix);
         nodeModelViewMatrixSet = true;
diff --git a/dali/internal/update/manager/transform-manager.cpp b/dali/internal/update/manager/transform-manager.cpp
index e0289c44b..775e3860b 100644
--- a/dali/internal/update/manager/transform-manager.cpp
+++ b/dali/internal/update/manager/transform-manager.cpp
@@ -496,13 +496,16 @@ bool TransformManager::Update()
       }
     }
 
-    // Update the bounding sphere
-    Vec3 centerToEdge = {mSize[i].Length() * 0.5f, 0.0f, 0.0f};
-    Vec3 centerToEdgeWorldSpace;
-    TransformVector3(centerToEdgeWorldSpace, mWorld[i].AsFloat(), centerToEdge);
+    // TODO : mComponentDirty is checked as well because a size change must also update the bounding sphere.
+    //        Could we check for size changes only?
+    if(mComponentDirty[i] || mWorldMatrixDirty[i])
+    {
+      // Update the bounding sphere
+      float centerToEdge           = mSize[i].Length() * 0.5f;
+      float centerToEdgeWorldSpace = TransformFloat(mWorld[i].AsFloat(), centerToEdge);
 
-    mBoundingSpheres[i]   = mWorld[i].GetTranslation();
-    mBoundingSpheres[i].w = Length(centerToEdgeWorldSpace);
+      mBoundingSpheres[i] = Vector4(mWorld[i].GetTranslation3(), centerToEdgeWorldSpace); // The temporary is already an rvalue; no std::move needed
+    }
 
     mUpdated = mUpdated || mWorldMatrixDirty[i];
 
diff --git a/dali/public-api/math/matrix.cpp b/dali/public-api/math/matrix.cpp
index 7b810bbdb..f0741c671 100644
--- a/dali/public-api/math/matrix.cpp
+++ b/dali/public-api/math/matrix.cpp
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2023 Samsung Electronics Co., Ltd.
+ * Copyright (c) 2025 Samsung Electronics Co., Ltd.
  *
  * Licensed under the Apache License, Version 2.0 (the "License");
  * you may not use this file except in compliance with the License.
@@ -30,6 +30,7 @@
 #include <dali/public-api/common/dali-common.h>
 #include <dali/public-api/math/math-utils.h>
 #include <dali/public-api/math/quaternion.h>
+#include <dali/public-api/math/vector2.h>
 #include <dali/public-api/math/vector3.h>
 #include <dali/public-api/math/vector4.h>
 
@@ -290,17 +291,17 @@ Vector4 Matrix::operator*(const Vector4& rhs) const
   // e.g. q0 = d0 and d1
   // load and stores interleaved as NEON can load and store while calculating
   asm volatile(
-    "VLD1.F32     {q0}, [%1]        \n\t" //q0 = rhs
+    "VLD1.F32     {q0}, [%1]        \n\t" // q0 = rhs
     "VLD1.F32     {q9}, [%0]!       \n\t"
     "VMUL.F32     q10,  q9,   d0[0] \n\t"
     "VLD1.F32     {q9}, [%0]!       \n\t"
-    "VMLA.F32     q10,  q9,   d0[1] \n\t" //q10 = mMatrix[0..3] * rhs + mMatrix[4..7] * rhs
+    "VMLA.F32     q10,  q9,   d0[1] \n\t" // q10 = mMatrix[0..3] * rhs + mMatrix[4..7] * rhs
     "VLD1.F32     {q9}, [%0]!       \n\t"
     "VMUL.F32     q11,  q9,   d1[0] \n\t"
     "VLD1.F32     {q9}, [%0]!       \n\t"
-    "VMLA.F32     q11,  q9,   d1[1] \n\t" //q11 = mMatrix[8..11] * rhs + mMatrix[12..15] * rhs
+    "VMLA.F32     q11,  q9,   d1[1] \n\t" // q11 = mMatrix[8..11] * rhs + mMatrix[12..15] * rhs
     "VADD.F32     q10,  q10,  q11   \n\t"
-    "VST1.F32     {q10},[%2]        \n\t" //temp = q10 + q11
+    "VST1.F32     {q10},[%2]        \n\t" // temp = q10 + q11
     :
     : "r"(mMatrix), "r"(&rhs), "r"(&temp)
     : "q0", "q9", "q10", "q11", "memory");
@@ -564,7 +565,15 @@ void Matrix::SetInverseTransformComponents(const Vector3& xAxis,
 Vector3 Matrix::GetScale() const
 {
   // Derive scale from axis lengths.
-  return Vector3(GetXAxis().Length(), GetYAxis().Length(), GetZAxis().Length());
+  // Note : To avoid struct creation, access the memory directly instead of calling GetXAxis(), GetYAxis() and GetZAxis().
+  return Vector3(reinterpret_cast<const Vector3&>(mMatrix[0]).Length(), reinterpret_cast<const Vector3&>(mMatrix[4]).Length(), reinterpret_cast<const Vector3&>(mMatrix[8]).Length());
+}
+
+Vector2 Matrix::GetScaleXY() const
+{
+  // Derive the x and y scale from the lengths of the X and Y axes.
+  // Note : To avoid struct creation, access the memory directly instead of calling GetXAxis() and GetYAxis().
+  return Vector2(reinterpret_cast<const Vector3&>(mMatrix[0]).Length(), reinterpret_cast<const Vector3&>(mMatrix[4]).Length());
 }
 
 void Matrix::GetTransformComponents(Vector3&    position,
diff --git a/dali/public-api/math/matrix.h b/dali/public-api/math/matrix.h
index 332163db1..322e71ae4 100644
--- a/dali/public-api/math/matrix.h
+++ b/dali/public-api/math/matrix.h
@@ -2,7 +2,7 @@
 #define DALI_MATRIX_H
 
 /*
- * Copyright (c) 2023 Samsung Electronics Co., Ltd.
+ * Copyright (c) 2025 Samsung Electronics Co., Ltd.
  *
  * Licensed under the Apache License, Version 2.0 (the "License");
  * you may not use this file except in compliance with the License.
@@ -266,6 +266,16 @@ public:
    */
   Vector3 GetScale() const;
 
+  /**
+   * @brief Gets the x,y components of the scale as a Vector2.
+   * Note that transform scale always has positive components.
+   *
+   * This assumes the matrix is a transform matrix.
+   * @SINCE_2_4.19
+   * @return The x and y scale, derived from the lengths of the X and Y axes
+   */
+  Vector2 GetScaleXY() const;
+
   /**
    * @brief Sets the translation.
    *
diff --git a/dali/public-api/math/vector4.cpp b/dali/public-api/math/vector4.cpp
index b57170ed3..a1a3d7970 100644
--- a/dali/public-api/math/vector4.cpp
+++ b/dali/public-api/math/vector4.cpp
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2020 Samsung Electronics Co., Ltd.
+ * Copyright (c) 2025 Samsung Electronics Co., Ltd.
  *
  * Licensed under the Apache License, Version 2.0 (the "License");
  * you may not use this file except in compliance with the License.
@@ -55,6 +55,14 @@ Vector4::Vector4(const Vector3& vec3)
 {
 }
 
+Vector4::Vector4(const Vector3& vec3, float w)
+: x{vec3.x}, // x, y and z are copied from the Vector3
+  y{vec3.y},
+  z{vec3.z},
+  w{w} // The fourth component is supplied separately
+{
+}
+
 Vector4& Vector4::operator=(const Vector2& vec2)
 {
   x = vec2.x;
diff --git a/dali/public-api/math/vector4.h b/dali/public-api/math/vector4.h
index 3fa22e480..6663634fc 100644
--- a/dali/public-api/math/vector4.h
+++ b/dali/public-api/math/vector4.h
@@ -2,7 +2,7 @@
 #define DALI_VECTOR_4_H
 
 /*
- * Copyright (c) 2022 Samsung Electronics Co., Ltd.
+ * Copyright (c) 2025 Samsung Electronics Co., Ltd.
  *
  * Licensed under the Apache License, Version 2.0 (the "License");
  * you may not use this file except in compliance with the License.
@@ -110,6 +110,15 @@ struct DALI_CORE_API Vector4
    */
   explicit Vector4(const Vector3& vec3);
 
+  /**
+   * @brief Constructor from a Vector3 and a separate w component.
+   *
+   * @SINCE_2_4.19
+   * @param[in] vec3 Vector3 supplying the x, y and z components
+   * @param[in] w w (or a/q) component
+   */
+  explicit Vector4(const Vector3& vec3, float w);
+
   // Constants
   static const Vector4 ONE;   ///< (1.0f,1.0f,1.0f,1.0f)
   static const Vector4 XAXIS; ///< (1.0f,0.0f,0.0f,0.0f)
@@ -567,9 +576,9 @@ struct DALI_CORE_API Vector4
   };
 
 public:
-  Vector4(const Vector4&)     = default;            ///< Default copy constructor
-  Vector4(Vector4&&) noexcept = default;            ///< Default move constructor
-  Vector4& operator=(const Vector4&) = default;     ///< Default copy assignment operator
+  Vector4(const Vector4&)                = default; ///< Default copy constructor
+  Vector4(Vector4&&) noexcept            = default; ///< Default move constructor
+  Vector4& operator=(const Vector4&)     = default; ///< Default copy assignment operator
   Vector4& operator=(Vector4&&) noexcept = default; ///< Default move assignment operator
 };