[Tizen] Backport some MatrixUtils Operation

author Eunki Hong <eunkiki.hong@samsung.com>

Sat, 4 Feb 2023 04:08:53 +0000 (13:08 +0900)

committer Eunki, Hong <eunkiki.hong@samsung.com>

Thu, 20 Apr 2023 04:10:31 +0000 (13:10 +0900)
author Eunki Hong <eunkiki.hong@samsung.com>
Sat, 4 Feb 2023 04:08:53 +0000 (13:08 +0900)
committer Eunki, Hong <eunkiki.hong@samsung.com>
Thu, 20 Apr 2023 04:10:31 +0000 (13:10 +0900)
diff --git a/automated-tests/src/dali-internal/utc-Dali-Internal-MatrixUtils.cpp b/automated-tests/src/dali-internal/utc-Dali-Internal-MatrixUtils.cpp

index 8b6ce8f..3f5b2c1 100644 (file)
--- a/automated-tests/src/dali-internal/utc-Dali-Internal-MatrixUtils.cpp
+++ b/automated-tests/src/dali-internal/utc-Dali-Internal-MatrixUtils.cpp
@@ -112,9 +112,61 @@ int UtcDaliMatrixUtilsMultiplyMatrixQuaternionP(void)
    END_TEST;
  }
  
+int UtcDaliMatrixUtilsMultiplyTransformMatrix(void)
+{
+  tet_infoline("Multiplication two transform matrixs\n");
+  // Checks that MultiplyTransformMatrix() matches the generic Multiply() for 10 random pairs of transform matrices.
+  Matrix expectMatrix;
+  Matrix resultMatrix;
+  for(int32_t repeatCount = 0; repeatCount < 10; repeatCount++)
+  {
+    Vector3    lpos         = Vector3(Dali::Random::Range(-50.0f, 50.0f), Dali::Random::Range(-50.0f, 50.0f), Dali::Random::Range(-50.0f, 50.0f));
+    Vector3    laxis        = Vector3(Dali::Random::Range(1.0f, 50.0f), Dali::Random::Range(-50.0f, 50.0f), Dali::Random::Range(-50.0f, 50.0f));
+    float      lradian      = Dali::Random::Range(0.0f, 5.0f);
+    Quaternion lorientation = Quaternion(Radian(lradian), laxis);
+    Vector3    lscale       = Vector3(Dali::Random::Range(-50.0f, 50.0f), Dali::Random::Range(-50.0f, 50.0f), Dali::Random::Range(-50.0f, 50.0f));
+
+    Vector3    rpos         = Vector3(Dali::Random::Range(-50.0f, 50.0f), Dali::Random::Range(-50.0f, 50.0f), Dali::Random::Range(-50.0f, 50.0f));
+    Vector3    raxis        = Vector3(Dali::Random::Range(1.0f, 50.0f), Dali::Random::Range(-50.0f, 50.0f), Dali::Random::Range(-50.0f, 50.0f));
+    float      rradian      = Dali::Random::Range(0.0f, 5.0f);
+    Quaternion rorientation = Quaternion(Radian(rradian), raxis);
+    Vector3    rscale       = Vector3(Dali::Random::Range(-50.0f, 50.0f), Dali::Random::Range(-50.0f, 50.0f), Dali::Random::Range(-50.0f, 50.0f));
+    // Build both operands from (scale, orientation, position); axis x uses Range(1.0f, 50.0f), presumably to keep the axis non-zero.
+    Matrix lhs, rhs;
+    lhs.SetTransformComponents(lscale, lorientation, lpos);
+    rhs.SetTransformComponents(rscale, rorientation, rpos);
+
+    // Reference result from the generic 4x4 Multiply API
+    Internal::MatrixUtils::Multiply(expectMatrix, lhs, rhs);
+    // Result under test from the MultiplyTransformMatrix API
+    Internal::MatrixUtils::MultiplyTransformMatrix(resultMatrix, lhs, rhs);
+    // Print all inputs and both outputs to the test log
+    {
+      std::ostringstream oss;
+      oss << "lhs          : " << lhs << "\n";
+      oss << "lpos         : " << lpos << "\n";
+      oss << "lorientation : " << lorientation << "\n";
+      oss << "lscale       : " << lscale << "\n";
+
+      oss << "rhs          : " << rhs << "\n";
+      oss << "rpos         : " << rpos << "\n";
+      oss << "rorientation : " << rorientation << "\n";
+      oss << "rscale       : " << rscale << "\n";
+
+      oss << "expect     : " << expectMatrix << "\n";
+      oss << "result     : " << resultMatrix << "\n";
+      tet_printf("test result : \n%s\n", oss.str().c_str());
+    }
+
+    DALI_TEST_EQUALS(expectMatrix, resultMatrix, 0.01f, TEST_LOCATION);
+  }
+
+  END_TEST;
+}
+
  int UtcDaliMatrixUtilsMultiplyProjectionMatrix(void)
  {
-  tet_infoline("Multiplication Assign operator with self matrix\n");
+  tet_infoline("Multiplication projection matrix and random matrix\n");
  
    Matrix viewMatrix;
    Matrix projectionMatrix;
diff --git a/automated-tests/src/dali/utc-Dali-CameraActor.cpp b/automated-tests/src/dali/utc-Dali-CameraActor.cpp

index c162585..71ada71 100644 (file)
--- a/automated-tests/src/dali/utc-Dali-CameraActor.cpp
+++ b/automated-tests/src/dali/utc-Dali-CameraActor.cpp
@@ -1,5 +1,5 @@
  /*
- * Copyright (c) 2022 Samsung Electronics Co., Ltd.
+ * Copyright (c) 2023 Samsung Electronics Co., Ltd.
   *
   * Licensed under the Apache License, Version 2.0 (the "License");
   * you may not use this file except in compliance with the License.
@@ -2304,6 +2304,37 @@ int UtcDaliCameraActorReflectionByPlane(void)
    matrixAfter.GetTransformComponents(position, rotation, scale);
    DALI_TEST_EQUALS(reflected, rotation, 0.01f, TEST_LOCATION);
  
+  // Test Orthographic camera
+  freeLookCameraActor.SetProjectionMode(Dali::Camera::ProjectionMode::ORTHOGRAPHIC_PROJECTION);
+
+  // Make sure the recalculation will take place
+  freeLookCameraActor.SetProperty(Dali::DevelCameraActor::Property::REFLECTION_PLANE, Vector4(0.0f, 1.0f, 0.0f, 0.0f));
+
+  application.SendNotification();
+  application.Render();
+  application.SendNotification();
+  application.Render();
+
+  // Nothing should change despite of different camera type
+  matrixAfter.GetTransformComponents(position, rotation, scale);
+  DALI_TEST_EQUALS(reflected, rotation, 0.01f, TEST_LOCATION);
+
+  // Test Orthographic camera + Look at target
+  freeLookCameraActor.SetType(Camera::LOOK_AT_TARGET);
+  freeLookCameraActor.SetTargetPosition(targetPosition);
+
+  // Make sure the recalculation will take place
+  freeLookCameraActor.SetProperty(Dali::DevelCameraActor::Property::REFLECTION_PLANE, Vector4(0.0f, 1.0f, 0.0f, 0.0f));
+
+  application.SendNotification();
+  application.Render();
+  application.SendNotification();
+  application.Render();
+
+  // Nothing should change despite of different camera type
+  matrixAfter.GetTransformComponents(position, rotation, scale);
+  DALI_TEST_EQUALS(reflected, rotation, 0.01f, TEST_LOCATION);
+
    END_TEST;
  }
  
diff --git a/dali/internal/common/matrix-utils.cpp b/dali/internal/common/matrix-utils.cpp

index ce2eb1f..6ad3d7d 100644 (file)
--- a/dali/internal/common/matrix-utils.cpp
+++ b/dali/internal/common/matrix-utils.cpp
@@ -1,5 +1,5 @@
  /*
- * Copyright (c) 2022 Samsung Electronics Co., Ltd.
+ * Copyright (c) 2023 Samsung Electronics Co., Ltd.
   *
   * Licensed under the Apache License, Version 2.0 (the "License");
   * you may not use this file except in compliance with the License.
@@ -131,33 +131,34 @@ void Multiply(Dali::Matrix& result, const Dali::Matrix& lhs, const Dali::Matrix&
  
    // 64 32bit registers,
    // aliased to
+  // s = 32 bit single-word s0 -s63
    // d = 64 bit double-word d0 -d31
    // q =128 bit quad-word   q0 -q15  (enough to handle a column of 4 floats in a matrix)
-  // e.g. q0 = d0 and d1
+  // e.g. q0 = d0 and d1 = s0, s1, s2, and s3
  
    // load and stores interleaved as NEON can load and store while calculating
    asm volatile(
-    "VLDM         %1,  {q0-q3}        \n\t" // load matrix 1 (lhsPtr) q[0..q3]
+    "VLDM         %1,  {q0-q3}        \n\t" // load matrix 1 (lhsPtr) q[q0-q3]
      "VLDM         %0,  {q8-q11}       \n\t" // load matrix 2 (rhsPtr) q[q8-q11]
-    "VMUL.F32     q12, q8, d0[0]      \n\t" // column 0 = rhsPtr[0..3] * lhsPtr[0..3]
-    "VMUL.F32     q13, q8, d2[0]      \n\t" // column 1 = rhsPtr[0..3] * lhsPtr[4..7]
-    "VMUL.F32     q14, q8, d4[0]      \n\t" // column 2 = rhsPtr[0..3] * lhsPtr[8..11]
-    "VMUL.F32     q15, q8, d6[0]      \n\t" // column 3 = rhsPtr[0..3] * lhsPtr[12..15]
-
-    "VMLA.F32     q12, q9, d0[1]      \n\t" // column 0 += rhsPtr[4..7] * lhsPtr[0..3]
-    "VMLA.F32     q13, q9, d2[1]      \n\t" // column 1 += rhsPtr[4..7] * lhsPtr[4..7]
-    "VMLA.F32     q14, q9, d4[1]      \n\t" // column 2 += rhsPtr[4..7] * lhsPtr[8..11]
-    "VMLA.F32     q15, q9, d6[1]      \n\t" // column 3 += rhsPtr[4..7] * lhsPtr[12..15]
-
-    "VMLA.F32     q12, q10, d1[0]     \n\t" // column 0 += rhsPtr[8..11] * lhsPtr[0..3]
-    "VMLA.F32     q13, q10, d3[0]     \n\t" // column 1 += rhsPtr[8..11] * lhsPtr[4..7]
-    "VMLA.F32     q14, q10, d5[0]     \n\t" // column 2 += rhsPtr[8..11] * lhsPtr[8..11]
-    "VMLA.F32     q15, q10, d7[0]     \n\t" // column 3 += rhsPtr[8..11] * lhsPtr[12..15]
-
-    "VMLA.F32     q12, q11, d1[1]     \n\t" // column 0 += rhsPtr[12..15] * lhsPtr[0..3]
-    "VMLA.F32     q13, q11, d3[1]     \n\t" // column 1 += rhsPtr[12..15] * lhsPtr[4..7]
-    "VMLA.F32     q14, q11, d5[1]     \n\t" // column 2 += rhsPtr[12..15] * lhsPtr[8..11]
-    "VMLA.F32     q15, q11, d7[1]     \n\t" // column 3 += rhsPtr[12..15] * lhsPtr[12..15]
+    "VMUL.F32     q12, q8, d0[0]      \n\t" // column 0 = rhsPtr[0..3] * lhsPtr[0]
+    "VMUL.F32     q13, q8, d2[0]      \n\t" // column 1 = rhsPtr[0..3] * lhsPtr[4]
+    "VMUL.F32     q14, q8, d4[0]      \n\t" // column 2 = rhsPtr[0..3] * lhsPtr[8]
+    "VMUL.F32     q15, q8, d6[0]      \n\t" // column 3 = rhsPtr[0..3] * lhsPtr[12]
+
+    "VMLA.F32     q12, q9, d0[1]      \n\t" // column 0 += rhsPtr[4..7] * lhsPtr[1]
+    "VMLA.F32     q13, q9, d2[1]      \n\t" // column 1 += rhsPtr[4..7] * lhsPtr[5]
+    "VMLA.F32     q14, q9, d4[1]      \n\t" // column 2 += rhsPtr[4..7] * lhsPtr[9]
+    "VMLA.F32     q15, q9, d6[1]      \n\t" // column 3 += rhsPtr[4..7] * lhsPtr[13]
+
+    "VMLA.F32     q12, q10, d1[0]     \n\t" // column 0 += rhsPtr[8..11] * lhsPtr[2]
+    "VMLA.F32     q13, q10, d3[0]     \n\t" // column 1 += rhsPtr[8..11] * lhsPtr[6]
+    "VMLA.F32     q14, q10, d5[0]     \n\t" // column 2 += rhsPtr[8..11] * lhsPtr[10]
+    "VMLA.F32     q15, q10, d7[0]     \n\t" // column 3 += rhsPtr[8..11] * lhsPtr[14]
+
+    "VMLA.F32     q12, q11, d1[1]     \n\t" // column 0 += rhsPtr[12..15] * lhsPtr[3]
+    "VMLA.F32     q13, q11, d3[1]     \n\t" // column 1 += rhsPtr[12..15] * lhsPtr[7]
+    "VMLA.F32     q14, q11, d5[1]     \n\t" // column 2 += rhsPtr[12..15] * lhsPtr[11]
+    "VMLA.F32     q15, q11, d7[1]     \n\t" // column 3 += rhsPtr[12..15] * lhsPtr[15]
      "VSTM         %2,  {q12-q15}      \n\t" // store entire output matrix.
      : "+r"(rhsPtr), "+r"(lhsPtr), "+r"(temp)
      :
@@ -217,44 +218,132 @@ void Multiply(Dali::Matrix& result, const Dali::Matrix& lhs, const Dali::Quatern
    }
  
  #else
+  // Store 4th row values that might be overwritten.
+  const float value0 = lhsPtr[3];
+  const float value1 = lhsPtr[7];
+  const float value2 = lhsPtr[11];
+  const float value3 = lhsPtr[15];
  
    // 64 32bit registers,
    // aliased to
+  // s = 32 bit single-word s0 -s63
    // d = 64 bit double-word d0 -d31
    // q =128 bit quad-word   q0 -q15  (enough to handle a column of 4 floats in a matrix)
-  // e.g. q0 = d0 and d1
+  // e.g. q0 = d0 and d1 = s0, s1, s2, and s3
+
    // load and stores interleaved as NEON can load and store while calculating
    asm volatile(
-    "VLDM         %1,   {q4-q6}       \n\t" // load matrix 1 (lhsPtr)
-    "VLD1.F32     {q7}, [%2]!         \n\t" // load matrix 2 (rhsPtr) [0..3]
-    "VMUL.F32     q0,   q7,   d8[0]   \n\t" // column 0 = rhsPtr[0..3] * lhsPtr[0..3]
-    "VMUL.F32     q1,   q7,   d10[0]  \n\t" // column 1 = rhsPtr[0..3] * lhsPtr[4..7]
-    "VMUL.F32     q2,   q7,   d12[0]  \n\t" // column 2 = rhsPtr[0..3] * lhsPtr[8..11]
-    "VLD1.F32     {q7}, [%2]!         \n\t" // load matrix 2 (rhsPtr) [4..7]
-    "VMLA.F32     q0,   q7,   d8[1]   \n\t" // column 0+= rhsPtr[4..7] * lhsPtr[0..3]
-    "VMLA.F32     q1,   q7,   d10[1]  \n\t" // column 1+= rhsPtr[4..7] * lhsPtr[4..7]
-    "VMLA.F32     q2,   q7,   d12[1]  \n\t" // column 2+= rhsPtr[4..7] * lhsPtr[8..11]
-    "VLD1.F32     {q7}, [%2]!         \n\t" // load matrix 2 (rhsPtr) [8..11]
-    "VMLA.F32     q0,   q7,   d9[0]   \n\t" // column 0+= rhsPtr[8..11] * lhsPtr[0..3]
-    "VMLA.F32     q1,   q7,   d11[0]  \n\t" // column 1+= rhsPtr[8..11] * lhsPtr[4..7]
-    "VMLA.F32     q2,   q7,   d13[0]  \n\t" // column 2+= rhsPtr[8..11] * lhsPtr[8..11]
-    "VSTM         %0,   {q0-q2}       \n\t" // store entire output matrix.
+    "VLDM         %1,   {q0-q3}       \n\t" // load matrix 1 (lhsPtr) q[q0-q3]
+    "VLD1.F32     {q8}, [%2]!         \n\t" // load matrix 2 (rhsPtr) [0..3]
+    "VMUL.F32     q4,   q8,   d0[0]   \n\t" // column 0 = rhsPtr[0..3] * lhsPtr[0]
+    "VMUL.F32     q5,   q8,   d2[0]   \n\t" // column 1 = rhsPtr[0..3] * lhsPtr[4]
+    "VMUL.F32     q6,   q8,   d4[0]   \n\t" // column 2 = rhsPtr[0..3] * lhsPtr[8]
+    "VMUL.F32     q7,   q8,   d6[0]   \n\t" // column 3 = rhsPtr[0..3] * lhsPtr[12]
+    "VLD1.F32     {q8}, [%2]!         \n\t" // load matrix 2 (rhsPtr) [4..7]
+    "VMLA.F32     q4,   q8,   d0[1]   \n\t" // column 0 += rhsPtr[4..7] * lhsPtr[1]
+    "VMLA.F32     q5,   q8,   d2[1]   \n\t" // column 1 += rhsPtr[4..7] * lhsPtr[5]
+    "VMLA.F32     q6,   q8,   d4[1]   \n\t" // column 2 += rhsPtr[4..7] * lhsPtr[9]
+    "VMLA.F32     q7,   q8,   d6[1]   \n\t" // column 3 += rhsPtr[4..7] * lhsPtr[13]
+    "VLD1.F32     {q8}, [%2]!         \n\t" // load matrix 2 (rhsPtr) [8..11]
+    "VMLA.F32     q4,   q8,   d1[0]   \n\t" // column 0 += rhsPtr[8..11] * lhsPtr[2]
+    "VMLA.F32     q5,   q8,   d3[0]   \n\t" // column 1 += rhsPtr[8..11] * lhsPtr[6]
+    "VMLA.F32     q6,   q8,   d5[0]   \n\t" // column 2 += rhsPtr[8..11] * lhsPtr[10]
+    "VMLA.F32     q7,   q8,   d7[0]   \n\t" // column 3 += rhsPtr[8..11] * lhsPtr[14]
+    "VSTM         %0,   {q4-q7}       \n\t" // store entire output matrix.
      :
      : "r"(temp), "r"(lhsPtr), "r"(rhsPtr)
-    : "%r0", "%q0", "%q1", "%q2", "%q4", "%q5", "%q6", "%q7", "memory");
+    : "%r0", "%q0", "%q1", "%q2", "%q3", "%q4", "%q5", "%q6", "%q7", "%q8", "memory");
  
-  temp[12] = 0.0f;
-  temp[13] = 0.0f;
-  temp[14] = 0.0f;
-  temp[15] = 1.0f;
+  // Restore 4th row values.
+  temp[3]  = value0;
+  temp[7]  = value1;
+  temp[11] = value2;
+  temp[15] = value3;
  #endif
  }
  
-void MultiplyProjectionMatrix(Dali::Matrix& result, const Dali::Matrix& lhs, const Dali::Matrix& projection)
+void MultiplyTransformMatrix(Dali::Matrix& result, const Dali::Matrix& lhs, const Dali::Matrix& rhs)
  {
-  // TODO : Implement with NEON.
-  // Current NEON code is copy of Multiply.
+  MATH_INCREASE_COUNTER(PerformanceMonitor::MATRIX_MULTIPLYS);
+  MATH_INCREASE_BY(PerformanceMonitor::FLOAT_POINT_MULTIPLY, 36); // 36 = 9*4
+  // result = rhs * lhs; lhs elements 3, 7, 11 are taken as 0.0f and element 15 as 1.0f (transform matrix), so they are never read.
+  float*       temp   = result.AsFloat();
+  const float* rhsPtr = rhs.AsFloat();
+  const float* lhsPtr = lhs.AsFloat();
+
+#ifndef __ARM_NEON__
+  // NOTE: rhsPtr is re-read after temp has been written, so result must not alias rhs in this path.
+  for(int32_t i = 0; i < 4; i++)
+  {
+    // i<<2 gives the first vector / column
+    const int32_t loc0 = i << 2;
+    const int32_t loc1 = loc0 + 1;
+    const int32_t loc2 = loc0 + 2;
+
+    const float value0 = lhsPtr[loc0];
+    const float value1 = lhsPtr[loc1];
+    const float value2 = lhsPtr[loc2];
+
+    temp[loc0] = (value0 * rhsPtr[0]) +
+                 (value1 * rhsPtr[4]) +
+                 (value2 * rhsPtr[8]) +
+                 (i == 3 ? rhsPtr[12] : 0.0f); // lhsPtr[loc3] is 0.0f, or 1.0f only if i == 3
+
+    temp[loc1] = (value0 * rhsPtr[1]) +
+                 (value1 * rhsPtr[5]) +
+                 (value2 * rhsPtr[9]) +
+                 (i == 3 ? rhsPtr[13] : 0.0f); // lhsPtr[loc3] is 0.0f, or 1.0f only if i == 3
+
+    temp[loc2] = (value0 * rhsPtr[2]) +
+                 (value1 * rhsPtr[6]) +
+                 (value2 * rhsPtr[10]) +
+                 (i == 3 ? rhsPtr[14] : 0.0f); // lhsPtr[loc3] is 0.0f, or 1.0f only if i == 3
+  }
+  temp[3] = temp[7] = temp[11] = 0.0f;
+  temp[15]                     = 1.0f;
+
+#else
+
+  // 64 32bit registers,
+  // aliased to
+  // s = 32 bit single-word s0 -s63
+  // d = 64 bit double-word d0 -d31
+  // q =128 bit quad-word   q0 -q15  (enough to handle a column of 4 floats in a matrix)
+  // e.g. q0 = d0 and d1 = s0, s1, s2, and s3
+  // rhsPtr is advanced by the post-incrementing VLD1 loads, so it is a read-write early-clobber operand with its own register.
+  // load and stores interleaved as NEON can load and store while calculating
+  asm volatile(
+    "VLDM         %[lhs], {q0-q3}     \n\t" // load matrix 1 (lhsPtr) q[q0-q3]
+    "VLD1.F32     {q8}, [%[rhs]]!     \n\t" // load matrix 2 (rhsPtr) [0..3]
+    "VMUL.F32     q12, q8, d0[0]      \n\t" // column 0 = rhsPtr[0..3] * lhsPtr[0]
+    "VMUL.F32     q13, q8, d2[0]      \n\t" // column 1 = rhsPtr[0..3] * lhsPtr[4]
+    "VMUL.F32     q14, q8, d4[0]      \n\t" // column 2 = rhsPtr[0..3] * lhsPtr[8]
+    "VMUL.F32     q15, q8, d6[0]      \n\t" // column 3 = rhsPtr[0..3] * lhsPtr[12]
+
+    "VLD1.F32     {q8}, [%[rhs]]!     \n\t" // load matrix 2 (rhsPtr) [4..7]
+    "VMLA.F32     q12, q8, d0[1]      \n\t" // column 0 += rhsPtr[4..7] * lhsPtr[1]
+    "VMLA.F32     q13, q8, d2[1]      \n\t" // column 1 += rhsPtr[4..7] * lhsPtr[5]
+    "VMLA.F32     q14, q8, d4[1]      \n\t" // column 2 += rhsPtr[4..7] * lhsPtr[9]
+    "VMLA.F32     q15, q8, d6[1]      \n\t" // column 3 += rhsPtr[4..7] * lhsPtr[13]
+
+    "VLD1.F32     {q8}, [%[rhs]]!     \n\t" // load matrix 2 (rhsPtr) [8..11]
+    "VMLA.F32     q12, q8, d1[0]      \n\t" // column 0 += rhsPtr[8..11] * lhsPtr[2]
+    "VMLA.F32     q13, q8, d3[0]      \n\t" // column 1 += rhsPtr[8..11] * lhsPtr[6]
+    "VMLA.F32     q14, q8, d5[0]      \n\t" // column 2 += rhsPtr[8..11] * lhsPtr[10]
+    "VMLA.F32     q15, q8, d7[0]      \n\t" // column 3 += rhsPtr[8..11] * lhsPtr[14]
+
+    "VLD1.F32     {q8}, [%[rhs]]!     \n\t" // load matrix 2 (rhsPtr) [12..15]
+    "VADD.F32     q15, q15, q8        \n\t" // column 3 = column3 + rhsPtr[12..15]
+    "VSTM         %[dst], {q12-q15}   \n\t" // store entire output matrix.
+    : [rhs] "+&r"(rhsPtr)
+    : [dst] "r"(temp), [lhs] "r"(lhsPtr)
+    : "%r0", "q0", "q1", "q2", "q3", "q8", "q12", "q13", "q14", "q15", "memory");
+
+#endif
+}
  
+void MultiplyProjectionMatrix(Dali::Matrix& result, const Dali::Matrix& lhs, const Dali::Matrix& projection)
+{
    MATH_INCREASE_COUNTER(PerformanceMonitor::MATRIX_MULTIPLYS);
    MATH_INCREASE_BY(PerformanceMonitor::FLOAT_POINT_MULTIPLY, 32); // 32 = 8*4
  
@@ -264,11 +353,13 @@ void MultiplyProjectionMatrix(Dali::Matrix& result, const Dali::Matrix& lhs, con
  
  #ifndef __ARM_NEON__
  
-  // We only use rhsPtr's 0, 1, 4, 5, 10, 11, 14, 15 index.
+  // We only use rhsPtr's 0, 1, 2, 4, 5, 6, 10, 11, 14, 15 indices.
    const float rhs0  = rhsPtr[0];
    const float rhs1  = rhsPtr[1];
+  const float rhs2  = rhsPtr[2];
    const float rhs4  = rhsPtr[4];
    const float rhs5  = rhsPtr[5];
+  const float rhs6  = rhsPtr[6];
    const float rhs10 = rhsPtr[10];
    const float rhs11 = rhsPtr[11];
    const float rhs14 = rhsPtr[14];
@@ -285,49 +376,49 @@ void MultiplyProjectionMatrix(Dali::Matrix& result, const Dali::Matrix& lhs, con
      const float value0 = lhsPtr[loc0];
      const float value1 = lhsPtr[loc1];
      const float value2 = lhsPtr[loc2];
-    const float value3 = lhsPtr[loc3];
  
      temp[loc0] = (value0 * rhs0) + (value1 * rhs4);
      temp[loc1] = (value0 * rhs1) + (value1 * rhs5);
-    temp[loc2] = (value2 * rhs10) + (value3 * rhs14);
-    temp[loc3] = (value2 * rhs11) + (value3 * rhs15);
+    temp[loc2] = (value0 * rhs2) + (value1 * rhs6) + (value2 * rhs10) + (i == 3 ? rhs14 : 0.0f);
+    temp[loc3] = (value2 * rhs11) + (i == 3 ? rhs15 : 0.0f);
    }
  
  #else
  
    // 64 32bit registers,
    // aliased to
+  // s = 32 bit single-word s0 -s63
    // d = 64 bit double-word d0 -d31
    // q =128 bit quad-word   q0 -q15  (enough to handle a column of 4 floats in a matrix)
-  // e.g. q0 = d0 and d1
+  // e.g. q0 = d0 and d1 = s0, s1, s2, and s3
  
    // load and stores interleaved as NEON can load and store while calculating
    asm volatile(
-    "VLDM         %1,  {q0-q3}        \n\t" // load matrix 1 (lhsPtr) q[0..q3]
-    "VLDM         %0,  {q8-q11}       \n\t" // load matrix 2 (rhsPtr) q[q8-q11]
-    "VMUL.F32     q12, q8, d0[0]      \n\t" // column 0 = rhsPtr[0..3] * lhsPtr[0..3]
-    "VMUL.F32     q13, q8, d2[0]      \n\t" // column 1 = rhsPtr[0..3] * lhsPtr[4..7]
-    "VMUL.F32     q14, q8, d4[0]      \n\t" // column 2 = rhsPtr[0..3] * lhsPtr[8..11]
-    "VMUL.F32     q15, q8, d6[0]      \n\t" // column 3 = rhsPtr[0..3] * lhsPtr[12..15]
-
-    "VMLA.F32     q12, q9, d0[1]      \n\t" // column 0 += rhsPtr[4..7] * lhsPtr[0..3]
-    "VMLA.F32     q13, q9, d2[1]      \n\t" // column 1 += rhsPtr[4..7] * lhsPtr[4..7]
-    "VMLA.F32     q14, q9, d4[1]      \n\t" // column 2 += rhsPtr[4..7] * lhsPtr[8..11]
-    "VMLA.F32     q15, q9, d6[1]      \n\t" // column 3 += rhsPtr[4..7] * lhsPtr[12..15]
-
-    "VMLA.F32     q12, q10, d1[0]     \n\t" // column 0 += rhsPtr[8..11] * lhsPtr[0..3]
-    "VMLA.F32     q13, q10, d3[0]     \n\t" // column 1 += rhsPtr[8..11] * lhsPtr[4..7]
-    "VMLA.F32     q14, q10, d5[0]     \n\t" // column 2 += rhsPtr[8..11] * lhsPtr[8..11]
-    "VMLA.F32     q15, q10, d7[0]     \n\t" // column 3 += rhsPtr[8..11] * lhsPtr[12..15]
-
-    "VMLA.F32     q12, q11, d1[1]     \n\t" // column 0 += rhsPtr[12..15] * lhsPtr[0..3]
-    "VMLA.F32     q13, q11, d3[1]     \n\t" // column 1 += rhsPtr[12..15] * lhsPtr[4..7]
-    "VMLA.F32     q14, q11, d5[1]     \n\t" // column 2 += rhsPtr[12..15] * lhsPtr[8..11]
-    "VMLA.F32     q15, q11, d7[1]     \n\t" // column 3 += rhsPtr[12..15] * lhsPtr[12..15]
-    "VSTM         %2,  {q12-q15}      \n\t" // store entire output matrix.
-    : "+r"(rhsPtr), "+r"(lhsPtr), "+r"(temp)
+    "VLDM         %1,  {q0-q3}        \n\t" // load matrix 1 (lhsPtr) q[q0-q3]
+    "VLD1.F32     {q8}, [%2]!         \n\t" // load matrix 2 (rhsPtr) [0..3]
+    "VMUL.F32     q12, q8, d0[0]      \n\t" // column 0 = rhsPtr[0..3] * lhsPtr[0]
+    "VMUL.F32     q13, q8, d2[0]      \n\t" // column 1 = rhsPtr[0..3] * lhsPtr[4]
+    "VMUL.F32     q14, q8, d4[0]      \n\t" // column 2 = rhsPtr[0..3] * lhsPtr[8]
+    "VMUL.F32     q15, q8, d6[0]      \n\t" // column 3 = rhsPtr[0..3] * lhsPtr[12]
+
+    "VLD1.F32     {q8}, [%2]!         \n\t" // load matrix 2 (rhsPtr) [4..7]
+    "VMLA.F32     q12, q8, d0[1]      \n\t" // column 0 += rhsPtr[4..7] * lhsPtr[1]
+    "VMLA.F32     q13, q8, d2[1]      \n\t" // column 1 += rhsPtr[4..7] * lhsPtr[5]
+    "VMLA.F32     q14, q8, d4[1]      \n\t" // column 2 += rhsPtr[4..7] * lhsPtr[9]
+    "VMLA.F32     q15, q8, d6[1]      \n\t" // column 3 += rhsPtr[4..7] * lhsPtr[13]
+
+    "VLD1.F32     {q8}, [%2]!         \n\t" // load matrix 2 (rhsPtr) [8..11]
+    "VMLA.F32     d25, d17, d1[0]     \n\t" // column 0[2,3] += rhsPtr[10,11] * lhsPtr[2]
+    "VMLA.F32     d27, d17, d3[0]     \n\t" // column 1[2,3] += rhsPtr[10,11] * lhsPtr[6]
+    "VMLA.F32     d29, d17, d5[0]     \n\t" // column 2[2,3] += rhsPtr[10,11] * lhsPtr[10]
+    "VMLA.F32     d31, d17, d7[0]     \n\t" // column 3[2,3] += rhsPtr[10,11] * lhsPtr[14]
+
+    "VLD1.F32     {q8}, [%2]!         \n\t" // load matrix 2 (rhsPtr) [12..15]
+    "VADD.F32     d31, d31, d17       \n\t" // column 3[2,3] = column3[2,3] + rhsPtr[14,15]
+    "VSTM         %0,  {q12-q15}      \n\t" // store entire output matrix.
      :
-    : "q0", "q1", "q2", "q3", "q8", "q9", "q10", "q11", "q12", "q13", "q14", "q15", "memory");
+    : "r"(temp), "r"(lhsPtr), "r"(rhsPtr)
+    : "%r0", "q0", "q1", "q2", "q3", "q8", "q12", "q13", "q14", "q15", "memory");
  
  #endif
  }
@@ -337,7 +428,7 @@ void MultiplyAssign(Dali::Matrix& result, const Dali::Matrix& rhs)
    MATH_INCREASE_COUNTER(PerformanceMonitor::MATRIX_MULTIPLYS);
    MATH_INCREASE_BY(PerformanceMonitor::FLOAT_POINT_MULTIPLY, 64); // 64 = 16*4
  
-  // TODO : Implement with NEON.
+#ifndef __ARM_NEON__
  
    float*       lhsPtr = result.AsFloat();
    const float* rhsPtr = rhs.AsFloat();
@@ -390,6 +481,52 @@ void MultiplyAssign(Dali::Matrix& result, const Dali::Matrix& rhs)
      // If we allocate temperal memory, we should free it.
      free(temp);
    }
+
+#else
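+  // Intermediate values are kept in registers, so overlap between result and the inputs is not a concern.
+  // This is a copy of the normal Multiply code.
+  // Be careful: the pointer names are crossed (rhsPtr points at result, lhsPtr points at rhs)!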
+
+  float*       temp   = result.AsFloat();
+  const float* rhsPtr = result.AsFloat();
+  const float* lhsPtr = rhs.AsFloat();
+
+  // 64 32bit registers,
+  // aliased to
+  // s = 32 bit single-word s0 -s63
+  // d = 64 bit double-word d0 -d31
+  // q =128 bit quad-word   q0 -q15  (enough to handle a column of 4 floats in a matrix)
+  // e.g. q0 = d0 and d1 = s0, s1, s2, and s3
+
+  // load and stores interleaved as NEON can load and store while calculating
+  asm volatile(
+    "VLDM         %1,  {q0-q3}        \n\t" // load matrix 1 (lhsPtr) q[q0-q3]
+    "VLDM         %0,  {q8-q11}       \n\t" // load matrix 2 (rhsPtr) q[q8-q11]
+    "VMUL.F32     q12, q8, d0[0]      \n\t" // column 0 = rhsPtr[0..3] * lhsPtr[0]
+    "VMUL.F32     q13, q8, d2[0]      \n\t" // column 1 = rhsPtr[0..3] * lhsPtr[4]
+    "VMUL.F32     q14, q8, d4[0]      \n\t" // column 2 = rhsPtr[0..3] * lhsPtr[8]
+    "VMUL.F32     q15, q8, d6[0]      \n\t" // column 3 = rhsPtr[0..3] * lhsPtr[12]
+
+    "VMLA.F32     q12, q9, d0[1]      \n\t" // column 0 += rhsPtr[4..7] * lhsPtr[1]
+    "VMLA.F32     q13, q9, d2[1]      \n\t" // column 1 += rhsPtr[4..7] * lhsPtr[5]
+    "VMLA.F32     q14, q9, d4[1]      \n\t" // column 2 += rhsPtr[4..7] * lhsPtr[9]
+    "VMLA.F32     q15, q9, d6[1]      \n\t" // column 3 += rhsPtr[4..7] * lhsPtr[13]
+
+    "VMLA.F32     q12, q10, d1[0]     \n\t" // column 0 += rhsPtr[8..11] * lhsPtr[2]
+    "VMLA.F32     q13, q10, d3[0]     \n\t" // column 1 += rhsPtr[8..11] * lhsPtr[6]
+    "VMLA.F32     q14, q10, d5[0]     \n\t" // column 2 += rhsPtr[8..11] * lhsPtr[10]
+    "VMLA.F32     q15, q10, d7[0]     \n\t" // column 3 += rhsPtr[8..11] * lhsPtr[14]
+
+    "VMLA.F32     q12, q11, d1[1]     \n\t" // column 0 += rhsPtr[12..15] * lhsPtr[3]
+    "VMLA.F32     q13, q11, d3[1]     \n\t" // column 1 += rhsPtr[12..15] * lhsPtr[7]
+    "VMLA.F32     q14, q11, d5[1]     \n\t" // column 2 += rhsPtr[12..15] * lhsPtr[11]
+    "VMLA.F32     q15, q11, d7[1]     \n\t" // column 3 += rhsPtr[12..15] * lhsPtr[15]
+    "VSTM         %2,  {q12-q15}      \n\t" // store entire output matrix.
+    : "+r"(rhsPtr), "+r"(lhsPtr), "+r"(temp)
+    :
+    : "q0", "q1", "q2", "q3", "q8", "q9", "q10", "q11", "q12", "q13", "q14", "q15", "memory");
+
+#endif
  }
  
  // Dali::Matrix3
diff --git a/dali/internal/common/matrix-utils.h b/dali/internal/common/matrix-utils.h

index f65687c..bd14506 100644 (file)
--- a/dali/internal/common/matrix-utils.h
+++ b/dali/internal/common/matrix-utils.h
@@ -2,7 +2,7 @@
  #define DALI_INTERNAL_MATRIX_UTILS_H
  
  /*
- * Copyright (c) 2022 Samsung Electronics Co., Ltd.
+ * Copyright (c) 2023 Samsung Electronics Co., Ltd.
   *
   * Licensed under the Apache License, Version 2.0 (the "License");
   * you may not use this file except in compliance with the License.
@@ -52,14 +52,36 @@ void Multiply(Dali::Matrix& result, const Dali::Matrix& lhs, const Dali::Matrix&
  void Multiply(Dali::Matrix& result, const Dali::Matrix& lhs, const Dali::Quaternion& rhs);
  
  /**
- * @brief Function to multiply projection matrix and store the result onto third.
+ * @brief Function to multiply two transform matrices and store the result in a third.
+ *
+ * This API assumes that both lhs and rhs are transform matrices.
+ * Scale & rotation only occupy the upper-left 3x3 area of the matrix, and translation only occupies indices [12,13,14].
+ * So, for a matrix built from a transform, elements 3, 7 and 11 are always 0.0f, and element 15 is always 1.0f.
+ * This lets us reduce the number of multiplications.
+ *
+ * This is especially beneficial when calculating world matrices.
+ *
+ * Use this method in time critical path as it does not require temporaries.
+ *
+ * result = rhs * lhs
+ *
+ * @SINCE_2_2.15
+ * @param[out] result Result of the multiplication
+ * @param[in] lhs Transform Matrix, this cannot be same matrix as result
+ * @param[in] rhs Transform Matrix, this cannot be same matrix as result (the non-NEON path re-reads rhs after writing to result)
+ */
+void MultiplyTransformMatrix(Dali::Matrix& result, const Dali::Matrix& lhs, const Dali::Matrix& rhs);
+
+/**
+ * @brief Function to multiply a projection matrix by a transform matrix and store the result in a third.
   *
   * This API assume that projection is Projection Matrix which top/bottom/left/right is symmetrical.
   *
   * Perspective matrix only has 0, 5, 10, 11, 14 (14 is const value, 1.0f).
   * Orthographic matrix only has 0, 5, 10, 14, 15 (15 is const value, 1.0f).
   * If window rotated, we use 1, 4 index instead of 0, 5.
- * So we only need 8 values to multiplication.
+ * If reflect plane used, we use 2, 6 index.
+ * So we only need 10 values to multiplication.
   *
   * Use this method in time critical path as it does not require temporaries.
   *
@@ -67,7 +89,7 @@ void Multiply(Dali::Matrix& result, const Dali::Matrix& lhs, const Dali::Quatern
   *
   * @SINCE_2_1.46
   * @param[out] result Result of the multiplication
- * @param[in] lhs Matrix, this cannot be same matrix as result
+ * @param[in] lhs Transform Matrix, this cannot be same matrix as result
   * @param[in] projection Projection Matrix, this can be same matrix as result
   */
  void MultiplyProjectionMatrix(Dali::Matrix& result, const Dali::Matrix& lhs, const Dali::Matrix& projection);
diff --git a/dali/internal/event/actors/actor-coords.cpp b/dali/internal/event/actors/actor-coords.cpp

index d1a70db..2eb4042 100644 (file)
--- a/dali/internal/event/actors/actor-coords.cpp
+++ b/dali/internal/event/actors/actor-coords.cpp
@@ -1,5 +1,5 @@
  /*
- * Copyright (c) 2022 Samsung Electronics Co., Ltd.
+ * Copyright (c) 2023 Samsung Electronics Co., Ltd.
   *
   * Licensed under the Apache License, Version 2.0 (the "License");
   * you may not use this file except in compliance with the License.
@@ -79,11 +79,11 @@ bool ConvertScreenToLocal(
  {
    // Get the ModelView matrix
    Matrix modelView;
-  MatrixUtils::Multiply(modelView, worldMatrix, viewMatrix);
+  MatrixUtils::MultiplyTransformMatrix(modelView, worldMatrix, viewMatrix);
  
    // Calculate the inverted ModelViewProjection matrix; this will be used for 2 unprojects
    Matrix invertedMvp(false /*don't init*/);
-  MatrixUtils::Multiply(invertedMvp, modelView, projectionMatrix);
+  MatrixUtils::MultiplyProjectionMatrix(invertedMvp, modelView, projectionMatrix);
    bool success = invertedMvp.Invert();
  
    // Convert to GL coordinates
@@ -528,7 +528,7 @@ Matrix CalculateActorWorldTransform(const Actor& actor)
  
          //Update the world matrix
          Matrix tempMatrix;
-        MatrixUtils::Multiply(tempMatrix, localMatrix, worldMatrix);
+        MatrixUtils::MultiplyTransformMatrix(tempMatrix, localMatrix, worldMatrix);
          worldMatrix = tempMatrix;
        }
        else
diff --git a/dali/internal/event/actors/camera-actor-impl.cpp b/dali/internal/event/actors/camera-actor-impl.cpp

index af17277..42e0ca1 100644 (file)
--- a/dali/internal/event/actors/camera-actor-impl.cpp
+++ b/dali/internal/event/actors/camera-actor-impl.cpp
@@ -1,5 +1,5 @@
  /*
- * Copyright (c) 2022 Samsung Electronics Co., Ltd.
+ * Copyright (c) 2023 Samsung Electronics Co., Ltd.
   *
   * Licensed under the Apache License, Version 2.0 (the "License");
   * you may not use this file except in compliance with the License.
@@ -107,7 +107,7 @@ void BuildOrthoPickingRay(const Matrix&   viewMatrix,
  
    // Transforms the touch point from the screen reference system to the world reference system.
    Matrix invViewProjection(false); // Don't initialize.
-  MatrixUtils::Multiply(invViewProjection, viewMatrix, projectionMatrix);
+  MatrixUtils::MultiplyProjectionMatrix(invViewProjection, viewMatrix, projectionMatrix);
    if(!invViewProjection.Invert())
    {
      DALI_ASSERT_DEBUG(false);
diff --git a/dali/internal/event/common/projection.cpp b/dali/internal/event/common/projection.cpp

index e6cae8d..d662f2f 100644 (file)
--- a/dali/internal/event/common/projection.cpp
+++ b/dali/internal/event/common/projection.cpp
@@ -1,5 +1,5 @@
  /*
- * Copyright (c) 2022 Samsung Electronics Co., Ltd.
+ * Copyright (c) 2023 Samsung Electronics Co., Ltd.
   *
   * Licensed under the Apache License, Version 2.0 (the "License");
   * you may not use this file except in compliance with the License.
@@ -73,7 +73,7 @@ bool UnprojectFull(const Vector4& windowPos,
                     Vector4&       objectPos)
  {
    Matrix invertedMvp(false); // Don't initialize.
-  MatrixUtils::Multiply(invertedMvp, modelView, projection);
+  MatrixUtils::MultiplyProjectionMatrix(invertedMvp, modelView, projection);
  
    if(invertedMvp.Invert())
    {
@@ -124,7 +124,7 @@ bool ProjectFull(const Vector4& position,
    bool ok = false;
  
    Matrix Mvp(false); // Don't initialize.
-  MatrixUtils::Multiply(Mvp, modelView, projection);
+  MatrixUtils::MultiplyProjectionMatrix(Mvp, modelView, projection);
  
    Vector4 p = Mvp * position;
  
diff --git a/dali/internal/render/renderers/render-renderer.cpp b/dali/internal/render/renderers/render-renderer.cpp

index efbd5b0..58311f2 100644 (file)
--- a/dali/internal/render/renderers/render-renderer.cpp
+++ b/dali/internal/render/renderers/render-renderer.cpp
@@ -479,7 +479,7 @@ bool Renderer::Render(Graphics::CommandBuffer&                             comma
      mRenderCallbackInput.size       = size;
      mRenderCallbackInput.projection = projectionMatrix;
  
-    MatrixUtils::Multiply(mRenderCallbackInput.mvp, modelViewMatrix, projectionMatrix);
+    MatrixUtils::MultiplyProjectionMatrix(mRenderCallbackInput.mvp, modelViewMatrix, projectionMatrix);
  
      // submit draw
      commandBuffer.DrawNative(&info);
@@ -732,7 +732,7 @@ void Renderer::WriteUniformBuffer(
      if(mvpUniformInfo && !mvpUniformInfo->name.empty())
      {
        Matrix modelViewProjectionMatrix(false);
-      MatrixUtils::Multiply(modelViewProjectionMatrix, modelViewMatrix, projectionMatrix);
+      MatrixUtils::MultiplyProjectionMatrix(modelViewProjectionMatrix, modelViewMatrix, projectionMatrix);
        WriteDefaultUniform(mvpUniformInfo, *uboView, modelViewProjectionMatrix);
      }
  
diff --git a/dali/internal/update/manager/render-instruction-processor.cpp b/dali/internal/update/manager/render-instruction-processor.cpp

index aae62bd..9dfbbf5 100644 (file)
--- a/dali/internal/update/manager/render-instruction-processor.cpp
+++ b/dali/internal/update/manager/render-instruction-processor.cpp
@@ -221,7 +221,7 @@ inline void AddRendererToRenderList(BufferIndex               updateBufferIndex,
  
        if(size.LengthSquared() > Math::MACHINE_EPSILON_1000)
        {
-        MatrixUtils::Multiply(nodeModelViewMatrix, nodeWorldMatrix, viewMatrix);
+        MatrixUtils::MultiplyTransformMatrix(nodeModelViewMatrix, nodeWorldMatrix, viewMatrix);
          nodeModelViewMatrixSet = true;
  
          // Assume actors are at z=0, compute AABB in view space & test rect intersection
@@ -322,7 +322,7 @@ inline void AddRendererToRenderList(BufferIndex               updateBufferIndex,
  
        if(!nodeModelViewMatrixSet)
        {
-        MatrixUtils::Multiply(nodeModelViewMatrix, nodeWorldMatrix, viewMatrix);
+        MatrixUtils::MultiplyTransformMatrix(nodeModelViewMatrix, nodeWorldMatrix, viewMatrix);
        }
        item.mModelViewMatrix = nodeModelViewMatrix;
  
diff --git a/dali/internal/update/manager/transform-manager.cpp b/dali/internal/update/manager/transform-manager.cpp

index 342d820..9f58997 100644 (file)
--- a/dali/internal/update/manager/transform-manager.cpp
+++ b/dali/internal/update/manager/transform-manager.cpp
@@ -261,7 +261,7 @@ bool TransformManager::Update()
          }
  
          //Update the world matrix
-        MatrixUtils::Multiply(mWorld[i], mLocal[i], mWorld[parentIndex]);
+        MatrixUtils::MultiplyTransformMatrix(mWorld[i], mLocal[i], mWorld[parentIndex]);
        }
        else
        {
diff --git a/dali/internal/update/render-tasks/scene-graph-camera.cpp b/dali/internal/update/render-tasks/scene-graph-camera.cpp

index 586d7cc..733e691 100644 (file)
--- a/dali/internal/update/render-tasks/scene-graph-camera.cpp
+++ b/dali/internal/update/render-tasks/scene-graph-camera.cpp
@@ -1,5 +1,5 @@
  /*
- * Copyright (c) 2022 Samsung Electronics Co., Ltd.
+ * Copyright (c) 2023 Samsung Electronics Co., Ltd.
   *
   * Licensed under the Apache License, Version 2.0 (the "License");
   * you may not use this file except in compliance with the License.
@@ -166,6 +166,154 @@ void Orthographic(Matrix& result, Dali::DevelCameraActor::ProjectionDirection or
    m[15] = 1.0f;
  }
  
+/**
+ * Adjust the near plane of a perspective projection matrix so that it matches the given clip plane (used for reflection)
+ * @param[in,out] perspective Perspective matrix; its elements 2, 6, 10 and 14 are overwritten
+ * @param[in] clipPlane Clipping plane
+ * @param[in] far Far plane distance of original projection matrix
+ */
+void AdjustNearPlaneForPerspective(Matrix& perspective, const Vector4& clipPlane, float far)
+{
+  // Make the third row of the perspective projection matrix follow clipPlane.
+  // If we let the third row vector be v = (v[2], v[6], v[10], v[14]),
+  // z_n = v * (x, y, z, 1) / z
+
+  // For example, standard case : -1 for near, 1 for far.
+  // v.z * n + v.w = -n
+  // v.z * f + v.w = f
+  // This formula gives v.z = (f + n) / (f - n), v.w = -2fn / (f - n)
+
+  // Now, we want it to be like this : -1 for clipPlane, 1 for farPlane.
+  // Consider any point p on the clip plane : c.x * p.x + c.y * p.y + c.z * p.z + c.w = 0.
+  // It has to satisfy v.x * p.x + v.y * p.y + v.z * p.z + v.w = -p.z;
+
+  // Since p is an arbitrary point on the plane, we can conclude that
+  // (v.x, v.y, v.z + 1, v.w) = scale * (c.x, c.y, c.z, c.w).
+  // -->
+  // v.z = scale * c.z - 1.0,
+  // v.w = scale * c.w.
+
+  // Now we have to determine scale value.
+
+  // Reference for the far plane formula : https://ubm-twvideo01.s3.amazonaws.com/o1/vault/gdc07/slides/S3730i1.pdf page 38
+  // Pick the corner point Q on the original projection frustum's far plane for which
+  // c.x * Q.x + c.y * Q.y + c.z * Q.z + c.w is maximum.
+
+  // For Q to be such a corner, the formulas below should hold. (We can assume that Q.z is bigger than 0)
+  // || (v[0], 0, v[8], 0) * Q / Q.z || = 1 --> || v[0] * Q.x + v[8] * Q.z || = Q.z
+  // || (0, v[5], v[9], 0) * Q / Q.z || = 1 --> || v[5] * Q.y + v[9] * Q.z || = Q.z
+
+  // And for the far plane case
+  // v * Q = Q.z
+  // --> (c * scale + (0, 0, -1, 0)) * Q = Q.z
+  // --> c * scale * Q = 2.0 * Q.z
+  // --> scale = 2.0 * Q.z / (c * Q)
+
+  float* v = perspective.AsFloat();
+
+  float maximalCDotQ = Math::MACHINE_EPSILON_0; // Initial value of the running maximum; we only look for a positive CDotQ.
+
+  float inverseV0 = 1.0f / v[0];
+  float inverseV5 = 1.0f / v[5];
+
+  // There are 4 candidate solutions. Evaluate each of them and keep the maximum of clipPlane * Q.
+  for(int testCase = 0; testCase != 4; ++testCase)
+  {
+    Vector4 Q(0.0f, 0.0f, far, 1.0f);
+
+    // Solve for Q.x (bit 0 of testCase selects the sign)
+    // v[0] * Q.x = (+-1.0f - v[8]) * Q.z
+    Q.x = (((testCase & 1) ? 1.0f : -1.0f) - v[8]) * Q.z * inverseV0;
+    // Solve for Q.y (bit 1 of testCase selects the sign) : v[5] * Q.y = (+-1.0f - v[9]) * Q.z
+    Q.y = (((testCase & 2) ? 1.0f : -1.0f) - v[9]) * Q.z * inverseV5;
+
+    maximalCDotQ = std::max(maximalCDotQ, clipPlane.Dot(Q));
+  }
+
+  float scale = 2.0f * far / maximalCDotQ;
+
+  Vector4 scaledPlaneVector = clipPlane * scale;
+
+  v[2]  = scaledPlaneVector.x;
+  v[6]  = scaledPlaneVector.y;
+  v[10] = scaledPlaneVector.z - 1.0f;
+  v[14] = scaledPlaneVector.w;
+}
+
+/**
+ * Adjust the near plane of an orthographic projection matrix so that it matches the given clip plane (used for reflection)
+ * @param[in,out] orthographic Orthographic matrix; its elements 2, 6, 10 and 14 are overwritten
+ * @param[in] clipPlane Clipping plane
+ * @param[in] far Far plane distance of original projection matrix
+ */
+void AdjustNearPlaneForOrthographic(Matrix& orthographic, const Vector4& clipPlane, float far)
+{
+  // Make the third row of the orthographic projection matrix follow clipPlane.
+  // If we let the third row vector be v = (v[2], v[6], v[10], v[14]),
+  // z_n = v * (x, y, z, 1)
+
+  // For example, standard case : -1 for near, 1 for far.
+  // v.z * n + v.w = -1
+  // v.z * f + v.w = 1
+  // This formula gives v.z = 2 / (f - n), v.w = -(f + n) / (f - n)
+
+  // Now, we want it to be like this : -1 for clipPlane, 1 for farPlane.
+  // Consider any point p on the clip plane : c.x * p.x + c.y * p.y + c.z * p.z + c.w = 0.
+  // It has to satisfy v.x * p.x + v.y * p.y + v.z * p.z + v.w = -1;
+
+  // Since p is an arbitrary point on the plane, we can conclude that
+  // (v.x, v.y, v.z, v.w + 1) = scale * (c.x, c.y, c.z, c.w).
+  // -->
+  // v.z = scale * c.z,
+  // v.w = scale * c.w - 1.0.
+
+  // Now we have to determine scale value.
+
+  // Reference for the far plane formula : https://ubm-twvideo01.s3.amazonaws.com/o1/vault/gdc07/slides/S3730i1.pdf page 38
+  // Pick the corner point Q on the original projection frustum's far plane for which
+  // c.x * Q.x + c.y * Q.y + c.z * Q.z + c.w is maximum.
+
+  // For Q to be such a corner, the formulas below should hold. (We can assume that Q.z is bigger than 0)
+  // || (v[0], 0, 0, v[12]) * Q || = 1 --> || v[0] * Q.x + v[12] || = 1
+  // || (0, v[5], 0, v[13]) * Q || = 1 --> || v[5] * Q.y + v[13] || = 1
+
+  // And for the far plane case
+  // v * Q = 1
+  // --> (c * scale + (0, 0, 0, 1)) * Q = 1
+  // --> c * scale * Q = 2.0
+  // --> scale = 2.0 / (c * Q)
+
+  float* v = orthographic.AsFloat();
+
+  float maximalCDotQ = Math::MACHINE_EPSILON_0; // Initial value of the running maximum; we only look for a positive CDotQ.
+
+  float inverseV0 = 1.0f / v[0];
+  float inverseV5 = 1.0f / v[5];
+
+  // There are 4 candidate solutions. Evaluate each of them and keep the maximum of clipPlane * Q.
+  for(int testCase = 0; testCase != 4; ++testCase)
+  {
+    Vector4 Q(0.0f, 0.0f, far, 1.0f);
+
+    // Solve for Q.x (bit 0 of testCase selects the sign)
+    // v[0] * Q.x = (+-1.0f - v[12])
+    Q.x = (((testCase & 1) ? 1.0f : -1.0f) - v[12]) * inverseV0;
+    // Solve for Q.y (bit 1 of testCase selects the sign) : v[5] * Q.y = (+-1.0f - v[13])
+    Q.y = (((testCase & 2) ? 1.0f : -1.0f) - v[13]) * inverseV5;
+
+    maximalCDotQ = std::max(maximalCDotQ, clipPlane.Dot(Q));
+  }
+
+  float scale = 2.0f / maximalCDotQ;
+
+  Vector4 scaledPlaneVector = clipPlane * scale;
+
+  v[2]  = scaledPlaneVector.x;
+  v[6]  = scaledPlaneVector.y;
+  v[10] = scaledPlaneVector.z;
+  v[14] = scaledPlaneVector.w - 1.0f;
+}
+
  } // unnamed namespace
  
  const Dali::Camera::Type                          Camera::DEFAULT_TYPE(Dali::Camera::FREE_LOOK);
@@ -283,28 +431,10 @@ void VectorReflectedByPlane(Vector4& out, Vector4& in, Vector4& plane)
    out.w   = static_cast<float>(in.w - plane.w * d);
  }
  
-void Camera::AdjustNearPlaneForPerspective(Matrix& perspective, const Vector4& clipPlane)
-{
-  Vector4 q;
-  float*  v = perspective.AsFloat();
-
-  q.x = (Sign(clipPlane.x) + v[8]) / v[0];
-  q.y = (Sign(clipPlane.y) + v[9]) / v[5];
-  q.z = -1.0f;
-  q.w = (1.0f + v[10]) / v[14];
-
-  // Calculate the scaled plane vector
-  Vector4 c = clipPlane * (REFLECTION_NORMALIZED_DEVICE_COORDINATE_PARAMETER_A / q.Dot(clipPlane));
-
-  // Replace the third row of the projection v
-  v[2]  = c.x;
-  v[6]  = c.y;
-  v[10] = c.z + REFLECTION_NORMALIZED_DEVICE_COORDINATE_PARAMETER_D;
-  v[14] = c.w;
-}
-
  void Camera::SetReflectByPlane(const Vector4& plane)
  {
+  // Note :  we assume that plane.xyz is normal vector.
+
    float* v    = mReflectionMtx.AsFloat();
    float  _2ab = -2.0f * plane.x * plane.y;
    float  _2ac = -2.0f * plane.x * plane.z;
@@ -416,7 +546,7 @@ void Camera::Update(BufferIndex updateBufferIndex)
    if(viewUpdateCount > COPY_PREVIOUS_MATRIX || projectionUpdateCount > COPY_PREVIOUS_MATRIX)
    {
      // either has actually changed so recalculate
-    MatrixUtils::Multiply(mInverseViewProjection[updateBufferIndex], mViewMatrix[updateBufferIndex], mProjectionMatrix[updateBufferIndex]);
+    MatrixUtils::MultiplyProjectionMatrix(mInverseViewProjection[updateBufferIndex], mViewMatrix[updateBufferIndex], mProjectionMatrix[updateBufferIndex]);
      UpdateFrustum(updateBufferIndex);
  
      // ignore the error, if the view projection is incorrect (non inversible) then you will have tough times anyways
@@ -474,7 +604,7 @@ uint32_t Camera::UpdateViewMatrix(BufferIndex updateBufferIndex)
  
              Matrix& viewMatrix = mViewMatrix.Get(updateBufferIndex);
              Matrix  oldViewMatrix(viewMatrix);
-            MatrixUtils::Multiply(viewMatrix, oldViewMatrix, mReflectionMtx);
+            MatrixUtils::MultiplyTransformMatrix(viewMatrix, oldViewMatrix, mReflectionMtx);
            }
  
            viewMatrix.Invert();
@@ -514,10 +644,12 @@ uint32_t Camera::UpdateViewMatrix(BufferIndex updateBufferIndex)
              upNew3           = Vector3(upNew);
              LookAt(viewMatrix, positionNew3, targetNewVector3, upNew3);
  
-            Matrix oldViewMatrix(viewMatrix);
-            Matrix tmp;
-            tmp.SetIdentityAndScale(Vector3(-1.0, 1.0, 1.0));
-            MatrixUtils::Multiply(viewMatrix, oldViewMatrix, tmp);
+            // Invert X
+            float* vZ = viewMatrix.AsFloat();
+            vZ[0]     = -vZ[0];
+            vZ[4]     = -vZ[4];
+            vZ[8]     = -vZ[8];
+            vZ[12]    = -vZ[12];
  
              mReflectionEye     = positionNew;
              mUseReflectionClip = true;
@@ -539,8 +671,8 @@ uint32_t Camera::UpdateViewMatrix(BufferIndex updateBufferIndex)
  void Camera::UpdateFrustum(BufferIndex updateBufferIndex, bool normalize)
  {
    // Extract the clip matrix planes
-  Matrix clipMatrix;
-  MatrixUtils::Multiply(clipMatrix, mViewMatrix[updateBufferIndex], mProjectionMatrix[updateBufferIndex]);
+  Matrix clipMatrix(false); // Don't initialize.
+  MatrixUtils::MultiplyProjectionMatrix(clipMatrix, mViewMatrix[updateBufferIndex], mProjectionMatrix[updateBufferIndex]);
  
    const float*   cm     = clipMatrix.AsFloat();
    FrustumPlanes& planes = mFrustum[updateBufferIndex];
@@ -685,18 +817,12 @@ uint32_t Camera::UpdateProjection(BufferIndex updateBufferIndex)
              float         d               = mReflectionPlane.Dot(mReflectionEye);
              if(d < 0)
              {
-              adjReflectPlane.w = -adjReflectPlane.w;
+              // The original eye position is behind mReflectionPlane. Reverse the plane.
+              adjReflectPlane = -adjReflectPlane;
              }
  
              Vector4 customClipping = viewInv * adjReflectPlane;
-            AdjustNearPlaneForPerspective(projectionMatrix, customClipping);
-
-            // Invert Z
-            Matrix matZ;
-            matZ.SetIdentity();
-            float* vZ = matZ.AsFloat();
-            vZ[10]    = -vZ[10];
-            MatrixUtils::Multiply(projectionMatrix, projectionMatrix, matZ);
+            AdjustNearPlaneForPerspective(projectionMatrix, customClipping, mFarClippingPlane);
            }
            break;
          }
@@ -710,6 +836,26 @@ uint32_t Camera::UpdateProjection(BufferIndex updateBufferIndex)
                         mNearClippingPlane,
                         mFarClippingPlane,
                         mInvertYAxis);
+
+          //need to apply custom clipping plane
+          if(mUseReflectionClip)
+          {
+            Matrix& viewMatrix = mViewMatrix.Get(updateBufferIndex);
+            Matrix  viewInv    = viewMatrix;
+            viewInv.Invert();
+            viewInv.Transpose();
+
+            Dali::Vector4 adjReflectPlane = mReflectionPlane;
+            float         d               = mReflectionPlane.Dot(mReflectionEye);
+            if(d < 0)
+            {
+              // The original eye position is behind mReflectionPlane. Reverse the plane.
+              adjReflectPlane = -adjReflectPlane;
+            }
+
+            Vector4 customClipping = viewInv * adjReflectPlane;
+            AdjustNearPlaneForOrthographic(projectionMatrix, customClipping, mFarClippingPlane);
+          }
            break;
          }
        }
@@ -739,11 +885,8 @@ uint32_t Camera::UpdateProjection(BufferIndex updateBufferIndex)
            break;
        }
  
-      Matrix rotation;
-      rotation.SetIdentity();
-      rotation.SetTransformComponents(Vector3(1.0f, 1.0f, 1.0f), rotationAngle, Vector3(0.0f, 0.0f, 0.0f));
-
-      MatrixUtils::Multiply(finalProjection, mProjectionMatrix.Get(updateBufferIndex), rotation);
+      // TODO : Can't we make finalProjection without matrix multiply?
+      MatrixUtils::Multiply(finalProjection, mProjectionMatrix.Get(updateBufferIndex), rotationAngle);
      }
      --mUpdateProjectionFlag;
    }
diff --git a/dali/internal/update/render-tasks/scene-graph-camera.h b/dali/internal/update/render-tasks/scene-graph-camera.h

index 6604fb9..278d765 100644 (file)
--- a/dali/internal/update/render-tasks/scene-graph-camera.h
+++ b/dali/internal/update/render-tasks/scene-graph-camera.h
@@ -2,7 +2,7 @@
  #define DALI_INTERNAL_SCENE_GRAPH_CAMERA_H
  
  /*
- * Copyright (c) 2022 Samsung Electronics Co., Ltd.
+ * Copyright (c) 2023 Samsung Electronics Co., Ltd.
   *
   * Licensed under the Apache License, Version 2.0 (the "License");
   * you may not use this file except in compliance with the License.
@@ -384,13 +384,6 @@ private:
     */
    void UpdateFrustum(BufferIndex updateBufferIndex, bool normalize = true);
  
-  /**
-   * Adjust near plane for reflection
-   * @param perspective Perspective matrix
-   * @param clipPlane Clipping plane
-   */
-  void AdjustNearPlaneForPerspective(Matrix& perspective, const Vector4& clipPlane);
-
    uint32_t mUpdateViewFlag;       ///< This is non-zero if the view matrix requires an update
    uint32_t mUpdateProjectionFlag; ///< This is non-zero if the projection matrix requires an update
    int      mProjectionRotation;   ///< The rotaion angle of the projection
author	Eunki Hong <eunkiki.hong@samsung.com>
	Sat, 4 Feb 2023 04:08:53 +0000 (13:08 +0900)
committer	Eunki, Hong <eunkiki.hong@samsung.com>
	Thu, 20 Apr 2023 04:10:31 +0000 (13:10 +0900)
automated-tests/src/dali-internal/utc-Dali-Internal-MatrixUtils.cpp		patch \| blob \| history
automated-tests/src/dali/utc-Dali-CameraActor.cpp		patch \| blob \| history
dali/internal/common/matrix-utils.cpp		patch \| blob \| history
dali/internal/common/matrix-utils.h		patch \| blob \| history
dali/internal/event/actors/actor-coords.cpp		patch \| blob \| history
dali/internal/event/actors/camera-actor-impl.cpp		patch \| blob \| history
dali/internal/event/common/projection.cpp		patch \| blob \| history
dali/internal/render/renderers/render-renderer.cpp		patch \| blob \| history
dali/internal/update/manager/render-instruction-processor.cpp		patch \| blob \| history
dali/internal/update/manager/transform-manager.cpp		patch \| blob \| history
dali/internal/update/render-tasks/scene-graph-camera.cpp		patch \| blob \| history
dali/internal/update/render-tasks/scene-graph-camera.h		patch \| blob \| history