2 Applied Research Associates Inc. (c)2011
\r
4 Redistribution and use in source and binary forms,
\r
5 with or without modification, are permitted provided that the
\r
6 following conditions are met:
\r
7 * Redistributions of source code must retain the above copyright
\r
8 notice, this list of conditions and the following disclaimer.
\r
9 * Redistributions in binary form must reproduce the above copyright
\r
10 notice, this list of conditions and the following disclaimer in the
\r
11 documentation and/or other materials provided with the distribution.
\r
12 * Neither the name of the Applied Research Associates Inc nor the names
\r
13 of its contributors may be used to endorse or promote products derived
\r
14 from this software without specific prior written permission.
\r
16 THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
\r
17 AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
\r
18 IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
\r
19 ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
\r
20 LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
\r
21 CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
\r
22 SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
\r
23 INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
\r
24 CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
\r
25 ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
\r
26 POSSIBILITY OF SUCH DAMAGE.
\r
29 #include "test_neon.h"
\r
30 #define SCE_PFX_USE_PERFCOUNTER
\r
31 #include "physics_effects.h"
\r
32 #include <arm_neon.h>
\r
36 #include <android/log.h>
\r
38 // This works with gcc
\r
// SET_ALIGNMENT(alignment) expands to the GCC-style variable attribute
// __attribute__((__aligned__((alignment)))). In this file it is written
// between the element type and the variable name, e.g.
// `float SET_ALIGNMENT(64) data[]`, to request 64-byte alignment for the
// float arrays handed to the transpose routines.
// NOTE(review): relies on the GCC attribute syntax; other compilers need
// their own spelling (or C++11 alignas) -- confirm the supported toolchains.
39 #define SET_ALIGNMENT(alignment) __attribute__((__aligned__((alignment))))
\r
41 // assembly implementations
\r
// Declaration only: the definition of TransposeMatrix3Neon is not part of
// this listing (per the comment above it is implemented in assembly). The
// timing test calls it with the same (mCol, pfResult) arguments it passes to
// TransposeMatrix3Scalar and prints 12 floats from pfResult afterwards.
// NOTE(review): original lines 42-43 and 45-46 are absent from this listing;
// an assembly symbol normally needs C linkage -- confirm this declaration is
// wrapped in extern "C".
44 void TransposeMatrix3Neon(float *mCol, float *pfResult);
\r
//----------------------------------------------------------------------------
//  TransposeMatrix3Scalar
//
/// Transposes a 3x3 matrix using scalar math, storing the result directly
/// into caller-provided memory.
///
/// Both matrices are laid out as three groups of four floats: three matrix
/// elements followed by one pad float. Output group r receives element r of
/// each input group, and every output pad float (indices 3, 7 and 11) is set
/// to 0.0f so that all 12 output floats are defined after the call.
///
/// The transpose is not performed in place: output elements are written
/// before all input elements have been read, so mCol and pfResult must not
/// overlap.
///
/// @param mCol      Input matrix. Must point to 3 x 4 float values; the pad
///                  float of each group (indices 3, 7, 11) is not read.
/// @param pfResult  [out] Must point to 3 x 4 writable floats. Receives the
///                  transposed matrix with zeroed pad floats.
//----------------------------------------------------------------------------
// Reference implementation on the Matrix3 type (kept for comparison):
//inline const Matrix3 transpose( const Matrix3 & mat )
//{
//    return Matrix3(
//        Vector3( mat.getCol0().getX(), mat.getCol1().getX(), mat.getCol2().getX() ),
//        Vector3( mat.getCol0().getY(), mat.getCol1().getY(), mat.getCol2().getY() ),
//        Vector3( mat.getCol0().getZ(), mat.getCol1().getZ(), mat.getCol2().getZ() )
//    );
//}
void TransposeMatrix3Scalar(float *mCol, float *pfResult)
{
    for (int r = 0; r < 3; ++r)
    {
        float *pfOut = pfResult + 4 * r;

        pfOut[0] = mCol[r];      // element r of input group 0
        pfOut[1] = mCol[4 + r];  // element r of input group 1
        pfOut[2] = mCol[8 + r];  // element r of input group 2

        // Zero the pad float of every group, not just the last one, so
        // callers that read or print all 12 floats never see stale data.
        pfOut[3] = 0.0f;
    }
}
\r
82 //----------------------------------------------------------------------------
\r
83 // TestNeonTransposeMatrix3
\r
85 /// Run timing study of the matrix3 transpose functions above, writing the
\r
86 /// results to the Android verbose log.
\r
87 //----------------------------------------------------------------------------
\r
// Timing harness: fills a 3 x 4 float matrix with random values, calls
// TransposeMatrix3Scalar and then TransposeMatrix3Neon uiNumTries times each,
// and writes the timings and the last result of each to the Android log at
// verbose priority under the tag "PHYSICS TIMING STUDY".
// NOTE(review): this listing skips some original lines (the numbering jumps,
// e.g. 93 -> 98, 114 -> 117, 117 -> 120). The declarations of szMsg and i,
// the loop braces, and any calls that start/stop the perf counter are not
// visible here -- check the full file before editing.
88 void TestNeonTransposeMatrix3()
\r
// Input matrix: three groups of three rand()/RAND_MAX values (each in [0,1])
// followed by a 0.0f pad, with 64-byte alignment requested.
90 float SET_ALIGNMENT(64) data[] = {float(rand())/float(RAND_MAX),float(rand())/float(RAND_MAX),float(rand())/float(RAND_MAX),0.0f,
\r
91 float(rand())/float(RAND_MAX),float(rand())/float(RAND_MAX),float(rand())/float(RAND_MAX),0.0f,
\r
92 float(rand())/float(RAND_MAX),float(rand())/float(RAND_MAX),float(rand())/float(RAND_MAX),0.0f};
\r
93 float *mCol = &data[0];
\r
// NOTE(review): every message is formatted with unbounded sprintf into szMsg;
// its size is not visible in this listing -- confirm it can hold the longest
// message below (fixed text plus 12 %f fields and the timing fields).
98 __android_log_write(ANDROID_LOG_VERBOSE,"PHYSICS TIMING STUDY", szMsg);
\r
99 sprintf(szMsg,"---------------------------------------");
\r
100 __android_log_write(ANDROID_LOG_VERBOSE,"PHYSICS TIMING STUDY", szMsg);
\r
101 sprintf(szMsg,"TestNeonTransposeMatrix3 Start");
\r
102 __android_log_write(ANDROID_LOG_VERBOSE,"PHYSICS TIMING STUDY", szMsg);
\r
// Log all 12 input floats, pad values included.
103 sprintf(szMsg, "Test input mCol: <%f,%f,%f,%f> <%f,%f,%f,%f> <%f,%f,%f,%f>",
\r
104 mCol[0], mCol[1], mCol[2], mCol[3],
\r
105 mCol[4], mCol[5], mCol[6], mCol[7],
\r
106 mCol[8], mCol[9], mCol[10], mCol[11]);
\r
107 __android_log_write(ANDROID_LOG_VERBOSE,"PHYSICS TIMING STUDY", szMsg);
\r
// Output buffer shared by both runs. It has no initializer and all 12
// elements are printed after each run, so any element a transpose routine
// does not write is logged with an indeterminate value.
109 float SET_ALIGNMENT(64) fResult[12];
\r
112 sce::PhysicsEffects::PfxPerfCounter pc;
\r
// dRefTimeSpan holds the scalar run's time as the baseline for the speedup
// figure. (The doubled ';' is an empty statement and has no effect.)
113 double dTimeSpan, dRefTimeSpan;;
\r
// Number of calls made to each transpose routine: 10,000,000.
114 unsigned int uiNumTries = 10000000;
\r
117 // profile scalar TransposeMatrix3Scalar with direct memory return, c++ version
\r
120 for(i = 0; i < uiNumTries; i++)
\r
122 TransposeMatrix3Scalar(mCol, fResult);
\r
// Reads slot 0 of the perf counter; presumably the elapsed time of the loop
// above -- confirm against PfxPerfCounter and the omitted lines.
125 dTimeSpan = pc.getCountTime(0);
\r
// The scalar time becomes the reference, so the "speedup" printed for the
// scalar run is dRefTimeSpan/dTimeSpan of a value with itself.
127 dRefTimeSpan = dTimeSpan;
\r
// NOTE(review): uiNumTries is unsigned int but is formatted with %i; %u is
// the matching conversion.
128 sprintf(szMsg, "Time to do %i calls for TransposeMatrix3Scalar: %f secs, speedup: %5.2f, result value=<%f,%f,%f,%f> <%f,%f,%f,%f> <%f,%f,%f,%f>",
\r
129 uiNumTries, dTimeSpan, dRefTimeSpan/dTimeSpan, fResult[0], fResult[1], fResult[2], fResult[3],
\r
130 fResult[4], fResult[5], fResult[6], fResult[7],
\r
131 fResult[8], fResult[9], fResult[10], fResult[11]);
\r
132 __android_log_write(ANDROID_LOG_VERBOSE,"PHYSICS TIMING STUDY", szMsg);
\r
135 // profile NEON TransposeMatrix3Neon with direct memory return, assembly version
\r
138 for(i = 0; i < uiNumTries; i++)
\r
140 TransposeMatrix3Neon(mCol, fResult);
\r
143 dTimeSpan = pc.getCountTime(0);
\r
// Speedup here is the scalar baseline time divided by the NEON time, so a
// value above 1 means the NEON routine took less time.
// NOTE(review): same %i / unsigned int mismatch as in the scalar message.
145 sprintf(szMsg, "Time to do %i calls for TransposeMatrix3Neon: %f secs, speedup: %5.2f, result value=<%f,%f,%f,%f> <%f,%f,%f,%f> <%f,%f,%f,%f>",
\r
146 uiNumTries, dTimeSpan, dRefTimeSpan/dTimeSpan, fResult[0], fResult[1], fResult[2], fResult[3],
\r
147 fResult[4], fResult[5], fResult[6], fResult[7],
\r
148 fResult[8], fResult[9], fResult[10], fResult[11]);
\r
149 __android_log_write(ANDROID_LOG_VERBOSE,"PHYSICS TIMING STUDY", szMsg);
\r
152 sprintf(szMsg,"TestNeonTransposeMatrix3 End");
\r
153 __android_log_write(ANDROID_LOG_VERBOSE,"PHYSICS TIMING STUDY", szMsg);
\r
\r