Tizen 2.1 base
[platform/upstream/libbullet.git] / Extras / PhysicsEffects / sample / test_ARM_NEON_performance / test_neon_transpose_matrix3.cpp
1 /*\r
2  Applied Research Associates Inc. (c)2011\r
3 \r
4  Redistribution and use in source and binary forms,\r
5    with or without modification, are permitted provided that the\r
6    following conditions are met:\r
7     * Redistributions of source code must retain the above copyright\r
8       notice, this list of conditions and the following disclaimer.\r
9     * Redistributions in binary form must reproduce the above copyright\r
10       notice, this list of conditions and the following disclaimer in the\r
11       documentation and/or other materials provided with the distribution.\r
12     * Neither the name of the Applied Research Associates Inc nor the names\r
13       of its contributors may be used to endorse or promote products derived\r
14       from this software without specific prior written permission.\r
15 \r
16    THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"\r
17    AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE\r
18    IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE\r
19    ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE\r
20    LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR\r
21    CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF\r
22    SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS\r
23    INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN\r
24    CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)\r
25    ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE\r
26    POSSIBILITY OF SUCH DAMAGE.\r
27 */\r
28 \r
29 #include "test_neon.h"\r
30 #define SCE_PFX_USE_PERFCOUNTER\r
31 #include "physics_effects.h"\r
32 #include <arm_neon.h>\r
33 #include <string.h>\r
34 #include <stdio.h>\r
35 #include <stdlib.h>\r
36 #include <android/log.h>\r
37 \r
// Declaration attribute requesting a minimum alignment of `bytes` bytes.
// Uses the GCC __attribute__ extension, so it requires gcc or a compatible compiler.
#define SET_ALIGNMENT(bytes)   __attribute__((aligned(bytes)))
40 \r
// Assembly implementation of the matrix3 transpose. Declared with C linkage so
// the symbol name is not C++-mangled.
extern "C" void TransposeMatrix3Neon(float *mCol, float *pfResult);
46 \r
//----------------------------------------------------------------------------
//  TransposeMatrix3Scalar
//
/// Performs a transpose on a matrix3 using scalar math, storing the
/// result directly into system memory.
///
/// Input and output are each laid out as three consecutive 4-float vectors
/// (12 floats). Only the first three floats of each input vector are read;
/// the fourth float of every output vector is written as 0.0f.
///
/// All nine inputs are read before any output is written, so mCol and
/// pfResult may refer to the same (or overlapping) storage.
///
/// @param  mCol      Input matrix. Must point to 3x 4 float values
/// @param  pfResult  Output matrix. Must point to 3x 4 float values.
///                   Contains the result on return
//----------------------------------------------------------------------------
// Reference (vector-math library form of the same operation):
//inline const Matrix3 transpose( const Matrix3 & mat )
//{
//    return Matrix3(
//        Vector3( mat.getCol0().getX(), mat.getCol1().getX(), mat.getCol2().getX() ),
//        Vector3( mat.getCol0().getY(), mat.getCol1().getY(), mat.getCol2().getY() ),
//        Vector3( mat.getCol0().getZ(), mat.getCol1().getZ(), mat.getCol2().getZ() )
//    );
//}
void TransposeMatrix3Scalar(float *mCol, float *pfResult)
{
    // Read phase: capture every input element first so that later stores
    // cannot clobber a value that is still needed when the buffers alias.
    const float fV0X = mCol[0], fV0Y = mCol[1], fV0Z = mCol[2];
    const float fV1X = mCol[4], fV1Y = mCol[5], fV1Z = mCol[6];
    const float fV2X = mCol[8], fV2Y = mCol[9], fV2Z = mCol[10];

    // Write phase: output vector k gathers element k of each input vector.
    pfResult[0]  = fV0X;
    pfResult[1]  = fV1X;
    pfResult[2]  = fV2X;
    pfResult[3]  = 0.0f;

    pfResult[4]  = fV0Y;
    pfResult[5]  = fV1Y;
    pfResult[6]  = fV2Y;
    pfResult[7]  = 0.0f;

    pfResult[8]  = fV0Z;
    pfResult[9]  = fV1Z;
    pfResult[10] = fV2Z;
    pfResult[11] = 0.0f;
}
81 \r
82 //----------------------------------------------------------------------------\r
83 //  TestNeonTransposeMatrix3\r
84 //\r
85 /// Run timing study of the matrix3 transpose functions above, writing the\r
86 /// results to the Android verbose log.\r
87 //----------------------------------------------------------------------------\r
88 void TestNeonTransposeMatrix3()\r
89 {\r
90         float SET_ALIGNMENT(64) data[] = {float(rand())/float(RAND_MAX),float(rand())/float(RAND_MAX),float(rand())/float(RAND_MAX),0.0f,\r
91                                                                           float(rand())/float(RAND_MAX),float(rand())/float(RAND_MAX),float(rand())/float(RAND_MAX),0.0f,\r
92                                                                           float(rand())/float(RAND_MAX),float(rand())/float(RAND_MAX),float(rand())/float(RAND_MAX),0.0f};\r
93         float *mCol = &data[0];\r
94 \r
95         char szMsg[256];\r
96 \r
97         sprintf(szMsg, "");\r
98         __android_log_write(ANDROID_LOG_VERBOSE,"PHYSICS TIMING STUDY", szMsg);\r
99         sprintf(szMsg,"---------------------------------------");\r
100         __android_log_write(ANDROID_LOG_VERBOSE,"PHYSICS TIMING STUDY", szMsg);\r
101         sprintf(szMsg,"TestNeonTransposeMatrix3 Start");\r
102         __android_log_write(ANDROID_LOG_VERBOSE,"PHYSICS TIMING STUDY", szMsg);\r
103         sprintf(szMsg, "Test input mCol:  <%f,%f,%f,%f> <%f,%f,%f,%f> <%f,%f,%f,%f>",\r
104                         mCol[0], mCol[1], mCol[2], mCol[3],\r
105                         mCol[4], mCol[5], mCol[6], mCol[7],\r
106                         mCol[8], mCol[9], mCol[10], mCol[11]);\r
107         __android_log_write(ANDROID_LOG_VERBOSE,"PHYSICS TIMING STUDY", szMsg);\r
108 \r
109         float SET_ALIGNMENT(64) fResult[12];\r
110     \r
111 \r
112         sce::PhysicsEffects::PfxPerfCounter pc;\r
113         double dTimeSpan, dRefTimeSpan;; \r
114         unsigned int uiNumTries = 10000000;\r
115         unsigned int i;\r
116 \r
117 // profile scalar TransposeMatrix3Scalar with direct memory return, c++ version\r
118         fResult[0] = 0.0f;\r
119         pc.countBegin("");\r
120         for(i = 0; i < uiNumTries; i++)\r
121         {\r
122                 TransposeMatrix3Scalar(mCol, fResult);\r
123         }\r
124         pc.countEnd();\r
125         dTimeSpan = pc.getCountTime(0);\r
126         pc.resetCount();\r
127         dRefTimeSpan = dTimeSpan;\r
128         sprintf(szMsg, "Time to do %i calls for TransposeMatrix3Scalar: %f secs, speedup: %5.2f, result value=<%f,%f,%f,%f> <%f,%f,%f,%f> <%f,%f,%f,%f>",\r
129                                         uiNumTries, dTimeSpan, dRefTimeSpan/dTimeSpan, fResult[0], fResult[1], fResult[2], fResult[3],\r
130                                                                                                                                    fResult[4], fResult[5], fResult[6], fResult[7],\r
131                                                                                                                                    fResult[8], fResult[9], fResult[10], fResult[11]);\r
132         __android_log_write(ANDROID_LOG_VERBOSE,"PHYSICS TIMING STUDY", szMsg);\r
133 \r
134 \r
135 // profile NEON TransposeMatrix3Neon with direct memory return, assembly version\r
136         fResult[0] = 0.0f;\r
137         pc.countBegin("");\r
138         for(i = 0; i < uiNumTries; i++)\r
139         {\r
140                 TransposeMatrix3Neon(mCol, fResult);\r
141         }\r
142         pc.countEnd();\r
143         dTimeSpan = pc.getCountTime(0);\r
144         pc.resetCount();\r
145         sprintf(szMsg, "Time to do %i calls for TransposeMatrix3Neon: %f secs, speedup: %5.2f, result value=<%f,%f,%f,%f> <%f,%f,%f,%f> <%f,%f,%f,%f>",\r
146                                         uiNumTries, dTimeSpan, dRefTimeSpan/dTimeSpan, fResult[0], fResult[1], fResult[2], fResult[3],\r
147                                                                                                                                    fResult[4], fResult[5], fResult[6], fResult[7],\r
148                                                                                                                                    fResult[8], fResult[9], fResult[10], fResult[11]);\r
149         __android_log_write(ANDROID_LOG_VERBOSE,"PHYSICS TIMING STUDY", szMsg);\r
150 \r
151 \r
152         sprintf(szMsg,"TestNeonTransposeMatrix3 End");\r
153         __android_log_write(ANDROID_LOG_VERBOSE,"PHYSICS TIMING STUDY", szMsg);\r
154 }\r