2 @ Applied Research Associates Inc. (c)2011
\r
4 @ Redistribution and use in source and binary forms,
\r
5 @ with or without modification, are permitted provided that the
\r
6 @ following conditions are met:
\r
7 @ * Redistributions of source code must retain the above copyright
\r
8 @ notice, this list of conditions and the following disclaimer.
\r
9 @ * Redistributions in binary form must reproduce the above copyright
\r
10 @ notice, this list of conditions and the following disclaimer in the
\r
11 @ documentation and/or other materials provided with the distribution.
\r
12 @ * Neither the name of the Applied Research Associates Inc nor the names
\r
13 @ of its contributors may be used to endorse or promote products derived
\r
14 @ from this software without specific prior written permission.
\r
16 @ THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
\r
17 @ AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
\r
18 @ IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
\r
19 @ ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
\r
20 @ LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
\r
21 @ CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
\r
22 @ SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
\r
23 @ INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
\r
24 @ CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
\r
25 @ ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
\r
26 @ POSSIBILITY OF SUCH DAMAGE.
\r
35 @ matrix3 operator *: multiplies matrix 0 (at r0) by matrix 1 (at r1), result stored directly to memory at r2
\r
37 .global Matrix3OperatorMultiplyNeon
\r
39 Matrix3OperatorMultiplyNeon:
\r
41 vld1.32 {d16-d19}, [r0]! @ load first eight elements of matrix 0
\r
42 vld1.32 {d20-d21}, [r0] @ load remaining four elements (8-11) of matrix 0
\r
43 vld1.32 {d0-d3}, [r1]! @ load first eight elements of matrix 1
\r
44 vld1.32 {d4-d5}, [r1] @ load remaining four elements (8-11) of matrix 1
\r
46 vmul.f32 q12, q8, d0[0] @ rslt col0 = (mat0 col0) * (mat1 col0 elt0)
\r
47 vmul.f32 q13, q8, d2[0] @ rslt col1 = (mat0 col0) * (mat1 col1 elt0)
\r
48 vmul.f32 q14, q8, d4[0] @ rslt col2 = (mat0 col0) * (mat1 col2 elt0)
\r
50 vmla.f32 q12, q9, d0[1] @ rslt col0 += (mat0 col1) * (mat1 col0 elt1)
\r
51 vmla.f32 q13, q9, d2[1] @ rslt col1 += (mat0 col1) * (mat1 col1 elt1)
\r
52 vmla.f32 q14, q9, d4[1] @ rslt col2 += (mat0 col1) * (mat1 col2 elt1)
\r
54 vmla.f32 q12, q10, d1[0] @ rslt col0 += (mat0 col2) * (mat1 col0 elt2)
\r
55 vmla.f32 q13, q10, d3[0] @ rslt col1 += (mat0 col2) * (mat1 col1 elt2)
\r
56 vmla.f32 q14, q10, d5[0] @ rslt col2 += (mat0 col2) * (mat1 col2 elt2)
\r
58 vst1.32 {d24-d27}, [r2]! @ store first eight elements of result
\r
59 vst1.32 {d28-d29}, [r2] @ store remaining four elements (8-11) of result
\r