From 3d5e792c72cd6d9894c4583c527e058481816657 Mon Sep 17 00:00:00 2001 From: wernsaar Date: Wed, 27 Nov 2013 18:38:32 +0100 Subject: [PATCH] optimized sgemm kernel for ARMV6 --- kernel/arm/sgemm_kernel_4x2_vfp.S | 15 +++++++++++---- 1 file changed, 11 insertions(+), 4 deletions(-) diff --git a/kernel/arm/sgemm_kernel_4x2_vfp.S b/kernel/arm/sgemm_kernel_4x2_vfp.S index 3e20f86..e074e74 100644 --- a/kernel/arm/sgemm_kernel_4x2_vfp.S +++ b/kernel/arm/sgemm_kernel_4x2_vfp.S @@ -26,7 +26,7 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. *****************************************************************************/ /************************************************************************************** -* 2013/11/23 Saar +* 2013/11/27 Saar * BLASTEST : OK * CTEST : OK * TEST : OK @@ -101,16 +101,16 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. .macro KERNEL4x2_SUB flds s4 , [ BO ] - flds s5 , [ BO, #4 ] flds s0 , [ AO ] flds s1 , [ AO, #4 ] - flds s2 , [ AO, #8 ] - flds s3 , [ AO, #12 ] fmacs s8 , s0, s4 + flds s2 , [ AO, #8 ] fmacs s9 , s1, s4 + flds s3 , [ AO, #12 ] fmacs s10 , s2, s4 + flds s5 , [ BO, #4 ] fmacs s11 , s3, s4 fmacs s12 , s0, s5 @@ -469,13 +469,20 @@ sgemm_kernel_L2_M4_20: .align 5 sgemm_kernel_L2_M4_22: + + pld [ AO, #A_PRE ] + pld [ BO, #B_PRE ] KERNEL4x2_SUB KERNEL4x2_SUB + pld [ AO, #A_PRE ] KERNEL4x2_SUB KERNEL4x2_SUB + pld [ AO, #A_PRE ] + pld [ BO, #B_PRE ] KERNEL4x2_SUB KERNEL4x2_SUB + pld [ AO, #A_PRE ] KERNEL4x2_SUB KERNEL4x2_SUB -- 2.7.4