Refs #154. Fixed a gemv_t bug that overflowed the 16MB buffer on x86.
author: Zhang Xianyi <traits.zhang@gmail.com>
Sun, 20 Jan 2013 13:22:12 +0000 (21:22 +0800)
committer: Zhang Xianyi <traits.zhang@gmail.com>
Sun, 20 Jan 2013 13:22:12 +0000 (21:22 +0800)
kernel/x86/gemv_t_sse.S
kernel/x86/gemv_t_sse2.S

index fa6cfc5..326584b 100644 (file)
 .L0t:
        xorl    J,J
        addl    $1,J
-       sall    $21,J                           # J=2^22
+       sall    $22,J                           # J=2^22 floats; 2^22*sizeof(float)=buffer size(16MB)
+       subl    $8, J                           # Don't use the last 8 floats in the buffer.
+                                               # Now, split M by block J
        subl    J,MMM                           # MMM=MMM-J
        movl    J,M             
        jge             .L00t
index d46d7e4..60d6ef2 100644 (file)
 .L0t:
        xorl    J,J
        addl    $1,J
-       sall    $22,J                           # J=2^22
+       sall    $21,J                           # J=2^21 doubles; 2^21*sizeof(double)=buffer size(16MB)
+       subl    $4, J                           # Don't use the last 4 doubles in the buffer.
+                                               # Now, split M by block J
        subl    J,MMM                           # MMM=MMM-J
        movl    J,M             
        jge             .L00t