/*
 * Parameter group: aligned SSE moves, prefetcht0, PREFETCHSIZE 64 * 3.
 *
 * ALIGNED_ACCESS is defined as a value-less flag and every MOVUPS_* alias
 * expands to movaps, which requires 16-byte-aligned memory operands.
 * PREFETCHSIZE expands to an unparenthesized product, so a use site that
 * combines it with other operators has to parenthesize it itself.
 *
 * NOTE(review): the conditional that selects this group is not visible in
 * this excerpt -- confirm which target it belongs to.
 */
12 #define ALIGNED_ACCESS
13 #define MOVUPS_A movaps
14 #define MOVUPS_XL movaps
15 #define MOVUPS_XS movaps
16 #define MOVUPS_YL movaps
17 #define MOVUPS_YS movaps
18 #define PREFETCH prefetcht0
19 #define PREFETCHSIZE 64 * 3
/*
 * Parameter group: aligned SSE moves, prefetcht0, PREFETCHSIZE 64 * 2.
 *
 * Defines the ALIGNED_ACCESS flag and maps every MOVUPS_* alias to movaps
 * (16-byte-aligned memory operands required). PREFETCHSIZE is an
 * unparenthesized product.
 *
 * NOTE(review): the guarding conditional is not visible in this excerpt --
 * confirm which target this group applies to.
 */
23 #define ALIGNED_ACCESS
24 #define MOVUPS_A movaps
25 #define MOVUPS_XL movaps
26 #define MOVUPS_XS movaps
27 #define MOVUPS_YL movaps
28 #define MOVUPS_YS movaps
29 #define PREFETCH prefetcht0
30 #define PREFETCHSIZE 64 * 2
/*
 * Parameter group: aligned SSE moves, prefetcht0, PREFETCHSIZE 64 * 4.
 *
 * Defines the ALIGNED_ACCESS flag and maps every MOVUPS_* alias to movaps
 * (16-byte-aligned memory operands required). PREFETCHSIZE is an
 * unparenthesized product.
 *
 * NOTE(review): the guarding conditional is not visible in this excerpt --
 * confirm which target this group applies to.
 */
34 #define ALIGNED_ACCESS
35 #define MOVUPS_A movaps
36 #define MOVUPS_XL movaps
37 #define MOVUPS_XS movaps
38 #define MOVUPS_YL movaps
39 #define MOVUPS_YS movaps
40 #define PREFETCH prefetcht0
41 #define PREFETCHSIZE 64 * 4
/*
 * Parameter group: aligned SSE moves, prefetcht0, PREFETCHSIZE 64 * 4.
 *
 * The definitions here are token-for-token the same as those of the group
 * immediately preceding it in this excerpt.
 *
 * NOTE(review): presumably each group sits under its own target
 * conditional; those directives are not visible here -- confirm.
 */
45 #define ALIGNED_ACCESS
46 #define MOVUPS_A movaps
47 #define MOVUPS_XL movaps
48 #define MOVUPS_XS movaps
49 #define MOVUPS_YL movaps
50 #define MOVUPS_YS movaps
51 #define PREFETCH prefetcht0
52 #define PREFETCHSIZE 64 * 4
/*
 * Parameter group: unaligned SSE moves, PREFETCHSIZE 64 * 3.
 *
 * Every MOVUPS_* alias expands to movups, which has no alignment
 * requirement; ALIGNED_ACCESS is not defined by this group. Both PREFETCH
 * and PREFETCHW map to prefetcht0 here.
 *
 * NOTE(review): the guarding conditional is not visible in this excerpt --
 * confirm which target this group applies to.
 */
56 #define MOVUPS_A movups
57 #define MOVUPS_XL movups
58 #define MOVUPS_XS movups
59 #define MOVUPS_YL movups
60 #define MOVUPS_YS movups
61 #define PREFETCH prefetcht0
62 #define PREFETCHW prefetcht0
63 #define PREFETCHSIZE 64 * 3
/*
 * Parameter group: unaligned SSE moves, PREFETCHSIZE 64 * 3.
 *
 * Same definitions as the preceding movups group: all MOVUPS_* aliases are
 * movups, and PREFETCH / PREFETCHW are both prefetcht0.
 *
 * NOTE(review): presumably selected by a different target conditional that
 * is not visible in this excerpt -- confirm.
 */
67 #define MOVUPS_A movups
68 #define MOVUPS_XL movups
69 #define MOVUPS_XS movups
70 #define MOVUPS_YL movups
71 #define MOVUPS_YS movups
72 #define PREFETCH prefetcht0
73 #define PREFETCHW prefetcht0
74 #define PREFETCHSIZE 64 * 1
/*
 * Parameter group that selects the prefetch / prefetchw mnemonics and does
 * not define any MOVUPS_* alias in the visible lines.
 *
 * PREFETCHSIZE is defined twice with the same replacement (64 * 1).
 * NOTE(review): the two definitions presumably sit in alternate branches of
 * a conditional whose directives are not visible in this excerpt -- confirm.
 */
78 #define PREFETCH prefetch
79 #define PREFETCHW prefetchw
81 #define PREFETCHSIZE 64 * 1
83 #define PREFETCHSIZE 64 * 1
/*
 * Parameter group used when BARCELONA, SHANGHAI, BOBCAT or
 * BARCELONA_OPTIMIZATION is defined.
 *
 * Defines the ALIGNED_ACCESS flag, maps every MOVUPS_* alias to movaps
 * (16-byte-aligned memory operands required) and selects the prefetch /
 * prefetchw mnemonics.
 *
 * PREFETCHSIZE appears twice with different values (64 * 2 and 64 * 4).
 * NOTE(review): these presumably belong to alternate branches of a nested
 * conditional; neither that conditional nor the closing directive for the
 * #if below is visible in this excerpt -- confirm against the full file.
 */
88 #if defined(BARCELONA) || defined(SHANGHAI) || defined(BOBCAT) || defined(BARCELONA_OPTIMIZATION)
89 #define ALIGNED_ACCESS
90 #define MOVUPS_A movaps
91 #define MOVUPS_XL movaps
92 #define MOVUPS_XS movaps
93 #define MOVUPS_YL movaps
94 #define MOVUPS_YS movaps
96 #define PREFETCH prefetch
97 #define PREFETCHW prefetchw
99 #define PREFETCHSIZE 64 * 2
101 #define PREFETCHSIZE 64 * 4
/*
 * Parameter group: aligned SSE moves with prefetcht0.
 *
 * Defines the ALIGNED_ACCESS flag and maps every MOVUPS_* alias to movaps
 * (16-byte-aligned memory operands required). PREFETCHW is not defined in
 * the visible lines of this group.
 *
 * PREFETCHSIZE appears twice with different values (64 * 1 and 64 * 2).
 * NOTE(review): these presumably belong to alternate branches of a
 * conditional that is not visible in this excerpt, as is the conditional
 * selecting the group itself -- confirm.
 */
106 #define ALIGNED_ACCESS
107 #define MOVUPS_A movaps
108 #define MOVUPS_XL movaps
109 #define MOVUPS_XS movaps
110 #define MOVUPS_YL movaps
111 #define MOVUPS_YS movaps
112 #define PREFETCH prefetcht0
114 #define PREFETCHSIZE 64 * 1
116 #define PREFETCHSIZE 64 * 2
/*
 * When L1_DATA_LINESIZE is defined, PREOFFSET is half of it
 * (right shift by one).
 * NOTE(review): the fallback used when L1_DATA_LINESIZE is undefined and
 * the closing directive are not visible in this excerpt.
 */
121 #ifdef L1_DATA_LINESIZE
122 #define PREOFFSET (L1_DATA_LINESIZE >> 1)
/*
 * GEMV_UNROLL and ZGEMV_UNROLL are both set to 4.
 * NOTE(review): judging by the names these are the unroll factors for the
 * real and complex GEMV kernels; the use sites are not visible here, and
 * the definitions may be guarded by conditionals missing from this excerpt.
 *
 * The two switches that follow are left commented out, i.e. disabled.
 */
129 #define GEMV_UNROLL 4
133 #define ZGEMV_UNROLL 4
136 /* #define COPY_FORCE */ /* Always copy X or Y to the buffer */
137 /* #define NOCOPY_UNALIGNED */ /* Not copy if X or Y is not aligned */
/*
 * 16-byte load helpers taking (offset, address[, base, scale], register);
 * expansions are AT&T syntax (memory source first, register destination
 * last).
 *
 *   MOVUPS_A1(OFF, ADDR, REGS)              load from OFF(ADDR)
 *   MOVUPS_A2(OFF, ADDR, BASE, SCALE, REGS) load from OFF(ADDR, BASE, SCALE)
 *
 * Two variants of each are given. The first emits a single instruction via
 * the configured MOVUPS_A alias. The second performs the load as two
 * 8-byte halves: movsd for the low half at OFF, movhps for the high half
 * at OFF + 8.
 * NOTE(review): the conditional choosing between the two variants is not
 * visible in this excerpt -- presumably it tests whether MOVUPS_A is
 * defined; confirm.
 */
140 #define MOVUPS_A1(OFF, ADDR, REGS) MOVUPS_A OFF(ADDR), REGS
141 #define MOVUPS_A2(OFF, ADDR, BASE, SCALE, REGS) MOVUPS_A OFF(ADDR, BASE, SCALE), REGS
143 #define MOVUPS_A1(OFF, ADDR, REGS) movsd OFF(ADDR), REGS; movhps OFF + 8(ADDR), REGS
144 #define MOVUPS_A2(OFF, ADDR, BASE, SCALE, REGS) movsd OFF(ADDR, BASE, SCALE), REGS; movhps OFF + 8(ADDR, BASE, SCALE), REGS
/*
 * MOVRPS_A1 / MOVRPS_A2: same two-instruction load, but with the halves
 * swapped -- the 8 bytes at OFF + 8 go to the low half of REGS and the
 * 8 bytes at OFF go to the high half.
 */
147 #define MOVRPS_A1(OFF, ADDR, REGS) movsd OFF + 8(ADDR), REGS; movhps OFF(ADDR), REGS
148 #define MOVRPS_A2(OFF, ADDR, BASE, SCALE, REGS) movsd OFF + 8(ADDR, BASE, SCALE), REGS; movhps OFF(ADDR, BASE, SCALE), REGS
/*
 * 16-byte load/store helpers taking (offset, address, register);
 * expansions are AT&T syntax (source first, destination last).
 *
 *   MOVUPS_XL1 / MOVUPS_YL1  load  OFF(ADDR) into REGS
 *   MOVUPS_XS1 / MOVUPS_YS1  store REGS to OFF(ADDR)
 *
 * Each helper has two variants. The first emits a single instruction via
 * the matching configured alias (MOVUPS_XL, MOVUPS_XS, MOVUPS_YL,
 * MOVUPS_YS). The second moves the data as two 8-byte halves: movsd for
 * the low half at OFF and movhps for the high half at OFF + 8.
 * NOTE(review): the conditionals choosing between the variants are not
 * visible in this excerpt -- presumably they test whether the matching
 * alias is defined; confirm.
 */
151 #define MOVUPS_XL1(OFF, ADDR, REGS) MOVUPS_XL OFF(ADDR), REGS
153 #define MOVUPS_XL1(OFF, ADDR, REGS) movsd OFF(ADDR), REGS; movhps OFF + 8(ADDR), REGS
157 #define MOVUPS_XS1(OFF, ADDR, REGS) MOVUPS_XS REGS, OFF(ADDR)
159 #define MOVUPS_XS1(OFF, ADDR, REGS) movsd REGS, OFF(ADDR); movhps REGS, OFF + 8(ADDR)
163 #define MOVUPS_YL1(OFF, ADDR, REGS) MOVUPS_YL OFF(ADDR), REGS
165 #define MOVUPS_YL1(OFF, ADDR, REGS) movsd OFF(ADDR), REGS; movhps OFF + 8(ADDR), REGS
169 #define MOVUPS_YS1(OFF, ADDR, REGS) MOVUPS_YS REGS, OFF(ADDR)
171 #define MOVUPS_YS1(OFF, ADDR, REGS) movsd REGS, OFF(ADDR); movhps REGS, OFF + 8(ADDR)