Merge tag 'vfs-5.18-merge-1' of git://git.kernel.org/pub/scm/fs/xfs/xfs-linux
[platform/kernel/linux-starfive.git] / include / asm-generic / xor.h
/* SPDX-License-Identifier: GPL-2.0-or-later */
/*
 * include/asm-generic/xor.h
 *
 * Generic optimized RAID-5 checksumming functions.
 */
7
8 #include <linux/prefetch.h>
9
/*
 * XOR one source buffer into the destination: p1[i] ^= p2[i].
 *
 * @bytes: byte count; only complete lines of 8 * sizeof(long) bytes are
 *         processed, a trailing partial line is ignored.
 * @p1:    destination buffer, updated in place.
 * @p2:    source buffer, read only.
 *
 * Nothing is read or written when @bytes holds less than one full line.
 */
static void
xor_8regs_2(unsigned long bytes, unsigned long * __restrict p1,
	    const unsigned long * __restrict p2)
{
	long lines = bytes / (sizeof (long)) / 8;

	/* Pre-tested loop: a do/while would touch one line even for 0 lines. */
	for (; lines > 0; lines--) {
		p1[0] ^= p2[0];
		p1[1] ^= p2[1];
		p1[2] ^= p2[2];
		p1[3] ^= p2[3];
		p1[4] ^= p2[4];
		p1[5] ^= p2[5];
		p1[6] ^= p2[6];
		p1[7] ^= p2[7];
		p1 += 8;
		p2 += 8;
	}
}
29
/*
 * XOR two source buffers into the destination: p1[i] ^= p2[i] ^ p3[i].
 *
 * @bytes: byte count; only complete lines of 8 * sizeof(long) bytes are
 *         processed, a trailing partial line is ignored.
 * @p1:    destination buffer, updated in place.
 * @p2:    first source buffer, read only.
 * @p3:    second source buffer, read only.
 *
 * Nothing is read or written when @bytes holds less than one full line.
 */
static void
xor_8regs_3(unsigned long bytes, unsigned long * __restrict p1,
	    const unsigned long * __restrict p2,
	    const unsigned long * __restrict p3)
{
	long lines = bytes / (sizeof (long)) / 8;

	/* Pre-tested loop: a do/while would touch one line even for 0 lines. */
	for (; lines > 0; lines--) {
		p1[0] ^= p2[0] ^ p3[0];
		p1[1] ^= p2[1] ^ p3[1];
		p1[2] ^= p2[2] ^ p3[2];
		p1[3] ^= p2[3] ^ p3[3];
		p1[4] ^= p2[4] ^ p3[4];
		p1[5] ^= p2[5] ^ p3[5];
		p1[6] ^= p2[6] ^ p3[6];
		p1[7] ^= p2[7] ^ p3[7];
		p1 += 8;
		p2 += 8;
		p3 += 8;
	}
}
51
/*
 * XOR three source buffers into the destination:
 * p1[i] ^= p2[i] ^ p3[i] ^ p4[i].
 *
 * @bytes: byte count; only complete lines of 8 * sizeof(long) bytes are
 *         processed, a trailing partial line is ignored.
 * @p1:    destination buffer, updated in place.
 * @p2:    first source buffer, read only.
 * @p3:    second source buffer, read only.
 * @p4:    third source buffer, read only.
 *
 * Nothing is read or written when @bytes holds less than one full line.
 */
static void
xor_8regs_4(unsigned long bytes, unsigned long * __restrict p1,
	    const unsigned long * __restrict p2,
	    const unsigned long * __restrict p3,
	    const unsigned long * __restrict p4)
{
	long lines = bytes / (sizeof (long)) / 8;

	/* Pre-tested loop: a do/while would touch one line even for 0 lines. */
	for (; lines > 0; lines--) {
		p1[0] ^= p2[0] ^ p3[0] ^ p4[0];
		p1[1] ^= p2[1] ^ p3[1] ^ p4[1];
		p1[2] ^= p2[2] ^ p3[2] ^ p4[2];
		p1[3] ^= p2[3] ^ p3[3] ^ p4[3];
		p1[4] ^= p2[4] ^ p3[4] ^ p4[4];
		p1[5] ^= p2[5] ^ p3[5] ^ p4[5];
		p1[6] ^= p2[6] ^ p3[6] ^ p4[6];
		p1[7] ^= p2[7] ^ p3[7] ^ p4[7];
		p1 += 8;
		p2 += 8;
		p3 += 8;
		p4 += 8;
	}
}
75
/*
 * XOR four source buffers into the destination:
 * p1[i] ^= p2[i] ^ p3[i] ^ p4[i] ^ p5[i].
 *
 * @bytes: byte count; only complete lines of 8 * sizeof(long) bytes are
 *         processed, a trailing partial line is ignored.
 * @p1:    destination buffer, updated in place.
 * @p2:    first source buffer, read only.
 * @p3:    second source buffer, read only.
 * @p4:    third source buffer, read only.
 * @p5:    fourth source buffer, read only.
 *
 * Nothing is read or written when @bytes holds less than one full line.
 */
static void
xor_8regs_5(unsigned long bytes, unsigned long * __restrict p1,
	    const unsigned long * __restrict p2,
	    const unsigned long * __restrict p3,
	    const unsigned long * __restrict p4,
	    const unsigned long * __restrict p5)
{
	long lines = bytes / (sizeof (long)) / 8;

	/* Pre-tested loop: a do/while would touch one line even for 0 lines. */
	for (; lines > 0; lines--) {
		p1[0] ^= p2[0] ^ p3[0] ^ p4[0] ^ p5[0];
		p1[1] ^= p2[1] ^ p3[1] ^ p4[1] ^ p5[1];
		p1[2] ^= p2[2] ^ p3[2] ^ p4[2] ^ p5[2];
		p1[3] ^= p2[3] ^ p3[3] ^ p4[3] ^ p5[3];
		p1[4] ^= p2[4] ^ p3[4] ^ p4[4] ^ p5[4];
		p1[5] ^= p2[5] ^ p3[5] ^ p4[5] ^ p5[5];
		p1[6] ^= p2[6] ^ p3[6] ^ p4[6] ^ p5[6];
		p1[7] ^= p2[7] ^ p3[7] ^ p4[7] ^ p5[7];
		p1 += 8;
		p2 += 8;
		p3 += 8;
		p4 += 8;
		p5 += 8;
	}
}
101
/*
 * XOR one source buffer into the destination: p1[i] ^= p2[i].
 *
 * Each line of 8 words is loaded into locals, combined there and written
 * back afterwards, so loads and stores are grouped.
 *
 * @bytes: byte count; only complete lines of 8 * sizeof(long) bytes are
 *         processed, a trailing partial line is ignored.
 * @p1:    destination buffer, updated in place.
 * @p2:    source buffer, read only.
 *
 * Nothing is read or written when @bytes holds less than one full line.
 */
static void
xor_32regs_2(unsigned long bytes, unsigned long * __restrict p1,
	     const unsigned long * __restrict p2)
{
	long lines = bytes / (sizeof (long)) / 8;

	/* Pre-tested loop: a do/while would touch one line even for 0 lines. */
	for (; lines > 0; lines--) {
		/* Same type as the buffers, so no sign conversion occurs. */
		unsigned long d0, d1, d2, d3, d4, d5, d6, d7;

		d0 = p1[0];	/* Load the destination line ...	*/
		d1 = p1[1];	/*  ... in one burst, if possible.	*/
		d2 = p1[2];
		d3 = p1[3];
		d4 = p1[4];
		d5 = p1[5];
		d6 = p1[6];
		d7 = p1[7];
		d0 ^= p2[0];
		d1 ^= p2[1];
		d2 ^= p2[2];
		d3 ^= p2[3];
		d4 ^= p2[4];
		d5 ^= p2[5];
		d6 ^= p2[6];
		d7 ^= p2[7];
		p1[0] = d0;	/* Write the result back (in one burst)	*/
		p1[1] = d1;
		p1[2] = d2;
		p1[3] = d3;
		p1[4] = d4;
		p1[5] = d5;
		p1[6] = d6;
		p1[7] = d7;
		p1 += 8;
		p2 += 8;
	}
}
138
/*
 * XOR two source buffers into the destination: p1[i] ^= p2[i] ^ p3[i].
 *
 * Each line of 8 words is loaded into locals, combined there and written
 * back afterwards, so loads and stores are grouped.
 *
 * @bytes: byte count; only complete lines of 8 * sizeof(long) bytes are
 *         processed, a trailing partial line is ignored.
 * @p1:    destination buffer, updated in place.
 * @p2:    first source buffer, read only.
 * @p3:    second source buffer, read only.
 *
 * Nothing is read or written when @bytes holds less than one full line.
 */
static void
xor_32regs_3(unsigned long bytes, unsigned long * __restrict p1,
	     const unsigned long * __restrict p2,
	     const unsigned long * __restrict p3)
{
	long lines = bytes / (sizeof (long)) / 8;

	/* Pre-tested loop: a do/while would touch one line even for 0 lines. */
	for (; lines > 0; lines--) {
		/* Same type as the buffers, so no sign conversion occurs. */
		unsigned long d0, d1, d2, d3, d4, d5, d6, d7;

		d0 = p1[0];	/* Load the destination line ...	*/
		d1 = p1[1];	/*  ... in one burst, if possible.	*/
		d2 = p1[2];
		d3 = p1[3];
		d4 = p1[4];
		d5 = p1[5];
		d6 = p1[6];
		d7 = p1[7];
		d0 ^= p2[0];
		d1 ^= p2[1];
		d2 ^= p2[2];
		d3 ^= p2[3];
		d4 ^= p2[4];
		d5 ^= p2[5];
		d6 ^= p2[6];
		d7 ^= p2[7];
		d0 ^= p3[0];
		d1 ^= p3[1];
		d2 ^= p3[2];
		d3 ^= p3[3];
		d4 ^= p3[4];
		d5 ^= p3[5];
		d6 ^= p3[6];
		d7 ^= p3[7];
		p1[0] = d0;	/* Write the result back (in one burst)	*/
		p1[1] = d1;
		p1[2] = d2;
		p1[3] = d3;
		p1[4] = d4;
		p1[5] = d5;
		p1[6] = d6;
		p1[7] = d7;
		p1 += 8;
		p2 += 8;
		p3 += 8;
	}
}
185
/*
 * XOR three source buffers into the destination:
 * p1[i] ^= p2[i] ^ p3[i] ^ p4[i].
 *
 * Each line of 8 words is loaded into locals, combined there and written
 * back afterwards, so loads and stores are grouped.
 *
 * @bytes: byte count; only complete lines of 8 * sizeof(long) bytes are
 *         processed, a trailing partial line is ignored.
 * @p1:    destination buffer, updated in place.
 * @p2:    first source buffer, read only.
 * @p3:    second source buffer, read only.
 * @p4:    third source buffer, read only.
 *
 * Nothing is read or written when @bytes holds less than one full line.
 */
static void
xor_32regs_4(unsigned long bytes, unsigned long * __restrict p1,
	     const unsigned long * __restrict p2,
	     const unsigned long * __restrict p3,
	     const unsigned long * __restrict p4)
{
	long lines = bytes / (sizeof (long)) / 8;

	/* Pre-tested loop: a do/while would touch one line even for 0 lines. */
	for (; lines > 0; lines--) {
		/* Same type as the buffers, so no sign conversion occurs. */
		unsigned long d0, d1, d2, d3, d4, d5, d6, d7;

		d0 = p1[0];	/* Load the destination line ...	*/
		d1 = p1[1];	/*  ... in one burst, if possible.	*/
		d2 = p1[2];
		d3 = p1[3];
		d4 = p1[4];
		d5 = p1[5];
		d6 = p1[6];
		d7 = p1[7];
		d0 ^= p2[0];
		d1 ^= p2[1];
		d2 ^= p2[2];
		d3 ^= p2[3];
		d4 ^= p2[4];
		d5 ^= p2[5];
		d6 ^= p2[6];
		d7 ^= p2[7];
		d0 ^= p3[0];
		d1 ^= p3[1];
		d2 ^= p3[2];
		d3 ^= p3[3];
		d4 ^= p3[4];
		d5 ^= p3[5];
		d6 ^= p3[6];
		d7 ^= p3[7];
		d0 ^= p4[0];
		d1 ^= p4[1];
		d2 ^= p4[2];
		d3 ^= p4[3];
		d4 ^= p4[4];
		d5 ^= p4[5];
		d6 ^= p4[6];
		d7 ^= p4[7];
		p1[0] = d0;	/* Write the result back (in one burst)	*/
		p1[1] = d1;
		p1[2] = d2;
		p1[3] = d3;
		p1[4] = d4;
		p1[5] = d5;
		p1[6] = d6;
		p1[7] = d7;
		p1 += 8;
		p2 += 8;
		p3 += 8;
		p4 += 8;
	}
}
242
/*
 * XOR four source buffers into the destination:
 * p1[i] ^= p2[i] ^ p3[i] ^ p4[i] ^ p5[i].
 *
 * Each line of 8 words is loaded into locals, combined there and written
 * back afterwards, so loads and stores are grouped.
 *
 * @bytes: byte count; only complete lines of 8 * sizeof(long) bytes are
 *         processed, a trailing partial line is ignored.
 * @p1:    destination buffer, updated in place.
 * @p2:    first source buffer, read only.
 * @p3:    second source buffer, read only.
 * @p4:    third source buffer, read only.
 * @p5:    fourth source buffer, read only.
 *
 * Nothing is read or written when @bytes holds less than one full line.
 */
static void
xor_32regs_5(unsigned long bytes, unsigned long * __restrict p1,
	     const unsigned long * __restrict p2,
	     const unsigned long * __restrict p3,
	     const unsigned long * __restrict p4,
	     const unsigned long * __restrict p5)
{
	long lines = bytes / (sizeof (long)) / 8;

	/* Pre-tested loop: a do/while would touch one line even for 0 lines. */
	for (; lines > 0; lines--) {
		/* Same type as the buffers, so no sign conversion occurs. */
		unsigned long d0, d1, d2, d3, d4, d5, d6, d7;

		d0 = p1[0];	/* Load the destination line ...	*/
		d1 = p1[1];	/*  ... in one burst, if possible.	*/
		d2 = p1[2];
		d3 = p1[3];
		d4 = p1[4];
		d5 = p1[5];
		d6 = p1[6];
		d7 = p1[7];
		d0 ^= p2[0];
		d1 ^= p2[1];
		d2 ^= p2[2];
		d3 ^= p2[3];
		d4 ^= p2[4];
		d5 ^= p2[5];
		d6 ^= p2[6];
		d7 ^= p2[7];
		d0 ^= p3[0];
		d1 ^= p3[1];
		d2 ^= p3[2];
		d3 ^= p3[3];
		d4 ^= p3[4];
		d5 ^= p3[5];
		d6 ^= p3[6];
		d7 ^= p3[7];
		d0 ^= p4[0];
		d1 ^= p4[1];
		d2 ^= p4[2];
		d3 ^= p4[3];
		d4 ^= p4[4];
		d5 ^= p4[5];
		d6 ^= p4[6];
		d7 ^= p4[7];
		d0 ^= p5[0];
		d1 ^= p5[1];
		d2 ^= p5[2];
		d3 ^= p5[3];
		d4 ^= p5[4];
		d5 ^= p5[5];
		d6 ^= p5[6];
		d7 ^= p5[7];
		p1[0] = d0;	/* Write the result back (in one burst)	*/
		p1[1] = d1;
		p1[2] = d2;
		p1[3] = d3;
		p1[4] = d4;
		p1[5] = d5;
		p1[6] = d6;
		p1[7] = d7;
		p1 += 8;
		p2 += 8;
		p3 += 8;
		p4 += 8;
		p5 += 8;
	}
}
309
/*
 * XOR one source buffer into the destination (p1[i] ^= p2[i]), issuing
 * prefetchw()/prefetch() hints for the line after the current one.
 *
 * @bytes: byte count; only complete lines of 8 * sizeof(long) bytes are
 *         processed, a trailing partial line is ignored.
 * @p1:    destination buffer, updated in place.
 * @p2:    source buffer, read only.
 *
 * Nothing is read, written or prefetched when @bytes holds less than one
 * full line.
 */
static void
xor_8regs_p_2(unsigned long bytes, unsigned long * __restrict p1,
	      const unsigned long * __restrict p2)
{
	/* One less than the line count: the last line is handled specially. */
	long lines = bytes / (sizeof (long)) / 8 - 1;

	/* No complete line: the loop below would still process one. */
	if (lines < 0)
		return;

	prefetchw(p1);
	prefetch(p2);

	do {
		prefetchw(p1+8);
		prefetch(p2+8);
 once_more:
		p1[0] ^= p2[0];
		p1[1] ^= p2[1];
		p1[2] ^= p2[2];
		p1[3] ^= p2[3];
		p1[4] ^= p2[4];
		p1[5] ^= p2[5];
		p1[6] ^= p2[6];
		p1[7] ^= p2[7];
		p1 += 8;
		p2 += 8;
	} while (--lines > 0);
	/*
	 * lines == 0 here means the final line is still pending: re-enter
	 * the body below the prefetches.  The next decrement makes lines
	 * negative, so this jump is taken at most once.
	 */
	if (lines == 0)
		goto once_more;
}
336
/*
 * XOR two source buffers into the destination (p1[i] ^= p2[i] ^ p3[i]),
 * issuing prefetchw()/prefetch() hints for the line after the current one.
 *
 * @bytes: byte count; only complete lines of 8 * sizeof(long) bytes are
 *         processed, a trailing partial line is ignored.
 * @p1:    destination buffer, updated in place.
 * @p2:    first source buffer, read only.
 * @p3:    second source buffer, read only.
 *
 * Nothing is read, written or prefetched when @bytes holds less than one
 * full line.
 */
static void
xor_8regs_p_3(unsigned long bytes, unsigned long * __restrict p1,
	      const unsigned long * __restrict p2,
	      const unsigned long * __restrict p3)
{
	/* One less than the line count: the last line is handled specially. */
	long lines = bytes / (sizeof (long)) / 8 - 1;

	/* No complete line: the loop below would still process one. */
	if (lines < 0)
		return;

	prefetchw(p1);
	prefetch(p2);
	prefetch(p3);

	do {
		prefetchw(p1+8);
		prefetch(p2+8);
		prefetch(p3+8);
 once_more:
		p1[0] ^= p2[0] ^ p3[0];
		p1[1] ^= p2[1] ^ p3[1];
		p1[2] ^= p2[2] ^ p3[2];
		p1[3] ^= p2[3] ^ p3[3];
		p1[4] ^= p2[4] ^ p3[4];
		p1[5] ^= p2[5] ^ p3[5];
		p1[6] ^= p2[6] ^ p3[6];
		p1[7] ^= p2[7] ^ p3[7];
		p1 += 8;
		p2 += 8;
		p3 += 8;
	} while (--lines > 0);
	/*
	 * lines == 0 here means the final line is still pending: re-enter
	 * the body below the prefetches.  The next decrement makes lines
	 * negative, so this jump is taken at most once.
	 */
	if (lines == 0)
		goto once_more;
}
367
/*
 * XOR three source buffers into the destination
 * (p1[i] ^= p2[i] ^ p3[i] ^ p4[i]), issuing prefetchw()/prefetch() hints
 * for the line after the current one.
 *
 * @bytes: byte count; only complete lines of 8 * sizeof(long) bytes are
 *         processed, a trailing partial line is ignored.
 * @p1:    destination buffer, updated in place.
 * @p2:    first source buffer, read only.
 * @p3:    second source buffer, read only.
 * @p4:    third source buffer, read only.
 *
 * Nothing is read, written or prefetched when @bytes holds less than one
 * full line.
 */
static void
xor_8regs_p_4(unsigned long bytes, unsigned long * __restrict p1,
	      const unsigned long * __restrict p2,
	      const unsigned long * __restrict p3,
	      const unsigned long * __restrict p4)
{
	/* One less than the line count: the last line is handled specially. */
	long lines = bytes / (sizeof (long)) / 8 - 1;

	/* No complete line: the loop below would still process one. */
	if (lines < 0)
		return;

	prefetchw(p1);
	prefetch(p2);
	prefetch(p3);
	prefetch(p4);

	do {
		prefetchw(p1+8);
		prefetch(p2+8);
		prefetch(p3+8);
		prefetch(p4+8);
 once_more:
		p1[0] ^= p2[0] ^ p3[0] ^ p4[0];
		p1[1] ^= p2[1] ^ p3[1] ^ p4[1];
		p1[2] ^= p2[2] ^ p3[2] ^ p4[2];
		p1[3] ^= p2[3] ^ p3[3] ^ p4[3];
		p1[4] ^= p2[4] ^ p3[4] ^ p4[4];
		p1[5] ^= p2[5] ^ p3[5] ^ p4[5];
		p1[6] ^= p2[6] ^ p3[6] ^ p4[6];
		p1[7] ^= p2[7] ^ p3[7] ^ p4[7];
		p1 += 8;
		p2 += 8;
		p3 += 8;
		p4 += 8;
	} while (--lines > 0);
	/*
	 * lines == 0 here means the final line is still pending: re-enter
	 * the body below the prefetches.  The next decrement makes lines
	 * negative, so this jump is taken at most once.
	 */
	if (lines == 0)
		goto once_more;
}
403
/*
 * XOR four source buffers into the destination
 * (p1[i] ^= p2[i] ^ p3[i] ^ p4[i] ^ p5[i]), issuing prefetchw()/prefetch()
 * hints for the line after the current one.
 *
 * @bytes: byte count; only complete lines of 8 * sizeof(long) bytes are
 *         processed, a trailing partial line is ignored.
 * @p1:    destination buffer, updated in place.
 * @p2:    first source buffer, read only.
 * @p3:    second source buffer, read only.
 * @p4:    third source buffer, read only.
 * @p5:    fourth source buffer, read only.
 *
 * Nothing is read, written or prefetched when @bytes holds less than one
 * full line.
 */
static void
xor_8regs_p_5(unsigned long bytes, unsigned long * __restrict p1,
	      const unsigned long * __restrict p2,
	      const unsigned long * __restrict p3,
	      const unsigned long * __restrict p4,
	      const unsigned long * __restrict p5)
{
	/* One less than the line count: the last line is handled specially. */
	long lines = bytes / (sizeof (long)) / 8 - 1;

	/* No complete line: the loop below would still process one. */
	if (lines < 0)
		return;

	prefetchw(p1);
	prefetch(p2);
	prefetch(p3);
	prefetch(p4);
	prefetch(p5);

	do {
		prefetchw(p1+8);
		prefetch(p2+8);
		prefetch(p3+8);
		prefetch(p4+8);
		prefetch(p5+8);
 once_more:
		p1[0] ^= p2[0] ^ p3[0] ^ p4[0] ^ p5[0];
		p1[1] ^= p2[1] ^ p3[1] ^ p4[1] ^ p5[1];
		p1[2] ^= p2[2] ^ p3[2] ^ p4[2] ^ p5[2];
		p1[3] ^= p2[3] ^ p3[3] ^ p4[3] ^ p5[3];
		p1[4] ^= p2[4] ^ p3[4] ^ p4[4] ^ p5[4];
		p1[5] ^= p2[5] ^ p3[5] ^ p4[5] ^ p5[5];
		p1[6] ^= p2[6] ^ p3[6] ^ p4[6] ^ p5[6];
		p1[7] ^= p2[7] ^ p3[7] ^ p4[7] ^ p5[7];
		p1 += 8;
		p2 += 8;
		p3 += 8;
		p4 += 8;
		p5 += 8;
	} while (--lines > 0);
	/*
	 * lines == 0 here means the final line is still pending: re-enter
	 * the body below the prefetches.  The next decrement makes lines
	 * negative, so this jump is taken at most once.
	 */
	if (lines == 0)
		goto once_more;
}
443
/*
 * XOR one source buffer into the destination (p1[i] ^= p2[i]), staging
 * each 8-word line in locals and issuing prefetchw()/prefetch() hints for
 * the line after the current one.
 *
 * @bytes: byte count; only complete lines of 8 * sizeof(long) bytes are
 *         processed, a trailing partial line is ignored.
 * @p1:    destination buffer, updated in place.
 * @p2:    source buffer, read only.
 *
 * Nothing is read, written or prefetched when @bytes holds less than one
 * full line.
 */
static void
xor_32regs_p_2(unsigned long bytes, unsigned long * __restrict p1,
	       const unsigned long * __restrict p2)
{
	/* One less than the line count: the last line is handled specially. */
	long lines = bytes / (sizeof (long)) / 8 - 1;
	/*
	 * Declared at function scope so the goto below does not jump past
	 * the declarations; unsigned to match the buffer element type.
	 */
	unsigned long d0, d1, d2, d3, d4, d5, d6, d7;

	/* No complete line: the loop below would still process one. */
	if (lines < 0)
		return;

	prefetchw(p1);
	prefetch(p2);

	do {
		prefetchw(p1+8);
		prefetch(p2+8);
 once_more:
		d0 = p1[0];	/* Load the destination line ...	*/
		d1 = p1[1];	/*  ... in one burst, if possible.	*/
		d2 = p1[2];
		d3 = p1[3];
		d4 = p1[4];
		d5 = p1[5];
		d6 = p1[6];
		d7 = p1[7];
		d0 ^= p2[0];
		d1 ^= p2[1];
		d2 ^= p2[2];
		d3 ^= p2[3];
		d4 ^= p2[4];
		d5 ^= p2[5];
		d6 ^= p2[6];
		d7 ^= p2[7];
		p1[0] = d0;	/* Write the result back (in one burst)	*/
		p1[1] = d1;
		p1[2] = d2;
		p1[3] = d3;
		p1[4] = d4;
		p1[5] = d5;
		p1[6] = d6;
		p1[7] = d7;
		p1 += 8;
		p2 += 8;
	} while (--lines > 0);
	/*
	 * lines == 0 here means the final line is still pending: re-enter
	 * the body below the prefetches.  The next decrement makes lines
	 * negative, so this jump is taken at most once.
	 */
	if (lines == 0)
		goto once_more;
}
489
/*
 * XOR two source buffers into the destination (p1[i] ^= p2[i] ^ p3[i]),
 * staging each 8-word line in locals and issuing prefetchw()/prefetch()
 * hints for the line after the current one.
 *
 * @bytes: byte count; only complete lines of 8 * sizeof(long) bytes are
 *         processed, a trailing partial line is ignored.
 * @p1:    destination buffer, updated in place.
 * @p2:    first source buffer, read only.
 * @p3:    second source buffer, read only.
 *
 * Nothing is read, written or prefetched when @bytes holds less than one
 * full line.
 */
static void
xor_32regs_p_3(unsigned long bytes, unsigned long * __restrict p1,
	       const unsigned long * __restrict p2,
	       const unsigned long * __restrict p3)
{
	/* One less than the line count: the last line is handled specially. */
	long lines = bytes / (sizeof (long)) / 8 - 1;
	/*
	 * Declared at function scope so the goto below does not jump past
	 * the declarations; unsigned to match the buffer element type.
	 */
	unsigned long d0, d1, d2, d3, d4, d5, d6, d7;

	/* No complete line: the loop below would still process one. */
	if (lines < 0)
		return;

	prefetchw(p1);
	prefetch(p2);
	prefetch(p3);

	do {
		prefetchw(p1+8);
		prefetch(p2+8);
		prefetch(p3+8);
 once_more:
		d0 = p1[0];	/* Load the destination line ...	*/
		d1 = p1[1];	/*  ... in one burst, if possible.	*/
		d2 = p1[2];
		d3 = p1[3];
		d4 = p1[4];
		d5 = p1[5];
		d6 = p1[6];
		d7 = p1[7];
		d0 ^= p2[0];
		d1 ^= p2[1];
		d2 ^= p2[2];
		d3 ^= p2[3];
		d4 ^= p2[4];
		d5 ^= p2[5];
		d6 ^= p2[6];
		d7 ^= p2[7];
		d0 ^= p3[0];
		d1 ^= p3[1];
		d2 ^= p3[2];
		d3 ^= p3[3];
		d4 ^= p3[4];
		d5 ^= p3[5];
		d6 ^= p3[6];
		d7 ^= p3[7];
		p1[0] = d0;	/* Write the result back (in one burst)	*/
		p1[1] = d1;
		p1[2] = d2;
		p1[3] = d3;
		p1[4] = d4;
		p1[5] = d5;
		p1[6] = d6;
		p1[7] = d7;
		p1 += 8;
		p2 += 8;
		p3 += 8;
	} while (--lines > 0);
	/*
	 * lines == 0 here means the final line is still pending: re-enter
	 * the body below the prefetches.  The next decrement makes lines
	 * negative, so this jump is taken at most once.
	 */
	if (lines == 0)
		goto once_more;
}
547
/*
 * XOR three source buffers into the destination
 * (p1[i] ^= p2[i] ^ p3[i] ^ p4[i]), staging each 8-word line in locals
 * and issuing prefetchw()/prefetch() hints for the line after the current
 * one.
 *
 * @bytes: byte count; only complete lines of 8 * sizeof(long) bytes are
 *         processed, a trailing partial line is ignored.
 * @p1:    destination buffer, updated in place.
 * @p2:    first source buffer, read only.
 * @p3:    second source buffer, read only.
 * @p4:    third source buffer, read only.
 *
 * Nothing is read, written or prefetched when @bytes holds less than one
 * full line.
 */
static void
xor_32regs_p_4(unsigned long bytes, unsigned long * __restrict p1,
	       const unsigned long * __restrict p2,
	       const unsigned long * __restrict p3,
	       const unsigned long * __restrict p4)
{
	/* One less than the line count: the last line is handled specially. */
	long lines = bytes / (sizeof (long)) / 8 - 1;
	/*
	 * Declared at function scope so the goto below does not jump past
	 * the declarations; unsigned to match the buffer element type.
	 */
	unsigned long d0, d1, d2, d3, d4, d5, d6, d7;

	/* No complete line: the loop below would still process one. */
	if (lines < 0)
		return;

	prefetchw(p1);
	prefetch(p2);
	prefetch(p3);
	prefetch(p4);

	do {
		prefetchw(p1+8);
		prefetch(p2+8);
		prefetch(p3+8);
		prefetch(p4+8);
 once_more:
		d0 = p1[0];	/* Load the destination line ...	*/
		d1 = p1[1];	/*  ... in one burst, if possible.	*/
		d2 = p1[2];
		d3 = p1[3];
		d4 = p1[4];
		d5 = p1[5];
		d6 = p1[6];
		d7 = p1[7];
		d0 ^= p2[0];
		d1 ^= p2[1];
		d2 ^= p2[2];
		d3 ^= p2[3];
		d4 ^= p2[4];
		d5 ^= p2[5];
		d6 ^= p2[6];
		d7 ^= p2[7];
		d0 ^= p3[0];
		d1 ^= p3[1];
		d2 ^= p3[2];
		d3 ^= p3[3];
		d4 ^= p3[4];
		d5 ^= p3[5];
		d6 ^= p3[6];
		d7 ^= p3[7];
		d0 ^= p4[0];
		d1 ^= p4[1];
		d2 ^= p4[2];
		d3 ^= p4[3];
		d4 ^= p4[4];
		d5 ^= p4[5];
		d6 ^= p4[6];
		d7 ^= p4[7];
		p1[0] = d0;	/* Write the result back (in one burst)	*/
		p1[1] = d1;
		p1[2] = d2;
		p1[3] = d3;
		p1[4] = d4;
		p1[5] = d5;
		p1[6] = d6;
		p1[7] = d7;
		p1 += 8;
		p2 += 8;
		p3 += 8;
		p4 += 8;
	} while (--lines > 0);
	/*
	 * lines == 0 here means the final line is still pending: re-enter
	 * the body below the prefetches.  The next decrement makes lines
	 * negative, so this jump is taken at most once.
	 */
	if (lines == 0)
		goto once_more;
}
617
/*
 * XOR four source buffers into the destination
 * (p1[i] ^= p2[i] ^ p3[i] ^ p4[i] ^ p5[i]), staging each 8-word line in
 * locals and issuing prefetchw()/prefetch() hints for the line after the
 * current one.
 *
 * @bytes: byte count; only complete lines of 8 * sizeof(long) bytes are
 *         processed, a trailing partial line is ignored.
 * @p1:    destination buffer, updated in place.
 * @p2:    first source buffer, read only.
 * @p3:    second source buffer, read only.
 * @p4:    third source buffer, read only.
 * @p5:    fourth source buffer, read only.
 *
 * Nothing is read, written or prefetched when @bytes holds less than one
 * full line.
 */
static void
xor_32regs_p_5(unsigned long bytes, unsigned long * __restrict p1,
	       const unsigned long * __restrict p2,
	       const unsigned long * __restrict p3,
	       const unsigned long * __restrict p4,
	       const unsigned long * __restrict p5)
{
	/* One less than the line count: the last line is handled specially. */
	long lines = bytes / (sizeof (long)) / 8 - 1;
	/*
	 * Declared at function scope so the goto below does not jump past
	 * the declarations; unsigned to match the buffer element type.
	 */
	unsigned long d0, d1, d2, d3, d4, d5, d6, d7;

	/* No complete line: the loop below would still process one. */
	if (lines < 0)
		return;

	prefetchw(p1);
	prefetch(p2);
	prefetch(p3);
	prefetch(p4);
	prefetch(p5);

	do {
		prefetchw(p1+8);
		prefetch(p2+8);
		prefetch(p3+8);
		prefetch(p4+8);
		prefetch(p5+8);
 once_more:
		d0 = p1[0];	/* Load the destination line ...	*/
		d1 = p1[1];	/*  ... in one burst, if possible.	*/
		d2 = p1[2];
		d3 = p1[3];
		d4 = p1[4];
		d5 = p1[5];
		d6 = p1[6];
		d7 = p1[7];
		d0 ^= p2[0];
		d1 ^= p2[1];
		d2 ^= p2[2];
		d3 ^= p2[3];
		d4 ^= p2[4];
		d5 ^= p2[5];
		d6 ^= p2[6];
		d7 ^= p2[7];
		d0 ^= p3[0];
		d1 ^= p3[1];
		d2 ^= p3[2];
		d3 ^= p3[3];
		d4 ^= p3[4];
		d5 ^= p3[5];
		d6 ^= p3[6];
		d7 ^= p3[7];
		d0 ^= p4[0];
		d1 ^= p4[1];
		d2 ^= p4[2];
		d3 ^= p4[3];
		d4 ^= p4[4];
		d5 ^= p4[5];
		d6 ^= p4[6];
		d7 ^= p4[7];
		d0 ^= p5[0];
		d1 ^= p5[1];
		d2 ^= p5[2];
		d3 ^= p5[3];
		d4 ^= p5[4];
		d5 ^= p5[5];
		d6 ^= p5[6];
		d7 ^= p5[7];
		p1[0] = d0;	/* Write the result back (in one burst)	*/
		p1[1] = d1;
		p1[2] = d2;
		p1[3] = d3;
		p1[4] = d4;
		p1[5] = d5;
		p1[6] = d6;
		p1[7] = d7;
		p1 += 8;
		p2 += 8;
		p3 += 8;
		p4 += 8;
		p5 += 8;
	} while (--lines > 0);
	/*
	 * lines == 0 here means the final line is still pending: re-enter
	 * the body below the prefetches.  The next decrement makes lines
	 * negative, so this jump is taken at most once.
	 */
	if (lines == 0)
		goto once_more;
}
699
700 static struct xor_block_template xor_block_8regs = {
701         .name = "8regs",
702         .do_2 = xor_8regs_2,
703         .do_3 = xor_8regs_3,
704         .do_4 = xor_8regs_4,
705         .do_5 = xor_8regs_5,
706 };
707
708 static struct xor_block_template xor_block_32regs = {
709         .name = "32regs",
710         .do_2 = xor_32regs_2,
711         .do_3 = xor_32regs_3,
712         .do_4 = xor_32regs_4,
713         .do_5 = xor_32regs_5,
714 };
715
716 static struct xor_block_template xor_block_8regs_p __maybe_unused = {
717         .name = "8regs_prefetch",
718         .do_2 = xor_8regs_p_2,
719         .do_3 = xor_8regs_p_3,
720         .do_4 = xor_8regs_p_4,
721         .do_5 = xor_8regs_p_5,
722 };
723
724 static struct xor_block_template xor_block_32regs_p __maybe_unused = {
725         .name = "32regs_prefetch",
726         .do_2 = xor_32regs_p_2,
727         .do_3 = xor_32regs_p_3,
728         .do_4 = xor_32regs_p_4,
729         .do_5 = xor_32regs_p_5,
730 };
731
/*
 * Pass each of the four generic templates to xor_speed(), in the order
 * 8regs, 8regs_prefetch, 32regs, 32regs_prefetch.  Wrapped in
 * do { } while (0) so the macro expands to a single statement.
 */
#define XOR_TRY_TEMPLATES				\
	do {						\
		xor_speed(&xor_block_8regs);		\
		xor_speed(&xor_block_8regs_p);		\
		xor_speed(&xor_block_32regs);		\
		xor_speed(&xor_block_32regs_p);		\
	} while (0)