1 /* SPDX-License-Identifier: GPL-2.0-or-later */
3 * include/asm-generic/xor.h
5 * Generic optimized RAID-5 checksumming functions.
8 #include <linux/prefetch.h>
/*
 * xor_8regs_2 - two-buffer member of the "8regs" XOR routines.
 *
 * @bytes: byte count; converted below into a count of 8-long groups.
 * @p1:    writable buffer (not const-qualified).
 * @p2:    read-only buffer.
 *
 * Both pointers are __restrict-qualified, i.e. declared as non-aliasing.
 *
 * NOTE(review): the loop body is not visible in this excerpt. Judging by the
 * sibling xor_8regs_3 it presumably performs p1[i] ^= p2[i] for i = 0..7 and
 * then advances the pointers - confirm against the full source.
 */
11 xor_8regs_2(unsigned long bytes, unsigned long * __restrict p1,
12 const unsigned long * __restrict p2)
/* Number of 8-long groups in 'bytes'; the integer divisions truncate. */
14 long lines = bytes / (sizeof (long)) / 8;
/* Repeat while the pre-decremented group count is still positive. */
27 } while (--lines > 0);
/*
 * xor_8regs_3 - XOR two source buffers into a destination, 8 longs per pass.
 *
 * @bytes: byte count; converted below into a count of 8-long groups.
 * @p1:    destination; each word is updated in place (p1[i] ^= ...).
 * @p2:    first read-only source.
 * @p3:    second read-only source.
 *
 * All pointers are __restrict-qualified, i.e. declared as non-aliasing.
 *
 * NOTE(review): the loop opening and whatever advances the pointers between
 * passes are not visible in this excerpt - confirm against the full source.
 */
31 xor_8regs_3(unsigned long bytes, unsigned long * __restrict p1,
32 const unsigned long * __restrict p2,
33 const unsigned long * __restrict p3)
/* Number of 8-long groups in 'bytes'; the integer divisions truncate. */
35 long lines = bytes / (sizeof (long)) / 8;
/* One group: fold words 0..7 of p2 and p3 into the matching words of p1. */
38 p1[0] ^= p2[0] ^ p3[0];
39 p1[1] ^= p2[1] ^ p3[1];
40 p1[2] ^= p2[2] ^ p3[2];
41 p1[3] ^= p2[3] ^ p3[3];
42 p1[4] ^= p2[4] ^ p3[4];
43 p1[5] ^= p2[5] ^ p3[5];
44 p1[6] ^= p2[6] ^ p3[6];
45 p1[7] ^= p2[7] ^ p3[7];
/* Repeat while the pre-decremented group count is still positive. */
49 } while (--lines > 0);
/*
 * xor_8regs_4 - XOR three source buffers into a destination, 8 longs per pass.
 *
 * @bytes: byte count; converted below into a count of 8-long groups.
 * @p1:    destination; each word is updated in place (p1[i] ^= ...).
 * @p2:    first read-only source.
 * @p3:    second read-only source.
 * @p4:    third read-only source.
 *
 * All pointers are __restrict-qualified, i.e. declared as non-aliasing.
 *
 * NOTE(review): the loop opening and whatever advances the pointers between
 * passes are not visible in this excerpt - confirm against the full source.
 */
53 xor_8regs_4(unsigned long bytes, unsigned long * __restrict p1,
54 const unsigned long * __restrict p2,
55 const unsigned long * __restrict p3,
56 const unsigned long * __restrict p4)
/* Number of 8-long groups in 'bytes'; the integer divisions truncate. */
58 long lines = bytes / (sizeof (long)) / 8;
/* One group: fold words 0..7 of p2, p3 and p4 into the matching words of p1. */
61 p1[0] ^= p2[0] ^ p3[0] ^ p4[0];
62 p1[1] ^= p2[1] ^ p3[1] ^ p4[1];
63 p1[2] ^= p2[2] ^ p3[2] ^ p4[2];
64 p1[3] ^= p2[3] ^ p3[3] ^ p4[3];
65 p1[4] ^= p2[4] ^ p3[4] ^ p4[4];
66 p1[5] ^= p2[5] ^ p3[5] ^ p4[5];
67 p1[6] ^= p2[6] ^ p3[6] ^ p4[6];
68 p1[7] ^= p2[7] ^ p3[7] ^ p4[7];
/* Repeat while the pre-decremented group count is still positive. */
73 } while (--lines > 0);
/*
 * xor_8regs_5 - XOR four source buffers into a destination, 8 longs per pass.
 *
 * @bytes: byte count; converted below into a count of 8-long groups.
 * @p1:    destination; each word is updated in place (p1[i] ^= ...).
 * @p2:    first read-only source.
 * @p3:    second read-only source.
 * @p4:    third read-only source.
 * @p5:    fourth read-only source.
 *
 * All pointers are __restrict-qualified, i.e. declared as non-aliasing.
 *
 * NOTE(review): the loop opening and whatever advances the pointers between
 * passes are not visible in this excerpt - confirm against the full source.
 */
77 xor_8regs_5(unsigned long bytes, unsigned long * __restrict p1,
78 const unsigned long * __restrict p2,
79 const unsigned long * __restrict p3,
80 const unsigned long * __restrict p4,
81 const unsigned long * __restrict p5)
/* Number of 8-long groups in 'bytes'; the integer divisions truncate. */
83 long lines = bytes / (sizeof (long)) / 8;
/* One group: fold words 0..7 of p2..p5 into the matching words of p1. */
86 p1[0] ^= p2[0] ^ p3[0] ^ p4[0] ^ p5[0];
87 p1[1] ^= p2[1] ^ p3[1] ^ p4[1] ^ p5[1];
88 p1[2] ^= p2[2] ^ p3[2] ^ p4[2] ^ p5[2];
89 p1[3] ^= p2[3] ^ p3[3] ^ p4[3] ^ p5[3];
90 p1[4] ^= p2[4] ^ p3[4] ^ p4[4] ^ p5[4];
91 p1[5] ^= p2[5] ^ p3[5] ^ p4[5] ^ p5[5];
92 p1[6] ^= p2[6] ^ p3[6] ^ p4[6] ^ p5[6];
93 p1[7] ^= p2[7] ^ p3[7] ^ p4[7] ^ p5[7];
/* Repeat while the pre-decremented group count is still positive. */
99 } while (--lines > 0);
/*
 * xor_32regs_2 - two-buffer member of the "32regs" XOR routines.
 *
 * @bytes: byte count; converted below into a count of 8-long groups.
 * @p1:    buffer that is both loaded from and stored back to.
 * @p2:    read-only buffer.
 *
 * Words of p1 are copied into local temporaries (d0..d7) and later written
 * back, rather than being updated directly in memory.
 *
 * NOTE(review): only the first loads and the first store are visible in this
 * excerpt; the step that combines p2 into the temporaries is presumably an
 * XOR (d0 ^= p2[0], ...) - confirm against the full source.
 */
103 xor_32regs_2(unsigned long bytes, unsigned long * __restrict p1,
104 const unsigned long * __restrict p2)
/* Number of 8-long groups in 'bytes'; the integer divisions truncate. */
106 long lines = bytes / (sizeof (long)) / 8;
/* Eight temporaries, declared with the 'register' storage-class hint. */
109 register long d0, d1, d2, d3, d4, d5, d6, d7;
110 d0 = p1[0]; /* Pull the stuff into registers */
111 d1 = p1[1]; /* ... in bursts, if possible. */
126 p1[0] = d0; /* Store the result (in bursts) */
/* Repeat while the pre-decremented group count is still positive. */
136 } while (--lines > 0);
/*
 * xor_32regs_3 - three-buffer member of the "32regs" XOR routines.
 *
 * @bytes: byte count; converted below into a count of 8-long groups.
 * @p1:    buffer that is both loaded from and stored back to.
 * @p2:    read-only buffer.
 * @p3:    read-only buffer.
 *
 * Words of p1 are copied into local temporaries (d0..d7) and later written
 * back, rather than being updated directly in memory.
 *
 * NOTE(review): only the first loads and the first store are visible in this
 * excerpt; the steps that combine p2 and p3 into the temporaries are
 * presumably XORs - confirm against the full source.
 */
140 xor_32regs_3(unsigned long bytes, unsigned long * __restrict p1,
141 const unsigned long * __restrict p2,
142 const unsigned long * __restrict p3)
/* Number of 8-long groups in 'bytes'; the integer divisions truncate. */
144 long lines = bytes / (sizeof (long)) / 8;
/* Eight temporaries, declared with the 'register' storage-class hint. */
147 register long d0, d1, d2, d3, d4, d5, d6, d7;
148 d0 = p1[0]; /* Pull the stuff into registers */
149 d1 = p1[1]; /* ... in bursts, if possible. */
172 p1[0] = d0; /* Store the result (in bursts) */
/* Repeat while the pre-decremented group count is still positive. */
183 } while (--lines > 0);
/*
 * xor_32regs_4 - four-buffer member of the "32regs" XOR routines.
 *
 * @bytes: byte count; converted below into a count of 8-long groups.
 * @p1:    buffer that is both loaded from and stored back to.
 * @p2:    read-only buffer.
 * @p3:    read-only buffer.
 * @p4:    read-only buffer.
 *
 * Words of p1 are copied into local temporaries (d0..d7) and later written
 * back, rather than being updated directly in memory.
 *
 * NOTE(review): only the first loads and the first store are visible in this
 * excerpt; the steps that combine p2..p4 into the temporaries are presumably
 * XORs - confirm against the full source.
 */
187 xor_32regs_4(unsigned long bytes, unsigned long * __restrict p1,
188 const unsigned long * __restrict p2,
189 const unsigned long * __restrict p3,
190 const unsigned long * __restrict p4)
/* Number of 8-long groups in 'bytes'; the integer divisions truncate. */
192 long lines = bytes / (sizeof (long)) / 8;
/* Eight temporaries, declared with the 'register' storage-class hint. */
195 register long d0, d1, d2, d3, d4, d5, d6, d7;
196 d0 = p1[0]; /* Pull the stuff into registers */
197 d1 = p1[1]; /* ... in bursts, if possible. */
228 p1[0] = d0; /* Store the result (in bursts) */
/* Repeat while the pre-decremented group count is still positive. */
240 } while (--lines > 0);
/*
 * xor_32regs_5 - five-buffer member of the "32regs" XOR routines.
 *
 * @bytes: byte count; converted below into a count of 8-long groups.
 * @p1:    buffer that is both loaded from and stored back to.
 * @p2:    read-only buffer.
 * @p3:    read-only buffer.
 * @p4:    read-only buffer.
 * @p5:    read-only buffer.
 *
 * Words of p1 are copied into local temporaries (d0..d7) and later written
 * back, rather than being updated directly in memory.
 *
 * NOTE(review): only the first loads and the first store are visible in this
 * excerpt; the steps that combine p2..p5 into the temporaries are presumably
 * XORs - confirm against the full source.
 */
244 xor_32regs_5(unsigned long bytes, unsigned long * __restrict p1,
245 const unsigned long * __restrict p2,
246 const unsigned long * __restrict p3,
247 const unsigned long * __restrict p4,
248 const unsigned long * __restrict p5)
/* Number of 8-long groups in 'bytes'; the integer divisions truncate. */
250 long lines = bytes / (sizeof (long)) / 8;
/* Eight temporaries, declared with the 'register' storage-class hint. */
253 register long d0, d1, d2, d3, d4, d5, d6, d7;
254 d0 = p1[0]; /* Pull the stuff into registers */
255 d1 = p1[1]; /* ... in bursts, if possible. */
294 p1[0] = d0; /* Store the result (in bursts) */
/* Repeat while the pre-decremented group count is still positive. */
307 } while (--lines > 0);
/*
 * xor_8regs_p_2 - two-buffer routine wired into the "8regs_prefetch" template.
 *
 * @bytes: byte count; converted below into (number of 8-long groups) - 1.
 * @p1:    writable buffer (not const-qualified).
 * @p2:    read-only buffer.
 *
 * Both pointers are __restrict-qualified, i.e. declared as non-aliasing.
 *
 * NOTE(review): the loop body, any prefetch calls, and the code that accounts
 * for the group subtracted from 'lines' are not visible in this excerpt.
 * Presumably the final group is processed separately so that prefetching does
 * not reach past the buffers - confirm against the full source.
 */
311 xor_8regs_p_2(unsigned long bytes, unsigned long * __restrict p1,
312 const unsigned long * __restrict p2)
/* One less than the 8-long group count used by the non-_p variants. */
314 long lines = bytes / (sizeof (long)) / 8 - 1;
/* Repeat while the pre-decremented count is still positive. */
332 } while (--lines > 0);
/*
 * xor_8regs_p_3 - three-buffer routine wired into the "8regs_prefetch"
 * template; XORs two sources into a destination, 8 longs per pass.
 *
 * @bytes: byte count; converted below into (number of 8-long groups) - 1.
 * @p1:    destination; each word is updated in place (p1[i] ^= ...).
 * @p2:    first read-only source.
 * @p3:    second read-only source.
 *
 * All pointers are __restrict-qualified, i.e. declared as non-aliasing.
 *
 * NOTE(review): any prefetch calls, the pointer advance, and the code that
 * accounts for the group subtracted from 'lines' are not visible in this
 * excerpt - confirm against the full source.
 */
338 xor_8regs_p_3(unsigned long bytes, unsigned long * __restrict p1,
339 const unsigned long * __restrict p2,
340 const unsigned long * __restrict p3)
/* One less than the 8-long group count used by the non-_p variants. */
342 long lines = bytes / (sizeof (long)) / 8 - 1;
/* One group: fold words 0..7 of p2 and p3 into the matching words of p1. */
352 p1[0] ^= p2[0] ^ p3[0];
353 p1[1] ^= p2[1] ^ p3[1];
354 p1[2] ^= p2[2] ^ p3[2];
355 p1[3] ^= p2[3] ^ p3[3];
356 p1[4] ^= p2[4] ^ p3[4];
357 p1[5] ^= p2[5] ^ p3[5];
358 p1[6] ^= p2[6] ^ p3[6];
359 p1[7] ^= p2[7] ^ p3[7];
/* Repeat while the pre-decremented count is still positive. */
363 } while (--lines > 0);
/*
 * xor_8regs_p_4 - four-buffer routine wired into the "8regs_prefetch"
 * template; XORs three sources into a destination, 8 longs per pass.
 *
 * @bytes: byte count; converted below into (number of 8-long groups) - 1.
 * @p1:    destination; each word is updated in place (p1[i] ^= ...).
 * @p2:    first read-only source.
 * @p3:    second read-only source.
 * @p4:    third read-only source.
 *
 * All pointers are __restrict-qualified, i.e. declared as non-aliasing.
 *
 * NOTE(review): any prefetch calls, the pointer advance, and the code that
 * accounts for the group subtracted from 'lines' are not visible in this
 * excerpt - confirm against the full source.
 */
369 xor_8regs_p_4(unsigned long bytes, unsigned long * __restrict p1,
370 const unsigned long * __restrict p2,
371 const unsigned long * __restrict p3,
372 const unsigned long * __restrict p4)
/* One less than the 8-long group count used by the non-_p variants. */
374 long lines = bytes / (sizeof (long)) / 8 - 1;
/* One group: fold words 0..7 of p2, p3 and p4 into the matching words of p1. */
387 p1[0] ^= p2[0] ^ p3[0] ^ p4[0];
388 p1[1] ^= p2[1] ^ p3[1] ^ p4[1];
389 p1[2] ^= p2[2] ^ p3[2] ^ p4[2];
390 p1[3] ^= p2[3] ^ p3[3] ^ p4[3];
391 p1[4] ^= p2[4] ^ p3[4] ^ p4[4];
392 p1[5] ^= p2[5] ^ p3[5] ^ p4[5];
393 p1[6] ^= p2[6] ^ p3[6] ^ p4[6];
394 p1[7] ^= p2[7] ^ p3[7] ^ p4[7];
/* Repeat while the pre-decremented count is still positive. */
399 } while (--lines > 0);
/*
 * xor_8regs_p_5 - five-buffer routine wired into the "8regs_prefetch"
 * template; XORs four sources into a destination, 8 longs per pass.
 *
 * @bytes: byte count; converted below into (number of 8-long groups) - 1.
 * @p1:    destination; each word is updated in place (p1[i] ^= ...).
 * @p2:    first read-only source.
 * @p3:    second read-only source.
 * @p4:    third read-only source.
 * @p5:    fourth read-only source.
 *
 * All pointers are __restrict-qualified, i.e. declared as non-aliasing.
 *
 * NOTE(review): any prefetch calls, the pointer advance, and the code that
 * accounts for the group subtracted from 'lines' are not visible in this
 * excerpt - confirm against the full source.
 */
405 xor_8regs_p_5(unsigned long bytes, unsigned long * __restrict p1,
406 const unsigned long * __restrict p2,
407 const unsigned long * __restrict p3,
408 const unsigned long * __restrict p4,
409 const unsigned long * __restrict p5)
/* One less than the 8-long group count used by the non-_p variants. */
411 long lines = bytes / (sizeof (long)) / 8 - 1;
/* One group: fold words 0..7 of p2..p5 into the matching words of p1. */
426 p1[0] ^= p2[0] ^ p3[0] ^ p4[0] ^ p5[0];
427 p1[1] ^= p2[1] ^ p3[1] ^ p4[1] ^ p5[1];
428 p1[2] ^= p2[2] ^ p3[2] ^ p4[2] ^ p5[2];
429 p1[3] ^= p2[3] ^ p3[3] ^ p4[3] ^ p5[3];
430 p1[4] ^= p2[4] ^ p3[4] ^ p4[4] ^ p5[4];
431 p1[5] ^= p2[5] ^ p3[5] ^ p4[5] ^ p5[5];
432 p1[6] ^= p2[6] ^ p3[6] ^ p4[6] ^ p5[6];
433 p1[7] ^= p2[7] ^ p3[7] ^ p4[7] ^ p5[7];
/* Repeat while the pre-decremented count is still positive. */
439 } while (--lines > 0);
/*
 * xor_32regs_p_2 - two-buffer routine wired into the "32regs_prefetch"
 * template.
 *
 * @bytes: byte count; converted below into (number of 8-long groups) - 1.
 * @p1:    buffer that is both loaded from and stored back to.
 * @p2:    read-only buffer.
 *
 * Words of p1 are copied into local temporaries (d0..d7) and later written
 * back, rather than being updated directly in memory.
 *
 * NOTE(review): the combining step, any prefetch calls, and the code that
 * accounts for the group subtracted from 'lines' are not visible in this
 * excerpt - confirm against the full source.
 */
445 xor_32regs_p_2(unsigned long bytes, unsigned long * __restrict p1,
446 const unsigned long * __restrict p2)
/* One less than the 8-long group count used by the non-_p variants. */
448 long lines = bytes / (sizeof (long)) / 8 - 1;
/* Eight temporaries, declared with the 'register' storage-class hint. */
454 register long d0, d1, d2, d3, d4, d5, d6, d7;
459 d0 = p1[0]; /* Pull the stuff into registers */
460 d1 = p1[1]; /* ... in bursts, if possible. */
475 p1[0] = d0; /* Store the result (in bursts) */
/* Repeat while the pre-decremented count is still positive. */
485 } while (--lines > 0);
/*
 * xor_32regs_p_3 - three-buffer routine wired into the "32regs_prefetch"
 * template.
 *
 * @bytes: byte count; converted below into (number of 8-long groups) - 1.
 * @p1:    buffer that is both loaded from and stored back to.
 * @p2:    read-only buffer.
 * @p3:    read-only buffer.
 *
 * Words of p1 are copied into local temporaries (d0..d7) and later written
 * back, rather than being updated directly in memory.
 *
 * NOTE(review): the combining steps, any prefetch calls, and the code that
 * accounts for the group subtracted from 'lines' are not visible in this
 * excerpt - confirm against the full source.
 */
491 xor_32regs_p_3(unsigned long bytes, unsigned long * __restrict p1,
492 const unsigned long * __restrict p2,
493 const unsigned long * __restrict p3)
/* One less than the 8-long group count used by the non-_p variants. */
495 long lines = bytes / (sizeof (long)) / 8 - 1;
/* Eight temporaries, declared with the 'register' storage-class hint. */
502 register long d0, d1, d2, d3, d4, d5, d6, d7;
508 d0 = p1[0]; /* Pull the stuff into registers */
509 d1 = p1[1]; /* ... in bursts, if possible. */
532 p1[0] = d0; /* Store the result (in bursts) */
/* Repeat while the pre-decremented count is still positive. */
543 } while (--lines > 0);
/*
 * xor_32regs_p_4 - four-buffer routine wired into the "32regs_prefetch"
 * template.
 *
 * @bytes: byte count; converted below into (number of 8-long groups) - 1.
 * @p1:    buffer that is both loaded from and stored back to.
 * @p2:    read-only buffer.
 * @p3:    read-only buffer.
 * @p4:    read-only buffer.
 *
 * Words of p1 are copied into local temporaries (d0..d7) and later written
 * back, rather than being updated directly in memory.
 *
 * NOTE(review): the combining steps, any prefetch calls, and the code that
 * accounts for the group subtracted from 'lines' are not visible in this
 * excerpt - confirm against the full source.
 */
549 xor_32regs_p_4(unsigned long bytes, unsigned long * __restrict p1,
550 const unsigned long * __restrict p2,
551 const unsigned long * __restrict p3,
552 const unsigned long * __restrict p4)
/* One less than the 8-long group count used by the non-_p variants. */
554 long lines = bytes / (sizeof (long)) / 8 - 1;
/* Eight temporaries, declared with the 'register' storage-class hint. */
562 register long d0, d1, d2, d3, d4, d5, d6, d7;
569 d0 = p1[0]; /* Pull the stuff into registers */
570 d1 = p1[1]; /* ... in bursts, if possible. */
601 p1[0] = d0; /* Store the result (in bursts) */
/* Repeat while the pre-decremented count is still positive. */
613 } while (--lines > 0);
/*
 * xor_32regs_p_5 - five-buffer routine wired into the "32regs_prefetch"
 * template.
 *
 * @bytes: byte count; converted below into (number of 8-long groups) - 1.
 * @p1:    buffer that is both loaded from and stored back to.
 * @p2:    read-only buffer.
 * @p3:    read-only buffer.
 * @p4:    read-only buffer.
 * @p5:    read-only buffer.
 *
 * Words of p1 are copied into local temporaries (d0..d7) and later written
 * back, rather than being updated directly in memory.
 *
 * NOTE(review): the combining steps, any prefetch calls, and the code that
 * accounts for the group subtracted from 'lines' are not visible in this
 * excerpt - confirm against the full source.
 */
619 xor_32regs_p_5(unsigned long bytes, unsigned long * __restrict p1,
620 const unsigned long * __restrict p2,
621 const unsigned long * __restrict p3,
622 const unsigned long * __restrict p4,
623 const unsigned long * __restrict p5)
/* One less than the 8-long group count used by the non-_p variants. */
625 long lines = bytes / (sizeof (long)) / 8 - 1;
/* Eight temporaries, declared with the 'register' storage-class hint. */
634 register long d0, d1, d2, d3, d4, d5, d6, d7;
642 d0 = p1[0]; /* Pull the stuff into registers */
643 d1 = p1[1]; /* ... in bursts, if possible. */
682 p1[0] = d0; /* Store the result (in bursts) */
/* Repeat while the pre-decremented count is still positive. */
695 } while (--lines > 0);
/*
 * File-local template instance; its address is handed to xor_speed() by
 * XOR_TRY_TEMPLATES.
 *
 * NOTE(review): the initializer fields are not visible in this excerpt;
 * presumably they reference xor_8regs_2..xor_8regs_5 the same way the other
 * templates reference their routines - confirm against the full source.
 */
700 static struct xor_block_template xor_block_8regs = {
/*
 * File-local template instance that wires the xor_32regs_* routines into the
 * do_2..do_5 slots (the digit matches the routine's buffer count). Its
 * address is handed to xor_speed() by XOR_TRY_TEMPLATES.
 *
 * NOTE(review): any .name field and the closing of the initializer are not
 * visible in this excerpt.
 */
708 static struct xor_block_template xor_block_32regs = {
710 .do_2 = xor_32regs_2,
711 .do_3 = xor_32regs_3,
712 .do_4 = xor_32regs_4,
713 .do_5 = xor_32regs_5,
/*
 * File-local template instance named "8regs_prefetch" that wires the
 * xor_8regs_p_* routines into the do_2..do_5 slots (the digit matches the
 * routine's buffer count).
 *
 * Marked __maybe_unused, so a build that never references it should not
 * warn about an unused static object.
 */
716 static struct xor_block_template xor_block_8regs_p __maybe_unused = {
717 .name = "8regs_prefetch",
718 .do_2 = xor_8regs_p_2,
719 .do_3 = xor_8regs_p_3,
720 .do_4 = xor_8regs_p_4,
721 .do_5 = xor_8regs_p_5,
/*
 * File-local template instance named "32regs_prefetch" that wires the
 * xor_32regs_p_* routines into the do_2..do_5 slots (the digit matches the
 * routine's buffer count).
 *
 * Marked __maybe_unused, so a build that never references it should not
 * warn about an unused static object.
 */
724 static struct xor_block_template xor_block_32regs_p __maybe_unused = {
725 .name = "32regs_prefetch",
726 .do_2 = xor_32regs_p_2,
727 .do_3 = xor_32regs_p_3,
728 .do_4 = xor_32regs_p_4,
729 .do_5 = xor_32regs_p_5,
732 #define XOR_TRY_TEMPLATES \
734 xor_speed(&xor_block_8regs); \
735 xor_speed(&xor_block_8regs_p); \
736 xor_speed(&xor_block_32regs); \
737 xor_speed(&xor_block_32regs_p); \