2 Copyright (C) 2011 Free Software Foundation, Inc.
3 Contributed by Intel Corporation.
4 This file is part of the GNU C Library.
6 The GNU C Library is free software; you can redistribute it and/or
7 modify it under the terms of the GNU Lesser General Public
8 License as published by the Free Software Foundation; either
9 version 2.1 of the License, or (at your option) any later version.
11 The GNU C Library is distributed in the hope that it will be useful,
12 but WITHOUT ANY WARRANTY; without even the implied warranty of
13 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
14 Lesser General Public License for more details.
16 You should have received a copy of the GNU Lesser General Public
17 License along with the GNU C Library; if not, write to the Free
18 Software Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA
23 # ifndef USE_AS_STRCAT
27 # define STRCPY __strcpy_ssse3
/* STRCPY is defined as __strcpy_ssse3 above (the surrounding
   conditionals are only partly visible in this excerpt) and the code
   that follows is placed in the .text.ssse3 section.  */
30 .section .text.ssse3,"ax",@progbits
33 # ifdef USE_AS_STRNCPY
37 # ifdef USE_AS_STRNCPY
/* strncpy-only early exit; the compare that sets the flags for this
   branch is not visible in this excerpt.  */
41 jbe L(StrncpyExit8Bytes)
59 # ifdef USE_AS_STRNCPY
/* strncpy-only early exit; the compare that sets the flags for this
   branch is not visible in this excerpt.  */
61 jb L(StrncpyExit15Bytes)
77 # ifdef USE_AS_STRNCPY
85 # ifdef USE_AS_STRNCPY
89 /* add 16 bytes rcx_shift to r8 */
95 rsi = alignment_16(rcx) + rcx_shift + 16;
96 rcx_shift = rcx - alignment_16(rcx)
100 rsi = alignment_16(rcx) + 16
106 look if there is zero symbol in next 16 bytes of string
107 from rsi to rsi + 15 and form mask in xmm0
109 pcmpeqb (%rsi), %xmm0
113 /* convert byte mask in xmm0 to bit mask */
118 /* rsi = 16 - rcx_shift */
120 /* rax = 0: there isn't end of string from position rsi to rsi+15 */
122 # ifdef USE_AS_STRNCPY
/* strncpy only: leave through the length-limited tail.  The instruction
   that sets the flags for this branch is not visible in this excerpt.  */
124 jbe L(CopyFrom1To16BytesCase2OrCase3)
127 jnz L(CopyFrom1To16Bytes)
132 rdx = rdx + 16 = alignment_16(rdx) + rdx_shift + 16
136 /* Now: rdx = alignment_16(rdx) + 16 */
140 /* Now: rax = rdx_shift - 16 */
142 # ifdef USE_AS_STRNCPY
154 case rcx_shift >= rdx_shift:
155 rcx = alignment_16(rcx) + (rcx_shift - rdx_shift) + 16
156 case rcx_shift < rdx_shift:
157 rcx = alignment_16(rcx) + (16 + rcx_shift - rdx_shift)
162 case rcx_shift >= rdx_shift: rax = rcx_shift - rdx_shift
163 case rcx_shift < rdx_shift: rax = (16 + rcx_shift - rdx_shift)
164 rax can be 0, 1, ..., 15
168 /* case: rcx_shift == rdx_shift */
/* Unrolled copy for the rcx_shift == rdx_shift case: each step loads the
   next 16 bytes with movaps from 16(%rcx, %rsi) and stores the register
   loaded by the previous step to (%rdx, %rsi).  After each load the code
   may branch to L(CopyFrom1To16Bytes) (jnz) or, under USE_AS_STRNCPY, to
   L(CopyFrom1To16BytesCase2OrCase3) (jbe).  The instructions that set
   the flags and advance %rsi are not visible in this excerpt.  */
206 movaps 16(%rcx), %xmm2
211 # ifdef USE_AS_STRNCPY
213 jbe L(CopyFrom1To16BytesCase2OrCase3)
216 jnz L(CopyFrom1To16Bytes)
218 movaps 16(%rcx, %rsi), %xmm3
219 movaps %xmm2, (%rdx, %rsi)
223 # ifdef USE_AS_STRNCPY
225 jbe L(CopyFrom1To16BytesCase2OrCase3)
228 jnz L(CopyFrom1To16Bytes)
230 movaps 16(%rcx, %rsi), %xmm4
231 movaps %xmm3, (%rdx, %rsi)
235 # ifdef USE_AS_STRNCPY
237 jbe L(CopyFrom1To16BytesCase2OrCase3)
240 jnz L(CopyFrom1To16Bytes)
242 movaps 16(%rcx, %rsi), %xmm1
243 movaps %xmm4, (%rdx, %rsi)
247 # ifdef USE_AS_STRNCPY
249 jbe L(CopyFrom1To16BytesCase2OrCase3)
252 jnz L(CopyFrom1To16Bytes)
254 movaps 16(%rcx, %rsi), %xmm2
255 movaps %xmm1, (%rdx, %rsi)
259 # ifdef USE_AS_STRNCPY
261 jbe L(CopyFrom1To16BytesCase2OrCase3)
264 jnz L(CopyFrom1To16Bytes)
266 movaps 16(%rcx, %rsi), %xmm3
267 movaps %xmm2, (%rdx, %rsi)
271 # ifdef USE_AS_STRNCPY
273 jbe L(CopyFrom1To16BytesCase2OrCase3)
276 jnz L(CopyFrom1To16Bytes)
278 movaps %xmm3, (%rdx, %rsi)
/* Fold the running offset into %rcx: rcx = rcx + rsi + 16.  */
280 lea 16(%rcx, %rsi), %rcx
/* Aligned 64-byte step (presumably the L(Aligned64Loop) body; its label
   is not visible in this excerpt).  Visible here: aligned loads from
   16/32/48(%rcx), the exits to L(StrncpyLeaveCase2OrCase3) (strncpy
   only) and L(Aligned64Leave), and the stores of xmm4..xmm7 to
   -64..-16(%rdx).  The negative offsets suggest %rdx has already been
   advanced past the 64 bytes being stored -- TODO confirm against the
   full file.  */
284 # ifdef USE_AS_STRNCPY
285 lea 48+64(%r8, %rax), %r8
292 movaps 16(%rcx), %xmm5
293 movaps 32(%rcx), %xmm3
295 movaps 48(%rcx), %xmm7
303 # ifdef USE_AS_STRNCPY
305 jbe L(StrncpyLeaveCase2OrCase3)
308 jnz L(Aligned64Leave)
309 movaps %xmm4, -64(%rdx)
310 movaps %xmm5, -48(%rdx)
311 movaps %xmm6, -32(%rdx)
312 movaps %xmm7, -16(%rdx)
/* Leave sequence of the aligned 64-byte step (presumably
   L(Aligned64Leave); its label is not visible in this excerpt).  It
   jumps to L(CopyFrom1To16Bytes) at the first of several checks that
   leaves ZF clear, storing xmm4, xmm5 and xmm6 to -64/-48/-32(%rdx) as
   the checks are passed.  The instructions that perform the checks are
   not visible here.  */
316 # ifdef USE_AS_STRNCPY
322 jnz L(CopyFrom1To16Bytes)
325 # ifdef USE_AS_STRNCPY
329 movaps %xmm4, -64(%rdx)
332 jnz L(CopyFrom1To16Bytes)
335 # ifdef USE_AS_STRNCPY
339 movaps %xmm5, -48(%rdx)
342 jnz L(CopyFrom1To16Bytes)
344 movaps %xmm6, -32(%rdx)
346 # ifdef USE_AS_STRNCPY
351 jmp L(CopyFrom1To16Bytes)
/* Copy path used when %rcx is 1 byte past a 16-byte boundary
   (presumably L(Shl1); the label is not visible in this excerpt).
   movaps needs aligned addresses, so the loads at -1(%rcx) and 15(%rcx)
   rely on that offset; "palignr $1" then shifts each adjacent pair right
   by 1 byte to rebuild 16 contiguous bytes starting at %rcx.  Under
   USE_AS_STRNCPY every step can leave through
   L(StrncpyExit1Case2OrCase3); the flag-setting instructions are not
   visible here.  */
355 movaps -1(%rcx), %xmm1
356 movaps 15(%rcx), %xmm2
361 # ifdef USE_AS_STRNCPY
363 jbe L(StrncpyExit1Case2OrCase3)
368 palignr $1, %xmm1, %xmm2
371 movaps 31(%rcx), %xmm2
378 # ifdef USE_AS_STRNCPY
380 jbe L(StrncpyExit1Case2OrCase3)
385 palignr $1, %xmm1, %xmm2
387 movaps 31(%rcx), %xmm2
395 # ifdef USE_AS_STRNCPY
397 jbe L(StrncpyExit1Case2OrCase3)
402 palignr $1, %xmm1, %xmm2
405 movaps 31(%rcx), %xmm2
412 # ifdef USE_AS_STRNCPY
414 jbe L(StrncpyExit1Case2OrCase3)
419 palignr $1, %xmm1, %xmm2
430 # ifdef USE_AS_STRNCPY
433 movaps -1(%rcx), %xmm1
/* Wider step: four aligned loads (64 bytes) merged by the palignr chain
   below; xmm3..xmm5 are then stored to 16/32/48(%rdx).  */
436 movaps 15(%rcx), %xmm2
437 movaps 31(%rcx), %xmm3
439 movaps 47(%rcx), %xmm4
441 movaps 63(%rcx), %xmm5
448 palignr $1, %xmm4, %xmm5
450 palignr $1, %xmm3, %xmm4
452 # ifdef USE_AS_STRNCPY
456 palignr $1, %xmm2, %xmm3
458 palignr $1, %xmm1, %xmm2
460 movaps %xmm5, 48(%rdx)
461 movaps %xmm4, 32(%rdx)
462 movaps %xmm3, 16(%rdx)
/* Exit merge through xmm6 before joining the common tail.  */
471 palignr $1, %xmm1, %xmm6
473 jmp L(CopyFrom1To16Bytes)
/* Copy path used when %rcx is 2 bytes past a 16-byte boundary
   (presumably L(Shl2); the label is not visible in this excerpt).
   movaps needs aligned addresses, so the loads at -2(%rcx) and 14(%rcx)
   rely on that offset; "palignr $2" then shifts each adjacent pair right
   by 2 bytes to rebuild 16 contiguous bytes starting at %rcx.  Under
   USE_AS_STRNCPY every step can leave through
   L(StrncpyExit2Case2OrCase3); the flag-setting instructions are not
   visible here.  */
477 movaps -2(%rcx), %xmm1
478 movaps 14(%rcx), %xmm2
483 # ifdef USE_AS_STRNCPY
485 jbe L(StrncpyExit2Case2OrCase3)
490 palignr $2, %xmm1, %xmm2
493 movaps 30(%rcx), %xmm2
500 # ifdef USE_AS_STRNCPY
502 jbe L(StrncpyExit2Case2OrCase3)
507 palignr $2, %xmm1, %xmm2
509 movaps 30(%rcx), %xmm2
517 # ifdef USE_AS_STRNCPY
519 jbe L(StrncpyExit2Case2OrCase3)
524 palignr $2, %xmm1, %xmm2
527 movaps 30(%rcx), %xmm2
534 # ifdef USE_AS_STRNCPY
536 jbe L(StrncpyExit2Case2OrCase3)
541 palignr $2, %xmm1, %xmm2
552 # ifdef USE_AS_STRNCPY
555 movaps -2(%rcx), %xmm1
/* Wider step: four aligned loads (64 bytes) merged by the palignr chain
   below; xmm3..xmm5 are then stored to 16/32/48(%rdx).  */
558 movaps 14(%rcx), %xmm2
559 movaps 30(%rcx), %xmm3
561 movaps 46(%rcx), %xmm4
563 movaps 62(%rcx), %xmm5
570 palignr $2, %xmm4, %xmm5
572 palignr $2, %xmm3, %xmm4
574 # ifdef USE_AS_STRNCPY
578 palignr $2, %xmm2, %xmm3
580 palignr $2, %xmm1, %xmm2
582 movaps %xmm5, 48(%rdx)
583 movaps %xmm4, 32(%rdx)
584 movaps %xmm3, 16(%rdx)
/* Exit merge through xmm6 before joining the common tail.  */
593 palignr $2, %xmm1, %xmm6
595 jmp L(CopyFrom1To16Bytes)
/* Copy path used when %rcx is 3 bytes past a 16-byte boundary
   (presumably L(Shl3); the label is not visible in this excerpt).
   movaps needs aligned addresses, so the loads at -3(%rcx) and 13(%rcx)
   rely on that offset; "palignr $3" then shifts each adjacent pair right
   by 3 bytes to rebuild 16 contiguous bytes starting at %rcx.  Under
   USE_AS_STRNCPY every step can leave through
   L(StrncpyExit3Case2OrCase3); the flag-setting instructions are not
   visible here.  */
599 movaps -3(%rcx), %xmm1
600 movaps 13(%rcx), %xmm2
605 # ifdef USE_AS_STRNCPY
607 jbe L(StrncpyExit3Case2OrCase3)
612 palignr $3, %xmm1, %xmm2
615 movaps 29(%rcx), %xmm2
622 # ifdef USE_AS_STRNCPY
624 jbe L(StrncpyExit3Case2OrCase3)
629 palignr $3, %xmm1, %xmm2
631 movaps 29(%rcx), %xmm2
639 # ifdef USE_AS_STRNCPY
641 jbe L(StrncpyExit3Case2OrCase3)
646 palignr $3, %xmm1, %xmm2
649 movaps 29(%rcx), %xmm2
656 # ifdef USE_AS_STRNCPY
658 jbe L(StrncpyExit3Case2OrCase3)
663 palignr $3, %xmm1, %xmm2
674 # ifdef USE_AS_STRNCPY
677 movaps -3(%rcx), %xmm1
/* Wider step: four aligned loads (64 bytes) merged by the palignr chain
   below; xmm3..xmm5 are then stored to 16/32/48(%rdx).  */
680 movaps 13(%rcx), %xmm2
681 movaps 29(%rcx), %xmm3
683 movaps 45(%rcx), %xmm4
685 movaps 61(%rcx), %xmm5
692 palignr $3, %xmm4, %xmm5
694 palignr $3, %xmm3, %xmm4
696 # ifdef USE_AS_STRNCPY
700 palignr $3, %xmm2, %xmm3
702 palignr $3, %xmm1, %xmm2
704 movaps %xmm5, 48(%rdx)
705 movaps %xmm4, 32(%rdx)
706 movaps %xmm3, 16(%rdx)
/* Exit merge through xmm6 before joining the common tail.  */
715 palignr $3, %xmm1, %xmm6
717 jmp L(CopyFrom1To16Bytes)
/* Copy path used when %rcx is 4 bytes past a 16-byte boundary
   (presumably L(Shl4); the label is not visible in this excerpt).
   movaps needs aligned addresses, so the loads at -4(%rcx) and 12(%rcx)
   rely on that offset; "palignr $4" then shifts each adjacent pair right
   by 4 bytes to rebuild 16 contiguous bytes starting at %rcx.  Under
   USE_AS_STRNCPY every step can leave through
   L(StrncpyExit4Case2OrCase3); the flag-setting instructions are not
   visible here.  */
721 movaps -4(%rcx), %xmm1
722 movaps 12(%rcx), %xmm2
727 # ifdef USE_AS_STRNCPY
729 jbe L(StrncpyExit4Case2OrCase3)
734 palignr $4, %xmm1, %xmm2
737 movaps 28(%rcx), %xmm2
744 # ifdef USE_AS_STRNCPY
746 jbe L(StrncpyExit4Case2OrCase3)
751 palignr $4, %xmm1, %xmm2
753 movaps 28(%rcx), %xmm2
761 # ifdef USE_AS_STRNCPY
763 jbe L(StrncpyExit4Case2OrCase3)
768 palignr $4, %xmm1, %xmm2
771 movaps 28(%rcx), %xmm2
778 # ifdef USE_AS_STRNCPY
780 jbe L(StrncpyExit4Case2OrCase3)
785 palignr $4, %xmm1, %xmm2
796 # ifdef USE_AS_STRNCPY
799 movaps -4(%rcx), %xmm1
/* Wider step: four aligned loads (64 bytes) merged by the palignr chain
   below; xmm3..xmm5 are then stored to 16/32/48(%rdx).  */
802 movaps 12(%rcx), %xmm2
803 movaps 28(%rcx), %xmm3
805 movaps 44(%rcx), %xmm4
807 movaps 60(%rcx), %xmm5
814 palignr $4, %xmm4, %xmm5
816 palignr $4, %xmm3, %xmm4
818 # ifdef USE_AS_STRNCPY
822 palignr $4, %xmm2, %xmm3
824 palignr $4, %xmm1, %xmm2
826 movaps %xmm5, 48(%rdx)
827 movaps %xmm4, 32(%rdx)
828 movaps %xmm3, 16(%rdx)
/* Exit merge through xmm6 before joining the common tail.  */
837 palignr $4, %xmm1, %xmm6
839 jmp L(CopyFrom1To16Bytes)
/* Copy path used when %rcx is 5 bytes past a 16-byte boundary
   (presumably L(Shl5); the label is not visible in this excerpt).
   movaps needs aligned addresses, so the loads at -5(%rcx) and 11(%rcx)
   rely on that offset; "palignr $5" then shifts each adjacent pair right
   by 5 bytes to rebuild 16 contiguous bytes starting at %rcx.  Under
   USE_AS_STRNCPY every step can leave through
   L(StrncpyExit5Case2OrCase3); the flag-setting instructions are not
   visible here.  */
843 movaps -5(%rcx), %xmm1
844 movaps 11(%rcx), %xmm2
849 # ifdef USE_AS_STRNCPY
851 jbe L(StrncpyExit5Case2OrCase3)
856 palignr $5, %xmm1, %xmm2
859 movaps 27(%rcx), %xmm2
866 # ifdef USE_AS_STRNCPY
868 jbe L(StrncpyExit5Case2OrCase3)
873 palignr $5, %xmm1, %xmm2
875 movaps 27(%rcx), %xmm2
883 # ifdef USE_AS_STRNCPY
885 jbe L(StrncpyExit5Case2OrCase3)
890 palignr $5, %xmm1, %xmm2
893 movaps 27(%rcx), %xmm2
900 # ifdef USE_AS_STRNCPY
902 jbe L(StrncpyExit5Case2OrCase3)
907 palignr $5, %xmm1, %xmm2
918 # ifdef USE_AS_STRNCPY
921 movaps -5(%rcx), %xmm1
/* Wider step: four aligned loads (64 bytes) merged by the palignr chain
   below; xmm3..xmm5 are then stored to 16/32/48(%rdx).  */
924 movaps 11(%rcx), %xmm2
925 movaps 27(%rcx), %xmm3
927 movaps 43(%rcx), %xmm4
929 movaps 59(%rcx), %xmm5
936 palignr $5, %xmm4, %xmm5
938 palignr $5, %xmm3, %xmm4
940 # ifdef USE_AS_STRNCPY
944 palignr $5, %xmm2, %xmm3
946 palignr $5, %xmm1, %xmm2
948 movaps %xmm5, 48(%rdx)
949 movaps %xmm4, 32(%rdx)
950 movaps %xmm3, 16(%rdx)
/* Exit merge through xmm6 before joining the common tail.  */
959 palignr $5, %xmm1, %xmm6
961 jmp L(CopyFrom1To16Bytes)
/* Copy path used when %rcx is 6 bytes past a 16-byte boundary
   (presumably L(Shl6); the label is not visible in this excerpt).
   movaps needs aligned addresses, so the loads at -6(%rcx) and 10(%rcx)
   rely on that offset; "palignr $6" then shifts each adjacent pair right
   by 6 bytes to rebuild 16 contiguous bytes starting at %rcx.  Each
   newly loaded chunk is compared bytewise against xmm0 (pcmpeqb) and the
   result is turned into a bit mask in %rax (pmovmskb); xmm0 is
   presumably zero at that point -- its setup is not visible here.
   Under USE_AS_STRNCPY every step can leave through
   L(StrncpyExit6Case2OrCase3).  */
965 movaps -6(%rcx), %xmm1
966 movaps 10(%rcx), %xmm2
971 # ifdef USE_AS_STRNCPY
973 jbe L(StrncpyExit6Case2OrCase3)
978 palignr $6, %xmm1, %xmm2
981 movaps 26(%rcx), %xmm2
988 # ifdef USE_AS_STRNCPY
990 jbe L(StrncpyExit6Case2OrCase3)
995 palignr $6, %xmm1, %xmm2
997 movaps 26(%rcx), %xmm2
1000 pcmpeqb %xmm2, %xmm0
1002 pmovmskb %xmm0, %rax
1005 # ifdef USE_AS_STRNCPY
1007 jbe L(StrncpyExit6Case2OrCase3)
1012 palignr $6, %xmm1, %xmm2
1014 movaps %xmm2, (%rdx)
1015 movaps 26(%rcx), %xmm2
1017 pcmpeqb %xmm2, %xmm0
1019 pmovmskb %xmm0, %rax
1022 # ifdef USE_AS_STRNCPY
1024 jbe L(StrncpyExit6Case2OrCase3)
1029 palignr $6, %xmm1, %xmm2
1031 movaps %xmm2, (%rdx)
1040 # ifdef USE_AS_STRNCPY
1043 movaps -6(%rcx), %xmm1
/* 64-byte step: four aligned loads merged by the palignr chain below and
   stored to 0/16/32/48(%rdx); repeats via L(Shl6LoopStart), or leaves
   through L(StrncpyLeave6) under USE_AS_STRNCPY.  */
1046 movaps 10(%rcx), %xmm2
1047 movaps 26(%rcx), %xmm3
1049 movaps 42(%rcx), %xmm4
1051 movaps 58(%rcx), %xmm5
1055 pcmpeqb %xmm0, %xmm7
1056 pmovmskb %xmm7, %rax
1058 palignr $6, %xmm4, %xmm5
1060 palignr $6, %xmm3, %xmm4
1062 # ifdef USE_AS_STRNCPY
1064 jbe L(StrncpyLeave6)
1066 palignr $6, %xmm2, %xmm3
1068 palignr $6, %xmm1, %xmm2
1070 movaps %xmm5, 48(%rdx)
1071 movaps %xmm4, 32(%rdx)
1072 movaps %xmm3, 16(%rdx)
1073 movaps %xmm2, (%rdx)
1075 jmp L(Shl6LoopStart)
/* Exit merge: the 16 bytes at (%rdx) are combined with xmm1 through xmm6
   and written back before joining the common tail.  */
1078 movaps (%rdx), %xmm6
1081 palignr $6, %xmm1, %xmm6
1082 movaps %xmm6, (%rdx)
1083 jmp L(CopyFrom1To16Bytes)
/* Copy path used when %rcx is 7 bytes past a 16-byte boundary
   (presumably L(Shl7); the label is not visible in this excerpt).
   movaps needs aligned addresses, so the loads at -7(%rcx) and 9(%rcx)
   rely on that offset; "palignr $7" then shifts each adjacent pair right
   by 7 bytes to rebuild 16 contiguous bytes starting at %rcx.  Each
   newly loaded chunk is compared bytewise against xmm0 (pcmpeqb) and the
   result is turned into a bit mask in %rax (pmovmskb); xmm0 is
   presumably zero at that point -- its setup is not visible here.
   Under USE_AS_STRNCPY every step can leave through
   L(StrncpyExit7Case2OrCase3).  */
1087 movaps -7(%rcx), %xmm1
1088 movaps 9(%rcx), %xmm2
1090 pcmpeqb %xmm2, %xmm0
1091 pmovmskb %xmm0, %rax
1093 # ifdef USE_AS_STRNCPY
1095 jbe L(StrncpyExit7Case2OrCase3)
1100 palignr $7, %xmm1, %xmm2
1102 movaps %xmm2, (%rdx)
1103 movaps 25(%rcx), %xmm2
1105 pcmpeqb %xmm2, %xmm0
1107 pmovmskb %xmm0, %rax
1110 # ifdef USE_AS_STRNCPY
1112 jbe L(StrncpyExit7Case2OrCase3)
1117 palignr $7, %xmm1, %xmm2
1118 movaps %xmm2, (%rdx)
1119 movaps 25(%rcx), %xmm2
1122 pcmpeqb %xmm2, %xmm0
1124 pmovmskb %xmm0, %rax
1127 # ifdef USE_AS_STRNCPY
1129 jbe L(StrncpyExit7Case2OrCase3)
1134 palignr $7, %xmm1, %xmm2
1136 movaps %xmm2, (%rdx)
1137 movaps 25(%rcx), %xmm2
1139 pcmpeqb %xmm2, %xmm0
1141 pmovmskb %xmm0, %rax
1144 # ifdef USE_AS_STRNCPY
1146 jbe L(StrncpyExit7Case2OrCase3)
1151 palignr $7, %xmm1, %xmm2
1153 movaps %xmm2, (%rdx)
1162 # ifdef USE_AS_STRNCPY
1165 movaps -7(%rcx), %xmm1
/* 64-byte step: four aligned loads merged by the palignr chain below and
   stored to 0/16/32/48(%rdx); repeats via L(Shl7LoopStart), or leaves
   through L(StrncpyLeave7) under USE_AS_STRNCPY.  */
1168 movaps 9(%rcx), %xmm2
1169 movaps 25(%rcx), %xmm3
1171 movaps 41(%rcx), %xmm4
1173 movaps 57(%rcx), %xmm5
1177 pcmpeqb %xmm0, %xmm7
1178 pmovmskb %xmm7, %rax
1180 palignr $7, %xmm4, %xmm5
1182 palignr $7, %xmm3, %xmm4
1184 # ifdef USE_AS_STRNCPY
1186 jbe L(StrncpyLeave7)
1188 palignr $7, %xmm2, %xmm3
1190 palignr $7, %xmm1, %xmm2
1192 movaps %xmm5, 48(%rdx)
1193 movaps %xmm4, 32(%rdx)
1194 movaps %xmm3, 16(%rdx)
1195 movaps %xmm2, (%rdx)
1197 jmp L(Shl7LoopStart)
/* Exit merge: the 16 bytes at (%rdx) are combined with xmm1 through xmm6
   and written back before joining the common tail.  */
1200 movaps (%rdx), %xmm6
1203 palignr $7, %xmm1, %xmm6
1204 movaps %xmm6, (%rdx)
1205 jmp L(CopyFrom1To16Bytes)
/* Copy path used when %rcx is 8 bytes past a 16-byte boundary
   (presumably L(Shl8); the label is not visible in this excerpt).
   movaps needs aligned addresses, so the loads at -8(%rcx) and 8(%rcx)
   rely on that offset; "palignr $8" then shifts each adjacent pair right
   by 8 bytes to rebuild 16 contiguous bytes starting at %rcx.  Each
   newly loaded chunk is compared bytewise against xmm0 (pcmpeqb) and the
   result is turned into a bit mask in %rax (pmovmskb); xmm0 is
   presumably zero at that point -- its setup is not visible here.
   Under USE_AS_STRNCPY every step can leave through
   L(StrncpyExit8Case2OrCase3).  */
1209 movaps -8(%rcx), %xmm1
1210 movaps 8(%rcx), %xmm2
1212 pcmpeqb %xmm2, %xmm0
1213 pmovmskb %xmm0, %rax
1215 # ifdef USE_AS_STRNCPY
1217 jbe L(StrncpyExit8Case2OrCase3)
1222 palignr $8, %xmm1, %xmm2
1224 movaps %xmm2, (%rdx)
1225 movaps 24(%rcx), %xmm2
1227 pcmpeqb %xmm2, %xmm0
1229 pmovmskb %xmm0, %rax
1232 # ifdef USE_AS_STRNCPY
1234 jbe L(StrncpyExit8Case2OrCase3)
1239 palignr $8, %xmm1, %xmm2
1240 movaps %xmm2, (%rdx)
1241 movaps 24(%rcx), %xmm2
1244 pcmpeqb %xmm2, %xmm0
1246 pmovmskb %xmm0, %rax
1249 # ifdef USE_AS_STRNCPY
1251 jbe L(StrncpyExit8Case2OrCase3)
1256 palignr $8, %xmm1, %xmm2
1258 movaps %xmm2, (%rdx)
1259 movaps 24(%rcx), %xmm2
1261 pcmpeqb %xmm2, %xmm0
1263 pmovmskb %xmm0, %rax
1266 # ifdef USE_AS_STRNCPY
1268 jbe L(StrncpyExit8Case2OrCase3)
1273 palignr $8, %xmm1, %xmm2
1275 movaps %xmm2, (%rdx)
1284 # ifdef USE_AS_STRNCPY
1287 movaps -8(%rcx), %xmm1
/* 64-byte step: four aligned loads merged by the palignr chain below and
   stored to 0/16/32/48(%rdx); repeats via L(Shl8LoopStart), or leaves
   through L(StrncpyLeave8) under USE_AS_STRNCPY.  */
1290 movaps 8(%rcx), %xmm2
1291 movaps 24(%rcx), %xmm3
1293 movaps 40(%rcx), %xmm4
1295 movaps 56(%rcx), %xmm5
1299 pcmpeqb %xmm0, %xmm7
1300 pmovmskb %xmm7, %rax
1302 palignr $8, %xmm4, %xmm5
1304 palignr $8, %xmm3, %xmm4
1306 # ifdef USE_AS_STRNCPY
1308 jbe L(StrncpyLeave8)
1310 palignr $8, %xmm2, %xmm3
1312 palignr $8, %xmm1, %xmm2
1314 movaps %xmm5, 48(%rdx)
1315 movaps %xmm4, 32(%rdx)
1316 movaps %xmm3, 16(%rdx)
1317 movaps %xmm2, (%rdx)
1319 jmp L(Shl8LoopStart)
/* Exit merge: the 16 bytes at (%rdx) are combined with xmm1 through xmm6
   and written back before joining the common tail.  */
1322 movaps (%rdx), %xmm6
1325 palignr $8, %xmm1, %xmm6
1326 movaps %xmm6, (%rdx)
1327 jmp L(CopyFrom1To16Bytes)
/* Copy path used when %rcx is 9 bytes past a 16-byte boundary
   (presumably L(Shl9); the label is not visible in this excerpt).
   movaps needs aligned addresses, so the loads at -9(%rcx) and 7(%rcx)
   rely on that offset; "palignr $9" then shifts each adjacent pair right
   by 9 bytes to rebuild 16 contiguous bytes starting at %rcx.  Each
   newly loaded chunk is compared bytewise against xmm0 (pcmpeqb) and the
   result is turned into a bit mask in %rax (pmovmskb); xmm0 is
   presumably zero at that point -- its setup is not visible here.
   Under USE_AS_STRNCPY every step can leave through
   L(StrncpyExit9Case2OrCase3).  */
1331 movaps -9(%rcx), %xmm1
1332 movaps 7(%rcx), %xmm2
1334 pcmpeqb %xmm2, %xmm0
1335 pmovmskb %xmm0, %rax
1337 # ifdef USE_AS_STRNCPY
1339 jbe L(StrncpyExit9Case2OrCase3)
1344 palignr $9, %xmm1, %xmm2
1346 movaps %xmm2, (%rdx)
1347 movaps 23(%rcx), %xmm2
1349 pcmpeqb %xmm2, %xmm0
1351 pmovmskb %xmm0, %rax
1354 # ifdef USE_AS_STRNCPY
1356 jbe L(StrncpyExit9Case2OrCase3)
1361 palignr $9, %xmm1, %xmm2
1362 movaps %xmm2, (%rdx)
1363 movaps 23(%rcx), %xmm2
1366 pcmpeqb %xmm2, %xmm0
1368 pmovmskb %xmm0, %rax
1371 # ifdef USE_AS_STRNCPY
1373 jbe L(StrncpyExit9Case2OrCase3)
1378 palignr $9, %xmm1, %xmm2
1380 movaps %xmm2, (%rdx)
1381 movaps 23(%rcx), %xmm2
1383 pcmpeqb %xmm2, %xmm0
1385 pmovmskb %xmm0, %rax
1388 # ifdef USE_AS_STRNCPY
1390 jbe L(StrncpyExit9Case2OrCase3)
1395 palignr $9, %xmm1, %xmm2
1397 movaps %xmm2, (%rdx)
1406 # ifdef USE_AS_STRNCPY
1409 movaps -9(%rcx), %xmm1
/* 64-byte step: four aligned loads merged by the palignr chain below and
   stored to 0/16/32/48(%rdx); repeats via L(Shl9LoopStart), or leaves
   through L(StrncpyLeave9) under USE_AS_STRNCPY.  */
1412 movaps 7(%rcx), %xmm2
1413 movaps 23(%rcx), %xmm3
1415 movaps 39(%rcx), %xmm4
1417 movaps 55(%rcx), %xmm5
1421 pcmpeqb %xmm0, %xmm7
1422 pmovmskb %xmm7, %rax
1424 palignr $9, %xmm4, %xmm5
1426 palignr $9, %xmm3, %xmm4
1428 # ifdef USE_AS_STRNCPY
1430 jbe L(StrncpyLeave9)
1432 palignr $9, %xmm2, %xmm3
1434 palignr $9, %xmm1, %xmm2
1436 movaps %xmm5, 48(%rdx)
1437 movaps %xmm4, 32(%rdx)
1438 movaps %xmm3, 16(%rdx)
1439 movaps %xmm2, (%rdx)
1441 jmp L(Shl9LoopStart)
/* Exit merge: the 16 bytes at (%rdx) are combined with xmm1 through xmm6
   and written back before joining the common tail.  */
1444 movaps (%rdx), %xmm6
1447 palignr $9, %xmm1, %xmm6
1448 movaps %xmm6, (%rdx)
1449 jmp L(CopyFrom1To16Bytes)
/* Copy path used when %rcx is 10 bytes past a 16-byte boundary
   (presumably L(Shl10); the label is not visible in this excerpt).
   movaps needs aligned addresses, so the loads at -10(%rcx) and 6(%rcx)
   rely on that offset; "palignr $10" then shifts each adjacent pair
   right by 10 bytes to rebuild 16 contiguous bytes starting at %rcx.
   Each newly loaded chunk is compared bytewise against xmm0 (pcmpeqb)
   and the result is turned into a bit mask in %rax (pmovmskb); xmm0 is
   presumably zero at that point -- its setup is not visible here.  A
   step leaves through L(Shl10LoopExit) (jnz) or, under USE_AS_STRNCPY,
   through L(StrncpyExit10Case2OrCase3) (jbe).  */
1453 movaps -10(%rcx), %xmm1
1454 movaps 6(%rcx), %xmm2
1456 pcmpeqb %xmm2, %xmm0
1457 pmovmskb %xmm0, %rax
1459 # ifdef USE_AS_STRNCPY
1461 jbe L(StrncpyExit10Case2OrCase3)
1464 jnz L(Shl10LoopExit)
1466 palignr $10, %xmm1, %xmm2
1468 movaps %xmm2, (%rdx)
1469 movaps 22(%rcx), %xmm2
1471 pcmpeqb %xmm2, %xmm0
1473 pmovmskb %xmm0, %rax
1476 # ifdef USE_AS_STRNCPY
1478 jbe L(StrncpyExit10Case2OrCase3)
1481 jnz L(Shl10LoopExit)
1483 palignr $10, %xmm1, %xmm2
1484 movaps %xmm2, (%rdx)
1485 movaps 22(%rcx), %xmm2
1488 pcmpeqb %xmm2, %xmm0
1490 pmovmskb %xmm0, %rax
1493 # ifdef USE_AS_STRNCPY
1495 jbe L(StrncpyExit10Case2OrCase3)
1498 jnz L(Shl10LoopExit)
1500 palignr $10, %xmm1, %xmm2
1502 movaps %xmm2, (%rdx)
1503 movaps 22(%rcx), %xmm2
1505 pcmpeqb %xmm2, %xmm0
1507 pmovmskb %xmm0, %rax
1510 # ifdef USE_AS_STRNCPY
1512 jbe L(StrncpyExit10Case2OrCase3)
1515 jnz L(Shl10LoopExit)
1517 palignr $10, %xmm1, %xmm2
1519 movaps %xmm2, (%rdx)
1528 # ifdef USE_AS_STRNCPY
1531 movaps -10(%rcx), %xmm1
/* 64-byte step: four aligned loads merged by the palignr chain below and
   stored to 0/16/32/48(%rdx); repeats via L(Shl10LoopStart), or leaves
   through L(StrncpyLeave10) under USE_AS_STRNCPY.  */
1534 movaps 6(%rcx), %xmm2
1535 movaps 22(%rcx), %xmm3
1537 movaps 38(%rcx), %xmm4
1539 movaps 54(%rcx), %xmm5
1543 pcmpeqb %xmm0, %xmm7
1544 pmovmskb %xmm7, %rax
1546 palignr $10, %xmm4, %xmm5
1548 palignr $10, %xmm3, %xmm4
1550 # ifdef USE_AS_STRNCPY
1552 jbe L(StrncpyLeave10)
1554 palignr $10, %xmm2, %xmm3
1556 palignr $10, %xmm1, %xmm2
1558 movaps %xmm5, 48(%rdx)
1559 movaps %xmm4, 32(%rdx)
1560 movaps %xmm3, 16(%rdx)
1561 movaps %xmm2, (%rdx)
1563 jmp L(Shl10LoopStart)
/* Exit merge: the 16 bytes at (%rdx) are combined with xmm1 through xmm6
   and written back before joining the common tail.  */
1566 movaps (%rdx), %xmm6
1569 palignr $10, %xmm1, %xmm6
1570 movaps %xmm6, (%rdx)
1571 jmp L(CopyFrom1To16Bytes)
/* Copy path used when %rcx is 11 bytes past a 16-byte boundary
   (presumably L(Shl11); the label is not visible in this excerpt).
   movaps needs aligned addresses, so the loads at -11(%rcx) and 5(%rcx)
   rely on that offset; "palignr $11" then shifts each adjacent pair
   right by 11 bytes to rebuild 16 contiguous bytes starting at %rcx.
   Each newly loaded chunk is compared bytewise against xmm0 (pcmpeqb)
   and the result is turned into a bit mask in %rax (pmovmskb); xmm0 is
   presumably zero at that point -- its setup is not visible here.  A
   step leaves through L(Shl11LoopExit) (jnz) or, under USE_AS_STRNCPY,
   through L(StrncpyExit11Case2OrCase3) (jbe).  */
1575 movaps -11(%rcx), %xmm1
1576 movaps 5(%rcx), %xmm2
1578 pcmpeqb %xmm2, %xmm0
1579 pmovmskb %xmm0, %rax
1581 # ifdef USE_AS_STRNCPY
1583 jbe L(StrncpyExit11Case2OrCase3)
1586 jnz L(Shl11LoopExit)
1588 palignr $11, %xmm1, %xmm2
1590 movaps %xmm2, (%rdx)
1591 movaps 21(%rcx), %xmm2
1593 pcmpeqb %xmm2, %xmm0
1595 pmovmskb %xmm0, %rax
1598 # ifdef USE_AS_STRNCPY
1600 jbe L(StrncpyExit11Case2OrCase3)
1603 jnz L(Shl11LoopExit)
1605 palignr $11, %xmm1, %xmm2
1606 movaps %xmm2, (%rdx)
1607 movaps 21(%rcx), %xmm2
1610 pcmpeqb %xmm2, %xmm0
1612 pmovmskb %xmm0, %rax
1615 # ifdef USE_AS_STRNCPY
1617 jbe L(StrncpyExit11Case2OrCase3)
1620 jnz L(Shl11LoopExit)
1622 palignr $11, %xmm1, %xmm2
1624 movaps %xmm2, (%rdx)
1625 movaps 21(%rcx), %xmm2
1627 pcmpeqb %xmm2, %xmm0
1629 pmovmskb %xmm0, %rax
1632 # ifdef USE_AS_STRNCPY
1634 jbe L(StrncpyExit11Case2OrCase3)
1637 jnz L(Shl11LoopExit)
1639 palignr $11, %xmm1, %xmm2
1641 movaps %xmm2, (%rdx)
1650 # ifdef USE_AS_STRNCPY
1653 movaps -11(%rcx), %xmm1
/* 64-byte step: four aligned loads merged by the palignr chain below and
   stored to 0/16/32/48(%rdx); repeats via L(Shl11LoopStart), or leaves
   through L(StrncpyLeave11) under USE_AS_STRNCPY.  */
1656 movaps 5(%rcx), %xmm2
1657 movaps 21(%rcx), %xmm3
1659 movaps 37(%rcx), %xmm4
1661 movaps 53(%rcx), %xmm5
1665 pcmpeqb %xmm0, %xmm7
1666 pmovmskb %xmm7, %rax
1668 palignr $11, %xmm4, %xmm5
1670 palignr $11, %xmm3, %xmm4
1672 # ifdef USE_AS_STRNCPY
1674 jbe L(StrncpyLeave11)
1676 palignr $11, %xmm2, %xmm3
1678 palignr $11, %xmm1, %xmm2
1680 movaps %xmm5, 48(%rdx)
1681 movaps %xmm4, 32(%rdx)
1682 movaps %xmm3, 16(%rdx)
1683 movaps %xmm2, (%rdx)
1685 jmp L(Shl11LoopStart)
/* Exit merge: the 16 bytes at (%rdx) are combined with xmm1 through xmm6
   and written back before joining the common tail.  */
1688 movaps (%rdx), %xmm6
1691 palignr $11, %xmm1, %xmm6
1692 movaps %xmm6, (%rdx)
1693 jmp L(CopyFrom1To16Bytes)
/* Copy path used when %rcx is 12 bytes past a 16-byte boundary
   (presumably L(Shl12); the label is not visible in this excerpt).
   movaps needs aligned addresses, so the loads at -12(%rcx) and 4(%rcx)
   rely on that offset; "palignr $12" then shifts each adjacent pair
   right by 12 bytes to rebuild 16 contiguous bytes starting at %rcx.
   Each newly loaded chunk is compared bytewise against xmm0 (pcmpeqb)
   and the result is turned into a bit mask in %rax (pmovmskb); xmm0 is
   presumably zero at that point -- its setup is not visible here.  A
   step leaves through L(Shl12LoopExit) (jnz) or, under USE_AS_STRNCPY,
   through L(StrncpyExit12Case2OrCase3) (jbe).  */
1697 movaps -12(%rcx), %xmm1
1698 movaps 4(%rcx), %xmm2
1700 pcmpeqb %xmm2, %xmm0
1701 pmovmskb %xmm0, %rax
1703 # ifdef USE_AS_STRNCPY
1705 jbe L(StrncpyExit12Case2OrCase3)
1708 jnz L(Shl12LoopExit)
1710 palignr $12, %xmm1, %xmm2
1712 movaps %xmm2, (%rdx)
1713 movaps 20(%rcx), %xmm2
1715 pcmpeqb %xmm2, %xmm0
1717 pmovmskb %xmm0, %rax
1720 # ifdef USE_AS_STRNCPY
1722 jbe L(StrncpyExit12Case2OrCase3)
1725 jnz L(Shl12LoopExit)
1727 palignr $12, %xmm1, %xmm2
1728 movaps %xmm2, (%rdx)
1729 movaps 20(%rcx), %xmm2
1732 pcmpeqb %xmm2, %xmm0
1734 pmovmskb %xmm0, %rax
1737 # ifdef USE_AS_STRNCPY
1739 jbe L(StrncpyExit12Case2OrCase3)
1742 jnz L(Shl12LoopExit)
1744 palignr $12, %xmm1, %xmm2
1746 movaps %xmm2, (%rdx)
1747 movaps 20(%rcx), %xmm2
1749 pcmpeqb %xmm2, %xmm0
1751 pmovmskb %xmm0, %rax
1754 # ifdef USE_AS_STRNCPY
1756 jbe L(StrncpyExit12Case2OrCase3)
1759 jnz L(Shl12LoopExit)
1761 palignr $12, %xmm1, %xmm2
1763 movaps %xmm2, (%rdx)
1772 # ifdef USE_AS_STRNCPY
1775 movaps -12(%rcx), %xmm1
/* 64-byte step: four aligned loads merged by the palignr chain below and
   stored to 0/16/32/48(%rdx); repeats via L(Shl12LoopStart), or leaves
   through L(StrncpyLeave12) under USE_AS_STRNCPY.  */
1778 movaps 4(%rcx), %xmm2
1779 movaps 20(%rcx), %xmm3
1781 movaps 36(%rcx), %xmm4
1783 movaps 52(%rcx), %xmm5
1787 pcmpeqb %xmm0, %xmm7
1788 pmovmskb %xmm7, %rax
1790 palignr $12, %xmm4, %xmm5
1792 palignr $12, %xmm3, %xmm4
1794 # ifdef USE_AS_STRNCPY
1796 jbe L(StrncpyLeave12)
1798 palignr $12, %xmm2, %xmm3
1800 palignr $12, %xmm1, %xmm2
1802 movaps %xmm5, 48(%rdx)
1803 movaps %xmm4, 32(%rdx)
1804 movaps %xmm3, 16(%rdx)
1805 movaps %xmm2, (%rdx)
1807 jmp L(Shl12LoopStart)
/* Exit merge: the 16 bytes at (%rdx) are combined with xmm1 through xmm6
   and written back before joining the common tail.  */
1810 movaps (%rdx), %xmm6
1813 palignr $12, %xmm1, %xmm6
1814 movaps %xmm6, (%rdx)
1815 jmp L(CopyFrom1To16Bytes)
/* Copy path used when %rcx is 13 bytes past a 16-byte boundary
   (presumably L(Shl13); the label is not visible in this excerpt).
   movaps needs aligned addresses, so the loads at -13(%rcx) and 3(%rcx)
   rely on that offset; "palignr $13" then shifts each adjacent pair
   right by 13 bytes to rebuild 16 contiguous bytes starting at %rcx.
   Each newly loaded chunk is compared bytewise against xmm0 (pcmpeqb)
   and the result is turned into a bit mask in %rax (pmovmskb); xmm0 is
   presumably zero at that point -- its setup is not visible here.  A
   step leaves through L(Shl13LoopExit) (jnz) or, under USE_AS_STRNCPY,
   through L(StrncpyExit13Case2OrCase3) (jbe).  */
1819 movaps -13(%rcx), %xmm1
1820 movaps 3(%rcx), %xmm2
1822 pcmpeqb %xmm2, %xmm0
1823 pmovmskb %xmm0, %rax
1825 # ifdef USE_AS_STRNCPY
1827 jbe L(StrncpyExit13Case2OrCase3)
1830 jnz L(Shl13LoopExit)
1832 palignr $13, %xmm1, %xmm2
1834 movaps %xmm2, (%rdx)
1835 movaps 19(%rcx), %xmm2
1837 pcmpeqb %xmm2, %xmm0
1839 pmovmskb %xmm0, %rax
1842 # ifdef USE_AS_STRNCPY
1844 jbe L(StrncpyExit13Case2OrCase3)
1847 jnz L(Shl13LoopExit)
1849 palignr $13, %xmm1, %xmm2
1850 movaps %xmm2, (%rdx)
1851 movaps 19(%rcx), %xmm2
1854 pcmpeqb %xmm2, %xmm0
1856 pmovmskb %xmm0, %rax
1859 # ifdef USE_AS_STRNCPY
1861 jbe L(StrncpyExit13Case2OrCase3)
1864 jnz L(Shl13LoopExit)
1866 palignr $13, %xmm1, %xmm2
1868 movaps %xmm2, (%rdx)
1869 movaps 19(%rcx), %xmm2
1871 pcmpeqb %xmm2, %xmm0
1873 pmovmskb %xmm0, %rax
1876 # ifdef USE_AS_STRNCPY
1878 jbe L(StrncpyExit13Case2OrCase3)
1881 jnz L(Shl13LoopExit)
1883 palignr $13, %xmm1, %xmm2
1885 movaps %xmm2, (%rdx)
1894 # ifdef USE_AS_STRNCPY
1897 movaps -13(%rcx), %xmm1
/* 64-byte step: four aligned loads merged by the palignr chain below and
   stored to 0/16/32/48(%rdx); repeats via L(Shl13LoopStart), or leaves
   through L(StrncpyLeave13) under USE_AS_STRNCPY.  */
1900 movaps 3(%rcx), %xmm2
1901 movaps 19(%rcx), %xmm3
1903 movaps 35(%rcx), %xmm4
1905 movaps 51(%rcx), %xmm5
1909 pcmpeqb %xmm0, %xmm7
1910 pmovmskb %xmm7, %rax
1912 palignr $13, %xmm4, %xmm5
1914 palignr $13, %xmm3, %xmm4
1916 # ifdef USE_AS_STRNCPY
1918 jbe L(StrncpyLeave13)
1920 palignr $13, %xmm2, %xmm3
1922 palignr $13, %xmm1, %xmm2
1924 movaps %xmm5, 48(%rdx)
1925 movaps %xmm4, 32(%rdx)
1926 movaps %xmm3, 16(%rdx)
1927 movaps %xmm2, (%rdx)
1929 jmp L(Shl13LoopStart)
/* Exit merge: the 16 bytes at (%rdx) are combined with xmm1 through xmm6
   and written back before joining the common tail.  */
1932 movaps (%rdx), %xmm6
1935 palignr $13, %xmm1, %xmm6
1936 movaps %xmm6, (%rdx)
1937 jmp L(CopyFrom1To16Bytes)
/* Copy path used when %rcx is 14 bytes past a 16-byte boundary
   (presumably L(Shl14); the label is not visible in this excerpt).
   movaps needs aligned addresses, so the loads at -14(%rcx) and 2(%rcx)
   rely on that offset; "palignr $14" then shifts each adjacent pair
   right by 14 bytes to rebuild 16 contiguous bytes starting at %rcx.
   Each newly loaded chunk is compared bytewise against xmm0 (pcmpeqb)
   and the result is turned into a bit mask in %rax (pmovmskb); xmm0 is
   presumably zero at that point -- its setup is not visible here.  A
   step leaves through L(Shl14LoopExit) (jnz) or, under USE_AS_STRNCPY,
   through L(StrncpyExit14Case2OrCase3) (jbe).  */
1941 movaps -14(%rcx), %xmm1
1942 movaps 2(%rcx), %xmm2
1944 pcmpeqb %xmm2, %xmm0
1945 pmovmskb %xmm0, %rax
1947 # ifdef USE_AS_STRNCPY
1949 jbe L(StrncpyExit14Case2OrCase3)
1952 jnz L(Shl14LoopExit)
1954 palignr $14, %xmm1, %xmm2
1956 movaps %xmm2, (%rdx)
1957 movaps 18(%rcx), %xmm2
1959 pcmpeqb %xmm2, %xmm0
1961 pmovmskb %xmm0, %rax
1964 # ifdef USE_AS_STRNCPY
1966 jbe L(StrncpyExit14Case2OrCase3)
1969 jnz L(Shl14LoopExit)
1971 palignr $14, %xmm1, %xmm2
1972 movaps %xmm2, (%rdx)
1973 movaps 18(%rcx), %xmm2
1976 pcmpeqb %xmm2, %xmm0
1978 pmovmskb %xmm0, %rax
1981 # ifdef USE_AS_STRNCPY
1983 jbe L(StrncpyExit14Case2OrCase3)
1986 jnz L(Shl14LoopExit)
1988 palignr $14, %xmm1, %xmm2
1990 movaps %xmm2, (%rdx)
1991 movaps 18(%rcx), %xmm2
1993 pcmpeqb %xmm2, %xmm0
1995 pmovmskb %xmm0, %rax
1998 # ifdef USE_AS_STRNCPY
2000 jbe L(StrncpyExit14Case2OrCase3)
2003 jnz L(Shl14LoopExit)
2005 palignr $14, %xmm1, %xmm2
2007 movaps %xmm2, (%rdx)
2016 # ifdef USE_AS_STRNCPY
2019 movaps -14(%rcx), %xmm1
/* 64-byte step: four aligned loads merged by the palignr chain below and
   stored to 0/16/32/48(%rdx); repeats via L(Shl14LoopStart), or leaves
   through L(StrncpyLeave14) under USE_AS_STRNCPY.  */
2022 movaps 2(%rcx), %xmm2
2023 movaps 18(%rcx), %xmm3
2025 movaps 34(%rcx), %xmm4
2027 movaps 50(%rcx), %xmm5
2031 pcmpeqb %xmm0, %xmm7
2032 pmovmskb %xmm7, %rax
2034 palignr $14, %xmm4, %xmm5
2036 palignr $14, %xmm3, %xmm4
2038 # ifdef USE_AS_STRNCPY
2040 jbe L(StrncpyLeave14)
2042 palignr $14, %xmm2, %xmm3
2044 palignr $14, %xmm1, %xmm2
2046 movaps %xmm5, 48(%rdx)
2047 movaps %xmm4, 32(%rdx)
2048 movaps %xmm3, 16(%rdx)
2049 movaps %xmm2, (%rdx)
2051 jmp L(Shl14LoopStart)
/* Exit merge: the 16 bytes at (%rdx) are combined with xmm1 through xmm6
   and written back before joining the common tail.  */
2054 movaps (%rdx), %xmm6
2057 palignr $14, %xmm1, %xmm6
2058 movaps %xmm6, (%rdx)
2059 jmp L(CopyFrom1To16Bytes)
/* Copy path used when %rcx is 15 bytes past a 16-byte boundary
   (presumably L(Shl15); the label is not visible in this excerpt).
   movaps needs aligned addresses, so the loads at -15(%rcx) and 1(%rcx)
   rely on that offset; "palignr $15" then shifts each adjacent pair
   right by 15 bytes to rebuild 16 contiguous bytes starting at %rcx.
   Each newly loaded chunk is compared bytewise against xmm0 (pcmpeqb)
   and the result is turned into a bit mask in %rax (pmovmskb); xmm0 is
   presumably zero at that point -- its setup is not visible here.  A
   step leaves through L(Shl15LoopExit) (jnz) or, under USE_AS_STRNCPY,
   through L(StrncpyExit15Case2OrCase3) (jbe).  */
2063 movaps -15(%rcx), %xmm1
2064 movaps 1(%rcx), %xmm2
2066 pcmpeqb %xmm2, %xmm0
2067 pmovmskb %xmm0, %rax
2069 # ifdef USE_AS_STRNCPY
2071 jbe L(StrncpyExit15Case2OrCase3)
2074 jnz L(Shl15LoopExit)
2076 palignr $15, %xmm1, %xmm2
2078 movaps %xmm2, (%rdx)
2079 movaps 17(%rcx), %xmm2
2081 pcmpeqb %xmm2, %xmm0
2083 pmovmskb %xmm0, %rax
2086 # ifdef USE_AS_STRNCPY
2088 jbe L(StrncpyExit15Case2OrCase3)
2091 jnz L(Shl15LoopExit)
2093 palignr $15, %xmm1, %xmm2
2094 movaps %xmm2, (%rdx)
2095 movaps 17(%rcx), %xmm2
2098 pcmpeqb %xmm2, %xmm0
2100 pmovmskb %xmm0, %rax
2103 # ifdef USE_AS_STRNCPY
2105 jbe L(StrncpyExit15Case2OrCase3)
2108 jnz L(Shl15LoopExit)
2110 palignr $15, %xmm1, %xmm2
2112 movaps %xmm2, (%rdx)
2113 movaps 17(%rcx), %xmm2
2115 pcmpeqb %xmm2, %xmm0
2117 pmovmskb %xmm0, %rax
2120 # ifdef USE_AS_STRNCPY
2122 jbe L(StrncpyExit15Case2OrCase3)
2125 jnz L(Shl15LoopExit)
2127 palignr $15, %xmm1, %xmm2
2129 movaps %xmm2, (%rdx)
2138 # ifdef USE_AS_STRNCPY
2141 movaps -15(%rcx), %xmm1
/* 64-byte step: four aligned loads merged by the palignr chain below and
   stored to 0/16/32/48(%rdx); repeats via L(Shl15LoopStart), or leaves
   through L(StrncpyLeave15) under USE_AS_STRNCPY.  */
2144 movaps 1(%rcx), %xmm2
2145 movaps 17(%rcx), %xmm3
2147 movaps 33(%rcx), %xmm4
2149 movaps 49(%rcx), %xmm5
2153 pcmpeqb %xmm0, %xmm7
2154 pmovmskb %xmm7, %rax
2156 palignr $15, %xmm4, %xmm5
2158 palignr $15, %xmm3, %xmm4
2160 # ifdef USE_AS_STRNCPY
2162 jbe L(StrncpyLeave15)
2164 palignr $15, %xmm2, %xmm3
2166 palignr $15, %xmm1, %xmm2
2168 movaps %xmm5, 48(%rdx)
2169 movaps %xmm4, 32(%rdx)
2170 movaps %xmm3, 16(%rdx)
2171 movaps %xmm2, (%rdx)
2173 jmp L(Shl15LoopStart)
/* Exit merge: the 16 bytes at (%rdx) are combined with xmm1 through xmm6
   and written back.  Only the USE_AS_STRCAT build needs the explicit
   jump that follows.  */
2176 movaps (%rdx), %xmm6
2179 palignr $15, %xmm1, %xmm6
2180 movaps %xmm6, (%rdx)
2181 # ifdef USE_AS_STRCAT
2182 jmp L(CopyFrom1To16Bytes)
2185 # ifndef USE_AS_STRCAT
/* Common tail that the copy paths above reach by jnz/jmp.  Most of its
   body is not visible in this excerpt.  What is visible: it sits inside
   an "ifndef USE_AS_STRCAT" region, has USE_AS_STPCPY-specific sections,
   and under USE_AS_STRNCPY can branch to L(StrncpyFillTailWithZero1).  */
2187 L(CopyFrom1To16Bytes):
2188 # ifdef USE_AS_STRNCPY
2215 # ifdef USE_AS_STPCPY
2220 # ifdef USE_AS_STRNCPY
2223 jnz L(StrncpyFillTailWithZero1)
2224 # ifdef USE_AS_STPCPY
2254 # ifdef USE_AS_STPCPY
2259 # ifdef USE_AS_STRNCPY
2262 jnz L(StrncpyFillTailWithZero1)
2263 # ifdef USE_AS_STPCPY
2270 # ifdef USE_AS_STRNCPY
/* strncpy-only tail, reached from L(CopyFrom1To16BytesCase2OrCase3) and
   the strncpy exit/leave stubs.  The only visible instruction adds %rdx
   into %rsi (rsi = rsi + rdx); the rest of the body is not visible in
   this excerpt.  */
2273 L(CopyFrom1To16BytesCase2):
2276 lea (%rsi, %rdx), %rsi
/* Dispatcher for the strncpy jbe exits: goes to
   L(CopyFrom1To16BytesCase2) when the preceding test (not visible in
   this excerpt) leaves ZF clear; otherwise control presumably falls
   through to L(CopyFrom1To16BytesCase3) -- TODO confirm, the lines in
   between are not visible.  */
2346 L(CopyFrom1To16BytesCase2OrCase3):
2348 jnz L(CopyFrom1To16BytesCase2)
/* Tail targeted by the strncpy leave/exit stubs.  Only the label and its
   size-class sub-labels are visible in this excerpt; the existing
   remarks on those labels state the range each one handles.  */
2351 L(CopyFrom1To16BytesCase3):
2368 L(More8Case3): /* but less than 16 */
2376 L(More4Case3): /* but less than 8 */
2381 L(Less12Case3): /* but more than 8 */
/* Fourteen consecutive exit stubs whose labels and copy instructions are
   not visible in this excerpt.  Only the conditional-compilation
   skeleton can be seen: each stub has USE_AS_STPCPY-specific sections
   and, under USE_AS_STRNCPY, a jnz to L(StrncpyFillTailWithZero1).  */
2392 # ifdef USE_AS_STPCPY
2397 # ifdef USE_AS_STRNCPY
2400 jnz L(StrncpyFillTailWithZero1)
2401 # ifdef USE_AS_STPCPY
2412 # ifdef USE_AS_STPCPY
2417 # ifdef USE_AS_STRNCPY
2420 jnz L(StrncpyFillTailWithZero1)
2421 # ifdef USE_AS_STPCPY
2434 # ifdef USE_AS_STPCPY
2439 # ifdef USE_AS_STRNCPY
2442 jnz L(StrncpyFillTailWithZero1)
2443 # ifdef USE_AS_STPCPY
2454 # ifdef USE_AS_STPCPY
2459 # ifdef USE_AS_STRNCPY
2462 jnz L(StrncpyFillTailWithZero1)
2463 # ifdef USE_AS_STPCPY
2476 # ifdef USE_AS_STPCPY
2481 # ifdef USE_AS_STRNCPY
2484 jnz L(StrncpyFillTailWithZero1)
2485 # ifdef USE_AS_STPCPY
2498 # ifdef USE_AS_STPCPY
2503 # ifdef USE_AS_STRNCPY
2506 jnz L(StrncpyFillTailWithZero1)
2507 # ifdef USE_AS_STPCPY
2520 # ifdef USE_AS_STPCPY
2525 # ifdef USE_AS_STRNCPY
2528 jnz L(StrncpyFillTailWithZero1)
2529 # ifdef USE_AS_STPCPY
2542 # ifdef USE_AS_STPCPY
2547 # ifdef USE_AS_STRNCPY
2550 jnz L(StrncpyFillTailWithZero1)
2551 # ifdef USE_AS_STPCPY
2564 # ifdef USE_AS_STPCPY
2569 # ifdef USE_AS_STRNCPY
2572 jnz L(StrncpyFillTailWithZero1)
2573 # ifdef USE_AS_STPCPY
2586 # ifdef USE_AS_STPCPY
2591 # ifdef USE_AS_STRNCPY
2594 jnz L(StrncpyFillTailWithZero1)
2595 # ifdef USE_AS_STPCPY
2608 # ifdef USE_AS_STPCPY
2613 # ifdef USE_AS_STRNCPY
2616 jnz L(StrncpyFillTailWithZero1)
2617 # ifdef USE_AS_STPCPY
2630 # ifdef USE_AS_STPCPY
2635 # ifdef USE_AS_STRNCPY
2638 jnz L(StrncpyFillTailWithZero1)
2639 # ifdef USE_AS_STPCPY
2652 # ifdef USE_AS_STPCPY
2657 # ifdef USE_AS_STRNCPY
2660 jnz L(StrncpyFillTailWithZero1)
2661 # ifdef USE_AS_STPCPY
2674 # ifdef USE_AS_STPCPY
2679 # ifdef USE_AS_STRNCPY
2682 jnz L(StrncpyFillTailWithZero1)
2683 # ifdef USE_AS_STPCPY
2690 # ifdef USE_AS_STRNCPY
/* Final stage of the strncpy tail fill, entered from
   L(StrncpyFillTailWithZero1) and its sub-labels.  Only the labels are
   visible in this excerpt; the existing remarks on the sub-labels state
   the size class each one handles.  */
2788 L(StrncpyFillExit1):
2790 L(FillFrom1To16Bytes):
2805 L(FillMore8): /* but less than 16 */
2813 L(FillMore4): /* but less than 8 */
2818 L(FillLess12): /* but more than 8 */
/* Fills memory at (%rcx) by storing xmm0 with aligned movdqa stores.
   Judging by the label name xmm0 holds zeros, but its setup is not
   visible in this excerpt -- TODO confirm.  The fill runs 64 bytes per
   pass in L(StrncpyFillLoopMovdqa), then in 32- and 16-byte steps, and
   finishes in L(FillFrom1To16Bytes) or L(StrncpyFillExit1).  The counter
   and pointer updates that drive the branches are not visible here;
   note that movdqa requires (%rcx) to be 16-byte aligned.  */
2825 L(StrncpyFillTailWithZero1):
2828 jbe L(StrncpyFillExit1)
2842 jb L(StrncpyFillLess64)
2844 L(StrncpyFillLoopMovdqa):
2845 movdqa %xmm0, (%rcx)
2846 movdqa %xmm0, 16(%rcx)
2847 movdqa %xmm0, 32(%rcx)
2848 movdqa %xmm0, 48(%rcx)
2851 jae L(StrncpyFillLoopMovdqa)
2853 L(StrncpyFillLess64):
2855 jl L(StrncpyFillLess32)
2856 movdqa %xmm0, (%rcx)
2857 movdqa %xmm0, 16(%rcx)
2860 jl L(StrncpyFillExit1)
2861 movdqa %xmm0, (%rcx)
2863 jmp L(FillFrom1To16Bytes)
2865 L(StrncpyFillLess32):
2867 jl L(StrncpyFillExit1)
2868 movdqa %xmm0, (%rcx)
2870 jmp L(FillFrom1To16Bytes)
/* Early strncpy exit, the target of a "jb" near the function entry.
   Its body is not visible in this excerpt apart from a
   USE_AS_STPCPY-specific section.  */
2878 L(StrncpyExit15Bytes):
2907 # ifdef USE_AS_STPCPY
/* Early strncpy exit, the target of a "jbe" near the function entry.
   Its body is not visible in this excerpt apart from a
   USE_AS_STPCPY-specific section.  */
2917 L(StrncpyExit8Bytes):
2948 # ifdef USE_AS_STPCPY
2961 # ifdef USE_AS_STRNCPY
/* strncpy leave point of the aligned 64-byte step: goes to
   L(Aligned64LeaveCase2) when the preceding test (not visible in this
   excerpt) leaves ZF clear; otherwise control presumably continues at
   L(Aligned64LeaveCase3) -- TODO confirm.  */
2963 L(StrncpyLeaveCase2OrCase3):
2965 jnz L(Aligned64LeaveCase2)
/* Leave path that stores the pending chunks xmm4, xmm5 and xmm6 to
   -64/-48/-32(%rdx) one at a time, going to L(CopyFrom1To16BytesCase3)
   as soon as one of the checks between the stores is "below or equal".
   The arithmetic that feeds those checks is not visible in this
   excerpt.  */
2967 L(Aligned64LeaveCase3):
2970 jbe L(CopyFrom1To16BytesCase3)
2971 movaps %xmm4, -64(%rdx)
2974 jbe L(CopyFrom1To16BytesCase3)
2975 movaps %xmm5, -48(%rdx)
2978 jbe L(CopyFrom1To16BytesCase3)
2979 movaps %xmm6, -32(%rdx)
2982 jmp L(CopyFrom1To16BytesCase3)
/* Leave path that examines the pending chunks xmm4, xmm5, xmm6 and xmm7
   in turn: each is compared bytewise against xmm0 (pcmpeqb) and the
   result is turned into a bit mask in %rax (pmovmskb).  A chunk is
   stored to -64/-48/-32(%rdx) once the following chunk is being
   examined.  Each step can leave through
   L(CopyFrom1To16BytesCase2OrCase3) or L(CopyFrom1To16Bytes); the last
   one always goes to L(CopyFrom1To16BytesCase2).
   NOTE(review): the first length check uses the signed "jle" while the
   later ones use "jbe"; the arithmetic feeding them is not visible in
   this excerpt, so whether that difference is intentional cannot be
   judged from here.  */
2984 L(Aligned64LeaveCase2):
2985 pcmpeqb %xmm4, %xmm0
2986 pmovmskb %xmm0, %rax
2988 jle L(CopyFrom1To16BytesCase2OrCase3)
2990 jnz L(CopyFrom1To16Bytes)
2992 pcmpeqb %xmm5, %xmm0
2993 pmovmskb %xmm0, %rax
2994 movaps %xmm4, -64(%rdx)
2997 jbe L(CopyFrom1To16BytesCase2OrCase3)
2999 jnz L(CopyFrom1To16Bytes)
3001 pcmpeqb %xmm6, %xmm0
3002 pmovmskb %xmm0, %rax
3003 movaps %xmm5, -48(%rdx)
3006 jbe L(CopyFrom1To16BytesCase2OrCase3)
3008 jnz L(CopyFrom1To16Bytes)
3010 pcmpeqb %xmm7, %xmm0
3011 pmovmskb %xmm0, %rax
3012 movaps %xmm6, -32(%rdx)
3015 jmp L(CopyFrom1To16BytesCase2)
3016 /*--------------------------------------------------*/
/* strncpy exit of the "palignr $1" copy path: loads the 16 bytes at
   (%rdx) into xmm6, merges them with xmm1 via "palignr $1" (intermediate
   steps are not visible in this excerpt) and writes the result back to
   (%rdx).  It then goes to L(CopyFrom1To16BytesCase2) when the preceding
   test (not visible here) leaves ZF clear, otherwise to
   L(CopyFrom1To16BytesCase3).  */
3017 L(StrncpyExit1Case2OrCase3):
3018 movaps (%rdx), %xmm6
3021 palignr $1, %xmm1, %xmm6
3022 movaps %xmm6, (%rdx)
3024 jnz L(CopyFrom1To16BytesCase2)
3025 jmp L(CopyFrom1To16BytesCase3)
/* strncpy exit of the "palignr $2" copy path: loads the 16 bytes at
   (%rdx) into xmm6, merges them with xmm1 via "palignr $2" (intermediate
   steps are not visible in this excerpt) and writes the result back to
   (%rdx).  It then goes to L(CopyFrom1To16BytesCase2) when the preceding
   test (not visible here) leaves ZF clear, otherwise to
   L(CopyFrom1To16BytesCase3).  */
3027 L(StrncpyExit2Case2OrCase3):
3028 movaps (%rdx), %xmm6
3031 palignr $2, %xmm1, %xmm6
3032 movaps %xmm6, (%rdx)
3034 jnz L(CopyFrom1To16BytesCase2)
3035 jmp L(CopyFrom1To16BytesCase3)
/* strncpy exit of the "palignr $3" copy path: loads the 16 bytes at
   (%rdx) into xmm6, merges them with xmm1 via "palignr $3" (intermediate
   steps are not visible in this excerpt) and writes the result back to
   (%rdx).  It then goes to L(CopyFrom1To16BytesCase2) when the preceding
   test (not visible here) leaves ZF clear, otherwise to
   L(CopyFrom1To16BytesCase3).  */
3037 L(StrncpyExit3Case2OrCase3):
3038 movaps (%rdx), %xmm6
3041 palignr $3, %xmm1, %xmm6
3042 movaps %xmm6, (%rdx)
3044 jnz L(CopyFrom1To16BytesCase2)
3045 jmp L(CopyFrom1To16BytesCase3)
/* strncpy exit of the "palignr $4" copy path: loads the 16 bytes at
   (%rdx) into xmm6, merges them with xmm1 via "palignr $4" (intermediate
   steps are not visible in this excerpt) and writes the result back to
   (%rdx).  It then goes to L(CopyFrom1To16BytesCase2) when the preceding
   test (not visible here) leaves ZF clear, otherwise to
   L(CopyFrom1To16BytesCase3).  */
3047 L(StrncpyExit4Case2OrCase3):
3048 movaps (%rdx), %xmm6
3051 palignr $4, %xmm1, %xmm6
3052 movaps %xmm6, (%rdx)
3054 jnz L(CopyFrom1To16BytesCase2)
3055 jmp L(CopyFrom1To16BytesCase3)
/* strncpy exit of the "palignr $5" copy path: loads the 16 bytes at
   (%rdx) into xmm6, merges them with xmm1 via "palignr $5" (intermediate
   steps are not visible in this excerpt) and writes the result back to
   (%rdx).  It then goes to L(CopyFrom1To16BytesCase2) when the preceding
   test (not visible here) leaves ZF clear, otherwise to
   L(CopyFrom1To16BytesCase3).  */
3057 L(StrncpyExit5Case2OrCase3):
3058 movaps (%rdx), %xmm6
3061 palignr $5, %xmm1, %xmm6
3062 movaps %xmm6, (%rdx)
3064 jnz L(CopyFrom1To16BytesCase2)
3065 jmp L(CopyFrom1To16BytesCase3)
/* strncpy exit of the "palignr $6" copy path: loads the 16 bytes at
   (%rdx) into xmm6, merges them with xmm1 via "palignr $6" (intermediate
   steps are not visible in this excerpt) and writes the result back to
   (%rdx).  It then goes to L(CopyFrom1To16BytesCase2) when the preceding
   test (not visible here) leaves ZF clear, otherwise to
   L(CopyFrom1To16BytesCase3).  */
3067 L(StrncpyExit6Case2OrCase3):
3068 movaps (%rdx), %xmm6
3071 palignr $6, %xmm1, %xmm6
3072 movaps %xmm6, (%rdx)
3074 jnz L(CopyFrom1To16BytesCase2)
3075 jmp L(CopyFrom1To16BytesCase3)
/* strncpy exit of the "palignr $7" copy path: loads the 16 bytes at
   (%rdx) into xmm6, merges them with xmm1 via "palignr $7" (intermediate
   steps are not visible in this excerpt) and writes the result back to
   (%rdx).  It then goes to L(CopyFrom1To16BytesCase2) when the preceding
   test (not visible here) leaves ZF clear, otherwise to
   L(CopyFrom1To16BytesCase3).  */
3077 L(StrncpyExit7Case2OrCase3):
3078 movaps (%rdx), %xmm6
3081 palignr $7, %xmm1, %xmm6
3082 movaps %xmm6, (%rdx)
3084 jnz L(CopyFrom1To16BytesCase2)
3085 jmp L(CopyFrom1To16BytesCase3)
/* strncpy exit of the "palignr $8" copy path: loads the 16 bytes at
   (%rdx) into xmm6, merges them with xmm1 via "palignr $8" (intermediate
   steps are not visible in this excerpt) and writes the result back to
   (%rdx).  It then goes to L(CopyFrom1To16BytesCase2) when the preceding
   test (not visible here) leaves ZF clear, otherwise to
   L(CopyFrom1To16BytesCase3).  */
3087 L(StrncpyExit8Case2OrCase3):
3088 movaps (%rdx), %xmm6
3091 palignr $8, %xmm1, %xmm6
3092 movaps %xmm6, (%rdx)
3094 jnz L(CopyFrom1To16BytesCase2)
3095 jmp L(CopyFrom1To16BytesCase3)
/* strncpy exit of the "palignr $9" copy path: loads the 16 bytes at
   (%rdx) into xmm6, merges them with xmm1 via "palignr $9" (intermediate
   steps are not visible in this excerpt) and writes the result back to
   (%rdx).  It then goes to L(CopyFrom1To16BytesCase2) when the preceding
   test (not visible here) leaves ZF clear, otherwise to
   L(CopyFrom1To16BytesCase3).  */
3097 L(StrncpyExit9Case2OrCase3):
3098 movaps (%rdx), %xmm6
3101 palignr $9, %xmm1, %xmm6
3102 movaps %xmm6, (%rdx)
3104 jnz L(CopyFrom1To16BytesCase2)
3105 jmp L(CopyFrom1To16BytesCase3)
/* strncpy exit of the "palignr $10" copy path: loads the 16 bytes at
   (%rdx) into xmm6, merges them with xmm1 via "palignr $10"
   (intermediate steps are not visible in this excerpt) and writes the
   result back to (%rdx).  It then goes to L(CopyFrom1To16BytesCase2)
   when the preceding test (not visible here) leaves ZF clear, otherwise
   to L(CopyFrom1To16BytesCase3).  */
3107 L(StrncpyExit10Case2OrCase3):
3108 movaps (%rdx), %xmm6
3111 palignr $10, %xmm1, %xmm6
3112 movaps %xmm6, (%rdx)
3114 jnz L(CopyFrom1To16BytesCase2)
3115 jmp L(CopyFrom1To16BytesCase3)
/* strncpy exit of the "palignr $11" copy path: loads the 16 bytes at
   (%rdx) into xmm6, merges them with xmm1 via "palignr $11"
   (intermediate steps are not visible in this excerpt) and writes the
   result back to (%rdx).  It then goes to L(CopyFrom1To16BytesCase2)
   when the preceding test (not visible here) leaves ZF clear, otherwise
   to L(CopyFrom1To16BytesCase3).  */
3117 L(StrncpyExit11Case2OrCase3):
3118 movaps (%rdx), %xmm6
3121 palignr $11, %xmm1, %xmm6
3122 movaps %xmm6, (%rdx)
3124 jnz L(CopyFrom1To16BytesCase2)
3125 jmp L(CopyFrom1To16BytesCase3)
/* strncpy exit of the "palignr $12" copy path: loads the 16 bytes at
   (%rdx) into xmm6, merges them with xmm1 via "palignr $12"
   (intermediate steps are not visible in this excerpt) and writes the
   result back to (%rdx).  It then goes to L(CopyFrom1To16BytesCase2)
   when the preceding test (not visible here) leaves ZF clear, otherwise
   to L(CopyFrom1To16BytesCase3).  */
3127 L(StrncpyExit12Case2OrCase3):
3128 movaps (%rdx), %xmm6
3131 palignr $12, %xmm1, %xmm6
3132 movaps %xmm6, (%rdx)
3134 jnz L(CopyFrom1To16BytesCase2)
3135 jmp L(CopyFrom1To16BytesCase3)
/* strncpy exit of the "palignr $13" copy path: loads the 16 bytes at
   (%rdx) into xmm6, merges them with xmm1 via "palignr $13"
   (intermediate steps are not visible in this excerpt) and writes the
   result back to (%rdx).  It then goes to L(CopyFrom1To16BytesCase2)
   when the preceding test (not visible here) leaves ZF clear, otherwise
   to L(CopyFrom1To16BytesCase3).  */
3137 L(StrncpyExit13Case2OrCase3):
3138 movaps (%rdx), %xmm6
3141 palignr $13, %xmm1, %xmm6
3142 movaps %xmm6, (%rdx)
3144 jnz L(CopyFrom1To16BytesCase2)
3145 jmp L(CopyFrom1To16BytesCase3)
/* strncpy exit of the "palignr $14" copy path: loads the 16 bytes at
   (%rdx) into xmm6, merges them with xmm1 via "palignr $14"
   (intermediate steps are not visible in this excerpt) and writes the
   result back to (%rdx).  It then goes to L(CopyFrom1To16BytesCase2)
   when the preceding test (not visible here) leaves ZF clear, otherwise
   to L(CopyFrom1To16BytesCase3).  */
3147 L(StrncpyExit14Case2OrCase3):
3148 movaps (%rdx), %xmm6
3151 palignr $14, %xmm1, %xmm6
3152 movaps %xmm6, (%rdx)
3154 jnz L(CopyFrom1To16BytesCase2)
3155 jmp L(CopyFrom1To16BytesCase3)
/* strncpy exit of the "palignr $15" copy path: loads the 16 bytes at
   (%rdx) into xmm6, merges them with xmm1 via "palignr $15"
   (intermediate steps are not visible in this excerpt) and writes the
   result back to (%rdx).  It then goes to L(CopyFrom1To16BytesCase2)
   when the preceding test (not visible here) leaves ZF clear, otherwise
   to L(CopyFrom1To16BytesCase3).  */
3157 L(StrncpyExit15Case2OrCase3):
3158 movaps (%rdx), %xmm6
3161 palignr $15, %xmm1, %xmm6
3162 movaps %xmm6, (%rdx)
3164 jnz L(CopyFrom1To16BytesCase2)
3165 jmp L(CopyFrom1To16BytesCase3)
/* Length-limited leave sequence that uses "palignr $1" (presumably
   L(StrncpyLeave1); its label is not visible in this excerpt).  It
   stores merged chunks to (%rdx) and 16(%rdx), then xmm4 and xmm5 to
   32(%rdx) and 48(%rdx), performs a final merge through xmm6 at
   (%rdx, %rsi), and jumps to L(CopyFrom1To16BytesCase3).  The length
   checks between the stores are not visible here.  */
3171 palignr $1, %xmm1, %xmm2
3173 movaps %xmm2, (%rdx)
3174 movaps 31(%rcx), %xmm2
3179 palignr $1, %xmm1, %xmm2
3180 movaps %xmm2, 16(%rdx)
3181 movaps 31+16(%rcx), %xmm2
3187 movaps %xmm4, 32(%rdx)
3192 movaps %xmm5, 48(%rdx)
3197 movaps (%rdx, %rsi), %xmm6
3199 palignr $1, %xmm1, %xmm6
3200 movaps %xmm6, (%rdx, %rsi)
3202 jmp L(CopyFrom1To16BytesCase3)
/* Length-limited leave sequence that uses "palignr $2" (presumably
   L(StrncpyLeave2); its label is not visible in this excerpt).  It
   stores merged chunks to (%rdx) and 16(%rdx), then xmm4 and xmm5 to
   32(%rdx) and 48(%rdx), performs a final merge through xmm6 at
   (%rdx, %rsi), and jumps to L(CopyFrom1To16BytesCase3).  The length
   checks between the stores are not visible here.  */
3208 palignr $2, %xmm1, %xmm2
3210 movaps %xmm2, (%rdx)
3211 movaps 30(%rcx), %xmm2
3216 palignr $2, %xmm1, %xmm2
3217 movaps %xmm2, 16(%rdx)
3218 movaps 30+16(%rcx), %xmm2
3224 movaps %xmm4, 32(%rdx)
3229 movaps %xmm5, 48(%rdx)
3234 movaps (%rdx, %rsi), %xmm6
3236 palignr $2, %xmm1, %xmm6
3237 movaps %xmm6, (%rdx, %rsi)
3239 jmp L(CopyFrom1To16BytesCase3)
/* Length-limited leave sequence that uses "palignr $3" (presumably
   L(StrncpyLeave3); its label is not visible in this excerpt).  It
   stores merged chunks to (%rdx) and 16(%rdx), then xmm4 and xmm5 to
   32(%rdx) and 48(%rdx), performs a final merge through xmm6 at
   (%rdx, %rsi), and jumps to L(CopyFrom1To16BytesCase3).  The length
   checks between the stores are not visible here.  */
3245 palignr $3, %xmm1, %xmm2
3247 movaps %xmm2, (%rdx)
3248 movaps 29(%rcx), %xmm2
3253 palignr $3, %xmm1, %xmm2
3254 movaps %xmm2, 16(%rdx)
3255 movaps 29+16(%rcx), %xmm2
3261 movaps %xmm4, 32(%rdx)
3266 movaps %xmm5, 48(%rdx)
3271 movaps (%rdx, %rsi), %xmm6
3273 palignr $3, %xmm1, %xmm6
3274 movaps %xmm6, (%rdx, %rsi)
3276 jmp L(CopyFrom1To16BytesCase3)
/* Shift-by-4 tail (its label is not visible in this excerpt).  Stores
   four aligned 16-byte blocks at %rdx .. %rdx+48, merges one more block
   at %rdx+%rsi and jumps to CopyFrom1To16BytesCase3.
   NOTE(review): the embedded listing numbers are non-contiguous, so
   instructions are missing between the lines shown (presumably counter
   updates, register copies and early exits) -- confirm against the
   full file before relying on these notes.  */
/* xmm2 = low 16 bytes of (xmm2:xmm1) >> 4 bytes: the top 12 bytes of
   xmm1 followed by the low 4 bytes of xmm2.  */
3282 palignr $4, %xmm1, %xmm2
3284 movaps %xmm2, (%rdx)
/* movaps faults on an unaligned memory operand, so this relies on
   %rcx+28 being 16-byte aligned, i.e. %rcx = 4 (mod 16).  */
3285 movaps 28(%rcx), %xmm2
/* Same realignment for the second block.  */
3290 palignr $4, %xmm1, %xmm2
3291 movaps %xmm2, 16(%rdx)
3292 movaps 28+16(%rcx), %xmm2
/* Third and fourth blocks come straight from %xmm4/%xmm5, which are
   not written anywhere in the visible part of this block.  */
3298 movaps %xmm4, 32(%rdx)
3303 movaps %xmm5, 48(%rdx)
/* Final merge at %rdx+%rsi: top 12 bytes of xmm1, then the low 4 bytes
   of xmm6.  A line between the load and the palignr is absent from
   this listing, so xmm6 may be adjusted first -- TODO confirm.  */
3308 movaps (%rdx, %rsi), %xmm6
3310 palignr $4, %xmm1, %xmm6
3311 movaps %xmm6, (%rdx, %rsi)
3313 jmp L(CopyFrom1To16BytesCase3)
/* Shift-by-5 tail (its label is not visible in this excerpt).  Stores
   four aligned 16-byte blocks at %rdx .. %rdx+48, merges one more block
   at %rdx+%rsi and jumps to CopyFrom1To16BytesCase3.
   NOTE(review): the embedded listing numbers are non-contiguous, so
   instructions are missing between the lines shown (presumably counter
   updates, register copies and early exits) -- confirm against the
   full file before relying on these notes.  */
/* xmm2 = low 16 bytes of (xmm2:xmm1) >> 5 bytes: the top 11 bytes of
   xmm1 followed by the low 5 bytes of xmm2.  */
3319 palignr $5, %xmm1, %xmm2
3321 movaps %xmm2, (%rdx)
/* movaps faults on an unaligned memory operand, so this relies on
   %rcx+27 being 16-byte aligned, i.e. %rcx = 5 (mod 16).  */
3322 movaps 27(%rcx), %xmm2
/* Same realignment for the second block.  */
3327 palignr $5, %xmm1, %xmm2
3328 movaps %xmm2, 16(%rdx)
3329 movaps 27+16(%rcx), %xmm2
/* Third and fourth blocks come straight from %xmm4/%xmm5, which are
   not written anywhere in the visible part of this block.  */
3335 movaps %xmm4, 32(%rdx)
3340 movaps %xmm5, 48(%rdx)
/* Final merge at %rdx+%rsi: top 11 bytes of xmm1, then the low 5 bytes
   of xmm6.  A line between the load and the palignr is absent from
   this listing, so xmm6 may be adjusted first -- TODO confirm.  */
3345 movaps (%rdx, %rsi), %xmm6
3347 palignr $5, %xmm1, %xmm6
3348 movaps %xmm6, (%rdx, %rsi)
3350 jmp L(CopyFrom1To16BytesCase3)
/* Shift-by-6 tail (its label is not visible in this excerpt).  Stores
   four aligned 16-byte blocks at %rdx .. %rdx+48, merges one more block
   at %rdx+%rsi and jumps to CopyFrom1To16BytesCase3.
   NOTE(review): the embedded listing numbers are non-contiguous, so
   instructions are missing between the lines shown (presumably counter
   updates, register copies and early exits) -- confirm against the
   full file before relying on these notes.  */
/* xmm2 = low 16 bytes of (xmm2:xmm1) >> 6 bytes: the top 10 bytes of
   xmm1 followed by the low 6 bytes of xmm2.  */
3356 palignr $6, %xmm1, %xmm2
3358 movaps %xmm2, (%rdx)
/* movaps faults on an unaligned memory operand, so this relies on
   %rcx+26 being 16-byte aligned, i.e. %rcx = 6 (mod 16).  */
3359 movaps 26(%rcx), %xmm2
/* Same realignment for the second block.  */
3364 palignr $6, %xmm1, %xmm2
3365 movaps %xmm2, 16(%rdx)
3366 movaps 26+16(%rcx), %xmm2
/* Third and fourth blocks come straight from %xmm4/%xmm5, which are
   not written anywhere in the visible part of this block.  */
3372 movaps %xmm4, 32(%rdx)
3377 movaps %xmm5, 48(%rdx)
/* Final merge at %rdx+%rsi: top 10 bytes of xmm1, then the low 6 bytes
   of xmm6.  A line between the load and the palignr is absent from
   this listing, so xmm6 may be adjusted first -- TODO confirm.  */
3382 movaps (%rdx, %rsi), %xmm6
3384 palignr $6, %xmm1, %xmm6
3385 movaps %xmm6, (%rdx, %rsi)
3387 jmp L(CopyFrom1To16BytesCase3)
/* Shift-by-7 tail (its label is not visible in this excerpt).  Stores
   four aligned 16-byte blocks at %rdx .. %rdx+48, merges one more block
   at %rdx+%rsi and jumps to CopyFrom1To16BytesCase3.
   NOTE(review): the embedded listing numbers are non-contiguous, so
   instructions are missing between the lines shown (presumably counter
   updates, register copies and early exits) -- confirm against the
   full file before relying on these notes.  */
/* xmm2 = low 16 bytes of (xmm2:xmm1) >> 7 bytes: the top 9 bytes of
   xmm1 followed by the low 7 bytes of xmm2.  */
3393 palignr $7, %xmm1, %xmm2
3395 movaps %xmm2, (%rdx)
/* movaps faults on an unaligned memory operand, so this relies on
   %rcx+25 being 16-byte aligned, i.e. %rcx = 7 (mod 16).  */
3396 movaps 25(%rcx), %xmm2
/* Same realignment for the second block.  */
3401 palignr $7, %xmm1, %xmm2
3402 movaps %xmm2, 16(%rdx)
3403 movaps 25+16(%rcx), %xmm2
/* Third and fourth blocks come straight from %xmm4/%xmm5, which are
   not written anywhere in the visible part of this block.  */
3409 movaps %xmm4, 32(%rdx)
3414 movaps %xmm5, 48(%rdx)
/* Final merge at %rdx+%rsi: top 9 bytes of xmm1, then the low 7 bytes
   of xmm6.  A line between the load and the palignr is absent from
   this listing, so xmm6 may be adjusted first -- TODO confirm.  */
3419 movaps (%rdx, %rsi), %xmm6
3421 palignr $7, %xmm1, %xmm6
3422 movaps %xmm6, (%rdx, %rsi)
3424 jmp L(CopyFrom1To16BytesCase3)
/* Shift-by-8 tail (its label is not visible in this excerpt).  Stores
   four aligned 16-byte blocks at %rdx .. %rdx+48, merges one more block
   at %rdx+%rsi and jumps to CopyFrom1To16BytesCase3.
   NOTE(review): the embedded listing numbers are non-contiguous, so
   instructions are missing between the lines shown (presumably counter
   updates, register copies and early exits) -- confirm against the
   full file before relying on these notes.  */
/* xmm2 = low 16 bytes of (xmm2:xmm1) >> 8 bytes: the top 8 bytes of
   xmm1 followed by the low 8 bytes of xmm2.  */
3430 palignr $8, %xmm1, %xmm2
3432 movaps %xmm2, (%rdx)
/* movaps faults on an unaligned memory operand, so this relies on
   %rcx+24 being 16-byte aligned, i.e. %rcx = 8 (mod 16).  */
3433 movaps 24(%rcx), %xmm2
/* Same realignment for the second block.  */
3438 palignr $8, %xmm1, %xmm2
3439 movaps %xmm2, 16(%rdx)
3440 movaps 24+16(%rcx), %xmm2
/* Third and fourth blocks come straight from %xmm4/%xmm5, which are
   not written anywhere in the visible part of this block.  */
3446 movaps %xmm4, 32(%rdx)
3451 movaps %xmm5, 48(%rdx)
/* Final merge at %rdx+%rsi: top 8 bytes of xmm1, then the low 8 bytes
   of xmm6.  A line between the load and the palignr is absent from
   this listing, so xmm6 may be adjusted first -- TODO confirm.  */
3456 movaps (%rdx, %rsi), %xmm6
3458 palignr $8, %xmm1, %xmm6
3459 movaps %xmm6, (%rdx, %rsi)
3461 jmp L(CopyFrom1To16BytesCase3)
/* Shift-by-9 tail (its label is not visible in this excerpt).  Stores
   four aligned 16-byte blocks at %rdx .. %rdx+48, merges one more block
   at %rdx+%rsi and jumps to CopyFrom1To16BytesCase3.
   NOTE(review): the embedded listing numbers are non-contiguous, so
   instructions are missing between the lines shown (presumably counter
   updates, register copies and early exits) -- confirm against the
   full file before relying on these notes.  */
/* xmm2 = low 16 bytes of (xmm2:xmm1) >> 9 bytes: the top 7 bytes of
   xmm1 followed by the low 9 bytes of xmm2.  */
3467 palignr $9, %xmm1, %xmm2
3469 movaps %xmm2, (%rdx)
/* movaps faults on an unaligned memory operand, so this relies on
   %rcx+23 being 16-byte aligned, i.e. %rcx = 9 (mod 16).  */
3470 movaps 23(%rcx), %xmm2
/* Same realignment for the second block.  */
3475 palignr $9, %xmm1, %xmm2
3476 movaps %xmm2, 16(%rdx)
3477 movaps 23+16(%rcx), %xmm2
/* Third and fourth blocks come straight from %xmm4/%xmm5, which are
   not written anywhere in the visible part of this block.  */
3483 movaps %xmm4, 32(%rdx)
3488 movaps %xmm5, 48(%rdx)
/* Final merge at %rdx+%rsi: top 7 bytes of xmm1, then the low 9 bytes
   of xmm6.  A line between the load and the palignr is absent from
   this listing, so xmm6 may be adjusted first -- TODO confirm.  */
3493 movaps (%rdx, %rsi), %xmm6
3495 palignr $9, %xmm1, %xmm6
3496 movaps %xmm6, (%rdx, %rsi)
3498 jmp L(CopyFrom1To16BytesCase3)
/* Shift-by-10 tail of the strncpy copy path.  Stores up to four aligned
   16-byte blocks at %rdx .. %rdx+48, branching to L(StrncpyExit10)
   whenever a flag test says so, then merges one more block at
   %rdx+%rsi and jumps to CopyFrom1To16BytesCase3.
   NOTE(review): this listing is incomplete -- the embedded numbers are
   non-contiguous, and neither the entry label, the L(StrncpyExit10)
   label, nor the instructions that set the flags for jle/jbe are
   visible.  Presumably L(StrncpyExit10) sits just before the final
   merge; confirm against the full file.  */
/* Signed "less or equal" on flags produced by a line not shown.  */
3503 jle L(StrncpyExit10)
/* xmm2 = low 16 bytes of (xmm2:xmm1) >> 10 bytes: the top 6 bytes of
   xmm1 followed by the low 10 bytes of xmm2.  */
3504 palignr $10, %xmm1, %xmm2
3506 movaps %xmm2, (%rdx)
/* movaps faults on an unaligned memory operand, so this relies on
   %rcx+22 being 16-byte aligned, i.e. %rcx = 10 (mod 16).  */
3507 movaps 22(%rcx), %xmm2
/* Unsigned "below or equal" on flags produced by a line not shown.  */
3511 jbe L(StrncpyExit10)
/* Same realignment for the second block.  */
3512 palignr $10, %xmm1, %xmm2
3513 movaps %xmm2, 16(%rdx)
3514 movaps 22+16(%rcx), %xmm2
3518 jbe L(StrncpyExit10)
/* Third and fourth blocks come straight from %xmm4/%xmm5, which are
   not written anywhere in the visible part of this block.  */
3520 movaps %xmm4, 32(%rdx)
3523 jbe L(StrncpyExit10)
3525 movaps %xmm5, 48(%rdx)
/* Final merge at %rdx+%rsi: top 6 bytes of xmm1, then the low 10 bytes
   of xmm6.  A line between the load and the palignr is absent from
   this listing, so xmm6 may be adjusted first -- TODO confirm.  */
3530 movaps (%rdx, %rsi), %xmm6
3532 palignr $10, %xmm1, %xmm6
3533 movaps %xmm6, (%rdx, %rsi)
3535 jmp L(CopyFrom1To16BytesCase3)
/* Shift-by-11 tail of the strncpy copy path.  Stores up to four aligned
   16-byte blocks at %rdx .. %rdx+48, branching to L(StrncpyExit11)
   whenever a flag test says so, then merges one more block at
   %rdx+%rsi and jumps to CopyFrom1To16BytesCase3.
   NOTE(review): this listing is incomplete -- the embedded numbers are
   non-contiguous, and neither the entry label, the L(StrncpyExit11)
   label, nor the instructions that set the flags for jle/jbe are
   visible.  Presumably L(StrncpyExit11) sits just before the final
   merge; confirm against the full file.  */
/* Signed "less or equal" on flags produced by a line not shown.  */
3540 jle L(StrncpyExit11)
/* xmm2 = low 16 bytes of (xmm2:xmm1) >> 11 bytes: the top 5 bytes of
   xmm1 followed by the low 11 bytes of xmm2.  */
3541 palignr $11, %xmm1, %xmm2
3543 movaps %xmm2, (%rdx)
/* movaps faults on an unaligned memory operand, so this relies on
   %rcx+21 being 16-byte aligned, i.e. %rcx = 11 (mod 16).  */
3544 movaps 21(%rcx), %xmm2
/* Unsigned "below or equal" on flags produced by a line not shown.  */
3548 jbe L(StrncpyExit11)
/* Same realignment for the second block.  */
3549 palignr $11, %xmm1, %xmm2
3550 movaps %xmm2, 16(%rdx)
3551 movaps 21+16(%rcx), %xmm2
3555 jbe L(StrncpyExit11)
/* Third and fourth blocks come straight from %xmm4/%xmm5, which are
   not written anywhere in the visible part of this block.  */
3557 movaps %xmm4, 32(%rdx)
3560 jbe L(StrncpyExit11)
3562 movaps %xmm5, 48(%rdx)
/* Final merge at %rdx+%rsi: top 5 bytes of xmm1, then the low 11 bytes
   of xmm6.  A line between the load and the palignr is absent from
   this listing, so xmm6 may be adjusted first -- TODO confirm.  */
3567 movaps (%rdx, %rsi), %xmm6
3569 palignr $11, %xmm1, %xmm6
3570 movaps %xmm6, (%rdx, %rsi)
3572 jmp L(CopyFrom1To16BytesCase3)
/* Shift-by-12 tail of the strncpy copy path.  Stores up to four aligned
   16-byte blocks at %rdx .. %rdx+48, branching to L(StrncpyExit12)
   whenever a flag test says so, then merges one more block at
   %rdx+%rsi and jumps to CopyFrom1To16BytesCase3.
   NOTE(review): this listing is incomplete -- the embedded numbers are
   non-contiguous, and neither the entry label, the L(StrncpyExit12)
   label, nor the instructions that set the flags for jle/jbe are
   visible.  Presumably L(StrncpyExit12) sits just before the final
   merge; confirm against the full file.  */
/* Signed "less or equal" on flags produced by a line not shown.  */
3577 jle L(StrncpyExit12)
/* xmm2 = low 16 bytes of (xmm2:xmm1) >> 12 bytes: the top 4 bytes of
   xmm1 followed by the low 12 bytes of xmm2.  */
3578 palignr $12, %xmm1, %xmm2
3580 movaps %xmm2, (%rdx)
/* movaps faults on an unaligned memory operand, so this relies on
   %rcx+20 being 16-byte aligned, i.e. %rcx = 12 (mod 16).  */
3581 movaps 20(%rcx), %xmm2
/* Unsigned "below or equal" on flags produced by a line not shown.  */
3585 jbe L(StrncpyExit12)
/* Same realignment for the second block.  */
3586 palignr $12, %xmm1, %xmm2
3587 movaps %xmm2, 16(%rdx)
3588 movaps 20+16(%rcx), %xmm2
3592 jbe L(StrncpyExit12)
/* Third and fourth blocks come straight from %xmm4/%xmm5, which are
   not written anywhere in the visible part of this block.  */
3594 movaps %xmm4, 32(%rdx)
3597 jbe L(StrncpyExit12)
3599 movaps %xmm5, 48(%rdx)
/* Final merge at %rdx+%rsi: top 4 bytes of xmm1, then the low 12 bytes
   of xmm6.  A line between the load and the palignr is absent from
   this listing, so xmm6 may be adjusted first -- TODO confirm.  */
3604 movaps (%rdx, %rsi), %xmm6
3606 palignr $12, %xmm1, %xmm6
3607 movaps %xmm6, (%rdx, %rsi)
3609 jmp L(CopyFrom1To16BytesCase3)
/* Shift-by-13 tail of the strncpy copy path.  Stores up to four aligned
   16-byte blocks at %rdx .. %rdx+48, branching to L(StrncpyExit13)
   whenever a flag test says so, then merges one more block at
   %rdx+%rsi and jumps to CopyFrom1To16BytesCase3.
   NOTE(review): this listing is incomplete -- the embedded numbers are
   non-contiguous, and neither the entry label, the L(StrncpyExit13)
   label, nor the instructions that set the flags for jle/jbe are
   visible.  Presumably L(StrncpyExit13) sits just before the final
   merge; confirm against the full file.  */
/* Signed "less or equal" on flags produced by a line not shown.  */
3614 jle L(StrncpyExit13)
/* xmm2 = low 16 bytes of (xmm2:xmm1) >> 13 bytes: the top 3 bytes of
   xmm1 followed by the low 13 bytes of xmm2.  */
3615 palignr $13, %xmm1, %xmm2
3617 movaps %xmm2, (%rdx)
/* movaps faults on an unaligned memory operand, so this relies on
   %rcx+19 being 16-byte aligned, i.e. %rcx = 13 (mod 16).  */
3618 movaps 19(%rcx), %xmm2
/* Unsigned "below or equal" on flags produced by a line not shown.  */
3622 jbe L(StrncpyExit13)
/* Same realignment for the second block.  */
3623 palignr $13, %xmm1, %xmm2
3624 movaps %xmm2, 16(%rdx)
3625 movaps 19+16(%rcx), %xmm2
3629 jbe L(StrncpyExit13)
/* Third and fourth blocks come straight from %xmm4/%xmm5, which are
   not written anywhere in the visible part of this block.  */
3631 movaps %xmm4, 32(%rdx)
3634 jbe L(StrncpyExit13)
3636 movaps %xmm5, 48(%rdx)
/* Final merge at %rdx+%rsi: top 3 bytes of xmm1, then the low 13 bytes
   of xmm6.  A line between the load and the palignr is absent from
   this listing, so xmm6 may be adjusted first -- TODO confirm.  */
3641 movaps (%rdx, %rsi), %xmm6
3643 palignr $13, %xmm1, %xmm6
3644 movaps %xmm6, (%rdx, %rsi)
3646 jmp L(CopyFrom1To16BytesCase3)
/* Shift-by-14 tail of the strncpy copy path.  Stores up to four aligned
   16-byte blocks at %rdx .. %rdx+48, branching to L(StrncpyExit14)
   whenever a flag test says so, then merges one more block at
   %rdx+%rsi and jumps to CopyFrom1To16BytesCase3.
   NOTE(review): this listing is incomplete -- the embedded numbers are
   non-contiguous, and neither the entry label, the L(StrncpyExit14)
   label, nor the instructions that set the flags for jle/jbe are
   visible.  Presumably L(StrncpyExit14) sits just before the final
   merge; confirm against the full file.  */
/* Signed "less or equal" on flags produced by a line not shown.  */
3651 jle L(StrncpyExit14)
/* xmm2 = low 16 bytes of (xmm2:xmm1) >> 14 bytes: the top 2 bytes of
   xmm1 followed by the low 14 bytes of xmm2.  */
3652 palignr $14, %xmm1, %xmm2
3654 movaps %xmm2, (%rdx)
/* movaps faults on an unaligned memory operand, so this relies on
   %rcx+18 being 16-byte aligned, i.e. %rcx = 14 (mod 16).  */
3655 movaps 18(%rcx), %xmm2
/* Unsigned "below or equal" on flags produced by a line not shown.  */
3659 jbe L(StrncpyExit14)
/* Same realignment for the second block.  */
3660 palignr $14, %xmm1, %xmm2
3661 movaps %xmm2, 16(%rdx)
3662 movaps 18+16(%rcx), %xmm2
3666 jbe L(StrncpyExit14)
/* Third and fourth blocks come straight from %xmm4/%xmm5, which are
   not written anywhere in the visible part of this block.  */
3668 movaps %xmm4, 32(%rdx)
3671 jbe L(StrncpyExit14)
3673 movaps %xmm5, 48(%rdx)
/* Final merge at %rdx+%rsi: top 2 bytes of xmm1, then the low 14 bytes
   of xmm6.  A line between the load and the palignr is absent from
   this listing, so xmm6 may be adjusted first -- TODO confirm.  */
3678 movaps (%rdx, %rsi), %xmm6
3680 palignr $14, %xmm1, %xmm6
3681 movaps %xmm6, (%rdx, %rsi)
3683 jmp L(CopyFrom1To16BytesCase3)
/* Shift-by-15 tail of the strncpy copy path.  Stores up to four aligned
   16-byte blocks at %rdx .. %rdx+48, branching to L(StrncpyExit15)
   whenever a flag test says so, then merges one more block at
   %rdx+%rsi and jumps to CopyFrom1To16BytesCase3.
   NOTE(review): this listing is incomplete -- the embedded numbers are
   non-contiguous, and neither the entry label, the L(StrncpyExit15)
   label, nor the instructions that set the flags for jle/jbe are
   visible.  Presumably L(StrncpyExit15) sits just before the final
   merge; confirm against the full file.  */
/* Signed "less or equal" on flags produced by a line not shown.  */
3688 jle L(StrncpyExit15)
/* xmm2 = low 16 bytes of (xmm2:xmm1) >> 15 bytes: the top byte of
   xmm1 followed by the low 15 bytes of xmm2.  */
3689 palignr $15, %xmm1, %xmm2
3691 movaps %xmm2, (%rdx)
/* movaps faults on an unaligned memory operand, so this relies on
   %rcx+17 being 16-byte aligned, i.e. %rcx = 15 (mod 16).  */
3692 movaps 17(%rcx), %xmm2
/* Unsigned "below or equal" on flags produced by a line not shown.  */
3696 jbe L(StrncpyExit15)
/* Same realignment for the second block.  */
3697 palignr $15, %xmm1, %xmm2
3698 movaps %xmm2, 16(%rdx)
3699 movaps 17+16(%rcx), %xmm2
3703 jbe L(StrncpyExit15)
/* Third and fourth blocks come straight from %xmm4/%xmm5, which are
   not written anywhere in the visible part of this block.  */
3705 movaps %xmm4, 32(%rdx)
3708 jbe L(StrncpyExit15)
3710 movaps %xmm5, 48(%rdx)
/* Final merge at %rdx+%rsi: top byte of xmm1, then the low 15 bytes
   of xmm6.  A line between the load and the palignr is absent from
   this listing, so xmm6 may be adjusted first -- TODO confirm.  */
3715 movaps (%rdx, %rsi), %xmm6
3717 palignr $15, %xmm1, %xmm6
3718 movaps %xmm6, (%rdx, %rsi)
3720 jmp L(CopyFrom1To16BytesCase3)
3722 # ifndef USE_AS_STRCAT