#ifndef NOT_IN_libc
+
# ifndef USE_AS_STRCAT
# include <sysdep.h>
cfi_adjust_cfa_offset (-4); \
cfi_restore (REG)
-# define PUSH(REG) pushl REG; CFI_PUSH (REG)
-# define POP(REG) popl REG; CFI_POP (REG)
+# define PUSH(REG) pushl REG; CFI_PUSH (REG)
+# define POP(REG) popl REG; CFI_POP (REG)
# ifndef STRCPY
# define STRCPY __strcpy_ssse3
# ifdef USE_AS_STRNCPY
# define PARMS 8
-# define ENTRANCE PUSH(%ebx)
-# define RETURN POP(%ebx); ret; CFI_PUSH(%ebx);
-# define RETURN1 POP(%edi); POP(%ebx); ret; CFI_PUSH(%ebx); CFI_PUSH(%edi)
+# define ENTRANCE PUSH (%ebx)
+# define RETURN POP (%ebx); ret; CFI_PUSH (%ebx);
+# define RETURN1 POP (%edi); POP (%ebx); ret; CFI_PUSH (%ebx); CFI_PUSH (%edi)
# else
# define PARMS 4
# define ENTRANCE
# define RETURN ret
-# define RETURN1 POP(%edi); ret; CFI_PUSH(%edi)
+# define RETURN1 POP (%edi); ret; CFI_PUSH (%edi)
+# endif
+
+# ifdef USE_AS_STPCPY
+# define SAVE_RESULT(n) lea n(%edx), %eax
+# define SAVE_RESULT_TAIL(n) lea n(%edx), %eax
+# else
+# define SAVE_RESULT(n) movl %edi, %eax
+# define SAVE_RESULT_TAIL(n) movl %edx, %eax
# endif
# define STR1 PARMS
movl - 4 byte
movlpd - 8 byte
movaps - 16 byte - requires 16 byte alignment
- of sourse and destination adresses.
- 16 byte alignment: adress is 32bit value,
- right four bit of adress shall be 0.
+ of source and destination addresses.
*/
.text
mov STR2(%esp), %ecx
# ifdef USE_AS_STRNCPY
movl LEN(%esp), %ebx
- test %ebx, %ebx
- jz L(ExitTail0)
cmp $8, %ebx
jbe L(StrncpyExit8Bytes)
# endif
sub $16, %ebx
and $0xf, %esi
-/* add 16 bytes ecx_shift to ebx */
+/* add 16 bytes ecx_offset to ebx */
add %esi, %ebx
# endif
lea 16(%ecx), %esi
-/* Now:
- esi = alignment_16(ecx) + ecx_shift + 16;
- ecx_shift = ecx - alignment_16(ecx)
-*/
and $-16, %esi
-/* Now:
- esi = alignment_16(ecx) + 16
-*/
pxor %xmm0, %xmm0
movlpd (%ecx), %xmm1
movlpd %xmm1, (%edx)
-/*
- look if there is zero symbol in next 16 bytes of string
- from esi to esi + 15 and form mask in xmm0
-*/
+
pcmpeqb (%esi), %xmm0
movlpd 8(%ecx), %xmm1
movlpd %xmm1, 8(%edx)
-/* convert byte mask in xmm0 to bit mask */
-
pmovmskb %xmm0, %eax
sub %ecx, %esi
-/* esi = 16 - ecx_shift */
-
-/* eax = 0: there isn't end of string from position esi to esi+15 */
-
# ifdef USE_AS_STRNCPY
sub $16, %ebx
jbe L(CopyFrom1To16BytesCase2OrCase3)
mov %edx, %eax
lea 16(%edx), %edx
-/* Now:
- edx = edx + 16 = alignment_16(edx) + edx_shift + 16
-*/
and $-16, %edx
-
-/* Now: edx = alignment_16(edx) + 16 */
-
sub %edx, %eax
-/* Now: eax = edx_shift - 16 */
-
# ifdef USE_AS_STRNCPY
add %eax, %esi
lea -1(%esi), %esi
L(ContinueCopy):
# endif
sub %eax, %ecx
-/* Now:
- case ecx_shift >= edx_shift:
- ecx = alignment_16(ecx) + (ecx_shift - edx_shift) + 16
- case ecx_shift < edx_shift:
- ecx = alignment_16(ecx) + (16 + ecx_shift - edx_shift)
-*/
mov %ecx, %eax
and $0xf, %eax
-/* Now:
- case ecx_shift >= edx_shift: eax = ecx_shift - edx_shift
- case ecx_shift < edx_shift: eax = (16 + ecx_shift - edx_shift)
- eax can be 0, 1, ..., 15
-*/
mov $0, %esi
-/* case: ecx_shift == edx_shift */
+/* case: ecx_offset == edx_offset */
jz L(Align16Both)
sub %ecx, %eax
sub %eax, %edx
# ifdef USE_AS_STRNCPY
- lea 48+64(%ebx, %eax), %ebx
+ lea 112(%ebx, %eax), %ebx
# endif
mov $-0x40, %esi
jnz L(Shl1LoopExit)
palignr $1, %xmm1, %xmm2
- movaps %xmm3, %xmm1
movaps %xmm2, (%edx)
movaps 31(%ecx), %xmm2
lea 16(%edx), %edx
pmovmskb %xmm0, %eax
lea 16(%ecx), %ecx
- movaps %xmm2, %xmm3
# ifdef USE_AS_STRNCPY
sub $16, %ebx
jbe L(StrncpyExit1Case2OrCase3)
test %eax, %eax
jnz L(Shl1LoopExit)
- palignr $1, %xmm1, %xmm2
- movaps %xmm3, %xmm1
+ palignr $1, %xmm3, %xmm2
movaps %xmm2, (%edx)
lea 31(%ecx), %ecx
lea 16(%edx), %edx
jmp L(Shl1LoopStart)
L(Shl1LoopExit):
- movaps (%edx), %xmm6
- psrldq $15, %xmm6
+ movlpd (%ecx), %xmm0
+ movlpd %xmm0, (%edx)
+ movlpd 7(%ecx), %xmm0
+ movlpd %xmm0, 7(%edx)
mov $15, %esi
- palignr $1, %xmm1, %xmm6
- movaps %xmm6, (%edx)
jmp L(CopyFrom1To16Bytes)
.p2align 4
jnz L(Shl2LoopExit)
palignr $2, %xmm1, %xmm2
- movaps %xmm3, %xmm1
movaps %xmm2, (%edx)
movaps 30(%ecx), %xmm2
lea 16(%edx), %edx
pmovmskb %xmm0, %eax
lea 16(%ecx), %ecx
- movaps %xmm2, %xmm3
# ifdef USE_AS_STRNCPY
sub $16, %ebx
jbe L(StrncpyExit2Case2OrCase3)
test %eax, %eax
jnz L(Shl2LoopExit)
- palignr $2, %xmm1, %xmm2
- movaps %xmm3, %xmm1
+ palignr $2, %xmm3, %xmm2
movaps %xmm2, (%edx)
lea 30(%ecx), %ecx
lea 16(%edx), %edx
jmp L(Shl2LoopStart)
L(Shl2LoopExit):
- movaps (%edx), %xmm6
- psrldq $14, %xmm6
+ movlpd (%ecx), %xmm0
+ movlpd 6(%ecx), %xmm1
+ movlpd %xmm0, (%edx)
+ movlpd %xmm1, 6(%edx)
mov $14, %esi
- palignr $2, %xmm1, %xmm6
- movaps %xmm6, (%edx)
jmp L(CopyFrom1To16Bytes)
.p2align 4
jnz L(Shl3LoopExit)
palignr $3, %xmm1, %xmm2
- movaps %xmm3, %xmm1
movaps %xmm2, (%edx)
movaps 29(%ecx), %xmm2
lea 16(%edx), %edx
pmovmskb %xmm0, %eax
lea 16(%ecx), %ecx
- movaps %xmm2, %xmm3
# ifdef USE_AS_STRNCPY
sub $16, %ebx
jbe L(StrncpyExit3Case2OrCase3)
test %eax, %eax
jnz L(Shl3LoopExit)
- palignr $3, %xmm1, %xmm2
- movaps %xmm3, %xmm1
+ palignr $3, %xmm3, %xmm2
movaps %xmm2, (%edx)
lea 29(%ecx), %ecx
lea 16(%edx), %edx
jmp L(Shl3LoopStart)
L(Shl3LoopExit):
- movaps (%edx), %xmm6
- psrldq $13, %xmm6
+ movlpd (%ecx), %xmm0
+ movlpd 5(%ecx), %xmm1
+ movlpd %xmm0, (%edx)
+ movlpd %xmm1, 5(%edx)
mov $13, %esi
- palignr $3, %xmm1, %xmm6
- movaps %xmm6, (%edx)
jmp L(CopyFrom1To16Bytes)
.p2align 4
jnz L(Shl4LoopExit)
palignr $4, %xmm1, %xmm2
- movaps %xmm3, %xmm1
movaps %xmm2, (%edx)
movaps 28(%ecx), %xmm2
lea 16(%edx), %edx
pmovmskb %xmm0, %eax
lea 16(%ecx), %ecx
- movaps %xmm2, %xmm3
# ifdef USE_AS_STRNCPY
sub $16, %ebx
jbe L(StrncpyExit4Case2OrCase3)
test %eax, %eax
jnz L(Shl4LoopExit)
- palignr $4, %xmm1, %xmm2
- movaps %xmm3, %xmm1
+ palignr $4, %xmm3, %xmm2
movaps %xmm2, (%edx)
lea 28(%ecx), %ecx
lea 16(%edx), %edx
jmp L(Shl4LoopStart)
L(Shl4LoopExit):
- movaps (%edx), %xmm6
- psrldq $12, %xmm6
+ movlpd (%ecx), %xmm0
+ movl 8(%ecx), %esi
+ movlpd %xmm0, (%edx)
+ movl %esi, 8(%edx)
mov $12, %esi
- palignr $4, %xmm1, %xmm6
- movaps %xmm6, (%edx)
jmp L(CopyFrom1To16Bytes)
.p2align 4
jnz L(Shl5LoopExit)
palignr $5, %xmm1, %xmm2
- movaps %xmm3, %xmm1
movaps %xmm2, (%edx)
movaps 27(%ecx), %xmm2
lea 16(%edx), %edx
pmovmskb %xmm0, %eax
lea 16(%ecx), %ecx
- movaps %xmm2, %xmm3
# ifdef USE_AS_STRNCPY
sub $16, %ebx
jbe L(StrncpyExit5Case2OrCase3)
test %eax, %eax
jnz L(Shl5LoopExit)
- palignr $5, %xmm1, %xmm2
- movaps %xmm3, %xmm1
+ palignr $5, %xmm3, %xmm2
movaps %xmm2, (%edx)
lea 27(%ecx), %ecx
lea 16(%edx), %edx
jmp L(Shl5LoopStart)
L(Shl5LoopExit):
- movaps (%edx), %xmm6
- psrldq $11, %xmm6
+ movlpd (%ecx), %xmm0
+ movl 7(%ecx), %esi
+ movlpd %xmm0, (%edx)
+ movl %esi, 7(%edx)
mov $11, %esi
- palignr $5, %xmm1, %xmm6
- movaps %xmm6, (%edx)
jmp L(CopyFrom1To16Bytes)
.p2align 4
jnz L(Shl6LoopExit)
palignr $6, %xmm1, %xmm2
- movaps %xmm3, %xmm1
movaps %xmm2, (%edx)
movaps 26(%ecx), %xmm2
lea 16(%edx), %edx
pmovmskb %xmm0, %eax
lea 16(%ecx), %ecx
- movaps %xmm2, %xmm3
# ifdef USE_AS_STRNCPY
sub $16, %ebx
jbe L(StrncpyExit6Case2OrCase3)
test %eax, %eax
jnz L(Shl6LoopExit)
- palignr $6, %xmm1, %xmm2
- movaps %xmm3, %xmm1
+ palignr $6, %xmm3, %xmm2
movaps %xmm2, (%edx)
lea 26(%ecx), %ecx
lea 16(%edx), %edx
jmp L(Shl6LoopStart)
L(Shl6LoopExit):
- movaps (%edx), %xmm6
- psrldq $10, %xmm6
+ movlpd (%ecx), %xmm0
+ movl 6(%ecx), %esi
+ movlpd %xmm0, (%edx)
+ movl %esi, 6(%edx)
mov $10, %esi
- palignr $6, %xmm1, %xmm6
- movaps %xmm6, (%edx)
jmp L(CopyFrom1To16Bytes)
.p2align 4
jnz L(Shl7LoopExit)
palignr $7, %xmm1, %xmm2
- movaps %xmm3, %xmm1
movaps %xmm2, (%edx)
movaps 25(%ecx), %xmm2
lea 16(%edx), %edx
pmovmskb %xmm0, %eax
lea 16(%ecx), %ecx
- movaps %xmm2, %xmm3
# ifdef USE_AS_STRNCPY
sub $16, %ebx
jbe L(StrncpyExit7Case2OrCase3)
test %eax, %eax
jnz L(Shl7LoopExit)
- palignr $7, %xmm1, %xmm2
- movaps %xmm3, %xmm1
+ palignr $7, %xmm3, %xmm2
movaps %xmm2, (%edx)
lea 25(%ecx), %ecx
lea 16(%edx), %edx
jmp L(Shl7LoopStart)
L(Shl7LoopExit):
- movaps (%edx), %xmm6
- psrldq $9, %xmm6
+ movlpd (%ecx), %xmm0
+ movl 5(%ecx), %esi
+ movlpd %xmm0, (%edx)
+ movl %esi, 5(%edx)
mov $9, %esi
- palignr $7, %xmm1, %xmm6
- movaps %xmm6, (%edx)
jmp L(CopyFrom1To16Bytes)
.p2align 4
jnz L(Shl8LoopExit)
palignr $8, %xmm1, %xmm2
- movaps %xmm3, %xmm1
movaps %xmm2, (%edx)
movaps 24(%ecx), %xmm2
lea 16(%edx), %edx
pmovmskb %xmm0, %eax
lea 16(%ecx), %ecx
- movaps %xmm2, %xmm3
# ifdef USE_AS_STRNCPY
sub $16, %ebx
jbe L(StrncpyExit8Case2OrCase3)
test %eax, %eax
jnz L(Shl8LoopExit)
- palignr $8, %xmm1, %xmm2
- movaps %xmm3, %xmm1
+ palignr $8, %xmm3, %xmm2
movaps %xmm2, (%edx)
lea 24(%ecx), %ecx
lea 16(%edx), %edx
jmp L(Shl8LoopStart)
L(Shl8LoopExit):
- movaps (%edx), %xmm6
- psrldq $8, %xmm6
+ movlpd (%ecx), %xmm0
+ movlpd %xmm0, (%edx)
mov $8, %esi
- palignr $8, %xmm1, %xmm6
- movaps %xmm6, (%edx)
jmp L(CopyFrom1To16Bytes)
.p2align 4
jnz L(Shl9LoopExit)
palignr $9, %xmm1, %xmm2
- movaps %xmm3, %xmm1
movaps %xmm2, (%edx)
movaps 23(%ecx), %xmm2
lea 16(%edx), %edx
pmovmskb %xmm0, %eax
lea 16(%ecx), %ecx
- movaps %xmm2, %xmm3
# ifdef USE_AS_STRNCPY
sub $16, %ebx
jbe L(StrncpyExit9Case2OrCase3)
test %eax, %eax
jnz L(Shl9LoopExit)
- palignr $9, %xmm1, %xmm2
- movaps %xmm3, %xmm1
+ palignr $9, %xmm3, %xmm2
movaps %xmm2, (%edx)
lea 23(%ecx), %ecx
lea 16(%edx), %edx
jmp L(Shl9LoopStart)
L(Shl9LoopExit):
- movaps (%edx), %xmm6
- psrldq $7, %xmm6
+ movlpd -1(%ecx), %xmm0
+ movlpd %xmm0, -1(%edx)
mov $7, %esi
- palignr $9, %xmm1, %xmm6
- movaps %xmm6, (%edx)
jmp L(CopyFrom1To16Bytes)
.p2align 4
jnz L(Shl10LoopExit)
palignr $10, %xmm1, %xmm2
- movaps %xmm3, %xmm1
movaps %xmm2, (%edx)
movaps 22(%ecx), %xmm2
lea 16(%edx), %edx
pmovmskb %xmm0, %eax
lea 16(%ecx), %ecx
- movaps %xmm2, %xmm3
# ifdef USE_AS_STRNCPY
sub $16, %ebx
jbe L(StrncpyExit10Case2OrCase3)
test %eax, %eax
jnz L(Shl10LoopExit)
- palignr $10, %xmm1, %xmm2
- movaps %xmm3, %xmm1
+ palignr $10, %xmm3, %xmm2
movaps %xmm2, (%edx)
lea 22(%ecx), %ecx
lea 16(%edx), %edx
jmp L(Shl10LoopStart)
L(Shl10LoopExit):
- movaps (%edx), %xmm6
- psrldq $6, %xmm6
+ movlpd -2(%ecx), %xmm0
+ movlpd %xmm0, -2(%edx)
mov $6, %esi
- palignr $10, %xmm1, %xmm6
- movaps %xmm6, (%edx)
jmp L(CopyFrom1To16Bytes)
.p2align 4
jnz L(Shl11LoopExit)
palignr $11, %xmm1, %xmm2
- movaps %xmm3, %xmm1
movaps %xmm2, (%edx)
movaps 21(%ecx), %xmm2
lea 16(%edx), %edx
pmovmskb %xmm0, %eax
lea 16(%ecx), %ecx
- movaps %xmm2, %xmm3
# ifdef USE_AS_STRNCPY
sub $16, %ebx
jbe L(StrncpyExit11Case2OrCase3)
test %eax, %eax
jnz L(Shl11LoopExit)
- palignr $11, %xmm1, %xmm2
- movaps %xmm3, %xmm1
+ palignr $11, %xmm3, %xmm2
movaps %xmm2, (%edx)
lea 21(%ecx), %ecx
lea 16(%edx), %edx
jmp L(Shl11LoopStart)
L(Shl11LoopExit):
- movaps (%edx), %xmm6
- psrldq $5, %xmm6
+ movlpd -3(%ecx), %xmm0
+ movlpd %xmm0, -3(%edx)
mov $5, %esi
- palignr $11, %xmm1, %xmm6
- movaps %xmm6, (%edx)
jmp L(CopyFrom1To16Bytes)
.p2align 4
jnz L(Shl12LoopExit)
palignr $12, %xmm1, %xmm2
- movaps %xmm3, %xmm1
movaps %xmm2, (%edx)
movaps 20(%ecx), %xmm2
lea 16(%edx), %edx
pmovmskb %xmm0, %eax
lea 16(%ecx), %ecx
- movaps %xmm2, %xmm3
# ifdef USE_AS_STRNCPY
sub $16, %ebx
jbe L(StrncpyExit12Case2OrCase3)
test %eax, %eax
jnz L(Shl12LoopExit)
- palignr $12, %xmm1, %xmm2
- movaps %xmm3, %xmm1
+ palignr $12, %xmm3, %xmm2
movaps %xmm2, (%edx)
lea 20(%ecx), %ecx
lea 16(%edx), %edx
jmp L(Shl12LoopStart)
L(Shl12LoopExit):
- movaps (%edx), %xmm6
- psrldq $4, %xmm6
+ movl (%ecx), %esi
+ movl %esi, (%edx)
mov $4, %esi
- palignr $12, %xmm1, %xmm6
- movaps %xmm6, (%edx)
jmp L(CopyFrom1To16Bytes)
.p2align 4
jnz L(Shl13LoopExit)
palignr $13, %xmm1, %xmm2
- movaps %xmm3, %xmm1
movaps %xmm2, (%edx)
movaps 19(%ecx), %xmm2
lea 16(%edx), %edx
pmovmskb %xmm0, %eax
lea 16(%ecx), %ecx
- movaps %xmm2, %xmm3
# ifdef USE_AS_STRNCPY
sub $16, %ebx
jbe L(StrncpyExit13Case2OrCase3)
test %eax, %eax
jnz L(Shl13LoopExit)
- palignr $13, %xmm1, %xmm2
- movaps %xmm3, %xmm1
+ palignr $13, %xmm3, %xmm2
movaps %xmm2, (%edx)
lea 19(%ecx), %ecx
lea 16(%edx), %edx
jmp L(Shl13LoopStart)
L(Shl13LoopExit):
- movaps (%edx), %xmm6
- psrldq $3, %xmm6
+ movl -1(%ecx), %esi
+ movl %esi, -1(%edx)
mov $3, %esi
- palignr $13, %xmm1, %xmm6
- movaps %xmm6, (%edx)
jmp L(CopyFrom1To16Bytes)
.p2align 4
jnz L(Shl14LoopExit)
palignr $14, %xmm1, %xmm2
- movaps %xmm3, %xmm1
movaps %xmm2, (%edx)
movaps 18(%ecx), %xmm2
lea 16(%edx), %edx
pmovmskb %xmm0, %eax
lea 16(%ecx), %ecx
- movaps %xmm2, %xmm3
# ifdef USE_AS_STRNCPY
sub $16, %ebx
jbe L(StrncpyExit14Case2OrCase3)
test %eax, %eax
jnz L(Shl14LoopExit)
- palignr $14, %xmm1, %xmm2
- movaps %xmm3, %xmm1
+ palignr $14, %xmm3, %xmm2
movaps %xmm2, (%edx)
lea 18(%ecx), %ecx
lea 16(%edx), %edx
jmp L(Shl14LoopStart)
L(Shl14LoopExit):
- movaps (%edx), %xmm6
- psrldq $2, %xmm6
+ movl -2(%ecx), %esi
+ movl %esi, -2(%edx)
mov $2, %esi
- palignr $14, %xmm1, %xmm6
- movaps %xmm6, (%edx)
jmp L(CopyFrom1To16Bytes)
.p2align 4
jnz L(Shl15LoopExit)
palignr $15, %xmm1, %xmm2
- movaps %xmm3, %xmm1
movaps %xmm2, (%edx)
movaps 17(%ecx), %xmm2
lea 16(%edx), %edx
pmovmskb %xmm0, %eax
lea 16(%ecx), %ecx
- movaps %xmm2, %xmm3
# ifdef USE_AS_STRNCPY
sub $16, %ebx
jbe L(StrncpyExit15Case2OrCase3)
test %eax, %eax
jnz L(Shl15LoopExit)
- palignr $15, %xmm1, %xmm2
- movaps %xmm3, %xmm1
+ palignr $15, %xmm3, %xmm2
movaps %xmm2, (%edx)
lea 17(%ecx), %ecx
lea 16(%edx), %edx
jmp L(Shl15LoopStart)
L(Shl15LoopExit):
- movaps (%edx), %xmm6
- psrldq $1, %xmm6
+ movl -3(%ecx), %esi
+ movl %esi, -3(%edx)
mov $1, %esi
- palignr $15, %xmm1, %xmm6
- movaps %xmm6, (%edx)
# ifdef USE_AS_STRCAT
jmp L(CopyFrom1To16Bytes)
# endif
+
# ifndef USE_AS_STRCAT
.p2align 4
POP (%esi)
test %al, %al
- jz L(ExitHigh)
+ jz L(ExitHigh8)
+
+L(CopyFrom1To16BytesLess8):
+ mov %al, %ah
+ and $15, %ah
+ jz L(ExitHigh4)
+
test $0x01, %al
jnz L(Exit1)
test $0x02, %al
jnz L(Exit2)
test $0x04, %al
jnz L(Exit3)
- test $0x08, %al
- jnz L(Exit4)
+
+ .p2align 4
+L(Exit4):
+ movl (%ecx), %eax
+ movl %eax, (%edx)
+ SAVE_RESULT (3)
+# ifdef USE_AS_STRNCPY
+ sub $4, %ebx
+ lea 4(%edx), %ecx
+ jnz L(StrncpyFillTailWithZero1)
+# ifdef USE_AS_STPCPY
+ cmpb $1, (%eax)
+ sbb $-1, %eax
+# endif
+# endif
+ RETURN1
+
+ .p2align 4
+L(ExitHigh4):
test $0x10, %al
jnz L(Exit5)
test $0x20, %al
L(Exit8):
movlpd (%ecx), %xmm0
movlpd %xmm0, (%edx)
-# ifdef USE_AS_STPCPY
- lea 7(%edx), %eax
-# else
- movl %edi, %eax
-# endif
+ SAVE_RESULT (7)
# ifdef USE_AS_STRNCPY
sub $8, %ebx
lea 8(%edx), %ecx
RETURN1
.p2align 4
-L(ExitHigh):
+L(ExitHigh8):
+ mov %ah, %al
+ and $15, %al
+ jz L(ExitHigh12)
+
test $0x01, %ah
jnz L(Exit9)
test $0x02, %ah
jnz L(Exit10)
test $0x04, %ah
jnz L(Exit11)
- test $0x08, %ah
- jnz L(Exit12)
+
+ .p2align 4
+L(Exit12):
+ movlpd (%ecx), %xmm0
+ movl 8(%ecx), %eax
+ movlpd %xmm0, (%edx)
+ movl %eax, 8(%edx)
+ SAVE_RESULT (11)
+# ifdef USE_AS_STRNCPY
+ sub $12, %ebx
+ lea 12(%edx), %ecx
+ jnz L(StrncpyFillTailWithZero1)
+# ifdef USE_AS_STPCPY
+ cmpb $1, (%eax)
+ sbb $-1, %eax
+# endif
+# endif
+ RETURN1
+
+ .p2align 4
+L(ExitHigh12):
test $0x10, %ah
jnz L(Exit13)
test $0x20, %ah
.p2align 4
L(Exit16):
- movlpd (%ecx), %xmm0
- movlpd %xmm0, (%edx)
- movlpd 8(%ecx), %xmm0
- movlpd %xmm0, 8(%edx)
-# ifdef USE_AS_STPCPY
- lea 15(%edx), %eax
-# else
- movl %edi, %eax
-# endif
+ movdqu (%ecx), %xmm0
+ movdqu %xmm0, (%edx)
+ SAVE_RESULT (15)
# ifdef USE_AS_STRNCPY
sub $16, %ebx
lea 16(%edx), %ecx
# endif
RETURN1
-# ifdef USE_AS_STRNCPY
+# ifdef USE_AS_STRNCPY
CFI_PUSH(%esi)
L(CopyFrom1To16BytesCase2):
add $16, %ebx
add %esi, %ecx
- lea (%esi, %edx), %esi
- lea -9(%ebx), %edx
- and $1<<7, %dh
- or %al, %dh
- test %dh, %dh
- lea (%esi), %edx
+ add %esi, %edx
+
POP (%esi)
+
+ test %al, %al
jz L(ExitHighCase2)
- cmp $1, %ebx
- je L(Exit1)
+ cmp $8, %ebx
+ ja L(CopyFrom1To16BytesLess8)
+
test $0x01, %al
jnz L(Exit1)
- cmp $2, %ebx
- je L(Exit2)
+ cmp $1, %ebx
+ je L(Exit1)
test $0x02, %al
jnz L(Exit2)
- cmp $3, %ebx
- je L(Exit3)
+ cmp $2, %ebx
+ je L(Exit2)
test $0x04, %al
jnz L(Exit3)
- cmp $4, %ebx
- je L(Exit4)
+ cmp $3, %ebx
+ je L(Exit3)
test $0x08, %al
jnz L(Exit4)
- cmp $5, %ebx
- je L(Exit5)
+ cmp $4, %ebx
+ je L(Exit4)
test $0x10, %al
jnz L(Exit5)
- cmp $6, %ebx
- je L(Exit6)
+ cmp $5, %ebx
+ je L(Exit5)
test $0x20, %al
jnz L(Exit6)
- cmp $7, %ebx
- je L(Exit7)
+ cmp $6, %ebx
+ je L(Exit6)
test $0x40, %al
jnz L(Exit7)
+ cmp $7, %ebx
+ je L(Exit7)
jmp L(Exit8)
.p2align 4
L(ExitHighCase2):
- cmp $9, %ebx
- je L(Exit9)
+ cmp $8, %ebx
+ jbe L(CopyFrom1To16BytesLess8Case3)
+
test $0x01, %ah
jnz L(Exit9)
- cmp $10, %ebx
- je L(Exit10)
+ cmp $9, %ebx
+ je L(Exit9)
test $0x02, %ah
jnz L(Exit10)
- cmp $11, %ebx
- je L(Exit11)
+ cmp $10, %ebx
+ je L(Exit10)
test $0x04, %ah
jnz L(Exit11)
- cmp $12, %ebx
- je L(Exit12)
+ cmp $11, %ebx
+ je L(Exit11)
test $0x8, %ah
jnz L(Exit12)
- cmp $13, %ebx
- je L(Exit13)
+ cmp $12, %ebx
+ je L(Exit12)
test $0x10, %ah
jnz L(Exit13)
- cmp $14, %ebx
- je L(Exit14)
+ cmp $13, %ebx
+ je L(Exit13)
test $0x20, %ah
jnz L(Exit14)
- cmp $15, %ebx
- je L(Exit15)
+ cmp $14, %ebx
+ je L(Exit14)
test $0x40, %ah
jnz L(Exit15)
+ cmp $15, %ebx
+ je L(Exit15)
jmp L(Exit16)
CFI_PUSH(%esi)
+ .p2align 4
L(CopyFrom1To16BytesCase2OrCase3):
test %eax, %eax
jnz L(CopyFrom1To16BytesCase2)
add %esi, %ecx
POP (%esi)
- cmp $16, %ebx
- je L(Exit16)
+
cmp $8, %ebx
- je L(Exit8)
- jg L(More8Case3)
+ ja L(ExitHigh8Case3)
+
+L(CopyFrom1To16BytesLess8Case3):
cmp $4, %ebx
- je L(Exit4)
- jg L(More4Case3)
+ ja L(ExitHigh4Case3)
+
+ cmp $1, %ebx
+ je L(Exit1)
cmp $2, %ebx
- jl L(Exit1)
je L(Exit2)
- jg L(Exit3)
-L(More8Case3): /* but less than 16 */
- cmp $12, %ebx
- je L(Exit12)
- jl L(Less12Case3)
- cmp $14, %ebx
- jl L(Exit13)
- je L(Exit14)
- jg L(Exit15)
-L(More4Case3): /* but less than 8 */
+ cmp $3, %ebx
+ je L(Exit3)
+ movl (%ecx), %eax
+ movl %eax, (%edx)
+ SAVE_RESULT (4)
+ RETURN1
+
+ .p2align 4
+L(ExitHigh4Case3):
+ cmp $5, %ebx
+ je L(Exit5)
cmp $6, %ebx
- jl L(Exit5)
je L(Exit6)
- jg L(Exit7)
-L(Less12Case3): /* but more than 8 */
+ cmp $7, %ebx
+ je L(Exit7)
+ movlpd (%ecx), %xmm0
+ movlpd %xmm0, (%edx)
+ SAVE_RESULT (8)
+ RETURN1
+
+ .p2align 4
+L(ExitHigh8Case3):
+ cmp $12, %ebx
+ ja L(ExitHigh12Case3)
+
+ cmp $9, %ebx
+ je L(Exit9)
cmp $10, %ebx
- jl L(Exit9)
je L(Exit10)
- jg L(Exit11)
-# endif
+ cmp $11, %ebx
+ je L(Exit11)
+ movlpd (%ecx), %xmm0
+ movl 8(%ecx), %eax
+ movlpd %xmm0, (%edx)
+ movl %eax, 8(%edx)
+ SAVE_RESULT (12)
+ RETURN1
.p2align 4
-L(Exit1):
+L(ExitHigh12Case3):
+ cmp $13, %ebx
+ je L(Exit13)
+ cmp $14, %ebx
+ je L(Exit14)
+ cmp $15, %ebx
+ je L(Exit15)
+ movlpd (%ecx), %xmm0
+ movlpd 8(%ecx), %xmm1
+ movlpd %xmm0, (%edx)
+ movlpd %xmm1, 8(%edx)
+ SAVE_RESULT (16)
+ RETURN1
+
+# endif
+
+ .p2align 4
+L(Exit1):
movb (%ecx), %al
movb %al, (%edx)
-# ifdef USE_AS_STPCPY
- lea (%edx), %eax
-# else
- movl %edi, %eax
-# endif
+ SAVE_RESULT (0)
# ifdef USE_AS_STRNCPY
sub $1, %ebx
lea 1(%edx), %ecx
L(Exit2):
movw (%ecx), %ax
movw %ax, (%edx)
-# ifdef USE_AS_STPCPY
- lea 1(%edx), %eax
-# else
- movl %edi, %eax
-# endif
+ SAVE_RESULT (1)
# ifdef USE_AS_STRNCPY
sub $2, %ebx
lea 2(%edx), %ecx
movw %ax, (%edx)
movb 2(%ecx), %al
movb %al, 2(%edx)
-# ifdef USE_AS_STPCPY
- lea 2(%edx), %eax
-# else
- movl %edi, %eax
-# endif
+ SAVE_RESULT (2)
# ifdef USE_AS_STRNCPY
sub $3, %ebx
lea 3(%edx), %ecx
RETURN1
.p2align 4
-L(Exit4):
- movl (%ecx), %eax
- movl %eax, (%edx)
-# ifdef USE_AS_STPCPY
- lea 3(%edx), %eax
-# else
- movl %edi, %eax
-# endif
-# ifdef USE_AS_STRNCPY
- sub $4, %ebx
- lea 4(%edx), %ecx
- jnz L(StrncpyFillTailWithZero1)
-# ifdef USE_AS_STPCPY
- cmpb $1, (%eax)
- sbb $-1, %eax
-# endif
-# endif
- RETURN1
-
- .p2align 4
L(Exit5):
movl (%ecx), %eax
movl %eax, (%edx)
movb 4(%ecx), %al
movb %al, 4(%edx)
-# ifdef USE_AS_STPCPY
- lea 4(%edx), %eax
-# else
- movl %edi, %eax
-# endif
+ SAVE_RESULT (4)
# ifdef USE_AS_STRNCPY
sub $5, %ebx
lea 5(%edx), %ecx
movl %eax, (%edx)
movw 4(%ecx), %ax
movw %ax, 4(%edx)
-# ifdef USE_AS_STPCPY
- lea 5(%edx), %eax
-# else
- movl %edi, %eax
-# endif
+ SAVE_RESULT (5)
# ifdef USE_AS_STRNCPY
sub $6, %ebx
lea 6(%edx), %ecx
movl %eax, (%edx)
movl 3(%ecx), %eax
movl %eax, 3(%edx)
-# ifdef USE_AS_STPCPY
- lea 6(%edx), %eax
-# else
- movl %edi, %eax
-# endif
+ SAVE_RESULT (6)
# ifdef USE_AS_STRNCPY
sub $7, %ebx
lea 7(%edx), %ecx
.p2align 4
L(Exit9):
movlpd (%ecx), %xmm0
- movlpd %xmm0, (%edx)
movb 8(%ecx), %al
+ movlpd %xmm0, (%edx)
movb %al, 8(%edx)
-# ifdef USE_AS_STPCPY
- lea 8(%edx), %eax
-# else
- movl %edi, %eax
-# endif
+ SAVE_RESULT (8)
# ifdef USE_AS_STRNCPY
sub $9, %ebx
lea 9(%edx), %ecx
.p2align 4
L(Exit10):
movlpd (%ecx), %xmm0
- movlpd %xmm0, (%edx)
movw 8(%ecx), %ax
+ movlpd %xmm0, (%edx)
movw %ax, 8(%edx)
-# ifdef USE_AS_STPCPY
- lea 9(%edx), %eax
-# else
- movl %edi, %eax
-# endif
+ SAVE_RESULT (9)
# ifdef USE_AS_STRNCPY
sub $10, %ebx
lea 10(%edx), %ecx
.p2align 4
L(Exit11):
movlpd (%ecx), %xmm0
- movlpd %xmm0, (%edx)
movl 7(%ecx), %eax
+ movlpd %xmm0, (%edx)
movl %eax, 7(%edx)
-# ifdef USE_AS_STPCPY
- lea 10(%edx), %eax
-# else
- movl %edi, %eax
-# endif
+ SAVE_RESULT (10)
# ifdef USE_AS_STRNCPY
sub $11, %ebx
lea 11(%edx), %ecx
RETURN1
.p2align 4
-L(Exit12):
- movlpd (%ecx), %xmm0
- movlpd %xmm0, (%edx)
- movl 8(%ecx), %eax
- movl %eax, 8(%edx)
-# ifdef USE_AS_STPCPY
- lea 11(%edx), %eax
-# else
- movl %edi, %eax
-# endif
-# ifdef USE_AS_STRNCPY
- sub $12, %ebx
- lea 12(%edx), %ecx
- jnz L(StrncpyFillTailWithZero1)
-# ifdef USE_AS_STPCPY
- cmpb $1, (%eax)
- sbb $-1, %eax
-# endif
-# endif
- RETURN1
-
- .p2align 4
L(Exit13):
movlpd (%ecx), %xmm0
+ movlpd 5(%ecx), %xmm1
movlpd %xmm0, (%edx)
- movlpd 5(%ecx), %xmm0
- movlpd %xmm0, 5(%edx)
-# ifdef USE_AS_STPCPY
- lea 12(%edx), %eax
-# else
- movl %edi, %eax
-# endif
+ movlpd %xmm1, 5(%edx)
+ SAVE_RESULT (12)
# ifdef USE_AS_STRNCPY
sub $13, %ebx
lea 13(%edx), %ecx
.p2align 4
L(Exit14):
movlpd (%ecx), %xmm0
+ movlpd 6(%ecx), %xmm1
movlpd %xmm0, (%edx)
- movlpd 6(%ecx), %xmm0
- movlpd %xmm0, 6(%edx)
-# ifdef USE_AS_STPCPY
- lea 13(%edx), %eax
-# else
- movl %edi, %eax
-# endif
+ movlpd %xmm1, 6(%edx)
+ SAVE_RESULT (13)
# ifdef USE_AS_STRNCPY
sub $14, %ebx
lea 14(%edx), %ecx
.p2align 4
L(Exit15):
movlpd (%ecx), %xmm0
+ movlpd 7(%ecx), %xmm1
movlpd %xmm0, (%edx)
- movlpd 7(%ecx), %xmm0
- movlpd %xmm0, 7(%edx)
-# ifdef USE_AS_STPCPY
- lea 14(%edx), %eax
-# else
- movl %edi, %eax
-# endif
+ movlpd %xmm1, 7(%edx)
+ SAVE_RESULT (14)
# ifdef USE_AS_STRNCPY
sub $15, %ebx
lea 15(%edx), %ecx
jl L(Fill1)
je L(Fill2)
jg L(Fill3)
-L(FillMore8): /* but less than 16 */
+L(FillMore8): /* but less than 16 */
cmp $12, %ebx
je L(Fill12)
jl L(FillLess12)
jl L(Fill13)
je L(Fill14)
jg L(Fill15)
-L(FillMore4): /* but less than 8 */
+L(FillMore4): /* but less than 8 */
cmp $6, %ebx
jl L(Fill5)
je L(Fill6)
jg L(Fill7)
-L(FillLess12): /* but more than 8 */
+L(FillLess12): /* but more than 8 */
cmp $10, %ebx
jl L(Fill9)
je L(Fill10)
jmp L(Fill11)
- CFI_PUSH (%edi)
+ CFI_PUSH(%edi)
.p2align 4
L(StrncpyFillTailWithZero1):
L(ExitTail1):
movb (%ecx), %al
movb %al, (%edx)
-# ifdef USE_AS_STPCPY
- lea (%edx), %eax
-# else
- movl %edx, %eax
-# endif
+ SAVE_RESULT_TAIL (0)
# ifdef USE_AS_STRNCPY
sub $1, %ebx
lea 1(%edx), %ecx
L(ExitTail2):
movw (%ecx), %ax
movw %ax, (%edx)
-# ifdef USE_AS_STPCPY
- lea 1(%edx), %eax
-# else
- movl %edx, %eax
-# endif
+ SAVE_RESULT_TAIL (1)
# ifdef USE_AS_STRNCPY
sub $2, %ebx
lea 2(%edx), %ecx
movw %ax, (%edx)
movb 2(%ecx), %al
movb %al, 2(%edx)
-# ifdef USE_AS_STPCPY
- lea 2(%edx), %eax
-# else
- movl %edx, %eax
-# endif
+ SAVE_RESULT_TAIL (2)
# ifdef USE_AS_STRNCPY
sub $3, %ebx
lea 3(%edx), %ecx
L(ExitTail4):
movl (%ecx), %eax
movl %eax, (%edx)
-# ifdef USE_AS_STPCPY
- lea 3(%edx), %eax
-# else
- movl %edx, %eax
-# endif
+ SAVE_RESULT_TAIL (3)
# ifdef USE_AS_STRNCPY
sub $4, %ebx
lea 4(%edx), %ecx
movl %eax, (%edx)
movb 4(%ecx), %al
movb %al, 4(%edx)
-# ifdef USE_AS_STPCPY
- lea 4(%edx), %eax
-# else
- movl %edx, %eax
-# endif
+ SAVE_RESULT_TAIL (4)
# ifdef USE_AS_STRNCPY
sub $5, %ebx
lea 5(%edx), %ecx
movl %eax, (%edx)
movw 4(%ecx), %ax
movw %ax, 4(%edx)
-# ifdef USE_AS_STPCPY
- lea 5(%edx), %eax
-# else
- movl %edx, %eax
-# endif
+ SAVE_RESULT_TAIL (5)
# ifdef USE_AS_STRNCPY
sub $6, %ebx
lea 6(%edx), %ecx
movl %eax, (%edx)
movl 3(%ecx), %eax
movl %eax, 3(%edx)
-# ifdef USE_AS_STPCPY
- lea 6(%edx), %eax
-# else
- movl %edx, %eax
-# endif
+ SAVE_RESULT_TAIL (6)
# ifdef USE_AS_STRNCPY
sub $7, %ebx
lea 7(%edx), %ecx
L(ExitTail8):
movlpd (%ecx), %xmm0
movlpd %xmm0, (%edx)
-# ifdef USE_AS_STPCPY
- lea 7(%edx), %eax
-# else
- movl %edx, %eax
-# endif
+ SAVE_RESULT_TAIL (7)
# ifdef USE_AS_STRNCPY
sub $8, %ebx
lea 8(%edx), %ecx
jnz L(StrncpyFillTailWithZero)
-# ifdef USE_AS_STPCPY
- cmpb $1, (%eax)
- sbb $-1, %eax
-# endif
# endif
RETURN
.p2align 4
L(ExitTail9):
movlpd (%ecx), %xmm0
- movlpd %xmm0, (%edx)
movb 8(%ecx), %al
+ movlpd %xmm0, (%edx)
movb %al, 8(%edx)
-# ifdef USE_AS_STPCPY
- lea 8(%edx), %eax
-# else
- movl %edx, %eax
-# endif
+ SAVE_RESULT_TAIL (8)
# ifdef USE_AS_STRNCPY
sub $9, %ebx
lea 9(%edx), %ecx
.p2align 4
L(ExitTail10):
movlpd (%ecx), %xmm0
- movlpd %xmm0, (%edx)
movw 8(%ecx), %ax
+ movlpd %xmm0, (%edx)
movw %ax, 8(%edx)
-# ifdef USE_AS_STPCPY
- lea 9(%edx), %eax
-# else
- movl %edx, %eax
-# endif
+ SAVE_RESULT_TAIL (9)
# ifdef USE_AS_STRNCPY
sub $10, %ebx
lea 10(%edx), %ecx
.p2align 4
L(ExitTail11):
movlpd (%ecx), %xmm0
- movlpd %xmm0, (%edx)
movl 7(%ecx), %eax
+ movlpd %xmm0, (%edx)
movl %eax, 7(%edx)
-# ifdef USE_AS_STPCPY
- lea 10(%edx), %eax
-# else
- movl %edx, %eax
-# endif
+ SAVE_RESULT_TAIL (10)
# ifdef USE_AS_STRNCPY
sub $11, %ebx
lea 11(%edx), %ecx
.p2align 4
L(ExitTail12):
movlpd (%ecx), %xmm0
- movlpd %xmm0, (%edx)
movl 8(%ecx), %eax
+ movlpd %xmm0, (%edx)
movl %eax, 8(%edx)
-# ifdef USE_AS_STPCPY
- lea 11(%edx), %eax
-# else
- movl %edx, %eax
-# endif
+ SAVE_RESULT_TAIL (11)
# ifdef USE_AS_STRNCPY
sub $12, %ebx
lea 12(%edx), %ecx
.p2align 4
L(ExitTail13):
movlpd (%ecx), %xmm0
+ movlpd 5(%ecx), %xmm1
movlpd %xmm0, (%edx)
- movlpd 5(%ecx), %xmm0
- movlpd %xmm0, 5(%edx)
-# ifdef USE_AS_STPCPY
- lea 12(%edx), %eax
-# else
- movl %edx, %eax
-# endif
+ movlpd %xmm1, 5(%edx)
+ SAVE_RESULT_TAIL (12)
# ifdef USE_AS_STRNCPY
sub $13, %ebx
lea 13(%edx), %ecx
.p2align 4
L(ExitTail14):
movlpd (%ecx), %xmm0
+ movlpd 6(%ecx), %xmm1
movlpd %xmm0, (%edx)
- movlpd 6(%ecx), %xmm0
- movlpd %xmm0, 6(%edx)
-# ifdef USE_AS_STPCPY
- lea 13(%edx), %eax
-# else
- movl %edx, %eax
-# endif
+ movlpd %xmm1, 6(%edx)
+ SAVE_RESULT_TAIL (13)
# ifdef USE_AS_STRNCPY
sub $14, %ebx
lea 14(%edx), %ecx
jnz L(StrncpyFillTailWithZero)
-# ifdef USE_AS_STPCPY
+# ifdef USE_AS_STPCPY
cmpb $1, (%eax)
sbb $-1, %eax
# endif
.p2align 4
L(ExitTail15):
movlpd (%ecx), %xmm0
+ movlpd 7(%ecx), %xmm1
movlpd %xmm0, (%edx)
- movlpd 7(%ecx), %xmm0
- movlpd %xmm0, 7(%edx)
-# ifdef USE_AS_STPCPY
- lea 14(%edx), %eax
-# else
- movl %edx, %eax
-# endif
+ movlpd %xmm1, 7(%edx)
+ SAVE_RESULT_TAIL (14)
# ifdef USE_AS_STRNCPY
sub $15, %ebx
lea 15(%edx), %ecx
jnz L(StrncpyFillTailWithZero)
-# ifdef USE_AS_STPCPY
- cmpb $1, (%eax)
- sbb $-1, %eax
-# endif
# endif
RETURN
.p2align 4
L(ExitTail16):
- movlpd (%ecx), %xmm0
- movlpd %xmm0, (%edx)
- movlpd 8(%ecx), %xmm0
- movlpd %xmm0, 8(%edx)
-# ifdef USE_AS_STPCPY
- lea 15(%edx), %eax
-# else
- movl %edx, %eax
-# endif
+ movdqu (%ecx), %xmm0
+ movdqu %xmm0, (%edx)
+ SAVE_RESULT_TAIL (15)
# ifdef USE_AS_STRNCPY
sub $16, %ebx
lea 16(%edx), %ecx
# endif
# endif
RETURN
-#endif
+# endif
# ifdef USE_AS_STRNCPY
# ifndef USE_AS_STRCAT
- CFI_PUSH (%esi)
- CFI_PUSH (%edi)
+ CFI_PUSH (%esi)
+ CFI_PUSH (%edi)
# endif
+ .p2align 4
L(StrncpyLeaveCase2OrCase3):
test %eax, %eax
jnz L(Aligned64LeaveCase2)
lea 16(%esi), %esi
lea -16(%ebx), %ebx
jmp L(CopyFrom1To16BytesCase2)
-/* -------------------------------------------------- */
+
+/*--------------------------------------------------*/
+ .p2align 4
L(StrncpyExit1Case2OrCase3):
- movaps (%edx), %xmm6
- psrldq $15, %xmm6
+ movlpd (%ecx), %xmm0
+ movlpd 7(%ecx), %xmm1
+ movlpd %xmm0, (%edx)
+ movlpd %xmm1, 7(%edx)
mov $15, %esi
- palignr $1, %xmm1, %xmm6
- movaps %xmm6, (%edx)
test %eax, %eax
jnz L(CopyFrom1To16BytesCase2)
jmp L(CopyFrom1To16BytesCase3)
+ .p2align 4
L(StrncpyExit2Case2OrCase3):
- movaps (%edx), %xmm6
- psrldq $14, %xmm6
+ movlpd (%ecx), %xmm0
+ movlpd 6(%ecx), %xmm1
+ movlpd %xmm0, (%edx)
+ movlpd %xmm1, 6(%edx)
mov $14, %esi
- palignr $2, %xmm1, %xmm6
- movaps %xmm6, (%edx)
test %eax, %eax
jnz L(CopyFrom1To16BytesCase2)
jmp L(CopyFrom1To16BytesCase3)
+ .p2align 4
L(StrncpyExit3Case2OrCase3):
- movaps (%edx), %xmm6
- psrldq $13, %xmm6
+ movlpd (%ecx), %xmm0
+ movlpd 5(%ecx), %xmm1
+ movlpd %xmm0, (%edx)
+ movlpd %xmm1, 5(%edx)
mov $13, %esi
- palignr $3, %xmm1, %xmm6
- movaps %xmm6, (%edx)
test %eax, %eax
jnz L(CopyFrom1To16BytesCase2)
jmp L(CopyFrom1To16BytesCase3)
+ .p2align 4
L(StrncpyExit4Case2OrCase3):
- movaps (%edx), %xmm6
- psrldq $12, %xmm6
+ movlpd (%ecx), %xmm0
+ movl 8(%ecx), %esi
+ movlpd %xmm0, (%edx)
+ movl %esi, 8(%edx)
mov $12, %esi
- palignr $4, %xmm1, %xmm6
- movaps %xmm6, (%edx)
test %eax, %eax
jnz L(CopyFrom1To16BytesCase2)
jmp L(CopyFrom1To16BytesCase3)
+ .p2align 4
L(StrncpyExit5Case2OrCase3):
- movaps (%edx), %xmm6
- psrldq $11, %xmm6
+ movlpd (%ecx), %xmm0
+ movl 7(%ecx), %esi
+ movlpd %xmm0, (%edx)
+ movl %esi, 7(%edx)
mov $11, %esi
- palignr $5, %xmm1, %xmm6
- movaps %xmm6, (%edx)
test %eax, %eax
jnz L(CopyFrom1To16BytesCase2)
jmp L(CopyFrom1To16BytesCase3)
+ .p2align 4
L(StrncpyExit6Case2OrCase3):
- movaps (%edx), %xmm6
- psrldq $10, %xmm6
+ movlpd (%ecx), %xmm0
+ movl 6(%ecx), %esi
+ movlpd %xmm0, (%edx)
+ movl %esi, 6(%edx)
mov $10, %esi
- palignr $6, %xmm1, %xmm6
- movaps %xmm6, (%edx)
test %eax, %eax
jnz L(CopyFrom1To16BytesCase2)
jmp L(CopyFrom1To16BytesCase3)
+ .p2align 4
L(StrncpyExit7Case2OrCase3):
- movaps (%edx), %xmm6
- psrldq $9, %xmm6
+ movlpd (%ecx), %xmm0
+ movl 5(%ecx), %esi
+ movlpd %xmm0, (%edx)
+ movl %esi, 5(%edx)
mov $9, %esi
- palignr $7, %xmm1, %xmm6
- movaps %xmm6, (%edx)
test %eax, %eax
jnz L(CopyFrom1To16BytesCase2)
jmp L(CopyFrom1To16BytesCase3)
+ .p2align 4
L(StrncpyExit8Case2OrCase3):
- movaps (%edx), %xmm6
- psrldq $8, %xmm6
+ movlpd (%ecx), %xmm0
+ movlpd %xmm0, (%edx)
mov $8, %esi
- palignr $8, %xmm1, %xmm6
- movaps %xmm6, (%edx)
test %eax, %eax
jnz L(CopyFrom1To16BytesCase2)
jmp L(CopyFrom1To16BytesCase3)
+ .p2align 4
L(StrncpyExit9Case2OrCase3):
- movaps (%edx), %xmm6
- psrldq $7, %xmm6
+ movlpd (%ecx), %xmm0
+ movlpd %xmm0, (%edx)
mov $7, %esi
- palignr $9, %xmm1, %xmm6
- movaps %xmm6, (%edx)
test %eax, %eax
jnz L(CopyFrom1To16BytesCase2)
jmp L(CopyFrom1To16BytesCase3)
+ .p2align 4
L(StrncpyExit10Case2OrCase3):
- movaps (%edx), %xmm6
- psrldq $6, %xmm6
+ movlpd -1(%ecx), %xmm0
+ movlpd %xmm0, -1(%edx)
mov $6, %esi
- palignr $10, %xmm1, %xmm6
- movaps %xmm6, (%edx)
test %eax, %eax
jnz L(CopyFrom1To16BytesCase2)
jmp L(CopyFrom1To16BytesCase3)
+ .p2align 4
L(StrncpyExit11Case2OrCase3):
- movaps (%edx), %xmm6
- psrldq $5, %xmm6
+ movlpd -2(%ecx), %xmm0
+ movlpd %xmm0, -2(%edx)
mov $5, %esi
- palignr $11, %xmm1, %xmm6
- movaps %xmm6, (%edx)
test %eax, %eax
jnz L(CopyFrom1To16BytesCase2)
jmp L(CopyFrom1To16BytesCase3)
+ .p2align 4
L(StrncpyExit12Case2OrCase3):
- movaps (%edx), %xmm6
- psrldq $4, %xmm6
+ movl (%ecx), %esi
+ movl %esi, (%edx)
mov $4, %esi
- palignr $12, %xmm1, %xmm6
- movaps %xmm6, (%edx)
test %eax, %eax
jnz L(CopyFrom1To16BytesCase2)
jmp L(CopyFrom1To16BytesCase3)
+ .p2align 4
L(StrncpyExit13Case2OrCase3):
- movaps (%edx), %xmm6
- psrldq $3, %xmm6
+ movl -1(%ecx), %esi
+ movl %esi, -1(%edx)
mov $3, %esi
- palignr $13, %xmm1, %xmm6
- movaps %xmm6, (%edx)
test %eax, %eax
jnz L(CopyFrom1To16BytesCase2)
jmp L(CopyFrom1To16BytesCase3)
+ .p2align 4
L(StrncpyExit14Case2OrCase3):
- movaps (%edx), %xmm6
- psrldq $2, %xmm6
+ movl -2(%ecx), %esi
+ movl %esi, -2(%edx)
mov $2, %esi
- palignr $14, %xmm1, %xmm6
- movaps %xmm6, (%edx)
test %eax, %eax
jnz L(CopyFrom1To16BytesCase2)
jmp L(CopyFrom1To16BytesCase3)
+ .p2align 4
/* Pre-copy step for the "15" strncpy exit.  The added (+) lines copy
   4 bytes from -3(%ecx) to -3(%edx) (offsets -3..0), using %esi as a
   scratch register before it is reloaded with 1.  Control then branches
   on %eax: non-zero goes to L(CopyFrom1To16BytesCase2), zero to
   L(CopyFrom1To16BytesCase3) (both defined outside this excerpt).
   %eax is presumably the NUL-byte mask computed earlier - confirm.
   The removed (-) lines read 16 bytes back from (%edx), merged %xmm1 in
   with psrldq/palignr and stored the block with movaps.  */
L(StrncpyExit15Case2OrCase3):
- movaps (%edx), %xmm6
- psrldq $1, %xmm6
+ movl -3(%ecx), %esi
+ movl %esi, -3(%edx)
mov $1, %esi
- palignr $15, %xmm1, %xmm6
- movaps %xmm6, (%edx)
test %eax, %eax
jnz L(CopyFrom1To16BytesCase2)
jmp L(CopyFrom1To16BytesCase3)
/* strncpy wind-down for the palignr-by-1 copy path.  This is a diff
   excerpt: the entry label of this lead-in is not shown (by analogy
   with the sibling blocks it is presumably L(StrncpyLeave1) - confirm),
   and other unchanged lines may be missing as well.
   %ebx is raised by 48; if the result is <= 0 (signed) control goes
   straight to L(StrncpyExit1).  Otherwise up to four 16-byte chunks are
   stored with movaps at 0, 16, 32 and 48(%edx).  After each store %esi
   grows by 16 and %ebx shrinks by 16, and the path leaves for
   L(StrncpyExit1) as soon as a subtraction borrows or reaches zero
   (jbe).  After the fourth store %ebx is adjusted with lea, which does
   not touch the flags, and control falls through into L(StrncpyExit1).
   The diff also makes the second palignr take %xmm3 directly and drops
   the register shuffling through %xmm1 that only the old exit code
   consumed.  */
add $48, %ebx
jle L(StrncpyExit1)
palignr $1, %xmm1, %xmm2
- movaps %xmm3, %xmm1
movaps %xmm2, (%edx)
movaps 31(%ecx), %xmm2
lea 16(%esi), %esi
- movaps %xmm2, %xmm3
sub $16, %ebx
jbe L(StrncpyExit1)
- palignr $1, %xmm1, %xmm2
+ palignr $1, %xmm3, %xmm2
movaps %xmm2, 16(%edx)
- movaps 31+16(%ecx), %xmm2
- movaps %xmm3, %xmm1
lea 16(%esi), %esi
sub $16, %ebx
jbe L(StrncpyExit1)
- movaps %xmm2, %xmm1
movaps %xmm4, 32(%edx)
lea 16(%esi), %esi
sub $16, %ebx
jbe L(StrncpyExit1)
- movaps %xmm7, %xmm1
movaps %xmm5, 48(%edx)
lea 16(%esi), %esi
lea -16(%ebx), %ebx
-
/* New exit code: advance %edx and %ecx by %esi + 15, then copy the
   16-byte window that ends at the new pointers with unaligned moves
   (the window starts 1 byte before the old %edx + %esi position).
   %esi is cleared and the copy continues at
   L(CopyFrom1To16BytesCase3), defined outside this excerpt.
   The removed lines instead merged %xmm1 into a 16-byte block read back
   from (%edx, %esi) and added 15 to %esi.  */
L(StrncpyExit1):
- movaps (%edx, %esi), %xmm6
- psrldq $15, %xmm6
- palignr $1, %xmm1, %xmm6
- movaps %xmm6, (%edx, %esi)
- lea 15(%esi), %esi
+ lea 15(%edx, %esi), %edx
+ lea 15(%ecx, %esi), %ecx
+ movdqu -16(%ecx), %xmm0
+ xor %esi, %esi
+ movdqu %xmm0, -16(%edx)
jmp L(CopyFrom1To16BytesCase3)
/* strncpy wind-down for the palignr-by-2 copy path (diff excerpt: some
   unchanged lines may not be shown).
   %ebx is raised by 48; if the result is <= 0 (signed) control goes
   straight to L(StrncpyExit2).  Otherwise up to four 16-byte chunks are
   stored with movaps at 0, 16, 32 and 48(%edx).  After each store %esi
   grows by 16 and %ebx shrinks by 16, and the path leaves for
   L(StrncpyExit2) as soon as a subtraction borrows or reaches zero
   (jbe).  After the fourth store %ebx is adjusted with lea, which does
   not touch the flags, and control falls through into L(StrncpyExit2).
   The diff also makes the second palignr take %xmm3 directly and drops
   the register shuffling through %xmm1 that only the old exit code
   consumed.  */
L(StrncpyLeave2):
add $48, %ebx
jle L(StrncpyExit2)
palignr $2, %xmm1, %xmm2
- movaps %xmm3, %xmm1
movaps %xmm2, (%edx)
movaps 30(%ecx), %xmm2
lea 16(%esi), %esi
- movaps %xmm2, %xmm3
sub $16, %ebx
jbe L(StrncpyExit2)
- palignr $2, %xmm1, %xmm2
+ palignr $2, %xmm3, %xmm2
movaps %xmm2, 16(%edx)
- movaps 30+16(%ecx), %xmm2
- movaps %xmm3, %xmm1
lea 16(%esi), %esi
sub $16, %ebx
jbe L(StrncpyExit2)
- movaps %xmm2, %xmm1
movaps %xmm4, 32(%edx)
lea 16(%esi), %esi
sub $16, %ebx
jbe L(StrncpyExit2)
- movaps %xmm7, %xmm1
movaps %xmm5, 48(%edx)
lea 16(%esi), %esi
lea -16(%ebx), %ebx
-
/* New exit code: advance %edx and %ecx by %esi + 14, then copy the
   16-byte window that ends at the new pointers with unaligned moves
   (the window starts 2 bytes before the old %edx + %esi position).
   %esi is cleared and the copy continues at
   L(CopyFrom1To16BytesCase3), defined outside this excerpt.
   The removed lines instead merged %xmm1 into a 16-byte block read back
   from (%edx, %esi) and added 14 to %esi.  */
L(StrncpyExit2):
- movaps (%edx, %esi), %xmm6
- psrldq $14, %xmm6
- palignr $2, %xmm1, %xmm6
- movaps %xmm6, (%edx, %esi)
- lea 14(%esi), %esi
+ lea 14(%edx, %esi), %edx
+ lea 14(%ecx, %esi), %ecx
+ movdqu -16(%ecx), %xmm0
+ xor %esi, %esi
+ movdqu %xmm0, -16(%edx)
jmp L(CopyFrom1To16BytesCase3)
/* strncpy wind-down for the palignr-by-3 copy path (diff excerpt: some
   unchanged lines may not be shown).
   %ebx is raised by 48; if the result is <= 0 (signed) control goes
   straight to L(StrncpyExit3).  Otherwise up to four 16-byte chunks are
   stored with movaps at 0, 16, 32 and 48(%edx).  After each store %esi
   grows by 16 and %ebx shrinks by 16, and the path leaves for
   L(StrncpyExit3) as soon as a subtraction borrows or reaches zero
   (jbe).  After the fourth store %ebx is adjusted with lea, which does
   not touch the flags, and control falls through into L(StrncpyExit3).
   The diff also makes the second palignr take %xmm3 directly and drops
   the register shuffling through %xmm1 that only the old exit code
   consumed.  */
L(StrncpyLeave3):
add $48, %ebx
jle L(StrncpyExit3)
palignr $3, %xmm1, %xmm2
- movaps %xmm3, %xmm1
movaps %xmm2, (%edx)
movaps 29(%ecx), %xmm2
lea 16(%esi), %esi
- movaps %xmm2, %xmm3
sub $16, %ebx
jbe L(StrncpyExit3)
- palignr $3, %xmm1, %xmm2
+ palignr $3, %xmm3, %xmm2
movaps %xmm2, 16(%edx)
- movaps 29+16(%ecx), %xmm2
- movaps %xmm3, %xmm1
lea 16(%esi), %esi
sub $16, %ebx
jbe L(StrncpyExit3)
- movaps %xmm2, %xmm1
movaps %xmm4, 32(%edx)
lea 16(%esi), %esi
sub $16, %ebx
jbe L(StrncpyExit3)
- movaps %xmm7, %xmm1
movaps %xmm5, 48(%edx)
lea 16(%esi), %esi
lea -16(%ebx), %ebx
-
/* New exit code: advance %edx and %ecx by %esi + 13, then copy the
   16-byte window that ends at the new pointers with unaligned moves
   (the window starts 3 bytes before the old %edx + %esi position).
   %esi is cleared and the copy continues at
   L(CopyFrom1To16BytesCase3), defined outside this excerpt.
   The removed lines instead merged %xmm1 into a 16-byte block read back
   from (%edx, %esi) and added 13 to %esi.  */
L(StrncpyExit3):
- movaps (%edx, %esi), %xmm6
- psrldq $13, %xmm6
- palignr $3, %xmm1, %xmm6
- movaps %xmm6, (%edx, %esi)
- lea 13(%esi), %esi
+ lea 13(%edx, %esi), %edx
+ lea 13(%ecx, %esi), %ecx
+ movdqu -16(%ecx), %xmm0
+ xor %esi, %esi
+ movdqu %xmm0, -16(%edx)
jmp L(CopyFrom1To16BytesCase3)
/* strncpy wind-down for the palignr-by-4 copy path (diff excerpt: some
   unchanged lines may not be shown).
   %ebx is raised by 48; if the result is <= 0 (signed) control goes
   straight to L(StrncpyExit4).  Otherwise up to four 16-byte chunks are
   stored with movaps at 0, 16, 32 and 48(%edx).  After each store %esi
   grows by 16 and %ebx shrinks by 16, and the path leaves for
   L(StrncpyExit4) as soon as a subtraction borrows or reaches zero
   (jbe).  After the fourth store %ebx is adjusted with lea, which does
   not touch the flags, and control falls through into L(StrncpyExit4).
   The diff also makes the second palignr take %xmm3 directly and drops
   the register shuffling through %xmm1 that only the old exit code
   consumed.  */
L(StrncpyLeave4):
add $48, %ebx
jle L(StrncpyExit4)
palignr $4, %xmm1, %xmm2
- movaps %xmm3, %xmm1
movaps %xmm2, (%edx)
movaps 28(%ecx), %xmm2
lea 16(%esi), %esi
- movaps %xmm2, %xmm3
sub $16, %ebx
jbe L(StrncpyExit4)
- palignr $4, %xmm1, %xmm2
+ palignr $4, %xmm3, %xmm2
movaps %xmm2, 16(%edx)
- movaps 28+16(%ecx), %xmm2
- movaps %xmm3, %xmm1
lea 16(%esi), %esi
sub $16, %ebx
jbe L(StrncpyExit4)
- movaps %xmm2, %xmm1
movaps %xmm4, 32(%edx)
lea 16(%esi), %esi
sub $16, %ebx
jbe L(StrncpyExit4)
- movaps %xmm7, %xmm1
movaps %xmm5, 48(%edx)
lea 16(%esi), %esi
lea -16(%ebx), %ebx
-
/* New exit code: advance %edx and %ecx by %esi + 12, then copy the
   12 bytes that end at the new pointers as an 8-byte move through
   %xmm0 plus a 4-byte move through %eax.  %esi is cleared and the copy
   continues at L(CopyFrom1To16BytesCase3), defined outside this
   excerpt.
   The removed lines instead merged %xmm1 into a 16-byte block read back
   from (%edx, %esi) and added 12 to %esi.  */
L(StrncpyExit4):
- movaps (%edx, %esi), %xmm6
- psrldq $12, %xmm6
- palignr $4, %xmm1, %xmm6
- movaps %xmm6, (%edx, %esi)
- lea 12(%esi), %esi
+ lea 12(%edx, %esi), %edx
+ lea 12(%ecx, %esi), %ecx
+ movlpd -12(%ecx), %xmm0
+ movl -4(%ecx), %eax
+ movlpd %xmm0, -12(%edx)
+ movl %eax, -4(%edx)
+ xor %esi, %esi
jmp L(CopyFrom1To16BytesCase3)
/* strncpy wind-down for the palignr-by-5 copy path (diff excerpt: some
   unchanged lines may not be shown).
   %ebx is raised by 48; if the result is <= 0 (signed) control goes
   straight to L(StrncpyExit5).  Otherwise up to four 16-byte chunks are
   stored with movaps at 0, 16, 32 and 48(%edx).  After each store %esi
   grows by 16 and %ebx shrinks by 16, and the path leaves for
   L(StrncpyExit5) as soon as a subtraction borrows or reaches zero
   (jbe).  After the fourth store %ebx is adjusted with lea, which does
   not touch the flags, and control falls through into L(StrncpyExit5).
   The diff also makes the second palignr take %xmm3 directly and drops
   the register shuffling through %xmm1 that only the old exit code
   consumed.  */
L(StrncpyLeave5):
add $48, %ebx
jle L(StrncpyExit5)
palignr $5, %xmm1, %xmm2
- movaps %xmm3, %xmm1
movaps %xmm2, (%edx)
movaps 27(%ecx), %xmm2
lea 16(%esi), %esi
- movaps %xmm2, %xmm3
sub $16, %ebx
jbe L(StrncpyExit5)
- palignr $5, %xmm1, %xmm2
+ palignr $5, %xmm3, %xmm2
movaps %xmm2, 16(%edx)
- movaps 27+16(%ecx), %xmm2
- movaps %xmm3, %xmm1
lea 16(%esi), %esi
sub $16, %ebx
jbe L(StrncpyExit5)
- movaps %xmm2, %xmm1
movaps %xmm4, 32(%edx)
lea 16(%esi), %esi
sub $16, %ebx
jbe L(StrncpyExit5)
- movaps %xmm7, %xmm1
movaps %xmm5, 48(%edx)
lea 16(%esi), %esi
lea -16(%ebx), %ebx
-
/* New exit code: advance %edx and %ecx by %esi + 11, then copy the
   11 bytes that end at the new pointers as an 8-byte move through
   %xmm0 (offsets -11..-4) plus a 4-byte move through %eax (offsets
   -4..-1); the two moves overlap by one byte.  %esi is cleared and the
   copy continues at L(CopyFrom1To16BytesCase3), defined outside this
   excerpt.
   The removed lines instead merged %xmm1 into a 16-byte block read back
   from (%edx, %esi) and added 11 to %esi.  */
L(StrncpyExit5):
- movaps (%edx, %esi), %xmm6
- psrldq $11, %xmm6
- palignr $5, %xmm1, %xmm6
- movaps %xmm6, (%edx, %esi)
- lea 11(%esi), %esi
+ lea 11(%edx, %esi), %edx
+ lea 11(%ecx, %esi), %ecx
+ movlpd -11(%ecx), %xmm0
+ movl -4(%ecx), %eax
+ movlpd %xmm0, -11(%edx)
+ movl %eax, -4(%edx)
+ xor %esi, %esi
jmp L(CopyFrom1To16BytesCase3)
/* strncpy wind-down for the palignr-by-6 copy path (diff excerpt: some
   unchanged lines may not be shown).
   %ebx is raised by 48; if the result is <= 0 (signed) control goes
   straight to L(StrncpyExit6).  Otherwise up to four 16-byte chunks are
   stored with movaps at 0, 16, 32 and 48(%edx).  After each store %esi
   grows by 16 and %ebx shrinks by 16, and the path leaves for
   L(StrncpyExit6) as soon as a subtraction borrows or reaches zero
   (jbe).  After the fourth store %ebx is adjusted with lea, which does
   not touch the flags, and control falls through into L(StrncpyExit6).
   The diff also makes the second palignr take %xmm3 directly and drops
   the register shuffling through %xmm1 that only the old exit code
   consumed.  */
L(StrncpyLeave6):
add $48, %ebx
jle L(StrncpyExit6)
palignr $6, %xmm1, %xmm2
- movaps %xmm3, %xmm1
movaps %xmm2, (%edx)
movaps 26(%ecx), %xmm2
lea 16(%esi), %esi
- movaps %xmm2, %xmm3
sub $16, %ebx
jbe L(StrncpyExit6)
- palignr $6, %xmm1, %xmm2
+ palignr $6, %xmm3, %xmm2
movaps %xmm2, 16(%edx)
- movaps 26+16(%ecx), %xmm2
- movaps %xmm3, %xmm1
lea 16(%esi), %esi
sub $16, %ebx
jbe L(StrncpyExit6)
- movaps %xmm2, %xmm1
movaps %xmm4, 32(%edx)
lea 16(%esi), %esi
sub $16, %ebx
jbe L(StrncpyExit6)
- movaps %xmm7, %xmm1
movaps %xmm5, 48(%edx)
lea 16(%esi), %esi
lea -16(%ebx), %ebx
-
/* New exit code: advance %edx and %ecx by %esi + 10, then copy the
   10 bytes that end at the new pointers as an 8-byte move through
   %xmm0 plus a 2-byte move through %ax.  %esi is cleared and the copy
   continues at L(CopyFrom1To16BytesCase3), defined outside this
   excerpt.
   The removed lines instead merged %xmm1 into a 16-byte block read back
   from (%edx, %esi) and added 10 to %esi.  */
L(StrncpyExit6):
- movaps (%edx, %esi), %xmm6
- psrldq $10, %xmm6
- palignr $6, %xmm1, %xmm6
- movaps %xmm6, (%edx, %esi)
- lea 10(%esi), %esi
+ lea 10(%edx, %esi), %edx
+ lea 10(%ecx, %esi), %ecx
+
+ movlpd -10(%ecx), %xmm0
+ movw -2(%ecx), %ax
+ movlpd %xmm0, -10(%edx)
+ movw %ax, -2(%edx)
+ xor %esi, %esi
jmp L(CopyFrom1To16BytesCase3)
/* strncpy wind-down for the palignr-by-7 copy path (diff excerpt: some
   unchanged lines may not be shown).
   %ebx is raised by 48; if the result is <= 0 (signed) control goes
   straight to L(StrncpyExit7).  Otherwise up to four 16-byte chunks are
   stored with movaps at 0, 16, 32 and 48(%edx).  After each store %esi
   grows by 16 and %ebx shrinks by 16, and the path leaves for
   L(StrncpyExit7) as soon as a subtraction borrows or reaches zero
   (jbe).  After the fourth store %ebx is adjusted with lea, which does
   not touch the flags, and control falls through into L(StrncpyExit7).
   The diff also makes the second palignr take %xmm3 directly and drops
   the register shuffling through %xmm1 that only the old exit code
   consumed.  */
L(StrncpyLeave7):
add $48, %ebx
jle L(StrncpyExit7)
palignr $7, %xmm1, %xmm2
- movaps %xmm3, %xmm1
movaps %xmm2, (%edx)
movaps 25(%ecx), %xmm2
lea 16(%esi), %esi
- movaps %xmm2, %xmm3
sub $16, %ebx
jbe L(StrncpyExit7)
- palignr $7, %xmm1, %xmm2
+ palignr $7, %xmm3, %xmm2
movaps %xmm2, 16(%edx)
- movaps 25+16(%ecx), %xmm2
- movaps %xmm3, %xmm1
lea 16(%esi), %esi
sub $16, %ebx
jbe L(StrncpyExit7)
- movaps %xmm2, %xmm1
movaps %xmm4, 32(%edx)
lea 16(%esi), %esi
sub $16, %ebx
jbe L(StrncpyExit7)
- movaps %xmm7, %xmm1
movaps %xmm5, 48(%edx)
lea 16(%esi), %esi
lea -16(%ebx), %ebx
-
/* New exit code: advance %edx and %ecx by %esi + 9, then copy the
   9 bytes that end at the new pointers as an 8-byte move through %xmm0
   plus a single byte through %ah.  %esi is cleared and the copy
   continues at L(CopyFrom1To16BytesCase3), defined outside this
   excerpt.
   The removed lines instead merged %xmm1 into a 16-byte block read back
   from (%edx, %esi) and added 9 to %esi.  */
L(StrncpyExit7):
- movaps (%edx, %esi), %xmm6
- psrldq $9, %xmm6
- palignr $7, %xmm1, %xmm6
- movaps %xmm6, (%edx, %esi)
- lea 9(%esi), %esi
+ lea 9(%edx, %esi), %edx
+ lea 9(%ecx, %esi), %ecx
+
+ movlpd -9(%ecx), %xmm0
+ movb -1(%ecx), %ah
+ movlpd %xmm0, -9(%edx)
+ movb %ah, -1(%edx)
+ xor %esi, %esi
jmp L(CopyFrom1To16BytesCase3)
/* strncpy wind-down for the palignr-by-8 copy path (diff excerpt: some
   unchanged lines may not be shown).
   %ebx is raised by 48; if the result is <= 0 (signed) control goes
   straight to L(StrncpyExit8).  Otherwise up to four 16-byte chunks are
   stored with movaps at 0, 16, 32 and 48(%edx).  After each store %esi
   grows by 16 and %ebx shrinks by 16, and the path leaves for
   L(StrncpyExit8) as soon as a subtraction borrows or reaches zero
   (jbe).  After the fourth store %ebx is adjusted with lea, which does
   not touch the flags, and control falls through into L(StrncpyExit8).
   The diff also makes the second palignr take %xmm3 directly and drops
   the register shuffling through %xmm1 that only the old exit code
   consumed.  */
L(StrncpyLeave8):
add $48, %ebx
jle L(StrncpyExit8)
palignr $8, %xmm1, %xmm2
- movaps %xmm3, %xmm1
movaps %xmm2, (%edx)
movaps 24(%ecx), %xmm2
lea 16(%esi), %esi
- movaps %xmm2, %xmm3
sub $16, %ebx
jbe L(StrncpyExit8)
- palignr $8, %xmm1, %xmm2
+ palignr $8, %xmm3, %xmm2
movaps %xmm2, 16(%edx)
- movaps 24+16(%ecx), %xmm2
- movaps %xmm3, %xmm1
lea 16(%esi), %esi
sub $16, %ebx
jbe L(StrncpyExit8)
- movaps %xmm2, %xmm1
movaps %xmm4, 32(%edx)
lea 16(%esi), %esi
sub $16, %ebx
jbe L(StrncpyExit8)
- movaps %xmm7, %xmm1
movaps %xmm5, 48(%edx)
lea 16(%esi), %esi
lea -16(%ebx), %ebx
-
/* New exit code: advance %edx and %ecx by %esi + 8, then copy the
   8 bytes that end at the new pointers through %xmm0.  %esi is cleared
   and the copy continues at L(CopyFrom1To16BytesCase3), defined outside
   this excerpt.
   The removed lines instead merged %xmm1 into a 16-byte block read back
   from (%edx, %esi) and added 8 to %esi.  */
L(StrncpyExit8):
- movaps (%edx, %esi), %xmm6
- psrldq $8, %xmm6
- palignr $8, %xmm1, %xmm6
- movaps %xmm6, (%edx, %esi)
- lea 8(%esi), %esi
+ lea 8(%edx, %esi), %edx
+ lea 8(%ecx, %esi), %ecx
+ movlpd -8(%ecx), %xmm0
+ movlpd %xmm0, -8(%edx)
+ xor %esi, %esi
jmp L(CopyFrom1To16BytesCase3)
/* strncpy wind-down for the palignr-by-9 copy path (diff excerpt: some
   unchanged lines may not be shown).
   %ebx is raised by 48; if the result is <= 0 (signed) control goes
   straight to L(StrncpyExit9).  Otherwise up to four 16-byte chunks are
   stored with movaps at 0, 16, 32 and 48(%edx).  After each store %esi
   grows by 16 and %ebx shrinks by 16, and the path leaves for
   L(StrncpyExit9) as soon as a subtraction borrows or reaches zero
   (jbe).  After the fourth store %ebx is adjusted with lea, which does
   not touch the flags, and control falls through into L(StrncpyExit9).
   The diff also makes the second palignr take %xmm3 directly and drops
   the register shuffling through %xmm1 that only the old exit code
   consumed.  */
L(StrncpyLeave9):
add $48, %ebx
jle L(StrncpyExit9)
palignr $9, %xmm1, %xmm2
- movaps %xmm3, %xmm1
movaps %xmm2, (%edx)
movaps 23(%ecx), %xmm2
lea 16(%esi), %esi
- movaps %xmm2, %xmm3
sub $16, %ebx
jbe L(StrncpyExit9)
- palignr $9, %xmm1, %xmm2
+ palignr $9, %xmm3, %xmm2
movaps %xmm2, 16(%edx)
- movaps 23+16(%ecx), %xmm2
- movaps %xmm3, %xmm1
lea 16(%esi), %esi
sub $16, %ebx
jbe L(StrncpyExit9)
- movaps %xmm2, %xmm1
movaps %xmm4, 32(%edx)
lea 16(%esi), %esi
sub $16, %ebx
jbe L(StrncpyExit9)
- movaps %xmm7, %xmm1
movaps %xmm5, 48(%edx)
lea 16(%esi), %esi
lea -16(%ebx), %ebx
-
/* New exit code: advance %edx and %ecx by %esi + 7, then copy the
   8-byte window that ends at the new pointers through %xmm0 (the window
   starts 1 byte before the old %edx + %esi position).  %esi is cleared
   and the copy continues at L(CopyFrom1To16BytesCase3), defined outside
   this excerpt.
   The removed lines instead merged %xmm1 into a 16-byte block read back
   from (%edx, %esi) and added 7 to %esi.  */
L(StrncpyExit9):
- movaps (%edx, %esi), %xmm6
- psrldq $7, %xmm6
- palignr $9, %xmm1, %xmm6
- movaps %xmm6, (%edx, %esi)
- lea 7(%esi), %esi
+ lea 7(%edx, %esi), %edx
+ lea 7(%ecx, %esi), %ecx
+
+ movlpd -8(%ecx), %xmm0
+ movlpd %xmm0, -8(%edx)
+ xor %esi, %esi
jmp L(CopyFrom1To16BytesCase3)
/* strncpy wind-down for the palignr-by-10 copy path (diff excerpt: some
   unchanged lines may not be shown).
   %ebx is raised by 48; if the result is <= 0 (signed) control goes
   straight to L(StrncpyExit10).  Otherwise up to four 16-byte chunks
   are stored with movaps at 0, 16, 32 and 48(%edx).  After each store
   %esi grows by 16 and %ebx shrinks by 16, and the path leaves for
   L(StrncpyExit10) as soon as a subtraction borrows or reaches zero
   (jbe).  After the fourth store %ebx is adjusted with lea, which does
   not touch the flags, and control falls through into L(StrncpyExit10).
   The diff also makes the second palignr take %xmm3 directly and drops
   the register shuffling through %xmm1 that only the old exit code
   consumed.  */
L(StrncpyLeave10):
add $48, %ebx
jle L(StrncpyExit10)
palignr $10, %xmm1, %xmm2
- movaps %xmm3, %xmm1
movaps %xmm2, (%edx)
movaps 22(%ecx), %xmm2
lea 16(%esi), %esi
- movaps %xmm2, %xmm3
sub $16, %ebx
jbe L(StrncpyExit10)
- palignr $10, %xmm1, %xmm2
+ palignr $10, %xmm3, %xmm2
movaps %xmm2, 16(%edx)
- movaps 22+16(%ecx), %xmm2
- movaps %xmm3, %xmm1
lea 16(%esi), %esi
sub $16, %ebx
jbe L(StrncpyExit10)
- movaps %xmm2, %xmm1
movaps %xmm4, 32(%edx)
lea 16(%esi), %esi
sub $16, %ebx
jbe L(StrncpyExit10)
- movaps %xmm7, %xmm1
movaps %xmm5, 48(%edx)
lea 16(%esi), %esi
lea -16(%ebx), %ebx
-
/* New exit code: advance %edx and %ecx by %esi + 6, then copy the
   8-byte window that ends at the new pointers through %xmm0 (the window
   starts 2 bytes before the old %edx + %esi position).  %esi is cleared
   and the copy continues at L(CopyFrom1To16BytesCase3), defined outside
   this excerpt.
   The removed lines instead merged %xmm1 into a 16-byte block read back
   from (%edx, %esi) and added 6 to %esi.  */
L(StrncpyExit10):
- movaps (%edx, %esi), %xmm6
- psrldq $6, %xmm6
- palignr $10, %xmm1, %xmm6
- movaps %xmm6, (%edx, %esi)
- lea 6(%esi), %esi
+ lea 6(%edx, %esi), %edx
+ lea 6(%ecx, %esi), %ecx
+
+ movlpd -8(%ecx), %xmm0
+ movlpd %xmm0, -8(%edx)
+ xor %esi, %esi
jmp L(CopyFrom1To16BytesCase3)
/* strncpy wind-down for the palignr-by-11 copy path (diff excerpt: some
   unchanged lines may not be shown).
   %ebx is raised by 48; if the result is <= 0 (signed) control goes
   straight to L(StrncpyExit11).  Otherwise up to four 16-byte chunks
   are stored with movaps at 0, 16, 32 and 48(%edx).  After each store
   %esi grows by 16 and %ebx shrinks by 16, and the path leaves for
   L(StrncpyExit11) as soon as a subtraction borrows or reaches zero
   (jbe).  After the fourth store %ebx is adjusted with lea, which does
   not touch the flags, and control falls through into L(StrncpyExit11).
   The diff also makes the second palignr take %xmm3 directly and drops
   the register shuffling through %xmm1 that only the old exit code
   consumed.  */
L(StrncpyLeave11):
add $48, %ebx
jle L(StrncpyExit11)
palignr $11, %xmm1, %xmm2
- movaps %xmm3, %xmm1
movaps %xmm2, (%edx)
movaps 21(%ecx), %xmm2
lea 16(%esi), %esi
- movaps %xmm2, %xmm3
sub $16, %ebx
jbe L(StrncpyExit11)
- palignr $11, %xmm1, %xmm2
+ palignr $11, %xmm3, %xmm2
movaps %xmm2, 16(%edx)
- movaps 21+16(%ecx), %xmm2
- movaps %xmm3, %xmm1
lea 16(%esi), %esi
sub $16, %ebx
jbe L(StrncpyExit11)
- movaps %xmm2, %xmm1
movaps %xmm4, 32(%edx)
lea 16(%esi), %esi
sub $16, %ebx
jbe L(StrncpyExit11)
- movaps %xmm7, %xmm1
movaps %xmm5, 48(%edx)
lea 16(%esi), %esi
lea -16(%ebx), %ebx
-
/* New exit code: advance %edx and %ecx by %esi + 5, then copy the
   5 bytes that end at the new pointers as a 4-byte move plus a single
   byte through %ah.  %esi is free once the pointers have absorbed it,
   so it carries the dword before being cleared.  The copy continues at
   L(CopyFrom1To16BytesCase3), defined outside this excerpt.
   The removed lines instead merged %xmm1 into a 16-byte block read back
   from (%edx, %esi) and added 5 to %esi.  */
L(StrncpyExit11):
- movaps (%edx, %esi), %xmm6
- psrldq $5, %xmm6
- palignr $11, %xmm1, %xmm6
- movaps %xmm6, (%edx, %esi)
- lea 5(%esi), %esi
+ lea 5(%edx, %esi), %edx
+ lea 5(%ecx, %esi), %ecx
+ movl -5(%ecx), %esi
+ movb -1(%ecx), %ah
+ movl %esi, -5(%edx)
+ movb %ah, -1(%edx)
+ xor %esi, %esi
jmp L(CopyFrom1To16BytesCase3)
/* strncpy wind-down for the palignr-by-12 copy path (diff excerpt: some
   unchanged lines may not be shown).
   %ebx is raised by 48; if the result is <= 0 (signed) control goes
   straight to L(StrncpyExit12).  Otherwise up to four 16-byte chunks
   are stored with movaps at 0, 16, 32 and 48(%edx).  After each store
   %esi grows by 16 and %ebx shrinks by 16, and the path leaves for
   L(StrncpyExit12) as soon as a subtraction borrows or reaches zero
   (jbe).  After the fourth store %ebx is adjusted with lea, which does
   not touch the flags, and control falls through into L(StrncpyExit12).
   The diff also makes the second palignr take %xmm3 directly and drops
   the register shuffling through %xmm1 that only the old exit code
   consumed.  */
L(StrncpyLeave12):
add $48, %ebx
jle L(StrncpyExit12)
palignr $12, %xmm1, %xmm2
- movaps %xmm3, %xmm1
movaps %xmm2, (%edx)
movaps 20(%ecx), %xmm2
lea 16(%esi), %esi
- movaps %xmm2, %xmm3
sub $16, %ebx
jbe L(StrncpyExit12)
- palignr $12, %xmm1, %xmm2
+ palignr $12, %xmm3, %xmm2
movaps %xmm2, 16(%edx)
- movaps 20+16(%ecx), %xmm2
- movaps %xmm3, %xmm1
lea 16(%esi), %esi
sub $16, %ebx
jbe L(StrncpyExit12)
- movaps %xmm2, %xmm1
movaps %xmm4, 32(%edx)
lea 16(%esi), %esi
sub $16, %ebx
jbe L(StrncpyExit12)
- movaps %xmm7, %xmm1
movaps %xmm5, 48(%edx)
lea 16(%esi), %esi
lea -16(%ebx), %ebx
-
/* New exit code: advance %edx and %ecx by %esi + 4, then copy the
   4 bytes that end at the new pointers through %eax.  %esi is cleared
   and the copy continues at L(CopyFrom1To16BytesCase3), defined outside
   this excerpt.
   The removed lines instead merged %xmm1 into a 16-byte block read back
   from (%edx, %esi) and added 4 to %esi.  */
L(StrncpyExit12):
- movaps (%edx, %esi), %xmm6
- psrldq $4, %xmm6
- palignr $12, %xmm1, %xmm6
- movaps %xmm6, (%edx, %esi)
- lea 4(%esi), %esi
+ lea 4(%edx, %esi), %edx
+ lea 4(%ecx, %esi), %ecx
+ movl -4(%ecx), %eax
+ movl %eax, -4(%edx)
+ xor %esi, %esi
jmp L(CopyFrom1To16BytesCase3)
/* strncpy wind-down for the palignr-by-13 copy path (diff excerpt: some
   unchanged lines may not be shown).
   %ebx is raised by 48; if the result is <= 0 (signed) control goes
   straight to L(StrncpyExit13).  Otherwise up to four 16-byte chunks
   are stored with movaps at 0, 16, 32 and 48(%edx).  After each store
   %esi grows by 16 and %ebx shrinks by 16, and the path leaves for
   L(StrncpyExit13) as soon as a subtraction borrows or reaches zero
   (jbe).  After the fourth store %ebx is adjusted with lea, which does
   not touch the flags, and control falls through into L(StrncpyExit13).
   The diff also makes the second palignr take %xmm3 directly and drops
   the register shuffling through %xmm1 that only the old exit code
   consumed.  */
L(StrncpyLeave13):
add $48, %ebx
jle L(StrncpyExit13)
palignr $13, %xmm1, %xmm2
- movaps %xmm3, %xmm1
movaps %xmm2, (%edx)
movaps 19(%ecx), %xmm2
lea 16(%esi), %esi
- movaps %xmm2, %xmm3
sub $16, %ebx
jbe L(StrncpyExit13)
- palignr $13, %xmm1, %xmm2
+ palignr $13, %xmm3, %xmm2
movaps %xmm2, 16(%edx)
- movaps 19+16(%ecx), %xmm2
- movaps %xmm3, %xmm1
lea 16(%esi), %esi
sub $16, %ebx
jbe L(StrncpyExit13)
- movaps %xmm2, %xmm1
movaps %xmm4, 32(%edx)
lea 16(%esi), %esi
sub $16, %ebx
jbe L(StrncpyExit13)
- movaps %xmm7, %xmm1
movaps %xmm5, 48(%edx)
lea 16(%esi), %esi
lea -16(%ebx), %ebx
-
/* New exit code: advance %edx and %ecx by %esi + 3, then copy the
   4-byte window that ends at the new pointers through %eax (the window
   starts 1 byte before the old %edx + %esi position).  %esi is cleared
   and the copy continues at L(CopyFrom1To16BytesCase3), defined outside
   this excerpt.
   The removed lines instead merged %xmm1 into a 16-byte block read back
   from (%edx, %esi) and added 3 to %esi.  */
L(StrncpyExit13):
- movaps (%edx, %esi), %xmm6
- psrldq $3, %xmm6
- palignr $13, %xmm1, %xmm6
- movaps %xmm6, (%edx, %esi)
- lea 3(%esi), %esi
+ lea 3(%edx, %esi), %edx
+ lea 3(%ecx, %esi), %ecx
+
+ movl -4(%ecx), %eax
+ movl %eax, -4(%edx)
+ xor %esi, %esi
jmp L(CopyFrom1To16BytesCase3)
/* strncpy wind-down for the palignr-by-14 copy path (diff excerpt: some
   unchanged lines may not be shown).
   %ebx is raised by 48; if the result is <= 0 (signed) control goes
   straight to L(StrncpyExit14).  Otherwise up to four 16-byte chunks
   are stored with movaps at 0, 16, 32 and 48(%edx).  After each store
   %esi grows by 16 and %ebx shrinks by 16, and the path leaves for
   L(StrncpyExit14) as soon as a subtraction borrows or reaches zero
   (jbe).  After the fourth store %ebx is adjusted with lea, which does
   not touch the flags, and control falls through into L(StrncpyExit14).
   The diff also makes the second palignr take %xmm3 directly and drops
   the register shuffling through %xmm1 that only the old exit code
   consumed.  */
L(StrncpyLeave14):
add $48, %ebx
jle L(StrncpyExit14)
palignr $14, %xmm1, %xmm2
- movaps %xmm3, %xmm1
movaps %xmm2, (%edx)
movaps 18(%ecx), %xmm2
lea 16(%esi), %esi
- movaps %xmm2, %xmm3
sub $16, %ebx
jbe L(StrncpyExit14)
- palignr $14, %xmm1, %xmm2
+ palignr $14, %xmm3, %xmm2
movaps %xmm2, 16(%edx)
- movaps 18+16(%ecx), %xmm2
- movaps %xmm3, %xmm1
lea 16(%esi), %esi
sub $16, %ebx
jbe L(StrncpyExit14)
- movaps %xmm2, %xmm1
movaps %xmm4, 32(%edx)
lea 16(%esi), %esi
sub $16, %ebx
jbe L(StrncpyExit14)
- movaps %xmm7, %xmm1
movaps %xmm5, 48(%edx)
lea 16(%esi), %esi
lea -16(%ebx), %ebx
-
/* New exit code: advance %edx and %ecx by %esi + 2, then copy the
   2 bytes that end at the new pointers through %ax.  %esi is cleared
   and the copy continues at L(CopyFrom1To16BytesCase3), defined outside
   this excerpt.
   The removed lines instead merged %xmm1 into a 16-byte block read back
   from (%edx, %esi) and added 2 to %esi.  */
L(StrncpyExit14):
- movaps (%edx, %esi), %xmm6
- psrldq $2, %xmm6
- palignr $14, %xmm1, %xmm6
- movaps %xmm6, (%edx, %esi)
- lea 2(%esi), %esi
+ lea 2(%edx, %esi), %edx
+ lea 2(%ecx, %esi), %ecx
+ movw -2(%ecx), %ax
+ movw %ax, -2(%edx)
+ xor %esi, %esi
jmp L(CopyFrom1To16BytesCase3)
/* strncpy wind-down for the palignr-by-15 copy path (diff excerpt: some
   unchanged lines may not be shown).
   %ebx is raised by 48; if the result is <= 0 (signed) control goes
   straight to L(StrncpyExit15).  Otherwise up to four 16-byte chunks
   are stored with movaps at 0, 16, 32 and 48(%edx).  After each store
   %esi grows by 16 and %ebx shrinks by 16, and the path leaves for
   L(StrncpyExit15) as soon as a subtraction borrows or reaches zero
   (jbe).  After the fourth store %ebx is adjusted with lea, which does
   not touch the flags, and control falls through into L(StrncpyExit15).
   The diff also makes the second palignr take %xmm3 directly and drops
   the register shuffling through %xmm1 that only the old exit code
   consumed.  */
L(StrncpyLeave15):
add $48, %ebx
jle L(StrncpyExit15)
palignr $15, %xmm1, %xmm2
- movaps %xmm3, %xmm1
movaps %xmm2, (%edx)
movaps 17(%ecx), %xmm2
lea 16(%esi), %esi
- movaps %xmm2, %xmm3
sub $16, %ebx
jbe L(StrncpyExit15)
- palignr $15, %xmm1, %xmm2
+ palignr $15, %xmm3, %xmm2
movaps %xmm2, 16(%edx)
- movaps 17+16(%ecx), %xmm2
- movaps %xmm3, %xmm1
lea 16(%esi), %esi
sub $16, %ebx
jbe L(StrncpyExit15)
- movaps %xmm2, %xmm1
movaps %xmm4, 32(%edx)
lea 16(%esi), %esi
sub $16, %ebx
jbe L(StrncpyExit15)
- movaps %xmm7, %xmm1
movaps %xmm5, 48(%edx)
lea 16(%esi), %esi
lea -16(%ebx), %ebx
-
/* New exit code: advance %edx and %ecx by %esi + 1, then copy the
   single byte just before the new pointers through %ah.  %esi is
   cleared and the copy continues at L(CopyFrom1To16BytesCase3), defined
   outside this excerpt.
   The removed lines instead merged %xmm1 into a 16-byte block read back
   from (%edx, %esi) and added 1 to %esi.  */
L(StrncpyExit15):
- movaps (%edx, %esi), %xmm6
- psrldq $1, %xmm6
- palignr $15, %xmm1, %xmm6
- movaps %xmm6, (%edx, %esi)
- lea 1(%esi), %esi
+ lea 1(%edx, %esi), %edx
+ lea 1(%ecx, %esi), %ecx
+ movb -1(%ecx), %ah
+ movb %ah, -1(%edx)
+ xor %esi, %esi
jmp L(CopyFrom1To16BytesCase3)
# endif
# ifndef USE_AS_STRCAT
# ifdef USE_AS_STRNCPY
- CFI_POP (%esi)
- CFI_POP (%edi)
+ CFI_POP (%esi)
+ CFI_POP (%edi)
.p2align 4
L(ExitTail0):
.p2align 4
/* Short-length strncpy tail (diff excerpt: several unchanged lines of
   this block are not shown, so the visible instruction sequence is
   incomplete - e.g. the branch after "cmp $13, %ebx" and the rest of
   the USE_AS_STPCPY section are missing).
   After the change, any %ebx <= 12 (unsigned) is diverted up front to
   L(StrncpyExit12Bytes), which is why the per-length comparisons
   against 9, 10, 11 and 12 were removed here.  The remaining visible
   checks test the source bytes at offsets 8..11 and 13 for NUL and
   leave through the matching L(ExitTailN) label.
   The final copy moves 15 bytes as two overlapping 8-byte moves
   (offsets 0..7 and 7..14).  The diff now loads both halves, into %xmm0
   and %xmm1, before the first store.  */
L(StrncpyExit15Bytes):
- cmp $9, %ebx
- je L(ExitTail9)
+ cmp $12, %ebx
+ jbe L(StrncpyExit12Bytes)
cmpb $0, 8(%ecx)
jz L(ExitTail9)
- cmp $10, %ebx
- je L(ExitTail10)
cmpb $0, 9(%ecx)
jz L(ExitTail10)
- cmp $11, %ebx
- je L(ExitTail11)
cmpb $0, 10(%ecx)
jz L(ExitTail11)
- cmp $12, %ebx
- je L(ExitTail12)
cmpb $0, 11(%ecx)
jz L(ExitTail12)
cmp $13, %ebx
cmpb $0, 13(%ecx)
jz L(ExitTail14)
movlpd (%ecx), %xmm0
+ movlpd 7(%ecx), %xmm1
movlpd %xmm0, (%edx)
- movlpd 7(%ecx), %xmm0
- movlpd %xmm0, 7(%edx)
+ movlpd %xmm1, 7(%edx)
# ifdef USE_AS_STPCPY
lea 14(%edx), %eax
cmpb $1, (%eax)
RETURN
.p2align 4
/* New block, reached from L(StrncpyExit15Bytes) when %ebx <= 12.
   For each of the lengths 9, 10 and 11 it leaves through
   L(ExitTail9/10/11) when %ebx equals that length, or when the source
   byte at offset 8, 9 or 10 respectively is NUL.
   Otherwise it copies 12 bytes: 8 through %xmm0 and 4 through %eax.
   Return value: SAVE_RESULT_TAIL (11) yields %edx + 11 in the
   USE_AS_STPCPY build and %edx otherwise.  For stpcpy, cmpb sets the
   carry flag only when the byte at (%eax) is zero, so "sbb $-1" adds 1
   to %eax unless that byte is NUL.  */
+L(StrncpyExit12Bytes):
+ cmp $9, %ebx
+ je L(ExitTail9)
+ cmpb $0, 8(%ecx)
+ jz L(ExitTail9)
+ cmp $10, %ebx
+ je L(ExitTail10)
+ cmpb $0, 9(%ecx)
+ jz L(ExitTail10)
+ cmp $11, %ebx
+ je L(ExitTail11)
+ cmpb $0, 10(%ecx)
+ jz L(ExitTail11)
+ movlpd (%ecx), %xmm0
+ movl 8(%ecx), %eax
+ movlpd %xmm0, (%edx)
+ movl %eax, 8(%edx)
+ SAVE_RESULT_TAIL (11)
+# ifdef USE_AS_STPCPY
+ cmpb $1, (%eax)
+ sbb $-1, %eax
+# endif
+ RETURN
+
+ .p2align 4
/* Short-length strncpy tail, entered from the function prologue when
   the length in %ebx is <= 8 (diff excerpt: several unchanged lines of
   this block are not shown, so the visible tail after the offset-4
   check is incomplete).
   After the change, any %ebx <= 4 (unsigned) is diverted up front to
   L(StrncpyExit4Bytes), which is why the per-length comparisons against
   1, 2, 3 and 4 were removed here.  The remaining visible checks test
   the source bytes at offsets 0..3 for NUL and leave through
   L(ExitTail1..4), then handle length 5 via L(ExitTail5).  */
L(StrncpyExit8Bytes):
- cmp $1, %ebx
- je L(ExitTail1)
+ cmp $4, %ebx
+ jbe L(StrncpyExit4Bytes)
cmpb $0, (%ecx)
jz L(ExitTail1)
- cmp $2, %ebx
- je L(ExitTail2)
cmpb $0, 1(%ecx)
jz L(ExitTail2)
- cmp $3, %ebx
- je L(ExitTail3)
cmpb $0, 2(%ecx)
jz L(ExitTail3)
- cmp $4, %ebx
- je L(ExitTail4)
cmpb $0, 3(%ecx)
jz L(ExitTail4)
+
cmp $5, %ebx
je L(ExitTail5)
cmpb $0, 4(%ecx)
movl %edx, %eax
# endif
RETURN
-# endif
+ .p2align 4
/* New block, reached from L(StrncpyExit8Bytes) when %ebx <= 4.
   The zero-length test that this diff removes from the function
   prologue now lives here: %ebx == 0 leaves through L(ExitTail0).
   For each of the lengths 1, 2 and 3 it leaves through
   L(ExitTail1/2/3) when %ebx equals that length, or when the source
   byte at offset 0, 1 or 2 respectively is NUL.
   Otherwise it copies 4 bytes through %eax.
   Return value: SAVE_RESULT_TAIL (3) yields %edx + 3 in the
   USE_AS_STPCPY build and %edx otherwise.  For stpcpy, cmpb sets the
   carry flag only when the byte at (%eax) is zero, so "sbb $-1" adds 1
   to %eax unless that byte is NUL.  */
+L(StrncpyExit4Bytes):
+ test %ebx, %ebx
+ jz L(ExitTail0)
+ cmp $1, %ebx
+ je L(ExitTail1)
+ cmpb $0, (%ecx)
+ jz L(ExitTail1)
+ cmp $2, %ebx
+ je L(ExitTail2)
+ cmpb $0, 1(%ecx)
+ jz L(ExitTail2)
+ cmp $3, %ebx
+ je L(ExitTail3)
+ cmpb $0, 2(%ecx)
+ jz L(ExitTail3)
+ movl (%ecx), %eax
+ movl %eax, (%edx)
+ SAVE_RESULT_TAIL (3)
+# ifdef USE_AS_STPCPY
+ cmpb $1, (%eax)
+ sbb $-1, %eax
+# endif
+ RETURN
+# endif
END (STRCPY)
# endif