X-Git-Url: http://review.tizen.org/git/?a=blobdiff_plain;f=simd%2Fjdclrss2-64.asm;h=696a383bddf65b4daa38ed071ad8a888ed69703e;hb=HEAD;hp=fdb33a3627457238dbd9d65ff6237773ca2f845e;hpb=5f9f13097a33ee622c3fc7ddc194b3212485d8b0;p=external%2Flibjpeg-turbo.git diff --git a/simd/jdclrss2-64.asm b/simd/jdclrss2-64.asm index fdb33a3..696a383 100644 --- a/simd/jdclrss2-64.asm +++ b/simd/jdclrss2-64.asm @@ -290,6 +290,41 @@ EXTN(jsimd_ycc_rgb_convert_sse2): movdqa xmmA,xmmD sub rcx, byte SIZEOF_XMMWORD .column_st15: +%ifdef STRICT_MEMORY_ACCESS + ; Store the lower 8 bytes of xmmA to the output when it has enough + ; space. + cmp rcx, byte SIZEOF_MMWORD + jb short .column_st7 + movq MMWORD [rdi], xmmA + add rdi, byte SIZEOF_MMWORD + sub rcx, byte SIZEOF_MMWORD + psrldq xmmA, SIZEOF_MMWORD +.column_st7: + ; Store the lower 4 bytes of xmmA to the output when it has enough + ; space. + cmp rcx, byte SIZEOF_DWORD + jb short .column_st3 + movd DWORD [rdi], xmmA + add rdi, byte SIZEOF_DWORD + sub rcx, byte SIZEOF_DWORD + psrldq xmmA, SIZEOF_DWORD +.column_st3: + ; Store the lower 2 bytes of rax to the output when it has enough + ; space. + movd eax, xmmA + cmp rcx, byte SIZEOF_WORD + jb short .column_st1 + mov WORD [rdi], ax + add rdi, byte SIZEOF_WORD + sub rcx, byte SIZEOF_WORD + shr rax, 16 +.column_st1: + ; Store the lower 1 byte of rax to the output when it has enough + ; space. + test rcx, rcx + jz short .nextrow + mov BYTE [rdi], al +%else mov rax,rcx xor rcx, byte 0x0F shl rcx, 2 @@ -329,6 +364,7 @@ EXTN(jsimd_ycc_rgb_convert_sse2): por xmmE,xmmC .adj0: ; ---------------- maskmovdqu xmmA,xmmE ; movntdqu XMMWORD [rdi], xmmA +%endif ; STRICT_MEMORY_ACCESS ; --------------- %else ; RGB_PIXELSIZE == 4 ; ----------- @@ -413,6 +449,22 @@ EXTN(jsimd_ycc_rgb_convert_sse2): movdqa xmmA,xmmD sub rcx, byte SIZEOF_XMMWORD/4 .column_st15: +%ifdef STRICT_MEMORY_ACCESS + ; Store two pixels (8 bytes) of xmmA to the output when it has enough + ; space. + cmp rcx, byte SIZEOF_XMMWORD/8 + jb short .column_st7 + movq MMWORD [rdi], xmmA + add rdi, byte SIZEOF_XMMWORD/8*4 + sub rcx, byte SIZEOF_XMMWORD/8 + psrldq xmmA, SIZEOF_XMMWORD/8*4 +.column_st7: + ; Store one pixel (4 bytes) of xmmA to the output when it has enough + ; space. + test rcx, rcx + jz short .nextrow + movd DWORD [rdi], xmmA +%else cmp rcx, byte SIZEOF_XMMWORD/16 jb near .nextrow mov rax,rcx @@ -452,6 +504,7 @@ EXTN(jsimd_ycc_rgb_convert_sse2): por xmmE,xmmG .adj0: ; ---------------- maskmovdqu xmmA,xmmE ; movntdqu XMMWORD [rdi], xmmA +%endif ; STRICT_MEMORY_ACCESS ; --------------- %endif ; RGB_PIXELSIZE ; ---------------