2 ; jcsample.asm - downsampling (MMX)
4 ; Copyright 2009 Pierre Ossman <ossman@cendio.se> for Cendio AB
6 ; Based on the x86 SIMD extension for IJG JPEG library
7 ; Copyright (C) 1999-2006, MIYASAKA Masaru.
8 ; For conditions of distribution and use, see copyright notice in jsimdext.inc
10 ; This file should be assembled with NASM (Netwide Assembler); it
11 ; can *not* be assembled with Microsoft's MASM or any compatible
12 ; assembler (including Borland's Turbo Assembler).
13 ; NASM is available from http://nasm.sourceforge.net/ or
14 ; http://sourceforge.net/project/showfiles.php?group_id=6208
18 %include "jsimdext.inc"
20 ; --------------------------------------------------------------------------
24 ; Downsample pixel values of a single component.
25 ; This version handles the common case of 2:1 horizontal and 1:1 vertical,
29 ; jsimd_h2v1_downsample_mmx (JDIMENSION image_width, int max_v_samp_factor,
30 ; JDIMENSION v_samp_factor, JDIMENSION width_blocks,
31 ; JSAMPARRAY input_data, JSAMPARRAY output_data);
;
; Arguments are read from the stack through the accessor macros below, which
; add a fixed offset (+8 .. +28) to the base register passed as 'b'; this
; routine always passes ebp.
; Registers touched: eax, ecx, edx, esi, edi, mm0, mm1, mm6, mm7.
; Each column-loop iteration reads two MMWORDs from the input row and writes
; one MMWORD to the output row; input_data and output_data each advance by
; one row pointer per row.
34 %define img_width(b) (b)+8 ; JDIMENSION image_width
35 %define max_v_samp(b) (b)+12 ; int max_v_samp_factor
36 %define v_samp(b) (b)+16 ; JDIMENSION v_samp_factor
37 %define width_blks(b) (b)+20 ; JDIMENSION width_blocks
38 %define input_data(b) (b)+24 ; JSAMPARRAY input_data
39 %define output_data(b) (b)+28 ; JSAMPARRAY output_data
42 global EXTN(jsimd_h2v1_downsample_mmx)
44 EXTN(jsimd_h2v1_downsample_mmx):
48 ; push ecx ; need not be preserved
49 ; push edx ; need not be preserved
53 mov ecx, JDIMENSION [width_blks(ebp)] ; ecx = width_blocks
54 shl ecx,3 ; imul ecx,DCTSIZE (ecx = output_cols)
57 mov edx, JDIMENSION [img_width(ebp)] ; edx = image_width
59 ; -- expand_right_edge
62 shl ecx,1 ; output_cols * 2
66 mov eax, INT [max_v_samp(ebp)] ; eax = max_v_samp_factor
71 mov esi, JSAMPARRAY [input_data(ebp)] ; input_data
77 mov edi, JSAMPROW [esi] ; edi = row pointer currently addressed by esi
; al = the JSAMPLE stored at edi-1.
; NOTE(review): presumably the last valid sample of the row, replicated to
; pad the right edge -- confirm against the instructions that position edi.
79 mov al, JSAMPLE [edi-1]
86 add esi, byte SIZEOF_JSAMPROW ; step esi to the next row pointer
95 mov eax, JDIMENSION [v_samp(ebp)] ; rowctr
99 mov edx, 0x00010000 ; bias pattern (low word = 0, high word = 1)
; punpckldq with identical operands copies the low dword of mm7 into its
; high dword.  NOTE(review): assumes mm7 already holds the bias pattern from
; edx -- confirm.
102 punpckldq mm7,mm7 ; mm7={0, 1, 0, 1}
; Logical right shift of every 16-bit word of mm6 by BYTE_BIT bits.
; NOTE(review): the mask shown assumes mm6 was all ones beforehand -- confirm.
103 psrlw mm6,BYTE_BIT ; mm6={0xFF 0x00 0xFF 0x00 ..}
105 mov esi, JSAMPARRAY [input_data(ebp)] ; input_data
106 mov edi, JSAMPARRAY [output_data(ebp)] ; output_data
; From here esi/edi stop pointing at the row-pointer arrays and hold the
; row pointers loaded from them.
113 mov esi, JSAMPROW [esi] ; inptr
114 mov edi, JSAMPROW [edi] ; outptr
118 movq mm0, MMWORD [esi+0*SIZEOF_MMWORD] ; mm0 = first input MMWORD
119 movq mm1, MMWORD [esi+1*SIZEOF_MMWORD] ; mm1 = second input MMWORD
137 movq MMWORD [edi+0*SIZEOF_MMWORD], mm0 ; store one MMWORD of output from mm0
139 add esi, byte 2*SIZEOF_MMWORD ; inptr
140 add edi, byte 1*SIZEOF_MMWORD ; outptr
141 sub ecx, byte SIZEOF_MMWORD ; outcol
; The zero flag tested here comes from the 'sub' above: keep looping until
; outcol reaches exactly zero.
142 jnz short .columnloop
148 add esi, byte SIZEOF_JSAMPROW ; input_data
149 add edi, byte SIZEOF_JSAMPROW ; output_data
153 emms ; empty MMX state
158 ; pop edx ; need not be preserved
159 ; pop ecx ; need not be preserved
164 ; --------------------------------------------------------------------------
166 ; Downsample pixel values of a single component.
167 ; This version handles the standard case of 2:1 horizontal and 2:1 vertical,
171 ; jsimd_h2v2_downsample_mmx (JDIMENSION image_width, int max_v_samp_factor,
172 ; JDIMENSION v_samp_factor, JDIMENSION width_blocks,
173 ; JSAMPARRAY input_data, JSAMPARRAY output_data);
;
; Arguments are read from the stack through the accessor macros below, which
; add a fixed offset (+8 .. +28) to the base register passed as 'b'; this
; routine always passes ebp.
; Registers touched: eax, ecx, edx, esi, edi, mm0-mm3, mm6, mm7.
; Each column-loop iteration reads two MMWORDs from each of two input rows
; (inptr0 and inptr1) and writes one MMWORD to the output row; input_data
; advances by two row pointers for every one row pointer of output_data.
176 %define img_width(b) (b)+8 ; JDIMENSION image_width
177 %define max_v_samp(b) (b)+12 ; int max_v_samp_factor
178 %define v_samp(b) (b)+16 ; JDIMENSION v_samp_factor
179 %define width_blks(b) (b)+20 ; JDIMENSION width_blocks
180 %define input_data(b) (b)+24 ; JSAMPARRAY input_data
181 %define output_data(b) (b)+28 ; JSAMPARRAY output_data
184 global EXTN(jsimd_h2v2_downsample_mmx)
186 EXTN(jsimd_h2v2_downsample_mmx):
190 ; push ecx ; need not be preserved
191 ; push edx ; need not be preserved
195 mov ecx, JDIMENSION [width_blks(ebp)] ; ecx = width_blocks
196 shl ecx,3 ; imul ecx,DCTSIZE (ecx = output_cols)
199 mov edx, JDIMENSION [img_width(ebp)] ; edx = image_width
201 ; -- expand_right_edge
204 shl ecx,1 ; output_cols * 2
; Signed "less than or equal" exit from the edge expansion.
; NOTE(review): presumably taken when there are no columns to pad -- confirm
; which instruction sets the flags tested here.
206 jle short .expand_end
208 mov eax, INT [max_v_samp(ebp)] ; eax = max_v_samp_factor
; NOTE(review): 'mov' does not modify flags, so this branch depends on a
; separate test of eax; presumably it skips expansion when there are no rows
; to process -- confirm.
210 jle short .expand_end
213 mov esi, JSAMPARRAY [input_data(ebp)] ; input_data
219 mov edi, JSAMPROW [esi] ; edi = row pointer currently addressed by esi
; al = the JSAMPLE stored at edi-1.
; NOTE(review): presumably the last valid sample of the row, replicated to
; pad the right edge -- confirm against the instructions that position edi.
221 mov al, JSAMPLE [edi-1]
228 add esi, byte SIZEOF_JSAMPROW ; step esi to the next row pointer
233 pop ecx ; output_cols
237 mov eax, JDIMENSION [v_samp(ebp)] ; rowctr
241 mov edx, 0x00020001 ; bias pattern (low word = 1, high word = 2)
; punpckldq with identical operands copies the low dword of mm7 into its
; high dword.  NOTE(review): assumes mm7 already holds the bias pattern from
; edx -- confirm.
244 punpckldq mm7,mm7 ; mm7={1, 2, 1, 2}
; Logical right shift of every 16-bit word of mm6 by BYTE_BIT bits.
; NOTE(review): the mask shown assumes mm6 was all ones beforehand -- confirm.
245 psrlw mm6,BYTE_BIT ; mm6={0xFF 0x00 0xFF 0x00 ..}
247 mov esi, JSAMPARRAY [input_data(ebp)] ; input_data
248 mov edi, JSAMPARRAY [output_data(ebp)] ; output_data
; edx is reused as the first input row pointer, overwriting the bias
; constant loaded into it earlier.  esi must be read for row 0 before it is
; overwritten with row 1, so the order of the next two loads matters.
255 mov edx, JSAMPROW [esi+0*SIZEOF_JSAMPROW] ; inptr0
256 mov esi, JSAMPROW [esi+1*SIZEOF_JSAMPROW] ; inptr1
257 mov edi, JSAMPROW [edi] ; outptr
261 movq mm0, MMWORD [edx+0*SIZEOF_MMWORD] ; mm0 = inptr0, first MMWORD
262 movq mm1, MMWORD [esi+0*SIZEOF_MMWORD] ; mm1 = inptr1, first MMWORD
263 movq mm2, MMWORD [edx+1*SIZEOF_MMWORD] ; mm2 = inptr0, second MMWORD
264 movq mm3, MMWORD [esi+1*SIZEOF_MMWORD] ; mm3 = inptr1, second MMWORD
293 movq MMWORD [edi+0*SIZEOF_MMWORD], mm0 ; store one MMWORD of output from mm0
295 add edx, byte 2*SIZEOF_MMWORD ; inptr0
296 add esi, byte 2*SIZEOF_MMWORD ; inptr1
297 add edi, byte 1*SIZEOF_MMWORD ; outptr
298 sub ecx, byte SIZEOF_MMWORD ; outcol
; Two input row pointers are consumed for every output row pointer.
305 add esi, byte 2*SIZEOF_JSAMPROW ; input_data
306 add edi, byte 1*SIZEOF_JSAMPROW ; output_data
310 emms ; empty MMX state
315 ; pop edx ; need not be preserved
316 ; pop ecx ; need not be preserved
321 ; For some reason, the OS X linker does not honor the request to align the
322 ; segment unless we do this.