2 ; jsimdext.inc - common declarations
4 ; Copyright 2009 Pierre Ossman <ossman@cendio.se> for Cendio AB
5 ; Copyright 2010 D. R. Commander
8 ; x86 SIMD extension for IJG JPEG library - version 1.02
10 ; Copyright (C) 1999-2006, MIYASAKA Masaru.
12 ; This software is provided 'as-is', without any express or implied
13 ; warranty. In no event will the authors be held liable for any damages
14 ; arising from the use of this software.
16 ; Permission is granted to anyone to use this software for any purpose,
17 ; including commercial applications, and to alter it and redistribute it
18 ; freely, subject to the following restrictions:
20 ; 1. The origin of this software must not be misrepresented; you must not
21 ; claim that you wrote the original software. If you use this software
22 ; in a product, an acknowledgment in the product documentation would be
23 ; appreciated but is not required.
24 ; 2. Altered source versions must be plainly marked as such, and must not be
25 ; misrepresented as being the original software.
26 ; 3. This notice may not be removed or altered from any source distribution.
30 ; ==========================================================================
31 ; System-dependent configurations
; Select per-object-format settings from the symbol passed with -D on the
; nasm command line (WIN32, WIN64, OBJ32, ELF, AOUT or MACHO; anything else
; falls through to the %else branch). Every branch defines SEG_TEXT and
; SEG_CONST; some branches additionally define EXTN, GOT_SYMBOL or
; STRICT_MEMORY_ACCESS.
33 %ifdef WIN32 ; ----(nasm -fwin32 -DWIN32 ...)--------
34 ; * Microsoft Visual C++
35 ; * MinGW (Minimalist GNU for Windows)
39 ; -- segment definition --
42 %define SEG_TEXT .text align=16
43 %define SEG_CONST .rdata align=16
45 %define SEG_TEXT .text align=16 public use32 class=CODE
46 %define SEG_CONST .rdata align=16 public use32 class=CONST
49 %elifdef WIN64 ; ----(nasm -fwin64 -DWIN64 ...)--------
50 ; * Microsoft Visual C++
52 ; -- segment definition --
55 %define SEG_TEXT .text align=16
56 %define SEG_CONST .rdata align=16
58 %define SEG_TEXT .text align=16 public use64 class=CODE
59 %define SEG_CONST .rdata align=16 public use64 class=CONST
61 %define EXTN(name) name ; foo() -> foo
63 %elifdef OBJ32 ; ----(nasm -fobj -DOBJ32 ...)----------
64 ; * Borland C++ (Win32)
66 ; -- segment definition --
68 %define SEG_TEXT .text align=16 public use32 class=CODE
69 %define SEG_CONST .data align=16 public use32 class=DATA
71 %elifdef ELF ; ----(nasm -felf[64] -DELF ...)------------
73 ; * *BSD family Unix using elf format
74 ; * Unix System V, including Solaris x86, UnixWare and SCO Unix
76 ; mark stack as non-executable
77 section .note.GNU-stack noalloc noexec nowrite progbits
79 ; -- segment definition --
82 %define SEG_TEXT .text progbits align=16
83 %define SEG_CONST .rodata progbits align=16
85 %define SEG_TEXT .text progbits alloc exec nowrite align=16
86 %define SEG_CONST .rodata progbits alloc noexec nowrite align=16
89 %define STRICT_MEMORY_ACCESS 1 ; NOTE(review): consumer not visible in this file - confirm where it is tested
91 ; To make the code position-independent, append -DPIC to the commandline
93 %define GOT_SYMBOL _GLOBAL_OFFSET_TABLE_ ; ELF supports PIC
94 %define EXTN(name) name ; foo() -> foo
96 %elifdef AOUT ; ----(nasm -faoutb/aout -DAOUT ...)----
97 ; * Older Linux using a.out format (nasm -f aout -DAOUT ...)
98 ; * *BSD family Unix using a.out format (nasm -f aoutb -DAOUT ...)
100 ; -- segment definition --
102 %define SEG_TEXT .text
103 %define SEG_CONST .data
105 ; To make the code position-independent, append -DPIC to the commandline
107 %define GOT_SYMBOL __GLOBAL_OFFSET_TABLE_ ; BSD-style a.out supports PIC
109 %elifdef MACHO ; ----(nasm -fmacho -DMACHO ...)--------
110 ; * NeXTstep/OpenStep/Rhapsody/Darwin/MacOS X (Mach-O format)
112 ; -- segment definition --
114 %define SEG_TEXT .text ;align=16 ; nasm doesn't accept align=16. why?
115 %define SEG_CONST .rodata align=16
117 ; The generation of position-independent code (PIC) is the default on Darwin.
120 %define GOT_SYMBOL _MACHO_PIC_ ; Mach-O style code-relative addressing
122 %else ; ----(Other case)----------------------
124 ; -- segment definition --
126 %define SEG_TEXT .text
127 %define SEG_CONST .data
129 %endif ; ----------------------------------------------
131 ; ==========================================================================
133 ; --------------------------------------------------------------------------
; Operand-size keywords (POINTER, INT, FP32, MMWORD, XMMWORD) together with
; their byte sizes (SIZEOF_*) and bit widths (*_BIT). The SIZEOF_xWORD and
; xWORD_BIT names used on the right-hand sides are defined further down;
; that is fine because %define bodies are expanded where the macro is used,
; not where it is defined.
; NOTE(review): POINTER and its size macros appear in a qword form followed
; by a dword form; the directive choosing between them is not visible here
; - confirm.
137 %define POINTER qword ; general pointer type
138 %define SIZEOF_POINTER SIZEOF_QWORD ; sizeof(POINTER)
139 %define POINTER_BIT QWORD_BIT ; sizeof(POINTER)*BYTE_BIT
141 %define POINTER dword ; general pointer type
142 %define SIZEOF_POINTER SIZEOF_DWORD ; sizeof(POINTER)
143 %define POINTER_BIT DWORD_BIT ; sizeof(POINTER)*BYTE_BIT
146 %define INT dword ; signed integer type
147 %define SIZEOF_INT SIZEOF_DWORD ; sizeof(INT)
148 %define INT_BIT DWORD_BIT ; sizeof(INT)*BYTE_BIT
150 %define FP32 dword ; IEEE754 single
151 %define SIZEOF_FP32 SIZEOF_DWORD ; sizeof(FP32)
152 %define FP32_BIT DWORD_BIT ; sizeof(FP32)*BYTE_BIT
154 %define MMWORD qword ; int64 (MMX register)
155 %define SIZEOF_MMWORD SIZEOF_QWORD ; sizeof(MMWORD)
156 %define MMWORD_BIT QWORD_BIT ; sizeof(MMWORD)*BYTE_BIT
158 ; NASM is buggy and doesn't properly handle operand sizes for SSE
159 ; instructions, so for now we have to define XMMWORD as blank.
160 %define XMMWORD ; int128 (SSE register)
161 %define SIZEOF_XMMWORD SIZEOF_OWORD ; sizeof(XMMWORD)
162 %define XMMWORD_BIT OWORD_BIT ; sizeof(XMMWORD)*BYTE_BIT
164 ; Similar hacks for when we load a dword or MMWORD into an xmm# register
168 %define SIZEOF_BYTE 1 ; sizeof(BYTE)
169 %define SIZEOF_WORD 2 ; sizeof(WORD)
170 %define SIZEOF_DWORD 4 ; sizeof(DWORD)
171 %define SIZEOF_QWORD 8 ; sizeof(QWORD)
172 %define SIZEOF_OWORD 16 ; sizeof(OWORD)
174 %define BYTE_BIT 8 ; CHAR_BIT in C
175 %define WORD_BIT 16 ; sizeof(WORD)*BYTE_BIT
176 %define DWORD_BIT 32 ; sizeof(DWORD)*BYTE_BIT
177 %define QWORD_BIT 64 ; sizeof(QWORD)*BYTE_BIT
178 %define OWORD_BIT 128 ; sizeof(OWORD)*BYTE_BIT
180 ; --------------------------------------------------------------------------
181 ; External Symbol Name
; EXTN(name) yields the assembler-level symbol for the identifier `name`.
; This form pastes a leading underscore onto it with %+ (token
; concatenation); the WIN64 and ELF sections earlier in this file define
; EXTN without the underscore.
; NOTE(review): the guard that keeps this definition from overriding those
; earlier ones is not visible here - confirm.
184 %define EXTN(name) _ %+ name ; foo() -> _foo
187 ; --------------------------------------------------------------------------
188 ; Macros for position-independent code (PIC) support
; PIC helpers. GOTOFF(got,sym) builds the address expression for `sym`
; given `got`, the register holding the PIC base. Three variants follow:
;   * PIC on Mach-O : (got) + (sym) - const_base
;   * PIC elsewhere : (got) + (sym) wrt ..gotoff
;   * non-PIC       : plain (sym), `got` is ignored
; The same macro names (get_GOT, pushpic, poppic, movpic) are declared in
; both the PIC and the non-PIC branch.
194 %ifdef PIC ; -------------------------------------------
196 %ifidn GOT_SYMBOL,_MACHO_PIC_ ; --------------------
198 ; At present, nasm doesn't seem to support PIC generation for Mach-O.
199 ; The PIC support code below is a little tricky.
204 %define GOTOFF(got,sym) (got) + (sym) - const_base
207 ; NOTE: this macro destroys ecx register.
209 add ecx, byte (%%ref - $)
212 mov ecx, POINTER [esp] ; ecx = value at top of stack (presumably a return address left by a call - confirm)
216 xor ebp,ebp ; ebp = 0
217 %ifidni %1,ebx ; (%1 == ebx)
218 ; db 0x8D,0x9C + jmp near const_base =
219 ; lea ebx, [ecx+ebp*8+(const_base-%%ref)] ; 8D,9C,E9,(offset32)
221 jmp near const_base ; E9,(const_base-%%ref)
224 ; db 0x8D,0x8C + jmp near const_base =
225 ; lea ecx, [ecx+ebp*8+(const_base-%%ref)] ; 8D,8C,E9,(offset32)
227 jmp near const_base ; E9,(const_base-%%ref)
233 %else ; GOT_SYMBOL != _MACHO_PIC_ ----------------
235 %define GOTOFF(got,sym) (got) + (sym) wrt ..gotoff
240 add %1, GOT_SYMBOL + $$ - $ wrt ..gotpc
243 mov %1, POINTER [esp] ; %1 = value at top of stack (presumably a return address left by a call - confirm)
248 %endif ; GOT_SYMBOL == _MACHO_PIC_ ----------------
250 %imacro pushpic 1.nolist
253 %imacro poppic 1.nolist
256 %imacro movpic 2.nolist
260 %else ; !PIC -----------------------------------------
262 %define GOTOFF(got,sym) (sym)
264 %imacro get_GOT 1.nolist
266 %imacro pushpic 1.nolist
268 %imacro poppic 1.nolist
270 %imacro movpic 2.nolist
273 %endif ; PIC -----------------------------------------
275 ; --------------------------------------------------------------------------
276 ; Align the next instruction on {2,4,8,16,..}-byte boundary.
277 ; ".balign n,,m" in GNU as
;
; MSKLE(x,y)  : 0 when x > y, non-zero when x <= y (both operands are
;               masked to 16 bits before the comparison).
; FILLB(b,n)  : number of bytes from address b up to the next n-byte
;               boundary, measured relative to the section start ($$);
;               n must be a power of two because (n)-1 is used as a mask.
; alignx %1[,%2] : %1 is the alignment, %2 the largest padding that may be
;               emitted (default 0xFFFF). Every `times` count below is
;               ANDed with MSKLE(FILLB(%%bs,%1),%2), so nothing is emitted
;               when the padding needed at %%bs exceeds %2. The remaining
;               factor of each count is recomputed from `$`, i.e. from
;               whatever distance is still left after the earlier lines.
279 %define MSKLE(x,y) (~(((y) & 0xFFFF) - ((x) & 0xFFFF)) >> 16)
280 %define FILLB(b,n) (($$-(b)) & ((n)-1))
282 %imacro alignx 1-2.nolist 0xFFFF
283 %%bs: times MSKLE(FILLB(%%bs,%1),%2) & MSKLE(16,FILLB($,%1)) & FILLB($,%1) \
285 times MSKLE(FILLB(%%bs,%1),%2) & FILLB($,%1)/9 \
286 db 0x8D,0x9C,0x23,0x00,0x00,0x00,0x00 ; lea ebx,[ebx+0x00000000]
287 times MSKLE(FILLB(%%bs,%1),%2) & FILLB($,%1)/7 \
288 db 0x8D,0xAC,0x25,0x00,0x00,0x00,0x00 ; lea ebp,[ebp+0x00000000]
289 times MSKLE(FILLB(%%bs,%1),%2) & FILLB($,%1)/6 \
290 db 0x8D,0xAD,0x00,0x00,0x00,0x00 ; lea ebp,[ebp+0x00000000]
291 times MSKLE(FILLB(%%bs,%1),%2) & FILLB($,%1)/4 \
292 db 0x8D,0x6C,0x25,0x00 ; lea ebp,[ebp+0x00]
293 times MSKLE(FILLB(%%bs,%1),%2) & FILLB($,%1)/3 \
294 db 0x8D,0x6D,0x00 ; lea ebp,[ebp+0x00]
295 times MSKLE(FILLB(%%bs,%1),%2) & FILLB($,%1)/2 \
296 db 0x8B,0xED ; mov ebp,ebp
297 times MSKLE(FILLB(%%bs,%1),%2) & FILLB($,%1)/1 \
301 ; Align the next data on {2,4,8,16,..}-byte boundary.
; alignz n: wraps NASM's `align` with an explicit `db 0` filler, so the
; padding up to the next n-byte boundary consists of zero bytes.
303 %imacro alignz 1.nolist
304 align %1, db 0 ; filling zeros
311 %imacro collect_args 0
; xmm6 and xmm7 are callee-saved in full (all 128 bits) under the Microsoft
; x64 calling convention, so the whole register is spilled. movlpd stored
; only the low 64 bits, which let the caller's upper halves be lost.
; movups is used because the alignment of rsp at this point is not
; established by anything visible here.
324 sub rsp, SIZEOF_XMMWORD ; reserve a 16-byte slot for xmm6
325 movups XMMWORD [rsp], xmm6 ; save all 128 bits of xmm6
326 sub rsp, SIZEOF_XMMWORD ; reserve a 16-byte slot for xmm7
327 movups XMMWORD [rsp], xmm7 ; save all 128 bits of xmm7
330 %imacro uncollect_args 0
; Mirror of collect_args: pop xmm7 then xmm6 (reverse of the save order),
; reloading the full 128 bits written by the movups stores.
331 movups xmm7, XMMWORD [rsp] ; restore all 128 bits of xmm7
332 add rsp, SIZEOF_XMMWORD ; release xmm7's slot
333 movups xmm6, XMMWORD [rsp] ; restore all 128 bits of xmm6
334 add rsp, SIZEOF_XMMWORD ; release xmm6's slot
345 %imacro collect_args 0
360 %imacro uncollect_args 0
373 ; --------------------------------------------------------------------------
374 ; Defines picked up from the C headers
376 %include "jsimdcfg.inc"
378 ; --------------------------------------------------------------------------