2 * mpi_x86.c - MSVC inline assembly implementation of s_mpv_ functions.
4 * ***** BEGIN LICENSE BLOCK *****
5 * Version: MPL 1.1/GPL 2.0/LGPL 2.1
7 * The contents of this file are subject to the Mozilla Public License Version
8 * 1.1 (the "License"); you may not use this file except in compliance with
9 * the License. You may obtain a copy of the License at
10 * http://www.mozilla.org/MPL/
12 * Software distributed under the License is distributed on an "AS IS" basis,
13 * WITHOUT WARRANTY OF ANY KIND, either express or implied. See the License
14 * for the specific language governing rights and limitations under the License.
17 * The Original Code is the Netscape security libraries.
19 * The Initial Developer of the Original Code is
20 * Netscape Communications Corporation.
21 * Portions created by the Initial Developer are Copyright (C) 2000
22 * the Initial Developer. All Rights Reserved.
25 * Benjamin Smedberg <benjamin@smedbergs.us>
27 * Alternatively, the contents of this file may be used under the terms of
28 * either the GNU General Public License Version 2 or later (the "GPL"), or
29 * the GNU Lesser General Public License Version 2.1 or later (the "LGPL"),
30 * in which case the provisions of the GPL or the LGPL are applicable instead
31 * of those above. If you wish to allow use of your version of this file only
32 * under the terms of either the GPL or the LGPL, and not to allow others to
33 * use your version of this file under the terms of the MPL, indicate your
34 * decision by deleting the provisions above and replace them with the notice
35 * and other provisions required by the GPL or the LGPL. If you do not delete
36 * the provisions above, a recipient may use your version of this file under
37 * the terms of any one of the MPL, the GPL or the LGPL.
39 * ***** END LICENSE BLOCK ***** */
/* NOTE(review): presumably caches the result of the s_mpi_is_sse2() probe
 * declared below, with the initial -1 meaning "not probed yet"; no line
 * visible in this part of the file reads or writes it -- confirm against the
 * dispatch code. */
43 static int is_sse = -1;
/* Declared extern with an unspecified parameter list; no definition is
 * visible in this part of the file. */
44 extern unsigned long s_mpi_is_sse2();
47 * ebp - 36: caller's esi
48 * ebp - 32: caller's edi
56 * ebp + 0: caller's ebp
57 * ebp + 4: return address
58 * ebp + 8: a argument
59 * ebp + 12: a_len argument
60 * ebp + 16: b argument
61 * ebp + 20: c argument
70 __declspec(naked) void
71 s_mpv_mul_d(const mp_digit *a, mp_size a_len, mp_digit b, mp_digit *c)
; s_mpv_mul_d: multiply the a_len-digit vector a by the single digit b.
; Each iteration stores the low 32 bits of a_i * b + carry through edi and
; keeps the high 32 bits as the next carry; the last carry is stored through
; edi after the loop.  Two loops follow: one on the integer registers
; (lodsd / mul / stosd) and one on the MMX registers (pmuludq / paddq).
; NOTE(review): edi is assumed to point at c when each loop starts -- confirm.
90 mov ecx,[ebp+12] ; ecx = a_len (loop count)
93 je L_2 ; jmp if a_len == 0 (nothing to multiply)
94 mov esi,[ebp+8] ; esi = a
97 lodsd ; eax = a_i = [ds:esi]; esi += 4
98 mov edx,[ebp+16] ; edx = b
99 mul edx ; edx:eax = Phi:Plo = a_i * b
101 add eax,ebx ; low word += incoming carry (ebx)
103 mov ebx,edx ; high half of product becomes next carry
105 stosd ; [es:edi] = eax; edi += 4
107 jnz L_1 ; jmp if a_len != 0 (next digit)
109 mov [edi],ebx ; *c = final carry
; MMX/SSE2 loop: mm1 = b, mm2 = running carry.
121 psubq mm2, mm2 ; mm2 = 0, i.e. carry = 0
122 mov ecx, [ebp+12] ; ecx = a_len (loop count)
123 movd mm1, [ebp+16] ; mm1 = b, zero-extended to 64 bits
126 je L_6 ; jmp if a_len == 0 (nothing to multiply)
127 mov esi, [ebp+8] ; esi = a
130 movd mm0, [esi] ; mm0 = a_i (the digit esi points at)
132 pmuludq mm0, mm1 ; mm0 = a_i * b as a full 64-bit product
133 paddq mm2, mm0 ; mm2 = carry + product
134 movd [edi], mm2 ; store the low 32 bits as the result digit
136 psrlq mm2, 32 ; keep the high 32 bits as the next carry
138 jnz L_5 ; jmp if a_len != 0 (next digit)
140 movd [edi], mm2 ; *c = final carry
151 * ebp - 36: caller's esi
152 * ebp - 32: caller's edi
160 * ebp + 0: caller's ebp
161 * ebp + 4: return address
162 * ebp + 8: a argument
163 * ebp + 12: a_len argument
164 * ebp + 16: b argument
165 * ebp + 20: c argument
174 __declspec(naked) void
175 s_mpv_mul_d_add(const mp_digit *a, mp_size a_len, mp_digit b, mp_digit *c)
; s_mpv_mul_d_add: multiply the a_len-digit vector a by the single digit b,
; folding the word already present at edi into each result digit; the last
; carry is stored through edi after the loop.  An integer-register loop and an
; MMX/SSE2 loop follow.
; NOTE(review): the three jumps below test flags that are presumably set by
; compares of is_sse against 0 -- confirm.
180 je s_mpv_mul_d_add_x86 ; equal: use the integer-register loop
181 jg s_mpv_mul_d_add_sse2 ; greater: use the MMX/SSE2 loop
185 jg s_mpv_mul_d_add_sse2 ; greater on the second test: MMX/SSE2 loop
193 mov ebx,0 ; carry = 0
194 mov ecx,[ebp+12] ; ecx = a_len (loop count)
197 je L_11 ; jmp if a_len == 0 (nothing to multiply)
198 mov esi,[ebp+8] ; esi = a
201 lodsd ; eax = a_i = [ds:esi]; esi += 4
202 mov edx,[ebp+16] ; edx = b
203 mul edx ; edx:eax = Phi:Plo = a_i * b
205 add eax,ebx ; low word += incoming carry (ebx)
207 mov ebx,[edi] ; ebx = current word of *c, to be added in
210 mov ebx,edx ; high half becomes next carry
212 stosd ; [es:edi] = eax; edi += 4
214 jnz L_10 ; jmp if a_len != 0 (next digit)
216 mov [edi],ebx ; *c = final carry
223 s_mpv_mul_d_add_sse2:
; MMX/SSE2 loop: mm1 = b, mm2 = running carry.
228 psubq mm2, mm2 ; mm2 = 0, i.e. carry = 0
229 mov ecx, [ebp+12] ; ecx = a_len (loop count)
230 movd mm1, [ebp+16] ; mm1 = b, zero-extended to 64 bits
233 je L_16 ; jmp if a_len == 0 (nothing to multiply)
234 mov esi, [ebp+8] ; esi = a
237 movd mm0, [esi] ; mm0 = a_i (the digit esi points at)
239 pmuludq mm0, mm1 ; mm0 = a_i * b as a full 64-bit product
240 paddq mm2, mm0 ; mm2 = carry + product
; NOTE(review): second paddq from mm0.  In s_mpv_mul_d_add_prop_sse2 the
; matching step adds the current word of *c, so mm0 presumably holds [edi]
; here rather than the product -- confirm.
242 paddq mm2, mm0 ; mm2 += mm0
243 movd [edi], mm2 ; store the low 32 bits as the result digit
245 psrlq mm2, 32 ; keep the high 32 bits as the next carry
247 jnz L_15 ; jmp if a_len != 0 (next digit)
249 movd [edi], mm2 ; *c = final carry
260 * ebp - 36: caller's esi
261 * ebp - 32: caller's edi
269 * ebp + 0: caller's ebp
270 * ebp + 4: return address
271 * ebp + 8: a argument
272 * ebp + 12: a_len argument
273 * ebp + 16: b argument
274 * ebp + 20: c argument
283 __declspec(naked) void
284 s_mpv_mul_d_add_prop(const mp_digit *a, mp_size a_len, mp_digit b, mp_digit *c)
; s_mpv_mul_d_add_prop: multiply the a_len-digit vector a by the single digit
; b, folding the word already present at edi into each result digit, then
; propagate whatever carry is left into the words that follow.  An
; integer-register loop and an MMX/SSE2 loop follow.
; NOTE(review): the three jumps below test flags that are presumably set by
; compares of is_sse against 0 -- confirm.
289 je s_mpv_mul_d_add_prop_x86 ; equal: use the integer-register loop
290 jg s_mpv_mul_d_add_prop_sse2 ; greater: use the MMX/SSE2 loop
294 jg s_mpv_mul_d_add_prop_sse2 ; greater on the second test: MMX/SSE2 loop
295 s_mpv_mul_d_add_prop_x86:
302 mov ebx,0 ; carry = 0
303 mov ecx,[ebp+12] ; ecx = a_len (loop count)
306 je L_21 ; jmp if a_len == 0 (nothing to multiply)
308 mov esi,[ebp+8] ; esi = a
310 lodsd ; eax = a_i = [ds:esi]; esi += 4
311 mov edx,[ebp+16] ; edx = b
312 mul edx ; edx:eax = Phi:Plo = a_i * b
314 add eax,ebx ; low word += incoming carry (ebx)
316 mov ebx,[edi] ; ebx = current word of *c, to be added in
319 mov ebx,edx ; high half becomes next carry
321 stosd ; [es:edi] = eax; edi += 4
323 jnz L_20 ; jmp if a_len != 0 (next digit)
; Carry propagation into the words of c that follow the products.
325 cmp ebx,0 ; is carry zero?
327 mov eax,[edi] ; eax = current word of *c, to take the carry
329 stosd ; write it back: [es:edi] = eax; edi += 4
332 mov eax,[edi] ; eax = next word of *c (further propagation, presumably)
334 stosd ; write it back: [es:edi] = eax; edi += 4
343 s_mpv_mul_d_add_prop_sse2:
; MMX/SSE2 loop: mm1 = b, mm2 = running carry, mm3 = word fetched from *c.
349 psubq mm2, mm2 ; mm2 = 0, i.e. carry = 0
350 mov ecx, [ebp+12] ; ecx = a_len (loop count)
351 movd mm1, [ebp+16] ; mm1 = b, zero-extended to 64 bits
354 je L_26 ; jmp if a_len == 0 (nothing to multiply)
355 mov esi, [ebp+8] ; esi = a
358 movd mm0, [esi] ; mm0 = a_i (the digit esi points at)
359 movd mm3, [edi] ; mm3 = current word of *c
361 pmuludq mm0, mm1 ; mm0 = a_i * b as a full 64-bit product
362 paddq mm2, mm0 ; mm2 = carry + product
363 paddq mm2, mm3 ; mm2 += current word of *c
364 movd [edi], mm2 ; store the low 32 bits as the result digit
366 psrlq mm2, 32 ; keep the high 32 bits as the next carry
368 jnz L_25 ; jmp if a_len != 0 (next digit)
371 cmp ebx, 0 ; is carry zero? (ebx presumably loaded from mm2 first -- confirm)
378 mov eax, [edi] ; eax = current word of *c, to take the carry
380 stosd ; write it back: [es:edi] = eax; edi += 4
394 * ebp - 20: caller's esi
395 * ebp - 16: caller's edi
398 * ebp - 4: a_len local
399 * ebp + 0: caller's ebp
400 * ebp + 4: return address
401 * ebp + 8: pa argument
402 * ebp + 12: a_len argument
403 * ebp + 16: ps argument
413 __declspec(naked) void
414 s_mpv_sqr_add_prop(const mp_digit *a, mp_size a_len, mp_digit *sqrs)
; s_mpv_sqr_add_prop: for each of the a_len digits at pa, add the 64-bit
; square of the digit plus the running carry into two consecutive words at ps
; (two stores per iteration), then propagate whatever carry is left into the
; words that follow.  An integer-register loop and an MMX/SSE2 loop follow.
; NOTE(review): the three jumps below test flags that are presumably set by
; compares of is_sse against 0 -- confirm.
419 je s_mpv_sqr_add_prop_x86 ; equal: use the integer-register loop
420 jg s_mpv_sqr_add_prop_sse2 ; greater: use the MMX/SSE2 loop
424 jg s_mpv_sqr_add_prop_sse2 ; greater on the second test: MMX/SSE2 loop
425 s_mpv_sqr_add_prop_x86:
432 mov ebx,0 ; carry = 0
433 mov ecx,[ebp+12] ; ecx = a_len (loop count)
434 mov edi,[ebp+16] ; edi = ps
436 je L_31 ; jump if a_len == 0 (nothing to square)
438 mov esi,[ebp+8] ; esi = pa
440 lodsd ; eax = [ds:esi]; esi += 4
443 add eax,ebx ; low word += incoming carry (ebx)
446 add eax,ebx ; low word += low word already in the result
448 stosd ; [es:edi] = eax; edi += 4 (flags untouched)
449 adc edx,ebx ; high word += result's high word + CF from the low-word add
453 stosd ; [es:edi] = eax; edi += 4 (second word of the pair)
455 jnz L_30 ; jmp if a_len != 0 (next digit)
; Carry propagation into the words of ps that follow the squares.
457 cmp ebx,0 ; is carry zero?
459 mov eax,[edi] ; eax = current word of *ps, to take the carry
461 stosd ; write it back: [es:edi] = eax; edi += 4
464 mov eax,[edi] ; eax = next word of *ps (further propagation, presumably)
466 stosd ; write it back: [es:edi] = eax; edi += 4
475 s_mpv_sqr_add_prop_sse2:
; MMX/SSE2 loop: mm2 = running carry, mm3 = word fetched from *ps.
481 psubq mm2, mm2 ; mm2 = 0, i.e. carry = 0
482 mov ecx, [ebp+12] ; ecx = a_len (loop count)
485 je L_36 ; jmp if a_len == 0 (nothing to square)
486 mov esi, [ebp+8] ; esi = a
489 movd mm0, [esi] ; mm0 = *a (the digit esi points at)
490 movd mm3, [edi] ; mm3 = low word already in the result
492 pmuludq mm0, mm0 ; mm0 = sqr(a) as a full 64-bit product
493 paddq mm2, mm0 ; mm2 = carry + square
494 paddq mm2, mm3 ; mm2 += low word of the result
496 movd [edi], mm2 ; store the low 32 bits at [edi]
498 paddq mm2, mm3 ; add the high word (mm3 presumably reloaded from [edi+4] -- confirm)
499 movd [edi+4], mm2 ; store the low 32 bits at [edi+4]
500 psrlq mm2, 32 ; keep the high 32 bits as the next carry
503 jnz L_35 ; jmp if a_len != 0 (next digit)
506 cmp ebx, 0 ; is carry zero? (ebx presumably loaded from mm2 first -- confirm)
513 mov eax, [edi] ; eax = current word of *ps, to take the carry
515 stosd ; write it back: [es:edi] = eax; edi += 4
529 * Divide 64-bit (Nhi,Nlo) by 32-bit divisor, which must be normalized
530 * so its high bit is 1. This code is from NSPR.
532 * Dump of assembler code for function s_mpv_div_2dx1d:
534 * esp + 0: Caller's ebx
535 * esp + 4: return address
536 * esp + 8: Nhi argument
537 * esp + 12: Nlo argument
538 * esp + 16: divisor argument
539 * esp + 20: qp argument
540 * esp + 24: rp argument
549 __declspec(naked) mp_err
550 s_mpv_div_2dx1d(mp_digit Nhi, mp_digit Nlo, mp_digit divisor,
551 mp_digit *qp, mp_digit *rp)
563 xor eax,eax ; return zero