/* DEC(N) pastes DEC_ and N into a single macro name. The DEC_<n> macros it
   resolves to are not shown in this listing (presumably they expand to N - 1;
   confirm against the full file). */
5 #define DEC(N) uECC_CONCAT(DEC_, N)
/* REPEAT_n(stuff) expands to n consecutive copies of stuff. Each level reuses
   the previous level and appends one more copy. */
7 #define REPEAT_1(stuff) stuff
8 #define REPEAT_2(stuff) REPEAT_1(stuff) stuff
9 #define REPEAT_3(stuff) REPEAT_2(stuff) stuff
10 #define REPEAT_4(stuff) REPEAT_3(stuff) stuff
11 #define REPEAT_5(stuff) REPEAT_4(stuff) stuff
12 #define REPEAT_6(stuff) REPEAT_5(stuff) stuff
13 #define REPEAT_7(stuff) REPEAT_6(stuff) stuff
14 #define REPEAT_8(stuff) REPEAT_7(stuff) stuff
/* REPEAT(N, stuff) dispatches to REPEAT_<N>; counts 1 through 8 are the ones
   defined in this listing. */
16 #define REPEAT(N, stuff) uECC_CONCAT(REPEAT_, N)(stuff)
/* STR(thing) turns thing into a string literal. The extra STR2 level lets
   thing be macro-expanded before the # operator is applied. */
18 #define STR2(thing) #thing
19 #define STR(thing) STR2(thing)
21 #if (uECC_ASM == uECC_asm_fast)
/* Unrolled multi-word addition (uECC_asm_fast build).
   Adds the words at p_left and p_right and writes the sums through p_result.
   The first word pair uses ADDS; the following DEC(uECC_WORDS) pairs use ADCS
   so the carry ripples from word to word. The final ADCS folds the carry flag
   into l_carry.
   NOTE(review): the locals, the asm statement opener, the clobber list and the
   return statement are not shown in this listing. l_carry is assumed to start
   at 0 and to be the returned value; confirm against the full file. */
23 static uint32_t vli_add(uint32_t *p_result, uint32_t *p_left, uint32_t *p_right)
30 ".syntax unified \n\t"
31 "ldmia %[lptr]!, {%[left]} \n\t" /* Load left word. */
32 "ldmia %[rptr]!, {%[right]} \n\t" /* Load right word. */
33 "adds %[left], %[right] \n\t" /* Add first word. */
34 "stmia %[dptr]!, {%[left]} \n\t" /* Store result word. */
36 /* Now we just do the remaining words with the carry bit (using ADC) */
37 REPEAT(DEC(uECC_WORDS), "ldmia %[lptr]!, {%[left]} \n\t"
38 "ldmia %[rptr]!, {%[right]} \n\t"
39 "adcs %[left], %[right] \n\t"
40 "stmia %[dptr]!, {%[left]} \n\t")
42 "adcs %[carry], %[carry] \n\t" /* Store carry bit in l_carry. */
/* Switch the assembler back to divided syntax on builds other than Thumb-2. */
43 #if (uECC_PLATFORM != uECC_arm_thumb2)
44 ".syntax divided \n\t"
/* The uECC_arm_thumb build uses the "l" (low register) constraint for every
   operand; the other operand list below uses the general "r" constraint. */
46 #if (uECC_PLATFORM == uECC_arm_thumb)
47 : [dptr] "+l" (p_result), [lptr] "+l" (p_left), [rptr] "+l" (p_right),
48 [carry] "+l" (l_carry), [left] "=l" (l_left), [right] "=l" (l_right)
50 : [dptr] "+r" (p_result), [lptr] "+r" (p_left), [rptr] "+r" (p_right),
51 [carry] "+r" (l_carry), [left] "=r" (l_left), [right] "=r" (l_right)
/* Unrolled multi-word subtraction (uECC_asm_fast build).
   Subtracts the words at p_right from the words at p_left and writes the
   differences through p_result. The first word pair uses SUBS; the following
   DEC(uECC_WORDS) pairs use SBCS so the borrow ripples from word to word.
   The final ADCS folds the carry flag into l_carry, and the function returns
   its logical negation, i.e. non-zero when the subtraction borrowed.
   NOTE(review): the locals, the asm statement opener and the clobber list are
   not shown in this listing. l_carry is assumed to start at 0; confirm
   against the full file. */
60 static uint32_t vli_sub(uint32_t *p_result, uint32_t *p_left, uint32_t *p_right)
67 ".syntax unified \n\t"
68 "ldmia %[lptr]!, {%[left]} \n\t" /* Load left word. */
69 "ldmia %[rptr]!, {%[right]} \n\t" /* Load right word. */
70 "subs %[left], %[right] \n\t" /* Subtract. */
71 "stmia %[dptr]!, {%[left]} \n\t" /* Store result word. */
73 /* Now we just do the remaining words with the carry bit (using SBC) */
74 REPEAT(DEC(uECC_WORDS), "ldmia %[lptr]!, {%[left]} \n\t"
75 "ldmia %[rptr]!, {%[right]} \n\t"
76 "sbcs %[left], %[right] \n\t"
77 "stmia %[dptr]!, {%[left]} \n\t")
79 "adcs %[carry], %[carry] \n\t" /* Store carry bit in l_carry. */
/* Switch the assembler back to divided syntax on builds other than Thumb-2. */
80 #if (uECC_PLATFORM != uECC_arm_thumb2)
81 ".syntax divided \n\t"
/* The uECC_arm_thumb build uses the "l" (low register) constraint for every
   operand; the other operand list below uses the general "r" constraint. */
83 #if (uECC_PLATFORM == uECC_arm_thumb)
84 : [dptr] "+l" (p_result), [lptr] "+l" (p_left), [rptr] "+l" (p_right),
85 [carry] "+l" (l_carry), [left] "=l" (l_left), [right] "=l" (l_right)
87 : [dptr] "+r" (p_result), [lptr] "+r" (p_left), [rptr] "+r" (p_right),
88 [carry] "+r" (l_carry), [left] "=r" (l_left), [right] "=r" (l_right)
93 return !l_carry; // On ARM the carry flag is SET after a subtraction when NO borrow occurred, so invert it to report the borrow.
97 #if (uECC_PLATFORM != uECC_arm_thumb)
/* Fully unrolled multiplication (uECC_asm_fast build).
   Multiplies words loaded from p_left (into r3-r5) by words loaded from
   p_right (into r6-r8) using 32x32->64 UMULL, and streams result words to
   p_result with post-incrementing STMIA.
   The three pointers are pinned to r0-r2 because the asm text names those
   registers directly; r3-r12 and r14 are declared as clobbered, together with
   the flags and memory.
   NOTE(review): the #endif after this function says it is the uECC_WORDS == 5
   variant, but the opening #if is not shown in this listing. Neither are the
   instructions between the umull/stmia lines (presumably the carry-propagating
   additions and pointer adjustments); confirm against the full file. */
99 static void vli_mult(uint32_t *p_result, uint32_t *p_left, uint32_t *p_right)
101 register uint32_t *r0 __asm__("r0") = p_result;
102 register uint32_t *r1 __asm__("r1") = p_left;
103 register uint32_t *r2 __asm__("r2") = p_right;
106 ".syntax unified \n\t"
109 "ldmia r1!, {r3,r4} \n\t"
110 "ldmia r2!, {r6,r7} \n\t"
112 "umull r11, r12, r3, r6 \n\t"
113 "stmia r0!, {r11} \n\t"
116 "umull r11, r9, r3, r7 \n\t"
119 "umull r11, r14, r4, r6 \n\t"
123 "stmia r0!, {r12} \n\t"
125 "umull r12, r14, r4, r7 \n\t"
128 "stmia r0!, {r9, r10} \n\t"
132 "ldmia r2!, {r6,r7,r8} \n\t"
133 "ldmia r1!, {r5} \n\t"
135 "umull r11, r12, r3, r6 \n\t"
136 "stmia r0!, {r11} \n\t"
139 "umull r11, r9, r3, r7 \n\t"
142 "umull r11, r14, r4, r6 \n\t"
146 "stmia r0!, {r12} \n\t"
149 "umull r12, r14, r3, r8 \n\t"
153 "umull r12, r14, r4, r7 \n\t"
157 "umull r12, r14, r5, r6 \n\t"
161 "stmia r0!, {r9} \n\t"
163 "ldmia r1!, {r3} \n\t"
165 "umull r14, r9, r4, r8 \n\t"
169 "umull r14, r9, r5, r7 \n\t"
173 "umull r14, r9, r3, r6 \n\t"
181 "stmia r0!, {r10} \n\t"
183 "ldmia r1!, {r4} \n\t"
185 "umull r9, r10, r5, r8 \n\t"
189 "umull r9, r10, r3, r7 \n\t"
193 "umull r9, r10, r4, r6 \n\t"
201 "stmia r0!, {r11} \n\t"
203 "ldmia r2!, {r6} \n\t"
205 "umull r10, r11, r5, r6 \n\t"
209 "umull r10, r11, r3, r8 \n\t"
213 "umull r10, r11, r4, r7 \n\t"
221 "stmia r0!, {r12} \n\t"
223 "ldmia r2!, {r7} \n\t"
225 "umull r11, r12, r5, r7 \n\t"
229 "umull r11, r12, r3, r6 \n\t"
233 "umull r11, r12, r4, r8 \n\t"
241 "stmia r0!, {r14} \n\t"
244 "umull r12, r14, r3, r7 \n\t"
248 "umull r12, r14, r4, r6 \n\t"
252 "stmia r0!, {r9} \n\t"
254 "umull r14, r9, r4, r7 \n\t"
257 "stmia r0!, {r10, r11} \n\t"
258 #if (uECC_PLATFORM != uECC_arm_thumb2)
259 ".syntax divided \n\t"
261 : "+r" (r0), "+r" (r1), "+r" (r2)
263 : "r3", "r4", "r5", "r6", "r7", "r8", "r9", "r10", "r11", "r12", "r14", "cc", "memory"
267 #endif /* (uECC_WORDS == 5) */
269 #if (uECC_WORDS == 6)
/* Fully unrolled multiplication for the uECC_WORDS == 6 build.
   Multiplies words loaded from p_left (into r3-r5) by words loaded from
   p_right (into r6-r8) using 32x32->64 UMULL, and streams result words to
   p_result with post-incrementing STMIA.
   The three pointers are pinned to r0-r2 because the asm text names those
   registers directly; r3-r12 and r14 are declared as clobbered, together with
   the flags and memory.
   NOTE(review): the instructions between the umull/stmia lines (presumably the
   carry-propagating additions and pointer adjustments) are not shown in this
   listing; confirm against the full file. */
270 static void vli_mult(uint32_t *p_result, uint32_t *p_left, uint32_t *p_right)
272 register uint32_t *r0 __asm__("r0") = p_result;
273 register uint32_t *r1 __asm__("r1") = p_left;
274 register uint32_t *r2 __asm__("r2") = p_right;
277 ".syntax unified \n\t"
280 "ldmia r1!, {r3,r4,r5} \n\t"
281 "ldmia r2!, {r6,r7,r8} \n\t"
283 "umull r11, r12, r3, r6 \n\t"
284 "stmia r0!, {r11} \n\t"
287 "umull r11, r9, r3, r7 \n\t"
290 "umull r11, r14, r4, r6 \n\t"
294 "stmia r0!, {r12} \n\t"
297 "umull r12, r14, r3, r8 \n\t"
301 "umull r12, r14, r4, r7 \n\t"
305 "umull r12, r14, r5, r6 \n\t"
309 "stmia r0!, {r9} \n\t"
312 "umull r14, r9, r4, r8 \n\t"
316 "umull r14, r9, r5, r7 \n\t"
320 "stmia r0!, {r10} \n\t"
322 "umull r9, r10, r5, r8 \n\t"
325 "stmia r0!, {r11, r12} \n\t"
329 "ldmia r2!, {r6,r7,r8} \n\t"
331 "umull r11, r12, r3, r6 \n\t"
332 "stmia r0!, {r11} \n\t"
335 "umull r11, r9, r3, r7 \n\t"
338 "umull r11, r14, r4, r6 \n\t"
342 "stmia r0!, {r12} \n\t"
345 "umull r12, r14, r3, r8 \n\t"
349 "umull r12, r14, r4, r7 \n\t"
353 "umull r12, r14, r5, r6 \n\t"
357 "stmia r0!, {r9} \n\t"
359 "ldmia r1!, {r3} \n\t"
361 "umull r14, r9, r4, r8 \n\t"
365 "umull r14, r9, r5, r7 \n\t"
369 "umull r14, r9, r3, r6 \n\t"
377 "stmia r0!, {r10} \n\t"
379 "ldmia r1!, {r4} \n\t"
381 "umull r9, r10, r5, r8 \n\t"
385 "umull r9, r10, r3, r7 \n\t"
389 "umull r9, r10, r4, r6 \n\t"
397 "stmia r0!, {r11} \n\t"
399 "ldmia r1!, {r5} \n\t"
401 "umull r10, r11, r3, r8 \n\t"
405 "umull r10, r11, r4, r7 \n\t"
409 "umull r10, r11, r5, r6 \n\t"
417 "stmia r0!, {r12} \n\t"
419 "ldmia r2!, {r6} \n\t"
421 "umull r11, r12, r3, r6 \n\t"
425 "umull r11, r12, r4, r8 \n\t"
429 "umull r11, r12, r5, r7 \n\t"
437 "stmia r0!, {r14} \n\t"
439 "ldmia r2!, {r7} \n\t"
441 "umull r12, r14, r3, r7 \n\t"
445 "umull r12, r14, r4, r6 \n\t"
449 "umull r12, r14, r5, r8 \n\t"
457 "stmia r0!, {r9} \n\t"
459 "ldmia r2!, {r8} \n\t"
461 "umull r14, r9, r3, r8 \n\t"
465 "umull r14, r9, r4, r7 \n\t"
469 "umull r14, r9, r5, r6 \n\t"
477 "stmia r0!, {r10} \n\t"
480 "umull r9, r10, r4, r8 \n\t"
484 "umull r9, r10, r5, r7 \n\t"
488 "stmia r0!, {r11} \n\t"
490 "umull r10, r11, r5, r8 \n\t"
493 "stmia r0!, {r12, r14} \n\t"
494 #if (uECC_PLATFORM != uECC_arm_thumb2)
495 ".syntax divided \n\t"
497 : "+r" (r0), "+r" (r1), "+r" (r2)
499 : "r3", "r4", "r5", "r6", "r7", "r8", "r9", "r10", "r11", "r12", "r14", "cc", "memory"
503 #endif /* (uECC_WORDS == 6) */
505 #if (uECC_WORDS == 8)
/* Fully unrolled multiplication for the uECC_WORDS == 8 build.
   Multiplies words loaded from p_left (into r3-r5) by words loaded from
   p_right (into r6-r8) using 32x32->64 UMULL, and streams result words to
   p_result with post-incrementing STMIA.
   The three pointers are pinned to r0-r2 because the asm text names those
   registers directly; r3-r12 and r14 are declared as clobbered, together with
   the flags and memory.
   NOTE(review): the instructions between the umull/stmia lines (presumably the
   carry-propagating additions and pointer adjustments) are not shown in this
   listing; confirm against the full file. */
506 static void vli_mult(uint32_t *p_result, uint32_t *p_left, uint32_t *p_right)
508 register uint32_t *r0 __asm__("r0") = p_result;
509 register uint32_t *r1 __asm__("r1") = p_left;
510 register uint32_t *r2 __asm__("r2") = p_right;
513 ".syntax unified \n\t"
516 "ldmia r1!, {r3,r4} \n\t"
517 "ldmia r2!, {r6,r7} \n\t"
519 "umull r11, r12, r3, r6 \n\t"
520 "stmia r0!, {r11} \n\t"
523 "umull r11, r9, r3, r7 \n\t"
526 "umull r11, r14, r4, r6 \n\t"
530 "stmia r0!, {r12} \n\t"
532 "umull r12, r14, r4, r7 \n\t"
535 "stmia r0!, {r9, r10} \n\t"
539 "ldmia r2!, {r6,r7,r8} \n\t"
540 "ldmia r1!, {r5} \n\t"
542 "umull r11, r12, r3, r6 \n\t"
543 "stmia r0!, {r11} \n\t"
546 "umull r11, r9, r3, r7 \n\t"
549 "umull r11, r14, r4, r6 \n\t"
553 "stmia r0!, {r12} \n\t"
556 "umull r12, r14, r3, r8 \n\t"
560 "umull r12, r14, r4, r7 \n\t"
564 "umull r12, r14, r5, r6 \n\t"
568 "stmia r0!, {r9} \n\t"
570 "ldmia r1!, {r3} \n\t"
572 "umull r14, r9, r4, r8 \n\t"
576 "umull r14, r9, r5, r7 \n\t"
580 "umull r14, r9, r3, r6 \n\t"
588 "stmia r0!, {r10} \n\t"
590 "ldmia r1!, {r4} \n\t"
592 "umull r9, r10, r5, r8 \n\t"
596 "umull r9, r10, r3, r7 \n\t"
600 "umull r9, r10, r4, r6 \n\t"
608 "stmia r0!, {r11} \n\t"
610 "ldmia r2!, {r6} \n\t"
612 "umull r10, r11, r5, r6 \n\t"
616 "umull r10, r11, r3, r8 \n\t"
620 "umull r10, r11, r4, r7 \n\t"
628 "stmia r0!, {r12} \n\t"
630 "ldmia r2!, {r7} \n\t"
632 "umull r11, r12, r5, r7 \n\t"
636 "umull r11, r12, r3, r6 \n\t"
640 "umull r11, r12, r4, r8 \n\t"
648 "stmia r0!, {r14} \n\t"
651 "umull r12, r14, r3, r7 \n\t"
655 "umull r12, r14, r4, r6 \n\t"
659 "stmia r0!, {r9} \n\t"
661 "umull r14, r9, r4, r7 \n\t"
664 "stmia r0!, {r10, r11} \n\t"
669 "ldmia r1!, {r3,r4,r5} \n\t"
670 "ldmia r2!, {r6,r7,r8} \n\t"
672 "umull r11, r12, r3, r6 \n\t"
673 "stmia r0!, {r11} \n\t"
676 "umull r11, r9, r3, r7 \n\t"
679 "umull r11, r14, r4, r6 \n\t"
683 "stmia r0!, {r12} \n\t"
686 "umull r12, r14, r3, r8 \n\t"
690 "umull r12, r14, r4, r7 \n\t"
694 "umull r12, r14, r5, r6 \n\t"
698 "stmia r0!, {r9} \n\t"
700 "ldmia r1!, {r3} \n\t"
702 "umull r14, r9, r4, r8 \n\t"
706 "umull r14, r9, r5, r7 \n\t"
710 "umull r14, r9, r3, r6 \n\t"
718 "stmia r0!, {r10} \n\t"
720 "ldmia r1!, {r4} \n\t"
722 "umull r9, r10, r5, r8 \n\t"
726 "umull r9, r10, r3, r7 \n\t"
730 "umull r9, r10, r4, r6 \n\t"
738 "stmia r0!, {r11} \n\t"
740 "ldmia r1!, {r5} \n\t"
742 "umull r10, r11, r3, r8 \n\t"
746 "umull r10, r11, r4, r7 \n\t"
750 "umull r10, r11, r5, r6 \n\t"
758 "stmia r0!, {r12} \n\t"
760 "ldmia r1!, {r3} \n\t"
762 "umull r11, r12, r4, r8 \n\t"
766 "umull r11, r12, r5, r7 \n\t"
770 "umull r11, r12, r3, r6 \n\t"
778 "stmia r0!, {r14} \n\t"
780 "ldmia r1!, {r4} \n\t"
782 "umull r12, r14, r5, r8 \n\t"
786 "umull r12, r14, r3, r7 \n\t"
790 "umull r12, r14, r4, r6 \n\t"
798 "stmia r0!, {r9} \n\t"
800 "ldmia r2!, {r6} \n\t"
802 "umull r14, r9, r5, r6 \n\t"
806 "umull r14, r9, r3, r8 \n\t"
810 "umull r14, r9, r4, r7 \n\t"
818 "stmia r0!, {r10} \n\t"
820 "ldmia r2!, {r7} \n\t"
822 "umull r9, r10, r5, r7 \n\t"
826 "umull r9, r10, r3, r6 \n\t"
830 "umull r9, r10, r4, r8 \n\t"
838 "stmia r0!, {r11} \n\t"
840 "ldmia r2!, {r8} \n\t"
842 "umull r10, r11, r5, r8 \n\t"
846 "umull r10, r11, r3, r7 \n\t"
850 "umull r10, r11, r4, r6 \n\t"
858 "stmia r0!, {r12} \n\t"
860 "ldmia r2!, {r6} \n\t"
862 "umull r11, r12, r5, r6 \n\t"
866 "umull r11, r12, r3, r8 \n\t"
870 "umull r11, r12, r4, r7 \n\t"
878 "stmia r0!, {r14} \n\t"
880 "ldmia r2!, {r7} \n\t"
882 "umull r12, r14, r5, r7 \n\t"
886 "umull r12, r14, r3, r6 \n\t"
890 "umull r12, r14, r4, r8 \n\t"
898 "stmia r0!, {r9} \n\t"
901 "umull r14, r9, r3, r7 \n\t"
905 "umull r14, r9, r4, r6 \n\t"
909 "stmia r0!, {r10} \n\t"
911 "umull r9, r10, r4, r7 \n\t"
914 "stmia r0!, {r11, r12} \n\t"
915 #if (uECC_PLATFORM != uECC_arm_thumb2)
916 ".syntax divided \n\t"
918 : "+r" (r0), "+r" (r1), "+r" (r2)
920 : "r3", "r4", "r5", "r6", "r7", "r8", "r9", "r10", "r11", "r12", "r14", "cc", "memory"
924 #endif /* (uECC_WORDS == 8) */
926 #if (uECC_WORDS == 5)
/* Fully unrolled squaring for the uECC_WORDS == 5 build.
   Loads all five words of p_left into r2-r6 at once, forms word products with
   UMULL, and streams result words to p_result with post-incrementing STMIA.
   After the initial load r1 no longer holds p_left; it is reused as a scratch
   destination for UMULL low words.
   The two pointers are pinned to r0/r1 because the asm text names those
   registers directly; r2-r12 and r14 are declared as clobbered, together with
   the flags and memory.
   NOTE(review): most of the additions between the umull/stmia lines are not
   shown in this listing, so the exact accumulation scheme cannot be described
   from here; confirm against the full file. */
927 static void vli_square(uint32_t *p_result, uint32_t *p_left)
929 register uint32_t *r0 __asm__("r0") = p_result;
930 register uint32_t *r1 __asm__("r1") = p_left;
933 ".syntax unified \n\t"
934 "ldmia r1!, {r2,r3,r4,r5,r6} \n\t"
936 "umull r11, r12, r2, r2 \n\t"
937 "stmia r0!, {r11} \n\t"
940 "umull r10, r11, r2, r3 \n\t"
942 "adcs r8, r11, #0 \n\t"
947 "stmia r0!, {r12} \n\t"
950 "umull r11, r12, r2, r4 \n\t"
957 "umull r11, r12, r3, r3 \n\t"
961 "stmia r0!, {r8} \n\t"
964 "umull r8, r11, r2, r5 \n\t"
965 "umull r1, r14, r3, r4 \n\t"
975 "stmia r0!, {r8} \n\t"
978 "umull r8, r9, r2, r6 \n\t"
979 "umull r1, r14, r3, r5 \n\t"
986 "umull r1, r14, r4, r4 \n\t"
993 "stmia r0!, {r8} \n\t"
996 "umull r8, r11, r3, r6 \n\t"
997 "umull r1, r14, r4, r5 \n\t"
1002 "adcs r11, r11 \n\t"
1005 "adcs r11, r10 \n\t"
1007 "stmia r0!, {r8} \n\t"
1010 "umull r1, r10, r4, r6 \n\t"
1012 "adcs r10, r10 \n\t"
1015 "adcs r12, r10 \n\t"
1017 "umull r1, r10, r5, r5 \n\t"
1019 "adcs r12, r10 \n\t"
1021 "stmia r0!, {r11} \n\t"
1024 "umull r1, r10, r5, r6 \n\t"
1026 "adcs r10, r10 \n\t"
1031 "stmia r0!, {r12} \n\t"
1033 "umull r1, r10, r6, r6 \n\t"
1035 "adcs r11, r10 \n\t"
1036 "stmia r0!, {r8, r11} \n\t"
1037 #if (uECC_PLATFORM != uECC_arm_thumb2)
1038 ".syntax divided \n\t"
1040 : "+r" (r0), "+r" (r1)
1042 : "r2", "r3", "r4", "r5", "r6", "r7", "r8", "r9", "r10", "r11", "r12", "r14", "cc", "memory"
1045 #define asm_square 1
1046 #endif /* (uECC_WORDS == 5) */
1048 #if (uECC_WORDS == 6)
/* Fully unrolled squaring for the uECC_WORDS == 6 build.
   Loads all six words of p_left into r2-r7 at once, forms word products with
   UMULL, and streams result words to p_result with post-incrementing STMIA.
   After the initial load r1 no longer holds p_left; it is reused as a scratch
   destination for UMULL low words.
   The two pointers are pinned to r0/r1 because the asm text names those
   registers directly; r2-r12 and r14 are declared as clobbered, together with
   the flags and memory.
   NOTE(review): many of the additions between the umull/stmia lines are not
   shown in this listing, so the exact accumulation scheme cannot be described
   from here; confirm against the full file. */
1049 static void vli_square(uint32_t *p_result, uint32_t *p_left)
1051 register uint32_t *r0 __asm__("r0") = p_result;
1052 register uint32_t *r1 __asm__("r1") = p_left;
1055 ".syntax unified \n\t"
1056 "ldmia r1!, {r2,r3,r4,r5,r6,r7} \n\t"
1058 "umull r11, r12, r2, r2 \n\t"
1059 "stmia r0!, {r11} \n\t"
1062 "umull r10, r11, r2, r3 \n\t"
1063 "adds r12, r10 \n\t"
1064 "adcs r8, r11, #0 \n\t"
1066 "adds r12, r10 \n\t"
1069 "stmia r0!, {r12} \n\t"
1072 "umull r11, r12, r2, r4 \n\t"
1073 "adds r11, r11 \n\t"
1074 "adcs r12, r12 \n\t"
1079 "umull r11, r12, r3, r3 \n\t"
1083 "stmia r0!, {r8} \n\t"
1086 "umull r8, r11, r2, r5 \n\t"
1087 "umull r1, r14, r3, r4 \n\t"
1089 "adcs r11, r14 \n\t"
1092 "adcs r11, r11 \n\t"
1095 "adcs r11, r10 \n\t"
1097 "stmia r0!, {r8} \n\t"
1100 "umull r8, r9, r2, r6 \n\t"
1101 "umull r1, r14, r3, r5 \n\t"
1108 "umull r1, r14, r4, r4 \n\t"
1115 "stmia r0!, {r8} \n\t"
1118 "umull r8, r11, r2, r7 \n\t"
1119 "umull r1, r14, r3, r6 \n\t"
1121 "adcs r11, r14 \n\t"
1123 "umull r1, r14, r4, r5 \n\t"
1125 "adcs r11, r14 \n\t"
1128 "adcs r11, r11 \n\t"
1131 "adcs r11, r10 \n\t"
1133 "stmia r0!, {r8} \n\t"
1136 "umull r8, r9, r3, r7 \n\t"
1137 "umull r1, r14, r4, r6 \n\t"
1144 "umull r1, r14, r5, r5 \n\t"
1151 "stmia r0!, {r8} \n\t"
1154 "umull r8, r11, r4, r7 \n\t"
1155 "umull r1, r14, r5, r6 \n\t"
1157 "adcs r11, r14 \n\t"
1160 "adcs r11, r11 \n\t"
1163 "adcs r11, r10 \n\t"
1165 "stmia r0!, {r8} \n\t"
1168 "umull r1, r10, r5, r7 \n\t"
1170 "adcs r10, r10 \n\t"
1173 "adcs r12, r10 \n\t"
1175 "umull r1, r10, r6, r6 \n\t"
1177 "adcs r12, r10 \n\t"
1179 "stmia r0!, {r11} \n\t"
1182 "umull r1, r10, r6, r7 \n\t"
1184 "adcs r10, r10 \n\t"
1189 "stmia r0!, {r12} \n\t"
1191 "umull r1, r10, r7, r7 \n\t"
1193 "adcs r11, r10 \n\t"
1194 "stmia r0!, {r8, r11} \n\t"
1195 #if (uECC_PLATFORM != uECC_arm_thumb2)
1196 ".syntax divided \n\t"
1198 : "+r" (r0), "+r" (r1)
1200 : "r2", "r3", "r4", "r5", "r6", "r7", "r8", "r9", "r10", "r11", "r12", "r14", "cc", "memory"
1203 #define asm_square 1
1204 #endif /* (uECC_WORDS == 6) */
1206 #if (uECC_WORDS == 8)
/* Fully unrolled squaring for the uECC_WORDS == 8 build.
   Words of p_left are loaded through r1 in several batches (two pairs first,
   then six words, then single words as the computation advances). Products
   are formed with UMULL and accumulated with UMLAL, and result words are
   streamed to p_result with post-incrementing STMIA.
   Several steps read a word back from the result buffer ("ldr r14, [r0]")
   before storing to the same position, so the routine depends on values it
   wrote earlier (presumably by the opening umull/stmia group; confirm).
   Near the end r1 is reused as a scratch destination for UMULL low words.
   The two pointers are pinned to r0/r1 because the asm text names those
   registers directly; r2-r12 and r14 are declared as clobbered, together with
   the flags and memory.
   NOTE(review): many instructions between the lines below are not shown in
   this listing (pointer adjustments, additions, IT blocks for the conditional
   adchi), so the exact accumulation scheme cannot be described from here;
   confirm against the full file. */
1207 static void vli_square(uint32_t *p_result, uint32_t *p_left)
1209 register uint32_t *r0 __asm__("r0") = p_result;
1210 register uint32_t *r1 __asm__("r1") = p_left;
1213 ".syntax unified \n\t"
1214 "ldmia r1!, {r2, r3} \n\t"
1216 "ldmia r1!, {r5, r6} \n\t"
1219 "umull r8, r9, r2, r5 \n\t"
1220 "stmia r0!, {r8} \n\t"
1222 "umull r12, r10, r2, r6 \n\t"
1225 "stmia r0!, {r9} \n\t"
1227 "umull r8, r9, r3, r6 \n\t"
1229 "adc r11, r9, #0 \n\t"
1230 "stmia r0!, {r10, r11} \n\t"
1234 "ldmia r1!, {r2,r3,r4,r5,r6,r7} \n\t"
1236 "umull r11, r12, r2, r2 \n\t"
1237 "stmia r0!, {r11} \n\t"
1240 "umull r10, r11, r2, r3 \n\t"
1241 "adds r12, r10 \n\t"
1242 "adcs r8, r11, #0 \n\t"
1244 "adds r12, r10 \n\t"
1247 "stmia r0!, {r12} \n\t"
1250 "umull r11, r12, r2, r4 \n\t"
1251 "adds r11, r11 \n\t"
1252 "adcs r12, r12 \n\t"
1257 "umull r11, r12, r3, r3 \n\t"
1261 "stmia r0!, {r8} \n\t"
1264 "umull r8, r11, r2, r5 \n\t"
1266 "umlal r8, r11, r3, r4 \n\t"
1269 "adchi r12, #0 \n\t"
1271 "adcs r11, r11 \n\t"
1274 "adcs r11, r10 \n\t"
1276 "stmia r0!, {r8} \n\t"
1279 "umull r8, r9, r2, r6 \n\t"
1281 "umlal r8, r9, r3, r5 \n\t"
1284 "adchi r10, #0 \n\t"
1289 "umlal r8, r9, r4, r4 \n\t"
1292 "adchi r10, #0 \n\t"
1296 "stmia r0!, {r8} \n\t"
1299 "umull r8, r11, r2, r7 \n\t"
1301 "umlal r8, r11, r3, r6 \n\t"
1304 "adchi r12, #0 \n\t"
1306 "umlal r8, r11, r4, r5 \n\t"
1309 "adchi r12, #0 \n\t"
1311 "adcs r11, r11 \n\t"
1314 "adcs r11, r10 \n\t"
1316 "stmia r0!, {r8} \n\t"
1318 "ldmia r1!, {r2} \n\t"
1320 "umull r8, r9, r3, r7 \n\t"
1322 "umlal r8, r9, r4, r6 \n\t"
1325 "adchi r10, #0 \n\t"
1326 "ldr r14, [r0] \n\t"
1334 "umlal r8, r9, r5, r5 \n\t"
1337 "adchi r10, #0 \n\t"
1341 "stmia r0!, {r8} \n\t"
1344 "umull r8, r11, r3, r2 \n\t"
1346 "umlal r8, r11, r4, r7 \n\t"
1349 "adchi r12, #0 \n\t"
1351 "umlal r8, r11, r5, r6 \n\t"
1354 "adchi r12, #0 \n\t"
1355 "ldr r14, [r0] \n\t"
1360 "adcs r11, r11 \n\t"
1363 "adcs r11, r10 \n\t"
1365 "stmia r0!, {r8} \n\t"
1367 "ldmia r1!, {r3} \n\t"
1369 "umull r8, r9, r4, r2 \n\t"
1371 "umlal r8, r9, r5, r7 \n\t"
1374 "adchi r10, #0 \n\t"
1375 "ldr r14, [r0] \n\t"
1383 "umlal r8, r9, r6, r6 \n\t"
1386 "adchi r10, #0 \n\t"
1390 "stmia r0!, {r8} \n\t"
1393 "umull r8, r11, r4, r3 \n\t"
1395 "umlal r8, r11, r5, r2 \n\t"
1398 "adchi r12, #0 \n\t"
1400 "umlal r8, r11, r6, r7 \n\t"
1403 "adchi r12, #0 \n\t"
1404 "ldr r14, [r0] \n\t"
1409 "adcs r11, r11 \n\t"
1412 "adcs r11, r10 \n\t"
1414 "stmia r0!, {r8} \n\t"
1417 "umull r8, r9, r5, r3 \n\t"
1419 "umlal r8, r9, r6, r2 \n\t"
1422 "adchi r10, #0 \n\t"
1427 "umlal r8, r9, r7, r7 \n\t"
1430 "adchi r10, #0 \n\t"
1434 "stmia r0!, {r8} \n\t"
1437 "umull r8, r11, r6, r3 \n\t"
1439 "umlal r8, r11, r7, r2 \n\t"
1442 "adchi r12, #0 \n\t"
1444 "adcs r11, r11 \n\t"
1447 "adcs r11, r10 \n\t"
1449 "stmia r0!, {r8} \n\t"
1452 "umull r1, r10, r7, r3 \n\t"
1454 "adcs r10, r10 \n\t"
1457 "adcs r12, r10 \n\t"
1459 "umull r1, r10, r2, r2 \n\t"
1461 "adcs r12, r10 \n\t"
1463 "stmia r0!, {r11} \n\t"
1466 "umull r1, r10, r2, r3 \n\t"
1468 "adcs r10, r10 \n\t"
1473 "stmia r0!, {r12} \n\t"
1475 "umull r1, r10, r3, r3 \n\t"
1477 "adcs r11, r10 \n\t"
1478 "stmia r0!, {r8, r11} \n\t"
1479 #if (uECC_PLATFORM != uECC_arm_thumb2)
1480 ".syntax divided \n\t"
1482 : "+r" (r0), "+r" (r1)
1484 : "r2", "r3", "r4", "r5", "r6", "r7", "r8", "r9", "r10", "r11", "r12", "r14", "cc", "memory"
1487 #define asm_square 1
1488 #endif /* (uECC_WORDS == 8) */
1490 #endif /* (uECC_PLATFORM != uECC_arm_thumb) */
1492 #endif /* (uECC_ASM == uECC_asm_fast) */
/* Looping multi-word addition (used when no unrolled variant is compiled in).
   Processes uECC_WORDS word pairs from p_left and p_right, writing each sum
   through p_result. The carry cannot simply stay in the flags across
   iterations because the loop-counter SUBS overwrites them, so it is parked
   in l_carry at the end of each iteration and moved back into the carry flag
   (via LSRS) at the start of the next one.
   NOTE(review): the loop label, the l_left/l_right declarations, the clobber
   list and the return statement are not shown in this listing; l_carry is
   presumably the returned value. */
1495 static uint32_t vli_add(uint32_t *p_result, uint32_t *p_left, uint32_t *p_right)
1497 uint32_t l_counter = uECC_WORDS;
1498 uint32_t l_carry = 0; /* carry = 0 initially */
1503 ".syntax unified \n\t"
1505 "ldmia %[lptr]!, {%[left]} \n\t" /* Load left word. */
1506 "ldmia %[rptr]!, {%[right]} \n\t" /* Load right word. */
1507 "lsrs %[carry], #1 \n\t" /* Shift bit 0 of l_carry into the carry flag (l_carry = 0 after this). */
1508 "adcs %[left], %[right] \n\t" /* Add with carry. */
1509 "adcs %[carry], %[carry] \n\t" /* l_carry = 0 + 0 + carry flag, i.e. save the carry bit. */
1510 "stmia %[dptr]!, {%[left]} \n\t" /* Store result word. */
1511 "subs %[ctr], #1 \n\t" /* Decrement the remaining-word counter. */
1512 "bne 1b \n\t" /* Loop until the counter reaches 0. */
/* Switch the assembler back to divided syntax on builds other than Thumb-2. */
1513 #if (uECC_PLATFORM != uECC_arm_thumb2)
1514 ".syntax divided \n\t"
/* The uECC_arm_thumb build uses the "l" (low register) constraint for every
   operand; the other operand list below uses the general "r" constraint. */
1516 #if (uECC_PLATFORM == uECC_arm_thumb)
1517 : [dptr] "+l" (p_result), [lptr] "+l" (p_left), [rptr] "+l" (p_right),
1518 [ctr] "+l" (l_counter), [carry] "+l" (l_carry), [left] "=l" (l_left), [right] "=l" (l_right)
1520 : [dptr] "+r" (p_result), [lptr] "+r" (p_left), [rptr] "+r" (p_right),
1521 [ctr] "+r" (l_counter), [carry] "+r" (l_carry), [left] "=r" (l_left), [right] "=r" (l_right)
/* Looping multi-word subtraction (used when no unrolled variant is compiled in).
   Processes uECC_WORDS word pairs, subtracting each p_right word from the
   matching p_left word and writing the difference through p_result. The
   carry flag (set = no borrow) is parked in l_carry at the end of each
   iteration and moved back into the flags (via LSRS) at the start of the
   next one, because the loop-counter SUBS overwrites the flags in between.
   l_carry starts at 1 so that the first SBCS behaves like a plain subtract.
   NOTE(review): the loop label, the l_left/l_right declarations, the clobber
   list and the return statement are not shown in this listing; the function
   presumably returns !l_carry like the unrolled variant. */
1532 static uint32_t vli_sub(uint32_t *p_result, uint32_t *p_left, uint32_t *p_right)
1534 uint32_t l_counter = uECC_WORDS;
1535 uint32_t l_carry = 1; /* carry = 1 initially (means don't borrow) */
1540 ".syntax unified \n\t"
1542 "ldmia %[lptr]!, {%[left]} \n\t" /* Load left word. */
1543 "ldmia %[rptr]!, {%[right]} \n\t" /* Load right word. */
1544 "lsrs %[carry], #1 \n\t" /* Shift bit 0 of l_carry into the carry flag (l_carry = 0 after this). */
1545 "sbcs %[left], %[right] \n\t" /* Subtract with borrow. */
1546 "adcs %[carry], %[carry] \n\t" /* l_carry = 0 + 0 + carry flag, i.e. save the carry bit. */
1547 "stmia %[dptr]!, {%[left]} \n\t" /* Store result word. */
1548 "subs %[ctr], #1 \n\t" /* Decrement the remaining-word counter. */
1549 "bne 1b \n\t" /* Loop until the counter reaches 0. */
/* Switch the assembler back to divided syntax on builds other than Thumb-2. */
1550 #if (uECC_PLATFORM != uECC_arm_thumb2)
1551 ".syntax divided \n\t"
/* The uECC_arm_thumb build uses the "l" (low register) constraint for every
   operand; the other operand list below uses the general "r" constraint. */
1553 #if (uECC_PLATFORM == uECC_arm_thumb)
1554 : [dptr] "+l" (p_result), [lptr] "+l" (p_left), [rptr] "+l" (p_right),
1555 [ctr] "+l" (l_counter), [carry] "+l" (l_carry), [left] "=l" (l_left), [right] "=l" (l_right)
1557 : [dptr] "+r" (p_result), [lptr] "+r" (p_left), [rptr] "+r" (p_right),
1558 [ctr] "+r" (l_counter), [carry] "+r" (l_carry), [left] "=r" (l_left), [right] "=r" (l_right)
/* Looping multiplication (used when no unrolled variant is compiled in).
   For each result word index k the inner loop adds every product
   p_left[i] * p_right[k-i] into a three-word accumulator (c0, c1, c2). It then
   stores c0 to p_result[k] and shifts the accumulator down by one word
   (c0 = c1, c1 = c2, c2 = 0). After the last k, the remaining c0 is stored as
   the top result word. k and i are kept as byte offsets (word index times 4).
   Two implementations follow:
     - builds other than uECC_arm_thumb get each 64-bit product from one UMULL;
     - the second body assembles the product from four 16x16 MULS and keeps
       the accumulator on the stack while doing so. It names r0-r7 directly and
       is presumably the uECC_arm_thumb branch (the #else line is not shown in
       this listing; confirm).
   NOTE(review): local declarations, the asm statement openers, some branch
   instructions and the closing lines are not shown in this listing. */
1569 static void vli_mult(uint32_t *p_result, uint32_t *p_left, uint32_t *p_right)
1571 #if (uECC_PLATFORM != uECC_arm_thumb)
1580 ".syntax unified \n\t"
1582 "1: \n\t" /* outer loop (k < uECC_WORDS) */
1583 "movs %[i], #0 \n\t" /* i = 0 */
1586 "2: \n\t" /* outer loop (k >= uECC_WORDS) */
1587 "movs %[i], %[k] \n\t" /* i = k */
1588 "subs %[i], %[eccdm1] \n\t" /* i = k - (uECC_WORDS - 1) (times 4) */
1590 "3: \n\t" /* inner loop */
1591 "subs %[t0], %[k], %[i] \n\t" /* t0 = k-i */
1593 "ldr %[t1], [%[right], %[t0]] \n\t" /* t1 = p_right[k-i] */
1594 "ldr %[t0], [%[left], %[i]] \n\t" /* t0 = p_left[i] */
1596 "umull %[t0], %[t1], %[t0], %[t1] \n\t" /* (t0, t1) = p_left[i] * p_right[k-i] */
1598 "adds %[c0], %[t0] \n\t" /* add low word to c0 */
1599 "adcs %[c1], %[t1] \n\t" /* add high word to c1, including carry */
1600 "adcs %[c2], #0 \n\t" /* add carry to c2 */
1602 "adds %[i], #4 \n\t" /* i += 4 */
1603 "cmp %[i], %[eccd] \n\t" /* i < uECC_WORDS (times 4)? */
1604 "bge 4f \n\t" /* if not, exit the loop */
1605 "cmp %[i], %[k] \n\t" /* i <= k? */
1606 "ble 3b \n\t" /* if so, continue looping */
1608 "4: \n\t" /* end inner loop */
1610 "str %[c0], [%[result], %[k]] \n\t" /* p_result[k] = c0 */
1611 "mov %[c0], %[c1] \n\t" /* c0 = c1 */
1612 "mov %[c1], %[c2] \n\t" /* c1 = c2 */
1613 "movs %[c2], #0 \n\t" /* c2 = 0 */
1614 "adds %[k], #4 \n\t" /* k += 4 */
1615 "cmp %[k], %[eccd] \n\t" /* k < uECC_WORDS (times 4) ? */
1616 "blt 1b \n\t" /* if so, loop back, start with i = 0 */
1617 "cmp %[k], %[eccd2m1] \n\t" /* k < uECC_WORDS * 2 - 1 (times 4) ? */
1618 "blt 2b \n\t" /* if so, loop back, start with i = (k+1) - uECC_WORDS */
1619 /* end outer loop */
1621 "str %[c0], [%[result], %[k]] \n\t" /* p_result[uECC_WORDS * 2 - 1] = c0 */
1622 #if (uECC_PLATFORM != uECC_arm_thumb2)
1623 ".syntax divided \n\t"
1625 : [c0] "+r" (c0), [c1] "+r" (c1), [c2] "+r" (c2), [k] "+r" (k), [i] "=&r" (i), [t0] "=&r" (t0), [t1] "=&r" (t1)
1626 : [result] "r" (p_result), [left] "r" (p_left), [right] "r" (p_right),
1627 [eccd] "I" (uECC_WORDS * 4), [eccdm1] "I" ((uECC_WORDS-1) * 4), [eccd2m1] "I" ((uECC_WORDS * 2 - 1) * 4)
/* Second implementation: fixed low registers, no UMULL.
   r0 doubles as a scratch register inside the loop, so p_result is saved on
   the stack up front and reloaded from [sp] whenever a result word is stored. */
1633 register uint32_t *r0 __asm__("r0") = p_result;
1634 register uint32_t *r1 __asm__("r1") = p_left;
1635 register uint32_t *r2 __asm__("r2") = p_right;
1638 ".syntax unified \n\t"
1639 "movs r3, #0 \n\t" /* c0 = 0 */
1640 "movs r4, #0 \n\t" /* c1 = 0 */
1641 "movs r5, #0 \n\t" /* c2 = 0 */
1642 "movs r6, #0 \n\t" /* k = 0 */
1644 "push {r0} \n\t" /* keep p_result on the stack */
1646 "1: \n\t" /* outer loop (k < uECC_WORDS) */
1647 "movs r7, #0 \n\t" /* r7 = i = 0 */
1650 "2: \n\t" /* outer loop (k >= uECC_WORDS) */
1651 "movs r7, r6 \n\t" /* r7 = k */
1652 "subs r7, %[eccdm1] \n\t" /* r7 = i = k - (uECC_WORDS - 1) (times 4) */
1654 "3: \n\t" /* inner loop */
1655 "push {r3, r4, r5, r6} \n\t" /* save c0, c1, c2, k; r3 (c0) is at the top of stack. */
1656 "subs r0, r6, r7 \n\t" /* r0 = k-i */
1658 "ldr r4, [r2, r0] \n\t" /* r4 = p_right[k-i] */
1659 "ldr r0, [r1, r7] \n\t" /* r0 = p_left[i] */
1661 "lsrs r3, r0, #16 \n\t" /* r3 = a1 (high half of p_left[i]) */
1662 "uxth r0, r0 \n\t" /* r0 = a0 (low half of p_left[i]) */
1664 "lsrs r5, r4, #16 \n\t" /* r5 = b1 (high half of p_right[k-i]) */
1665 "uxth r4, r4 \n\t" /* r4 = b0 (low half of p_right[k-i]) */
1667 "movs r6, r3 \n\t" /* r6 = a1 */
1668 "muls r6, r5, r6 \n\t" /* r6 = a1*b1 */
1669 "muls r3, r4, r3 \n\t" /* r3 = b0*a1 */
1670 "muls r5, r0, r5 \n\t" /* r5 = a0*b1 */
1671 "muls r0, r4, r0 \n\t" /* r0 = a0*b0 */
1673 "movs r4, #0 \n\t" /* r4 = 0 */
1674 "adds r3, r5 \n\t" /* r3 = b0*a1 + a0*b1 */
1675 "adcs r4, r4 \n\t" /* r4 = carry */
1676 "lsls r4, #16 \n\t" /* r4 = carry << 16 */
1677 "adds r6, r4 \n\t" /* r6 = a1*b1 + (carry << 16) */
1679 "lsls r4, r3, #16 \n\t" /* r4 = (b0*a1 + a0*b1) << 16 */
1680 "lsrs r3, #16 \n\t" /* r3 = (b0*a1 + a0*b1) >> 16 */
1681 "adds r0, r4 \n\t" /* r0 = low word = a0*b0 + ((b0*a1 + a0*b1) << 16) */
1682 "adcs r6, r3 \n\t" /* r6 = high word = a1*b1 + (carry << 16) + ((b0*a1 + a0*b1) >> 16) + carry from the low word */
1684 "pop {r3, r4, r5} \n\t" /* r3 = c0, r4 = c1, r5 = c2 */
1685 "adds r3, r0 \n\t" /* add low word to c0 */
1686 "adcs r4, r6 \n\t" /* add high word to c1, including carry */
1687 "movs r0, #0 \n\t" /* r0 = 0 (does not affect carry bit) */
1688 "adcs r5, r0 \n\t" /* add carry to c2 */
1690 "pop {r6} \n\t" /* r6 = k */
1692 "adds r7, #4 \n\t" /* i += 4 */
1693 "cmp r7, %[eccd] \n\t" /* i < uECC_WORDS (times 4)? */
1694 "bge 4f \n\t" /* if not, exit the loop */
1695 "cmp r7, r6 \n\t" /* i <= k? */
1696 "ble 3b \n\t" /* if so, continue looping */
1698 "4: \n\t" /* end inner loop */
1700 "ldr r0, [sp, #0] \n\t" /* r0 = p_result */
1702 "str r3, [r0, r6] \n\t" /* p_result[k] = c0 */
1703 "mov r3, r4 \n\t" /* c0 = c1 */
1704 "mov r4, r5 \n\t" /* c1 = c2 */
1705 "movs r5, #0 \n\t" /* c2 = 0 */
1706 "adds r6, #4 \n\t" /* k += 4 */
1707 "cmp r6, %[eccd] \n\t" /* k < uECC_WORDS (times 4) ? */
1708 "blt 1b \n\t" /* if so, loop back, start with i = 0 */
1709 "cmp r6, %[eccd2m1] \n\t" /* k < uECC_WORDS * 2 - 1 (times 4) ? */
1710 "blt 2b \n\t" /* if so, loop back, start with i = (k+1) - uECC_WORDS */
1711 /* end outer loop */
1713 "str r3, [r0, r6] \n\t" /* p_result[uECC_WORDS * 2 - 1] = c0 */
1714 "pop {r0} \n\t" /* pop p_result off the stack */
1716 ".syntax divided \n\t"
1718 : [r0] "l" (r0), [r1] "l" (r1), [r2] "l" (r2), [eccd] "I" (uECC_WORDS * 4), [eccdm1] "I" ((uECC_WORDS-1) * 4), [eccd2m1] "I" ((uECC_WORDS * 2 - 1) * 4)
1719 : "r3", "r4", "r5", "r6", "r7", "cc", "memory"
1724 #endif /* !asm_mult */
1726 #if uECC_SQUARE_FUNC
/* Looping squaring (used when no unrolled variant is compiled in).
   Same accumulator scheme as the looping multiply: for each result word index
   k, products are summed into (c0, c1, c2), c0 is stored to p_result[k], and
   the accumulator is shifted down one word. Both factors come from p_left.
   The inner loop only runs while i <= k-i. A product with i < k-i is doubled
   (shifted left by one bit, with the bit shifted out going to c2) so the
   mirrored product (k-i, i) never has to be computed; the i == k-i product is
   added once. k, i and tt are byte offsets (word index times 4).
   Two implementations follow:
     - builds other than uECC_arm_thumb get each 64-bit product from one UMULL;
     - the second body assembles the product from four 16x16 MULS and keeps
       the accumulator on the stack while doing so. It names r0-r7 directly and
       is presumably the uECC_arm_thumb branch (the #else line is not shown in
       this listing; confirm).
   NOTE(review): local declarations, the asm statement openers, the "4:" label,
   some branch instructions and the closing lines are not shown in this
   listing. */
1728 static void vli_square(uint32_t *p_result, uint32_t *p_left)
1730 #if (uECC_PLATFORM != uECC_arm_thumb)
1739 ".syntax unified \n\t"
1741 "1: \n\t" /* outer loop (k < uECC_WORDS) */
1742 "movs %[i], #0 \n\t" /* i = 0 */
1745 "2: \n\t" /* outer loop (k >= uECC_WORDS) */
1746 "movs %[i], %[k] \n\t" /* i = k */
1747 "subs %[i], %[eccdm1] \n\t" /* i = k - (uECC_WORDS - 1) (times 4) */
1749 "3: \n\t" /* inner loop */
1750 "subs %[tt], %[k], %[i] \n\t" /* tt = k-i */
1752 "ldr %[t1], [%[left], %[tt]] \n\t" /* t1 = p_left[k-i] */
1753 "ldr %[t0], [%[left], %[i]] \n\t" /* t0 = p_left[i] */
1755 "umull %[t0], %[t1], %[t0], %[t1] \n\t" /* (t0, t1) = p_left[i] * p_left[k-i] */
1757 "cmp %[i], %[tt] \n\t" /* (i < k-i) ? */
1758 "bge 4f \n\t" /* if i >= k-i, skip */
1759 "lsls %[t1], #1 \n\t" /* high word << 1 */
1760 "adc %[c2], #0 \n\t" /* add carry bit to c2 */
1761 "lsls %[t0], #1 \n\t" /* low word << 1 */
1762 "adc %[t1], #0 \n\t" /* add carry bit to high word */
1766 "adds %[c0], %[t0] \n\t" /* add low word to c0 */
1767 "adcs %[c1], %[t1] \n\t" /* add high word to c1, including carry */
1768 "adc %[c2], #0 \n\t" /* add carry to c2 */
1770 "adds %[i], #4 \n\t" /* i += 4 */
1771 "cmp %[i], %[k] \n\t" /* i < k? */
1772 "bge 5f \n\t" /* if not, exit the loop */
1773 "subs %[tt], %[k], %[i] \n\t" /* tt = k-i */
1774 "cmp %[i], %[tt] \n\t" /* i <= k-i? */
1775 "ble 3b \n\t" /* if so, continue looping */
1777 "5: \n\t" /* end inner loop */
1779 "str %[c0], [%[result], %[k]] \n\t" /* p_result[k] = c0 */
1780 "mov %[c0], %[c1] \n\t" /* c0 = c1 */
1781 "mov %[c1], %[c2] \n\t" /* c1 = c2 */
1782 "movs %[c2], #0 \n\t" /* c2 = 0 */
1783 "adds %[k], #4 \n\t" /* k += 4 */
1784 "cmp %[k], %[eccd] \n\t" /* k < uECC_WORDS (times 4) ? */
1785 "blt 1b \n\t" /* if so, loop back, start with i = 0 */
1786 "cmp %[k], %[eccd2m1] \n\t" /* k < uECC_WORDS * 2 - 1 (times 4) ? */
1787 "blt 2b \n\t" /* if so, loop back, start with i = (k+1) - uECC_WORDS */
1788 /* end outer loop */
1790 "str %[c0], [%[result], %[k]] \n\t" /* p_result[uECC_WORDS * 2 - 1] = c0 */
1791 #if (uECC_PLATFORM != uECC_arm_thumb2)
1792 ".syntax divided \n\t"
1794 : [c0] "+r" (c0), [c1] "+r" (c1), [c2] "+r" (c2), [k] "+r" (k), [i] "=&r" (i), [tt] "=&r" (tt), [t0] "=&r" (t0), [t1] "=&r" (t1)
1795 : [result] "r" (p_result), [left] "r" (p_left),
1796 [eccd] "I" (uECC_WORDS * 4), [eccdm1] "I" ((uECC_WORDS-1) * 4), [eccd2m1] "I" ((uECC_WORDS * 2 - 1) * 4)
/* Second implementation: fixed low registers, no UMULL.
   r0 doubles as a scratch register inside the loop, so p_result is saved on
   the stack up front and reloaded from [sp] whenever a result word is stored. */
1802 register uint32_t *r0 __asm__("r0") = p_result;
1803 register uint32_t *r1 __asm__("r1") = p_left;
1806 ".syntax unified \n\t"
1807 "movs r2, #0 \n\t" /* c0 = 0 */
1808 "movs r3, #0 \n\t" /* c1 = 0 */
1809 "movs r4, #0 \n\t" /* c2 = 0 */
1810 "movs r5, #0 \n\t" /* k = 0 */
1812 "push {r0} \n\t" /* keep p_result on the stack */
1814 "1: \n\t" /* outer loop (k < uECC_WORDS) */
1815 "movs r6, #0 \n\t" /* r6 = i = 0 */
1818 "2: \n\t" /* outer loop (k >= uECC_WORDS) */
1819 "movs r6, r5 \n\t" /* r6 = k */
1820 "subs r6, %[eccdm1] \n\t" /* r6 = i = k - (uECC_WORDS - 1) (times 4) */
1822 "3: \n\t" /* inner loop */
1823 "push {r2, r3, r4, r5} \n\t" /* save c0, c1, c2, k; r2 (c0) is at the top of stack. */
1824 "subs r7, r5, r6 \n\t" /* r7 = k-i */
1826 "ldr r3, [r1, r7] \n\t" /* r3 = p_left[k-i] */
1827 "ldr r0, [r1, r6] \n\t" /* r0 = p_left[i] */
1829 "lsrs r2, r0, #16 \n\t" /* r2 = a1 (high half of p_left[i]) */
1830 "uxth r0, r0 \n\t" /* r0 = a0 (low half of p_left[i]) */
1832 "lsrs r4, r3, #16 \n\t" /* r4 = b1 (high half of p_left[k-i]) */
1833 "uxth r3, r3 \n\t" /* r3 = b0 (low half of p_left[k-i]) */
1835 "movs r5, r2 \n\t" /* r5 = a1 */
1836 "muls r5, r4, r5 \n\t" /* r5 = a1*b1 */
1837 "muls r2, r3, r2 \n\t" /* r2 = b0*a1 */
1838 "muls r4, r0, r4 \n\t" /* r4 = a0*b1 */
1839 "muls r0, r3, r0 \n\t" /* r0 = a0*b0 */
1841 "movs r3, #0 \n\t" /* r3 = 0 */
1842 "adds r2, r4 \n\t" /* r2 = b0*a1 + a0*b1 */
1843 "adcs r3, r3 \n\t" /* r3 = carry */
1844 "lsls r3, #16 \n\t" /* r3 = carry << 16 */
1845 "adds r5, r3 \n\t" /* r5 = a1*b1 + (carry << 16) */
1847 "lsls r3, r2, #16 \n\t" /* r3 = (b0*a1 + a0*b1) << 16 */
1848 "lsrs r2, #16 \n\t" /* r2 = (b0*a1 + a0*b1) >> 16 */
1849 "adds r0, r3 \n\t" /* r0 = low word = a0*b0 + ((b0*a1 + a0*b1) << 16) */
1850 "adcs r5, r2 \n\t" /* r5 = high word = a1*b1 + (carry << 16) + ((b0*a1 + a0*b1) >> 16) + carry from the low word */
1852 "movs r3, #0 \n\t" /* r3 = 0 */
1853 "cmp r6, r7 \n\t" /* (i < k-i) ? */
1854 "mov r7, r3 \n\t" /* r7 = 0 (does not affect condition)*/
1855 "bge 4f \n\t" /* if i >= k-i, skip */
1856 "lsls r5, #1 \n\t" /* high word << 1 */
1857 "adcs r7, r3 \n\t" /* r7 = carry bit for c2 */
1858 "lsls r0, #1 \n\t" /* low word << 1 */
1859 "adcs r5, r3 \n\t" /* add carry from shift to high word */
1862 "pop {r2, r3, r4} \n\t" /* r2 = c0, r3 = c1, r4 = c2 */
1863 "adds r2, r0 \n\t" /* add low word to c0 */
1864 "adcs r3, r5 \n\t" /* add high word to c1, including carry */
1865 "movs r0, #0 \n\t" /* r0 = 0 (does not affect carry bit) */
1866 "adcs r4, r0 \n\t" /* add carry to c2 */
1867 "adds r4, r7 \n\t" /* add carry from doubling (if any) */
1869 "pop {r5} \n\t" /* r5 = k */
1871 "adds r6, #4 \n\t" /* i += 4 */
1872 "cmp r6, r5 \n\t" /* i < k? */
1873 "bge 5f \n\t" /* if not, exit the loop */
1874 "subs r7, r5, r6 \n\t" /* r7 = k-i */
1875 "cmp r6, r7 \n\t" /* i <= k-i? */
1876 "ble 3b \n\t" /* if so, continue looping */
1878 "5: \n\t" /* end inner loop */
1880 "ldr r0, [sp, #0] \n\t" /* r0 = p_result */
1882 "str r2, [r0, r5] \n\t" /* p_result[k] = c0 */
1883 "mov r2, r3 \n\t" /* c0 = c1 */
1884 "mov r3, r4 \n\t" /* c1 = c2 */
1885 "movs r4, #0 \n\t" /* c2 = 0 */
1886 "adds r5, #4 \n\t" /* k += 4 */
1887 "cmp r5, %[eccd] \n\t" /* k < uECC_WORDS (times 4) ? */
1888 "blt 1b \n\t" /* if so, loop back, start with i = 0 */
1889 "cmp r5, %[eccd2m1] \n\t" /* k < uECC_WORDS * 2 - 1 (times 4) ? */
1890 "blt 2b \n\t" /* if so, loop back, start with i = (k+1) - uECC_WORDS */
1891 /* end outer loop */
1893 "str r2, [r0, r5] \n\t" /* p_result[uECC_WORDS * 2 - 1] = c0 */
1894 "pop {r0} \n\t" /* pop p_result off the stack */
1896 ".syntax divided \n\t"
1897 : [r0] "+l" (r0), [r1] "+l" (r1)
1898 : [eccd] "I" (uECC_WORDS * 4), [eccdm1] "I" ((uECC_WORDS-1) * 4), [eccd2m1] "I" ((uECC_WORDS * 2 - 1) * 4)
1899 : "r2", "r3", "r4", "r5", "r6", "r7", "cc", "memory"
1903 #define asm_square 1
1904 #endif /* !asm_square */
1905 #endif /* uECC_SQUARE_FUNC */