builtins: Add ARM Thumb1 implementation for uidiv and uidivmod
author Weiming Zhao <weimingz@codeaurora.org>
Tue, 6 Dec 2016 07:13:15 +0000 (07:13 +0000)
committer Weiming Zhao <weimingz@codeaurora.org>
Tue, 6 Dec 2016 07:13:15 +0000 (07:13 +0000)
This resubmits r288710, which broke Darwin armv7em.

llvm-svn: 288777

compiler-rt/lib/builtins/arm/aeabi_uidivmod.S
compiler-rt/lib/builtins/arm/udivsi3.S
compiler-rt/lib/builtins/assembly.h

index 4a89449..7098bc6 100644 (file)
         .syntax unified
         .p2align 2
 DEFINE_COMPILERRT_FUNCTION(__aeabi_uidivmod)
+#if __ARM_ARCH_ISA_THUMB == 1
+        // Thumb1 path. On entry r0 = numerator, r1 = denominator.
+        // On exit r0 = quotient, r1 = remainder.
+        cmp     r0, r1
+        bcc     LOCAL_LABEL(case_denom_larger)
+        push    {r0, r1, lr}    // save numerator, denominator, return address
+        bl      SYMBOL_NAME(__aeabi_uidiv)
+        pop     {r1, r2, r3}    // r1 = numerator, r2 = denominator, r3 = saved lr
+        // Thumb1 MULS has the form "MULS Rdm, Rn, Rdm": the destination must
+        // also be the last operand, so the quotient (r0) goes in the middle.
+        muls    r2, r0, r2 // r2 = quot * denom
+        subs    r1, r1, r2 // r1 = numerator - quot * denom
+        JMP     (r3)
+LOCAL_LABEL(case_denom_larger):
+        // numerator < denominator: quotient is 0, remainder is the numerator.
+        movs    r1, r0
+        movs    r0, #0
+        JMP     (lr)
+#else
         push    { lr }
         sub     sp, sp, #4
         mov     r2, sp
@@ -35,6 +49,7 @@ DEFINE_COMPILERRT_FUNCTION(__aeabi_uidivmod)
         ldr     r1, [sp]
         add     sp, sp, #4
         pop     { pc }
+#endif
 END_COMPILERRT_FUNCTION(__aeabi_uidivmod)
 
 NO_EXEC_STACK_DIRECTIVE
index 085f8fb..fcc472b 100644 (file)
@@ -40,12 +40,26 @@ DEFINE_COMPILERRT_FUNCTION(__udivsi3)
 #else
        cmp     r1, #1
        bcc     LOCAL_LABEL(divby0)
+#if __ARM_ARCH_ISA_THUMB == 1
+       bne LOCAL_LABEL(num_neq_denom)
+       JMP(lr)
+LOCAL_LABEL(num_neq_denom):
+#else
        IT(eq)
        JMPc(lr, eq)
+#endif
        cmp     r0, r1
+#if __ARM_ARCH_ISA_THUMB == 1
+       bhs LOCAL_LABEL(num_ge_denom)
+       movs r0, #0
+       JMP(lr)
+LOCAL_LABEL(num_ge_denom):
+#else
        ITT(cc)
        movcc   r0, #0
        JMPc(lr, cc)
+#endif
+
        /*
         * Implement division using binary long division algorithm.
         *
@@ -62,7 +76,7 @@ DEFINE_COMPILERRT_FUNCTION(__udivsi3)
         * that (r0 << shift) < 2 * r1. The quotient is stored in r3.
         */
 
-#  ifdef __ARM_FEATURE_CLZ
+#  if defined(__ARM_FEATURE_CLZ)
        clz     ip, r0
        clz     r3, r1
        /* r0 >= r1 implies clz(r0) <= clz(r1), so ip <= r3. */
@@ -77,49 +91,128 @@ DEFINE_COMPILERRT_FUNCTION(__udivsi3)
        sub     ip, ip, r3, lsl #3
        mov     r3, #0
        bx      ip
-#  else
+#  else /* No CLZ Feature */
 #    if __ARM_ARCH_ISA_THUMB == 2
 #    error THUMB mode requires CLZ or UDIV
 #    endif
+#    if __ARM_ARCH_ISA_THUMB == 1
+#      define BLOCK_SIZE 10
+#    else
+#      define BLOCK_SIZE 12
+#    endif
+
        mov     r2, r0
+#    if __ARM_ARCH_ISA_THUMB == 1
+       mov ip, r0
+       adr r0, LOCAL_LABEL(div0block)
+       adds r0, #1
+#    else
        adr     ip, LOCAL_LABEL(div0block)
-
-       lsr     r3, r2, #16
+#    endif
+       lsrs    r3, r2, #16
        cmp     r3, r1
+#    if __ARM_ARCH_ISA_THUMB == 1
+       blo LOCAL_LABEL(skip_16)
+       movs r2, r3
+       subs r0, r0, #(16 * BLOCK_SIZE)
+LOCAL_LABEL(skip_16):
+#    else
        movhs   r2, r3
-       subhs   ip, ip, #(16 * 12)
+       subhs   ip, ip, #(16 * BLOCK_SIZE)
+#    endif
 
-       lsr     r3, r2, #8
+       lsrs    r3, r2, #8
        cmp     r3, r1
+#    if __ARM_ARCH_ISA_THUMB == 1
+       blo LOCAL_LABEL(skip_8)
+       movs r2, r3
+       subs r0, r0, #(8 * BLOCK_SIZE)
+LOCAL_LABEL(skip_8):
+#    else
        movhs   r2, r3
-       subhs   ip, ip, #(8 * 12)
+       subhs   ip, ip, #(8 * BLOCK_SIZE)
+#    endif
 
-       lsr     r3, r2, #4
+       lsrs    r3, r2, #4
        cmp     r3, r1
+#    if __ARM_ARCH_ISA_THUMB == 1
+       blo LOCAL_LABEL(skip_4)
+       movs r2, r3
+       subs r0, r0, #(4 * BLOCK_SIZE)
+LOCAL_LABEL(skip_4):
+#    else
        movhs   r2, r3
-       subhs   ip, #(4 * 12)
+       subhs   ip, #(4 * BLOCK_SIZE)
+#    endif
 
-       lsr     r3, r2, #2
+       lsrs    r3, r2, #2
        cmp     r3, r1
+#    if __ARM_ARCH_ISA_THUMB == 1
+       blo LOCAL_LABEL(skip_2)
+       movs r2, r3
+       subs r0, r0, #(2 * BLOCK_SIZE)
+LOCAL_LABEL(skip_2):
+#    else
        movhs   r2, r3
-       subhs   ip, ip, #(2 * 12)
+       subhs   ip, ip, #(2 * BLOCK_SIZE)
+#    endif
 
        /* Last block, no need to update r2 or r3. */
+#    if __ARM_ARCH_ISA_THUMB == 1
+       lsrs r3, r2, #1
+       cmp r3, r1
+       blo LOCAL_LABEL(skip_1)
+       subs r0, r0, #(1 * BLOCK_SIZE)
+LOCAL_LABEL(skip_1):
+       movs r2, r0
+       mov r0, ip
+       movs r3, #0
+       JMP (r2)
+
+#    else
        cmp     r1, r2, lsr #1
-       subls   ip, ip, #(1 * 12)
+       subls   ip, ip, #(1 * BLOCK_SIZE)
 
-       mov     r3, #0
+       movs    r3, #0
 
        JMP(ip)
-#  endif
+#    endif
+#  endif /* __ARM_FEATURE_CLZ */
+
 
 #define        IMM     #
+       /* Due to the limited range of branches in Thumb1, the divby0 block
+                has to be placed closer to the code that branches to it. */
+LOCAL_LABEL(divby0):
+       /* Division by zero: put 0 in r0 as the result. */
+       movs    r0, #0
+#      if defined(__ARM_EABI__)
+       /* bl overwrites lr, so the return address has to be saved around the
+          call. Returning through lr after the bl would branch back to the
+          instruction right after the bl and never leave. r7 is pushed along
+          with lr only to keep the stack 8-byte aligned across the call. */
+       push    {r7, lr}
+       bl      __aeabi_idiv0 // due to relocation limit, can't use b.
+       pop     {r7, pc}
+#      else
+       JMP(lr)
+#      endif
 
+
+#if __ARM_ARCH_ISA_THUMB == 1
+#define block(shift) /* Thumb1: one compare/subtract step of the long division for bit `shift` */ \
+       lsls r2, r1, IMM shift; /* r2 = r1 << shift */                               \
+       cmp r0, r2;                                                                  \
+       blo LOCAL_LABEL(block_skip_##shift); /* r0 < r2: skip the subtraction */     \
+       subs r0, r0, r2;                                                             \
+       LOCAL_LABEL(block_skip_##shift) :;                                           \
+       adcs r3, r3 /* same as ((r3 << 1) | Carry). Carry is set if r0 >= r2. */
+
+       /* TODO: if the current location counter is not word aligned, we don't
+                need the .p2align and nop */
+       /* Label div0block must be word-aligned. First align block 31 */
+       .p2align 2
+       nop /* Padding to align div0block as 31 blocks = 310 bytes */
+
+#else
 #define block(shift) /* one compare/subtract step of the long division for bit `shift` */ \
        cmp     r0, r1, lsl IMM shift; /* compare r0 with (r1 << shift) */      \
        ITT(hs);                                                               \
        WIDE(addhs)     r3, r3, IMM (1 << shift); /* r0 >= (r1 << shift): add this bit to r3 */ \
        WIDE(subhs)     r0, r0, r1, lsl IMM shift /* ... and subtract (r1 << shift) from r0 */
+#endif
 
        block(31)
        block(30)
@@ -159,12 +252,14 @@ LOCAL_LABEL(div0block):
        JMP(lr)
 #endif /* __ARM_ARCH_EXT_IDIV__ */
 
+#if __ARM_ARCH_EXT_IDIV__
 LOCAL_LABEL(divby0):
-       mov     r0, #0
-#ifdef __ARM_EABI__
-       b       __aeabi_idiv0
-#else
-       JMP(lr)
+        mov     r0, #0
+#  ifdef __ARM_EABI__
+        b       __aeabi_idiv0
+#  else
+        JMP(lr)
+#  endif
 #endif
 
 END_COMPILERRT_FUNCTION(__udivsi3)
index 5e36b5a..377b3ea 100644 (file)
@@ -71,7 +71,8 @@
 #define ARM_HAS_BX
 #endif
 #if !defined(__ARM_FEATURE_CLZ) &&                                             \
-    (__ARM_ARCH >= 6 || (__ARM_ARCH == 5 && !defined(__ARM_ARCH_5__)))
+    ((__ARM_ARCH >= 6 && __ARM_ARCH_PROFILE != 'M') ||                         \
+     (__ARM_ARCH == 5 && !defined(__ARM_ARCH_5__)))
 #define __ARM_FEATURE_CLZ
 #endif