From: Joerg Sonnenberger Date: Sun, 20 Jul 2014 20:53:37 +0000 (+0000) Subject: Redo THUMB support. X-Git-Url: http://review.tizen.org/git/?a=commitdiff_plain;h=9720fcf4bfd52bd50960d0a48f9b6ed7b60a8519;p=platform%2Fupstream%2Fllvm.git Redo THUMB support. Discussed with and tested by: Saleem Abdulrasool llvm-svn: 213481 --- diff --git a/compiler-rt/lib/builtins/arm/udivmodsi4.S b/compiler-rt/lib/builtins/arm/udivmodsi4.S index ddc8752..2bb65fe 100644 --- a/compiler-rt/lib/builtins/arm/udivmodsi4.S +++ b/compiler-rt/lib/builtins/arm/udivmodsi4.S @@ -17,6 +17,18 @@ .syntax unified .text +#if __ARM_ARCH_ISA_THUMB == 2 + .thumb +#endif + +#if __ARM_ARCH_ISA_THUMB == 2 +#define IT(cond) it cond +#define ITT(cond) itt cond +#else +#define IT(cond) +#define ITT(cond) +#endif + .p2align 2 DEFINE_COMPILERRT_FUNCTION(__udivmodsi4) #if __ARM_ARCH_EXT_IDIV__ @@ -42,6 +54,7 @@ DEFINE_COMPILERRT_FUNCTION(__udivmodsi4) * r0 and (r1 << I) have the highest bit set in the same position. * At the time of JMP, ip := .Ldiv0block - 12 * I. * This depends on the fixed instruction size of block. + * For ARM mode, this is 12 Bytes, for THUMB mode 14 Bytes. * * block(shift) implements the test-and-update-quotient core. * It assumes (r0 << shift) can be computed without overflow and @@ -53,12 +66,20 @@ DEFINE_COMPILERRT_FUNCTION(__udivmodsi4) clz r3, r1 /* r0 >= r1 implies clz(r0) <= clz(r1), so ip <= r3. */ sub r3, r3, ip +# if __ARM_ARCH_ISA_THUMB == 2 + adr ip, LOCAL_LABEL(div0block) + 1 + sub ip, ip, r3, lsl #1 +# else adr ip, LOCAL_LABEL(div0block) +# endif sub ip, ip, r3, lsl #2 sub ip, ip, r3, lsl #3 mov r3, #0 bx ip # else +# if __ARM_ARCH_ISA_THUMB == 2 +# error THUMB mode requires CLZ or UDIV +# endif str r4, [sp, #-8]! mov r4, r0 @@ -98,8 +119,9 @@ DEFINE_COMPILERRT_FUNCTION(__udivmodsi4) #define block(shift) \ cmp r0, r1, lsl IMM shift; \ - addhs r3, r3, IMM (1 << shift); \ - subhs r0, r0, r1, lsl IMM shift + ITT(hs); \ + addhs.w r3, r3, IMM (1 << shift); \ + subhs.w r0, r0, r1, lsl IMM shift block(31) block(30) diff --git a/compiler-rt/lib/builtins/arm/udivsi3.S b/compiler-rt/lib/builtins/arm/udivsi3.S index 8fb1dca..19fea56 100644 --- a/compiler-rt/lib/builtins/arm/udivsi3.S +++ b/compiler-rt/lib/builtins/arm/udivsi3.S @@ -17,6 +17,18 @@ .syntax unified .text +#if __ARM_ARCH_ISA_THUMB == 2 + .thumb +#endif + +#if __ARM_ARCH_ISA_THUMB == 2 +#define IT(cond) it cond +#define ITT(cond) itt cond +#else +#define IT(cond) +#define ITT(cond) +#endif + .p2align 2 DEFINE_AEABI_FUNCTION_ALIAS(__aeabi_uidiv, __udivsi3) DEFINE_COMPILERRT_FUNCTION(__udivsi3) @@ -30,8 +42,10 @@ DEFINE_COMPILERRT_FUNCTION(__udivsi3) #else cmp r1, #1 bcc LOCAL_LABEL(divby0) + IT(eq) JMPc(lr, eq) cmp r0, r1 + ITT(cc) movcc r0, #0 JMPc(lr, cc) /* @@ -43,6 +57,7 @@ DEFINE_COMPILERRT_FUNCTION(__udivsi3) * r0 and (r1 << I) have the highest bit set in the same position. * At the time of JMP, ip := .Ldiv0block - 12 * I. * This depends on the fixed instruction size of block. + * For ARM mode, this is 12 Bytes, for THUMB mode 14 Bytes. * * block(shift) implements the test-and-update-quotient core. * It assumes (r0 << shift) can be computed without overflow and @@ -54,12 +69,20 @@ DEFINE_COMPILERRT_FUNCTION(__udivsi3) clz r3, r1 /* r0 >= r1 implies clz(r0) <= clz(r1), so ip <= r3. */ sub r3, r3, ip +# if __ARM_ARCH_ISA_THUMB == 2 + adr ip, LOCAL_LABEL(div0block) + 1 + sub ip, ip, r3, lsl #1 +# else adr ip, LOCAL_LABEL(div0block) +# endif sub ip, ip, r3, lsl #2 sub ip, ip, r3, lsl #3 mov r3, #0 bx ip # else +# if __ARM_ARCH_ISA_THUMB == 2 +# error THUMB mode requires CLZ or UDIV +# endif mov r2, r0 adr ip, LOCAL_LABEL(div0block) @@ -96,8 +119,9 @@ DEFINE_COMPILERRT_FUNCTION(__udivsi3) #define block(shift) \ cmp r0, r1, lsl IMM shift; \ - addhs r3, r3, IMM (1 << shift); \ - subhs r0, r0, r1, lsl IMM shift + ITT(hs); \ + addhs.w r3, r3, IMM (1 << shift); \ + subhs.w r0, r0, r1, lsl IMM shift block(31) block(30) diff --git a/compiler-rt/lib/builtins/arm/umodsi3.S b/compiler-rt/lib/builtins/arm/umodsi3.S index 164646b..e81af0c 100644 --- a/compiler-rt/lib/builtins/arm/umodsi3.S +++ b/compiler-rt/lib/builtins/arm/umodsi3.S @@ -16,6 +16,17 @@ .syntax unified .text +#if __ARM_ARCH_ISA_THUMB == 2 + .thumb +#endif + +#if __ARM_ARCH_ISA_THUMB == 2 +#define IT(cond) it cond +#define ITT(cond) itt cond +#else +#define IT(cond) +#define ITT(cond) +#endif .p2align 2 DEFINE_COMPILERRT_FUNCTION(__umodsi3) @@ -30,9 +41,11 @@ DEFINE_COMPILERRT_FUNCTION(__umodsi3) #else cmp r1, #1 bcc LOCAL_LABEL(divby0) + ITT(eq) moveq r0, #0 JMPc(lr, eq) cmp r0, r1 + IT(cc) JMPc(lr, cc) /* * Implement division using binary long division algorithm. @@ -43,6 +56,7 @@ DEFINE_COMPILERRT_FUNCTION(__umodsi3) * r0 and (r1 << I) have the highest bit set in the same position. * At the time of JMP, ip := .Ldiv0block - 8 * I. * This depends on the fixed instruction size of block. + * For ARM mode, this is 8 Bytes, for THUMB mode 10 Bytes. * * block(shift) implements the test-and-update-quotient core. * It assumes (r0 << shift) can be computed without overflow and @@ -54,10 +68,18 @@ DEFINE_COMPILERRT_FUNCTION(__umodsi3) clz r3, r1 /* r0 >= r1 implies clz(r0) <= clz(r1), so ip <= r3. */ sub r3, r3, ip +# if __ARM_ARCH_ISA_THUMB == 2 + adr ip, LOCAL_LABEL(div0block) + 1 + sub ip, ip, r3, lsl #1 +# else adr ip, LOCAL_LABEL(div0block) +# endif sub ip, ip, r3, lsl #3 bx ip # else +# if __ARM_ARCH_ISA_THUMB == 2 +# error THUMB mode requires CLZ or UDIV +# endif mov r2, r0 adr ip, LOCAL_LABEL(div0block) @@ -90,9 +112,10 @@ DEFINE_COMPILERRT_FUNCTION(__umodsi3) #define IMM # -#define block(shift) \ - cmp r0, r1, lsl IMM shift; \ - subhs r0, r0, r1, lsl IMM shift +#define block(shift) \ + cmp r0, r1, lsl IMM shift; \ + IT(hs); \ + subhs.w r0, r0, r1, lsl IMM shift block(31) block(30)