ARM: 8619/1: udelay: document the various constants
author Nicolas Pitre <nicolas.pitre@linaro.org>
Fri, 7 Oct 2016 04:38:35 +0000 (05:38 +0100)
committer Russell King <rmk+kernel@armlinux.org.uk>
Wed, 19 Oct 2016 09:52:36 +0000 (10:52 +0100)
Explain where the values for UDELAY_MULT and UDELAY_SHIFT come from.
Also fix/clarify some comments pertaining to their usage in the
assembly code.

Signed-off-by: Nicolas Pitre <nico@linaro.org>
Signed-off-by: Russell King <rmk+kernel@arm.linux.org.uk>
arch/arm/include/asm/delay.h
arch/arm/lib/delay-loop.S

index b1ce037..e986b7f 100644 (file)
@@ -9,6 +9,33 @@
 #include <asm/memory.h>
 #include <asm/param.h> /* HZ */
 
+/*
+ * Loop (or tick) based delay:
+ *
+ * loops = loops_per_jiffy * jiffies_per_sec * delay_us / us_per_sec
+ *
+ * where:
+ *
+ * jiffies_per_sec = HZ
+ * us_per_sec = 1000000
+ *
+ * Therefore the constant part is HZ / 1000000 which is a small
+ * fractional number. To make this usable with integer math, we
+ * scale up this constant by 2^31, perform the actual multiplication,
+ * and scale the result back down by 2^31 with a simple shift:
+ *
+ * loops = (loops_per_jiffy * delay_us * UDELAY_MULT) >> 31
+ *
+ * where:
+ *
+ * UDELAY_MULT = 2^31 * HZ / 1000000
+ *             = (2^31 / 1000000) * HZ
+ *             = 2147.483648 * HZ
+ *             = 2147 * HZ + 483648 * HZ / 1000000
+ *
+ * 31 is the biggest scale shift value that won't overflow 32 bits for
+ * delay_us * UDELAY_MULT assuming HZ <= 1000 and delay_us <= 2000.
+ */
 #define MAX_UDELAY_MS  2
 #define UDELAY_MULT    UL(2147 * HZ + 483648 * HZ / 1000000)
 #define UDELAY_SHIFT   31
index 792c59d..c766694 100644 (file)
 .LC1:          .word   UDELAY_MULT
 
 /*
+ * loops = r0 * HZ * loops_per_jiffy / 1000000
+ *
  * r0  <= 2000
  * HZ  <= 1000
  */
 
 ENTRY(__loop_udelay)
                ldr     r2, .LC1
-               mul     r0, r2, r0
-ENTRY(__loop_const_udelay)                     @ 0 <= r0 <= 0x7fffff06
+               mul     r0, r2, r0              @ r0 = delay_us * UDELAY_MULT
+ENTRY(__loop_const_udelay)                     @ 0 <= r0 <= 0xfffffaf0
                ldr     r2, .LC0
                ldr     r2, [r2]
-               umull   r1, r0, r2, r0
-               adds    r1, r1, #0xffffffff
-               adcs    r0, r0, r0
+               umull   r1, r0, r2, r0          @ r0-r1 = r0 * loops_per_jiffy
+               adds    r1, r1, #0xffffffff     @ rounding up ...
+               adcs    r0, r0, r0              @ and right shift by 31
                reteq   lr
 
-/*
- * loops = r0 * HZ * loops_per_jiffy / 1000000
- */
                .align 3
 
 @ Delay routine