/****************************************************************************
*
- * Copyright 2016 Samsung Electronics All Rights Reserved.
+ * Copyright 2016-2017 Samsung Electronics All Rights Reserved.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
*
****************************************************************************/
/************************************************************************************
- * tinyara/arch/arm/src/armv7-r/arm_memcpy.S
+ * TizenRT/arch/arm/src/armv7-r/arm_memcpy.S
*
- * ARMv7-R optimised memcpy, based on the ARMv7-M version contributed by Mike Smith.
- * Apparently in the public domain and is re-released here under the modified BSD
- * license:
+ * ARMv7-R optimized memcpy.
+ *
+ * Adapted for use with ARMv7-R and NuttX by:
+ *
+ * Copyright (C) 2017 Gregory Nutt. All rights reserved.
+ * Author: Gregory Nutt <gnutt@nuttx.org>
+ *
+ * Based on the ARMv7-M version contributed by Mike Smith. Apparently in the public
+ * domain and is re-released here under the modified BSD license:
*
* Obtained via a posting on the Stellaris forum:
* http://e2e.ti.com/support/microcontrollers/\
*/
MEM_DataCopyTable:
/* Alignment dispatch table read by _do_memcpy with a 4-bit index held in r14: */
/* destination address bits [1:0] in index bits [1:0], source address bits [1:0] in index bits [3:2]. */
/* Each entry is the distance from MEM_DataCopyJump to the handler, stored as a count of */
/* 32-bit words (>> 2); the dispatcher scales it back up with "lsl #2" before branching. */
/* The removed (-) entries stored a half-word count (>> 1), which is the unit "tbb" consumes. */
/* NOTE(review): every entry has to fit in one byte, so each handler must start on a word */
/* boundary less than 1024 bytes after MEM_DataCopyJump - confirm against the complete file. */
- .byte (MEM_DataCopy0 - MEM_DataCopyJump) >> 1
- .byte (MEM_DataCopy1 - MEM_DataCopyJump) >> 1
- .byte (MEM_DataCopy2 - MEM_DataCopyJump) >> 1
- .byte (MEM_DataCopy3 - MEM_DataCopyJump) >> 1
- .byte (MEM_DataCopy4 - MEM_DataCopyJump) >> 1
- .byte (MEM_DataCopy5 - MEM_DataCopyJump) >> 1
- .byte (MEM_DataCopy6 - MEM_DataCopyJump) >> 1
- .byte (MEM_DataCopy7 - MEM_DataCopyJump) >> 1
- .byte (MEM_DataCopy8 - MEM_DataCopyJump) >> 1
- .byte (MEM_DataCopy9 - MEM_DataCopyJump) >> 1
- .byte (MEM_DataCopy10 - MEM_DataCopyJump) >> 1
- .byte (MEM_DataCopy11 - MEM_DataCopyJump) >> 1
- .byte (MEM_DataCopy12 - MEM_DataCopyJump) >> 1
- .byte (MEM_DataCopy13 - MEM_DataCopyJump) >> 1
- .byte (MEM_DataCopy14 - MEM_DataCopyJump) >> 1
- .byte (MEM_DataCopy15 - MEM_DataCopyJump) >> 1
+ .byte (MEM_DataCopy0 - MEM_DataCopyJump) >> 2
+ .byte (MEM_DataCopy1 - MEM_DataCopyJump) >> 2
+ .byte (MEM_DataCopy2 - MEM_DataCopyJump) >> 2
+ .byte (MEM_DataCopy3 - MEM_DataCopyJump) >> 2
+ .byte (MEM_DataCopy4 - MEM_DataCopyJump) >> 2
+ .byte (MEM_DataCopy5 - MEM_DataCopyJump) >> 2
+ .byte (MEM_DataCopy6 - MEM_DataCopyJump) >> 2
+ .byte (MEM_DataCopy7 - MEM_DataCopyJump) >> 2
+ .byte (MEM_DataCopy8 - MEM_DataCopyJump) >> 2
+ .byte (MEM_DataCopy9 - MEM_DataCopyJump) >> 2
+ .byte (MEM_DataCopy10 - MEM_DataCopyJump) >> 2
+ .byte (MEM_DataCopy11 - MEM_DataCopyJump) >> 2
+ .byte (MEM_DataCopy12 - MEM_DataCopyJump) >> 2
+ .byte (MEM_DataCopy13 - MEM_DataCopyJump) >> 2
+ .byte (MEM_DataCopy14 - MEM_DataCopyJump) >> 2
+ .byte (MEM_DataCopy15 - MEM_DataCopyJump) >> 2
.align 2
MEM_LongCopyTable:
/* Dispatch table for the "remaining long words" copy, indexed by the number of whole */
/* 32-bit words still to copy (byte count >> 2, i.e. 0 through 9 for the ten entries below). */
/* Each entry is the distance from MEM_LongCopyJump to the matching copy sequence, stored as */
/* a count of 32-bit words (>> 2); index 0 points straight at MEM_LongCopyEnd (nothing to copy). */
/* The removed (-) entries hard-coded "n * 10" byte distances in half-word units for "tbb"; */
/* the added (+) entries derive the distance from one label per copy sequence instead. */
- .byte (MEM_LongCopyEnd - MEM_LongCopyJump) >> 1 /* 0 bytes left */
- .byte 0 /* 4 bytes left */
- .byte (1 * 10) >> 1 /* 8 bytes left */
- .byte (2 * 10) >> 1 /* 12 bytes left */
- .byte (3 * 10) >> 1 /* 16 bytes left */
- .byte (4 * 10) >> 1 /* 20 bytes left */
- .byte (5 * 10) >> 1 /* 24 bytes left */
- .byte (6 * 10) >> 1 /* 28 bytes left */
- .byte (7 * 10) >> 1 /* 32 bytes left */
- .byte (8 * 10) >> 1 /* 36 bytes left */
-
- .align 2
+ .byte (MEM_LongCopyEnd - MEM_LongCopyJump) >> 2 /* 0 bytes left */
+ .byte (MEM_LongCopyJump0 - MEM_LongCopyJump) >> 2 /* 4 bytes left */
+ .byte (MEM_LongCopyJump1 - MEM_LongCopyJump) >> 2 /* 8 bytes left */
+ .byte (MEM_LongCopyJump2 - MEM_LongCopyJump) >> 2 /* 12 bytes left */
+ .byte (MEM_LongCopyJump3 - MEM_LongCopyJump) >> 2 /* 16 bytes left */
+ .byte (MEM_LongCopyJump4 - MEM_LongCopyJump) >> 2 /* 20 bytes left */
+ .byte (MEM_LongCopyJump5 - MEM_LongCopyJump) >> 2 /* 24 bytes left */
+ .byte (MEM_LongCopyJump6 - MEM_LongCopyJump) >> 2 /* 28 bytes left */
+ .byte (MEM_LongCopyJump7 - MEM_LongCopyJump) >> 2 /* 32 bytes left */
+ .byte (MEM_LongCopyJump8 - MEM_LongCopyJump) >> 2 /* 36 bytes left */
/************************************************************************************
* Public Functions
* Name: memcpy
*
* Description:
- * Optimised "general" copy routine
+ * Optimized "general" copy routine
*
* Input Parameters:
* r0 = destination, r1 = source, r2 = length
_do_memcpy:
push {r14}
+ push {r4}
/* This allows the inner workings to "assume" a minimum amount of bytes */
/* Quickly check for very short copies */
/* The return address (r14) and r4 are now on the stack, so both are free as scratch below; */
/* r4 is restored at MEM_DataCopyBytes and the saved r14 is popped into pc to return. */
/* Lengths below 4 (signed compare) skip the alignment dispatch and use the byte-wise tail. */
cmp r2, #4
- blt.n MEM_DataCopyBytes
+ blt MEM_DataCopyBytes
/* Build the 4-bit table index in r14: bits [1:0] = dst & 3, bits [3:2] = src & 3. */
and r14, r0, #3 /* Get destination alignment bits */
bfi r14, r1, #2, #2 /* Get source alignment bits */
- ldr r3, =MEM_DataCopyTable /* Jump table base */
- tbb [r3, r14] /* Perform jump on src/dst alignment bits */
-MEM_DataCopyJump:
+
/* Manual replacement for the removed "tbb": load the byte entry, scale it from words */
/* to bytes, add it to the anchor address and branch to the result with "bx". */
+ ldr r3, =MEM_DataCopyTable /* Jump table base address */
+ ldrb r4, [r3, r14] /* DWord offset for this alignment combination */
+ ldr r3, =MEM_DataCopyJump /* Base of branch table anchor */
+ add r3, r3, r4, lsl #2 /* Absolute address of logic */
+ bx r3
+
+ /* data copy branch table anchor */
/* NOTE(review): for ARM targets GNU as reads the .align operand as a power of two, so */
/* ".align 4" would request a 16-byte boundary here - confirm that this is intended. */
.align 4
+MEM_DataCopyJump:
/* Bits: Src=01, Dst=01 - Byte before half word to byte before half word - Same alignment
* 3 bytes to read for long word aligning
MEM_DataCopy0:
/* Save regs that may be used by memcpy */
/* r4 is left out of the added push because _do_memcpy already saved it at entry. */
- push {r4-r12}
+ push {r5-r12}
/* Check for short word-aligned copy */
/* 0x28 = 40 bytes, the amount moved by one ten-register (r3-r12) store in the bulk loop. */
cmp r2, #0x28
- blt.n MEM_DataCopy0_2
+ blt MEM_DataCopy0_2
/* Bulk copy loop */
/* NOTE(review): the MEM_DataCopy0_1 label and the load that fills r3-r12 are not visible */
/* in this excerpt; presumably they sit directly above the store - confirm in the full file. */
stmia r0!, {r3-r12}
sub r2, r2, #0x28
cmp r2, #0x28
- bge.n MEM_DataCopy0_1
+ bge MEM_DataCopy0_1
/* Copy remaining long words */
MEM_DataCopy0_2:
- /* Copy remaining long words */
-
- ldr r14, =MEM_LongCopyTable
- lsr r11, r2, #0x02
- tbb [r14, r11]
/* Manual replacement for the removed "tbb": r2 is below 0x28 here, so r2 >> 2 selects one */
/* of the ten MEM_LongCopyTable entries; the entry is scaled by 4 and added to the anchor. */
/* r14, r11 and r3 are scratch at this point (r14 and r11 were saved by the pushes above). */
/* NOTE(review): the shift amount below is written without '#', unlike the other shifts */
/* in this file (e.g. "lsr r3, r3, #0x10") - consider "#2" for consistency. */
+ ldr r14, =MEM_LongCopyTable /* Jump table base address */
+ lsr r11, r2, 2 /* Convert byte count to word count */
+ add r14, r14, r11 /* Jump table offset address */
+ ldrb r3, [r14] /* DWord offset from branch table anchor */
+ ldr r11, =MEM_LongCopyJump /* Address of branch table anchor */
+ add r11, r11, r3, lsl #2 /* Absolute address into branch table */
+ bx r11 /* Go there */
/* longword copy branch table anchor */
MEM_LongCopyJump:
/* Anchor for the MEM_LongCopyTable offsets.  Each MEM_LongCopyJumpN entry below copies */
/* N + 1 words from [r1] to [r0] with post-increment, using r3 upward as the transfer */
/* registers, then branches to MEM_LongCopyEnd; the last entry falls through to it. */
/* The removed (-) lines were unlabelled entries reached through the old "tbb" dispatch and */
/* carried .w/.n width suffixes; the added (+) lines give every entry its own label. */
- ldr.w r3, [r1], #0x04 /* 4 bytes remain */
- str.w r3, [r0], #0x04
- b.n MEM_LongCopyEnd
- ldmia.w r1!, {r3-r4} /* 8 bytes remain */
- stmia.w r0!, {r3-r4}
- b.n MEM_LongCopyEnd
- ldmia.w r1!, {r3-r5} /* 12 bytes remain */
- stmia.w r0!, {r3-r5}
- b.n MEM_LongCopyEnd
- ldmia.w r1!, {r3-r6} /* 16 bytes remain */
- stmia.w r0!, {r3-r6}
- b.n MEM_LongCopyEnd
- ldmia.w r1!, {r3-r7} /* 20 bytes remain */
- stmia.w r0!, {r3-r7}
- b.n MEM_LongCopyEnd
- ldmia.w r1!, {r3-r8} /* 24 bytes remain */
- stmia.w r0!, {r3-r8}
- b.n MEM_LongCopyEnd
- ldmia.w r1!, {r3-r9} /* 28 bytes remain */
- stmia.w r0!, {r3-r9}
- b.n MEM_LongCopyEnd
- ldmia.w r1!, {r3-r10} /* 32 bytes remain */
- stmia.w r0!, {r3-r10}
- b.n MEM_LongCopyEnd
- ldmia.w r1!, {r3-r11} /* 36 bytes remain */
- stmia.w r0!, {r3-r11}
+
+MEM_LongCopyJump0:
+ ldr r3, [r1], #0x04 /* 4 bytes remain */
+ str r3, [r0], #0x04
+ b MEM_LongCopyEnd
+
+MEM_LongCopyJump1:
+ ldmia r1!, {r3-r4} /* 8 bytes remain */
+ stmia r0!, {r3-r4}
+ b MEM_LongCopyEnd
+
+MEM_LongCopyJump2:
+ ldmia r1!, {r3-r5} /* 12 bytes remain */
+ stmia r0!, {r3-r5}
+ b MEM_LongCopyEnd
+
+MEM_LongCopyJump3:
+ ldmia r1!, {r3-r6} /* 16 bytes remain */
+ stmia r0!, {r3-r6}
+ b MEM_LongCopyEnd
+
+MEM_LongCopyJump4:
+ ldmia r1!, {r3-r7} /* 20 bytes remain */
+ stmia r0!, {r3-r7}
+ b MEM_LongCopyEnd
+
+MEM_LongCopyJump5:
+ ldmia r1!, {r3-r8} /* 24 bytes remain */
+ stmia r0!, {r3-r8}
+ b MEM_LongCopyEnd
+
+MEM_LongCopyJump6:
+ ldmia r1!, {r3-r9} /* 28 bytes remain */
+ stmia r0!, {r3-r9}
+ b MEM_LongCopyEnd
+
+MEM_LongCopyJump7:
+ ldmia r1!, {r3-r10} /* 32 bytes remain */
+ stmia r0!, {r3-r10}
+ b MEM_LongCopyEnd
+
+MEM_LongCopyJump8:
+ ldmia r1!, {r3-r11} /* 36 bytes remain */
+ stmia r0!, {r3-r11}
MEM_LongCopyEnd:
/* Common exit of the long-word copy entries: undo the matching "push {r5-r12}". */
- pop {r4-r12}
+ pop {r5-r12}
and r2, r2, #0x03 /* All the longs have been copied */
/* r2 now holds only the 0-3 trailing bytes that the word copies did not cover. */
/* Deal with up to 3 remaining bytes */
MEM_DataCopyBytes:
/* Deal with up to 3 remaining bytes */
/* Restore the r4 pushed at _do_memcpy entry; on the paths visible here only the saved */
/* r14 is then left on the stack, so every "pop {pc}" below returns to the caller. */
/* NOTE(review): "it eq" is a Thumb-2 construct; if this file is assembled for ARM state */
/* it presumably emits no code and "popeq" is simply conditional - confirm the build mode. */
+ pop {r4}
cmp r2, #0x00
it eq
popeq {pc}
+
/* First trailing byte; "subs" sets the flags so the return can be taken when none remain. */
ldrb r3, [r1], #0x01
strb r3, [r0], #0x01
subs r2, r2, #0x01
it eq
popeq {pc}
+
/* Second trailing byte. */
ldrb r3, [r1], #0x01
strb r3, [r0], #0x01
subs r2, r2, #0x01
it eq
popeq {pc}
+
/* Third trailing byte, followed by an unconditional return. */
ldrb r3, [r1], #0x01
strb r3, [r0], #0x01
pop {pc}
MEM_DataCopy2:
/* Below 0x28 (40) bytes the bulk loop is skipped and the copy proceeds one word at a time. */
cmp r2, #0x28
- blt.n MEM_DataCopy2_1
+ blt MEM_DataCopy2_1
/* Save regs */
/* r4 is left out of the added push/pop because _do_memcpy already saved it at entry. */
- push {r4-r12}
+ push {r5-r12}
/* Bulk copy loop */
/* NOTE(review): the MEM_DataCopy2_2 label and the loop's load/store instructions are not */
/* visible in this excerpt; only the count update and loop test appear below. */
sub r2, r2, #0x28
cmp r2, #0x28
- bge.n MEM_DataCopy2_2
- pop {r4-r12}
+ bge MEM_DataCopy2_2
+ pop {r5-r12}
MEM_DataCopy2_1: /* Read longs and write 2 x half words */
/* Fewer than 4 bytes left: hand over to the byte-wise tail. */
cmp r2, #4
- blt.n MEM_DataCopyBytes
+ blt MEM_DataCopyBytes
/* Load one word, store its low half-word, shift the high half-word down and store that. */
ldr r3, [r1], #0x04
strh r3, [r0], #0x02
lsr r3, r3, #0x10
strh r3, [r0], #0x02
sub r2, r2, #0x04
/* Loop back to the top so the remaining count is re-tested. */
- b.n MEM_DataCopy2
+ b MEM_DataCopy2
/* Bits: Src=01, Dst=00 - Byte before half word to long
* Bits: Src=01, Dst=10 - Byte before half word to half word
lsr r3, r3, #0x10
strb r3, [r0], #0x01
sub r2, r2, #0x04
- b.n MEM_DataCopy3
+ b MEM_DataCopy3
.size memcpy, .-memcpy
.end