csky: optimize memcpy_{from,to}io() and memset_io()
author Guo Ren <guoren@linux.alibaba.com>
Wed, 6 Apr 2022 13:32:22 +0000 (21:32 +0800)
committer Guo Ren <guoren@linux.alibaba.com>
Mon, 18 Apr 2022 13:23:55 +0000 (21:23 +0800)
Optimize memcpy_{from,to}io() and memset_io() by transferring in
32 bit as much as possible with minimized barrier usage.  This
simple optimization brings faster throughput compared to the current
byte-by-byte read and write with barrier in the loop. The code's
skeleton is taken from powerpc & arm64.

Signed-off-by: Guo Ren <guoren@linux.alibaba.com>
Signed-off-by: Guo Ren <guoren@kernel.org>
arch/csky/include/asm/io.h
arch/csky/kernel/Makefile
arch/csky/kernel/io.c [new file with mode: 0644]

index ed53f0b..4725bb9 100644 (file)
 #endif
 
 /*
+ * String version of I/O memory access operations (see arch/csky/kernel/io.c).
+ */
+extern void __memcpy_fromio(void *, const volatile void __iomem *, size_t); /* (to, from, count) */
+extern void __memcpy_toio(volatile void __iomem *, const void *, size_t); /* (to, from, count) */
+extern void __memset_io(volatile void __iomem *, int, size_t); /* (dst, c, count) */
+
+#define memset_io(c,v,l)        __memset_io((c),(v),(l)) /* c: IO address, v: fill value, l: byte count */
+#define memcpy_fromio(a,c,l)    __memcpy_fromio((a),(c),(l)) /* a: memory dest, c: IO source, l: byte count */
+#define memcpy_toio(c,a,l)      __memcpy_toio((c),(a),(l)) /* c: IO dest, a: memory source, l: byte count */
+
+/*
  * I/O memory mapping functions.
  */
 #define ioremap_wc(addr, size) \
index 6c0f360..4eb4142 100644 (file)
@@ -2,7 +2,7 @@
 extra-y := head.o vmlinux.lds
 
 obj-y += entry.o atomic.o signal.o traps.o irq.o time.o vdso.o vdso/
-obj-y += power.o syscall.o syscall_table.o setup.o
+obj-y += power.o syscall.o syscall_table.o setup.o io.o
 obj-y += process.o cpu-probe.o ptrace.o stacktrace.o
 obj-y += probes/
 
diff --git a/arch/csky/kernel/io.c b/arch/csky/kernel/io.c
new file mode 100644 (file)
index 0000000..5883f13
--- /dev/null
@@ -0,0 +1,91 @@
+// SPDX-License-Identifier: GPL-2.0
+
+#include <linux/export.h>
+#include <linux/types.h>
+#include <linux/io.h>
+
+/*
+ * Copy count bytes from IO memory space (from) to "real" memory space (to).
+ */
+void __memcpy_fromio(void *to, const volatile void __iomem *from, size_t count)
+{
+       while (count && !IS_ALIGNED((unsigned long)from, 4)) { /* head */
+               *(u8 *)to = __raw_readb(from); /* bytewise until from is 4-byte aligned */
+               from++; /* void * arithmetic steps one byte (GNU C) */
+               to++;
+               count--;
+       }
+
+       while (count >= 4) { /* body: one 32-bit read per iteration */
+               *(u32 *)to = __raw_readl(from); /* only from was aligned above, not to */
+               from += 4;
+               to += 4;
+               count -= 4;
+       }
+
+       while (count) { /* tail: remaining 0-3 bytes, bytewise */
+               *(u8 *)to = __raw_readb(from);
+               from++;
+               to++;
+               count--;
+       }
+}
+EXPORT_SYMBOL(__memcpy_fromio);
+
+/*
+ * Copy count bytes from "real" memory space (from) to IO memory space (to).
+ */
+void __memcpy_toio(volatile void __iomem *to, const void *from, size_t count)
+{
+       while (count && !IS_ALIGNED((unsigned long)to, 4)) { /* head */
+               __raw_writeb(*(u8 *)from, to); /* bytewise until to is 4-byte aligned */
+               from++; /* void * arithmetic steps one byte (GNU C) */
+               to++;
+               count--;
+       }
+
+       while (count >= 4) { /* body: one 32-bit write per iteration */
+               __raw_writel(*(u32 *)from, to); /* only to was aligned above, not from */
+               from += 4;
+               to += 4;
+               count -= 4;
+       }
+
+       while (count) { /* tail: remaining 0-3 bytes, bytewise */
+               __raw_writeb(*(u8 *)from, to);
+               from++;
+               to++;
+               count--;
+       }
+}
+EXPORT_SYMBOL(__memcpy_toio);
+
+/*
+ * "memset" on IO memory space: fill count bytes starting at dst with c.
+ */
+void __memset_io(volatile void __iomem *dst, int c, size_t count)
+{
+       u32 qc = (u8)c; /* keep only the low 8 bits of c */
+
+       qc |= qc << 8; /* fill byte now in the low two bytes */
+       qc |= qc << 16; /* ...and now in all four bytes of qc */
+
+       while (count && !IS_ALIGNED((unsigned long)dst, 4)) { /* head */
+               __raw_writeb(c, dst); /* bytewise until dst is 4-byte aligned */
+               dst++; /* void * arithmetic steps one byte (GNU C) */
+               count--;
+       }
+
+       while (count >= 4) { /* body: one 32-bit write per iteration */
+               __raw_writel(qc, dst);
+               dst += 4;
+               count -= 4;
+       }
+
+       while (count) { /* tail: remaining 0-3 bytes, bytewise */
+               __raw_writeb(c, dst);
+               dst++;
+               count--;
+       }
+}
+EXPORT_SYMBOL(__memset_io);