LoongArch: Add vDSO syscall __vdso_getcpu()
authorHuacai Chen <chenhuacai@loongson.cn>
Sat, 6 Aug 2022 07:19:33 +0000 (15:19 +0800)
committerHuacai Chen <chenhuacai@loongson.cn>
Fri, 12 Aug 2022 05:10:11 +0000 (13:10 +0800)
We test 20 million times of getcpu(), the real syscall version take 25
seconds, while the vsyscall version take only 2.4 seconds.

Signed-off-by: Rui Wang <wangrui@loongson.cn>
Signed-off-by: Huacai Chen <chenhuacai@loongson.cn>
arch/loongarch/include/asm/vdso.h
arch/loongarch/include/asm/vdso/vdso.h
arch/loongarch/kernel/vdso.c
arch/loongarch/vdso/Makefile
arch/loongarch/vdso/vdso.lds.S
arch/loongarch/vdso/vgetcpu.c [new file with mode: 0644]

index 8f8a0f9a4953ebdc5bedcd9f79d4b2b4baeb0dfc..d3ba35eb23e77082ea8ed564fe7378ad80df81b2 100644 (file)
@@ -7,6 +7,7 @@
 #ifndef __ASM_VDSO_H
 #define __ASM_VDSO_H
 
+#include <linux/mm.h>
 #include <linux/mm_types.h>
 #include <vdso/datapage.h>
 
index 5a01643a65b3b2db6e9cade37cd6e1d45cc05424..3b55d32a0619cc861c52354db8a0acb7ad5a18f4 100644 (file)
@@ -8,6 +8,18 @@
 
 #include <asm/asm.h>
 #include <asm/page.h>
+#include <asm/vdso.h>
+
+struct vdso_pcpu_data {
+       u32 node;
+} ____cacheline_aligned_in_smp;
+
+struct loongarch_vdso_data {
+       struct vdso_pcpu_data pdata[NR_CPUS];
+       struct vdso_data data[CS_BASES]; /* Arch-independent data */
+};
+
+#define VDSO_DATA_SIZE PAGE_ALIGN(sizeof(struct loongarch_vdso_data))
 
 static inline unsigned long get_vdso_base(void)
 {
@@ -24,7 +36,8 @@ static inline unsigned long get_vdso_base(void)
 
 static inline const struct vdso_data *get_vdso_data(void)
 {
-       return (const struct vdso_data *)(get_vdso_base() - PAGE_SIZE);
+       return (const struct vdso_data *)(get_vdso_base()
+                       - VDSO_DATA_SIZE + SMP_CACHE_BYTES * NR_CPUS);
 }
 
 #endif /* __ASSEMBLY__ */
index e20c8ca874735ffea2afe575f8593400ab369821..f32c38abd791589527bbba2eda8bd91a7065244a 100644 (file)
 extern char vdso_start[], vdso_end[];
 
 /* Kernel-provided data used by the VDSO. */
-static union loongarch_vdso_data {
-       u8 page[PAGE_SIZE];
-       struct vdso_data data[CS_BASES];
+static union {
+       u8 page[VDSO_DATA_SIZE];
+       struct loongarch_vdso_data vdata;
 } loongarch_vdso_data __page_aligned_data;
-struct vdso_data *vdso_data = loongarch_vdso_data.data;
+
 static struct page *vdso_pages[] = { NULL };
+struct vdso_data *vdso_data = loongarch_vdso_data.vdata.data;
+struct vdso_pcpu_data *vdso_pdata = loongarch_vdso_data.vdata.pdata;
 
 static int vdso_mremap(const struct vm_special_mapping *sm, struct vm_area_struct *new_vma)
 {
@@ -55,11 +57,14 @@ struct loongarch_vdso_info vdso_info = {
 
 static int __init init_vdso(void)
 {
-       unsigned long i, pfn;
+       unsigned long i, cpu, pfn;
 
        BUG_ON(!PAGE_ALIGNED(vdso_info.vdso));
        BUG_ON(!PAGE_ALIGNED(vdso_info.size));
 
+       for_each_possible_cpu(cpu)
+               vdso_pdata[cpu].node = cpu_to_node(cpu);
+
        pfn = __phys_to_pfn(__pa_symbol(vdso_info.vdso));
        for (i = 0; i < vdso_info.size / PAGE_SIZE; i++)
                vdso_info.code_mapping.pages[i] = pfn_to_page(pfn + i);
@@ -93,9 +98,9 @@ int arch_setup_additional_pages(struct linux_binprm *bprm, int uses_interp)
 
        /*
         * Determine total area size. This includes the VDSO data itself
-        * and the data page.
+        * and the data pages.
         */
-       vvar_size = PAGE_SIZE;
+       vvar_size = VDSO_DATA_SIZE;
        size = vvar_size + info->size;
 
        data_addr = get_unmapped_area(NULL, vdso_base(), size, 0, 0);
@@ -103,7 +108,7 @@ int arch_setup_additional_pages(struct linux_binprm *bprm, int uses_interp)
                ret = data_addr;
                goto out;
        }
-       vdso_addr = data_addr + PAGE_SIZE;
+       vdso_addr = data_addr + VDSO_DATA_SIZE;
 
        vma = _install_special_mapping(mm, data_addr, vvar_size,
                                       VM_READ | VM_MAYREAD,
@@ -115,8 +120,8 @@ int arch_setup_additional_pages(struct linux_binprm *bprm, int uses_interp)
 
        /* Map VDSO data page. */
        ret = remap_pfn_range(vma, data_addr,
-                             virt_to_phys(vdso_data) >> PAGE_SHIFT,
-                             PAGE_SIZE, PAGE_READONLY);
+                             virt_to_phys(&loongarch_vdso_data) >> PAGE_SHIFT,
+                             vvar_size, PAGE_READONLY);
        if (ret)
                goto out;
 
index 92e40403225783256ae816584c5abba19c32bcb3..d89e2ac75f7b85a360e50e81186a9536a43acc2d 100644 (file)
@@ -6,7 +6,7 @@
 ARCH_REL_TYPE_ABS := R_LARCH_32|R_LARCH_64|R_LARCH_MARK_LA|R_LARCH_JUMP_SLOT
 include $(srctree)/lib/vdso/Makefile
 
-obj-vdso-y := elf.o vgettimeofday.o sigreturn.o
+obj-vdso-y := elf.o vgetcpu.o vgettimeofday.o sigreturn.o
 
 # Common compiler flags between ABIs.
 ccflags-vdso := \
index 955f02de4a2dfea007b7e2831c43920f54c823e9..56ad855896dee7b8578e5ecad46aa189c9e0736a 100644 (file)
@@ -58,6 +58,7 @@ VERSION
 {
        LINUX_5.10 {
        global:
+               __vdso_getcpu;
                __vdso_clock_getres;
                __vdso_clock_gettime;
                __vdso_gettimeofday;
diff --git a/arch/loongarch/vdso/vgetcpu.c b/arch/loongarch/vdso/vgetcpu.c
new file mode 100644 (file)
index 0000000..43a0078
--- /dev/null
@@ -0,0 +1,43 @@
+// SPDX-License-Identifier: GPL-2.0-only
+/*
+ * Fast user context implementation of getcpu()
+ */
+
+#include <asm/vdso.h>
+#include <linux/getcpu.h>
+
+static __always_inline int read_cpu_id(void)
+{
+       int cpu_id;
+
+       __asm__ __volatile__(
+       "       rdtime.d $zero, %0\n"
+       : "=r" (cpu_id)
+       :
+       : "memory");
+
+       return cpu_id;
+}
+
+static __always_inline const struct vdso_pcpu_data *get_pcpu_data(void)
+{
+       return (struct vdso_pcpu_data *)(get_vdso_base() - VDSO_DATA_SIZE);
+}
+
+int __vdso_getcpu(unsigned int *cpu, unsigned int *node, struct getcpu_cache *unused)
+{
+       int cpu_id;
+       const struct vdso_pcpu_data *data;
+
+       cpu_id = read_cpu_id();
+
+       if (cpu)
+               *cpu = cpu_id;
+
+       if (node) {
+               data = get_pcpu_data();
+               *node = data[cpu_id].node;
+       }
+
+       return 0;
+}