s390/vdso: implement generic vdso time namespace support
author Heiko Carstens <hca@linux.ibm.com>
Fri, 5 Feb 2021 15:19:32 +0000 (16:19 +0100)
committer Vasily Gorbik <gor@linux.ibm.com>
Tue, 9 Feb 2021 14:57:05 +0000 (15:57 +0100)
Implement generic vdso time namespace support which also enables time
namespaces for s390. This is quite similar to what arm64 has.

Reviewed-by: Alexander Gordeev <agordeev@linux.ibm.com>
Signed-off-by: Heiko Carstens <hca@linux.ibm.com>
Signed-off-by: Vasily Gorbik <gor@linux.ibm.com>
arch/s390/Kconfig
arch/s390/configs/zfcpdump_defconfig
arch/s390/include/asm/vdso.h
arch/s390/include/asm/vdso/gettimeofday.h
arch/s390/kernel/vdso.c
arch/s390/kernel/vdso64/vdso64.lds.S

index 41a2c58..5de9f40 100644 (file)
@@ -129,6 +129,7 @@ config S390
        select GENERIC_PTDUMP
        select GENERIC_SMP_IDLE_THREAD
        select GENERIC_TIME_VSYSCALL
+       select GENERIC_VDSO_TIME_NS
        select HAVE_ALIGNED_STRUCT_PAGE if SLUB
        select HAVE_ARCH_AUDITSYSCALL
        select HAVE_ARCH_JUMP_LABEL
index 0200ccf..acf982a 100644 (file)
@@ -3,6 +3,7 @@ CONFIG_NO_HZ_IDLE=y
 CONFIG_HIGH_RES_TIMERS=y
 # CONFIG_CPU_ISOLATION is not set
 # CONFIG_UTS_NS is not set
+# CONFIG_TIME_NS is not set
 # CONFIG_PID_NS is not set
 # CONFIG_NET_NS is not set
 CONFIG_BLK_DEV_INITRD=y
index e4ea142..b45e3dd 100644 (file)
@@ -7,6 +7,8 @@
 /* Default link address for the vDSO */
 #define VDSO64_LBASE   0
 
+#define __VVAR_PAGES   2       /* checked against VVAR_NR_PAGES in kernel/vdso.c; used by vdso64.lds.S to place _vdso_data */
+
 #define VDSO_VERSION_STRING    LINUX_2.6.29
 
 #ifndef __ASSEMBLY__
index c92b0de..ed89ef7 100644 (file)
@@ -67,4 +67,11 @@ long clock_getres_fallback(clockid_t clkid, struct __kernel_timespec *ts)
        return r2;
 }
 
+#ifdef CONFIG_TIME_NS
+static __always_inline const struct vdso_data *__arch_get_timens_vdso_data(void)
+{
+       return _timens_data;    /* linker script symbol: _vdso_data + PAGE_SIZE */
+}
+#endif /* CONFIG_TIME_NS */
+
 #endif
index 31920b7..dd967af 100644 (file)
 #include <linux/mm.h>
 #include <linux/slab.h>
 #include <linux/smp.h>
+#include <linux/time_namespace.h>
 #include <vdso/datapage.h>
 #include <asm/vdso.h>
 
 extern char vdso64_start[], vdso64_end[];
 static unsigned int vdso_pages;
 
+static struct vm_special_mapping vvar_mapping;
+
 static union {
        struct vdso_data        data[CS_BASES];
        u8                      page[PAGE_SIZE];
@@ -28,6 +31,12 @@ static union {
 
 struct vdso_data *vdso_data = vdso_data_store.data;
 
+enum vvar_pages {
+       VVAR_DATA_PAGE_OFFSET,   /* timens page if the task has one, else vdso_data */
+       VVAR_TIMENS_PAGE_OFFSET, /* vdso_data, mapped only if the task has a timens page */
+       VVAR_NR_PAGES,           /* page count; BUILD_BUG_ON ties it to __VVAR_PAGES */
+};
+
 unsigned int __read_mostly vdso_enabled = 1;
 
 static int __init vdso_setup(char *str)
@@ -40,12 +49,89 @@ static int __init vdso_setup(char *str)
 }
 __setup("vdso=", vdso_setup);
 
+#ifdef CONFIG_TIME_NS
+struct vdso_data *arch_get_vdso_data(void *vvar_page)
+{
+       return (struct vdso_data *)(vvar_page); /* plain cast, no offset applied */
+}
+
+static struct page *find_timens_vvar_page(struct vm_area_struct *vma)
+{
+       if (likely(vma->vm_mm == current->mm)) /* vma belongs to the current task's mm */
+               return current->nsproxy->time_ns->vvar_page;
+       /*
+        * VM_PFNMAP | VM_IO protect the .fault() handler from being called
+        * through interfaces like /proc/$pid/mem or
+        * process_vm_{readv,writev}() as long as there's no .access()
+        * in special_mapping_vmops().
+        * For more details see check_vma_flags() and __access_remote_vm().
+        *
+        * Getting here is therefore unexpected: warn and return NULL, which
+        * makes vvar_fault() behave as if the task had no timens page.
+        */
+       WARN(1, "vvar_page accessed remotely");
+       return NULL;
+}
+
+/*
+ * The VVAR page layout depends on whether a task belongs to the root or
+ * non-root time namespace. Whenever a task changes its namespace, the VVAR
+ * page tables are cleared and then they will be re-faulted with a
+ * corresponding layout.
+ * See also the comment near timens_setup_vdso_data() for details.
+ *
+ * Walks the VMA list of @task's mm under the mmap read lock and zaps the
+ * whole range of the first VMA that is the vvar special mapping; VMAs
+ * after that one are not looked at. @ns is not referenced by this
+ * implementation. Always returns 0.
+ */
+int vdso_join_timens(struct task_struct *task, struct time_namespace *ns)
+{
+       struct mm_struct *mm = task->mm;
+       struct vm_area_struct *vma;
+
+       mmap_read_lock(mm);
+       for (vma = mm->mmap; vma; vma = vma->vm_next) {
+               unsigned long size = vma->vm_end - vma->vm_start;
+
+               if (!vma_is_special_mapping(vma, &vvar_mapping))
+                       continue;
+               zap_page_range(vma, vma->vm_start, size);
+               break;
+       }
+       mmap_read_unlock(mm);
+       return 0;
+}
+#else
+static inline struct page *find_timens_vvar_page(struct vm_area_struct *vma)
+{
+       return NULL;    /* !CONFIG_TIME_NS: vvar_fault() always sees no timens page */
+}
+#endif
+
 static vm_fault_t vvar_fault(const struct vm_special_mapping *sm,
                             struct vm_area_struct *vma, struct vm_fault *vmf)
 {
-       if (vmf->pgoff == 0)
-               return vmf_insert_pfn(vma, vmf->address, virt_to_pfn(vdso_data));
-       return VM_FAULT_SIGBUS;
+       struct page *timens_page = find_timens_vvar_page(vma);
+       unsigned long pfn;
+
+       switch (vmf->pgoff) {
+       case VVAR_DATA_PAGE_OFFSET:
+               if (timens_page)
+                       pfn = page_to_pfn(timens_page); /* namespace-specific page */
+               else
+                       pfn = virt_to_pfn(vdso_data);   /* global vdso data page */
+               break;
+#ifdef CONFIG_TIME_NS
+       case VVAR_TIMENS_PAGE_OFFSET:
+               /*
+                * If a task belongs to a time namespace then a namespace
+                * specific VVAR is mapped with the VVAR_DATA_PAGE_OFFSET and
+                * the real VVAR page is mapped with the VVAR_TIMENS_PAGE_OFFSET
+                * offset.
+                * See also the comment near timens_setup_vdso_data().
+                *
+                * Without a timens page nothing backs this offset, so the
+                * fault is answered with SIGBUS.
+                */
+               if (!timens_page)
+                       return VM_FAULT_SIGBUS;
+               pfn = virt_to_pfn(vdso_data);
+               break;
+#endif /* CONFIG_TIME_NS */
+       default:        /* no vvar page is defined for any other offset */
+               return VM_FAULT_SIGBUS;
+       }
+       return vmf_insert_pfn(vma, vmf->address, pfn);
 }
 
 static int vdso_mremap(const struct vm_special_mapping *sm,
@@ -80,23 +166,25 @@ int arch_setup_additional_pages(struct linux_binprm *bprm, int uses_interp)
        struct vm_area_struct *vma;
        int rc;
 
+       BUILD_BUG_ON(VVAR_NR_PAGES != __VVAR_PAGES);
        if (!vdso_enabled || is_compat_task())
                return 0;
        if (mmap_write_lock_killable(mm))
                return -EINTR;
        vdso_text_len = vdso_pages << PAGE_SHIFT;
-       vdso_mapping_len = vdso_text_len + PAGE_SIZE;
+       vdso_mapping_len = vdso_text_len + VVAR_NR_PAGES * PAGE_SIZE;
        vvar_start = get_unmapped_area(NULL, 0, vdso_mapping_len, 0, 0);
        rc = vvar_start;
        if (IS_ERR_VALUE(vvar_start))
                goto out;
-       vma = _install_special_mapping(mm, vvar_start, PAGE_SIZE,
-                                      VM_READ|VM_MAYREAD|VM_PFNMAP,
+       vma = _install_special_mapping(mm, vvar_start, VVAR_NR_PAGES*PAGE_SIZE,
+                                      VM_READ|VM_MAYREAD|VM_IO|VM_DONTDUMP|
+                                      VM_PFNMAP,
                                       &vvar_mapping);
        rc = PTR_ERR(vma);
        if (IS_ERR(vma))
                goto out;
-       vdso_text_start = vvar_start + PAGE_SIZE;
+       vdso_text_start = vvar_start + VVAR_NR_PAGES * PAGE_SIZE;
        /* VM_MAYWRITE for COW so gdb can set breakpoints */
        vma = _install_special_mapping(mm, vdso_text_start, vdso_text_len,
                                       VM_READ|VM_EXEC|
index 99063b4..518f1ea 100644 (file)
@@ -13,7 +13,10 @@ ENTRY(_start)
 
 SECTIONS
 {
-       PROVIDE(_vdso_data = . - PAGE_SIZE);
+       PROVIDE(_vdso_data = . - __VVAR_PAGES * PAGE_SIZE);
+#ifdef CONFIG_TIME_NS
+       PROVIDE(_timens_data = _vdso_data + PAGE_SIZE);
+#endif
        . = VDSO64_LBASE + SIZEOF_HEADERS;
 
        .hash           : { *(.hash) }                  :text