s390/mm: fix race on mm->context.flush_mm
author Martin Schwidefsky <schwidefsky@de.ibm.com>
Thu, 17 Aug 2017 06:15:16 +0000 (08:15 +0200)
committer Martin Schwidefsky <schwidefsky@de.ibm.com>
Wed, 6 Sep 2017 07:24:42 +0000 (09:24 +0200)
The order in __tlb_flush_mm_lazy is to flush TLB first and then clear
the mm->context.flush_mm bit. This can lead to missed flushes as the
bit can be set at any time; the order needs to be the other way around.

But this leads to a different race: __tlb_flush_mm_lazy may be called
on two CPUs concurrently. If mm->context.flush_mm is cleared first then
another CPU can bypass __tlb_flush_mm_lazy although the first CPU has
not done the flush yet. In a virtualized environment the time until the
flush is finally completed can be arbitrarily long.

Add a spinlock to serialize __tlb_flush_mm_lazy and use the function
in finish_arch_post_lock_switch as well.

Cc: <stable@vger.kernel.org>
Reviewed-by: Heiko Carstens <heiko.carstens@de.ibm.com>
Signed-off-by: Martin Schwidefsky <schwidefsky@de.ibm.com>
arch/s390/include/asm/mmu.h
arch/s390/include/asm/mmu_context.h
arch/s390/include/asm/tlbflush.h

index bd6f303..3525fe6 100644 (file)
@@ -5,6 +5,7 @@
 #include <linux/errno.h>
 
 typedef struct {
+       spinlock_t lock;
        cpumask_t cpu_attach_mask;
        atomic_t flush_count;
        unsigned int flush_mm;
@@ -27,6 +28,7 @@ typedef struct {
 } mm_context_t;
 
 #define INIT_MM_CONTEXT(name)                                             \
+       .context.lock = __SPIN_LOCK_UNLOCKED(name.context.lock),           \
        .context.pgtable_lock =                                            \
                        __SPIN_LOCK_UNLOCKED(name.context.pgtable_lock),   \
        .context.pgtable_list = LIST_HEAD_INIT(name.context.pgtable_list), \
index 8823e35..484efe8 100644 (file)
@@ -17,6 +17,7 @@
 static inline int init_new_context(struct task_struct *tsk,
                                   struct mm_struct *mm)
 {
+       spin_lock_init(&mm->context.lock);
        spin_lock_init(&mm->context.pgtable_lock);
        INIT_LIST_HEAD(&mm->context.pgtable_list);
        spin_lock_init(&mm->context.gmap_lock);
@@ -121,8 +122,7 @@ static inline void finish_arch_post_lock_switch(void)
                while (atomic_read(&mm->context.flush_count))
                        cpu_relax();
                cpumask_set_cpu(smp_processor_id(), mm_cpumask(mm));
-               if (mm->context.flush_mm)
-                       __tlb_flush_mm(mm);
+               __tlb_flush_mm_lazy(mm);
                preempt_enable();
        }
        set_fs(current->thread.mm_segment);
index 16fe2a3..b08d5bc 100644 (file)
@@ -101,10 +101,12 @@ static inline void __tlb_flush_kernel(void)
 
 static inline void __tlb_flush_mm_lazy(struct mm_struct * mm)
 {
+       spin_lock(&mm->context.lock);
        if (mm->context.flush_mm) {
-               __tlb_flush_mm(mm);
                mm->context.flush_mm = 0;
+               __tlb_flush_mm(mm);
        }
+       spin_unlock(&mm->context.lock);
 }
 
 /*