include/asm-generic/tlb.h

   1 /* include/asm-generic/tlb.h
   2  *
   3  *      Generic TLB shootdown code
   4  *
   5  * Copyright 2001 Red Hat, Inc.
   6  * Based on code from mm/memory.c Copyright Linus Torvalds and others.
   7  *
   8  * Copyright 2011 Red Hat, Inc., Peter Zijlstra
   9  *
  10  * This program is free software; you can redistribute it and/or
  11  * modify it under the terms of the GNU General Public License
  12  * as published by the Free Software Foundation; either version
  13  * 2 of the License, or (at your option) any later version.
  14  */
  15 #ifndef _ASM_GENERIC__TLB_H
  16 #define _ASM_GENERIC__TLB_H
  17
  18 #include <linux/mmu_notifier.h>
  19 #include <linux/swap.h>
  20 #include <asm/pgalloc.h>
  21 #include <asm/tlbflush.h>
  22
  23 /*
  24  * Blindly accessing user memory from NMI context can be dangerous
  25  * if we're in the middle of switching the current user task or switching
  26  * the loaded mm.
  27  */
  28 #ifndef nmi_uaccess_okay
  29 # define nmi_uaccess_okay() true
  30 #endif
  31
  32 #ifdef CONFIG_MMU
  33
  34 #ifdef CONFIG_HAVE_RCU_TABLE_FREE
  35 /*
  36  * Semi RCU freeing of the page directories.
  37  *
  38  * This is needed by some architectures to implement software pagetable walkers.
  39  *
  40  * gup_fast() and other software pagetable walkers do a lockless page-table
   41  * walk and therefore need some synchronization with the freeing of the page
  42  * directories. The chosen means to accomplish that is by disabling IRQs over
  43  * the walk.
  44  *
  45  * Architectures that use IPIs to flush TLBs will then automagically DTRT,
  46  * since we unlink the page, flush TLBs, free the page. Since the disabling of
  47  * IRQs delays the completion of the TLB flush we can never observe an already
  48  * freed page.
  49  *
  50  * Architectures that do not have this (PPC) need to delay the freeing by some
  51  * other means, this is that means.
  52  *
  53  * What we do is batch the freed directory pages (tables) and RCU free them.
  54  * We use the sched RCU variant, as that guarantees that IRQ/preempt disabling
  55  * holds off grace periods.
  56  *
  57  * However, in order to batch these pages we need to allocate storage, this
  58  * allocation is deep inside the MM code and can thus easily fail on memory
  59  * pressure. To guarantee progress we fall back to single table freeing, see
  60  * the implementation of tlb_remove_table_one().
  61  *
  62  */
  63 struct mmu_table_batch {
  64         struct rcu_head         rcu;
  65         unsigned int            nr;
  66         void                    *tables[0];
  67 };
  68
  69 #define MAX_TABLE_BATCH         \
  70         ((PAGE_SIZE - sizeof(struct mmu_table_batch)) / sizeof(void *))
  71
  72 extern void tlb_table_flush(struct mmu_gather *tlb);
  73 extern void tlb_remove_table(struct mmu_gather *tlb, void *table);
  74
  75 #endif
  76
  77 /*
  78  * If we can't allocate a page to make a big batch of page pointers
  79  * to work on, then just handle a few from the on-stack structure.
  80  */
  81 #define MMU_GATHER_BUNDLE       8
  82
  83 struct mmu_gather_batch {
  84         struct mmu_gather_batch *next;
  85         unsigned int            nr;
  86         unsigned int            max;
  87         struct page             *pages[0];
  88 };
  89
  90 #define MAX_GATHER_BATCH        \
  91         ((PAGE_SIZE - sizeof(struct mmu_gather_batch)) / sizeof(void *))
  92
  93 /*
  94  * Limit the maximum number of mmu_gather batches to reduce a risk of soft
  95  * lockups for non-preemptible kernels on huge machines when a lot of memory
  96  * is zapped during unmapping.
  97  * 10K pages freed at once should be safe even without a preemption point.
  98  */
  99 #define MAX_GATHER_BATCH_COUNT  (10000UL/MAX_GATHER_BATCH)
 100
 101 /* struct mmu_gather is an opaque type used by the mm code for passing around
 102  * any data needed by arch specific code for tlb_remove_page.
 103  */
 104 struct mmu_gather {
 105         struct mm_struct        *mm;
 106 #ifdef CONFIG_HAVE_RCU_TABLE_FREE
 107         struct mmu_table_batch  *batch;
 108 #endif
 109         unsigned long           start;
 110         unsigned long           end;
 111         /*
 112          * we are in the middle of an operation to clear
 113          * a full mm and can make some optimizations
 114          */
 115         unsigned int            fullmm : 1;
 116
 117         /*
 118          * we have performed an operation which
 119          * requires a complete flush of the tlb
 120          */
 121         unsigned int            need_flush_all : 1;
 122
 123         /*
 124          * we have removed page directories
 125          */
 126         unsigned int            freed_tables : 1;
 127
 128         /*
 129          * at which levels have we cleared entries?
 130          */
 131         unsigned int            cleared_ptes : 1;
 132         unsigned int            cleared_pmds : 1;
 133         unsigned int            cleared_puds : 1;
 134         unsigned int            cleared_p4ds : 1;
 135
 136         struct mmu_gather_batch *active;
 137         struct mmu_gather_batch local;
 138         struct page             *__pages[MMU_GATHER_BUNDLE];
 139         unsigned int            batch_count;
 140         int page_size;
 141 };
 142
 143 #define HAVE_GENERIC_MMU_GATHER
 144
 145 void arch_tlb_gather_mmu(struct mmu_gather *tlb,
 146         struct mm_struct *mm, unsigned long start, unsigned long end);
 147 void tlb_flush_mmu(struct mmu_gather *tlb);
 148 void arch_tlb_finish_mmu(struct mmu_gather *tlb,
 149                          unsigned long start, unsigned long end, bool force);
 150 void tlb_flush_mmu_free(struct mmu_gather *tlb);
 151 extern bool __tlb_remove_page_size(struct mmu_gather *tlb, struct page *page,
 152                                    int page_size);
 153
 154 static inline void __tlb_adjust_range(struct mmu_gather *tlb,
 155                                       unsigned long address,
 156                                       unsigned int range_size)
 157 {
 158         tlb->start = min(tlb->start, address);
 159         tlb->end = max(tlb->end, address + range_size);
 160 }
 161
 162 static inline void __tlb_reset_range(struct mmu_gather *tlb)
 163 {
 164         if (tlb->fullmm) {
 165                 tlb->start = tlb->end = ~0;
 166         } else {
 167                 tlb->start = TASK_SIZE;
 168                 tlb->end = 0;
 169         }
 170         tlb->freed_tables = 0;
 171         tlb->cleared_ptes = 0;
 172         tlb->cleared_pmds = 0;
 173         tlb->cleared_puds = 0;
 174         tlb->cleared_p4ds = 0;
 175 }
 176
 177 static inline void tlb_flush_mmu_tlbonly(struct mmu_gather *tlb)
 178 {
 179         if (!tlb->end)
 180                 return;
 181
 182         tlb_flush(tlb);
 183         mmu_notifier_invalidate_range(tlb->mm, tlb->start, tlb->end);
 184         __tlb_reset_range(tlb);
 185 }
 186
 187 static inline void tlb_remove_page_size(struct mmu_gather *tlb,
 188                                         struct page *page, int page_size)
 189 {
 190         if (__tlb_remove_page_size(tlb, page, page_size))
 191                 tlb_flush_mmu(tlb);
 192 }
 193
 194 static inline bool __tlb_remove_page(struct mmu_gather *tlb, struct page *page)
 195 {
 196         return __tlb_remove_page_size(tlb, page, PAGE_SIZE);
 197 }
 198
 199 /* tlb_remove_page
 200  *      Similar to __tlb_remove_page but will call tlb_flush_mmu() itself when
 201  *      required.
 202  */
 203 static inline void tlb_remove_page(struct mmu_gather *tlb, struct page *page)
 204 {
 205         return tlb_remove_page_size(tlb, page, PAGE_SIZE);
 206 }
 207
 208 #ifndef tlb_remove_check_page_size_change
 209 #define tlb_remove_check_page_size_change tlb_remove_check_page_size_change
 210 static inline void tlb_remove_check_page_size_change(struct mmu_gather *tlb,
 211                                                      unsigned int page_size)
 212 {
 213         /*
 214          * We don't care about page size change, just update
 215          * mmu_gather page size here so that debug checks
 216          * doesn't throw false warning.
 217          */
 218 #ifdef CONFIG_DEBUG_VM
 219         tlb->page_size = page_size;
 220 #endif
 221 }
 222 #endif
 223
 224 static inline unsigned long tlb_get_unmap_shift(struct mmu_gather *tlb)
 225 {
 226         if (tlb->cleared_ptes)
 227                 return PAGE_SHIFT;
 228         if (tlb->cleared_pmds)
 229                 return PMD_SHIFT;
 230         if (tlb->cleared_puds)
 231                 return PUD_SHIFT;
 232         if (tlb->cleared_p4ds)
 233                 return P4D_SHIFT;
 234
 235         return PAGE_SHIFT;
 236 }
 237
 238 static inline unsigned long tlb_get_unmap_size(struct mmu_gather *tlb)
 239 {
 240         return 1UL << tlb_get_unmap_shift(tlb);
 241 }
 242
 243 /*
 244  * In the case of tlb vma handling, we can optimise these away in the
 245  * case where we're doing a full MM flush.  When we're doing a munmap,
 246  * the vmas are adjusted to only cover the region to be torn down.
 247  */
 248 #ifndef tlb_start_vma
 249 #define tlb_start_vma(tlb, vma) do { } while (0)
 250 #endif
 251
 252 #define __tlb_end_vma(tlb, vma)                                 \
 253         do {                                                    \
 254                 if (!tlb->fullmm)                               \
 255                         tlb_flush_mmu_tlbonly(tlb);             \
 256         } while (0)
 257
 258 #ifndef tlb_end_vma
 259 #define tlb_end_vma     __tlb_end_vma
 260 #endif
 261
 262 #ifndef __tlb_remove_tlb_entry
 263 #define __tlb_remove_tlb_entry(tlb, ptep, address) do { } while (0)
 264 #endif
 265
 266 /**
 267  * tlb_remove_tlb_entry - remember a pte unmapping for later tlb invalidation.
 268  *
 269  * Record the fact that pte's were really unmapped by updating the range,
 270  * so we can later optimise away the tlb invalidate.   This helps when
 271  * userspace is unmapping already-unmapped pages, which happens quite a lot.
 272  */
 273 #define tlb_remove_tlb_entry(tlb, ptep, address)                \
 274         do {                                                    \
 275                 __tlb_adjust_range(tlb, address, PAGE_SIZE);    \
 276                 tlb->cleared_ptes = 1;                          \
 277                 __tlb_remove_tlb_entry(tlb, ptep, address);     \
 278         } while (0)
 279
 280 #define tlb_remove_huge_tlb_entry(h, tlb, ptep, address)        \
 281         do {                                                    \
 282                 unsigned long _sz = huge_page_size(h);          \
 283                 __tlb_adjust_range(tlb, address, _sz);          \
 284                 if (_sz == PMD_SIZE)                            \
 285                         tlb->cleared_pmds = 1;                  \
 286                 else if (_sz == PUD_SIZE)                       \
 287                         tlb->cleared_puds = 1;                  \
 288                 __tlb_remove_tlb_entry(tlb, ptep, address);     \
 289         } while (0)
 290
 291 /**
 292  * tlb_remove_pmd_tlb_entry - remember a pmd mapping for later tlb invalidation
 293  * This is a nop so far, because only x86 needs it.
 294  */
 295 #ifndef __tlb_remove_pmd_tlb_entry
 296 #define __tlb_remove_pmd_tlb_entry(tlb, pmdp, address) do {} while (0)
 297 #endif
 298
 299 #define tlb_remove_pmd_tlb_entry(tlb, pmdp, address)                    \
 300         do {                                                            \
 301                 __tlb_adjust_range(tlb, address, HPAGE_PMD_SIZE);       \
 302                 tlb->cleared_pmds = 1;                                  \
 303                 __tlb_remove_pmd_tlb_entry(tlb, pmdp, address);         \
 304         } while (0)
 305
 306 /**
 307  * tlb_remove_pud_tlb_entry - remember a pud mapping for later tlb
 308  * invalidation. This is a nop so far, because only x86 needs it.
 309  */
 310 #ifndef __tlb_remove_pud_tlb_entry
 311 #define __tlb_remove_pud_tlb_entry(tlb, pudp, address) do {} while (0)
 312 #endif
 313
 314 #define tlb_remove_pud_tlb_entry(tlb, pudp, address)                    \
 315         do {                                                            \
 316                 __tlb_adjust_range(tlb, address, HPAGE_PUD_SIZE);       \
 317                 tlb->cleared_puds = 1;                                  \
 318                 __tlb_remove_pud_tlb_entry(tlb, pudp, address);         \
 319         } while (0)
 320
 321 /*
 322  * For things like page tables caches (ie caching addresses "inside" the
 323  * page tables, like x86 does), for legacy reasons, flushing an
 324  * individual page had better flush the page table caches behind it. This
 325  * is definitely how x86 works, for example. And if you have an
 326  * architected non-legacy page table cache (which I'm not aware of
 327  * anybody actually doing), you're going to have some architecturally
 328  * explicit flushing for that, likely *separate* from a regular TLB entry
 329  * flush, and thus you'd need more than just some range expansion..
 330  *
 331  * So if we ever find an architecture
 332  * that would want something that odd, I think it is up to that
 333  * architecture to do its own odd thing, not cause pain for others
 334  * http://lkml.kernel.org/r/CA+55aFzBggoXtNXQeng5d_mRoDnaMBE5Y+URs+PHR67nUpMtaw@mail.gmail.com
 335  *
 336  * For now w.r.t page table cache, mark the range_size as PAGE_SIZE
 337  */
 338
 339 #ifndef pte_free_tlb
 340 #define pte_free_tlb(tlb, ptep, address)                        \
 341         do {                                                    \
 342                 __tlb_adjust_range(tlb, address, PAGE_SIZE);    \
 343                 tlb->freed_tables = 1;                          \
 344                 tlb->cleared_pmds = 1;                          \
 345                 __pte_free_tlb(tlb, ptep, address);             \
 346         } while (0)
 347 #endif
 348
 349 #ifndef pmd_free_tlb
 350 #define pmd_free_tlb(tlb, pmdp, address)                        \
 351         do {                                                    \
 352                 __tlb_adjust_range(tlb, address, PAGE_SIZE);    \
 353                 tlb->freed_tables = 1;                          \
 354                 tlb->cleared_puds = 1;                          \
 355                 __pmd_free_tlb(tlb, pmdp, address);             \
 356         } while (0)
 357 #endif
 358
 359 #ifndef __ARCH_HAS_4LEVEL_HACK
 360 #ifndef pud_free_tlb
 361 #define pud_free_tlb(tlb, pudp, address)                        \
 362         do {                                                    \
 363                 __tlb_adjust_range(tlb, address, PAGE_SIZE);    \
 364                 tlb->freed_tables = 1;                          \
 365                 tlb->cleared_p4ds = 1;                          \
 366                 __pud_free_tlb(tlb, pudp, address);             \
 367         } while (0)
 368 #endif
 369 #endif
 370
 371 #ifndef __ARCH_HAS_5LEVEL_HACK
 372 #ifndef p4d_free_tlb
 373 #define p4d_free_tlb(tlb, pudp, address)                        \
 374         do {                                                    \
 375                 __tlb_adjust_range(tlb, address, PAGE_SIZE);    \
 376                 tlb->freed_tables = 1;                          \
 377                 __p4d_free_tlb(tlb, pudp, address);             \
 378         } while (0)
 379 #endif
 380 #endif
 381
 382 #endif /* CONFIG_MMU */
 383
 384 #define tlb_migrate_finish(mm) do {} while (0)
 385
 386 #endif /* _ASM_GENERIC__TLB_H */