/*
 * Dynamic Binary Instrumentation Module based on KProbes
 * modules/kprobe/swap_kprobes_deps.c
 *
 * This program is free software; you can redistribute it and/or modify
 * it under the terms of the GNU General Public License as published by
 * the Free Software Foundation; either version 2 of the License, or
 * (at your option) any later version.
 *
 * This program is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
 * GNU General Public License for more details.
 *
 * You should have received a copy of the GNU General Public License
 * along with this program; if not, write to the Free Software
 * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
 *
 * Copyright (C) Samsung Electronics, 2006-2010
 *
 * 2008-2009    Alexey Gerenkov <a.gerenkov@samsung.com>: User-Space Probes
 *              initial implementation; support for x86/ARM/MIPS in both
 *              user and kernel space.
 * 2010         Ekaterina Gorelkina <e.gorelkina@samsung.com>: redesigned
 *              the module to separate the core and arch parts.
 */
#include <linux/module.h>
#include <linux/sched.h>

#include <asm/pgtable.h>

#include "swap_kprobes_deps.h"
#include "swap_kdebug.h"

#include <linux/slab.h>
#if LINUX_VERSION_CODE > KERNEL_VERSION(2, 6, 29)
/*
 * Redefinition of the kernel's 'pgd_offset_k': walk the kernel page tables
 * through init_task.active_mm, since init_mm is not exported to modules.
 */
#undef pgd_offset_k
#define pgd_offset_k(addr)	pgd_offset(init_task.active_mm, addr)
#endif /* LINUX_VERSION_CODE > KERNEL_VERSION(2, 6, 29) */
#if (LINUX_VERSION_CODE > KERNEL_VERSION(2, 6, 38))
#ifndef is_zero_pfn

/* PFN of the zero page, resolved in init_module_dependencies(). */
static unsigned long swap_zero_pfn;

#endif /* is_zero_pfn */
#endif /* (LINUX_VERSION_CODE > KERNEL_VERSION(2, 6, 38)) */
#if LINUX_VERSION_CODE > KERNEL_VERSION(2, 6, 36)
static inline void *swap_kmap_atomic(struct page *page)
{
	return kmap_atomic(page);
}

static inline void swap_kunmap_atomic(void *kvaddr)
{
	kunmap_atomic(kvaddr);
}
#else /* LINUX_VERSION_CODE > KERNEL_VERSION(2, 6, 36) */
static inline void *swap_kmap_atomic(struct page *page)
{
	return kmap_atomic(page, KM_USER0);
}

static inline void swap_kunmap_atomic(void *kvaddr)
{
	kunmap_atomic(kvaddr, KM_USER0);
}
#endif /* LINUX_VERSION_CODE > KERNEL_VERSION(2, 6, 36) */
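
/*
 * Usage sketch (illustrative only, not compiled): kmap_atomic() lost its
 * KM_USER0 slot argument in 2.6.37, which is all the wrappers above hide.
 * They are meant for short, non-sleeping single-page mappings, as done in
 * access_process_vm_atomic() further down:
 */
#if 0
static void example_peek_page(struct page *page)
{
	void *kvaddr = swap_kmap_atomic(page);	/* map page, no sleeping */
	/* ... read or write up to PAGE_SIZE bytes at kvaddr ... */
	swap_kunmap_atomic(kvaddr);		/* unmap again */
}
#endif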
#if LINUX_VERSION_CODE >= KERNEL_VERSION(3, 9, 0)
DECLARE_MOD_FUNC_DEP(do_mmap_pgoff, unsigned long, struct file *file,
		     unsigned long addr, unsigned long len, unsigned long prot,
		     unsigned long flags, unsigned long pgoff,
		     unsigned long *populate);
DECLARE_MOD_DEP_WRAPPER(swap_do_mmap_pgoff,
			unsigned long,
			struct file *file, unsigned long addr,
			unsigned long len, unsigned long prot,
			unsigned long flags, unsigned long pgoff,
			unsigned long *populate)
IMP_MOD_DEP_WRAPPER(do_mmap_pgoff, file, addr, len, prot, flags, pgoff,
		    populate)
#elif LINUX_VERSION_CODE >= KERNEL_VERSION(3, 5, 0)
DECLARE_MOD_FUNC_DEP(do_mmap_pgoff, unsigned long, struct file *file,
		     unsigned long addr, unsigned long len, unsigned long prot,
		     unsigned long flags, unsigned long pgoff);
DECLARE_MOD_DEP_WRAPPER(swap_do_mmap_pgoff,
			unsigned long,
			struct file *file, unsigned long addr,
			unsigned long len, unsigned long prot,
			unsigned long flags, unsigned long pgoff)
IMP_MOD_DEP_WRAPPER(do_mmap_pgoff, file, addr, len, prot, flags, pgoff)
#endif /* LINUX_VERSION_CODE >= KERNEL_VERSION(3, 5, 0) */

#if LINUX_VERSION_CODE >= KERNEL_VERSION(3, 4, 0)
EXPORT_SYMBOL_GPL(swap_do_mmap_pgoff);
#endif /* LINUX_VERSION_CODE >= KERNEL_VERSION(3, 4, 0) */
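
/*
 * The DECLARE_/IMP_MOD_DEP_* macros (from swap_kprobes_deps.h) implement a
 * simple "module dependency" pattern for unexported kernel symbols: a
 * function pointer is declared per symbol, resolved once at load time (see
 * init_module_dependencies() below), and a wrapper forwards its arguments
 * to it.  A minimal hand-written sketch of what the macros boil down to,
 * assuming a kallsyms-style lookup (illustrative only, not the actual
 * macro expansion):
 */
#if 0
static unsigned long (*do_mmap_pgoff_p)(struct file *, unsigned long,
					unsigned long, unsigned long,
					unsigned long, unsigned long,
					unsigned long *);

unsigned long swap_do_mmap_pgoff(struct file *file, unsigned long addr,
				 unsigned long len, unsigned long prot,
				 unsigned long flags, unsigned long pgoff,
				 unsigned long *populate)
{
	/* forward to the kernel symbol resolved at module load time */
	return do_mmap_pgoff_p(file, addr, len, prot, flags, pgoff, populate);
}
#endif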
/* copy_to_user_page */
#ifndef copy_to_user_page
static DECLARE_MOD_FUNC_DEP(copy_to_user_page, void,
			    struct vm_area_struct *vma, struct page *page,
			    unsigned long uaddr, void *dst, const void *src,
			    unsigned long len);
DECLARE_MOD_DEP_WRAPPER(swap_copy_to_user_page,
			void,
			struct vm_area_struct *vma, struct page *page,
			unsigned long uaddr, void *dst, const void *src,
			unsigned long len)
IMP_MOD_DEP_WRAPPER(copy_to_user_page, vma, page, uaddr, dst, src, len)
#else /* copy_to_user_page */
#define swap_copy_to_user_page copy_to_user_page
#endif /* copy_to_user_page */
static DECLARE_MOD_FUNC_DEP(find_extend_vma, struct vm_area_struct *,
			    struct mm_struct *mm, unsigned long addr);
#if LINUX_VERSION_CODE <= KERNEL_VERSION(2, 6, 30)
#if LINUX_VERSION_CODE > KERNEL_VERSION(2, 6, 18)
static DECLARE_MOD_FUNC_DEP(handle_mm_fault, int, struct mm_struct *mm,
			    struct vm_area_struct *vma, unsigned long address,
			    int write_access);
#endif /* LINUX_VERSION_CODE > KERNEL_VERSION(2, 6, 18) */
#else /* LINUX_VERSION_CODE <= KERNEL_VERSION(2, 6, 30) */
static DECLARE_MOD_FUNC_DEP(handle_mm_fault, int, struct mm_struct *mm,
			    struct vm_area_struct *vma, unsigned long address,
			    unsigned int flags);
#endif /* LINUX_VERSION_CODE <= KERNEL_VERSION(2, 6, 30) */
#if LINUX_VERSION_CODE > KERNEL_VERSION(2, 6, 38)
static DECLARE_MOD_FUNC_DEP(get_gate_vma, struct vm_area_struct *,
			    struct mm_struct *mm);
#else /* LINUX_VERSION_CODE > KERNEL_VERSION(2, 6, 38) */
static DECLARE_MOD_FUNC_DEP(get_gate_vma, struct vm_area_struct *,
			    struct task_struct *tsk);
#endif /* LINUX_VERSION_CODE > KERNEL_VERSION(2, 6, 38) */
#ifdef __HAVE_ARCH_GATE_AREA
#if LINUX_VERSION_CODE > KERNEL_VERSION(2, 6, 38)
DECLARE_MOD_FUNC_DEP(in_gate_area, int, struct mm_struct *mm,
		     unsigned long addr);
#else /* LINUX_VERSION_CODE > KERNEL_VERSION(2, 6, 38) */
DECLARE_MOD_FUNC_DEP(in_gate_area, int, struct task_struct *task,
		     unsigned long addr);
#endif /* LINUX_VERSION_CODE > KERNEL_VERSION(2, 6, 38) */
#endif /* __HAVE_ARCH_GATE_AREA */
#if LINUX_VERSION_CODE > KERNEL_VERSION(2, 6, 38)
static DECLARE_MOD_FUNC_DEP(in_gate_area_no_mm, int, unsigned long addr);
#else /* LINUX_VERSION_CODE > KERNEL_VERSION(2, 6, 38) */
static DECLARE_MOD_FUNC_DEP(in_gate_area_no_task, int, unsigned long addr);
#endif /* LINUX_VERSION_CODE > KERNEL_VERSION(2, 6, 38) */
#if LINUX_VERSION_CODE >= KERNEL_VERSION(3, 9, 0)
static DECLARE_MOD_FUNC_DEP(follow_page_mask,
			    struct page *, struct vm_area_struct *vma,
			    unsigned long address, unsigned int foll_flags,
			    unsigned int *page_mask);
DECLARE_MOD_DEP_WRAPPER(swap_follow_page_mask,
			struct page *,
			struct vm_area_struct *vma, unsigned long address,
			unsigned int foll_flags, unsigned int *page_mask)
IMP_MOD_DEP_WRAPPER(follow_page_mask, vma, address, foll_flags, page_mask)
#else /* LINUX_VERSION_CODE >= KERNEL_VERSION(3, 9, 0) */
static DECLARE_MOD_FUNC_DEP(follow_page,
			    struct page *, struct vm_area_struct *vma,
			    unsigned long address, unsigned int foll_flags);
DECLARE_MOD_DEP_WRAPPER(swap_follow_page,
			struct page *,
			struct vm_area_struct *vma, unsigned long address,
			unsigned int foll_flags)
IMP_MOD_DEP_WRAPPER(follow_page, vma, address, foll_flags)
#endif /* LINUX_VERSION_CODE >= KERNEL_VERSION(3, 9, 0) */
static DECLARE_MOD_FUNC_DEP(__flush_anon_page,
			    void, struct vm_area_struct *vma,
			    struct page *page, unsigned long vmaddr);
static DECLARE_MOD_FUNC_DEP(vm_normal_page,
			    struct page *, struct vm_area_struct *vma,
			    unsigned long addr, pte_t pte);
#if (LINUX_VERSION_CODE != KERNEL_VERSION(2, 6, 16))
static DECLARE_MOD_FUNC_DEP(put_task_struct,
			    void, struct task_struct *tsk);
#else /* LINUX_VERSION_CODE != KERNEL_VERSION(2, 6, 16) */
static DECLARE_MOD_FUNC_DEP(put_task_struct,
			    void, struct rcu_head *rhp);
#endif /* LINUX_VERSION_CODE != KERNEL_VERSION(2, 6, 16) */
DECLARE_MOD_DEP_WRAPPER(swap_find_extend_vma,
			struct vm_area_struct *,
			struct mm_struct *mm, unsigned long addr)
IMP_MOD_DEP_WRAPPER(find_extend_vma, mm, addr)
#if LINUX_VERSION_CODE <= KERNEL_VERSION(2, 6, 30)
#if LINUX_VERSION_CODE > KERNEL_VERSION(2, 6, 18)
DECLARE_MOD_DEP_WRAPPER(swap_handle_mm_fault,
			int,
			struct mm_struct *mm, struct vm_area_struct *vma,
			unsigned long address, int write_access)
IMP_MOD_DEP_WRAPPER(handle_mm_fault, mm, vma, address, write_access)
#endif /* LINUX_VERSION_CODE > KERNEL_VERSION(2, 6, 18) */
#else /* LINUX_VERSION_CODE <= KERNEL_VERSION(2, 6, 30) */
DECLARE_MOD_DEP_WRAPPER(swap_handle_mm_fault,
			int,
			struct mm_struct *mm, struct vm_area_struct *vma,
			unsigned long address, unsigned int flags)
IMP_MOD_DEP_WRAPPER(handle_mm_fault, mm, vma, address, flags)
#endif /* LINUX_VERSION_CODE <= KERNEL_VERSION(2, 6, 30) */
#if LINUX_VERSION_CODE > KERNEL_VERSION(2, 6, 38)
DECLARE_MOD_DEP_WRAPPER(swap_get_gate_vma,
			struct vm_area_struct *,
			struct mm_struct *mm)
IMP_MOD_DEP_WRAPPER(get_gate_vma, mm)
#else /* LINUX_VERSION_CODE > KERNEL_VERSION(2, 6, 38) */
DECLARE_MOD_DEP_WRAPPER(swap_get_gate_vma,
			struct vm_area_struct *,
			struct task_struct *tsk)
IMP_MOD_DEP_WRAPPER(get_gate_vma, tsk)
#endif /* LINUX_VERSION_CODE > KERNEL_VERSION(2, 6, 38) */
#ifdef CONFIG_HUGETLB_PAGE

#if LINUX_VERSION_CODE < KERNEL_VERSION(3, 9, 0)
DECLARE_MOD_FUNC_DEP(follow_hugetlb_page,
		     int,
		     struct mm_struct *mm, struct vm_area_struct *vma,
		     struct page **pages, struct vm_area_struct **vmas,
		     unsigned long *position, int *length, int i,
		     unsigned int flags);
DECLARE_MOD_DEP_WRAPPER(swap_follow_hugetlb_page,
			int,
			struct mm_struct *mm, struct vm_area_struct *vma,
			struct page **pages, struct vm_area_struct **vmas,
			unsigned long *position, int *length, int i,
			unsigned int flags)
IMP_MOD_DEP_WRAPPER(follow_hugetlb_page,
		    mm, vma, pages, vmas, position, length, i, flags)
#else /* LINUX_VERSION_CODE < KERNEL_VERSION(3, 9, 0) */
DECLARE_MOD_FUNC_DEP(follow_hugetlb_page,
		     long,
		     struct mm_struct *mm, struct vm_area_struct *vma,
		     struct page **pages, struct vm_area_struct **vmas,
		     unsigned long *position, unsigned long *nr_pages,
		     long i, unsigned int flags);
DECLARE_MOD_DEP_WRAPPER(swap_follow_hugetlb_page,
			long,
			struct mm_struct *mm, struct vm_area_struct *vma,
			struct page **pages, struct vm_area_struct **vmas,
			unsigned long *position, unsigned long *nr_pages,
			long i, unsigned int flags)
IMP_MOD_DEP_WRAPPER(follow_hugetlb_page,
		    mm, vma, pages, vmas, position, nr_pages, i, flags)
#endif /* LINUX_VERSION_CODE < KERNEL_VERSION(3, 9, 0) */

#else /* CONFIG_HUGETLB_PAGE */
#define swap_follow_hugetlb_page follow_hugetlb_page
#endif /* CONFIG_HUGETLB_PAGE */
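
/*
 * Note: kernel 3.9 changed follow_hugetlb_page() to take page counts as
 * 'unsigned long *nr_pages' (instead of 'int *length') and to return
 * 'long', which is why the two wrapper variants above differ in prototype.
 */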
static inline int swap_in_gate_area(struct task_struct *task,
				    unsigned long addr)
{
#ifdef __HAVE_ARCH_GATE_AREA
#if LINUX_VERSION_CODE > KERNEL_VERSION(2, 6, 38)
	struct mm_struct *mm = task->mm;
	IMP_MOD_DEP_WRAPPER(in_gate_area, mm, addr)
#else /* LINUX_VERSION_CODE > KERNEL_VERSION(2, 6, 38) */
	IMP_MOD_DEP_WRAPPER(in_gate_area, task, addr)
#endif /* LINUX_VERSION_CODE > KERNEL_VERSION(2, 6, 38) */
#else /* __HAVE_ARCH_GATE_AREA */
	return in_gate_area(task, addr);
#endif /* __HAVE_ARCH_GATE_AREA */
}
#if LINUX_VERSION_CODE > KERNEL_VERSION(2, 6, 38)
DECLARE_MOD_DEP_WRAPPER(swap_in_gate_area_no_mm, int, unsigned long addr)
IMP_MOD_DEP_WRAPPER(in_gate_area_no_mm, addr)
#else /* LINUX_VERSION_CODE > KERNEL_VERSION(2, 6, 38) */
DECLARE_MOD_DEP_WRAPPER(swap_in_gate_area_no_task, int, unsigned long addr)
IMP_MOD_DEP_WRAPPER(in_gate_area_no_task, addr)
#endif /* LINUX_VERSION_CODE > KERNEL_VERSION(2, 6, 38) */
static inline int swap_in_gate_area_no_xxx(unsigned long addr)
{
#if LINUX_VERSION_CODE > KERNEL_VERSION(2, 6, 38)
	return swap_in_gate_area_no_mm(addr);
#else /* LINUX_VERSION_CODE > KERNEL_VERSION(2, 6, 38) */
	return swap_in_gate_area_no_task(addr);
#endif /* LINUX_VERSION_CODE > KERNEL_VERSION(2, 6, 38) */
}
DECLARE_MOD_DEP_WRAPPER(swap__flush_anon_page,
			void,
			struct vm_area_struct *vma, struct page *page,
			unsigned long vmaddr)
IMP_MOD_DEP_WRAPPER(__flush_anon_page, vma, page, vmaddr)
static inline void swap_flush_anon_page(struct vm_area_struct *vma,
					struct page *page,
					unsigned long vmaddr)
{
#if defined(ARCH_HAS_FLUSH_ANON_PAGE) && defined(CONFIG_ARM)
	swap__flush_anon_page(vma, page, vmaddr);
#else /* defined(ARCH_HAS_FLUSH_ANON_PAGE) && defined(CONFIG_ARM) */
	flush_anon_page(vma, page, vmaddr);
#endif /* defined(ARCH_HAS_FLUSH_ANON_PAGE) && defined(CONFIG_ARM) */
}
DECLARE_MOD_DEP_WRAPPER(swap_vm_normal_page,
			struct page *,
			struct vm_area_struct *vma, unsigned long addr,
			pte_t pte)
IMP_MOD_DEP_WRAPPER(vm_normal_page, vma, addr, pte)
int init_module_dependencies(void)
{
#if LINUX_VERSION_CODE > KERNEL_VERSION(2, 6, 18)
	INIT_MOD_DEP_VAR(handle_mm_fault, handle_mm_fault);
#endif /* LINUX_VERSION_CODE > KERNEL_VERSION(2, 6, 18) */

#ifndef copy_to_user_page
	INIT_MOD_DEP_VAR(copy_to_user_page, copy_to_user_page);
#endif /* copy_to_user_page */

	INIT_MOD_DEP_VAR(find_extend_vma, find_extend_vma);
	INIT_MOD_DEP_VAR(get_gate_vma, get_gate_vma);

#ifdef CONFIG_HUGETLB_PAGE
	INIT_MOD_DEP_VAR(follow_hugetlb_page, follow_hugetlb_page);
#endif /* CONFIG_HUGETLB_PAGE */

#ifdef __HAVE_ARCH_GATE_AREA
	INIT_MOD_DEP_VAR(in_gate_area, in_gate_area);
#endif /* __HAVE_ARCH_GATE_AREA */

#if LINUX_VERSION_CODE >= KERNEL_VERSION(3, 9, 0)
	INIT_MOD_DEP_VAR(follow_page_mask, follow_page_mask);
#else /* LINUX_VERSION_CODE >= KERNEL_VERSION(3, 9, 0) */
	INIT_MOD_DEP_VAR(follow_page, follow_page);
#endif /* LINUX_VERSION_CODE >= KERNEL_VERSION(3, 9, 0) */

#if LINUX_VERSION_CODE > KERNEL_VERSION(2, 6, 38)

#ifndef is_zero_pfn
	swap_zero_pfn = page_to_pfn(ZERO_PAGE(0));
#endif /* is_zero_pfn */

	INIT_MOD_DEP_VAR(in_gate_area_no_mm, in_gate_area_no_mm);
#else /* (LINUX_VERSION_CODE > KERNEL_VERSION(2, 6, 38)) */
	INIT_MOD_DEP_VAR(in_gate_area_no_task, in_gate_area_no_task);
#endif /* (LINUX_VERSION_CODE > KERNEL_VERSION(2, 6, 38)) */

#if defined(ARCH_HAS_FLUSH_ANON_PAGE) && defined(CONFIG_ARM)
	INIT_MOD_DEP_VAR(__flush_anon_page, __flush_anon_page);
#endif /* defined(ARCH_HAS_FLUSH_ANON_PAGE) && defined(CONFIG_ARM) */

	INIT_MOD_DEP_VAR(vm_normal_page, vm_normal_page);

#if (LINUX_VERSION_CODE != KERNEL_VERSION(2, 6, 16))
# if (LINUX_VERSION_CODE < KERNEL_VERSION(2, 6, 11))
	INIT_MOD_DEP_VAR(put_task_struct, put_task_struct);
# else /* LINUX_VERSION_CODE < KERNEL_VERSION(2, 6, 11) */
	INIT_MOD_DEP_VAR(put_task_struct, __put_task_struct);
# endif /* LINUX_VERSION_CODE < KERNEL_VERSION(2, 6, 11) */
#else /* LINUX_VERSION_CODE != KERNEL_VERSION(2, 6, 16) */
	INIT_MOD_DEP_VAR(put_task_struct, __put_task_struct_cb);
#endif /* LINUX_VERSION_CODE != KERNEL_VERSION(2, 6, 16) */

#if LINUX_VERSION_CODE >= KERNEL_VERSION(3, 4, 0)
	INIT_MOD_DEP_VAR(do_mmap_pgoff, do_mmap_pgoff);
#endif /* LINUX_VERSION_CODE >= KERNEL_VERSION(3, 4, 0) */

	return 0;
}
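
/*
 * Usage sketch (illustrative only, not compiled): callers are expected to
 * resolve the dependencies once, from their module init path, before any
 * of the swap_* wrappers above are used:
 */
#if 0
static int __init example_init(void)
{
	int ret = init_module_dependencies();

	if (ret)
		return ret;
	/* ... the swap_* wrappers may be called from here on ... */
	return 0;
}
#endif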
#if LINUX_VERSION_CODE <= KERNEL_VERSION(2, 6, 38) /* FIXME: must be < 32 */
#define GUP_FLAGS_WRITE				0x1
#define GUP_FLAGS_FORCE				0x2
#define GUP_FLAGS_IGNORE_VMA_PERMISSIONS	0x4
#define GUP_FLAGS_IGNORE_SIGKILL		0x8
#endif /* LINUX_VERSION_CODE <= KERNEL_VERSION(2, 6, 38) */
#if LINUX_VERSION_CODE > KERNEL_VERSION(2, 6, 18)
static inline int use_zero_page(struct vm_area_struct *vma)
{
	/*
	 * We don't want to optimize FOLL_ANON for make_pages_present()
	 * when it tries to page in a VM_LOCKED region. As to VM_SHARED,
	 * we want to get the page from the page tables to make sure
	 * that we serialize and update with any other user of that
	 * mapping.
	 */
	if (vma->vm_flags & (VM_LOCKED | VM_SHARED))
		return 0;
	/*
	 * And if we have a fault routine, it's not an anonymous region.
	 */
	return !vma->vm_ops || !vma->vm_ops->fault;
}
#if LINUX_VERSION_CODE > KERNEL_VERSION(2, 6, 38)

#if LINUX_VERSION_CODE >= KERNEL_VERSION(3, 8, 0)

#ifdef __HAVE_COLOR_ZERO_PAGE

static inline int swap_is_zero_pfn(unsigned long pfn)
{
	unsigned long offset_from_zero_pfn = pfn - swap_zero_pfn;

	return offset_from_zero_pfn <= (zero_page_mask >> PAGE_SHIFT);
}

#else /* __HAVE_COLOR_ZERO_PAGE */

static inline int swap_is_zero_pfn(unsigned long pfn)
{
	return pfn == swap_zero_pfn;
}

#endif /* __HAVE_COLOR_ZERO_PAGE */

#else /* LINUX_VERSION_CODE >= KERNEL_VERSION(3, 8, 0) */

static inline int swap_is_zero_pfn(unsigned long pfn)
{
#ifndef is_zero_pfn
	return pfn == swap_zero_pfn;
#else /* is_zero_pfn */
	return is_zero_pfn(pfn);
#endif /* is_zero_pfn */
}

#endif /* LINUX_VERSION_CODE >= KERNEL_VERSION(3, 8, 0) */
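
/*
 * Note: with __HAVE_COLOR_ZERO_PAGE (e.g. on MIPS or s390) the kernel keeps
 * several zero pages at consecutive PFNs to avoid cache aliasing, so the
 * first variant above accepts the whole PFN range covered by
 * zero_page_mask rather than a single PFN.
 */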
static inline int stack_guard_page(struct vm_area_struct *vma,
				   unsigned long addr)
{
	return stack_guard_page_start(vma, addr) ||
	       stack_guard_page_end(vma, addr + PAGE_SIZE);
}
#if LINUX_VERSION_CODE >= KERNEL_VERSION(3, 9, 0)

static long __get_user_pages_uprobe(struct task_struct *tsk,
				    struct mm_struct *mm,
				    unsigned long start,
				    unsigned long nr_pages,
				    unsigned int gup_flags,
				    struct page **pages,
				    struct vm_area_struct **vmas,
				    int *nonblocking)
{
	long i;
	unsigned long vm_flags;
	unsigned int page_mask;

	if (!nr_pages)
		return 0;

	VM_BUG_ON(!!pages != !!(gup_flags & FOLL_GET));

	/*
	 * Require read or write permissions.
	 * If FOLL_FORCE is set, we only require the "MAY" flags.
	 */
	vm_flags  = (gup_flags & FOLL_WRITE) ?
			(VM_WRITE | VM_MAYWRITE) : (VM_READ | VM_MAYREAD);
	vm_flags &= (gup_flags & FOLL_FORCE) ?
			(VM_MAYREAD | VM_MAYWRITE) : (VM_READ | VM_WRITE);

	/*
	 * If FOLL_FORCE and FOLL_NUMA are both set, handle_mm_fault
	 * would be called on PROT_NONE ranges. We must never invoke
	 * handle_mm_fault on PROT_NONE ranges or the NUMA hinting
	 * page faults would unprotect the PROT_NONE ranges if
	 * _PAGE_NUMA and _PAGE_PROTNONE are sharing the same pte/pmd
	 * bitflag. So to avoid that, don't set FOLL_NUMA if
	 * FOLL_FORCE is set.
	 */
	if (!(gup_flags & FOLL_FORCE))
		gup_flags |= FOLL_NUMA;

	i = 0;

	do {
		struct vm_area_struct *vma;

		vma = swap_find_extend_vma(mm, start);
		if (!vma && swap_in_gate_area(tsk, start)) {
			unsigned long pg = start & PAGE_MASK;
			pgd_t *pgd;
			pud_t *pud;
			pmd_t *pmd;
			pte_t *pte;

			/* user gate pages are read-only */
			if (gup_flags & FOLL_WRITE)
				return i ? : -EFAULT;
			if (pg > TASK_SIZE)
				pgd = pgd_offset_k(pg);
			else
				pgd = pgd_offset_gate(mm, pg);
			BUG_ON(pgd_none(*pgd));
			pud = pud_offset(pgd, pg);
			BUG_ON(pud_none(*pud));
			pmd = pmd_offset(pud, pg);
			if (pmd_none(*pmd))
				return i ? : -EFAULT;
			VM_BUG_ON(pmd_trans_huge(*pmd));
			pte = pte_offset_map(pmd, pg);
			if (pte_none(*pte)) {
				pte_unmap(pte);
				return i ? : -EFAULT;
			}
			vma = swap_get_gate_vma(mm);
			if (pages) {
				struct page *page;

				page = swap_vm_normal_page(vma, start, *pte);
				if (!page) {
					if (!(gup_flags & FOLL_DUMP) &&
					    swap_is_zero_pfn(pte_pfn(*pte)))
						page = pte_page(*pte);
					else {
						pte_unmap(pte);
						return i ? : -EFAULT;
					}
				}
				pages[i] = page;
				get_page(page);
			}
			pte_unmap(pte);
			page_mask = 0;
			goto next_page;
		}

		if (!vma ||
		    (vma->vm_flags & (VM_IO | VM_PFNMAP)) ||
		    !(vm_flags & vma->vm_flags))
			return i ? : -EFAULT;

		if (is_vm_hugetlb_page(vma)) {
			i = swap_follow_hugetlb_page(mm, vma, pages, vmas,
						     &start, &nr_pages, i,
						     gup_flags);
			continue;
		}

		do {
			struct page *page;
			unsigned int foll_flags = gup_flags;
			unsigned int page_increm;

			/*
			 * If we have a pending SIGKILL, don't keep faulting
			 * pages and potentially allocating memory.
			 */
			if (unlikely(fatal_signal_pending(current)))
				return i ? i : -ERESTARTSYS;

			/* cond_resched(); */
			while (!(page = swap_follow_page_mask(vma, start,
						foll_flags, &page_mask))) {
				int ret;
				unsigned int fault_flags = 0;

				/* For mlock, just skip the stack guard page. */
				if (foll_flags & FOLL_MLOCK) {
					if (stack_guard_page(vma, start))
						goto next_page;
				}
				if (foll_flags & FOLL_WRITE)
					fault_flags |= FAULT_FLAG_WRITE;
				if (nonblocking)
					fault_flags |= FAULT_FLAG_ALLOW_RETRY;
				if (foll_flags & FOLL_NOWAIT)
					fault_flags |= (FAULT_FLAG_ALLOW_RETRY |
							FAULT_FLAG_RETRY_NOWAIT);

				ret = swap_handle_mm_fault(mm, vma, start,
							   fault_flags);

				if (ret & VM_FAULT_ERROR) {
					if (ret & VM_FAULT_OOM)
						return i ? i : -ENOMEM;
					if (ret & (VM_FAULT_HWPOISON |
						   VM_FAULT_HWPOISON_LARGE)) {
						if (i)
							return i;
						else if (gup_flags &
							 FOLL_HWPOISON)
							return -EHWPOISON;
						else
							return -EFAULT;
					}
					if (ret & VM_FAULT_SIGBUS)
						return i ? i : -EFAULT;
					BUG();
				}

				if (tsk) {
					if (ret & VM_FAULT_MAJOR)
						tsk->maj_flt++;
					else
						tsk->min_flt++;
				}

				if (ret & VM_FAULT_RETRY) {
					if (nonblocking)
						*nonblocking = 0;
					return i;
				}

				/*
				 * The VM_FAULT_WRITE bit tells us that
				 * do_wp_page has broken COW when necessary,
				 * even if maybe_mkwrite decided not to set
				 * pte_write. We can thus safely do subsequent
				 * page lookups as if they were reads. But only
				 * do so when looping for pte_write is futile:
				 * in some cases userspace may also be wanting
				 * to write to the gotten user page, which a
				 * read fault here might prevent (a readonly
				 * page might get reCOWed by userspace write).
				 */
				if ((ret & VM_FAULT_WRITE) &&
				    !(vma->vm_flags & VM_WRITE))
					foll_flags &= ~FOLL_WRITE;

				/* cond_resched(); */
			}
			if (IS_ERR(page))
				return i ? i : PTR_ERR(page);
			if (pages) {
				pages[i] = page;

				swap_flush_anon_page(vma, page, start);
				flush_dcache_page(page);
				page_mask = 0;
			}
next_page:
			if (vmas) {
				vmas[i] = vma;
				page_mask = 0;
			}
			page_increm = 1 + (~(start >> PAGE_SHIFT) & page_mask);
			if (page_increm > nr_pages)
				page_increm = nr_pages;
			i += page_increm;
			start += page_increm * PAGE_SIZE;
			nr_pages -= page_increm;
		} while (nr_pages && start < vma->vm_end);
	} while (nr_pages);

	return i;
}
#else /* LINUX_VERSION_CODE >= KERNEL_VERSION(3, 9, 0) */
static int __get_user_pages_uprobe(struct task_struct *tsk,
				   struct mm_struct *mm,
				   unsigned long start, int nr_pages,
				   unsigned int gup_flags,
				   struct page **pages,
				   struct vm_area_struct **vmas,
				   int *nonblocking)
{
	int i;
	unsigned long vm_flags;

	if (nr_pages <= 0)
		return 0;

	VM_BUG_ON(!!pages != !!(gup_flags & FOLL_GET));

	/*
	 * Require read or write permissions.
	 * If FOLL_FORCE is set, we only require the "MAY" flags.
	 */
	vm_flags  = (gup_flags & FOLL_WRITE) ?
			(VM_WRITE | VM_MAYWRITE) : (VM_READ | VM_MAYREAD);
	vm_flags &= (gup_flags & FOLL_FORCE) ?
			(VM_MAYREAD | VM_MAYWRITE) : (VM_READ | VM_WRITE);

	i = 0;

	do {
		struct vm_area_struct *vma;

		vma = swap_find_extend_vma(mm, start);
		if (!vma && swap_in_gate_area_no_xxx(start)) {
			unsigned long pg = start & PAGE_MASK;
			pgd_t *pgd;
			pud_t *pud;
			pmd_t *pmd;
			pte_t *pte;

			/* user gate pages are read-only */
			if (gup_flags & FOLL_WRITE)
				return i ? : -EFAULT;
			if (pg > TASK_SIZE)
				pgd = pgd_offset_k(pg);
			else
				pgd = pgd_offset_gate(mm, pg);
			BUG_ON(pgd_none(*pgd));
			pud = pud_offset(pgd, pg);
			BUG_ON(pud_none(*pud));
			pmd = pmd_offset(pud, pg);
			if (pmd_none(*pmd))
				return i ? : -EFAULT;
			VM_BUG_ON(pmd_trans_huge(*pmd));
			pte = pte_offset_map(pmd, pg);
			if (pte_none(*pte)) {
				pte_unmap(pte);
				return i ? : -EFAULT;
			}
			vma = swap_get_gate_vma(mm);
			if (pages) {
				struct page *page;

				page = swap_vm_normal_page(vma, start, *pte);
				if (!page) {
					if (!(gup_flags & FOLL_DUMP) &&
					    swap_is_zero_pfn(pte_pfn(*pte)))
						page = pte_page(*pte);
					else {
						pte_unmap(pte);
						return i ? : -EFAULT;
					}
				}
				pages[i] = page;
				get_page(page);
			}
			pte_unmap(pte);
			goto next_page;
		}

		if (!vma ||
		    (vma->vm_flags & (VM_IO | VM_PFNMAP)) ||
		    !(vm_flags & vma->vm_flags)) {
			return i ? : -EFAULT;
		}

		if (is_vm_hugetlb_page(vma)) {
			i = swap_follow_hugetlb_page(mm, vma, pages, vmas,
						     &start, &nr_pages, i,
						     gup_flags);
			continue;
		}

		do {
			struct page *page;
			unsigned int foll_flags = gup_flags;

			/*
			 * If we have a pending SIGKILL, don't keep faulting
			 * pages and potentially allocating memory.
			 */
			if (unlikely(fatal_signal_pending(current))) {
				return i ? i : -ERESTARTSYS;
			}

			/* cond_resched(); */
			while (!(page = swap_follow_page(vma, start,
							 foll_flags))) {
				int ret;
				unsigned int fault_flags = 0;

				/* For mlock, just skip the stack guard page. */
				if (foll_flags & FOLL_MLOCK) {
					if (stack_guard_page(vma, start))
						goto next_page;
				}
				if (foll_flags & FOLL_WRITE)
					fault_flags |= FAULT_FLAG_WRITE;
				if (nonblocking)
					fault_flags |= FAULT_FLAG_ALLOW_RETRY;
				if (foll_flags & FOLL_NOWAIT)
					fault_flags |= (FAULT_FLAG_ALLOW_RETRY |
							FAULT_FLAG_RETRY_NOWAIT);

				ret = swap_handle_mm_fault(mm, vma, start,
							   fault_flags);

				if (ret & VM_FAULT_ERROR) {
					if (ret & VM_FAULT_OOM) {
						return i ? i : -ENOMEM;
					}
					if (ret & (VM_FAULT_HWPOISON |
						   VM_FAULT_HWPOISON_LARGE)) {
						if (i)
							return i;
						else if (gup_flags &
							 FOLL_HWPOISON) {
							return -EHWPOISON;
						} else {
							return -EFAULT;
						}
					}
					if (ret & VM_FAULT_SIGBUS) {
						return i ? i : -EFAULT;
					}
					BUG();
				}

				if (tsk) {
					if (ret & VM_FAULT_MAJOR)
						tsk->maj_flt++;
					else
						tsk->min_flt++;
				}

				if (ret & VM_FAULT_RETRY) {
					if (nonblocking)
						*nonblocking = 0;
					return i;
				}

				/*
				 * The VM_FAULT_WRITE bit tells us that
				 * do_wp_page has broken COW when necessary,
				 * even if maybe_mkwrite decided not to set
				 * pte_write. We can thus safely do subsequent
				 * page lookups as if they were reads. But only
				 * do so when looping for pte_write is futile:
				 * in some cases userspace may also be wanting
				 * to write to the gotten user page, which a
				 * read fault here might prevent (a readonly
				 * page might get reCOWed by userspace write).
				 */
				if ((ret & VM_FAULT_WRITE) &&
				    !(vma->vm_flags & VM_WRITE))
					foll_flags &= ~FOLL_WRITE;

				/* cond_resched(); */
			}
			if (IS_ERR(page))
				return i ? i : PTR_ERR(page);
			if (pages) {
				pages[i] = page;

				swap_flush_anon_page(vma, page, start);
				flush_dcache_page(page);
			}
next_page:
			if (vmas)
				vmas[i] = vma;
			i++;
			start += PAGE_SIZE;
			nr_pages--;
		} while (nr_pages && start < vma->vm_end);
	} while (nr_pages);

	return i;
}
#endif /* LINUX_VERSION_CODE >= KERNEL_VERSION(3, 9, 0) */

#else /* LINUX_VERSION_CODE > KERNEL_VERSION(2, 6, 38) */
static int __get_user_pages_uprobe(struct task_struct *tsk,
				   struct mm_struct *mm,
				   unsigned long start, int len, int flags,
				   struct page **pages,
				   struct vm_area_struct **vmas)
{
	int i;
	unsigned int vm_flags = 0;
	int write = !!(flags & GUP_FLAGS_WRITE);
	int force = !!(flags & GUP_FLAGS_FORCE);
	int ignore = !!(flags & GUP_FLAGS_IGNORE_VMA_PERMISSIONS);

	if (len <= 0)
		return 0;

	/*
	 * Require read or write permissions.
	 * If 'force' is set, we only require the "MAY" flags.
	 */
	vm_flags  = write ? (VM_WRITE | VM_MAYWRITE) : (VM_READ | VM_MAYREAD);
	vm_flags &= force ? (VM_MAYREAD | VM_MAYWRITE) : (VM_READ | VM_WRITE);

	i = 0;

	do {
		struct vm_area_struct *vma;
		unsigned int foll_flags;

		vma = find_vma(mm, start);
		if (!vma && swap_in_gate_area(tsk, start)) {
			unsigned long pg = start & PAGE_MASK;
			struct vm_area_struct *gate_vma =
				swap_get_gate_vma(tsk);
			pgd_t *pgd;
			pud_t *pud;
			pmd_t *pmd;
			pte_t *pte;

			/* user gate pages are read-only */
			if (!ignore && write)
				return i ? : -EFAULT;
			if (pg > TASK_SIZE)
				pgd = pgd_offset_k(pg);
			else
				pgd = pgd_offset_gate(mm, pg);
			BUG_ON(pgd_none(*pgd));
			pud = pud_offset(pgd, pg);
			BUG_ON(pud_none(*pud));
			pmd = pmd_offset(pud, pg);
			if (pmd_none(*pmd))
				return i ? : -EFAULT;
			pte = pte_offset_map(pmd, pg);
			if (pte_none(*pte)) {
				pte_unmap(pte);
				return i ? : -EFAULT;
			}
			if (pages) {
				struct page *page =
					swap_vm_normal_page(gate_vma,
							    start, *pte);
				pages[i] = page;
				if (page)
					get_page(page);
			}
			pte_unmap(pte);
			if (vmas)
				vmas[i] = gate_vma;
			i++;
			start += PAGE_SIZE;
			len--;
			continue;
		}

		if (!vma ||
		    (vma->vm_flags & (VM_IO | VM_PFNMAP)) ||
		    (!ignore && !(vm_flags & vma->vm_flags)))
			return i ? : -EFAULT;

		if (is_vm_hugetlb_page(vma)) {
			i = swap_follow_hugetlb_page(mm, vma, pages, vmas,
						     &start, &len, i, write);
			continue;
		}

		foll_flags = FOLL_TOUCH;
		if (pages)
			foll_flags |= FOLL_GET;

#if LINUX_VERSION_CODE > KERNEL_VERSION(2, 6, 18)
#if LINUX_VERSION_CODE <= KERNEL_VERSION(2, 6, 30)
		if (!write && use_zero_page(vma))
			foll_flags |= FOLL_ANON;
#endif /* LINUX_VERSION_CODE <= KERNEL_VERSION(2, 6, 30) */
#endif /* LINUX_VERSION_CODE > KERNEL_VERSION(2, 6, 18) */

		do {
			struct page *page;

			if (write)
				foll_flags |= FOLL_WRITE;

			DBPRINTF("pages = %p vma = %p\n", pages, vma);
			while (!(page = swap_follow_page(vma, start,
							 foll_flags))) {
				int ret;

				ret = swap_handle_mm_fault(mm, vma, start,
						foll_flags & FOLL_WRITE);

#if LINUX_VERSION_CODE <= KERNEL_VERSION(2, 6, 18)
				if (ret & VM_FAULT_WRITE)
					foll_flags &= ~FOLL_WRITE;

				switch (ret & ~VM_FAULT_WRITE) {
				case VM_FAULT_MINOR:
					tsk->min_flt++;
					break;
				case VM_FAULT_MAJOR:
					tsk->maj_flt++;
					break;
				case VM_FAULT_SIGBUS:
					return i ? i : -EFAULT;
				case VM_FAULT_OOM:
					return i ? i : -ENOMEM;
				default:
					BUG();
				}
#else /* LINUX_VERSION_CODE <= KERNEL_VERSION(2, 6, 18) */
				if (ret & VM_FAULT_ERROR) {
					if (ret & VM_FAULT_OOM)
						return i ? i : -ENOMEM;
					else if (ret & VM_FAULT_SIGBUS)
						return i ? i : -EFAULT;
					BUG();
				}
				if (ret & VM_FAULT_MAJOR)
					tsk->maj_flt++;
				else
					tsk->min_flt++;

				/*
				 * The VM_FAULT_WRITE bit tells us that
				 * do_wp_page has broken COW when necessary,
				 * even if maybe_mkwrite decided not to set
				 * pte_write. We can thus safely do subsequent
				 * page lookups as if they were reads. But only
				 * do so when looping for pte_write is futile:
				 * in some cases userspace may also be wanting
				 * to write to the gotten user page, which a
				 * read fault here might prevent (a readonly
				 * page might get reCOWed by userspace write).
				 */
				if ((ret & VM_FAULT_WRITE) &&
				    !(vma->vm_flags & VM_WRITE))
					foll_flags &= ~FOLL_WRITE;
#endif /* LINUX_VERSION_CODE <= KERNEL_VERSION(2, 6, 18) */
			}

			if (IS_ERR(page))
				return i ? i : PTR_ERR(page);
			if (pages) {
				pages[i] = page;

				swap_flush_anon_page(vma, page, start);
				flush_dcache_page(page);
			}
			if (vmas)
				vmas[i] = vma;
			i++;
			start += PAGE_SIZE;
			len--;
		} while (len && start < vma->vm_end);
	} while (len);

	return i;
}
#endif /* LINUX_VERSION_CODE > KERNEL_VERSION(2, 6, 38) */
int get_user_pages_uprobe(struct task_struct *tsk, struct mm_struct *mm,
			  unsigned long start, int len, int write, int force,
			  struct page **pages, struct vm_area_struct **vmas)
{
#if LINUX_VERSION_CODE >= KERNEL_VERSION(2, 6, 29)
#if LINUX_VERSION_CODE > KERNEL_VERSION(2, 6, 38) /* FIXME: must be >= 32! */
	int flags = FOLL_TOUCH;

	if (pages)
		flags |= FOLL_GET;
	if (write)
		flags |= FOLL_WRITE;
	if (force)
		flags |= FOLL_FORCE;
#else /* LINUX_VERSION_CODE > KERNEL_VERSION(2, 6, 38) */
	int flags = 0;

	if (write)
		flags |= GUP_FLAGS_WRITE;
	if (force)
		flags |= GUP_FLAGS_FORCE;
#endif /* LINUX_VERSION_CODE > KERNEL_VERSION(2, 6, 38) */

	return __get_user_pages_uprobe(tsk, mm,
				       start, len, flags,
#if LINUX_VERSION_CODE > KERNEL_VERSION(2, 6, 38)
				       pages, vmas, NULL);
#else /* LINUX_VERSION_CODE > KERNEL_VERSION(2, 6, 38) */
				       pages, vmas);
#endif /* LINUX_VERSION_CODE > KERNEL_VERSION(2, 6, 38) */
#else /* LINUX_VERSION_CODE >= KERNEL_VERSION(2, 6, 29) */
	return get_user_pages(tsk, mm, start, len, write, force, pages, vmas);
#endif /* LINUX_VERSION_CODE >= KERNEL_VERSION(2, 6, 29) */
}
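
/*
 * Usage sketch (illustrative only, not compiled): pin one page of a traced
 * task for reading and release it afterwards, mirroring what
 * access_process_vm_atomic() does below:
 */
#if 0
static int example_pin_one_page(struct task_struct *task, unsigned long vaddr)
{
	struct vm_area_struct *vma;
	struct page *page;
	int ret;

	ret = get_user_pages_uprobe(task, task->mm, vaddr, 1,
				    0 /* read */, 1 /* force */, &page, &vma);
	if (ret <= 0)
		return ret ? ret : -EFAULT;

	/* ... map with swap_kmap_atomic()/kmap() and copy the data ... */
	page_cache_release(page);	/* put_page() on newer kernels */
	return 0;
}
#endif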
#define ACCESS_PROCESS_OPTIMIZATION 0

#if ACCESS_PROCESS_OPTIMIZATION

#define GET_STEP_X(LEN, STEP) (((LEN) >= (STEP)) ? (STEP) : (LEN) % (STEP))
#define GET_STEP_4(LEN) GET_STEP_X((LEN), 4)

static void read_data_current(unsigned long addr, void *buf, int len)
{
	int step;
	int pos = 0;

	for (step = GET_STEP_4(len); len; len -= step) {
		switch (GET_STEP_4(len)) {
		case 1:
			get_user(*(u8 *)(buf + pos),
				 (u8 __user *)(addr + pos));
			step = 1;
			break;
		case 2:
		case 3:
			get_user(*(u16 *)(buf + pos),
				 (u16 __user *)(addr + pos));
			step = 2;
			break;
		case 4:
			get_user(*(u32 *)(buf + pos),
				 (u32 __user *)(addr + pos));
			step = 4;
			break;
		}

		pos += step;
	}
}

static void write_data_current(unsigned long addr, void *buf, int len)
{
	int step;
	int pos = 0;

	for (step = GET_STEP_4(len); len; len -= step) {
		switch (GET_STEP_4(len)) {
		case 1:
			put_user(*(u8 *)(buf + pos),
				 (u8 __user *)(addr + pos));
			step = 1;
			break;
		case 2:
		case 3:
			put_user(*(u16 *)(buf + pos),
				 (u16 __user *)(addr + pos));
			step = 2;
			break;
		case 4:
			put_user(*(u32 *)(buf + pos),
				 (u32 __user *)(addr + pos));
			step = 4;
			break;
		}

		pos += step;
	}
}
#endif /* ACCESS_PROCESS_OPTIMIZATION */
int access_process_vm_atomic(struct task_struct *tsk, unsigned long addr,
			     void *buf, int len, int write)
{
	struct mm_struct *mm;
	struct vm_area_struct *vma;
	void *old_buf = buf;
	int atomic;

	if (len <= 0)
		return -1;

#if ACCESS_PROCESS_OPTIMIZATION
	if (write == 0 && tsk == current) {
		read_data_current(addr, buf, len);
		return len;
	}
#endif /* ACCESS_PROCESS_OPTIMIZATION */

	mm = tsk->mm; /* TODO: take a reference via get_task_mm() */
	if (!mm)
		return 0;

	/* FIXME: danger: write memory in atomic context */
	atomic = in_atomic();

	/* ignore errors, just check how much was successfully transferred */
	while (len) {
		int bytes, ret, offset;
		void *maddr;
		struct page *page = NULL;

		ret = get_user_pages_uprobe(tsk, mm, addr, 1,
					    write, 1, &page, &vma);

		if (ret <= 0) {
			/*
			 * Check if this is a VM_IO | VM_PFNMAP VMA, which
			 * we can access using slightly different code.
			 */
#ifdef CONFIG_HAVE_IOREMAP_PROT
			vma = find_vma(mm, addr);
			if (!vma)
				break;
			if (vma->vm_ops && vma->vm_ops->access)
				ret = vma->vm_ops->access(vma, addr, buf,
							  len, write);
			if (ret <= 0)
#endif /* CONFIG_HAVE_IOREMAP_PROT */
				break;
			bytes = ret;
		} else {
			bytes = len;
			offset = addr & (PAGE_SIZE - 1);
			if (bytes > PAGE_SIZE - offset)
				bytes = PAGE_SIZE - offset;

			maddr = atomic ? swap_kmap_atomic(page) : kmap(page);

			if (write) {
				swap_copy_to_user_page(vma, page, addr,
						       maddr + offset,
						       buf, bytes);
				set_page_dirty_lock(page);
			} else {
				copy_from_user_page(vma, page, addr,
						    buf, maddr + offset,
						    bytes);
			}

			atomic ? swap_kunmap_atomic(maddr) : kunmap(page);
			page_cache_release(page);
		}
		len -= bytes;
		buf += bytes;
		addr += bytes;
	}

	return buf - old_buf;
}
int page_present(struct mm_struct *mm, unsigned long address)
{
	pgd_t *pgd;
	pud_t *pud;
	pmd_t *pmd;
	pte_t *ptep, pte;
	unsigned long pfn;

	pgd = pgd_offset(mm, address);
	if (pgd_none(*pgd) || unlikely(pgd_bad(*pgd)))
		goto out;

	pud = pud_offset(pgd, address);
	if (pud_none(*pud) || unlikely(pud_bad(*pud)))
		goto out;

	pmd = pmd_offset(pud, address);
	if (pmd_none(*pmd) || unlikely(pmd_bad(*pmd)))
		goto out;

	ptep = pte_offset_map(pmd, address);
	if (!ptep)
		goto out;

	pte = *ptep;
	pte_unmap(ptep);
	if (pte_present(pte)) {
		pfn = pte_pfn(pte);
		if (pfn_valid(pfn))
			return 1;
	}

out:
	return 0;
}

EXPORT_SYMBOL_GPL(page_present);
EXPORT_SYMBOL_GPL(access_process_vm_atomic);
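
/*
 * Usage sketch (illustrative only, not compiled): read one word of a traced
 * task's memory from a probe handler, where sleeping is not allowed. The
 * page_present() check avoids triggering a page fault from atomic context:
 */
#if 0
static void example_dump_word(struct task_struct *task, unsigned long vaddr)
{
	unsigned long word;

	if (!task->mm || !page_present(task->mm, vaddr))
		return;		/* page not mapped, don't fault it in */

	if (access_process_vm_atomic(task, vaddr, &word,
				     sizeof(word), 0) == sizeof(word))
		printk(KERN_INFO "0x%lx: 0x%lx\n", vaddr, word);
}
#endif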