/*
 *  Dynamic Binary Instrumentation Module based on KProbes
 *  modules/kprobe/swap_kprobes_deps.h
 *
 * This program is free software; you can redistribute it and/or modify
 * it under the terms of the GNU General Public License as published by
 * the Free Software Foundation; either version 2 of the License, or
 * (at your option) any later version.
 *
 * This program is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 * GNU General Public License for more details.
 *
 * You should have received a copy of the GNU General Public License
 * along with this program; if not, write to the Free Software
 * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
 *
 * Copyright (C) Samsung Electronics, 2006-2010
 *
 * 2008-2009    Alexey Gerenkov <a.gerenkov@samsung.com> User-Space
 *              Probes initial implementation; Support x86/ARM/MIPS for both user and kernel spaces.
 * 2010         Ekaterina Gorelkina <e.gorelkina@samsung.com>: redesign module for separating core and arch parts
 */
#include <linux/module.h>
#include <linux/sched.h>

#include <asm/pgtable.h>

#include "swap_kprobes_deps.h"
#include "swap_kdebug.h"

#include <linux/slab.h>
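/*
 * Most of the MM internals used below (handle_mm_fault(), follow_page(),
 * vm_normal_page(), ...) are not exported to modules.  They are reached
 * through the DECLARE_MOD_FUNC_DEP()/DECLARE_MOD_DEP_WRAPPER()/
 * IMP_MOD_DEP_WRAPPER() helpers from swap_kprobes_deps.h, which keep
 * per-symbol function pointers filled in by init_module_dependencies()
 * at load time (presumably by runtime symbol lookup, e.g. via kallsyms).
 * The swap_*() wrappers defined in this file are thin, kernel-version
 * independent fronts over those pointers.
 */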
#if LINUX_VERSION_CODE > KERNEL_VERSION(2, 6, 29)
/* kernel define 'pgd_offset_k' redefinition */
#undef pgd_offset_k
#define pgd_offset_k(addr)	pgd_offset(init_task.active_mm, addr)
#endif /* LINUX_VERSION_CODE > KERNEL_VERSION(2, 6, 29) */
#if (LINUX_VERSION_CODE > KERNEL_VERSION(2, 6, 38))
#ifndef is_zero_pfn
static unsigned long swap_zero_pfn = 0;
#endif /* is_zero_pfn */
#endif /* (LINUX_VERSION_CODE > KERNEL_VERSION(2, 6, 38)) */
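/*
 * kmap_atomic()/kunmap_atomic() dropped their km_type argument around
 * kernel 2.6.37; hide both calling conventions behind
 * swap_kmap_atomic()/swap_kunmap_atomic().
 */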
#if LINUX_VERSION_CODE > KERNEL_VERSION(2, 6, 36)
static inline void *swap_kmap_atomic(struct page *page)
{
	return kmap_atomic(page);
}

static inline void swap_kunmap_atomic(void *kvaddr)
{
	kunmap_atomic(kvaddr);
}
#else /* LINUX_VERSION_CODE > KERNEL_VERSION(2, 6, 36) */
static inline void *swap_kmap_atomic(struct page *page)
{
	return kmap_atomic(page, KM_USER0);
}

static inline void swap_kunmap_atomic(void *kvaddr)
{
	kunmap_atomic(kvaddr, KM_USER0);
}
#endif /* LINUX_VERSION_CODE > KERNEL_VERSION(2, 6, 36) */
#if LINUX_VERSION_CODE >= KERNEL_VERSION(3, 9, 0)
DECLARE_MOD_FUNC_DEP(do_mmap_pgoff, unsigned long, struct file *file, unsigned long addr, unsigned long len, unsigned long prot, unsigned long flags, unsigned long pgoff, unsigned long *populate);
DECLARE_MOD_DEP_WRAPPER(swap_do_mmap_pgoff,
			unsigned long,
			struct file *file, unsigned long addr,
			unsigned long len, unsigned long prot,
			unsigned long flags, unsigned long pgoff,
			unsigned long *populate)
IMP_MOD_DEP_WRAPPER(do_mmap_pgoff, file, addr, len, prot, flags, pgoff, populate)
#elif LINUX_VERSION_CODE >= KERNEL_VERSION(3, 4, 0) /* LINUX_VERSION_CODE >= KERNEL_VERSION(3, 9, 0) */
DECLARE_MOD_FUNC_DEP(do_mmap_pgoff, unsigned long, struct file *file, unsigned long addr, unsigned long len, unsigned long prot, unsigned long flags, unsigned long pgoff);
DECLARE_MOD_DEP_WRAPPER(swap_do_mmap_pgoff,
			unsigned long,
			struct file *file, unsigned long addr,
			unsigned long len, unsigned long prot,
			unsigned long flags, unsigned long pgoff)
IMP_MOD_DEP_WRAPPER(do_mmap_pgoff, file, addr, len, prot, flags, pgoff)
#endif /* LINUX_VERSION_CODE >= KERNEL_VERSION(3, 4, 0) */

#if LINUX_VERSION_CODE >= KERNEL_VERSION(3, 4, 0)
EXPORT_SYMBOL_GPL(swap_do_mmap_pgoff);
#endif /* LINUX_VERSION_CODE >= KERNEL_VERSION(3, 4, 0) */
/* copy_to_user_page */
#ifndef copy_to_user_page
static DECLARE_MOD_FUNC_DEP(copy_to_user_page, void, struct vm_area_struct *vma, struct page *page, unsigned long uaddr, void *dst, const void *src, unsigned long len);
DECLARE_MOD_DEP_WRAPPER(swap_copy_to_user_page,
			void,
			struct vm_area_struct *vma, struct page *page,
			unsigned long uaddr, void *dst, const void *src,
			unsigned long len)
IMP_MOD_DEP_WRAPPER(copy_to_user_page, vma, page, uaddr, dst, src, len)
#else /* copy_to_user_page */
#define swap_copy_to_user_page copy_to_user_page
#endif /* copy_to_user_page */
static DECLARE_MOD_FUNC_DEP(find_extend_vma, struct vm_area_struct *, struct mm_struct *mm, unsigned long addr);
#if LINUX_VERSION_CODE <= KERNEL_VERSION(2, 6, 30)
#if LINUX_VERSION_CODE > KERNEL_VERSION(2, 6, 18)
static DECLARE_MOD_FUNC_DEP(handle_mm_fault, int, struct mm_struct *mm, struct vm_area_struct *vma, unsigned long address, int write_access);
#endif /* LINUX_VERSION_CODE > KERNEL_VERSION(2, 6, 18) */
#else /* LINUX_VERSION_CODE <= KERNEL_VERSION(2, 6, 30) */
static DECLARE_MOD_FUNC_DEP(handle_mm_fault, int, struct mm_struct *mm, struct vm_area_struct *vma, unsigned long address, unsigned int flags);
#endif /* LINUX_VERSION_CODE <= KERNEL_VERSION(2, 6, 30) */
#if LINUX_VERSION_CODE > KERNEL_VERSION(2, 6, 38)
static DECLARE_MOD_FUNC_DEP(get_gate_vma, struct vm_area_struct *, struct mm_struct *mm);
#else /* LINUX_VERSION_CODE > KERNEL_VERSION(2, 6, 38) */
static DECLARE_MOD_FUNC_DEP(get_gate_vma, struct vm_area_struct *, struct task_struct *tsk);
#endif /* LINUX_VERSION_CODE > KERNEL_VERSION(2, 6, 38) */
#ifdef CONFIG_HUGETLB_PAGE
DECLARE_MOD_FUNC_DEP(follow_hugetlb_page, int, struct mm_struct *mm, \
		struct vm_area_struct *vma, struct page **pages, \
		struct vm_area_struct **vmas, unsigned long *position, int *length, \
		int i, unsigned int write);
#endif /* CONFIG_HUGETLB_PAGE */
#ifdef __HAVE_ARCH_GATE_AREA
#if LINUX_VERSION_CODE > KERNEL_VERSION(2, 6, 38)
DECLARE_MOD_FUNC_DEP(in_gate_area, int, struct mm_struct *mm, unsigned long addr);
#else /* LINUX_VERSION_CODE > KERNEL_VERSION(2, 6, 38) */
DECLARE_MOD_FUNC_DEP(in_gate_area, int, struct task_struct *task, unsigned long addr);
#endif /* LINUX_VERSION_CODE > KERNEL_VERSION(2, 6, 38) */
#endif /* __HAVE_ARCH_GATE_AREA */
#if LINUX_VERSION_CODE > KERNEL_VERSION(2, 6, 38)
static DECLARE_MOD_FUNC_DEP(in_gate_area_no_mm, int, unsigned long addr);
#else /* LINUX_VERSION_CODE > KERNEL_VERSION(2, 6, 38) */
static DECLARE_MOD_FUNC_DEP(in_gate_area_no_task, int, unsigned long addr);
#endif /* LINUX_VERSION_CODE > KERNEL_VERSION(2, 6, 38) */
#if LINUX_VERSION_CODE >= KERNEL_VERSION(3, 9, 0)
static DECLARE_MOD_FUNC_DEP(follow_page_mask, \
		struct page *, struct vm_area_struct *vma, \
		unsigned long address, unsigned int foll_flags, \
		unsigned int *page_mask);
DECLARE_MOD_DEP_WRAPPER(swap_follow_page_mask,
			struct page *,
			struct vm_area_struct *vma, unsigned long address,
			unsigned int foll_flags, unsigned int *page_mask)
IMP_MOD_DEP_WRAPPER(follow_page_mask, vma, address, foll_flags, page_mask)
#else /* LINUX_VERSION_CODE >= KERNEL_VERSION(3, 9, 0) */
static DECLARE_MOD_FUNC_DEP(follow_page, \
		struct page *, struct vm_area_struct *vma, \
		unsigned long address, unsigned int foll_flags);
DECLARE_MOD_DEP_WRAPPER(swap_follow_page,
			struct page *,
			struct vm_area_struct *vma, unsigned long address,
			unsigned int foll_flags)
IMP_MOD_DEP_WRAPPER(follow_page, vma, address, foll_flags)
#endif /* LINUX_VERSION_CODE >= KERNEL_VERSION(3, 9, 0) */
static DECLARE_MOD_FUNC_DEP(__flush_anon_page, \
		void, struct vm_area_struct *vma, struct page *page, \
		unsigned long vmaddr);
static DECLARE_MOD_FUNC_DEP(vm_normal_page, \
		struct page *, struct vm_area_struct *vma, \
		unsigned long addr, pte_t pte);
#if (LINUX_VERSION_CODE != KERNEL_VERSION(2, 6, 16))
static DECLARE_MOD_FUNC_DEP(put_task_struct, \
		void, struct task_struct *tsk);
#else /* (LINUX_VERSION_CODE != KERNEL_VERSION(2, 6, 16)) */
static DECLARE_MOD_FUNC_DEP(put_task_struct, \
		void, struct rcu_head *rhp);
#endif /* (LINUX_VERSION_CODE != KERNEL_VERSION(2, 6, 16)) */
DECLARE_MOD_DEP_WRAPPER(swap_find_extend_vma,
			struct vm_area_struct *,
			struct mm_struct *mm, unsigned long addr)
IMP_MOD_DEP_WRAPPER(find_extend_vma, mm, addr)
#if LINUX_VERSION_CODE <= KERNEL_VERSION(2, 6, 30)
#if LINUX_VERSION_CODE > KERNEL_VERSION(2, 6, 18)
DECLARE_MOD_DEP_WRAPPER(swap_handle_mm_fault,
			int,
			struct mm_struct *mm, struct vm_area_struct *vma,
			unsigned long address, int write_access)
IMP_MOD_DEP_WRAPPER(handle_mm_fault, mm, vma, address, write_access)
#endif /* LINUX_VERSION_CODE > KERNEL_VERSION(2, 6, 18) */
#else /* LINUX_VERSION_CODE <= KERNEL_VERSION(2, 6, 30) */
DECLARE_MOD_DEP_WRAPPER(swap_handle_mm_fault,
			int,
			struct mm_struct *mm, struct vm_area_struct *vma,
			unsigned long address, unsigned int flags)
IMP_MOD_DEP_WRAPPER(handle_mm_fault, mm, vma, address, flags)
#endif /* LINUX_VERSION_CODE <= KERNEL_VERSION(2, 6, 30) */
#if LINUX_VERSION_CODE > KERNEL_VERSION(2, 6, 38)
DECLARE_MOD_DEP_WRAPPER(swap_get_gate_vma,
			struct vm_area_struct *,
			struct mm_struct *mm)
IMP_MOD_DEP_WRAPPER(get_gate_vma, mm)
#else /* LINUX_VERSION_CODE > KERNEL_VERSION(2, 6, 38) */
DECLARE_MOD_DEP_WRAPPER(swap_get_gate_vma,
			struct vm_area_struct *,
			struct task_struct *tsk)
IMP_MOD_DEP_WRAPPER(get_gate_vma, tsk)
#endif /* LINUX_VERSION_CODE > KERNEL_VERSION(2, 6, 38) */
#ifdef CONFIG_HUGETLB_PAGE
DECLARE_MOD_DEP_WRAPPER(swap_follow_hugetlb_page,
			int,
			struct mm_struct *mm, struct vm_area_struct *vma,
			struct page **pages, struct vm_area_struct **vmas,
			unsigned long *position, int *length, int i,
			unsigned int write)
IMP_MOD_DEP_WRAPPER(follow_hugetlb_page, mm, vma, pages, vmas, position, length, i, write)
#else /* CONFIG_HUGETLB_PAGE */
#define swap_follow_hugetlb_page follow_hugetlb_page
#endif /* CONFIG_HUGETLB_PAGE */
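/*
 * in_gate_area() takes an mm_struct instead of a task_struct since 2.6.39
 * (and in_gate_area_no_task() became in_gate_area_no_mm()), so the helpers
 * below accept a task and pick the right call for the running kernel.
 */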
static inline int swap_in_gate_area(struct task_struct *task, unsigned long addr)
{
#ifdef __HAVE_ARCH_GATE_AREA
#if LINUX_VERSION_CODE > KERNEL_VERSION(2, 6, 38)
	struct mm_struct *mm = task->mm;
	IMP_MOD_DEP_WRAPPER(in_gate_area, mm, addr)
#else /* LINUX_VERSION_CODE > KERNEL_VERSION(2, 6, 38) */
	IMP_MOD_DEP_WRAPPER(in_gate_area, task, addr)
#endif /* LINUX_VERSION_CODE > KERNEL_VERSION(2, 6, 38) */
#else /* __HAVE_ARCH_GATE_AREA */
	return in_gate_area(task, addr);
#endif /* __HAVE_ARCH_GATE_AREA */
}
#if LINUX_VERSION_CODE > KERNEL_VERSION(2, 6, 38)
DECLARE_MOD_DEP_WRAPPER(swap_in_gate_area_no_mm, int, unsigned long addr)
IMP_MOD_DEP_WRAPPER(in_gate_area_no_mm, addr)
#else /* LINUX_VERSION_CODE > KERNEL_VERSION(2, 6, 38) */
DECLARE_MOD_DEP_WRAPPER(swap_in_gate_area_no_task, int, unsigned long addr)
IMP_MOD_DEP_WRAPPER(in_gate_area_no_task, addr)
#endif /* LINUX_VERSION_CODE > KERNEL_VERSION(2, 6, 38) */
static inline int swap_in_gate_area_no_xxx(unsigned long addr)
{
#if LINUX_VERSION_CODE > KERNEL_VERSION(2, 6, 38)
	return swap_in_gate_area_no_mm(addr);
#else /* LINUX_VERSION_CODE > KERNEL_VERSION(2, 6, 38) */
	return swap_in_gate_area_no_task(addr);
#endif /* LINUX_VERSION_CODE > KERNEL_VERSION(2, 6, 38) */
}
DECLARE_MOD_DEP_WRAPPER(swap__flush_anon_page,
			void,
			struct vm_area_struct *vma, struct page *page,
			unsigned long vmaddr)
IMP_MOD_DEP_WRAPPER(__flush_anon_page, vma, page, vmaddr)
static inline void swap_flush_anon_page(struct vm_area_struct *vma,
					struct page *page,
					unsigned long vmaddr)
{
#if defined(ARCH_HAS_FLUSH_ANON_PAGE) && defined(CONFIG_ARM)
	swap__flush_anon_page(vma, page, vmaddr);
#else /* defined(ARCH_HAS_FLUSH_ANON_PAGE) && defined(CONFIG_ARM) */
	flush_anon_page(vma, page, vmaddr);
#endif /* defined(ARCH_HAS_FLUSH_ANON_PAGE) && defined(CONFIG_ARM) */
}
DECLARE_MOD_DEP_WRAPPER(swap_vm_normal_page,
			struct page *,
			struct vm_area_struct *vma, unsigned long addr,
			pte_t pte)
IMP_MOD_DEP_WRAPPER(vm_normal_page, vma, addr, pte)
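/*
 * Resolve all of the dependencies declared above.  This must run once at
 * module load, before any of the swap_*() wrappers are used.
 */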
int init_module_dependencies(void)
{
#if LINUX_VERSION_CODE > KERNEL_VERSION(2, 6, 18)
	INIT_MOD_DEP_VAR(handle_mm_fault, handle_mm_fault);
#endif /* LINUX_VERSION_CODE > KERNEL_VERSION(2, 6, 18) */

#ifndef copy_to_user_page
	INIT_MOD_DEP_VAR(copy_to_user_page, copy_to_user_page);
#endif /* copy_to_user_page */

	INIT_MOD_DEP_VAR(find_extend_vma, find_extend_vma);
	INIT_MOD_DEP_VAR(get_gate_vma, get_gate_vma);

#ifdef CONFIG_HUGETLB_PAGE
	INIT_MOD_DEP_VAR(follow_hugetlb_page, follow_hugetlb_page);
#endif /* CONFIG_HUGETLB_PAGE */

#ifdef __HAVE_ARCH_GATE_AREA
	INIT_MOD_DEP_VAR(in_gate_area, in_gate_area);
#endif /* __HAVE_ARCH_GATE_AREA */

#if LINUX_VERSION_CODE >= KERNEL_VERSION(3, 9, 0)
	INIT_MOD_DEP_VAR(follow_page_mask, follow_page_mask);
#else /* LINUX_VERSION_CODE >= KERNEL_VERSION(3, 9, 0) */
	INIT_MOD_DEP_VAR(follow_page, follow_page);
#endif /* LINUX_VERSION_CODE >= KERNEL_VERSION(3, 9, 0) */

#if LINUX_VERSION_CODE > KERNEL_VERSION(2, 6, 38)
#ifndef is_zero_pfn
	swap_zero_pfn = page_to_pfn(ZERO_PAGE(0));
#endif /* is_zero_pfn */
	INIT_MOD_DEP_VAR(in_gate_area_no_mm, in_gate_area_no_mm);
#else /* (LINUX_VERSION_CODE > KERNEL_VERSION(2, 6, 38)) */
	INIT_MOD_DEP_VAR(in_gate_area_no_task, in_gate_area_no_task);
#endif /* (LINUX_VERSION_CODE > KERNEL_VERSION(2, 6, 38)) */

#if defined(ARCH_HAS_FLUSH_ANON_PAGE) && defined(CONFIG_ARM)
	INIT_MOD_DEP_VAR(__flush_anon_page, __flush_anon_page);
#endif /* defined(ARCH_HAS_FLUSH_ANON_PAGE) && defined(CONFIG_ARM) */

	INIT_MOD_DEP_VAR(vm_normal_page, vm_normal_page);

#if (LINUX_VERSION_CODE != KERNEL_VERSION(2, 6, 16))
# if (LINUX_VERSION_CODE < KERNEL_VERSION(2, 6, 11))
	INIT_MOD_DEP_VAR(put_task_struct, put_task_struct);
# else /* (LINUX_VERSION_CODE < KERNEL_VERSION(2, 6, 11)) */
	INIT_MOD_DEP_VAR(put_task_struct, __put_task_struct);
# endif /* (LINUX_VERSION_CODE < KERNEL_VERSION(2, 6, 11)) */
#else /* (LINUX_VERSION_CODE != KERNEL_VERSION(2, 6, 16)) */
	INIT_MOD_DEP_VAR(put_task_struct, __put_task_struct_cb);
#endif /* (LINUX_VERSION_CODE != KERNEL_VERSION(2, 6, 16)) */

#if LINUX_VERSION_CODE >= KERNEL_VERSION(3, 4, 0)
	INIT_MOD_DEP_VAR(do_mmap_pgoff, do_mmap_pgoff);
#endif /* LINUX_VERSION_CODE >= KERNEL_VERSION(3, 4, 0) */

	return 0;
}
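/*
 * These mirror the internal GUP_FLAGS_* values used by __get_user_pages()
 * in older kernels, before the FOLL_* flags took over; they are only
 * needed for the legacy code path below.
 */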
#if LINUX_VERSION_CODE <= KERNEL_VERSION(2, 6, 38) /* FIXME: must be < 32 */
#define GUP_FLAGS_WRITE				0x1
#define GUP_FLAGS_FORCE				0x2
#define GUP_FLAGS_IGNORE_VMA_PERMISSIONS	0x4
#define GUP_FLAGS_IGNORE_SIGKILL		0x8
#endif /* LINUX_VERSION_CODE <= KERNEL_VERSION(2, 6, 38) */
#if LINUX_VERSION_CODE > KERNEL_VERSION(2,6,18)
static inline int use_zero_page(struct vm_area_struct *vma)
{
	/*
	 * We don't want to optimize FOLL_ANON for make_pages_present()
	 * when it tries to page in a VM_LOCKED region. As to VM_SHARED,
	 * we want to get the page from the page tables to make sure
	 * that we serialize and update with any other user of that
	 * mapping.
	 */
	if (vma->vm_flags & (VM_LOCKED | VM_SHARED))
		return 0;
	/*
	 * And if we have a fault routine, it's not an anonymous region.
	 */
	return !vma->vm_ops || !vma->vm_ops->fault;
}
#endif /* LINUX_VERSION_CODE > KERNEL_VERSION(2,6,18) */
#if LINUX_VERSION_CODE > KERNEL_VERSION(2, 6, 38)

#if LINUX_VERSION_CODE >= KERNEL_VERSION(3, 8, 0)

#ifdef __HAVE_COLOR_ZERO_PAGE

static inline int swap_is_zero_pfn(unsigned long pfn)
{
	unsigned long offset_from_zero_pfn = pfn - swap_zero_pfn;
	return offset_from_zero_pfn <= (zero_page_mask >> PAGE_SHIFT);
}

#else /* __HAVE_COLOR_ZERO_PAGE */

static inline int swap_is_zero_pfn(unsigned long pfn)
{
	return pfn == swap_zero_pfn;
}

#endif /* __HAVE_COLOR_ZERO_PAGE */

#else /* LINUX_VERSION_CODE >= KERNEL_VERSION(3, 8, 0) */

static inline int swap_is_zero_pfn(unsigned long pfn)
{
#ifndef is_zero_pfn
	return pfn == swap_zero_pfn;
#else /* is_zero_pfn */
	return is_zero_pfn(pfn);
#endif /* is_zero_pfn */
}

#endif /* LINUX_VERSION_CODE >= KERNEL_VERSION(3, 8, 0) */
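/*
 * Same stack guard page test the kernel's __get_user_pages() applies for
 * FOLL_MLOCK: the guard page of a growing stack is skipped rather than
 * faulted in.
 */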
static inline int stack_guard_page(struct vm_area_struct *vma, unsigned long addr)
{
	return stack_guard_page_start(vma, addr) ||
	       stack_guard_page_end(vma, addr + PAGE_SIZE);
}
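/*
 * __get_user_pages_uprobe() below is an adaptation of the kernel's
 * __get_user_pages() for the running kernel version.  It goes through the
 * swap_*() wrappers resolved above, and the cond_resched() calls are
 * commented out so that the caller (access_process_vm_atomic()) can use it
 * from atomic context as well.
 */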
#if LINUX_VERSION_CODE >= KERNEL_VERSION(3, 9, 0)

long __get_user_pages_uprobe(struct task_struct *tsk, struct mm_struct *mm,
		unsigned long start, unsigned long nr_pages,
		unsigned int gup_flags, struct page **pages,
		struct vm_area_struct **vmas, int *nonblocking)
{
	long i;
	unsigned long vm_flags;
	unsigned int page_mask;

	if (!nr_pages)
		return 0;

	VM_BUG_ON(!!pages != !!(gup_flags & FOLL_GET));

	/*
	 * Require read or write permissions.
	 * If FOLL_FORCE is set, we only require the "MAY" flags.
	 */
	vm_flags  = (gup_flags & FOLL_WRITE) ?
			(VM_WRITE | VM_MAYWRITE) : (VM_READ | VM_MAYREAD);
	vm_flags &= (gup_flags & FOLL_FORCE) ?
			(VM_MAYREAD | VM_MAYWRITE) : (VM_READ | VM_WRITE);

	/*
	 * If FOLL_FORCE and FOLL_NUMA are both set, handle_mm_fault
	 * would be called on PROT_NONE ranges. We must never invoke
	 * handle_mm_fault on PROT_NONE ranges or the NUMA hinting
	 * page faults would unprotect the PROT_NONE ranges if
	 * _PAGE_NUMA and _PAGE_PROTNONE are sharing the same pte/pmd
	 * bitflag. So to avoid that, don't set FOLL_NUMA if
	 * FOLL_FORCE is set.
	 */
	if (!(gup_flags & FOLL_FORCE))
		gup_flags |= FOLL_NUMA;

	i = 0;

	do {
		struct vm_area_struct *vma;

		vma = swap_find_extend_vma(mm, start);
		if (!vma && swap_in_gate_area(tsk, start)) {
			unsigned long pg = start & PAGE_MASK;
			pgd_t *pgd;
			pud_t *pud;
			pmd_t *pmd;
			pte_t *pte;

			/* user gate pages are read-only */
			if (gup_flags & FOLL_WRITE)
				return i ? : -EFAULT;
			if (pg > TASK_SIZE)
				pgd = pgd_offset_k(pg);
			else
				pgd = pgd_offset_gate(mm, pg);
			BUG_ON(pgd_none(*pgd));
			pud = pud_offset(pgd, pg);
			BUG_ON(pud_none(*pud));
			pmd = pmd_offset(pud, pg);
			if (pmd_none(*pmd))
				return i ? : -EFAULT;
			VM_BUG_ON(pmd_trans_huge(*pmd));
			pte = pte_offset_map(pmd, pg);
			if (pte_none(*pte)) {
				pte_unmap(pte);
				return i ? : -EFAULT;
			}
			vma = swap_get_gate_vma(mm);
			if (pages) {
				struct page *page;

				page = swap_vm_normal_page(vma, start, *pte);
				if (!page) {
					if (!(gup_flags & FOLL_DUMP) &&
					     swap_is_zero_pfn(pte_pfn(*pte)))
						page = pte_page(*pte);
					else {
						pte_unmap(pte);
						return i ? : -EFAULT;
					}
				}
				pages[i] = page;
				get_page(page);
			}
			pte_unmap(pte);
			page_mask = 0;
			goto next_page;
		}

		if (!vma ||
		    (vma->vm_flags & (VM_IO | VM_PFNMAP)) ||
		    !(vm_flags & vma->vm_flags))
			return i ? : -EFAULT;

		if (is_vm_hugetlb_page(vma)) {
			i = swap_follow_hugetlb_page(mm, vma, pages, vmas,
					&start, &nr_pages, i, gup_flags);
			continue;
		}

		do {
			struct page *page;
			unsigned int foll_flags = gup_flags;
			unsigned int page_increm;

			/*
			 * If we have a pending SIGKILL, don't keep faulting
			 * pages and potentially allocating memory.
			 */
			if (unlikely(fatal_signal_pending(current)))
				return i ? i : -ERESTARTSYS;

			/* cond_resched(); */
			while (!(page = swap_follow_page_mask(vma, start,
						foll_flags, &page_mask))) {
				int ret;
				unsigned int fault_flags = 0;

				/* For mlock, just skip the stack guard page. */
				if (foll_flags & FOLL_MLOCK) {
					if (stack_guard_page(vma, start))
						goto next_page;
				}
				if (foll_flags & FOLL_WRITE)
					fault_flags |= FAULT_FLAG_WRITE;
				if (nonblocking)
					fault_flags |= FAULT_FLAG_ALLOW_RETRY;
				if (foll_flags & FOLL_NOWAIT)
					fault_flags |= (FAULT_FLAG_ALLOW_RETRY | FAULT_FLAG_RETRY_NOWAIT);

				ret = swap_handle_mm_fault(mm, vma, start,
							fault_flags);

				if (ret & VM_FAULT_ERROR) {
					if (ret & VM_FAULT_OOM)
						return i ? i : -ENOMEM;
					if (ret & (VM_FAULT_HWPOISON |
						   VM_FAULT_HWPOISON_LARGE)) {
						if (i)
							return i;
						else if (gup_flags & FOLL_HWPOISON)
							return -EHWPOISON;
						else
							return -EFAULT;
					}
					if (ret & VM_FAULT_SIGBUS)
						return i ? i : -EFAULT;
					BUG();
				}

				if (tsk) {
					if (ret & VM_FAULT_MAJOR)
						tsk->maj_flt++;
					else
						tsk->min_flt++;
				}

				if (ret & VM_FAULT_RETRY) {
					if (nonblocking)
						*nonblocking = 0;
					return i;
				}

				/*
				 * The VM_FAULT_WRITE bit tells us that
				 * do_wp_page has broken COW when necessary,
				 * even if maybe_mkwrite decided not to set
				 * pte_write. We can thus safely do subsequent
				 * page lookups as if they were reads. But only
				 * do so when looping for pte_write is futile:
				 * in some cases userspace may also be wanting
				 * to write to the gotten user page, which a
				 * read fault here might prevent (a readonly
				 * page might get reCOWed by userspace write).
				 */
				if ((ret & VM_FAULT_WRITE) &&
				    !(vma->vm_flags & VM_WRITE))
					foll_flags &= ~FOLL_WRITE;

				/* cond_resched(); */
			}
			if (IS_ERR(page))
				return i ? i : PTR_ERR(page);
			if (pages) {
				pages[i] = page;

				swap_flush_anon_page(vma, page, start);
				flush_dcache_page(page);
				page_mask = 0;
			}
next_page:
			if (vmas) {
				vmas[i] = vma;
				page_mask = 0;
			}
			page_increm = 1 + (~(start >> PAGE_SHIFT) & page_mask);
			if (page_increm > nr_pages)
				page_increm = nr_pages;
			i += page_increm;
			start += page_increm * PAGE_SIZE;
			nr_pages -= page_increm;
		} while (nr_pages && start < vma->vm_end);
	} while (nr_pages);

	return i;
}
#else /* LINUX_VERSION_CODE >= KERNEL_VERSION(3, 9, 0) */

static int __get_user_pages_uprobe(struct task_struct *tsk, struct mm_struct *mm,
			unsigned long start, int nr_pages, unsigned int gup_flags,
			struct page **pages, struct vm_area_struct **vmas,
			int *nonblocking)
{
	int i;
	unsigned long vm_flags;

	if (!nr_pages)
		return 0;

	VM_BUG_ON(!!pages != !!(gup_flags & FOLL_GET));

	/*
	 * Require read or write permissions.
	 * If FOLL_FORCE is set, we only require the "MAY" flags.
	 */
	vm_flags  = (gup_flags & FOLL_WRITE) ?
			(VM_WRITE | VM_MAYWRITE) : (VM_READ | VM_MAYREAD);
	vm_flags &= (gup_flags & FOLL_FORCE) ?
			(VM_MAYREAD | VM_MAYWRITE) : (VM_READ | VM_WRITE);

	i = 0;
	do {
		struct vm_area_struct *vma;

		vma = swap_find_extend_vma(mm, start);
		if (!vma && swap_in_gate_area_no_xxx(start)) {
			unsigned long pg = start & PAGE_MASK;
			pgd_t *pgd;
			pud_t *pud;
			pmd_t *pmd;
			pte_t *pte;

			/* user gate pages are read-only */
			if (gup_flags & FOLL_WRITE) {
				return i ? : -EFAULT;
			}
			if (pg > TASK_SIZE)
				pgd = pgd_offset_k(pg);
			else
				pgd = pgd_offset_gate(mm, pg);
			BUG_ON(pgd_none(*pgd));
			pud = pud_offset(pgd, pg);
			BUG_ON(pud_none(*pud));
			pmd = pmd_offset(pud, pg);
			if (pmd_none(*pmd)) {
				return i ? : -EFAULT;
			}
			VM_BUG_ON(pmd_trans_huge(*pmd));
			pte = pte_offset_map(pmd, pg);
			if (pte_none(*pte)) {
				pte_unmap(pte);
				return i ? : -EFAULT;
			}
			vma = swap_get_gate_vma(mm);
			if (pages) {
				struct page *page;

				page = swap_vm_normal_page(vma, start, *pte);
				if (!page) {
					if (!(gup_flags & FOLL_DUMP) &&
					     swap_is_zero_pfn(pte_pfn(*pte)))
						page = pte_page(*pte);
					else {
						pte_unmap(pte);
						return i ? : -EFAULT;
					}
				}
				pages[i] = page;
				get_page(page);
			}
			pte_unmap(pte);
			goto next_page;
		}

		if (!vma ||
		    (vma->vm_flags & (VM_IO | VM_PFNMAP)) ||
		    !(vm_flags & vma->vm_flags)) {
			return i ? : -EFAULT;
		}

		if (is_vm_hugetlb_page(vma)) {
			i = swap_follow_hugetlb_page(mm, vma, pages, vmas,
					&start, &nr_pages, i, gup_flags);
			continue;
		}

		do {
			struct page *page;
			unsigned int foll_flags = gup_flags;

			/*
			 * If we have a pending SIGKILL, don't keep faulting
			 * pages and potentially allocating memory.
			 */
			if (unlikely(fatal_signal_pending(current))) {
				return i ? i : -ERESTARTSYS;
			}

			/* cond_resched(); */
			while (!(page = swap_follow_page(vma, start, foll_flags))) {
				int ret;
				unsigned int fault_flags = 0;

				/* For mlock, just skip the stack guard page. */
				if (foll_flags & FOLL_MLOCK) {
					if (stack_guard_page(vma, start))
						goto next_page;
				}
				if (foll_flags & FOLL_WRITE)
					fault_flags |= FAULT_FLAG_WRITE;
				if (nonblocking)
					fault_flags |= FAULT_FLAG_ALLOW_RETRY;
				if (foll_flags & FOLL_NOWAIT)
					fault_flags |= (FAULT_FLAG_ALLOW_RETRY | FAULT_FLAG_RETRY_NOWAIT);

				ret = swap_handle_mm_fault(mm, vma, start,
							fault_flags);

				if (ret & VM_FAULT_ERROR) {
					if (ret & VM_FAULT_OOM) {
						return i ? i : -ENOMEM;
					}
					if (ret & (VM_FAULT_HWPOISON |
						   VM_FAULT_HWPOISON_LARGE)) {
						if (i)
							return i;
						else if (gup_flags & FOLL_HWPOISON) {
							return -EHWPOISON;
						} else {
							return -EFAULT;
						}
					}
					if (ret & VM_FAULT_SIGBUS) {
						return i ? i : -EFAULT;
					}
					BUG();
				}

				if (tsk) {
					if (ret & VM_FAULT_MAJOR)
						tsk->maj_flt++;
					else
						tsk->min_flt++;
				}

				if (ret & VM_FAULT_RETRY) {
					if (nonblocking)
						*nonblocking = 0;
					return i;
				}

				/*
				 * The VM_FAULT_WRITE bit tells us that
				 * do_wp_page has broken COW when necessary,
				 * even if maybe_mkwrite decided not to set
				 * pte_write. We can thus safely do subsequent
				 * page lookups as if they were reads. But only
				 * do so when looping for pte_write is futile:
				 * in some cases userspace may also be wanting
				 * to write to the gotten user page, which a
				 * read fault here might prevent (a readonly
				 * page might get reCOWed by userspace write).
				 */
				if ((ret & VM_FAULT_WRITE) &&
				    !(vma->vm_flags & VM_WRITE))
					foll_flags &= ~FOLL_WRITE;

				/* cond_resched(); */
			}
			if (IS_ERR(page)) {
				return i ? i : PTR_ERR(page);
			}
			if (pages) {
				pages[i] = page;

				swap_flush_anon_page(vma, page, start);
				flush_dcache_page(page);
			}
next_page:
			if (vmas)
				vmas[i] = vma;
			i++;
			start += PAGE_SIZE;
			nr_pages--;
		} while (nr_pages && start < vma->vm_end);
	} while (nr_pages);

	return i;
}

#endif /* LINUX_VERSION_CODE >= KERNEL_VERSION(3, 9, 0) */
#else /* LINUX_VERSION_CODE > KERNEL_VERSION(2, 6, 38) */

static int __get_user_pages_uprobe(struct task_struct *tsk, struct mm_struct *mm,
		unsigned long start, int len, int flags,
		struct page **pages, struct vm_area_struct **vmas)
{
	int i;
	unsigned int vm_flags = 0;
	int write = !!(flags & GUP_FLAGS_WRITE);
	int force = !!(flags & GUP_FLAGS_FORCE);
	int ignore = !!(flags & GUP_FLAGS_IGNORE_VMA_PERMISSIONS);

	if (len <= 0)
		return 0;
	/*
	 * Require read or write permissions.
	 * If 'force' is set, we only require the "MAY" flags.
	 */
	vm_flags  = write ? (VM_WRITE | VM_MAYWRITE) : (VM_READ | VM_MAYREAD);
	vm_flags &= force ? (VM_MAYREAD | VM_MAYWRITE) : (VM_READ | VM_WRITE);
	i = 0;

	do {
		struct vm_area_struct *vma;
		unsigned int foll_flags;

		vma = find_vma(mm, start);
		if (!vma && swap_in_gate_area(tsk, start)) {
			unsigned long pg = start & PAGE_MASK;
			struct vm_area_struct *gate_vma = swap_get_gate_vma(tsk);
			pgd_t *pgd;
			pud_t *pud;
			pmd_t *pmd;
			pte_t *pte;

			/* user gate pages are read-only */
			if (!ignore && write)
				return i ? : -EFAULT;
			if (pg > TASK_SIZE)
				pgd = pgd_offset_k(pg);
			else
				pgd = pgd_offset_gate(mm, pg);
			BUG_ON(pgd_none(*pgd));
			pud = pud_offset(pgd, pg);
			BUG_ON(pud_none(*pud));
			pmd = pmd_offset(pud, pg);
			if (pmd_none(*pmd))
				return i ? : -EFAULT;
			pte = pte_offset_map(pmd, pg);
			if (pte_none(*pte)) {
				pte_unmap(pte);
				return i ? : -EFAULT;
			}
			if (pages) {
				struct page *page = swap_vm_normal_page(gate_vma, start, *pte);
				pages[i] = page;
				if (page)
					get_page(page);
			}
			pte_unmap(pte);
			if (vmas)
				vmas[i] = gate_vma;
			i++;
			start += PAGE_SIZE;
			len--;
			continue;
		}

		if (!vma ||
		    (vma->vm_flags & (VM_IO | VM_PFNMAP)) ||
		    (!ignore && !(vm_flags & vma->vm_flags)))
			return i ? : -EFAULT;

		if (is_vm_hugetlb_page(vma)) {
			i = swap_follow_hugetlb_page(mm, vma, pages, vmas,
					&start, &len, i, write);
			continue;
		}

		foll_flags = FOLL_TOUCH;
		if (pages)
			foll_flags |= FOLL_GET;

#if LINUX_VERSION_CODE > KERNEL_VERSION(2,6,18)
#if LINUX_VERSION_CODE <= KERNEL_VERSION(2,6,30)
		if (!write && use_zero_page(vma))
			foll_flags |= FOLL_ANON;
#endif /* LINUX_VERSION_CODE <= KERNEL_VERSION(2,6,30) */
#endif /* LINUX_VERSION_CODE > KERNEL_VERSION(2,6,18) */

		do {
			struct page *page;

			if (write)
				foll_flags |= FOLL_WRITE;

			DBPRINTF("pages = %p vma = %p\n", pages, vma);
			while (!(page = swap_follow_page(vma, start, foll_flags))) {
				int ret;

				ret = swap_handle_mm_fault(mm, vma, start,
						foll_flags & FOLL_WRITE);

#if LINUX_VERSION_CODE <= KERNEL_VERSION(2,6,18)
				if (ret & VM_FAULT_WRITE)
					foll_flags &= ~FOLL_WRITE;

				switch (ret & ~VM_FAULT_WRITE) {
				case VM_FAULT_MINOR:
					tsk->min_flt++;
					break;
				case VM_FAULT_MAJOR:
					tsk->maj_flt++;
					break;
				case VM_FAULT_SIGBUS:
					return i ? i : -EFAULT;
				case VM_FAULT_OOM:
					return i ? i : -ENOMEM;
				default:
					BUG();
				}
#else /* LINUX_VERSION_CODE <= KERNEL_VERSION(2,6,18) */
				if (ret & VM_FAULT_ERROR) {
					if (ret & VM_FAULT_OOM)
						return i ? i : -ENOMEM;
					else if (ret & VM_FAULT_SIGBUS)
						return i ? i : -EFAULT;
					BUG();
				}
				if (ret & VM_FAULT_MAJOR)
					tsk->maj_flt++;
				else
					tsk->min_flt++;

				/*
				 * The VM_FAULT_WRITE bit tells us that
				 * do_wp_page has broken COW when necessary,
				 * even if maybe_mkwrite decided not to set
				 * pte_write. We can thus safely do subsequent
				 * page lookups as if they were reads. But only
				 * do so when looping for pte_write is futile:
				 * in some cases userspace may also be wanting
				 * to write to the gotten user page, which a
				 * read fault here might prevent (a readonly
				 * page might get reCOWed by userspace write).
				 */
				if ((ret & VM_FAULT_WRITE) &&
				    !(vma->vm_flags & VM_WRITE))
					foll_flags &= ~FOLL_WRITE;
#endif /* LINUX_VERSION_CODE <= KERNEL_VERSION(2,6,18) */
			}

			if (IS_ERR(page))
				return i ? i : PTR_ERR(page);
			if (pages) {
				pages[i] = page;

				swap_flush_anon_page(vma, page, start);
				flush_dcache_page(page);
			}
			if (vmas)
				vmas[i] = vma;
			i++;
			start += PAGE_SIZE;
			len--;
		} while (len && start < vma->vm_end);
	} while (len);

	return i;
}

#endif /* LINUX_VERSION_CODE > KERNEL_VERSION(2, 6, 38) */
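/*
 * get_user_pages_uprobe(): counterpart of get_user_pages() used by
 * access_process_vm_atomic() below.  It translates the write/force
 * arguments into the flag set the running kernel expects and calls the
 * local __get_user_pages_uprobe(); on kernels older than 2.6.29 it simply
 * falls back to get_user_pages().
 */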
int get_user_pages_uprobe(struct task_struct *tsk, struct mm_struct *mm,
		unsigned long start, int len, int write, int force,
		struct page **pages, struct vm_area_struct **vmas)
{
#if LINUX_VERSION_CODE >= KERNEL_VERSION(2, 6, 29)
#if LINUX_VERSION_CODE > KERNEL_VERSION(2, 6, 38) /* FIXME: must be >= 32! */
	int flags = FOLL_TOUCH;

	if (pages)
		flags |= FOLL_GET;
	if (write)
		flags |= FOLL_WRITE;
	if (force)
		flags |= FOLL_FORCE;
#else /* LINUX_VERSION_CODE > KERNEL_VERSION(2, 6, 38) */
	int flags = 0;

	if (write)
		flags |= GUP_FLAGS_WRITE;
	if (force)
		flags |= GUP_FLAGS_FORCE;
#endif /* LINUX_VERSION_CODE > KERNEL_VERSION(2, 6, 38) */

	return __get_user_pages_uprobe(tsk, mm,
				start, len, flags,
#if LINUX_VERSION_CODE > KERNEL_VERSION(2, 6, 38)
				pages, vmas, NULL);
#else /* LINUX_VERSION_CODE > KERNEL_VERSION(2, 6, 38) */
				pages, vmas);
#endif /* LINUX_VERSION_CODE > KERNEL_VERSION(2, 6, 38) */
#else /* LINUX_VERSION_CODE >= KERNEL_VERSION(2, 6, 29) */
	return get_user_pages(tsk, mm, start, len, write, force, pages, vmas);
#endif /* LINUX_VERSION_CODE >= KERNEL_VERSION(2, 6, 29) */
}
#define ACCESS_PROCESS_OPTIMIZATION 0

#if ACCESS_PROCESS_OPTIMIZATION

#define GET_STEP_X(LEN, STEP) (((LEN) >= (STEP)) ? (STEP) : (LEN) % (STEP))
#define GET_STEP_4(LEN) GET_STEP_X((LEN), 4)
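/*
 * Optional fast path for the common "current reads its own memory" case:
 * copy in 4-, 2- or 1-byte steps with get_user()/put_user() instead of
 * going through get_user_pages_uprobe().  GET_STEP_4() picks the widest
 * step that still fits into the remaining length.  Disabled by default
 * (ACCESS_PROCESS_OPTIMIZATION is 0 above).
 */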
static void read_data_current(unsigned long addr, void *buf, int len)
{
	int step;
	int pos = 0;

	for (step = GET_STEP_4(len); len; len -= step) {
		switch (GET_STEP_4(len)) {
		case 1:
			get_user(*(u8 *)(buf + pos), (u8 *)(addr + pos));
			step = 1;
			break;
		case 2:
		case 3:
			get_user(*(u16 *)(buf + pos), (u16 *)(addr + pos));
			step = 2;
			break;
		case 4:
			get_user(*(u32 *)(buf + pos), (u32 *)(addr + pos));
			step = 4;
			break;
		}
		pos += step;
	}
}

static void write_data_current(unsigned long addr, void *buf, int len)
{
	int step;
	int pos = 0;

	for (step = GET_STEP_4(len); len; len -= step) {
		switch (GET_STEP_4(len)) {
		case 1:
			put_user(*(u8 *)(buf + pos), (u8 *)(addr + pos));
			step = 1;
			break;
		case 2:
		case 3:
			put_user(*(u16 *)(buf + pos), (u16 *)(addr + pos));
			step = 2;
			break;
		case 4:
			put_user(*(u32 *)(buf + pos), (u32 *)(addr + pos));
			step = 4;
			break;
		}
		pos += step;
	}
}
#endif /* ACCESS_PROCESS_OPTIMIZATION */
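/*
 * access_process_vm_atomic(): SWAP's counterpart of access_process_vm().
 * It pins pages with get_user_pages_uprobe() and, when called from atomic
 * context, maps them with kmap_atomic() instead of kmap().  The return
 * value is the number of bytes actually copied.
 *
 * Usage sketch (illustrative only): reading one word from a traced task:
 *
 *	unsigned long word;
 *
 *	if (access_process_vm_atomic(task, addr, &word, sizeof(word), 0) !=
 *	    sizeof(word))
 *		goto fail;
 */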
int access_process_vm_atomic(struct task_struct *tsk, unsigned long addr, void *buf, int len, int write)
{
	struct mm_struct *mm;
	struct vm_area_struct *vma;
	void *old_buf = buf;
	int atomic;

#if ACCESS_PROCESS_OPTIMIZATION
	if (write == 0 && tsk == current) {
		read_data_current(addr, buf, len);
		return len;
	}
#endif /* ACCESS_PROCESS_OPTIMIZATION */

	mm = tsk->mm; /* function 'get_task_mm' is to be called */
	if (!mm)
		return 0;

	/* FIXME: danger: write memory in atomic context */
	atomic = in_atomic();

	/* ignore errors, just check how much was successfully transferred */
	while (len) {
		int bytes, ret, offset;
		void *maddr;
		struct page *page = NULL;

		ret = get_user_pages_uprobe(tsk, mm, addr, 1,
					    write, 1, &page, &vma);
		if (ret <= 0) {
			/*
			 * Check if this is a VM_IO | VM_PFNMAP VMA, which
			 * we can access using slightly different code.
			 */
#ifdef CONFIG_HAVE_IOREMAP_PROT
			vma = find_vma(mm, addr);
			if (!vma)
				break;
			if (vma->vm_ops && vma->vm_ops->access)
				ret = vma->vm_ops->access(vma, addr, buf,
							  len, write);
			if (ret <= 0)
#endif /* CONFIG_HAVE_IOREMAP_PROT */
				break;
			bytes = ret;
		} else {
			bytes = len;
			offset = addr & (PAGE_SIZE-1);
			if (bytes > PAGE_SIZE-offset)
				bytes = PAGE_SIZE-offset;

			maddr = atomic ? swap_kmap_atomic(page) : kmap(page);
			if (write) {
				swap_copy_to_user_page(vma, page, addr,
						       maddr + offset, buf, bytes);
				set_page_dirty_lock(page);
			} else {
				copy_from_user_page(vma, page, addr,
						    buf, maddr + offset, bytes);
			}
			atomic ? swap_kunmap_atomic(maddr) : kunmap(page);
			page_cache_release(page);
		}
		len -= bytes;
		buf += bytes;
		addr += bytes;
	}

	return buf - old_buf;
}
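/*
 * page_present(): walk the page tables of @mm and report whether the page
 * backing @address is currently resident (present pte with a valid pfn).
 */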
int page_present(struct mm_struct *mm, unsigned long address)
{
	pgd_t *pgd;
	pud_t *pud;
	pmd_t *pmd;
	pte_t *ptep, pte;
	unsigned long pfn;

	pgd = pgd_offset(mm, address);
	if (pgd_none(*pgd) || unlikely(pgd_bad(*pgd)))
		goto out;
	pud = pud_offset(pgd, address);
	if (pud_none(*pud) || unlikely(pud_bad(*pud)))
		goto out;
	pmd = pmd_offset(pud, address);
	if (pmd_none(*pmd) || unlikely(pmd_bad(*pmd)))
		goto out;
	ptep = pte_offset_map(pmd, address);
	if (!ptep)
		goto out;

	pte = *ptep;
	pte_unmap(ptep);
	if (pte_present(pte)) {
		pfn = pte_pfn(pte);
		if (pfn_valid(pfn)) {
			return 1;
		}
	}

out:
	return 0;
}
EXPORT_SYMBOL_GPL(page_present);
EXPORT_SYMBOL_GPL(access_process_vm_atomic);