/*
 * Dynamic Binary Instrumentation Module based on KProbes
 * modules/kprobe/dbi_kprobes_deps.c
 *
 * This program is free software; you can redistribute it and/or modify
 * it under the terms of the GNU General Public License as published by
 * the Free Software Foundation; either version 2 of the License, or
 * (at your option) any later version.
 *
 * This program is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
 * GNU General Public License for more details.
 *
 * You should have received a copy of the GNU General Public License
 * along with this program; if not, write to the Free Software
 * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
 *
 * Copyright (C) Samsung Electronics, 2006-2010
 *
 * 2008-2009 Alexey Gerenkov <a.gerenkov@samsung.com>: initial implementation
 *           of user-space probes; x86/ARM/MIPS support for both user and
 *           kernel space.
 * 2010      Ekaterina Gorelkina <e.gorelkina@samsung.com>: redesigned the
 *           module to separate the core and architecture-specific parts.
 */
#include <linux/module.h>
#include <linux/sched.h>

#include <asm/pgtable.h>

#include "dbi_kprobes_deps.h"
#include "dbi_kdebug.h"

#include <linux/slab.h>
unsigned long sched_addr;
unsigned long fork_addr;
unsigned long exit_addr;
unsigned long sys_exit_group_addr;
unsigned long do_group_exit_addr;
#if LINUX_VERSION_CODE > KERNEL_VERSION(2, 6, 29)
/* redefine the kernel's 'pgd_offset_k' to walk init_task's active mm */
#undef pgd_offset_k
#define pgd_offset_k(addr)	pgd_offset(init_task.active_mm, addr)
#endif /* LINUX_VERSION_CODE > KERNEL_VERSION(2, 6, 29) */

#if LINUX_VERSION_CODE > KERNEL_VERSION(2, 6, 38)
#ifndef is_zero_pfn
static unsigned long swap_zero_pfn = 0;
#endif /* is_zero_pfn */
#endif /* LINUX_VERSION_CODE > KERNEL_VERSION(2, 6, 38) */
#if LINUX_VERSION_CODE > KERNEL_VERSION(2, 6, 36)
static inline void *dbi_kmap_atomic(struct page *page)
{
	return kmap_atomic(page);
}

static inline void dbi_kunmap_atomic(void *kvaddr)
{
	kunmap_atomic(kvaddr);
}
#else /* LINUX_VERSION_CODE > KERNEL_VERSION(2, 6, 36) */
static inline void *dbi_kmap_atomic(struct page *page)
{
	return kmap_atomic(page, KM_USER0);
}

static inline void dbi_kunmap_atomic(void *kvaddr)
{
	kunmap_atomic(kvaddr, KM_USER0);
}
#endif /* LINUX_VERSION_CODE > KERNEL_VERSION(2, 6, 36) */
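
/*
 * Illustrative sketch only (not part of the module): the wrappers above give
 * callers a single signature regardless of whether kmap_atomic() still takes
 * a KM_* slot argument. A hypothetical helper using them could look like:
 *
 *	void *kaddr = dbi_kmap_atomic(page);
 *	memcpy(dst, kaddr + offset, len);
 *	dbi_kunmap_atomic(kaddr);
 *
 * i.e. map, touch the page through the returned kernel address, then unmap
 * the same address, exactly as access_process_vm_atomic() does further down.
 */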
#if LINUX_VERSION_CODE >= KERNEL_VERSION(3, 9, 0)
DECLARE_MOD_FUNC_DEP(do_mmap_pgoff, unsigned long, struct file *file, unsigned long addr, unsigned long len, unsigned long prot, unsigned long flags, unsigned long pgoff, unsigned long *populate);
DECLARE_MOD_DEP_WRAPPER(do_mmap_pgoff, unsigned long, struct file *file, unsigned long addr, unsigned long len, unsigned long prot, unsigned long flags, unsigned long pgoff, unsigned long *populate)
IMP_MOD_DEP_WRAPPER(do_mmap_pgoff, file, addr, len, prot, flags, pgoff, populate)
#elif LINUX_VERSION_CODE >= KERNEL_VERSION(3, 4, 0) /* LINUX_VERSION_CODE >= KERNEL_VERSION(3, 9, 0) */
DECLARE_MOD_FUNC_DEP(do_mmap_pgoff, unsigned long, struct file *file, unsigned long addr, unsigned long len, unsigned long prot, unsigned long flags, unsigned long pgoff);
DECLARE_MOD_DEP_WRAPPER(do_mmap_pgoff, unsigned long, struct file *file, unsigned long addr, unsigned long len, unsigned long prot, unsigned long flags, unsigned long pgoff)
IMP_MOD_DEP_WRAPPER(do_mmap_pgoff, file, addr, len, prot, flags, pgoff)
#endif /* LINUX_VERSION_CODE >= KERNEL_VERSION(3, 4, 0) */
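
/*
 * Illustrative sketch only (an assumption, not the real macro bodies, which
 * live in dbi_kprobes_deps.h): the DECLARE_MOD_FUNC_DEP /
 * DECLARE_MOD_DEP_WRAPPER / IMP_MOD_DEP_WRAPPER trio used throughout this
 * file roughly follows the pattern
 *
 *	static unsigned long (*__ref_do_mmap_pgoff)(struct file *, unsigned long, ...);
 *	unsigned long do_mmap_pgoff(struct file *file, unsigned long addr, ...)
 *	{
 *		return __ref_do_mmap_pgoff(file, addr, ...);
 *	}
 *
 * i.e. a function pointer that INIT_MOD_DEP_VAR() resolves at init time plus
 * a thin wrapper that forwards its arguments through that pointer.
 */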
/* copy_to_user_page */
#ifndef copy_to_user_page
static DECLARE_MOD_FUNC_DEP(copy_to_user_page, void, struct vm_area_struct *vma, struct page *page, unsigned long uaddr, void *dst, const void *src, unsigned long len);
DECLARE_MOD_DEP_WRAPPER(copy_to_user_page, void, struct vm_area_struct *vma, struct page *page, unsigned long uaddr, void *dst, const void *src, unsigned long len)
IMP_MOD_DEP_WRAPPER(copy_to_user_page, vma, page, uaddr, dst, src, len)
#endif /* copy_to_user_page */

static DECLARE_MOD_FUNC_DEP(access_process_vm, int, struct task_struct *tsk, unsigned long addr, void *buf, int len, int write);

static DECLARE_MOD_FUNC_DEP(find_extend_vma, struct vm_area_struct *, struct mm_struct *mm, unsigned long addr);
#if LINUX_VERSION_CODE <= KERNEL_VERSION(2, 6, 30)
#if LINUX_VERSION_CODE > KERNEL_VERSION(2, 6, 18)
static DECLARE_MOD_FUNC_DEP(handle_mm_fault, int, struct mm_struct *mm, struct vm_area_struct *vma, unsigned long address, int write_access);
#endif /* LINUX_VERSION_CODE > KERNEL_VERSION(2, 6, 18) */
#else /* LINUX_VERSION_CODE <= KERNEL_VERSION(2, 6, 30) */
static DECLARE_MOD_FUNC_DEP(handle_mm_fault, int, struct mm_struct *mm, struct vm_area_struct *vma, unsigned long address, unsigned int flags);
#endif /* LINUX_VERSION_CODE <= KERNEL_VERSION(2, 6, 30) */
#if LINUX_VERSION_CODE > KERNEL_VERSION(2, 6, 38)
static DECLARE_MOD_FUNC_DEP(get_gate_vma, struct vm_area_struct *, struct mm_struct *mm);
#else /* LINUX_VERSION_CODE > KERNEL_VERSION(2, 6, 38) */
static DECLARE_MOD_FUNC_DEP(get_gate_vma, struct vm_area_struct *, struct task_struct *tsk);
#endif /* LINUX_VERSION_CODE > KERNEL_VERSION(2, 6, 38) */
#ifdef CONFIG_HUGETLB_PAGE
DECLARE_MOD_FUNC_DEP(follow_hugetlb_page, int, struct mm_struct *mm, \
		struct vm_area_struct *vma, struct page **pages, \
		struct vm_area_struct **vmas, unsigned long *position, int *length, \
		int i, unsigned int write);
#endif /* CONFIG_HUGETLB_PAGE */
#ifdef __HAVE_ARCH_GATE_AREA
#if LINUX_VERSION_CODE > KERNEL_VERSION(2, 6, 38)
DECLARE_MOD_FUNC_DEP(in_gate_area, int, struct mm_struct *mm, unsigned long addr);
#else /* LINUX_VERSION_CODE > KERNEL_VERSION(2, 6, 38) */
DECLARE_MOD_FUNC_DEP(in_gate_area, int, struct task_struct *task, unsigned long addr);
#endif /* LINUX_VERSION_CODE > KERNEL_VERSION(2, 6, 38) */
#endif /* __HAVE_ARCH_GATE_AREA */

#if LINUX_VERSION_CODE > KERNEL_VERSION(2, 6, 38)
static DECLARE_MOD_FUNC_DEP(in_gate_area_no_mm, int, unsigned long addr);
#else /* LINUX_VERSION_CODE > KERNEL_VERSION(2, 6, 38) */
static DECLARE_MOD_FUNC_DEP(in_gate_area_no_task, int, unsigned long addr);
#endif /* LINUX_VERSION_CODE > KERNEL_VERSION(2, 6, 38) */

#if LINUX_VERSION_CODE >= KERNEL_VERSION(3, 9, 0)
static DECLARE_MOD_FUNC_DEP(follow_page_mask, \
		struct page *, struct vm_area_struct *vma, \
		unsigned long address, unsigned int foll_flags, \
		unsigned int *page_mask);
DECLARE_MOD_DEP_WRAPPER(follow_page_mask, struct page *, \
		struct vm_area_struct *vma, \
		unsigned long address, \
		unsigned int foll_flags, \
		unsigned int *page_mask)
IMP_MOD_DEP_WRAPPER(follow_page_mask, vma, address, foll_flags, page_mask)
#else /* LINUX_VERSION_CODE >= KERNEL_VERSION(3, 9, 0) */
static DECLARE_MOD_FUNC_DEP(follow_page, \
		struct page *, struct vm_area_struct *vma, \
		unsigned long address, unsigned int foll_flags);
DECLARE_MOD_DEP_WRAPPER(follow_page, struct page *, \
		struct vm_area_struct *vma, \
		unsigned long address, \
		unsigned int foll_flags)
IMP_MOD_DEP_WRAPPER(follow_page, vma, address, foll_flags)
#endif /* LINUX_VERSION_CODE >= KERNEL_VERSION(3, 9, 0) */

static DECLARE_MOD_FUNC_DEP(__flush_anon_page, \
		void, struct vm_area_struct *vma, struct page *page, \
		unsigned long vmaddr);
static DECLARE_MOD_FUNC_DEP(vm_normal_page, \
		struct page *, struct vm_area_struct *vma, \
		unsigned long addr, pte_t pte);
static DECLARE_MOD_FUNC_DEP(flush_ptrace_access, \
		void, struct vm_area_struct *vma, struct page *page, \
		unsigned long uaddr, void *kaddr, unsigned long len, int write);
#if (LINUX_VERSION_CODE != KERNEL_VERSION(2, 6, 16))
static DECLARE_MOD_FUNC_DEP(put_task_struct, \
		void, struct task_struct *tsk);
#else /* (LINUX_VERSION_CODE != KERNEL_VERSION(2, 6, 16)) */
static DECLARE_MOD_FUNC_DEP(put_task_struct, \
		void, struct rcu_head *rhp);
#endif /* (LINUX_VERSION_CODE != KERNEL_VERSION(2, 6, 16)) */
DECLARE_MOD_DEP_WRAPPER(access_process_vm, int, struct task_struct *tsk, unsigned long addr, void *buf, int len, int write)
IMP_MOD_DEP_WRAPPER(access_process_vm, tsk, addr, buf, len, write)

DECLARE_MOD_DEP_WRAPPER(find_extend_vma, struct vm_area_struct *, struct mm_struct *mm, unsigned long addr)
IMP_MOD_DEP_WRAPPER(find_extend_vma, mm, addr)
#if LINUX_VERSION_CODE <= KERNEL_VERSION(2, 6, 30)
#if LINUX_VERSION_CODE > KERNEL_VERSION(2, 6, 18)
DECLARE_MOD_DEP_WRAPPER(handle_mm_fault, \
		int, struct mm_struct *mm, struct vm_area_struct *vma, unsigned long address, int write_access)
IMP_MOD_DEP_WRAPPER(handle_mm_fault, mm, vma, address, write_access)
#endif /* LINUX_VERSION_CODE > KERNEL_VERSION(2, 6, 18) */
#else /* LINUX_VERSION_CODE <= KERNEL_VERSION(2, 6, 30) */
DECLARE_MOD_DEP_WRAPPER(handle_mm_fault, \
		int, struct mm_struct *mm, struct vm_area_struct *vma, unsigned long address, unsigned int flags)
IMP_MOD_DEP_WRAPPER(handle_mm_fault, mm, vma, address, flags)
#endif /* LINUX_VERSION_CODE <= KERNEL_VERSION(2, 6, 30) */
#if LINUX_VERSION_CODE > KERNEL_VERSION(2, 6, 38)
DECLARE_MOD_DEP_WRAPPER(get_gate_vma, \
		struct vm_area_struct *, struct mm_struct *mm)
IMP_MOD_DEP_WRAPPER(get_gate_vma, mm)
#else /* LINUX_VERSION_CODE > KERNEL_VERSION(2, 6, 38) */
DECLARE_MOD_DEP_WRAPPER(get_gate_vma, \
		struct vm_area_struct *, struct task_struct *tsk)
IMP_MOD_DEP_WRAPPER(get_gate_vma, tsk)
#endif /* LINUX_VERSION_CODE > KERNEL_VERSION(2, 6, 38) */
#ifdef CONFIG_HUGETLB_PAGE
DECLARE_MOD_DEP_WRAPPER(follow_hugetlb_page, int, struct mm_struct *mm, struct vm_area_struct *vma, struct page **pages, struct vm_area_struct **vmas, unsigned long *position, int *length, int i, unsigned int write)
IMP_MOD_DEP_WRAPPER(follow_hugetlb_page, mm, vma, pages, vmas, position, length, i, write)
#endif /* CONFIG_HUGETLB_PAGE */
static inline int dbi_in_gate_area(struct task_struct *task, unsigned long addr)
{
#ifdef __HAVE_ARCH_GATE_AREA
#if LINUX_VERSION_CODE > KERNEL_VERSION(2, 6, 38)
	struct mm_struct *mm = task->mm;
	IMP_MOD_DEP_WRAPPER(in_gate_area, mm, addr)
#else /* LINUX_VERSION_CODE > KERNEL_VERSION(2, 6, 38) */
	IMP_MOD_DEP_WRAPPER(in_gate_area, task, addr)
#endif /* LINUX_VERSION_CODE > KERNEL_VERSION(2, 6, 38) */
#else /* __HAVE_ARCH_GATE_AREA */
	return in_gate_area(task, addr);
#endif /* __HAVE_ARCH_GATE_AREA */
}
#if LINUX_VERSION_CODE > KERNEL_VERSION(2, 6, 38)
DECLARE_MOD_DEP_WRAPPER(in_gate_area_no_mm, int, unsigned long addr)
IMP_MOD_DEP_WRAPPER(in_gate_area_no_mm, addr)
#else /* LINUX_VERSION_CODE > KERNEL_VERSION(2, 6, 38) */
DECLARE_MOD_DEP_WRAPPER(in_gate_area_no_task, int, unsigned long addr)
IMP_MOD_DEP_WRAPPER(in_gate_area_no_task, addr)
#endif /* LINUX_VERSION_CODE > KERNEL_VERSION(2, 6, 38) */
static inline int dbi_in_gate_area_no_xxx(unsigned long addr)
{
#if LINUX_VERSION_CODE > KERNEL_VERSION(2, 6, 38)
	return in_gate_area_no_mm(addr);
#else /* LINUX_VERSION_CODE > KERNEL_VERSION(2, 6, 38) */
	return in_gate_area_no_task(addr);
#endif /* LINUX_VERSION_CODE > KERNEL_VERSION(2, 6, 38) */
}
DECLARE_MOD_DEP_WRAPPER(__flush_anon_page, \
		void, struct vm_area_struct *vma, \
		struct page *page, unsigned long vmaddr)
IMP_MOD_DEP_WRAPPER(__flush_anon_page, vma, page, vmaddr)

DECLARE_MOD_DEP_WRAPPER(vm_normal_page, \
		struct page *, struct vm_area_struct *vma, \
		unsigned long addr, pte_t pte)
IMP_MOD_DEP_WRAPPER(vm_normal_page, vma, addr, pte)

DECLARE_MOD_DEP_WRAPPER(flush_ptrace_access, \
		void, struct vm_area_struct *vma, struct page *page, \
		unsigned long uaddr, void *kaddr, unsigned long len, int write)
IMP_MOD_DEP_WRAPPER(flush_ptrace_access, vma, page, uaddr, kaddr, len, write)
int init_module_dependencies(void)
{
#if LINUX_VERSION_CODE > KERNEL_VERSION(2, 6, 18)
	INIT_MOD_DEP_VAR(handle_mm_fault, handle_mm_fault);
#endif /* LINUX_VERSION_CODE > KERNEL_VERSION(2, 6, 18) */

#ifndef copy_to_user_page
	INIT_MOD_DEP_VAR(copy_to_user_page, copy_to_user_page);
#endif /* copy_to_user_page */

	INIT_MOD_DEP_VAR(find_extend_vma, find_extend_vma);
	INIT_MOD_DEP_VAR(get_gate_vma, get_gate_vma);
	INIT_MOD_DEP_VAR(flush_ptrace_access, flush_ptrace_access);

#ifdef CONFIG_HUGETLB_PAGE
	INIT_MOD_DEP_VAR(follow_hugetlb_page, follow_hugetlb_page);
#endif /* CONFIG_HUGETLB_PAGE */

#ifdef __HAVE_ARCH_GATE_AREA
	INIT_MOD_DEP_VAR(in_gate_area, in_gate_area);
#endif /* __HAVE_ARCH_GATE_AREA */

#if LINUX_VERSION_CODE >= KERNEL_VERSION(3, 9, 0)
	INIT_MOD_DEP_VAR(follow_page_mask, follow_page_mask);
#else /* LINUX_VERSION_CODE >= KERNEL_VERSION(3, 9, 0) */
	INIT_MOD_DEP_VAR(follow_page, follow_page);
#endif /* LINUX_VERSION_CODE >= KERNEL_VERSION(3, 9, 0) */

#if LINUX_VERSION_CODE > KERNEL_VERSION(2, 6, 38)
#ifndef is_zero_pfn
	swap_zero_pfn = page_to_pfn(ZERO_PAGE(0));
#endif /* is_zero_pfn */
	INIT_MOD_DEP_VAR(in_gate_area_no_mm, in_gate_area_no_mm);
#else /* (LINUX_VERSION_CODE > KERNEL_VERSION(2, 6, 38)) */
	INIT_MOD_DEP_VAR(in_gate_area_no_task, in_gate_area_no_task);
#endif /* (LINUX_VERSION_CODE > KERNEL_VERSION(2, 6, 38)) */

	INIT_MOD_DEP_VAR(__flush_anon_page, __flush_anon_page);
	INIT_MOD_DEP_VAR(vm_normal_page, vm_normal_page);
	INIT_MOD_DEP_VAR(access_process_vm, access_process_vm);

#if (LINUX_VERSION_CODE != KERNEL_VERSION(2, 6, 16))
# if (LINUX_VERSION_CODE < KERNEL_VERSION(2, 6, 11))
	INIT_MOD_DEP_VAR(put_task_struct, put_task_struct);
# else /* (LINUX_VERSION_CODE < KERNEL_VERSION(2, 6, 11)) */
	INIT_MOD_DEP_VAR(put_task_struct, __put_task_struct);
# endif /* (LINUX_VERSION_CODE < KERNEL_VERSION(2, 6, 11)) */
#else /* (LINUX_VERSION_CODE != KERNEL_VERSION(2, 6, 16)) */
	INIT_MOD_DEP_VAR(put_task_struct, __put_task_struct_cb);
#endif /* (LINUX_VERSION_CODE != KERNEL_VERSION(2, 6, 16)) */

#if LINUX_VERSION_CODE >= KERNEL_VERSION(3, 4, 0)
	INIT_MOD_DEP_VAR(do_mmap_pgoff, do_mmap_pgoff);
#endif /* LINUX_VERSION_CODE >= KERNEL_VERSION(3, 4, 0) */

	return 0;
}
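
/*
 * Illustrative sketch only (hypothetical caller): init_module_dependencies()
 * is expected to run once from the module init path, before any wrapped
 * symbol above is used, e.g.:
 */
#if 0
static int __init dbi_deps_example_init(void)
{
	int ret = init_module_dependencies();

	if (ret)
		return ret;	/* a dependency could not be resolved */
	/* ... safe to register probes and use the wrappers from here on ... */
	return 0;
}
#endif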
#if LINUX_VERSION_CODE <= KERNEL_VERSION(2, 6, 38) /* FIXME: must be < 32 */
#define GUP_FLAGS_WRITE				0x1
#define GUP_FLAGS_FORCE				0x2
#define GUP_FLAGS_IGNORE_VMA_PERMISSIONS	0x4
#define GUP_FLAGS_IGNORE_SIGKILL		0x8
#endif /* LINUX_VERSION_CODE <= KERNEL_VERSION(2, 6, 38) */
#if LINUX_VERSION_CODE > KERNEL_VERSION(2,6,18)
static inline int use_zero_page(struct vm_area_struct *vma)
{
	/*
	 * We don't want to optimize FOLL_ANON for make_pages_present()
	 * when it tries to page in a VM_LOCKED region. As to VM_SHARED,
	 * we want to get the page from the page tables to make sure
	 * that we serialize and update with any other user of that
	 * mapping.
	 */
	if (vma->vm_flags & (VM_LOCKED | VM_SHARED))
		return 0;
	/*
	 * And if we have a fault routine, it's not an anonymous region.
	 */
	return !vma->vm_ops || !vma->vm_ops->fault;
}
#endif /* LINUX_VERSION_CODE > KERNEL_VERSION(2,6,18) */
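
/*
 * Example (illustrative): a plain anonymous mapping such as a process heap is
 * neither VM_LOCKED nor VM_SHARED and has no vm_ops->fault, so use_zero_page()
 * returns non-zero and the legacy gup path below may add FOLL_ANON for
 * read-only lookups; a file-backed or locked VMA keeps the normal path.
 */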
#if LINUX_VERSION_CODE > KERNEL_VERSION(2, 6, 38)

#if LINUX_VERSION_CODE >= KERNEL_VERSION(3, 8, 0)

#ifdef __HAVE_COLOR_ZERO_PAGE

static inline int swap_is_zero_pfn(unsigned long pfn)
{
	unsigned long offset_from_zero_pfn = pfn - swap_zero_pfn;
	return offset_from_zero_pfn <= (zero_page_mask >> PAGE_SHIFT);
}

#else /* __HAVE_COLOR_ZERO_PAGE */

static inline int swap_is_zero_pfn(unsigned long pfn)
{
	return pfn == swap_zero_pfn;
}

#endif /* __HAVE_COLOR_ZERO_PAGE */

#else /* LINUX_VERSION_CODE >= KERNEL_VERSION(3, 8, 0) */

static inline int swap_is_zero_pfn(unsigned long pfn)
{
#ifndef is_zero_pfn
	return pfn == swap_zero_pfn;
#else /* is_zero_pfn */
	return is_zero_pfn(pfn);
#endif /* is_zero_pfn */
}

#endif /* LINUX_VERSION_CODE >= KERNEL_VERSION(3, 8, 0) */
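
/*
 * Illustrative sketch only: whichever variant is compiled in, the intended
 * question is "does this pte point at the (possibly per-colour) zero page?",
 * so a caller typically writes something like
 *
 *	if (swap_is_zero_pfn(pte_pfn(*pte)))
 *		page = pte_page(*pte);
 *
 * which is how the gup loops below treat zero-page mappings.
 */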
static inline int stack_guard_page(struct vm_area_struct *vma, unsigned long addr)
{
	return stack_guard_page_start(vma, addr) ||
	       stack_guard_page_end(vma, addr + PAGE_SIZE);
}
#if LINUX_VERSION_CODE >= KERNEL_VERSION(3, 9, 0)

static inline struct page *follow_page_uprobe(struct vm_area_struct *vma,
		unsigned long address, unsigned int foll_flags)
{
	unsigned int unused_page_mask;

	return follow_page_mask(vma, address, foll_flags, &unused_page_mask);
}
long __get_user_pages_uprobe(struct task_struct *tsk, struct mm_struct *mm,
		unsigned long start, unsigned long nr_pages,
		unsigned int gup_flags, struct page **pages,
		struct vm_area_struct **vmas, int *nonblocking)
{
	long i;
	unsigned long vm_flags;
	unsigned int page_mask;

	if (!nr_pages)
		return 0;

	VM_BUG_ON(!!pages != !!(gup_flags & FOLL_GET));

	/*
	 * Require read or write permissions.
	 * If FOLL_FORCE is set, we only require the "MAY" flags.
	 */
	vm_flags  = (gup_flags & FOLL_WRITE) ?
			(VM_WRITE | VM_MAYWRITE) : (VM_READ | VM_MAYREAD);
	vm_flags &= (gup_flags & FOLL_FORCE) ?
			(VM_MAYREAD | VM_MAYWRITE) : (VM_READ | VM_WRITE);

	/*
	 * If FOLL_FORCE and FOLL_NUMA are both set, handle_mm_fault
	 * would be called on PROT_NONE ranges. We must never invoke
	 * handle_mm_fault on PROT_NONE ranges or the NUMA hinting
	 * page faults would unprotect the PROT_NONE ranges if
	 * _PAGE_NUMA and _PAGE_PROTNONE are sharing the same pte/pmd
	 * bitflag. So to avoid that, don't set FOLL_NUMA if
	 * FOLL_FORCE is set.
	 */
	if (!(gup_flags & FOLL_FORCE))
		gup_flags |= FOLL_NUMA;
	i = 0;

	do {
		struct vm_area_struct *vma;

		vma = find_extend_vma(mm, start);
		if (!vma && dbi_in_gate_area(tsk, start)) {
			unsigned long pg = start & PAGE_MASK;
			pgd_t *pgd;
			pud_t *pud;
			pmd_t *pmd;
			pte_t *pte;

			/* user gate pages are read-only */
			if (gup_flags & FOLL_WRITE)
				return i ? : -EFAULT;
			if (pg > TASK_SIZE)
				pgd = pgd_offset_k(pg);
			else
				pgd = pgd_offset_gate(mm, pg);
			BUG_ON(pgd_none(*pgd));
			pud = pud_offset(pgd, pg);
			BUG_ON(pud_none(*pud));
			pmd = pmd_offset(pud, pg);
			if (pmd_none(*pmd))
				return i ? : -EFAULT;
			VM_BUG_ON(pmd_trans_huge(*pmd));
			pte = pte_offset_map(pmd, pg);
			if (pte_none(*pte)) {
				pte_unmap(pte);
				return i ? : -EFAULT;
			}
			vma = get_gate_vma(mm);
			if (pages) {
				struct page *page;

				page = vm_normal_page(vma, start, *pte);
				if (!page) {
					if (!(gup_flags & FOLL_DUMP) &&
					    swap_is_zero_pfn(pte_pfn(*pte)))
						page = pte_page(*pte);
					else {
						pte_unmap(pte);
						return i ? : -EFAULT;
					}
				}
				pages[i] = page;
				get_page(page);
			}
			pte_unmap(pte);
			page_mask = 0;
			goto next_page;
		}

		if (!vma ||
		    (vma->vm_flags & (VM_IO | VM_PFNMAP)) ||
		    !(vm_flags & vma->vm_flags))
			return i ? : -EFAULT;

		if (is_vm_hugetlb_page(vma)) {
			i = follow_hugetlb_page(mm, vma, pages, vmas,
					&start, &nr_pages, i, gup_flags);
			continue;
		}
		do {
			struct page *page;
			unsigned int foll_flags = gup_flags;
			unsigned int page_increm;

			/*
			 * If we have a pending SIGKILL, don't keep faulting
			 * pages and potentially allocating memory.
			 */
			if (unlikely(fatal_signal_pending(current)))
				return i ? i : -ERESTARTSYS;

			/* cond_resched(); */
			while (!(page = follow_page_mask(vma, start,
						foll_flags, &page_mask))) {
				int ret;
				unsigned int fault_flags = 0;

				/* For mlock, just skip the stack guard page. */
				if (foll_flags & FOLL_MLOCK) {
					if (stack_guard_page(vma, start))
						goto next_page;
				}
				if (foll_flags & FOLL_WRITE)
					fault_flags |= FAULT_FLAG_WRITE;
				if (nonblocking)
					fault_flags |= FAULT_FLAG_ALLOW_RETRY;
				if (foll_flags & FOLL_NOWAIT)
					fault_flags |= (FAULT_FLAG_ALLOW_RETRY | FAULT_FLAG_RETRY_NOWAIT);

				ret = handle_mm_fault(mm, vma, start,
							fault_flags);

				if (ret & VM_FAULT_ERROR) {
					if (ret & VM_FAULT_OOM)
						return i ? i : -ENOMEM;
					if (ret & (VM_FAULT_HWPOISON |
						   VM_FAULT_HWPOISON_LARGE)) {
						if (i)
							return i;
						else if (gup_flags & FOLL_HWPOISON)
							return -EHWPOISON;
						else
							return -EFAULT;
					}
					if (ret & VM_FAULT_SIGBUS)
						return i ? i : -EFAULT;
					BUG();
				}

				if (ret & VM_FAULT_MAJOR)
					tsk->maj_flt++;
				else
					tsk->min_flt++;

				if (ret & VM_FAULT_RETRY) {
					if (nonblocking)
						*nonblocking = 0;
					return i;
				}

				/*
				 * The VM_FAULT_WRITE bit tells us that
				 * do_wp_page has broken COW when necessary,
				 * even if maybe_mkwrite decided not to set
				 * pte_write. We can thus safely do subsequent
				 * page lookups as if they were reads. But only
				 * do so when looping for pte_write is futile:
				 * in some cases userspace may also be wanting
				 * to write to the gotten user page, which a
				 * read fault here might prevent (a readonly
				 * page might get reCOWed by userspace write).
				 */
				if ((ret & VM_FAULT_WRITE) &&
				    !(vma->vm_flags & VM_WRITE))
					foll_flags &= ~FOLL_WRITE;

				/* cond_resched(); */
			}
			if (IS_ERR(page))
				return i ? i : PTR_ERR(page);
			if (pages) {
				pages[i] = page;

				flush_anon_page(vma, page, start);
				flush_dcache_page(page);
				page_mask = 0;
			}
next_page:
			if (vmas) {
				vmas[i] = vma;
				page_mask = 0;
			}
			page_increm = 1 + (~(start >> PAGE_SHIFT) & page_mask);
			if (page_increm > nr_pages)
				page_increm = nr_pages;
			i += page_increm;
			start += page_increm * PAGE_SIZE;
			nr_pages -= page_increm;
		} while (nr_pages && start < vma->vm_end);
	} while (nr_pages);

	return i;
}
#else /* LINUX_VERSION_CODE >= KERNEL_VERSION(3, 9, 0) */

static int __get_user_pages_uprobe(struct task_struct *tsk, struct mm_struct *mm,
			unsigned long start, int nr_pages, unsigned int gup_flags,
			struct page **pages, struct vm_area_struct **vmas,
			int *nonblocking)
{
	int i;
	unsigned long vm_flags;

	if (nr_pages <= 0)
		return 0;

	VM_BUG_ON(!!pages != !!(gup_flags & FOLL_GET));

	/*
	 * Require read or write permissions.
	 * If FOLL_FORCE is set, we only require the "MAY" flags.
	 */
	vm_flags  = (gup_flags & FOLL_WRITE) ?
			(VM_WRITE | VM_MAYWRITE) : (VM_READ | VM_MAYREAD);
	vm_flags &= (gup_flags & FOLL_FORCE) ?
			(VM_MAYREAD | VM_MAYWRITE) : (VM_READ | VM_WRITE);
	i = 0;

	do {
		struct vm_area_struct *vma;

		vma = find_extend_vma(mm, start);
		if (!vma && dbi_in_gate_area_no_xxx(start)) {
			unsigned long pg = start & PAGE_MASK;
			pgd_t *pgd;
			pud_t *pud;
			pmd_t *pmd;
			pte_t *pte;

			/* user gate pages are read-only */
			if (gup_flags & FOLL_WRITE) {
				return i ? : -EFAULT;
			}
			if (pg > TASK_SIZE)
				pgd = pgd_offset_k(pg);
			else
				pgd = pgd_offset_gate(mm, pg);
			BUG_ON(pgd_none(*pgd));
			pud = pud_offset(pgd, pg);
			BUG_ON(pud_none(*pud));
			pmd = pmd_offset(pud, pg);
			if (pmd_none(*pmd)) {
				return i ? : -EFAULT;
			}
			VM_BUG_ON(pmd_trans_huge(*pmd));
			pte = pte_offset_map(pmd, pg);
			if (pte_none(*pte)) {
				pte_unmap(pte);
				return i ? : -EFAULT;
			}
			vma = get_gate_vma(mm);
			if (pages) {
				struct page *page;

				page = vm_normal_page(vma, start, *pte);
				if (!page) {
					if (!(gup_flags & FOLL_DUMP) &&
					    swap_is_zero_pfn(pte_pfn(*pte)))
						page = pte_page(*pte);
					else {
						pte_unmap(pte);
						return i ? : -EFAULT;
					}
				}
				pages[i] = page;
				get_page(page);
			}
			pte_unmap(pte);
			goto next_page;
		}

		if (!vma ||
		    (vma->vm_flags & (VM_IO | VM_PFNMAP)) ||
		    !(vm_flags & vma->vm_flags)) {
			return i ? : -EFAULT;
		}

		if (is_vm_hugetlb_page(vma)) {
			i = follow_hugetlb_page(mm, vma, pages, vmas,
					&start, &nr_pages, i, gup_flags);
			continue;
		}
		do {
			struct page *page;
			unsigned int foll_flags = gup_flags;

			/*
			 * If we have a pending SIGKILL, don't keep faulting
			 * pages and potentially allocating memory.
			 */
			if (unlikely(fatal_signal_pending(current))) {
				return i ? i : -ERESTARTSYS;
			}

			/* cond_resched(); */
			while (!(page = follow_page(vma, start, foll_flags))) {
				int ret;
				unsigned int fault_flags = 0;

				/* For mlock, just skip the stack guard page. */
				if (foll_flags & FOLL_MLOCK) {
					if (stack_guard_page(vma, start))
						goto next_page;
				}
				if (foll_flags & FOLL_WRITE)
					fault_flags |= FAULT_FLAG_WRITE;
				if (nonblocking)
					fault_flags |= FAULT_FLAG_ALLOW_RETRY;
				if (foll_flags & FOLL_NOWAIT)
					fault_flags |= (FAULT_FLAG_ALLOW_RETRY | FAULT_FLAG_RETRY_NOWAIT);

				ret = handle_mm_fault(mm, vma, start,
							fault_flags);

				if (ret & VM_FAULT_ERROR) {
					if (ret & VM_FAULT_OOM) {
						return i ? i : -ENOMEM;
					}
					if (ret & (VM_FAULT_HWPOISON |
						   VM_FAULT_HWPOISON_LARGE)) {
						if (i) {
							return i;
						}
						else if (gup_flags & FOLL_HWPOISON) {
							return -EHWPOISON;
						}
						else {
							return -EFAULT;
						}
					}
					if (ret & VM_FAULT_SIGBUS) {
						return i ? i : -EFAULT;
					}
					BUG();
				}

				if (ret & VM_FAULT_MAJOR)
					tsk->maj_flt++;
				else
					tsk->min_flt++;

				if (ret & VM_FAULT_RETRY) {
					if (nonblocking)
						*nonblocking = 0;
					return i;
				}

				/*
				 * The VM_FAULT_WRITE bit tells us that
				 * do_wp_page has broken COW when necessary,
				 * even if maybe_mkwrite decided not to set
				 * pte_write. We can thus safely do subsequent
				 * page lookups as if they were reads. But only
				 * do so when looping for pte_write is futile:
				 * in some cases userspace may also be wanting
				 * to write to the gotten user page, which a
				 * read fault here might prevent (a readonly
				 * page might get reCOWed by userspace write).
				 */
				if ((ret & VM_FAULT_WRITE) &&
				    !(vma->vm_flags & VM_WRITE))
					foll_flags &= ~FOLL_WRITE;

				/* cond_resched(); */
			}
			if (IS_ERR(page))
				return i ? i : PTR_ERR(page);
			if (pages) {
				pages[i] = page;

				flush_anon_page(vma, page, start);
				flush_dcache_page(page);
			}
next_page:
			if (vmas)
				vmas[i] = vma;
			i++;
			start += PAGE_SIZE;
			nr_pages--;
		} while (nr_pages && start < vma->vm_end);
	} while (nr_pages);

	return i;
}

#endif /* LINUX_VERSION_CODE >= KERNEL_VERSION(3, 9, 0) */
#else /* LINUX_VERSION_CODE > KERNEL_VERSION(2, 6, 38) */

static int __get_user_pages_uprobe(struct task_struct *tsk, struct mm_struct *mm,
		unsigned long start, int len, int flags,
		struct page **pages, struct vm_area_struct **vmas)
{
	int i;
	unsigned int vm_flags = 0;
	int write = !!(flags & GUP_FLAGS_WRITE);
	int force = !!(flags & GUP_FLAGS_FORCE);
	int ignore = !!(flags & GUP_FLAGS_IGNORE_VMA_PERMISSIONS);

	/*
	 * Require read or write permissions.
	 * If 'force' is set, we only require the "MAY" flags.
	 */
	vm_flags  = write ? (VM_WRITE | VM_MAYWRITE) : (VM_READ | VM_MAYREAD);
	vm_flags &= force ? (VM_MAYREAD | VM_MAYWRITE) : (VM_READ | VM_WRITE);
	i = 0;

	do {
		struct vm_area_struct *vma;
		unsigned int foll_flags;

		vma = find_vma(mm, start);
		if (!vma && dbi_in_gate_area(tsk, start)) {
			unsigned long pg = start & PAGE_MASK;
			struct vm_area_struct *gate_vma = get_gate_vma(tsk);
			pgd_t *pgd;
			pud_t *pud;
			pmd_t *pmd;
			pte_t *pte;

			/* user gate pages are read-only */
			if (!ignore && write)
				return i ? : -EFAULT;
			if (pg > TASK_SIZE)
				pgd = pgd_offset_k(pg);
			else
				pgd = pgd_offset_gate(mm, pg);
			BUG_ON(pgd_none(*pgd));
			pud = pud_offset(pgd, pg);
			BUG_ON(pud_none(*pud));
			pmd = pmd_offset(pud, pg);
			if (pmd_none(*pmd))
				return i ? : -EFAULT;
			pte = pte_offset_map(pmd, pg);
			if (pte_none(*pte)) {
				pte_unmap(pte);
				return i ? : -EFAULT;
			}
			if (pages) {
				struct page *page = vm_normal_page(gate_vma, start, *pte);
				pages[i] = page;
				if (page)
					get_page(page);
			}
			pte_unmap(pte);
			if (vmas)
				vmas[i] = gate_vma;
			i++;
			start += PAGE_SIZE;
			len--;
			continue;
		}

		if (!vma ||
		    (vma->vm_flags & (VM_IO | VM_PFNMAP)) ||
		    (!ignore && !(vm_flags & vma->vm_flags)))
			return i ? : -EFAULT;

		if (is_vm_hugetlb_page(vma)) {
#if LINUX_VERSION_CODE <= KERNEL_VERSION(2,6,18)
			i = follow_hugetlb_page(mm, vma, pages, vmas,
						&start, &len, i);
#else
			i = follow_hugetlb_page(mm, vma, pages, vmas,
						&start, &len, i, write);
#endif /* LINUX_VERSION_CODE <= KERNEL_VERSION(2,6,18) */
			continue;
		}
		foll_flags = FOLL_TOUCH;
		if (pages)
			foll_flags |= FOLL_GET;

#if LINUX_VERSION_CODE > KERNEL_VERSION(2,6,18)
#if LINUX_VERSION_CODE <= KERNEL_VERSION(2,6,30)
		if (!write && use_zero_page(vma))
			foll_flags |= FOLL_ANON;
#endif /* LINUX_VERSION_CODE <= KERNEL_VERSION(2,6,30) */
#endif /* LINUX_VERSION_CODE > KERNEL_VERSION(2,6,18) */

		do {
			struct page *page;

			if (write)
				foll_flags |= FOLL_WRITE;

			DBPRINTF("pages = %p vma = %p\n", pages, vma);
			while (!(page = follow_page(vma, start, foll_flags))) {
				int ret;

				ret = handle_mm_fault(mm, vma, start,
						foll_flags & FOLL_WRITE);

#if LINUX_VERSION_CODE <= KERNEL_VERSION(2,6,18)
				if (ret & VM_FAULT_WRITE)
					foll_flags &= ~FOLL_WRITE;

				switch (ret & ~VM_FAULT_WRITE) {
				case VM_FAULT_MINOR:
					tsk->min_flt++;
					break;
				case VM_FAULT_MAJOR:
					tsk->maj_flt++;
					break;
				case VM_FAULT_SIGBUS:
					return i ? i : -EFAULT;
				case VM_FAULT_OOM:
					return i ? i : -ENOMEM;
				default:
					BUG();
				}
#else /* LINUX_VERSION_CODE <= KERNEL_VERSION(2,6,18) */
				if (ret & VM_FAULT_ERROR) {
					if (ret & VM_FAULT_OOM)
						return i ? i : -ENOMEM;
					else if (ret & VM_FAULT_SIGBUS)
						return i ? i : -EFAULT;
					BUG();
				}
				if (ret & VM_FAULT_MAJOR)
					tsk->maj_flt++;
				else
					tsk->min_flt++;

				/*
				 * The VM_FAULT_WRITE bit tells us that
				 * do_wp_page has broken COW when necessary,
				 * even if maybe_mkwrite decided not to set
				 * pte_write. We can thus safely do subsequent
				 * page lookups as if they were reads. But only
				 * do so when looping for pte_write is futile:
				 * in some cases userspace may also be wanting
				 * to write to the gotten user page, which a
				 * read fault here might prevent (a readonly
				 * page might get reCOWed by userspace write).
				 */
				if ((ret & VM_FAULT_WRITE) &&
				    !(vma->vm_flags & VM_WRITE))
					foll_flags &= ~FOLL_WRITE;
#endif /* LINUX_VERSION_CODE <= KERNEL_VERSION(2,6,18) */
			}
			if (IS_ERR(page))
				return i ? i : PTR_ERR(page);
			if (pages) {
				pages[i] = page;
#if LINUX_VERSION_CODE <= KERNEL_VERSION(2,6,18)
				flush_anon_page(page, start);
#else
				flush_anon_page(vma, page, start);
#endif /* LINUX_VERSION_CODE <= KERNEL_VERSION(2,6,18) */
				flush_dcache_page(page);
			}
			if (vmas)
				vmas[i] = vma;
			i++;
			start += PAGE_SIZE;
			len--;
		} while (len && start < vma->vm_end);
	} while (len);

	return i;
}

#endif /* LINUX_VERSION_CODE > KERNEL_VERSION(2, 6, 38) */
int get_user_pages_uprobe(struct task_struct *tsk, struct mm_struct *mm,
		unsigned long start, int len, int write, int force,
		struct page **pages, struct vm_area_struct **vmas)
{
#if LINUX_VERSION_CODE >= KERNEL_VERSION(2, 6, 29)
#if LINUX_VERSION_CODE > KERNEL_VERSION(2, 6, 38) /* FIXME: must be >= 32! */
	int flags = FOLL_TOUCH;

	if (pages)
		flags |= FOLL_GET;
	if (write)
		flags |= FOLL_WRITE;
	if (force)
		flags |= FOLL_FORCE;
#else /* LINUX_VERSION_CODE > KERNEL_VERSION(2, 6, 38) */
	int flags = 0;

	if (write)
		flags |= GUP_FLAGS_WRITE;
	if (force)
		flags |= GUP_FLAGS_FORCE;
#endif /* LINUX_VERSION_CODE > KERNEL_VERSION(2, 6, 38) */

	return __get_user_pages_uprobe(tsk, mm,
			start, len, flags,
#if LINUX_VERSION_CODE > KERNEL_VERSION(2, 6, 38)
			pages, vmas, NULL);
#else /* LINUX_VERSION_CODE > KERNEL_VERSION(2, 6, 38) */
			pages, vmas);
#endif /* LINUX_VERSION_CODE > KERNEL_VERSION(2, 6, 38) */
#else /* LINUX_VERSION_CODE >= KERNEL_VERSION(2, 6, 29) */
	return get_user_pages(tsk, mm, start, len, write, force, pages, vmas);
#endif /* LINUX_VERSION_CODE >= KERNEL_VERSION(2, 6, 29) */
}
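
/*
 * Illustrative sketch only (hypothetical helper): pinning a single user page
 * through the version-independent wrapper above and releasing it afterwards.
 */
#if 0
static int example_peek_user_page(struct task_struct *task,
				  unsigned long vaddr)
{
	struct page *page;
	struct vm_area_struct *vma;
	int ret;

	/* one page, read-only (write = 0), force = 1 as used further below */
	ret = get_user_pages_uprobe(task, task->mm, vaddr, 1, 0, 1,
				    &page, &vma);
	if (ret <= 0)
		return -EFAULT;

	/* ... inspect the page here (e.g. via dbi_kmap_atomic()) ... */

	page_cache_release(page);
	return 0;
}
#endif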
#define ACCESS_PROCESS_OPTIMIZATION 0

#if ACCESS_PROCESS_OPTIMIZATION

#define GET_STEP_X(LEN, STEP) (((LEN) >= (STEP)) ? (STEP) : (LEN) % (STEP))
#define GET_STEP_4(LEN) GET_STEP_X((LEN), 4)
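
/*
 * Worked example: GET_STEP_4(7) evaluates to 4 (since 7 >= 4); after len -= 4
 * the next pass sees GET_STEP_4(3) == 3 % 4 == 3, and the remaining tail is
 * copied with the smaller access sizes handled by the switch below.
 */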
static void read_data_current(unsigned long addr, void *buf, int len)
{
	int step;
	int pos = 0;

	for (step = GET_STEP_4(len); len; len -= step) {
		switch (GET_STEP_4(len)) {
		case 1:
			get_user(*(u8 *)(buf + pos), (unsigned long *)(addr + pos));
			step = 1;
			break;

		case 2:
		case 3:
			get_user(*(u16 *)(buf + pos), (unsigned long *)(addr + pos));
			step = 2;
			break;

		case 4:
			get_user(*(u32 *)(buf + pos), (unsigned long *)(addr + pos));
			step = 4;
			break;
		}

		pos += step;
	}
}

static void write_data_current(unsigned long addr, void *buf, int len)
{
	int step;
	int pos = 0;

	for (step = GET_STEP_4(len); len; len -= step) {
		switch (GET_STEP_4(len)) {
		case 1:
			put_user(*(u8 *)(buf + pos), (unsigned long *)(addr + pos));
			step = 1;
			break;

		case 2:
		case 3:
			put_user(*(u16 *)(buf + pos), (unsigned long *)(addr + pos));
			step = 2;
			break;

		case 4:
			put_user(*(u32 *)(buf + pos), (unsigned long *)(addr + pos));
			step = 4;
			break;
		}

		pos += step;
	}
}
#endif /* ACCESS_PROCESS_OPTIMIZATION */
int access_process_vm_atomic(struct task_struct *tsk, unsigned long addr, void *buf, int len, int write)
{
	struct mm_struct *mm;
	struct vm_area_struct *vma;
	void *old_buf = buf;

#if ACCESS_PROCESS_OPTIMIZATION
	if (write == 0 && tsk == current) {
		read_data_current(addr, buf, len);
		return len;
	}
#endif /* ACCESS_PROCESS_OPTIMIZATION */

	mm = tsk->mm; /* function 'get_task_mm' is to be called */
	if (!mm)
		return 0;

	/* ignore errors, just check how much was successfully transferred */
	while (len) {
		int bytes, ret, offset;
		void *maddr;
		struct page *page = NULL;

		ret = get_user_pages_uprobe(tsk, mm, addr, 1,
					    write, 1, &page, &vma);
		if (ret <= 0) {
			/*
			 * Check if this is a VM_IO | VM_PFNMAP VMA, which
			 * we can access using slightly different code.
			 */
#ifdef CONFIG_HAVE_IOREMAP_PROT
			vma = find_vma(mm, addr);
			if (!vma)
				break;
			if (vma->vm_ops && vma->vm_ops->access)
				ret = vma->vm_ops->access(vma, addr, buf,
							  len, write);
			if (ret <= 0)
#endif
				break;
			bytes = ret;
		} else {
			bytes = len;
			offset = addr & (PAGE_SIZE-1);
			if (bytes > PAGE_SIZE-offset)
				bytes = PAGE_SIZE-offset;

			maddr = dbi_kmap_atomic(page);

			if (write) {
				copy_to_user_page(vma, page, addr,
						  maddr + offset, buf, bytes);
				set_page_dirty_lock(page);
			} else {
				copy_from_user_page(vma, page, addr,
						    buf, maddr + offset, bytes);
			}

			dbi_kunmap_atomic(maddr);
			page_cache_release(page);
		}
		len -= bytes;
		buf += bytes;
		addr += bytes;
	}

	return buf - old_buf;
}
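
/*
 * Illustrative sketch only (hypothetical caller): reading a word from another
 * task's address space with the copier defined above.
 */
#if 0
static int example_read_user_long(struct task_struct *task,
				  unsigned long vaddr, unsigned long *val)
{
	int copied = access_process_vm_atomic(task, vaddr, val,
					      sizeof(*val), 0 /* read */);

	return (copied == sizeof(*val)) ? 0 : -EFAULT;
}
#endif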
int page_present(struct mm_struct *mm, unsigned long address)
{
	pgd_t *pgd;
	pud_t *pud;
	pmd_t *pmd;
	pte_t *ptep, pte;
	unsigned long pfn;

	pgd = pgd_offset(mm, address);
	if (pgd_none(*pgd) || unlikely(pgd_bad(*pgd)))
		goto out;

	pud = pud_offset(pgd, address);
	if (pud_none(*pud) || unlikely(pud_bad(*pud)))
		goto out;

	pmd = pmd_offset(pud, address);
	if (pmd_none(*pmd) || unlikely(pmd_bad(*pmd)))
		goto out;

	ptep = pte_offset_map(pmd, address);
	if (!ptep)
		goto out;

	pte = *ptep;
	pte_unmap(ptep);
	if (pte_present(pte)) {
		pfn = pte_pfn(pte);
		if (pfn_valid(pfn)) {
			return 1;
		}
	}

out:
	return 0;
}
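
/*
 * Illustrative sketch only: page_present() is a lightweight "is this address
 * currently backed by a valid frame?" check, e.g. before deciding whether a
 * probe handler may touch the address directly or must fall back to
 * access_process_vm_atomic():
 *
 *	if (page_present(task->mm, vaddr))
 *		... fast path ...
 *	else
 *		... fault it in / copy through access_process_vm_atomic() ...
 */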
EXPORT_SYMBOL_GPL(page_present);
EXPORT_SYMBOL_GPL(access_process_vm_atomic);