/*
 *  Dynamic Binary Instrumentation Module based on KProbes
 *  modules/kprobe/dbi_kprobes_deps.c
 *
 * This program is free software; you can redistribute it and/or modify
 * it under the terms of the GNU General Public License as published by
 * the Free Software Foundation; either version 2 of the License, or
 * (at your option) any later version.
 *
 * This program is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 * GNU General Public License for more details.
 *
 * You should have received a copy of the GNU General Public License
 * along with this program; if not, write to the Free Software
 * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
 *
 * Copyright (C) Samsung Electronics, 2006-2010
 *
 * 2008-2009    Alexey Gerenkov <a.gerenkov@samsung.com>: User-Space Probes
 *              initial implementation; support x86/ARM/MIPS for both user and kernel spaces.
 * 2010         Ekaterina Gorelkina <e.gorelkina@samsung.com>: redesign module for separating core and arch parts
 */
#include <linux/module.h>
#include <linux/sched.h>

#include <asm/pgtable.h>

#include "dbi_kprobes_deps.h"
#include "dbi_kdebug.h"

#include <linux/slab.h>
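
/*
 * Addresses of scheduler- and exit-related kernel functions.  They are only
 * declared here; presumably the probe-registration code resolves and fills
 * them in at run time.
 */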
unsigned long sched_addr;
unsigned long fork_addr;
unsigned long exit_addr;
unsigned long sys_exit_group_addr;
unsigned long do_group_exit_addr;
unsigned long sys_exit_addr;
#if LINUX_VERSION_CODE > KERNEL_VERSION(2, 6, 29)
/* redefine the kernel's pgd_offset_k() to walk init_task's active_mm */
#undef pgd_offset_k
#define pgd_offset_k(addr)	pgd_offset(init_task.active_mm, addr)
#endif
#if (LINUX_VERSION_CODE > KERNEL_VERSION(2, 6, 38))
#ifndef is_zero_pfn
static unsigned long swap_zero_pfn = 0;
#endif /* is_zero_pfn */
#endif /* (LINUX_VERSION_CODE > KERNEL_VERSION(2, 6, 38)) */
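
/*
 * kmap_atomic()/kunmap_atomic() dropped their KM_USER0 slot argument in
 * newer kernels, so the dbi_* wrappers below hide that difference for the
 * kernel version range this module supports.
 */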
#if LINUX_VERSION_CODE > KERNEL_VERSION(2, 6, 36)
static inline void *dbi_kmap_atomic(struct page *page)
{
	return kmap_atomic(page);
}

static inline void dbi_kunmap_atomic(void *kvaddr)
{
	kunmap_atomic(kvaddr);
}
#else /* LINUX_VERSION_CODE > KERNEL_VERSION(2, 6, 36) */
static inline void *dbi_kmap_atomic(struct page *page)
{
	return kmap_atomic(page, KM_USER0);
}

static inline void dbi_kunmap_atomic(void *kvaddr)
{
	kunmap_atomic(kvaddr, KM_USER0);
}
#endif /* LINUX_VERSION_CODE > KERNEL_VERSION(2, 6, 36) */
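
/*
 * The DECLARE_MOD_FUNC_DEP/DECLARE_MOD_DEP_WRAPPER/IMP_MOD_DEP_WRAPPER
 * macros come from dbi_kprobes_deps.h.  They declare a function pointer for
 * a kernel symbol that is not exported to modules and emit a small wrapper
 * that forwards to it; the pointers are presumably filled in by
 * INIT_MOD_DEP_VAR() in init_module_dependencies() below via a kernel
 * symbol lookup.
 */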
#if LINUX_VERSION_CODE >= KERNEL_VERSION(3, 9, 0)
DECLARE_MOD_FUNC_DEP(do_mmap_pgoff, unsigned long, struct file *file, unsigned long addr, unsigned long len, unsigned long prot, unsigned long flags, unsigned long pgoff, unsigned long *populate);
DECLARE_MOD_DEP_WRAPPER(do_mmap_pgoff, unsigned long, struct file *file, unsigned long addr, unsigned long len, unsigned long prot, unsigned long flags, unsigned long pgoff, unsigned long *populate)
IMP_MOD_DEP_WRAPPER(do_mmap_pgoff, file, addr, len, prot, flags, pgoff, populate)
#elif LINUX_VERSION_CODE >= KERNEL_VERSION(3, 4, 0) /* LINUX_VERSION_CODE >= KERNEL_VERSION(3, 9, 0) */
DECLARE_MOD_FUNC_DEP(do_mmap_pgoff, unsigned long, struct file *file, unsigned long addr, unsigned long len, unsigned long prot, unsigned long flags, unsigned long pgoff);
DECLARE_MOD_DEP_WRAPPER(do_mmap_pgoff, unsigned long, struct file *file, unsigned long addr, unsigned long len, unsigned long prot, unsigned long flags, unsigned long pgoff)
IMP_MOD_DEP_WRAPPER(do_mmap_pgoff, file, addr, len, prot, flags, pgoff)
#endif /* LINUX_VERSION_CODE >= KERNEL_VERSION(3, 4, 0) */
/* copy_to_user_page */
#ifndef copy_to_user_page
static DECLARE_MOD_FUNC_DEP(copy_to_user_page, void, struct vm_area_struct *vma, struct page *page, unsigned long uaddr, void *dst, const void *src, unsigned long len);
DECLARE_MOD_DEP_WRAPPER(copy_to_user_page, void, struct vm_area_struct *vma, struct page *page, unsigned long uaddr, void *dst, const void *src, unsigned long len)
IMP_MOD_DEP_WRAPPER(copy_to_user_page, vma, page, uaddr, dst, src, len)
#endif /* copy_to_user_page */

static DECLARE_MOD_FUNC_DEP(access_process_vm, int, struct task_struct *tsk, unsigned long addr, void *buf, int len, int write);

static DECLARE_MOD_FUNC_DEP(find_extend_vma, struct vm_area_struct *, struct mm_struct *mm, unsigned long addr);
#if LINUX_VERSION_CODE <= KERNEL_VERSION(2, 6, 30)
#if LINUX_VERSION_CODE > KERNEL_VERSION(2, 6, 18)
static DECLARE_MOD_FUNC_DEP(handle_mm_fault, int, struct mm_struct *mm, struct vm_area_struct *vma, unsigned long address, int write_access);
#endif /* LINUX_VERSION_CODE > KERNEL_VERSION(2, 6, 18) */
#else /* LINUX_VERSION_CODE <= KERNEL_VERSION(2, 6, 30) */
static DECLARE_MOD_FUNC_DEP(handle_mm_fault, int, struct mm_struct *mm, struct vm_area_struct *vma, unsigned long address, unsigned int flags);
#endif /* LINUX_VERSION_CODE <= KERNEL_VERSION(2, 6, 30) */
#if LINUX_VERSION_CODE > KERNEL_VERSION(2, 6, 38)
static DECLARE_MOD_FUNC_DEP(get_gate_vma, struct vm_area_struct *, struct mm_struct *mm);
#else /* LINUX_VERSION_CODE > KERNEL_VERSION(2, 6, 38) */
static DECLARE_MOD_FUNC_DEP(get_gate_vma, struct vm_area_struct *, struct task_struct *tsk);
#endif /* LINUX_VERSION_CODE > KERNEL_VERSION(2, 6, 38) */
#ifdef CONFIG_HUGETLB_PAGE
DECLARE_MOD_FUNC_DEP(follow_hugetlb_page, int, struct mm_struct *mm, \
		struct vm_area_struct *vma, struct page **pages, \
		struct vm_area_struct **vmas, unsigned long *position, int *length, \
		int i, unsigned int write);
#endif /* CONFIG_HUGETLB_PAGE */
#ifdef __HAVE_ARCH_GATE_AREA
#if LINUX_VERSION_CODE > KERNEL_VERSION(2, 6, 38)
DECLARE_MOD_FUNC_DEP(in_gate_area, int, struct mm_struct *mm, unsigned long addr);
#else /* LINUX_VERSION_CODE > KERNEL_VERSION(2, 6, 38) */
DECLARE_MOD_FUNC_DEP(in_gate_area, int, struct task_struct *task, unsigned long addr);
#endif /* LINUX_VERSION_CODE > KERNEL_VERSION(2, 6, 38) */
#endif /* __HAVE_ARCH_GATE_AREA */

#if LINUX_VERSION_CODE > KERNEL_VERSION(2, 6, 38)
static DECLARE_MOD_FUNC_DEP(in_gate_area_no_mm, int, unsigned long addr);
#else /* LINUX_VERSION_CODE > KERNEL_VERSION(2, 6, 38) */
static DECLARE_MOD_FUNC_DEP(in_gate_area_no_task, int, unsigned long addr);
#endif /* LINUX_VERSION_CODE > KERNEL_VERSION(2, 6, 38) */
#if LINUX_VERSION_CODE >= KERNEL_VERSION(3, 9, 0)
static DECLARE_MOD_FUNC_DEP(follow_page_mask, \
		struct page *, struct vm_area_struct *vma, \
		unsigned long address, unsigned int foll_flags, \
		unsigned int *page_mask);
DECLARE_MOD_DEP_WRAPPER(follow_page_mask, struct page *, \
		struct vm_area_struct *vma, \
		unsigned long address, \
		unsigned int foll_flags, \
		unsigned int *page_mask)
IMP_MOD_DEP_WRAPPER(follow_page_mask, vma, address, foll_flags, page_mask)
#else /* LINUX_VERSION_CODE >= KERNEL_VERSION(3, 9, 0) */
static DECLARE_MOD_FUNC_DEP(follow_page, \
		struct page *, struct vm_area_struct *vma, \
		unsigned long address, unsigned int foll_flags);
DECLARE_MOD_DEP_WRAPPER(follow_page, struct page *, \
		struct vm_area_struct *vma, \
		unsigned long address, \
		unsigned int foll_flags)
IMP_MOD_DEP_WRAPPER(follow_page, vma, address, foll_flags)
#endif /* LINUX_VERSION_CODE >= KERNEL_VERSION(3, 9, 0) */
static DECLARE_MOD_FUNC_DEP(__flush_anon_page, \
		void, struct vm_area_struct *vma, struct page *page, \
		unsigned long vmaddr);
static DECLARE_MOD_FUNC_DEP(vm_normal_page, \
		struct page *, struct vm_area_struct *vma, \
		unsigned long addr, pte_t pte);
static DECLARE_MOD_FUNC_DEP(flush_ptrace_access, \
		void, struct vm_area_struct *vma, struct page *page, \
		unsigned long uaddr, void *kaddr, unsigned long len, int write);
#if (LINUX_VERSION_CODE != KERNEL_VERSION(2, 6, 16))
static DECLARE_MOD_FUNC_DEP(put_task_struct, \
		void, struct task_struct *tsk);
#else /* (LINUX_VERSION_CODE != KERNEL_VERSION(2, 6, 16)) */
static DECLARE_MOD_FUNC_DEP(put_task_struct, \
		void, struct rcu_head *rhp);
#endif /* (LINUX_VERSION_CODE != KERNEL_VERSION(2, 6, 16)) */
DECLARE_MOD_DEP_WRAPPER(access_process_vm, int, struct task_struct *tsk, unsigned long addr, void *buf, int len, int write)
IMP_MOD_DEP_WRAPPER(access_process_vm, tsk, addr, buf, len, write)

DECLARE_MOD_DEP_WRAPPER(find_extend_vma, struct vm_area_struct *, struct mm_struct *mm, unsigned long addr)
IMP_MOD_DEP_WRAPPER(find_extend_vma, mm, addr)
#if LINUX_VERSION_CODE <= KERNEL_VERSION(2, 6, 30)
#if LINUX_VERSION_CODE > KERNEL_VERSION(2, 6, 18)
DECLARE_MOD_DEP_WRAPPER(handle_mm_fault, \
		int, struct mm_struct *mm, struct vm_area_struct *vma, unsigned long address, int write_access)
IMP_MOD_DEP_WRAPPER(handle_mm_fault, mm, vma, address, write_access)
#endif /* LINUX_VERSION_CODE > KERNEL_VERSION(2, 6, 18) */
#else /* LINUX_VERSION_CODE <= KERNEL_VERSION(2, 6, 30) */
DECLARE_MOD_DEP_WRAPPER(handle_mm_fault, \
		int, struct mm_struct *mm, struct vm_area_struct *vma, unsigned long address, unsigned int flags)
IMP_MOD_DEP_WRAPPER(handle_mm_fault, mm, vma, address, flags)
#endif /* LINUX_VERSION_CODE <= KERNEL_VERSION(2, 6, 30) */
#if LINUX_VERSION_CODE > KERNEL_VERSION(2, 6, 38)
DECLARE_MOD_DEP_WRAPPER(get_gate_vma, \
		struct vm_area_struct *, struct mm_struct *mm)
IMP_MOD_DEP_WRAPPER(get_gate_vma, mm)
#else /* LINUX_VERSION_CODE > KERNEL_VERSION(2, 6, 38) */
DECLARE_MOD_DEP_WRAPPER(get_gate_vma, \
		struct vm_area_struct *, struct task_struct *tsk)
IMP_MOD_DEP_WRAPPER(get_gate_vma, tsk)
#endif /* LINUX_VERSION_CODE > KERNEL_VERSION(2, 6, 38) */
#ifdef CONFIG_HUGETLB_PAGE
DECLARE_MOD_DEP_WRAPPER(follow_hugetlb_page, int, struct mm_struct *mm, struct vm_area_struct *vma, struct page **pages, struct vm_area_struct **vmas, unsigned long *position, int *length, int i, unsigned int write)
IMP_MOD_DEP_WRAPPER(follow_hugetlb_page, mm, vma, pages, vmas, position, length, i, write)
#endif /* CONFIG_HUGETLB_PAGE */
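
/*
 * dbi_in_gate_area() hides an API change: on kernels after 2.6.38
 * in_gate_area() takes a struct mm_struct *, on older kernels it takes a
 * struct task_struct *.
 */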
static inline int dbi_in_gate_area(struct task_struct *task, unsigned long addr)
{
#ifdef __HAVE_ARCH_GATE_AREA
#if LINUX_VERSION_CODE > KERNEL_VERSION(2, 6, 38)
	struct mm_struct *mm = task->mm;
	IMP_MOD_DEP_WRAPPER(in_gate_area, mm, addr)
#else /* LINUX_VERSION_CODE > KERNEL_VERSION(2, 6, 38) */
	IMP_MOD_DEP_WRAPPER(in_gate_area, task, addr)
#endif /* LINUX_VERSION_CODE > KERNEL_VERSION(2, 6, 38) */
#else /* __HAVE_ARCH_GATE_AREA */
	return in_gate_area(task, addr);
#endif /* __HAVE_ARCH_GATE_AREA */
}
#if LINUX_VERSION_CODE > KERNEL_VERSION(2, 6, 38)
DECLARE_MOD_DEP_WRAPPER(in_gate_area_no_mm, int, unsigned long addr)
IMP_MOD_DEP_WRAPPER(in_gate_area_no_mm, addr)
#else /* LINUX_VERSION_CODE > KERNEL_VERSION(2, 6, 38) */
DECLARE_MOD_DEP_WRAPPER(in_gate_area_no_task, int, unsigned long addr)
IMP_MOD_DEP_WRAPPER(in_gate_area_no_task, addr)
#endif /* LINUX_VERSION_CODE > KERNEL_VERSION(2, 6, 38) */
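
/*
 * Same idea for the "no task/mm" variant: in_gate_area_no_task() became
 * in_gate_area_no_mm() after 2.6.38, so pick whichever this kernel provides.
 */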
static inline int dbi_in_gate_area_no_xxx(unsigned long addr)
{
#if LINUX_VERSION_CODE > KERNEL_VERSION(2, 6, 38)
	return in_gate_area_no_mm(addr);
#else /* LINUX_VERSION_CODE > KERNEL_VERSION(2, 6, 38) */
	return in_gate_area_no_task(addr);
#endif /* LINUX_VERSION_CODE > KERNEL_VERSION(2, 6, 38) */
}
DECLARE_MOD_DEP_WRAPPER(__flush_anon_page, \
		void, struct vm_area_struct *vma, \
		struct page *page, unsigned long vmaddr)
IMP_MOD_DEP_WRAPPER(__flush_anon_page, vma, page, vmaddr)

DECLARE_MOD_DEP_WRAPPER(vm_normal_page, \
		struct page *, struct vm_area_struct *vma, \
		unsigned long addr, pte_t pte)
IMP_MOD_DEP_WRAPPER(vm_normal_page, vma, addr, pte)

DECLARE_MOD_DEP_WRAPPER(flush_ptrace_access, \
		void, struct vm_area_struct *vma, struct page *page, \
		unsigned long uaddr, void *kaddr, unsigned long len, int write)
IMP_MOD_DEP_WRAPPER(flush_ptrace_access, vma, page, uaddr, kaddr, len, write)
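
/*
 * Resolve all dependencies declared above.  Each INIT_MOD_DEP_VAR(dep, name)
 * presumably looks the symbol "name" up in the kernel symbol table and stores
 * its address in the corresponding function pointer (see dbi_kprobes_deps.h
 * for the exact mechanism).
 */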
int init_module_dependencies(void)
{

#if LINUX_VERSION_CODE > KERNEL_VERSION(2, 6, 18)
	INIT_MOD_DEP_VAR(handle_mm_fault, handle_mm_fault);
#endif

#ifndef copy_to_user_page
	INIT_MOD_DEP_VAR(copy_to_user_page, copy_to_user_page);
#endif /* copy_to_user_page */

	INIT_MOD_DEP_VAR(find_extend_vma, find_extend_vma);
	INIT_MOD_DEP_VAR(get_gate_vma, get_gate_vma);
	INIT_MOD_DEP_VAR(flush_ptrace_access, flush_ptrace_access);

#ifdef CONFIG_HUGETLB_PAGE
	INIT_MOD_DEP_VAR(follow_hugetlb_page, follow_hugetlb_page);
#endif /* CONFIG_HUGETLB_PAGE */

#ifdef __HAVE_ARCH_GATE_AREA
	INIT_MOD_DEP_VAR(in_gate_area, in_gate_area);
#endif /* __HAVE_ARCH_GATE_AREA */

#if LINUX_VERSION_CODE >= KERNEL_VERSION(3, 9, 0)
	INIT_MOD_DEP_VAR(follow_page_mask, follow_page_mask);
#else /* LINUX_VERSION_CODE >= KERNEL_VERSION(3, 9, 0) */
	INIT_MOD_DEP_VAR(follow_page, follow_page);
#endif /* LINUX_VERSION_CODE >= KERNEL_VERSION(3, 9, 0) */

#if LINUX_VERSION_CODE > KERNEL_VERSION(2, 6, 38)

#ifndef is_zero_pfn
	swap_zero_pfn = page_to_pfn(ZERO_PAGE(0));
#endif /* is_zero_pfn */

	INIT_MOD_DEP_VAR(in_gate_area_no_mm, in_gate_area_no_mm);
#else /* (LINUX_VERSION_CODE > KERNEL_VERSION(2, 6, 38)) */
	INIT_MOD_DEP_VAR(in_gate_area_no_task, in_gate_area_no_task);
#endif /* (LINUX_VERSION_CODE > KERNEL_VERSION(2, 6, 38)) */

	INIT_MOD_DEP_VAR(__flush_anon_page, __flush_anon_page);
	INIT_MOD_DEP_VAR(vm_normal_page, vm_normal_page);
	INIT_MOD_DEP_VAR(access_process_vm, access_process_vm);

#if (LINUX_VERSION_CODE != KERNEL_VERSION(2, 6, 16))
# if (LINUX_VERSION_CODE < KERNEL_VERSION(2, 6, 11))
	INIT_MOD_DEP_VAR(put_task_struct, put_task_struct);
# else /* (LINUX_VERSION_CODE < KERNEL_VERSION(2, 6, 11)) */
	INIT_MOD_DEP_VAR(put_task_struct, __put_task_struct);
# endif /* (LINUX_VERSION_CODE < KERNEL_VERSION(2, 6, 11)) */
#else /* (LINUX_VERSION_CODE != KERNEL_VERSION(2, 6, 16)) */
	INIT_MOD_DEP_VAR(put_task_struct, __put_task_struct_cb);
#endif /* (LINUX_VERSION_CODE != KERNEL_VERSION(2, 6, 16)) */

#if LINUX_VERSION_CODE >= KERNEL_VERSION(3, 4, 0)
	INIT_MOD_DEP_VAR(do_mmap_pgoff, do_mmap_pgoff);
#endif /* LINUX_VERSION_CODE >= KERNEL_VERSION(3, 4, 0) */

	return 0;
}
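
/*
 * Legacy get_user_pages() flag bits, copied here for the pre-2.6.38 variant
 * of __get_user_pages_uprobe() below; newer kernels use the FOLL_* flags
 * directly.
 */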
#if LINUX_VERSION_CODE <= KERNEL_VERSION(2, 6, 38) /* FIXME: must be < 32 */
#define GUP_FLAGS_WRITE			0x1
#define GUP_FLAGS_FORCE			0x2
#define GUP_FLAGS_IGNORE_VMA_PERMISSIONS	0x4
#define GUP_FLAGS_IGNORE_SIGKILL	0x8
#endif /* LINUX_VERSION_CODE <= KERNEL_VERSION(2, 6, 38) */
#if LINUX_VERSION_CODE > KERNEL_VERSION(2,6,18)
static inline int use_zero_page(struct vm_area_struct *vma)
{
	/*
	 * We don't want to optimize FOLL_ANON for make_pages_present()
	 * when it tries to page in a VM_LOCKED region. As to VM_SHARED,
	 * we want to get the page from the page tables to make sure
	 * that we serialize and update with any other user of that
	 * page.
	 */
	if (vma->vm_flags & (VM_LOCKED | VM_SHARED))
		return 0;
	/*
	 * And if we have a fault routine, it's not an anonymous region.
	 */
	return !vma->vm_ops || !vma->vm_ops->fault;
}
#endif /* LINUX_VERSION_CODE > KERNEL_VERSION(2,6,18) */
#if LINUX_VERSION_CODE > KERNEL_VERSION(2, 6, 38)

#if LINUX_VERSION_CODE >= KERNEL_VERSION(3, 8, 0)

#ifdef __HAVE_COLOR_ZERO_PAGE

static inline int swap_is_zero_pfn(unsigned long pfn)
{
	unsigned long offset_from_zero_pfn = pfn - swap_zero_pfn;
	return offset_from_zero_pfn <= (zero_page_mask >> PAGE_SHIFT);
}

#else /* __HAVE_COLOR_ZERO_PAGE */

static inline int swap_is_zero_pfn(unsigned long pfn)
{
	return pfn == swap_zero_pfn;
}

#endif /* __HAVE_COLOR_ZERO_PAGE */

#else /* LINUX_VERSION_CODE >= KERNEL_VERSION(3, 8, 0) */

static inline int swap_is_zero_pfn(unsigned long pfn)
{
#ifndef is_zero_pfn
	return pfn == swap_zero_pfn;
#else /* is_zero_pfn */
	return is_zero_pfn(pfn);
#endif /* is_zero_pfn */
}

#endif /* LINUX_VERSION_CODE >= KERNEL_VERSION(3, 8, 0) */
static inline int stack_guard_page(struct vm_area_struct *vma, unsigned long addr)
{
	return stack_guard_page_start(vma, addr) ||
	       stack_guard_page_end(vma, addr + PAGE_SIZE);
}
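
/*
 * The __get_user_pages_uprobe() variants below closely follow the kernel's
 * __get_user_pages() for the matching kernel versions.  The cond_resched()
 * calls are commented out, presumably so the function can be used from
 * atomic (probe handler) context where sleeping is not allowed.
 */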
#if LINUX_VERSION_CODE >= KERNEL_VERSION(3, 9, 0)

static inline struct page *follow_page_uprobe(struct vm_area_struct *vma,
		unsigned long address, unsigned int foll_flags)
{
	unsigned int unused_page_mask;
	return follow_page_mask(vma, address, foll_flags, &unused_page_mask);
}
long __get_user_pages_uprobe(struct task_struct *tsk, struct mm_struct *mm,
		unsigned long start, unsigned long nr_pages,
		unsigned int gup_flags, struct page **pages,
		struct vm_area_struct **vmas, int *nonblocking)
	unsigned long vm_flags;
	unsigned int page_mask;

	VM_BUG_ON(!!pages != !!(gup_flags & FOLL_GET));

	/*
	 * Require read or write permissions.
	 * If FOLL_FORCE is set, we only require the "MAY" flags.
	 */
	vm_flags = (gup_flags & FOLL_WRITE) ?
			(VM_WRITE | VM_MAYWRITE) : (VM_READ | VM_MAYREAD);
	vm_flags &= (gup_flags & FOLL_FORCE) ?
			(VM_MAYREAD | VM_MAYWRITE) : (VM_READ | VM_WRITE);

	/*
	 * If FOLL_FORCE and FOLL_NUMA are both set, handle_mm_fault
	 * would be called on PROT_NONE ranges. We must never invoke
	 * handle_mm_fault on PROT_NONE ranges or the NUMA hinting
	 * page faults would unprotect the PROT_NONE ranges if
	 * _PAGE_NUMA and _PAGE_PROTNONE are sharing the same pte/pmd
	 * bitflag. So to avoid that, don't set FOLL_NUMA if
	 * FOLL_FORCE is set.
	 */
	if (!(gup_flags & FOLL_FORCE))
		gup_flags |= FOLL_NUMA;

		struct vm_area_struct *vma;

		vma = find_extend_vma(mm, start);
		if (!vma && dbi_in_gate_area(tsk, start)) {
			unsigned long pg = start & PAGE_MASK;

			/* user gate pages are read-only */
			if (gup_flags & FOLL_WRITE)
				return i ? : -EFAULT;
			pgd = pgd_offset_k(pg);
			pgd = pgd_offset_gate(mm, pg);
			BUG_ON(pgd_none(*pgd));
			pud = pud_offset(pgd, pg);
			BUG_ON(pud_none(*pud));
			pmd = pmd_offset(pud, pg);
				return i ? : -EFAULT;
			VM_BUG_ON(pmd_trans_huge(*pmd));
			pte = pte_offset_map(pmd, pg);
			if (pte_none(*pte)) {
				return i ? : -EFAULT;
			vma = get_gate_vma(mm);

				page = vm_normal_page(vma, start, *pte);
					if (!(gup_flags & FOLL_DUMP) &&
					    swap_is_zero_pfn(pte_pfn(*pte)))
						page = pte_page(*pte);
						return i ? : -EFAULT;

		    (vma->vm_flags & (VM_IO | VM_PFNMAP)) ||
		    !(vm_flags & vma->vm_flags))
			return i ? : -EFAULT;

		if (is_vm_hugetlb_page(vma)) {
			i = follow_hugetlb_page(mm, vma, pages, vmas,
					&start, &nr_pages, i, gup_flags);

			unsigned int foll_flags = gup_flags;
			unsigned int page_increm;

			/*
			 * If we have a pending SIGKILL, don't keep faulting
			 * pages and potentially allocating memory.
			 */
			if (unlikely(fatal_signal_pending(current)))
				return i ? i : -ERESTARTSYS;

			/* cond_resched(); */
			while (!(page = follow_page_mask(vma, start,
						foll_flags, &page_mask))) {
				unsigned int fault_flags = 0;

				/* For mlock, just skip the stack guard page. */
				if (foll_flags & FOLL_MLOCK) {
					if (stack_guard_page(vma, start))
				if (foll_flags & FOLL_WRITE)
					fault_flags |= FAULT_FLAG_WRITE;
					fault_flags |= FAULT_FLAG_ALLOW_RETRY;
				if (foll_flags & FOLL_NOWAIT)
					fault_flags |= (FAULT_FLAG_ALLOW_RETRY | FAULT_FLAG_RETRY_NOWAIT);

				ret = handle_mm_fault(mm, vma, start,

				if (ret & VM_FAULT_ERROR) {
					if (ret & VM_FAULT_OOM)
						return i ? i : -ENOMEM;
					if (ret & (VM_FAULT_HWPOISON |
						   VM_FAULT_HWPOISON_LARGE)) {
						else if (gup_flags & FOLL_HWPOISON)
					if (ret & VM_FAULT_SIGBUS)
						return i ? i : -EFAULT;

					if (ret & VM_FAULT_MAJOR)

				if (ret & VM_FAULT_RETRY) {

				/*
				 * The VM_FAULT_WRITE bit tells us that
				 * do_wp_page has broken COW when necessary,
				 * even if maybe_mkwrite decided not to set
				 * pte_write. We can thus safely do subsequent
				 * page lookups as if they were reads. But only
				 * do so when looping for pte_write is futile:
				 * in some cases userspace may also be wanting
				 * to write to the gotten user page, which a
				 * read fault here might prevent (a readonly
				 * page might get reCOWed by userspace write).
				 */
				if ((ret & VM_FAULT_WRITE) &&
				    !(vma->vm_flags & VM_WRITE))
					foll_flags &= ~FOLL_WRITE;

				/* cond_resched(); */
				return i ? i : PTR_ERR(page);

			flush_anon_page(vma, page, start);
			flush_dcache_page(page);

			page_increm = 1 + (~(start >> PAGE_SHIFT) & page_mask);
			if (page_increm > nr_pages)
				page_increm = nr_pages;
			start += page_increm * PAGE_SIZE;
			nr_pages -= page_increm;
		} while (nr_pages && start < vma->vm_end);
#else /* LINUX_VERSION_CODE >= KERNEL_VERSION(3, 9, 0) */

static int __get_user_pages_uprobe(struct task_struct *tsk, struct mm_struct *mm,
		unsigned long start, int nr_pages, unsigned int gup_flags,
		struct page **pages, struct vm_area_struct **vmas,
	unsigned long vm_flags;

	VM_BUG_ON(!!pages != !!(gup_flags & FOLL_GET));

	/*
	 * Require read or write permissions.
	 * If FOLL_FORCE is set, we only require the "MAY" flags.
	 */
	vm_flags = (gup_flags & FOLL_WRITE) ?
			(VM_WRITE | VM_MAYWRITE) : (VM_READ | VM_MAYREAD);
	vm_flags &= (gup_flags & FOLL_FORCE) ?
			(VM_MAYREAD | VM_MAYWRITE) : (VM_READ | VM_WRITE);

		struct vm_area_struct *vma;

		vma = find_extend_vma(mm, start);
		if (!vma && dbi_in_gate_area_no_xxx(start)) {
			unsigned long pg = start & PAGE_MASK;

			/* user gate pages are read-only */
			if (gup_flags & FOLL_WRITE) {
				return i ? : -EFAULT;
			pgd = pgd_offset_k(pg);
			pgd = pgd_offset_gate(mm, pg);
			BUG_ON(pgd_none(*pgd));
			pud = pud_offset(pgd, pg);
			BUG_ON(pud_none(*pud));
			pmd = pmd_offset(pud, pg);
			if (pmd_none(*pmd)) {
				return i ? : -EFAULT;
			VM_BUG_ON(pmd_trans_huge(*pmd));
			pte = pte_offset_map(pmd, pg);
			if (pte_none(*pte)) {
				return i ? : -EFAULT;
			vma = get_gate_vma(mm);

				page = vm_normal_page(vma, start, *pte);
					if (!(gup_flags & FOLL_DUMP) &&
					    swap_is_zero_pfn(pte_pfn(*pte)))
						page = pte_page(*pte);
						return i ? : -EFAULT;

		    (vma->vm_flags & (VM_IO | VM_PFNMAP)) ||
		    !(vm_flags & vma->vm_flags)) {
			return i ? : -EFAULT;

		if (is_vm_hugetlb_page(vma)) {
			i = follow_hugetlb_page(mm, vma, pages, vmas,
					&start, &nr_pages, i, gup_flags);

			unsigned int foll_flags = gup_flags;

			/*
			 * If we have a pending SIGKILL, don't keep faulting
			 * pages and potentially allocating memory.
			 */
			if (unlikely(fatal_signal_pending(current))) {
				return i ? i : -ERESTARTSYS;

			/* cond_resched(); */
			while (!(page = follow_page(vma, start, foll_flags))) {
				unsigned int fault_flags = 0;

				/* For mlock, just skip the stack guard page. */
				if (foll_flags & FOLL_MLOCK) {
					if (stack_guard_page(vma, start))
				if (foll_flags & FOLL_WRITE)
					fault_flags |= FAULT_FLAG_WRITE;
					fault_flags |= FAULT_FLAG_ALLOW_RETRY;
				if (foll_flags & FOLL_NOWAIT)
					fault_flags |= (FAULT_FLAG_ALLOW_RETRY | FAULT_FLAG_RETRY_NOWAIT);

				ret = handle_mm_fault(mm, vma, start,

				if (ret & VM_FAULT_ERROR) {
					if (ret & VM_FAULT_OOM) {
						return i ? i : -ENOMEM;
					if (ret & (VM_FAULT_HWPOISON |
						   VM_FAULT_HWPOISON_LARGE)) {
						else if (gup_flags & FOLL_HWPOISON) {
					if (ret & VM_FAULT_SIGBUS) {
						return i ? i : -EFAULT;

					if (ret & VM_FAULT_MAJOR)

				if (ret & VM_FAULT_RETRY) {

				/*
				 * The VM_FAULT_WRITE bit tells us that
				 * do_wp_page has broken COW when necessary,
				 * even if maybe_mkwrite decided not to set
				 * pte_write. We can thus safely do subsequent
				 * page lookups as if they were reads. But only
				 * do so when looping for pte_write is futile:
				 * in some cases userspace may also be wanting
				 * to write to the gotten user page, which a
				 * read fault here might prevent (a readonly
				 * page might get reCOWed by userspace write).
				 */
				if ((ret & VM_FAULT_WRITE) &&
				    !(vma->vm_flags & VM_WRITE))
					foll_flags &= ~FOLL_WRITE;

				/* cond_resched(); */
				return i ? i : PTR_ERR(page);

			flush_anon_page(vma, page, start);
			flush_dcache_page(page);

		} while (nr_pages && start < vma->vm_end);
#endif /* LINUX_VERSION_CODE >= KERNEL_VERSION(3, 9, 0) */

#else /* LINUX_VERSION_CODE > KERNEL_VERSION(2, 6, 38) */

static int __get_user_pages_uprobe(struct task_struct *tsk, struct mm_struct *mm,
		unsigned long start, int len, int flags,
		struct page **pages, struct vm_area_struct **vmas)
	unsigned int vm_flags = 0;
	int write = !!(flags & GUP_FLAGS_WRITE);
	int force = !!(flags & GUP_FLAGS_FORCE);
	int ignore = !!(flags & GUP_FLAGS_IGNORE_VMA_PERMISSIONS);

	/*
	 * Require read or write permissions.
	 * If 'force' is set, we only require the "MAY" flags.
	 */
	vm_flags = write ? (VM_WRITE | VM_MAYWRITE) : (VM_READ | VM_MAYREAD);
	vm_flags &= force ? (VM_MAYREAD | VM_MAYWRITE) : (VM_READ | VM_WRITE);

		struct vm_area_struct *vma;
		unsigned int foll_flags;

		vma = find_vma(mm, start);
		if (!vma && dbi_in_gate_area(tsk, start)) {
			unsigned long pg = start & PAGE_MASK;
			struct vm_area_struct *gate_vma = get_gate_vma(tsk);

			/* user gate pages are read-only */
			if (!ignore && write)
				return i ? : -EFAULT;
			pgd = pgd_offset_k(pg);
			pgd = pgd_offset_gate(mm, pg);
			BUG_ON(pgd_none(*pgd));
			pud = pud_offset(pgd, pg);
			BUG_ON(pud_none(*pud));
			pmd = pmd_offset(pud, pg);
				return i ? : -EFAULT;
			pte = pte_offset_map(pmd, pg);
			if (pte_none(*pte)) {
				return i ? : -EFAULT;
				struct page *page = vm_normal_page(gate_vma, start, *pte);

		    (vma->vm_flags & (VM_IO | VM_PFNMAP)) ||
		    (!ignore && !(vm_flags & vma->vm_flags)))
			return i ? : -EFAULT;

		if (is_vm_hugetlb_page(vma)) {
#if LINUX_VERSION_CODE <= KERNEL_VERSION(2,6,18)
			i = follow_hugetlb_page(mm, vma, pages, vmas,
			i = follow_hugetlb_page(mm, vma, pages, vmas,
					&start, &len, i, write);

		foll_flags = FOLL_TOUCH;
			foll_flags |= FOLL_GET;

#if LINUX_VERSION_CODE > KERNEL_VERSION(2,6,18)
#if LINUX_VERSION_CODE <= KERNEL_VERSION(2,6,30)
		if (!write && use_zero_page(vma))
			foll_flags |= FOLL_ANON;
			foll_flags |= FOLL_WRITE;

			DBPRINTF("pages = %p vma = %p\n", pages, vma);
			while (!(page = follow_page(vma, start, foll_flags))) {
				ret = handle_mm_fault(mm, vma, start,
						foll_flags & FOLL_WRITE);

#if LINUX_VERSION_CODE <= KERNEL_VERSION(2,6,18)
				if (ret & VM_FAULT_WRITE)
					foll_flags &= ~FOLL_WRITE;

				switch (ret & ~VM_FAULT_WRITE) {
				case VM_FAULT_SIGBUS:
					return i ? i : -EFAULT;
					return i ? i : -ENOMEM;

				if (ret & VM_FAULT_ERROR) {
					if (ret & VM_FAULT_OOM)
						return i ? i : -ENOMEM;
					else if (ret & VM_FAULT_SIGBUS)
						return i ? i : -EFAULT;

				if (ret & VM_FAULT_MAJOR)

				/*
				 * The VM_FAULT_WRITE bit tells us that
				 * do_wp_page has broken COW when necessary,
				 * even if maybe_mkwrite decided not to set
				 * pte_write. We can thus safely do subsequent
				 * page lookups as if they were reads. But only
				 * do so when looping for pte_write is futile:
				 * in some cases userspace may also be wanting
				 * to write to the gotten user page, which a
				 * read fault here might prevent (a readonly
				 * page might get reCOWed by userspace write).
				 */
				if ((ret & VM_FAULT_WRITE) &&
				    !(vma->vm_flags & VM_WRITE))
					foll_flags &= ~FOLL_WRITE;

				return i ? i : PTR_ERR(page);

#if LINUX_VERSION_CODE <= KERNEL_VERSION(2,6,18)
				flush_anon_page(page, start);
				flush_anon_page(vma, page, start);
				flush_dcache_page(page);

		} while (len && start < vma->vm_end);

#endif /* LINUX_VERSION_CODE > KERNEL_VERSION(2, 6, 38) */
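
/*
 * Public wrapper: build the right flag set for this kernel version and call
 * the local __get_user_pages_uprobe() copy; on kernels older than 2.6.29 it
 * simply falls back to the exported get_user_pages().
 */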
int get_user_pages_uprobe(struct task_struct *tsk, struct mm_struct *mm,
		unsigned long start, int len, int write, int force,
		struct page **pages, struct vm_area_struct **vmas)
{
#if LINUX_VERSION_CODE >= KERNEL_VERSION(2, 6, 29)
#if LINUX_VERSION_CODE > KERNEL_VERSION(2, 6, 38) /* FIXME: must be >= 32! */
	int flags = FOLL_TOUCH;
#else /* LINUX_VERSION_CODE > KERNEL_VERSION(2, 6, 38) */
		flags |= GUP_FLAGS_WRITE;
		flags |= GUP_FLAGS_FORCE;
#endif /* LINUX_VERSION_CODE > KERNEL_VERSION(2, 6, 38) */

	return __get_user_pages_uprobe(tsk, mm,
#if LINUX_VERSION_CODE > KERNEL_VERSION(2, 6, 38)
#else /* LINUX_VERSION_CODE > KERNEL_VERSION(2, 6, 38) */
#endif /* LINUX_VERSION_CODE > KERNEL_VERSION(2, 6, 38) */
#else /* LINUX_VERSION_CODE >= KERNEL_VERSION(2, 6, 29) */
	return get_user_pages(tsk, mm, start, len, write, force, pages, vmas);
#endif /* LINUX_VERSION_CODE >= KERNEL_VERSION(2, 6, 29) */
}
#define ACCESS_PROCESS_OPTIMIZATION 0
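
/*
 * Optional fast path (disabled by the 0 above): when the target is the
 * current task, read/write its memory directly with get_user()/put_user()
 * in 4/2/1 byte steps instead of going through get_user_pages.
 */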
#if ACCESS_PROCESS_OPTIMIZATION

#define GET_STEP_X(LEN, STEP) (((LEN) >= (STEP)) ? (STEP) : (LEN) % (STEP))
#define GET_STEP_4(LEN) GET_STEP_X((LEN), 4)
static void read_data_current(unsigned long addr, void *buf, int len)
	for (step = GET_STEP_4(len); len; len -= step) {
		switch (GET_STEP_4(len)) {
			get_user(*(u8 *)(buf + pos), (unsigned long *)(addr + pos));
			get_user(*(u16 *)(buf + pos), (unsigned long *)(addr + pos));
			get_user(*(u32 *)(buf + pos), (unsigned long *)(addr + pos));

static void write_data_current(unsigned long addr, void *buf, int len)
	for (step = GET_STEP_4(len); len; len -= step) {
		switch (GET_STEP_4(len)) {
			put_user(*(u8 *)(buf + pos), (unsigned long *)(addr + pos));
			put_user(*(u16 *)(buf + pos), (unsigned long *)(addr + pos));
			put_user(*(u32 *)(buf + pos), (unsigned long *)(addr + pos));
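
/*
 * access_process_vm_atomic() mirrors the kernel's access_process_vm(), but
 * uses the local get_user_pages_uprobe() and dbi_kmap_atomic() so that,
 * presumably, it can be called from atomic context.  It returns the number
 * of bytes actually transferred.
 */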
int access_process_vm_atomic(struct task_struct *tsk, unsigned long addr, void *buf, int len, int write)
	struct mm_struct *mm;
	struct vm_area_struct *vma;
	void *old_buf = buf;

#if ACCESS_PROCESS_OPTIMIZATION
	if (write == 0 && tsk == current) {
		read_data_current(addr, buf, len);

	mm = tsk->mm; /* function 'get_task_mm' is to be called */

	/* ignore errors, just check how much was successfully transferred */
		int bytes, ret, offset;
		struct page *page = NULL;

		ret = get_user_pages_uprobe(tsk, mm, addr, 1,
				write, 1, &page, &vma);
			/*
			 * Check if this is a VM_IO | VM_PFNMAP VMA, which
			 * we can access using slightly different code.
			 */
#ifdef CONFIG_HAVE_IOREMAP_PROT
			vma = find_vma(mm, addr);
			if (vma->vm_ops && vma->vm_ops->access)
				ret = vma->vm_ops->access(vma, addr, buf,

			offset = addr & (PAGE_SIZE-1);
			if (bytes > PAGE_SIZE-offset)
				bytes = PAGE_SIZE-offset;

			maddr = dbi_kmap_atomic(page);
				copy_to_user_page(vma, page, addr,
						maddr + offset, buf, bytes);
				set_page_dirty_lock(page);
				copy_from_user_page(vma, page, addr,
						buf, maddr + offset, bytes);

			dbi_kunmap_atomic(maddr);
			page_cache_release(page);

	return buf - old_buf;
}
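
/*
 * page_present() walks the page tables (pgd -> pud -> pmd -> pte) and
 * reports whether the given address is currently mapped to a valid,
 * present page frame.
 */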
int page_present(struct mm_struct *mm, unsigned long address)
	pgd = pgd_offset(mm, address);
	if (pgd_none(*pgd) || unlikely(pgd_bad(*pgd)))

	pud = pud_offset(pgd, address);
	if (pud_none(*pud) || unlikely(pud_bad(*pud)))

	pmd = pmd_offset(pud, address);
	if (pmd_none(*pmd) || unlikely(pmd_bad(*pmd)))

	ptep = pte_offset_map(pmd, address);

	if (pte_present(pte)) {
		if (pfn_valid(pfn)) {
EXPORT_SYMBOL_GPL(page_present);
EXPORT_SYMBOL_GPL(get_user_pages_uprobe);
EXPORT_SYMBOL_GPL(access_process_vm_atomic);