/*
 *  Dynamic Binary Instrumentation Module based on KProbes
 *  modules/kprobe/dbi_kprobes_deps.c
 *
 * This program is free software; you can redistribute it and/or modify
 * it under the terms of the GNU General Public License as published by
 * the Free Software Foundation; either version 2 of the License, or
 * (at your option) any later version.
 *
 * This program is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 * GNU General Public License for more details.
 *
 * You should have received a copy of the GNU General Public License
 * along with this program; if not, write to the Free Software
 * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
 *
 * Copyright (C) Samsung Electronics, 2006-2013
 *
 * 2008-2009	Alexey Gerenkov <a.gerenkov@samsung.com>: initial
 *		implementation of user-space probes; x86/ARM/MIPS support
 *		for both user and kernel space
 * 2010		Ekaterina Gorelkina <e.gorelkina@samsung.com>: redesign of
 *		the module to separate the core and arch parts
 * 2010-2012	Dmitry Kovalenko <d.kovalenko@samsung.com>,
 *		Nikita Kalyazin <n.kalyazin@samsung.com>:
 *		improvements and bug fixes
 * 2010-2011	Alexander Shirshikov: improvements and bug fixes
 * 2011-2012	Stanislav Andreev <s.andreev@samsung.com>:
 *		improvements and bug fixes
 * 2012		Vitaliy Cherepanov <v.chereapanov@samsung.com>:
 *		improvements and bug fixes
 * 2012-2013	Vasiliy Ulyanov <v.ulyanov@samsung.com>,
 *		Vyacheslav Cherkashin <v.cherkashin@samsung.com>:
 *		improvements and bug fixes
 */
#include <linux/module.h>
#include <linux/sched.h>

#include <asm/pgtable.h>

#include "dbi_kprobes_deps.h"
#include "dbi_kdebug.h"

#include <linux/slab.h>

unsigned long sched_addr;
unsigned long fork_addr;
unsigned long exit_addr;
unsigned long sys_exit_group_addr;
unsigned long do_group_exit_addr;
#if LINUX_VERSION_CODE > KERNEL_VERSION(2, 6, 29)
/* kernel define 'pgd_offset_k' redefinition */
#undef pgd_offset_k
#define pgd_offset_k(addr)	pgd_offset(init_task.active_mm, addr)
#endif

#if (LINUX_VERSION_CODE > KERNEL_VERSION(2, 6, 38))
#ifndef is_zero_pfn
/* PFN of the zero page, cached in init_module_dependencies() */
static unsigned long swap_zero_pfn = 0;
#endif /* is_zero_pfn */
#endif /* (LINUX_VERSION_CODE > KERNEL_VERSION(2, 6, 38)) */
#if LINUX_VERSION_CODE > KERNEL_VERSION(2, 6, 36)
static inline void *dbi_kmap_atomic(struct page *page)
{
	return kmap_atomic(page);
}

static inline void dbi_kunmap_atomic(void *kvaddr)
{
	kunmap_atomic(kvaddr);
}
#else /* LINUX_VERSION_CODE > KERNEL_VERSION(2, 6, 36) */
static inline void *dbi_kmap_atomic(struct page *page)
{
	return kmap_atomic(page, KM_USER0);
}

static inline void dbi_kunmap_atomic(void *kvaddr)
{
	kunmap_atomic(kvaddr, KM_USER0);
}
#endif /* LINUX_VERSION_CODE > KERNEL_VERSION(2, 6, 36) */
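
/*
 * Illustrative note (not taken from the original source): the wrappers above
 * exist because kmap_atomic() lost its KM_USER0 slot argument around 2.6.37,
 * so callers such as access_process_vm_atomic() below can map a pinned page
 * the same way on every supported kernel. A sketch, where page/kbuf/offset/
 * bytes are placeholders:
 *
 *	void *maddr = dbi_kmap_atomic(page);
 *	memcpy(kbuf, maddr + offset, bytes);
 *	dbi_kunmap_atomic(maddr);
 */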
#if LINUX_VERSION_CODE >= KERNEL_VERSION(3, 9, 0)
DECLARE_MOD_FUNC_DEP(do_mmap_pgoff, unsigned long, struct file *file, unsigned long addr, unsigned long len, unsigned long prot, unsigned long flags, unsigned long pgoff, unsigned long *populate);
DECLARE_MOD_DEP_WRAPPER(do_mmap_pgoff, unsigned long, struct file *file, unsigned long addr, unsigned long len, unsigned long prot, unsigned long flags, unsigned long pgoff, unsigned long *populate)
IMP_MOD_DEP_WRAPPER(do_mmap_pgoff, file, addr, len, prot, flags, pgoff, populate)
#elif LINUX_VERSION_CODE >= KERNEL_VERSION(3, 4, 0) /* LINUX_VERSION_CODE >= KERNEL_VERSION(3, 9, 0) */
DECLARE_MOD_FUNC_DEP(do_mmap_pgoff, unsigned long, struct file *file, unsigned long addr, unsigned long len, unsigned long prot, unsigned long flags, unsigned long pgoff);
DECLARE_MOD_DEP_WRAPPER(do_mmap_pgoff, unsigned long, struct file *file, unsigned long addr, unsigned long len, unsigned long prot, unsigned long flags, unsigned long pgoff)
IMP_MOD_DEP_WRAPPER(do_mmap_pgoff, file, addr, len, prot, flags, pgoff)
#endif /* LINUX_VERSION_CODE >= KERNEL_VERSION(3, 4, 0) */

/* copy_to_user_page */
#ifndef copy_to_user_page
static DECLARE_MOD_FUNC_DEP(copy_to_user_page, void, struct vm_area_struct *vma, struct page *page, unsigned long uaddr, void *dst, const void *src, unsigned long len);
DECLARE_MOD_DEP_WRAPPER(copy_to_user_page, void, struct vm_area_struct *vma, struct page *page, unsigned long uaddr, void *dst, const void *src, unsigned long len)
IMP_MOD_DEP_WRAPPER(copy_to_user_page, vma, page, uaddr, dst, src, len)
#endif /* copy_to_user_page */

static DECLARE_MOD_FUNC_DEP(access_process_vm, int, struct task_struct *tsk, unsigned long addr, void *buf, int len, int write);

static DECLARE_MOD_FUNC_DEP(find_extend_vma, struct vm_area_struct *, struct mm_struct *mm, unsigned long addr);
#if LINUX_VERSION_CODE <= KERNEL_VERSION(2, 6, 30)
#if LINUX_VERSION_CODE > KERNEL_VERSION(2, 6, 18)
static DECLARE_MOD_FUNC_DEP(handle_mm_fault, int, struct mm_struct *mm, struct vm_area_struct *vma, unsigned long address, int write_access);
#endif /* LINUX_VERSION_CODE > KERNEL_VERSION(2, 6, 18) */
#else /* LINUX_VERSION_CODE <= KERNEL_VERSION(2, 6, 30) */
static DECLARE_MOD_FUNC_DEP(handle_mm_fault, int, struct mm_struct *mm, struct vm_area_struct *vma, unsigned long address, unsigned int flags);
#endif /* LINUX_VERSION_CODE <= KERNEL_VERSION(2, 6, 30) */
#if LINUX_VERSION_CODE > KERNEL_VERSION(2, 6, 38)
static DECLARE_MOD_FUNC_DEP(get_gate_vma, struct vm_area_struct *, struct mm_struct *mm);
#else /* LINUX_VERSION_CODE > KERNEL_VERSION(2, 6, 38) */
static DECLARE_MOD_FUNC_DEP(get_gate_vma, struct vm_area_struct *, struct task_struct *tsk);
#endif /* LINUX_VERSION_CODE > KERNEL_VERSION(2, 6, 38) */
#ifdef CONFIG_HUGETLB_PAGE
DECLARE_MOD_FUNC_DEP(follow_hugetlb_page, int, struct mm_struct *mm, \
		struct vm_area_struct *vma, struct page **pages, \
		struct vm_area_struct **vmas, unsigned long *position, int *length, \
		int i, unsigned int write);
#endif /* CONFIG_HUGETLB_PAGE */
#ifdef __HAVE_ARCH_GATE_AREA
#if LINUX_VERSION_CODE > KERNEL_VERSION(2, 6, 38)
DECLARE_MOD_FUNC_DEP(in_gate_area, int, struct mm_struct *mm, unsigned long addr);
#else /* LINUX_VERSION_CODE > KERNEL_VERSION(2, 6, 38) */
DECLARE_MOD_FUNC_DEP(in_gate_area, int, struct task_struct *task, unsigned long addr);
#endif /* LINUX_VERSION_CODE > KERNEL_VERSION(2, 6, 38) */
#endif /* __HAVE_ARCH_GATE_AREA */

#if LINUX_VERSION_CODE > KERNEL_VERSION(2, 6, 38)
static DECLARE_MOD_FUNC_DEP(in_gate_area_no_mm, int, unsigned long addr);
#else /* LINUX_VERSION_CODE > KERNEL_VERSION(2, 6, 38) */
static DECLARE_MOD_FUNC_DEP(in_gate_area_no_task, int, unsigned long addr);
#endif /* LINUX_VERSION_CODE > KERNEL_VERSION(2, 6, 38) */
#if LINUX_VERSION_CODE >= KERNEL_VERSION(3, 9, 0)
static DECLARE_MOD_FUNC_DEP(follow_page_mask, \
		struct page *, struct vm_area_struct *vma, \
		unsigned long address, unsigned int foll_flags, \
		unsigned int *page_mask);
DECLARE_MOD_DEP_WRAPPER(follow_page_mask, struct page *, \
		struct vm_area_struct *vma, \
		unsigned long address, \
		unsigned int foll_flags, \
		unsigned int *page_mask)
IMP_MOD_DEP_WRAPPER(follow_page_mask, vma, address, foll_flags, page_mask)
#else /* LINUX_VERSION_CODE >= KERNEL_VERSION(3, 9, 0) */
static DECLARE_MOD_FUNC_DEP(follow_page, \
		struct page *, struct vm_area_struct *vma, \
		unsigned long address, unsigned int foll_flags);
DECLARE_MOD_DEP_WRAPPER(follow_page, struct page *, \
		struct vm_area_struct *vma, \
		unsigned long address, \
		unsigned int foll_flags)
IMP_MOD_DEP_WRAPPER(follow_page, vma, address, foll_flags)
#endif /* LINUX_VERSION_CODE >= KERNEL_VERSION(3, 9, 0) */

static DECLARE_MOD_FUNC_DEP(__flush_anon_page, \
		void, struct vm_area_struct *vma, struct page *page, \
		unsigned long vmaddr);
static DECLARE_MOD_FUNC_DEP(vm_normal_page, \
		struct page *, struct vm_area_struct *vma, \
		unsigned long addr, pte_t pte);
static DECLARE_MOD_FUNC_DEP(flush_ptrace_access, \
		void, struct vm_area_struct *vma, struct page *page, \
		unsigned long uaddr, void *kaddr, unsigned long len, int write);
#if (LINUX_VERSION_CODE != KERNEL_VERSION(2, 6, 16))
static DECLARE_MOD_FUNC_DEP(put_task_struct, \
		void, struct task_struct *tsk);
#else /* (LINUX_VERSION_CODE != KERNEL_VERSION(2, 6, 16)) */
static DECLARE_MOD_FUNC_DEP(put_task_struct, \
		void, struct rcu_head *rhp);
#endif /* (LINUX_VERSION_CODE != KERNEL_VERSION(2, 6, 16)) */
DECLARE_MOD_DEP_WRAPPER(access_process_vm, int, struct task_struct *tsk, unsigned long addr, void *buf, int len, int write)
IMP_MOD_DEP_WRAPPER(access_process_vm, tsk, addr, buf, len, write)

DECLARE_MOD_DEP_WRAPPER(find_extend_vma, struct vm_area_struct *, struct mm_struct *mm, unsigned long addr)
IMP_MOD_DEP_WRAPPER(find_extend_vma, mm, addr)
#if LINUX_VERSION_CODE <= KERNEL_VERSION(2, 6, 30)
#if LINUX_VERSION_CODE > KERNEL_VERSION(2, 6, 18)
DECLARE_MOD_DEP_WRAPPER(handle_mm_fault, \
		int, struct mm_struct *mm, struct vm_area_struct *vma, unsigned long address, int write_access)
IMP_MOD_DEP_WRAPPER(handle_mm_fault, mm, vma, address, write_access)
#endif /* LINUX_VERSION_CODE > KERNEL_VERSION(2, 6, 18) */
#else /* LINUX_VERSION_CODE <= KERNEL_VERSION(2, 6, 30) */
DECLARE_MOD_DEP_WRAPPER(handle_mm_fault, \
		int, struct mm_struct *mm, struct vm_area_struct *vma, unsigned long address, unsigned int flags)
IMP_MOD_DEP_WRAPPER(handle_mm_fault, mm, vma, address, flags)
#endif /* LINUX_VERSION_CODE <= KERNEL_VERSION(2, 6, 30) */
#if LINUX_VERSION_CODE > KERNEL_VERSION(2, 6, 38)
DECLARE_MOD_DEP_WRAPPER(get_gate_vma, \
		struct vm_area_struct *, struct mm_struct *mm)
IMP_MOD_DEP_WRAPPER(get_gate_vma, mm)
#else /* LINUX_VERSION_CODE > KERNEL_VERSION(2, 6, 38) */
DECLARE_MOD_DEP_WRAPPER(get_gate_vma, \
		struct vm_area_struct *, struct task_struct *tsk)
IMP_MOD_DEP_WRAPPER(get_gate_vma, tsk)
#endif /* LINUX_VERSION_CODE > KERNEL_VERSION(2, 6, 38) */
#ifdef CONFIG_HUGETLB_PAGE
DECLARE_MOD_DEP_WRAPPER(follow_hugetlb_page, int, struct mm_struct *mm, struct vm_area_struct *vma, struct page **pages, struct vm_area_struct **vmas, unsigned long *position, int *length, int i, unsigned int write)
IMP_MOD_DEP_WRAPPER(follow_hugetlb_page, mm, vma, pages, vmas, position, length, i, write)
#endif /* CONFIG_HUGETLB_PAGE */
static inline int dbi_in_gate_area(struct task_struct *task, unsigned long addr)
{
#ifdef __HAVE_ARCH_GATE_AREA
#if LINUX_VERSION_CODE > KERNEL_VERSION(2, 6, 38)
	struct mm_struct *mm = task->mm;
	IMP_MOD_DEP_WRAPPER(in_gate_area, mm, addr)
#else /* LINUX_VERSION_CODE > KERNEL_VERSION(2, 6, 38) */
	IMP_MOD_DEP_WRAPPER(in_gate_area, task, addr)
#endif /* LINUX_VERSION_CODE > KERNEL_VERSION(2, 6, 38) */
#else /* __HAVE_ARCH_GATE_AREA */
	return in_gate_area(task, addr);
#endif /* __HAVE_ARCH_GATE_AREA */
}
#if LINUX_VERSION_CODE > KERNEL_VERSION(2, 6, 38)
DECLARE_MOD_DEP_WRAPPER(in_gate_area_no_mm, int, unsigned long addr)
IMP_MOD_DEP_WRAPPER(in_gate_area_no_mm, addr)
#else /* LINUX_VERSION_CODE > KERNEL_VERSION(2, 6, 38) */
DECLARE_MOD_DEP_WRAPPER(in_gate_area_no_task, int, unsigned long addr)
IMP_MOD_DEP_WRAPPER(in_gate_area_no_task, addr)
#endif /* LINUX_VERSION_CODE > KERNEL_VERSION(2, 6, 38) */
static inline int dbi_in_gate_area_no_xxx(unsigned long addr)
{
#if LINUX_VERSION_CODE > KERNEL_VERSION(2, 6, 38)
	return in_gate_area_no_mm(addr);
#else /* LINUX_VERSION_CODE > KERNEL_VERSION(2, 6, 38) */
	return in_gate_area_no_task(addr);
#endif /* LINUX_VERSION_CODE > KERNEL_VERSION(2, 6, 38) */
}
DECLARE_MOD_DEP_WRAPPER(__flush_anon_page, \
		void, struct vm_area_struct *vma, \
		struct page *page, unsigned long vmaddr)
IMP_MOD_DEP_WRAPPER(__flush_anon_page, vma, page, vmaddr)

DECLARE_MOD_DEP_WRAPPER(vm_normal_page, \
		struct page *, struct vm_area_struct *vma, \
		unsigned long addr, pte_t pte)
IMP_MOD_DEP_WRAPPER(vm_normal_page, vma, addr, pte)

DECLARE_MOD_DEP_WRAPPER(flush_ptrace_access, \
		void, struct vm_area_struct *vma, struct page *page, \
		unsigned long uaddr, void *kaddr, unsigned long len, int write)
IMP_MOD_DEP_WRAPPER(flush_ptrace_access, vma, page, uaddr, kaddr, len, write)
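
/*
 * Illustrative note (an assumption, not taken from the original sources): the
 * DECLARE_MOD_FUNC_DEP / DECLARE_MOD_DEP_WRAPPER / IMP_MOD_DEP_WRAPPER /
 * INIT_MOD_DEP_VAR macros are defined in dbi_kprobes_deps.h and give this
 * module access to kernel functions that are not exported to modules: a
 * function pointer is declared, resolved at init time in
 * init_module_dependencies(), and a wrapper with the original signature
 * calls through it. Roughly (names hypothetical):
 *
 *	static int (*__ref_access_process_vm)(struct task_struct *tsk,
 *			unsigned long addr, void *buf, int len, int write);
 *
 *	int access_process_vm(struct task_struct *tsk, unsigned long addr,
 *			      void *buf, int len, int write)
 *	{
 *		return __ref_access_process_vm(tsk, addr, buf, len, write);
 *	}
 */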
int init_module_dependencies(void)
{

#if LINUX_VERSION_CODE > KERNEL_VERSION(2, 6, 18)
	INIT_MOD_DEP_VAR(handle_mm_fault, handle_mm_fault);
#endif

#ifndef copy_to_user_page
	INIT_MOD_DEP_VAR(copy_to_user_page, copy_to_user_page);
#endif /* copy_to_user_page */

	INIT_MOD_DEP_VAR(find_extend_vma, find_extend_vma);
	INIT_MOD_DEP_VAR(get_gate_vma, get_gate_vma);
	INIT_MOD_DEP_VAR(flush_ptrace_access, flush_ptrace_access);

#ifdef CONFIG_HUGETLB_PAGE
	INIT_MOD_DEP_VAR(follow_hugetlb_page, follow_hugetlb_page);
#endif /* CONFIG_HUGETLB_PAGE */

#ifdef __HAVE_ARCH_GATE_AREA
	INIT_MOD_DEP_VAR(in_gate_area, in_gate_area);
#endif /* __HAVE_ARCH_GATE_AREA */

#if LINUX_VERSION_CODE >= KERNEL_VERSION(3, 9, 0)
	INIT_MOD_DEP_VAR(follow_page_mask, follow_page_mask);
#else /* LINUX_VERSION_CODE >= KERNEL_VERSION(3, 9, 0) */
	INIT_MOD_DEP_VAR(follow_page, follow_page);
#endif /* LINUX_VERSION_CODE >= KERNEL_VERSION(3, 9, 0) */

#if LINUX_VERSION_CODE > KERNEL_VERSION(2, 6, 38)

#ifndef is_zero_pfn
	swap_zero_pfn = page_to_pfn(ZERO_PAGE(0));
#endif /* is_zero_pfn */

	INIT_MOD_DEP_VAR(in_gate_area_no_mm, in_gate_area_no_mm);
#else /* (LINUX_VERSION_CODE > KERNEL_VERSION(2, 6, 38)) */
	INIT_MOD_DEP_VAR(in_gate_area_no_task, in_gate_area_no_task);
#endif /* (LINUX_VERSION_CODE > KERNEL_VERSION(2, 6, 38)) */

	INIT_MOD_DEP_VAR(__flush_anon_page, __flush_anon_page);
	INIT_MOD_DEP_VAR(vm_normal_page, vm_normal_page);
	INIT_MOD_DEP_VAR(access_process_vm, access_process_vm);

#if (LINUX_VERSION_CODE != KERNEL_VERSION(2, 6, 16))
# if (LINUX_VERSION_CODE < KERNEL_VERSION(2, 6, 11))
	INIT_MOD_DEP_VAR(put_task_struct, put_task_struct);
# else /* (LINUX_VERSION_CODE < KERNEL_VERSION(2, 6, 11)) */
	INIT_MOD_DEP_VAR(put_task_struct, __put_task_struct);
# endif /* (LINUX_VERSION_CODE < KERNEL_VERSION(2, 6, 11)) */
#else /* (LINUX_VERSION_CODE != KERNEL_VERSION(2, 6, 16)) */
	INIT_MOD_DEP_VAR(put_task_struct, __put_task_struct_cb);
#endif /* (LINUX_VERSION_CODE != KERNEL_VERSION(2, 6, 16)) */

#if LINUX_VERSION_CODE >= KERNEL_VERSION(3, 4, 0)
	INIT_MOD_DEP_VAR(do_mmap_pgoff, do_mmap_pgoff);
#endif /* LINUX_VERSION_CODE >= KERNEL_VERSION(3, 4, 0) */

	return 0;
}
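
/*
 * Usage sketch (illustrative, not part of the original file): the probe core
 * is expected to call init_module_dependencies() once, before any of the
 * wrappers above are used, and to abort its own initialization on failure.
 * A hypothetical caller:
 *
 *	static int __init probe_core_init(void)
 *	{
 *		int ret = init_module_dependencies();
 *		if (ret)
 *			return ret;
 *		return 0;
 *	}
 */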
#if LINUX_VERSION_CODE <= KERNEL_VERSION(2, 6, 38) /* FIXME: must be < 32 */
#define GUP_FLAGS_WRITE				0x1
#define GUP_FLAGS_FORCE				0x2
#define GUP_FLAGS_IGNORE_VMA_PERMISSIONS	0x4
#define GUP_FLAGS_IGNORE_SIGKILL		0x8
#endif /* LINUX_VERSION_CODE <= KERNEL_VERSION(2, 6, 38) */
#if LINUX_VERSION_CODE > KERNEL_VERSION(2, 6, 18)
static inline int use_zero_page(struct vm_area_struct *vma)
{
	/*
	 * We don't want to optimize FOLL_ANON for make_pages_present()
	 * when it tries to page in a VM_LOCKED region. As to VM_SHARED,
	 * we want to get the page from the page tables to make sure
	 * that we serialize and update with any other user of that
	 * mapping.
	 */
	if (vma->vm_flags & (VM_LOCKED | VM_SHARED))
		return 0;
	/*
	 * And if we have a fault routine, it's not an anonymous region.
	 */
	return !vma->vm_ops || !vma->vm_ops->fault;
}
#endif /* LINUX_VERSION_CODE > KERNEL_VERSION(2, 6, 18) */
#if LINUX_VERSION_CODE > KERNEL_VERSION(2, 6, 38)

#if LINUX_VERSION_CODE >= KERNEL_VERSION(3, 8, 0)

#ifdef __HAVE_COLOR_ZERO_PAGE

static inline int swap_is_zero_pfn(unsigned long pfn)
{
	unsigned long offset_from_zero_pfn = pfn - swap_zero_pfn;
	return offset_from_zero_pfn <= (zero_page_mask >> PAGE_SHIFT);
}

#else /* __HAVE_COLOR_ZERO_PAGE */

static inline int swap_is_zero_pfn(unsigned long pfn)
{
	return pfn == swap_zero_pfn;
}

#endif /* __HAVE_COLOR_ZERO_PAGE */

#else /* LINUX_VERSION_CODE >= KERNEL_VERSION(3, 8, 0) */

static inline int swap_is_zero_pfn(unsigned long pfn)
{
#ifndef is_zero_pfn
	return pfn == swap_zero_pfn;
#else /* is_zero_pfn */
	return is_zero_pfn(pfn);
#endif /* is_zero_pfn */
}

#endif /* LINUX_VERSION_CODE >= KERNEL_VERSION(3, 8, 0) */
static inline int stack_guard_page(struct vm_area_struct *vma, unsigned long addr)
{
	return stack_guard_page_start(vma, addr) ||
			stack_guard_page_end(vma, addr + PAGE_SIZE);
}
#if LINUX_VERSION_CODE >= KERNEL_VERSION(3, 9, 0)

static inline struct page *follow_page_uprobe(struct vm_area_struct *vma,
	unsigned long address, unsigned int foll_flags)
{
	unsigned int unused_page_mask;
	return follow_page_mask(vma, address, foll_flags, &unused_page_mask);
}

long __get_user_pages_uprobe(struct task_struct *tsk, struct mm_struct *mm,
		unsigned long start, unsigned long nr_pages,
		unsigned int gup_flags, struct page **pages,
		struct vm_area_struct **vmas, int *nonblocking)
{
	long i;
	unsigned long vm_flags;
	unsigned int page_mask;

	if (!nr_pages)
		return 0;

	VM_BUG_ON(!!pages != !!(gup_flags & FOLL_GET));

	/*
	 * Require read or write permissions.
	 * If FOLL_FORCE is set, we only require the "MAY" flags.
	 */
	vm_flags  = (gup_flags & FOLL_WRITE) ?
			(VM_WRITE | VM_MAYWRITE) : (VM_READ | VM_MAYREAD);
	vm_flags &= (gup_flags & FOLL_FORCE) ?
			(VM_MAYREAD | VM_MAYWRITE) : (VM_READ | VM_WRITE);

	/*
	 * If FOLL_FORCE and FOLL_NUMA are both set, handle_mm_fault
	 * would be called on PROT_NONE ranges. We must never invoke
	 * handle_mm_fault on PROT_NONE ranges or the NUMA hinting
	 * page faults would unprotect the PROT_NONE ranges if
	 * _PAGE_NUMA and _PAGE_PROTNONE are sharing the same pte/pmd
	 * bitflag. So to avoid that, don't set FOLL_NUMA if
	 * FOLL_FORCE is set.
	 */
	if (!(gup_flags & FOLL_FORCE))
		gup_flags |= FOLL_NUMA;

	i = 0;

	do {
		struct vm_area_struct *vma;

		vma = find_extend_vma(mm, start);
		if (!vma && dbi_in_gate_area(tsk, start)) {
			unsigned long pg = start & PAGE_MASK;
			pgd_t *pgd;
			pud_t *pud;
			pmd_t *pmd;
			pte_t *pte;

			/* user gate pages are read-only */
			if (gup_flags & FOLL_WRITE)
				return i ? : -EFAULT;
			if (pg > TASK_SIZE)
				pgd = pgd_offset_k(pg);
			else
				pgd = pgd_offset_gate(mm, pg);
			BUG_ON(pgd_none(*pgd));
			pud = pud_offset(pgd, pg);
			BUG_ON(pud_none(*pud));
			pmd = pmd_offset(pud, pg);
			if (pmd_none(*pmd))
				return i ? : -EFAULT;
			VM_BUG_ON(pmd_trans_huge(*pmd));
			pte = pte_offset_map(pmd, pg);
			if (pte_none(*pte)) {
				pte_unmap(pte);
				return i ? : -EFAULT;
			}
			vma = get_gate_vma(mm);
			if (pages) {
				struct page *page;

				page = vm_normal_page(vma, start, *pte);
				if (!page) {
					if (!(gup_flags & FOLL_DUMP) &&
					     swap_is_zero_pfn(pte_pfn(*pte)))
						page = pte_page(*pte);
					else {
						pte_unmap(pte);
						return i ? : -EFAULT;
					}
				}
				pages[i] = page;
				get_page(page);
			}
			pte_unmap(pte);
			page_mask = 0;
			goto next_page;
		}

		if (!vma ||
		    (vma->vm_flags & (VM_IO | VM_PFNMAP)) ||
		    !(vm_flags & vma->vm_flags))
			return i ? : -EFAULT;

		if (is_vm_hugetlb_page(vma)) {
			i = follow_hugetlb_page(mm, vma, pages, vmas,
					&start, &nr_pages, i, gup_flags);
			continue;
		}

		do {
			struct page *page;
			unsigned int foll_flags = gup_flags;
			unsigned int page_increm;

			/*
			 * If we have a pending SIGKILL, don't keep faulting
			 * pages and potentially allocating memory.
			 */
			if (unlikely(fatal_signal_pending(current)))
				return i ? i : -ERESTARTSYS;

			/* cond_resched(); */
			while (!(page = follow_page_mask(vma, start,
						foll_flags, &page_mask))) {
				int ret;
				unsigned int fault_flags = 0;

				/* For mlock, just skip the stack guard page. */
				if (foll_flags & FOLL_MLOCK) {
					if (stack_guard_page(vma, start))
						goto next_page;
				}
				if (foll_flags & FOLL_WRITE)
					fault_flags |= FAULT_FLAG_WRITE;
				if (nonblocking)
					fault_flags |= FAULT_FLAG_ALLOW_RETRY;
				if (foll_flags & FOLL_NOWAIT)
					fault_flags |= (FAULT_FLAG_ALLOW_RETRY | FAULT_FLAG_RETRY_NOWAIT);

				ret = handle_mm_fault(mm, vma, start,
							fault_flags);

				if (ret & VM_FAULT_ERROR) {
					if (ret & VM_FAULT_OOM)
						return i ? i : -ENOMEM;
					if (ret & (VM_FAULT_HWPOISON |
						   VM_FAULT_HWPOISON_LARGE)) {
						if (i)
							return i;
						else if (gup_flags & FOLL_HWPOISON)
							return -EHWPOISON;
						else
							return -EFAULT;
					}
					if (ret & VM_FAULT_SIGBUS)
						return i ? i : -EFAULT;
					BUG();
				}

				if (tsk) {
					if (ret & VM_FAULT_MAJOR)
						tsk->maj_flt++;
					else
						tsk->min_flt++;
				}

				if (ret & VM_FAULT_RETRY) {
					if (nonblocking)
						*nonblocking = 0;
					return i;
				}

				/*
				 * The VM_FAULT_WRITE bit tells us that
				 * do_wp_page has broken COW when necessary,
				 * even if maybe_mkwrite decided not to set
				 * pte_write. We can thus safely do subsequent
				 * page lookups as if they were reads. But only
				 * do so when looping for pte_write is futile:
				 * in some cases userspace may also be wanting
				 * to write to the gotten user page, which a
				 * read fault here might prevent (a readonly
				 * page might get reCOWed by userspace write).
				 */
				if ((ret & VM_FAULT_WRITE) &&
				    !(vma->vm_flags & VM_WRITE))
					foll_flags &= ~FOLL_WRITE;

				/* cond_resched(); */
			}
			if (IS_ERR(page))
				return i ? i : PTR_ERR(page);
			if (pages) {
				pages[i] = page;

				flush_anon_page(vma, page, start);
				flush_dcache_page(page);
				page_mask = 0;
			}
next_page:
			if (vmas) {
				vmas[i] = vma;
				page_mask = 0;
			}
			page_increm = 1 + (~(start >> PAGE_SHIFT) & page_mask);
			if (page_increm > nr_pages)
				page_increm = nr_pages;
			i += page_increm;
			start += page_increm * PAGE_SIZE;
			nr_pages -= page_increm;
		} while (nr_pages && start < vma->vm_end);
	} while (nr_pages);

	return i;
}
#else /* LINUX_VERSION_CODE >= KERNEL_VERSION(3, 9, 0) */

static int __get_user_pages_uprobe(struct task_struct *tsk, struct mm_struct *mm,
			unsigned long start, int nr_pages, unsigned int gup_flags,
			struct page **pages, struct vm_area_struct **vmas,
			int *nonblocking)
{
	int i;
	unsigned long vm_flags;

	if (nr_pages <= 0) {
		return 0;
	}

	VM_BUG_ON(!!pages != !!(gup_flags & FOLL_GET));

	/*
	 * Require read or write permissions.
	 * If FOLL_FORCE is set, we only require the "MAY" flags.
	 */
	vm_flags  = (gup_flags & FOLL_WRITE) ?
			(VM_WRITE | VM_MAYWRITE) : (VM_READ | VM_MAYREAD);
	vm_flags &= (gup_flags & FOLL_FORCE) ?
			(VM_MAYREAD | VM_MAYWRITE) : (VM_READ | VM_WRITE);

	i = 0;

	do {
		struct vm_area_struct *vma;

		vma = find_extend_vma(mm, start);
		if (!vma && dbi_in_gate_area_no_xxx(start)) {
			unsigned long pg = start & PAGE_MASK;
			pgd_t *pgd;
			pud_t *pud;
			pmd_t *pmd;
			pte_t *pte;

			/* user gate pages are read-only */
			if (gup_flags & FOLL_WRITE) {
				return i ? : -EFAULT;
			}
			if (pg > TASK_SIZE)
				pgd = pgd_offset_k(pg);
			else
				pgd = pgd_offset_gate(mm, pg);
			BUG_ON(pgd_none(*pgd));
			pud = pud_offset(pgd, pg);
			BUG_ON(pud_none(*pud));
			pmd = pmd_offset(pud, pg);
			if (pmd_none(*pmd)) {
				return i ? : -EFAULT;
			}
			VM_BUG_ON(pmd_trans_huge(*pmd));
			pte = pte_offset_map(pmd, pg);
			if (pte_none(*pte)) {
				pte_unmap(pte);
				return i ? : -EFAULT;
			}
			vma = get_gate_vma(mm);
			if (pages) {
				struct page *page;

				page = vm_normal_page(vma, start, *pte);
				if (!page) {
					if (!(gup_flags & FOLL_DUMP) &&
						swap_is_zero_pfn(pte_pfn(*pte)))
						page = pte_page(*pte);
					else {
						pte_unmap(pte);
						return i ? : -EFAULT;
					}
				}
				pages[i] = page;
				get_page(page);
			}
			pte_unmap(pte);
			goto next_page;
		}

		if (!vma ||
			(vma->vm_flags & (VM_IO | VM_PFNMAP)) ||
			!(vm_flags & vma->vm_flags)) {
			return i ? : -EFAULT;
		}

		if (is_vm_hugetlb_page(vma)) {
			i = follow_hugetlb_page(mm, vma, pages, vmas,
					&start, &nr_pages, i, gup_flags);
			continue;
		}

		do {
			struct page *page;
			unsigned int foll_flags = gup_flags;

			/*
			 * If we have a pending SIGKILL, don't keep faulting
			 * pages and potentially allocating memory.
			 */
			if (unlikely(fatal_signal_pending(current))) {
				return i ? i : -ERESTARTSYS;
			}

			/* cond_resched(); */
			while (!(page = follow_page(vma, start, foll_flags))) {
				int ret;
				unsigned int fault_flags = 0;

				/* For mlock, just skip the stack guard page. */
				if (foll_flags & FOLL_MLOCK) {
					if (stack_guard_page(vma, start))
						goto next_page;
				}
				if (foll_flags & FOLL_WRITE)
					fault_flags |= FAULT_FLAG_WRITE;
				if (nonblocking)
					fault_flags |= FAULT_FLAG_ALLOW_RETRY;
				if (foll_flags & FOLL_NOWAIT)
					fault_flags |= (FAULT_FLAG_ALLOW_RETRY | FAULT_FLAG_RETRY_NOWAIT);

				ret = handle_mm_fault(mm, vma, start,
							fault_flags);

				if (ret & VM_FAULT_ERROR) {
					if (ret & VM_FAULT_OOM) {
						return i ? i : -ENOMEM;
					}
					if (ret & (VM_FAULT_HWPOISON |
						   VM_FAULT_HWPOISON_LARGE)) {
						if (i) {
							return i;
						}
						else if (gup_flags & FOLL_HWPOISON) {
							return -EHWPOISON;
						}
						else {
							return -EFAULT;
						}
					}
					if (ret & VM_FAULT_SIGBUS) {
						return i ? i : -EFAULT;
					}
					BUG();
				}

				if (tsk) {
					if (ret & VM_FAULT_MAJOR)
						tsk->maj_flt++;
					else
						tsk->min_flt++;
				}

				if (ret & VM_FAULT_RETRY) {
					if (nonblocking)
						*nonblocking = 0;
					return i;
				}

				/*
				 * The VM_FAULT_WRITE bit tells us that
				 * do_wp_page has broken COW when necessary,
				 * even if maybe_mkwrite decided not to set
				 * pte_write. We can thus safely do subsequent
				 * page lookups as if they were reads. But only
				 * do so when looping for pte_write is futile:
				 * in some cases userspace may also be wanting
				 * to write to the gotten user page, which a
				 * read fault here might prevent (a readonly
				 * page might get reCOWed by userspace write).
				 */
				if ((ret & VM_FAULT_WRITE) &&
				    !(vma->vm_flags & VM_WRITE))
					foll_flags &= ~FOLL_WRITE;

				/* cond_resched(); */
			}
			if (IS_ERR(page)) {
				return i ? i : PTR_ERR(page);
			}
			if (pages) {
				pages[i] = page;

				flush_anon_page(vma, page, start);
				flush_dcache_page(page);
			}
next_page:
			if (vmas)
				vmas[i] = vma;
			i++;
			start += PAGE_SIZE;
			nr_pages--;
		} while (nr_pages && start < vma->vm_end);
	} while (nr_pages);

	return i;
}

#endif /* LINUX_VERSION_CODE >= KERNEL_VERSION(3, 9, 0) */
#else /* LINUX_VERSION_CODE > KERNEL_VERSION(2, 6, 38) */

static int __get_user_pages_uprobe(struct task_struct *tsk, struct mm_struct *mm,
		unsigned long start, int len, int flags,
		struct page **pages, struct vm_area_struct **vmas)
{
	int i;
	unsigned int vm_flags = 0;
	int write = !!(flags & GUP_FLAGS_WRITE);
	int force = !!(flags & GUP_FLAGS_FORCE);
	int ignore = !!(flags & GUP_FLAGS_IGNORE_VMA_PERMISSIONS);

	if (len <= 0)
		return 0;

	/*
	 * Require read or write permissions.
	 * If 'force' is set, we only require the "MAY" flags.
	 */
	vm_flags  = write ? (VM_WRITE | VM_MAYWRITE) : (VM_READ | VM_MAYREAD);
	vm_flags &= force ? (VM_MAYREAD | VM_MAYWRITE) : (VM_READ | VM_WRITE);

	i = 0;

	do {
		struct vm_area_struct *vma;
		unsigned int foll_flags;

		vma = find_vma(mm, start);
		if (!vma && dbi_in_gate_area(tsk, start)) {
			unsigned long pg = start & PAGE_MASK;
			struct vm_area_struct *gate_vma = get_gate_vma(tsk);
			pgd_t *pgd;
			pud_t *pud;
			pmd_t *pmd;
			pte_t *pte;

			/* user gate pages are read-only */
			if (!ignore && write)
				return i ? : -EFAULT;
			if (pg > TASK_SIZE)
				pgd = pgd_offset_k(pg);
			else
				pgd = pgd_offset_gate(mm, pg);
			BUG_ON(pgd_none(*pgd));
			pud = pud_offset(pgd, pg);
			BUG_ON(pud_none(*pud));
			pmd = pmd_offset(pud, pg);
			if (pmd_none(*pmd))
				return i ? : -EFAULT;
			pte = pte_offset_map(pmd, pg);
			if (pte_none(*pte)) {
				pte_unmap(pte);
				return i ? : -EFAULT;
			}
			if (pages) {
				struct page *page = vm_normal_page(gate_vma, start, *pte);
				pages[i] = page;
				if (page)
					get_page(page);
			}
			pte_unmap(pte);
			if (vmas)
				vmas[i] = gate_vma;
			i++;
			start += PAGE_SIZE;
			len--;
			continue;
		}

		if (!vma ||
		    (vma->vm_flags & (VM_IO | VM_PFNMAP)) ||
		    (!ignore && !(vm_flags & vma->vm_flags)))
			return i ? : -EFAULT;

		if (is_vm_hugetlb_page(vma)) {
#if LINUX_VERSION_CODE <= KERNEL_VERSION(2,6,18)
			i = follow_hugetlb_page(mm, vma, pages, vmas,
						&start, &len, i);
#else /* LINUX_VERSION_CODE <= KERNEL_VERSION(2,6,18) */
			i = follow_hugetlb_page(mm, vma, pages, vmas,
						&start, &len, i, write);
#endif /* LINUX_VERSION_CODE <= KERNEL_VERSION(2,6,18) */
			continue;
		}

		foll_flags = FOLL_TOUCH;
		if (pages)
			foll_flags |= FOLL_GET;

#if LINUX_VERSION_CODE > KERNEL_VERSION(2,6,18)
#if LINUX_VERSION_CODE <= KERNEL_VERSION(2,6,30)
		if (!write && use_zero_page(vma))
			foll_flags |= FOLL_ANON;
#endif /* LINUX_VERSION_CODE <= KERNEL_VERSION(2,6,30) */
#endif /* LINUX_VERSION_CODE > KERNEL_VERSION(2,6,18) */

		do {
			struct page *page;

			if (write)
				foll_flags |= FOLL_WRITE;

			/* cond_resched(); */

			DBPRINTF("pages = %p vma = %p\n", pages, vma);
			while (!(page = follow_page(vma, start, foll_flags))) {
				int ret;
				ret = handle_mm_fault(mm, vma, start,
						foll_flags & FOLL_WRITE);

#if LINUX_VERSION_CODE <= KERNEL_VERSION(2,6,18)
				if (ret & VM_FAULT_WRITE)
					foll_flags &= ~FOLL_WRITE;

				switch (ret & ~VM_FAULT_WRITE) {
				case VM_FAULT_MINOR:
					tsk->min_flt++;
					break;
				case VM_FAULT_MAJOR:
					tsk->maj_flt++;
					break;
				case VM_FAULT_SIGBUS:
					return i ? i : -EFAULT;
				case VM_FAULT_OOM:
					return i ? i : -ENOMEM;
				default:
					BUG();
				}
#else /* LINUX_VERSION_CODE <= KERNEL_VERSION(2,6,18) */
				if (ret & VM_FAULT_ERROR) {
					if (ret & VM_FAULT_OOM)
						return i ? i : -ENOMEM;
					else if (ret & VM_FAULT_SIGBUS)
						return i ? i : -EFAULT;
					BUG();
				}
				if (ret & VM_FAULT_MAJOR)
					tsk->maj_flt++;
				else
					tsk->min_flt++;

				/*
				 * The VM_FAULT_WRITE bit tells us that
				 * do_wp_page has broken COW when necessary,
				 * even if maybe_mkwrite decided not to set
				 * pte_write. We can thus safely do subsequent
				 * page lookups as if they were reads. But only
				 * do so when looping for pte_write is futile:
				 * in some cases userspace may also be wanting
				 * to write to the gotten user page, which a
				 * read fault here might prevent (a readonly
				 * page might get reCOWed by userspace write).
				 */
				if ((ret & VM_FAULT_WRITE) &&
				    !(vma->vm_flags & VM_WRITE))
					foll_flags &= ~FOLL_WRITE;

				/* cond_resched(); */
#endif /* LINUX_VERSION_CODE <= KERNEL_VERSION(2,6,18) */
			}

			if (IS_ERR(page))
				return i ? i : PTR_ERR(page);
			if (pages) {
				pages[i] = page;

#if LINUX_VERSION_CODE <= KERNEL_VERSION(2,6,18)
				flush_anon_page(page, start);
#else /* LINUX_VERSION_CODE <= KERNEL_VERSION(2,6,18) */
				flush_anon_page(vma, page, start);
#endif /* LINUX_VERSION_CODE <= KERNEL_VERSION(2,6,18) */
				flush_dcache_page(page);
			}
			if (vmas)
				vmas[i] = vma;
			i++;
			start += PAGE_SIZE;
			len--;
		} while (len && start < vma->vm_end);
	} while (len);

	return i;
}

#endif /* LINUX_VERSION_CODE > KERNEL_VERSION(2, 6, 38) */
int get_user_pages_uprobe(struct task_struct *tsk, struct mm_struct *mm,
		unsigned long start, int len, int write, int force,
		struct page **pages, struct vm_area_struct **vmas)
{
#if LINUX_VERSION_CODE >= KERNEL_VERSION(2, 6, 29)
#if LINUX_VERSION_CODE > KERNEL_VERSION(2, 6, 38) /* FIXME: must be >= 32! */
	int flags = FOLL_TOUCH;

	if (pages)
		flags |= FOLL_GET;
	if (write)
		flags |= FOLL_WRITE;
	if (force)
		flags |= FOLL_FORCE;
#else /* LINUX_VERSION_CODE > KERNEL_VERSION(2, 6, 38) */
	int flags = 0;

	if (write)
		flags |= GUP_FLAGS_WRITE;
	if (force)
		flags |= GUP_FLAGS_FORCE;
#endif /* LINUX_VERSION_CODE > KERNEL_VERSION(2, 6, 38) */

	return __get_user_pages_uprobe(tsk, mm,
				start, len, flags,
#if LINUX_VERSION_CODE > KERNEL_VERSION(2, 6, 38)
				pages, vmas, NULL);
#else /* LINUX_VERSION_CODE > KERNEL_VERSION(2, 6, 38) */
				pages, vmas);
#endif /* LINUX_VERSION_CODE > KERNEL_VERSION(2, 6, 38) */
#else /* LINUX_VERSION_CODE >= KERNEL_VERSION(2, 6, 29) */
	return get_user_pages(tsk, mm, start, len, write, force, pages, vmas);
#endif /* LINUX_VERSION_CODE >= KERNEL_VERSION(2, 6, 29) */
}
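
/*
 * Usage sketch (illustrative, variable names hypothetical): pinning a single
 * page of a traced task, much as access_process_vm_atomic() below does.
 * The fifth and sixth arguments are 'write' and 'force':
 *
 *	struct page *page;
 *	struct vm_area_struct *vma;
 *	int ret = get_user_pages_uprobe(task, task->mm, addr, 1, 0, 1,
 *					&page, &vma);
 *	if (ret > 0) {
 *		... read through a mapping of 'page' ...
 *		page_cache_release(page);
 *	}
 */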
#define ACCESS_PROCESS_OPTIMIZATION 0

#if ACCESS_PROCESS_OPTIMIZATION

#define GET_STEP_X(LEN, STEP) (((LEN) >= (STEP)) ? (STEP) : (LEN) % (STEP))
#define GET_STEP_4(LEN) GET_STEP_X((LEN), 4)

static void read_data_current(unsigned long addr, void *buf, int len)
{
	int step;
	int pos = 0;

	for (step = GET_STEP_4(len); len; len -= step) {
		switch (GET_STEP_4(len)) {
		case 1:
			get_user(*(u8 *)(buf + pos), (unsigned long *)(addr + pos));
			step = 1;
			break;
		case 2:
		case 3:
			get_user(*(u16 *)(buf + pos), (unsigned long *)(addr + pos));
			step = 2;
			break;
		case 4:
			get_user(*(u32 *)(buf + pos), (unsigned long *)(addr + pos));
			step = 4;
			break;
		}

		pos += step;
	}
}

static void write_data_current(unsigned long addr, void *buf, int len)
{
	int step;
	int pos = 0;

	for (step = GET_STEP_4(len); len; len -= step) {
		switch (GET_STEP_4(len)) {
		case 1:
			put_user(*(u8 *)(buf + pos), (unsigned long *)(addr + pos));
			step = 1;
			break;
		case 2:
		case 3:
			put_user(*(u16 *)(buf + pos), (unsigned long *)(addr + pos));
			step = 2;
			break;
		case 4:
			put_user(*(u32 *)(buf + pos), (unsigned long *)(addr + pos));
			step = 4;
			break;
		}

		pos += step;
	}
}
#endif /* ACCESS_PROCESS_OPTIMIZATION */
int access_process_vm_atomic(struct task_struct *tsk, unsigned long addr, void *buf, int len, int write)
{
	struct mm_struct *mm;
	struct vm_area_struct *vma;
	void *old_buf = buf;

#if ACCESS_PROCESS_OPTIMIZATION
	if (write == 0 && tsk == current) {
		read_data_current(addr, buf, len);
		return len;
	}
#endif

	mm = tsk->mm; /* function 'get_task_mm' is to be called */
	if (!mm)
		return 0;

	/* ignore errors, just check how much was successfully transferred */
	while (len) {
		int bytes, ret, offset;
		void *maddr;
		struct page *page = NULL;

		ret = get_user_pages_uprobe(tsk, mm, addr, 1,
						write, 1, &page, &vma);

		if (ret <= 0) {
			/*
			 * Check if this is a VM_IO | VM_PFNMAP VMA, which
			 * we can access using slightly different code.
			 */
#ifdef CONFIG_HAVE_IOREMAP_PROT
			vma = find_vma(mm, addr);
			if (!vma)
				break;
			if (vma->vm_ops && vma->vm_ops->access)
				ret = vma->vm_ops->access(vma, addr, buf,
							  len, write);
			if (ret <= 0)
#endif
				break;
			bytes = ret;
		} else {
			bytes = len;
			offset = addr & (PAGE_SIZE - 1);
			if (bytes > PAGE_SIZE - offset)
				bytes = PAGE_SIZE - offset;

			maddr = dbi_kmap_atomic(page);

			if (write) {
				copy_to_user_page(vma, page, addr,
						  maddr + offset, buf, bytes);
				set_page_dirty_lock(page);
			} else {
				copy_from_user_page(vma, page, addr,
						    buf, maddr + offset, bytes);
			}

			dbi_kunmap_atomic(maddr);
			page_cache_release(page);
		}
		len -= bytes;
		buf += bytes;
		addr += bytes;
	}

	return buf - old_buf;
}
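
/*
 * Usage sketch (illustrative, names hypothetical): access_process_vm_atomic()
 * mirrors access_process_vm() but goes through the helpers above instead of
 * the exported API, which lets probe handlers read a traced task's memory:
 *
 *	unsigned long val;
 *	if (access_process_vm_atomic(task, uaddr, &val, sizeof(val), 0)
 *			!= sizeof(val))
 *		return -EFAULT;
 */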
int page_present(struct mm_struct *mm, unsigned long address)
{
	pgd_t *pgd;
	pud_t *pud;
	pmd_t *pmd;
	pte_t *ptep, pte;
	unsigned long pfn;

	pgd = pgd_offset(mm, address);
	if (pgd_none(*pgd) || unlikely(pgd_bad(*pgd)))
		goto out;

	pud = pud_offset(pgd, address);
	if (pud_none(*pud) || unlikely(pud_bad(*pud)))
		goto out;

	pmd = pmd_offset(pud, address);
	if (pmd_none(*pmd) || unlikely(pmd_bad(*pmd)))
		goto out;

	ptep = pte_offset_map(pmd, address);
	if (!ptep)
		goto out;

	pte = *ptep;
	pte_unmap(ptep);
	if (pte_present(pte)) {
		pfn = pte_pfn(pte);
		if (pfn_valid(pfn)) {
			return 1;
		}
	}

out:
	return 0;
}
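
/*
 * Usage sketch (illustrative): page_present() lets callers check that a user
 * address is currently backed by a valid page before touching it:
 *
 *	if (!page_present(task->mm, uaddr & PAGE_MASK))
 *		return -EFAULT;
 */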
EXPORT_SYMBOL_GPL(page_present);
EXPORT_SYMBOL_GPL(get_user_pages_uprobe);
EXPORT_SYMBOL_GPL(access_process_vm_atomic);