/*
 * Dynamic Binary Instrumentation Module based on KProbes
 * modules/kprobe/dbi_kprobes_deps.c
 *
 * This program is free software; you can redistribute it and/or modify
 * it under the terms of the GNU General Public License as published by
 * the Free Software Foundation; either version 2 of the License, or
 * (at your option) any later version.
 *
 * This program is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 * GNU General Public License for more details.
 *
 * You should have received a copy of the GNU General Public License
 * along with this program; if not, write to the Free Software
 * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
 *
 * Copyright (C) Samsung Electronics, 2006-2010
 *
 * 2008-2009    Alexey Gerenkov <a.gerenkov@samsung.com>: User-Space Probes
 *              initial implementation; support of x86/ARM/MIPS for both user
 *              and kernel spaces.
 * 2010         Ekaterina Gorelkina <e.gorelkina@samsung.com>: redesign of the
 *              module to separate the core and architecture-specific parts.
 */

#include <linux/module.h>
#include <linux/sched.h>

#include <asm/pgtable.h>

#include "dbi_kprobes_deps.h"
#include "dbi_kdebug.h"

#include <linux/slab.h>

unsigned long sched_addr;
unsigned long fork_addr;
unsigned long exit_addr;
unsigned long sys_exit_group_addr;
unsigned long do_group_exit_addr;

#if LINUX_VERSION_CODE > KERNEL_VERSION(2, 6, 29)
/* kernel define 'pgd_offset_k' redefinition */
#undef pgd_offset_k
#define pgd_offset_k(addr)    pgd_offset(init_task.active_mm, addr)
#endif

#if (LINUX_VERSION_CODE > KERNEL_VERSION(2, 6, 38))
#ifndef is_zero_pfn
static unsigned long swap_zero_pfn = 0;
#endif /* is_zero_pfn */
#endif /* (LINUX_VERSION_CODE > KERNEL_VERSION(2, 6, 38)) */

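/*
 * kmap_atomic() lost its KM_* slot argument after 2.6.36; these helpers
 * hide that difference from the rest of the file.
 */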
#if LINUX_VERSION_CODE > KERNEL_VERSION(2, 6, 36)
static inline void *dbi_kmap_atomic(struct page *page)
{
    return kmap_atomic(page);
}

static inline void dbi_kunmap_atomic(void *kvaddr)
{
    kunmap_atomic(kvaddr);
}
#else /* LINUX_VERSION_CODE > KERNEL_VERSION(2, 6, 36) */
static inline void *dbi_kmap_atomic(struct page *page)
{
    return kmap_atomic(page, KM_USER0);
}

static inline void dbi_kunmap_atomic(void *kvaddr)
{
    kunmap_atomic(kvaddr, KM_USER0);
}
#endif /* LINUX_VERSION_CODE > KERNEL_VERSION(2, 6, 36) */

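/*
 * The DECLARE_MOD_FUNC_DEP/DECLARE_MOD_DEP_WRAPPER/IMP_MOD_DEP_WRAPPER
 * macros (defined in dbi_kprobes_deps.h) declare pointers to kernel
 * functions that are not exported to modules and generate wrappers so
 * the rest of this file can call them by their usual names.  The
 * pointers are resolved at load time by init_module_dependencies()
 * below, presumably through a kernel symbol lookup.
 */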
#if LINUX_VERSION_CODE >= KERNEL_VERSION(3, 9, 0)
DECLARE_MOD_FUNC_DEP(do_mmap_pgoff, unsigned long, struct file *file, unsigned long addr, unsigned long len, unsigned long prot, unsigned long flags, unsigned long pgoff, unsigned long *populate);
DECLARE_MOD_DEP_WRAPPER(do_mmap_pgoff, unsigned long, struct file *file, unsigned long addr, unsigned long len, unsigned long prot, unsigned long flags, unsigned long pgoff, unsigned long *populate)
IMP_MOD_DEP_WRAPPER(do_mmap_pgoff, file, addr, len, prot, flags, pgoff, populate)
#elif LINUX_VERSION_CODE >= KERNEL_VERSION(3, 4, 0) /* LINUX_VERSION_CODE >= KERNEL_VERSION(3, 9, 0) */
DECLARE_MOD_FUNC_DEP(do_mmap_pgoff, unsigned long, struct file *file, unsigned long addr, unsigned long len, unsigned long prot, unsigned long flags, unsigned long pgoff);
DECLARE_MOD_DEP_WRAPPER(do_mmap_pgoff, unsigned long, struct file *file, unsigned long addr, unsigned long len, unsigned long prot, unsigned long flags, unsigned long pgoff)
IMP_MOD_DEP_WRAPPER(do_mmap_pgoff, file, addr, len, prot, flags, pgoff)
#endif /* LINUX_VERSION_CODE >= KERNEL_VERSION(3, 4, 0) */

#if LINUX_VERSION_CODE >= KERNEL_VERSION(3, 4, 0)
EXPORT_SYMBOL_GPL(do_mmap_pgoff);
#endif /* LINUX_VERSION_CODE >= KERNEL_VERSION(3, 4, 0) */

/* copy_to_user_page */
#ifndef copy_to_user_page
static DECLARE_MOD_FUNC_DEP(copy_to_user_page, void, struct vm_area_struct *vma, struct page *page, unsigned long uaddr, void *dst, const void *src, unsigned long len);
DECLARE_MOD_DEP_WRAPPER(copy_to_user_page, void, struct vm_area_struct *vma, struct page *page, unsigned long uaddr, void *dst, const void *src, unsigned long len)
IMP_MOD_DEP_WRAPPER(copy_to_user_page, vma, page, uaddr, dst, src, len)
#endif /* copy_to_user_page */

static DECLARE_MOD_FUNC_DEP(access_process_vm, int, struct task_struct *tsk, unsigned long addr, void *buf, int len, int write);

static DECLARE_MOD_FUNC_DEP(find_extend_vma, struct vm_area_struct *, struct mm_struct *mm, unsigned long addr);

#if LINUX_VERSION_CODE <= KERNEL_VERSION(2, 6, 30)
#if LINUX_VERSION_CODE > KERNEL_VERSION(2, 6, 18)
static DECLARE_MOD_FUNC_DEP(handle_mm_fault, int, struct mm_struct *mm, struct vm_area_struct *vma, unsigned long address, int write_access);
#endif /* LINUX_VERSION_CODE > KERNEL_VERSION(2, 6, 18) */
#else /* LINUX_VERSION_CODE <= KERNEL_VERSION(2, 6, 30) */
static DECLARE_MOD_FUNC_DEP(handle_mm_fault, int, struct mm_struct *mm, struct vm_area_struct *vma, unsigned long address, unsigned int flags);
#endif /* LINUX_VERSION_CODE <= KERNEL_VERSION(2, 6, 30) */

#if LINUX_VERSION_CODE > KERNEL_VERSION(2, 6, 38)
static DECLARE_MOD_FUNC_DEP(get_gate_vma, struct vm_area_struct *, struct mm_struct *mm);
#else /* LINUX_VERSION_CODE > KERNEL_VERSION(2, 6, 38) */
static DECLARE_MOD_FUNC_DEP(get_gate_vma, struct vm_area_struct *, struct task_struct *tsk);
#endif /* LINUX_VERSION_CODE > KERNEL_VERSION(2, 6, 38) */

#ifdef CONFIG_HUGETLB_PAGE
DECLARE_MOD_FUNC_DEP(follow_hugetlb_page, int, struct mm_struct *mm, \
        struct vm_area_struct *vma, struct page **pages, \
        struct vm_area_struct **vmas, unsigned long *position, int *length, \
        int i, unsigned int write);
#endif /* CONFIG_HUGETLB_PAGE */

#ifdef __HAVE_ARCH_GATE_AREA
#if LINUX_VERSION_CODE > KERNEL_VERSION(2, 6, 38)
DECLARE_MOD_FUNC_DEP(in_gate_area, int, struct mm_struct *mm, unsigned long addr);
#else /* LINUX_VERSION_CODE > KERNEL_VERSION(2, 6, 38) */
DECLARE_MOD_FUNC_DEP(in_gate_area, int, struct task_struct *task, unsigned long addr);
#endif /* LINUX_VERSION_CODE > KERNEL_VERSION(2, 6, 38) */
#endif /* __HAVE_ARCH_GATE_AREA */

#if LINUX_VERSION_CODE > KERNEL_VERSION(2, 6, 38)
static DECLARE_MOD_FUNC_DEP(in_gate_area_no_mm, int, unsigned long addr);
#else /* LINUX_VERSION_CODE > KERNEL_VERSION(2, 6, 38) */
static DECLARE_MOD_FUNC_DEP(in_gate_area_no_task, int, unsigned long addr);
#endif /* LINUX_VERSION_CODE > KERNEL_VERSION(2, 6, 38) */

#if LINUX_VERSION_CODE >= KERNEL_VERSION(3, 9, 0)
static DECLARE_MOD_FUNC_DEP(follow_page_mask, \
        struct page *, struct vm_area_struct *vma, \
        unsigned long address, unsigned int foll_flags, \
        unsigned int *page_mask);
DECLARE_MOD_DEP_WRAPPER(follow_page_mask, struct page *, \
        struct vm_area_struct *vma, \
        unsigned long address, \
        unsigned int foll_flags, \
        unsigned int *page_mask)
IMP_MOD_DEP_WRAPPER(follow_page_mask, vma, address, foll_flags, page_mask)
#else /* LINUX_VERSION_CODE >= KERNEL_VERSION(3, 9, 0) */
static DECLARE_MOD_FUNC_DEP(follow_page, \
        struct page *, struct vm_area_struct *vma, \
        unsigned long address, unsigned int foll_flags);
DECLARE_MOD_DEP_WRAPPER(follow_page, struct page *, \
        struct vm_area_struct *vma, \
        unsigned long address, \
        unsigned int foll_flags)
IMP_MOD_DEP_WRAPPER(follow_page, vma, address, foll_flags)
#endif /* LINUX_VERSION_CODE >= KERNEL_VERSION(3, 9, 0) */

static DECLARE_MOD_FUNC_DEP(__flush_anon_page, \
        void, struct vm_area_struct *vma, struct page *page, \
        unsigned long vmaddr);
static DECLARE_MOD_FUNC_DEP(vm_normal_page, \
        struct page *, struct vm_area_struct *vma, \
        unsigned long addr, pte_t pte);
static DECLARE_MOD_FUNC_DEP(flush_ptrace_access, \
        void, struct vm_area_struct *vma, struct page *page, \
        unsigned long uaddr, void *kaddr, unsigned long len, int write);

#if (LINUX_VERSION_CODE != KERNEL_VERSION(2, 6, 16))
static DECLARE_MOD_FUNC_DEP(put_task_struct, \
        void, struct task_struct *tsk);
#else /* (LINUX_VERSION_CODE != KERNEL_VERSION(2, 6, 16)) */
static DECLARE_MOD_FUNC_DEP(put_task_struct, \
        void, struct rcu_head *rhp);
#endif /* (LINUX_VERSION_CODE != KERNEL_VERSION(2, 6, 16)) */

DECLARE_MOD_DEP_WRAPPER(access_process_vm, int, struct task_struct *tsk, unsigned long addr, void *buf, int len, int write)
IMP_MOD_DEP_WRAPPER(access_process_vm, tsk, addr, buf, len, write)

DECLARE_MOD_DEP_WRAPPER(find_extend_vma, struct vm_area_struct *, struct mm_struct *mm, unsigned long addr)
IMP_MOD_DEP_WRAPPER(find_extend_vma, mm, addr)

#if LINUX_VERSION_CODE <= KERNEL_VERSION(2, 6, 30)
#if LINUX_VERSION_CODE > KERNEL_VERSION(2, 6, 18)
DECLARE_MOD_DEP_WRAPPER(handle_mm_fault, \
        int, struct mm_struct *mm, struct vm_area_struct *vma, unsigned long address, int write_access)
IMP_MOD_DEP_WRAPPER(handle_mm_fault, mm, vma, address, write_access)
#endif /* LINUX_VERSION_CODE > KERNEL_VERSION(2, 6, 18) */
#else /* LINUX_VERSION_CODE <= KERNEL_VERSION(2, 6, 30) */
DECLARE_MOD_DEP_WRAPPER(handle_mm_fault, \
        int, struct mm_struct *mm, struct vm_area_struct *vma, unsigned long address, unsigned int flags)
IMP_MOD_DEP_WRAPPER(handle_mm_fault, mm, vma, address, flags)
#endif /* LINUX_VERSION_CODE <= KERNEL_VERSION(2, 6, 30) */

#if LINUX_VERSION_CODE > KERNEL_VERSION(2, 6, 38)
DECLARE_MOD_DEP_WRAPPER(get_gate_vma, \
        struct vm_area_struct *, struct mm_struct *mm)
IMP_MOD_DEP_WRAPPER(get_gate_vma, mm)
#else /* LINUX_VERSION_CODE > KERNEL_VERSION(2, 6, 38) */
DECLARE_MOD_DEP_WRAPPER(get_gate_vma, \
        struct vm_area_struct *, struct task_struct *tsk)
IMP_MOD_DEP_WRAPPER(get_gate_vma, tsk)
#endif /* LINUX_VERSION_CODE > KERNEL_VERSION(2, 6, 38) */

#ifdef CONFIG_HUGETLB_PAGE
DECLARE_MOD_DEP_WRAPPER(follow_hugetlb_page, int, struct mm_struct *mm, struct vm_area_struct *vma, struct page **pages, struct vm_area_struct **vmas, unsigned long *position, int *length, int i, unsigned int write)
IMP_MOD_DEP_WRAPPER(follow_hugetlb_page, mm, vma, pages, vmas, position, length, i, write)
#endif /* CONFIG_HUGETLB_PAGE */

static inline int dbi_in_gate_area(struct task_struct *task, unsigned long addr)
{
#ifdef __HAVE_ARCH_GATE_AREA
#if LINUX_VERSION_CODE > KERNEL_VERSION(2, 6, 38)
    struct mm_struct *mm = task->mm;
    IMP_MOD_DEP_WRAPPER(in_gate_area, mm, addr)
#else /* LINUX_VERSION_CODE > KERNEL_VERSION(2, 6, 38) */
    IMP_MOD_DEP_WRAPPER(in_gate_area, task, addr)
#endif /* LINUX_VERSION_CODE > KERNEL_VERSION(2, 6, 38) */
#else /* __HAVE_ARCH_GATE_AREA */
    return in_gate_area(task, addr);
#endif /* __HAVE_ARCH_GATE_AREA */
}

#if LINUX_VERSION_CODE > KERNEL_VERSION(2, 6, 38)
DECLARE_MOD_DEP_WRAPPER(in_gate_area_no_mm, int, unsigned long addr)
IMP_MOD_DEP_WRAPPER(in_gate_area_no_mm, addr)
#else /* LINUX_VERSION_CODE > KERNEL_VERSION(2, 6, 38) */
DECLARE_MOD_DEP_WRAPPER(in_gate_area_no_task, int, unsigned long addr)
IMP_MOD_DEP_WRAPPER(in_gate_area_no_task, addr)
#endif /* LINUX_VERSION_CODE > KERNEL_VERSION(2, 6, 38) */

static inline int dbi_in_gate_area_no_xxx(unsigned long addr)
{
#if LINUX_VERSION_CODE > KERNEL_VERSION(2, 6, 38)
    return in_gate_area_no_mm(addr);
#else /* LINUX_VERSION_CODE > KERNEL_VERSION(2, 6, 38) */
    return in_gate_area_no_task(addr);
#endif /* LINUX_VERSION_CODE > KERNEL_VERSION(2, 6, 38) */
}

DECLARE_MOD_DEP_WRAPPER(__flush_anon_page, \
        void, struct vm_area_struct *vma, \
        struct page *page, unsigned long vmaddr)
IMP_MOD_DEP_WRAPPER(__flush_anon_page, vma, page, vmaddr)

DECLARE_MOD_DEP_WRAPPER(vm_normal_page, \
        struct page *, struct vm_area_struct *vma, \
        unsigned long addr, pte_t pte)
IMP_MOD_DEP_WRAPPER(vm_normal_page, vma, addr, pte)

DECLARE_MOD_DEP_WRAPPER(flush_ptrace_access, \
        void, struct vm_area_struct *vma, struct page *page, \
        unsigned long uaddr, void *kaddr, unsigned long len, int write)
IMP_MOD_DEP_WRAPPER(flush_ptrace_access, vma, page, uaddr, kaddr, len, write)

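/*
 * Resolve all of the non-exported kernel functions declared above.
 * Intended to be called once at module load, before any of the
 * wrappers in this file are used.
 */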
int init_module_dependencies(void)
{
#if LINUX_VERSION_CODE > KERNEL_VERSION(2, 6, 18)
    INIT_MOD_DEP_VAR(handle_mm_fault, handle_mm_fault);
#endif

#ifndef copy_to_user_page
    INIT_MOD_DEP_VAR(copy_to_user_page, copy_to_user_page);
#endif /* copy_to_user_page */

    INIT_MOD_DEP_VAR(find_extend_vma, find_extend_vma);
    INIT_MOD_DEP_VAR(get_gate_vma, get_gate_vma);
    INIT_MOD_DEP_VAR(flush_ptrace_access, flush_ptrace_access);

#ifdef CONFIG_HUGETLB_PAGE
    INIT_MOD_DEP_VAR(follow_hugetlb_page, follow_hugetlb_page);
#endif /* CONFIG_HUGETLB_PAGE */

#ifdef __HAVE_ARCH_GATE_AREA
    INIT_MOD_DEP_VAR(in_gate_area, in_gate_area);
#endif /* __HAVE_ARCH_GATE_AREA */

#if LINUX_VERSION_CODE >= KERNEL_VERSION(3, 9, 0)
    INIT_MOD_DEP_VAR(follow_page_mask, follow_page_mask);
#else /* LINUX_VERSION_CODE >= KERNEL_VERSION(3, 9, 0) */
    INIT_MOD_DEP_VAR(follow_page, follow_page);
#endif /* LINUX_VERSION_CODE >= KERNEL_VERSION(3, 9, 0) */

#if LINUX_VERSION_CODE > KERNEL_VERSION(2, 6, 38)

#ifndef is_zero_pfn
    swap_zero_pfn = page_to_pfn(ZERO_PAGE(0));
#endif /* is_zero_pfn */

    INIT_MOD_DEP_VAR(in_gate_area_no_mm, in_gate_area_no_mm);
#else /* (LINUX_VERSION_CODE > KERNEL_VERSION(2, 6, 38)) */
    INIT_MOD_DEP_VAR(in_gate_area_no_task, in_gate_area_no_task);
#endif /* (LINUX_VERSION_CODE > KERNEL_VERSION(2, 6, 38)) */

    INIT_MOD_DEP_VAR(__flush_anon_page, __flush_anon_page);
    INIT_MOD_DEP_VAR(vm_normal_page, vm_normal_page);
    INIT_MOD_DEP_VAR(access_process_vm, access_process_vm);

#if (LINUX_VERSION_CODE != KERNEL_VERSION(2, 6, 16))
# if (LINUX_VERSION_CODE < KERNEL_VERSION(2, 6, 11))
    INIT_MOD_DEP_VAR(put_task_struct, put_task_struct);
# else
    INIT_MOD_DEP_VAR(put_task_struct, __put_task_struct);
# endif
#else /* (LINUX_VERSION_CODE != KERNEL_VERSION(2, 6, 16)) */
    INIT_MOD_DEP_VAR(put_task_struct, __put_task_struct_cb);
#endif /* (LINUX_VERSION_CODE != KERNEL_VERSION(2, 6, 16)) */

#if LINUX_VERSION_CODE >= KERNEL_VERSION(3, 4, 0)
    INIT_MOD_DEP_VAR(do_mmap_pgoff, do_mmap_pgoff);
#endif /* LINUX_VERSION_CODE >= KERNEL_VERSION(3, 4, 0) */

    return 0;
}

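/*
 * Pre-2.6.32 kernels drove the internal __get_user_pages() with these
 * GUP_FLAGS_* bits (mm/internal.h in those kernels, hence invisible to
 * modules); they are duplicated here for the legacy code path below.
 */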
#if LINUX_VERSION_CODE <= KERNEL_VERSION(2, 6, 38) /* FIXME: must be < 32 */
#define GUP_FLAGS_WRITE                     0x1
#define GUP_FLAGS_FORCE                     0x2
#define GUP_FLAGS_IGNORE_VMA_PERMISSIONS    0x4
#define GUP_FLAGS_IGNORE_SIGKILL            0x8
#endif /* LINUX_VERSION_CODE <= KERNEL_VERSION(2, 6, 38) */

#if LINUX_VERSION_CODE > KERNEL_VERSION(2,6,18)
static inline int use_zero_page(struct vm_area_struct *vma)
{
    /*
     * We don't want to optimize FOLL_ANON for make_pages_present()
     * when it tries to page in a VM_LOCKED region. As to VM_SHARED,
     * we want to get the page from the page tables to make sure
     * that we serialize and update with any other user of that
     * mapping.
     */
    if (vma->vm_flags & (VM_LOCKED | VM_SHARED))
        return 0;
    /*
     * And if we have a fault routine, it's not an anonymous region.
     */
    return !vma->vm_ops || !vma->vm_ops->fault;
}
#endif /* LINUX_VERSION_CODE > KERNEL_VERSION(2,6,18) */

#if LINUX_VERSION_CODE > KERNEL_VERSION(2, 6, 38)

#if LINUX_VERSION_CODE >= KERNEL_VERSION(3, 8, 0)

#ifdef __HAVE_COLOR_ZERO_PAGE

static inline int swap_is_zero_pfn(unsigned long pfn)
{
    unsigned long offset_from_zero_pfn = pfn - swap_zero_pfn;
    return offset_from_zero_pfn <= (zero_page_mask >> PAGE_SHIFT);
}

#else /* __HAVE_COLOR_ZERO_PAGE */

static inline int swap_is_zero_pfn(unsigned long pfn)
{
    return pfn == swap_zero_pfn;
}

#endif /* __HAVE_COLOR_ZERO_PAGE */

#else /* LINUX_VERSION_CODE >= KERNEL_VERSION(3, 8, 0) */

static inline int swap_is_zero_pfn(unsigned long pfn)
{
#ifndef is_zero_pfn
    return pfn == swap_zero_pfn;
#else /* is_zero_pfn */
    return is_zero_pfn(pfn);
#endif /* is_zero_pfn */
}

#endif /* LINUX_VERSION_CODE >= KERNEL_VERSION(3, 8, 0) */

static inline int stack_guard_page(struct vm_area_struct *vma, unsigned long addr)
{
    return stack_guard_page_start(vma, addr) ||
           stack_guard_page_end(vma, addr + PAGE_SIZE);
}

#if LINUX_VERSION_CODE >= KERNEL_VERSION(3, 9, 0)

static inline struct page *follow_page_uprobe(struct vm_area_struct *vma,
        unsigned long address, unsigned int foll_flags)
{
    unsigned int unused_page_mask;
    return follow_page_mask(vma, address, foll_flags, &unused_page_mask);
}

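/*
 * Local copy of the kernel's __get_user_pages() (3.9+ layout).  Apart
 * from using the dbi_ and swap_ helpers above, the main visible change
 * is that the cond_resched() calls are commented out, so the function
 * can be used where sleeping is not allowed.
 */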
long __get_user_pages_uprobe(struct task_struct *tsk, struct mm_struct *mm,
        unsigned long start, unsigned long nr_pages,
        unsigned int gup_flags, struct page **pages,
        struct vm_area_struct **vmas, int *nonblocking)
{
    long i;
    unsigned long vm_flags;
    unsigned int page_mask;

    if (!nr_pages)
        return 0;

    VM_BUG_ON(!!pages != !!(gup_flags & FOLL_GET));

    /*
     * Require read or write permissions.
     * If FOLL_FORCE is set, we only require the "MAY" flags.
     */
    vm_flags = (gup_flags & FOLL_WRITE) ?
            (VM_WRITE | VM_MAYWRITE) : (VM_READ | VM_MAYREAD);
    vm_flags &= (gup_flags & FOLL_FORCE) ?
            (VM_MAYREAD | VM_MAYWRITE) : (VM_READ | VM_WRITE);

    /*
     * If FOLL_FORCE and FOLL_NUMA are both set, handle_mm_fault
     * would be called on PROT_NONE ranges. We must never invoke
     * handle_mm_fault on PROT_NONE ranges or the NUMA hinting
     * page faults would unprotect the PROT_NONE ranges if
     * _PAGE_NUMA and _PAGE_PROTNONE are sharing the same pte/pmd
     * bitflag. So to avoid that, don't set FOLL_NUMA if
     * FOLL_FORCE is set.
     */
    if (!(gup_flags & FOLL_FORCE))
        gup_flags |= FOLL_NUMA;
    i = 0;

    do {
        struct vm_area_struct *vma;

        vma = find_extend_vma(mm, start);
        if (!vma && dbi_in_gate_area(tsk, start)) {
            unsigned long pg = start & PAGE_MASK;
            pgd_t *pgd;
            pud_t *pud;
            pmd_t *pmd;
            pte_t *pte;

            /* user gate pages are read-only */
            if (gup_flags & FOLL_WRITE)
                return i ? : -EFAULT;
            if (pg > TASK_SIZE)
                pgd = pgd_offset_k(pg);
            else
                pgd = pgd_offset_gate(mm, pg);
            BUG_ON(pgd_none(*pgd));
            pud = pud_offset(pgd, pg);
            BUG_ON(pud_none(*pud));
            pmd = pmd_offset(pud, pg);
            if (pmd_none(*pmd))
                return i ? : -EFAULT;
            VM_BUG_ON(pmd_trans_huge(*pmd));
            pte = pte_offset_map(pmd, pg);
            if (pte_none(*pte)) {
                pte_unmap(pte);
                return i ? : -EFAULT;
            }
            vma = get_gate_vma(mm);
            if (pages) {
                struct page *page;

                page = vm_normal_page(vma, start, *pte);
                if (!page) {
                    if (!(gup_flags & FOLL_DUMP) &&
                         swap_is_zero_pfn(pte_pfn(*pte)))
                        page = pte_page(*pte);
                    else {
                        pte_unmap(pte);
                        return i ? : -EFAULT;
                    }
                }
                pages[i] = page;
                get_page(page);
            }
            pte_unmap(pte);
            page_mask = 0;
            goto next_page;
        }

        if (!vma ||
            (vma->vm_flags & (VM_IO | VM_PFNMAP)) ||
            !(vm_flags & vma->vm_flags))
            return i ? : -EFAULT;
        if (is_vm_hugetlb_page(vma)) {
            i = follow_hugetlb_page(mm, vma, pages, vmas,
                    &start, &nr_pages, i, gup_flags);
            continue;
        }

        do {
            struct page *page;
            unsigned int foll_flags = gup_flags;
            unsigned int page_increm;

            /*
             * If we have a pending SIGKILL, don't keep faulting
             * pages and potentially allocating memory.
             */
            if (unlikely(fatal_signal_pending(current)))
                return i ? i : -ERESTARTSYS;

            /* cond_resched(); */
            while (!(page = follow_page_mask(vma, start,
                        foll_flags, &page_mask))) {
                int ret;
                unsigned int fault_flags = 0;

                /* For mlock, just skip the stack guard page. */
                if (foll_flags & FOLL_MLOCK) {
                    if (stack_guard_page(vma, start))
                        goto next_page;
                }
                if (foll_flags & FOLL_WRITE)
                    fault_flags |= FAULT_FLAG_WRITE;
                if (nonblocking)
                    fault_flags |= FAULT_FLAG_ALLOW_RETRY;
                if (foll_flags & FOLL_NOWAIT)
                    fault_flags |= (FAULT_FLAG_ALLOW_RETRY | FAULT_FLAG_RETRY_NOWAIT);

                ret = handle_mm_fault(mm, vma, start,
                            fault_flags);

                if (ret & VM_FAULT_ERROR) {
                    if (ret & VM_FAULT_OOM)
                        return i ? i : -ENOMEM;
                    if (ret & (VM_FAULT_HWPOISON |
                           VM_FAULT_HWPOISON_LARGE)) {
                        if (i)
                            return i;
                        else if (gup_flags & FOLL_HWPOISON)
                            return -EHWPOISON;
                        else
                            return -EFAULT;
                    }
                    if (ret & VM_FAULT_SIGBUS)
                        return i ? i : -EFAULT;
                    BUG();
                }

                if (tsk) {
                    if (ret & VM_FAULT_MAJOR)
                        tsk->maj_flt++;
                    else
                        tsk->min_flt++;
                }

                if (ret & VM_FAULT_RETRY) {
                    if (nonblocking)
                        *nonblocking = 0;
                    return i;
                }

                /*
                 * The VM_FAULT_WRITE bit tells us that
                 * do_wp_page has broken COW when necessary,
                 * even if maybe_mkwrite decided not to set
                 * pte_write. We can thus safely do subsequent
                 * page lookups as if they were reads. But only
                 * do so when looping for pte_write is futile:
                 * in some cases userspace may also be wanting
                 * to write to the gotten user page, which a
                 * read fault here might prevent (a readonly
                 * page might get reCOWed by userspace write).
                 */
                if ((ret & VM_FAULT_WRITE) &&
                    !(vma->vm_flags & VM_WRITE))
                    foll_flags &= ~FOLL_WRITE;

                /* cond_resched(); */
            }
            if (IS_ERR(page))
                return i ? i : PTR_ERR(page);
            if (pages) {
                pages[i] = page;

                flush_anon_page(vma, page, start);
                flush_dcache_page(page);
                page_mask = 0;
            }
next_page:
            if (vmas) {
                vmas[i] = vma;
                page_mask = 0;
            }
            page_increm = 1 + (~(start >> PAGE_SHIFT) & page_mask);
            if (page_increm > nr_pages)
                page_increm = nr_pages;
            i += page_increm;
            start += page_increm * PAGE_SIZE;
            nr_pages -= page_increm;
        } while (nr_pages && start < vma->vm_end);
    } while (nr_pages);

    return i;
}

#else /* LINUX_VERSION_CODE >= KERNEL_VERSION(3, 9, 0) */

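/*
 * Same idea for 2.6.39 .. 3.8 kernels: a copy of __get_user_pages()
 * from that era, with the cond_resched() calls commented out.
 */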
static int __get_user_pages_uprobe(struct task_struct *tsk, struct mm_struct *mm,
        unsigned long start, int nr_pages, unsigned int gup_flags,
        struct page **pages, struct vm_area_struct **vmas,
        int *nonblocking)
{
    int i;
    unsigned long vm_flags;

    if (nr_pages <= 0)
        return 0;

    VM_BUG_ON(!!pages != !!(gup_flags & FOLL_GET));

    /*
     * Require read or write permissions.
     * If FOLL_FORCE is set, we only require the "MAY" flags.
     */
    vm_flags = (gup_flags & FOLL_WRITE) ?
            (VM_WRITE | VM_MAYWRITE) : (VM_READ | VM_MAYREAD);
    vm_flags &= (gup_flags & FOLL_FORCE) ?
            (VM_MAYREAD | VM_MAYWRITE) : (VM_READ | VM_WRITE);
    i = 0;

    do {
        struct vm_area_struct *vma;

        vma = find_extend_vma(mm, start);
        if (!vma && dbi_in_gate_area_no_xxx(start)) {
            unsigned long pg = start & PAGE_MASK;
            pgd_t *pgd;
            pud_t *pud;
            pmd_t *pmd;
            pte_t *pte;

            /* user gate pages are read-only */
            if (gup_flags & FOLL_WRITE) {
                return i ? : -EFAULT;
            }
            if (pg > TASK_SIZE)
                pgd = pgd_offset_k(pg);
            else
                pgd = pgd_offset_gate(mm, pg);
            BUG_ON(pgd_none(*pgd));
            pud = pud_offset(pgd, pg);
            BUG_ON(pud_none(*pud));
            pmd = pmd_offset(pud, pg);
            if (pmd_none(*pmd)) {
                return i ? : -EFAULT;
            }
            VM_BUG_ON(pmd_trans_huge(*pmd));
            pte = pte_offset_map(pmd, pg);
            if (pte_none(*pte)) {
                pte_unmap(pte);
                return i ? : -EFAULT;
            }
            vma = get_gate_vma(mm);
            if (pages) {
                struct page *page;

                page = vm_normal_page(vma, start, *pte);
                if (!page) {
                    if (!(gup_flags & FOLL_DUMP) &&
                         swap_is_zero_pfn(pte_pfn(*pte)))
                        page = pte_page(*pte);
                    else {
                        pte_unmap(pte);
                        return i ? : -EFAULT;
                    }
                }
                pages[i] = page;
                get_page(page);
            }
            pte_unmap(pte);
            goto next_page;
        }

        if (!vma ||
            (vma->vm_flags & (VM_IO | VM_PFNMAP)) ||
            !(vm_flags & vma->vm_flags)) {
            return i ? : -EFAULT;
        }
        if (is_vm_hugetlb_page(vma)) {
            i = follow_hugetlb_page(mm, vma, pages, vmas,
                    &start, &nr_pages, i, gup_flags);
            continue;
        }

        do {
            struct page *page;
            unsigned int foll_flags = gup_flags;

            /*
             * If we have a pending SIGKILL, don't keep faulting
             * pages and potentially allocating memory.
             */
            if (unlikely(fatal_signal_pending(current))) {
                return i ? i : -ERESTARTSYS;
            }

            /* cond_resched(); */
            while (!(page = follow_page(vma, start, foll_flags))) {
                int ret;
                unsigned int fault_flags = 0;

                /* For mlock, just skip the stack guard page. */
                if (foll_flags & FOLL_MLOCK) {
                    if (stack_guard_page(vma, start))
                        goto next_page;
                }
                if (foll_flags & FOLL_WRITE)
                    fault_flags |= FAULT_FLAG_WRITE;
                if (nonblocking)
                    fault_flags |= FAULT_FLAG_ALLOW_RETRY;
                if (foll_flags & FOLL_NOWAIT)
                    fault_flags |= (FAULT_FLAG_ALLOW_RETRY | FAULT_FLAG_RETRY_NOWAIT);

                ret = handle_mm_fault(mm, vma, start,
                            fault_flags);

                if (ret & VM_FAULT_ERROR) {
                    if (ret & VM_FAULT_OOM) {
                        return i ? i : -ENOMEM;
                    }
                    if (ret & (VM_FAULT_HWPOISON |
                           VM_FAULT_HWPOISON_LARGE)) {
                        if (i) {
                            return i;
                        }
                        else if (gup_flags & FOLL_HWPOISON) {
                            return -EHWPOISON;
                        }
                        else {
                            return -EFAULT;
                        }
                    }
                    if (ret & VM_FAULT_SIGBUS) {
                        return i ? i : -EFAULT;
                    }
                    BUG();
                }

                if (tsk) {
                    if (ret & VM_FAULT_MAJOR)
                        tsk->maj_flt++;
                    else
                        tsk->min_flt++;
                }

                if (ret & VM_FAULT_RETRY) {
                    if (nonblocking)
                        *nonblocking = 0;
                    return i;
                }

                /*
                 * The VM_FAULT_WRITE bit tells us that
                 * do_wp_page has broken COW when necessary,
                 * even if maybe_mkwrite decided not to set
                 * pte_write. We can thus safely do subsequent
                 * page lookups as if they were reads. But only
                 * do so when looping for pte_write is futile:
                 * in some cases userspace may also be wanting
                 * to write to the gotten user page, which a
                 * read fault here might prevent (a readonly
                 * page might get reCOWed by userspace write).
                 */
                if ((ret & VM_FAULT_WRITE) &&
                    !(vma->vm_flags & VM_WRITE))
                    foll_flags &= ~FOLL_WRITE;

                /* cond_resched(); */
            }
            if (IS_ERR(page)) {
                return i ? i : PTR_ERR(page);
            }
            if (pages) {
                pages[i] = page;

                flush_anon_page(vma, page, start);
                flush_dcache_page(page);
            }
next_page:
            if (vmas)
                vmas[i] = vma;
            i++;
            start += PAGE_SIZE;
            nr_pages--;
        } while (nr_pages && start < vma->vm_end);
    } while (nr_pages);

    return i;
}

#endif /* LINUX_VERSION_CODE >= KERNEL_VERSION(3, 9, 0) */

#else /* LINUX_VERSION_CODE > KERNEL_VERSION(2, 6, 38) */

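/*
 * Legacy variant for kernels up to 2.6.38, driven by the GUP_FLAGS_*
 * bits defined above instead of FOLL_* flags.
 */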
static int __get_user_pages_uprobe(struct task_struct *tsk, struct mm_struct *mm,
        unsigned long start, int len, int flags,
        struct page **pages, struct vm_area_struct **vmas)
{
    int i;
    unsigned int vm_flags = 0;
    int write = !!(flags & GUP_FLAGS_WRITE);
    int force = !!(flags & GUP_FLAGS_FORCE);
    int ignore = !!(flags & GUP_FLAGS_IGNORE_VMA_PERMISSIONS);

    if (len <= 0)
        return 0;

    /*
     * Require read or write permissions.
     * If 'force' is set, we only require the "MAY" flags.
     */
    vm_flags = write ? (VM_WRITE | VM_MAYWRITE) : (VM_READ | VM_MAYREAD);
    vm_flags &= force ? (VM_MAYREAD | VM_MAYWRITE) : (VM_READ | VM_WRITE);
    i = 0;

    do {
        struct vm_area_struct *vma;
        unsigned int foll_flags;

        vma = find_vma(mm, start);
        if (!vma && dbi_in_gate_area(tsk, start)) {
            unsigned long pg = start & PAGE_MASK;
            struct vm_area_struct *gate_vma = get_gate_vma(tsk);
            pgd_t *pgd;
            pud_t *pud;
            pmd_t *pmd;
            pte_t *pte;

            /* user gate pages are read-only */
            if (!ignore && write)
                return i ? : -EFAULT;
            if (pg > TASK_SIZE)
                pgd = pgd_offset_k(pg);
            else
                pgd = pgd_offset_gate(mm, pg);
            BUG_ON(pgd_none(*pgd));
            pud = pud_offset(pgd, pg);
            BUG_ON(pud_none(*pud));
            pmd = pmd_offset(pud, pg);
            if (pmd_none(*pmd))
                return i ? : -EFAULT;
            pte = pte_offset_map(pmd, pg);
            if (pte_none(*pte)) {
                pte_unmap(pte);
                return i ? : -EFAULT;
            }
            if (pages) {
                struct page *page = vm_normal_page(gate_vma, start, *pte);
                pages[i] = page;
                if (page)
                    get_page(page);
            }
            pte_unmap(pte);
            if (vmas)
                vmas[i] = gate_vma;
            i++;
            start += PAGE_SIZE;
            len--;
            continue;
        }

        if (!vma ||
            (vma->vm_flags & (VM_IO | VM_PFNMAP)) ||
            (!ignore && !(vm_flags & vma->vm_flags)))
            return i ? : -EFAULT;
        if (is_vm_hugetlb_page(vma)) {
#if LINUX_VERSION_CODE <= KERNEL_VERSION(2,6,18)
            i = follow_hugetlb_page(mm, vma, pages, vmas,
                    &start, &len, i);
#else
            i = follow_hugetlb_page(mm, vma, pages, vmas,
                    &start, &len, i, write);
#endif
            continue;
        }

        foll_flags = FOLL_TOUCH;
        if (pages)
            foll_flags |= FOLL_GET;

#if LINUX_VERSION_CODE > KERNEL_VERSION(2,6,18)
#if LINUX_VERSION_CODE <= KERNEL_VERSION(2,6,30)
        if (!write && use_zero_page(vma))
            foll_flags |= FOLL_ANON;
#endif /* LINUX_VERSION_CODE <= KERNEL_VERSION(2,6,30) */
#endif /* LINUX_VERSION_CODE > KERNEL_VERSION(2,6,18) */

        do {
            struct page *page;

            if (write)
                foll_flags |= FOLL_WRITE;
            DBPRINTF("pages = %p vma = %p\n", pages, vma);
            while (!(page = follow_page(vma, start, foll_flags))) {
                int ret;

                ret = handle_mm_fault(mm, vma, start,
                        foll_flags & FOLL_WRITE);

#if LINUX_VERSION_CODE <= KERNEL_VERSION(2,6,18)
                if (ret & VM_FAULT_WRITE)
                    foll_flags &= ~FOLL_WRITE;

                switch (ret & ~VM_FAULT_WRITE) {
                case VM_FAULT_MINOR:
                    tsk->min_flt++;
                    break;
                case VM_FAULT_MAJOR:
                    tsk->maj_flt++;
                    break;
                case VM_FAULT_SIGBUS:
                    return i ? i : -EFAULT;
                case VM_FAULT_OOM:
                    return i ? i : -ENOMEM;
                default:
                    BUG();
                }
#else
                if (ret & VM_FAULT_ERROR) {
                    if (ret & VM_FAULT_OOM)
                        return i ? i : -ENOMEM;
                    else if (ret & VM_FAULT_SIGBUS)
                        return i ? i : -EFAULT;
                    BUG();
                }
                if (ret & VM_FAULT_MAJOR)
                    tsk->maj_flt++;
                else
                    tsk->min_flt++;

                /*
                 * The VM_FAULT_WRITE bit tells us that
                 * do_wp_page has broken COW when necessary,
                 * even if maybe_mkwrite decided not to set
                 * pte_write. We can thus safely do subsequent
                 * page lookups as if they were reads. But only
                 * do so when looping for pte_write is futile:
                 * in some cases userspace may also be wanting
                 * to write to the gotten user page, which a
                 * read fault here might prevent (a readonly
                 * page might get reCOWed by userspace write).
                 */
                if ((ret & VM_FAULT_WRITE) &&
                    !(vma->vm_flags & VM_WRITE))
                    foll_flags &= ~FOLL_WRITE;
#endif /* LINUX_VERSION_CODE <= KERNEL_VERSION(2,6,18) */
            }

            if (IS_ERR(page))
                return i ? i : PTR_ERR(page);
            if (pages) {
                pages[i] = page;
#if LINUX_VERSION_CODE <= KERNEL_VERSION(2,6,18)
                flush_anon_page(page, start);
#else
                flush_anon_page(vma, page, start);
#endif
                flush_dcache_page(page);
            }
            if (vmas)
                vmas[i] = vma;
            i++;
            start += PAGE_SIZE;
            len--;
        } while (len && start < vma->vm_end);
    } while (len);

    return i;
}

#endif /* LINUX_VERSION_CODE > KERNEL_VERSION(2, 6, 38) */

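/*
 * Entry point used by access_process_vm_atomic(): translate write/force
 * into the flag set the running kernel expects and call the matching
 * __get_user_pages_uprobe() copy above (or fall back to get_user_pages()
 * on pre-2.6.29 kernels).
 */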
int get_user_pages_uprobe(struct task_struct *tsk, struct mm_struct *mm,
        unsigned long start, int len, int write, int force,
        struct page **pages, struct vm_area_struct **vmas)
{
#if LINUX_VERSION_CODE >= KERNEL_VERSION(2, 6, 29)
#if LINUX_VERSION_CODE > KERNEL_VERSION(2, 6, 38) /* FIXME: must be >= 32! */
    int flags = FOLL_TOUCH;

    if (pages)
        flags |= FOLL_GET;
    if (write)
        flags |= FOLL_WRITE;
    if (force)
        flags |= FOLL_FORCE;
#else /* LINUX_VERSION_CODE > KERNEL_VERSION(2, 6, 38) */
    int flags = 0;

    if (write)
        flags |= GUP_FLAGS_WRITE;
    if (force)
        flags |= GUP_FLAGS_FORCE;
#endif /* LINUX_VERSION_CODE > KERNEL_VERSION(2, 6, 38) */

    return __get_user_pages_uprobe(tsk, mm,
                start, len, flags,
#if LINUX_VERSION_CODE > KERNEL_VERSION(2, 6, 38)
                pages, vmas, NULL);
#else /* LINUX_VERSION_CODE > KERNEL_VERSION(2, 6, 38) */
                pages, vmas);
#endif /* LINUX_VERSION_CODE > KERNEL_VERSION(2, 6, 38) */
#else /* LINUX_VERSION_CODE >= KERNEL_VERSION(2, 6, 29) */
    return get_user_pages(tsk, mm, start, len, write, force, pages, vmas);
#endif /* LINUX_VERSION_CODE >= KERNEL_VERSION(2, 6, 29) */
}

#define ACCESS_PROCESS_OPTIMIZATION 0

#if ACCESS_PROCESS_OPTIMIZATION

#define GET_STEP_X(LEN, STEP) (((LEN) >= (STEP)) ? (STEP) : (LEN) % (STEP))
#define GET_STEP_4(LEN) GET_STEP_X((LEN), 4)

static void read_data_current(unsigned long addr, void *buf, int len)
{
    int step;
    int pos = 0;

    for (step = GET_STEP_4(len); len; len -= step) {
        switch (GET_STEP_4(len)) {
        case 1:
            get_user(*(u8 *)(buf + pos), (unsigned long *)(addr + pos));
            step = 1;
            break;
        case 2:
        case 3:
            get_user(*(u16 *)(buf + pos), (unsigned long *)(addr + pos));
            step = 2;
            break;
        case 4:
            get_user(*(u32 *)(buf + pos), (unsigned long *)(addr + pos));
            step = 4;
            break;
        }
        pos += step;
    }
}

static void write_data_current(unsigned long addr, void *buf, int len)
{
    int step;
    int pos = 0;

    for (step = GET_STEP_4(len); len; len -= step) {
        switch (GET_STEP_4(len)) {
        case 1:
            put_user(*(u8 *)(buf + pos), (unsigned long *)(addr + pos));
            step = 1;
            break;
        case 2:
        case 3:
            put_user(*(u16 *)(buf + pos), (unsigned long *)(addr + pos));
            step = 2;
            break;
        case 4:
            put_user(*(u32 *)(buf + pos), (unsigned long *)(addr + pos));
            step = 4;
            break;
        }
        pos += step;
    }
}

#endif /* ACCESS_PROCESS_OPTIMIZATION */

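/*
 * Counterpart of access_process_vm() meant for contexts that must not
 * sleep: it uses tsk->mm directly instead of get_task_mm() and maps the
 * target pages with dbi_kmap_atomic()/dbi_kunmap_atomic().
 */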
int access_process_vm_atomic(struct task_struct *tsk, unsigned long addr, void *buf, int len, int write)
{
    struct mm_struct *mm;
    struct vm_area_struct *vma;
    void *old_buf = buf;

#if ACCESS_PROCESS_OPTIMIZATION
    if (write == 0 && tsk == current) {
        read_data_current(addr, buf, len);
        return len;
    }
#endif

    mm = tsk->mm; /* function 'get_task_mm' is to be called */
    if (!mm)
        return 0;

    /* ignore errors, just check how much was successfully transferred */
    while (len) {
        int bytes, ret, offset;
        void *maddr;
        struct page *page = NULL;

        ret = get_user_pages_uprobe(tsk, mm, addr, 1,
                        write, 1, &page, &vma);
        if (ret <= 0) {
            /*
             * Check if this is a VM_IO | VM_PFNMAP VMA, which
             * we can access using slightly different code.
             */
#ifdef CONFIG_HAVE_IOREMAP_PROT
            vma = find_vma(mm, addr);
            if (!vma)
                break;
            if (vma->vm_ops && vma->vm_ops->access)
                ret = vma->vm_ops->access(vma, addr, buf,
                            len, write);
            if (ret <= 0)
#endif
                break;
            bytes = ret;
        } else {
            bytes = len;
            offset = addr & (PAGE_SIZE - 1);
            if (bytes > PAGE_SIZE - offset)
                bytes = PAGE_SIZE - offset;

            maddr = dbi_kmap_atomic(page);

            if (write) {
                copy_to_user_page(vma, page, addr,
                            maddr + offset, buf, bytes);
                set_page_dirty_lock(page);
            } else {
                copy_from_user_page(vma, page, addr,
                            buf, maddr + offset, bytes);
            }

            dbi_kunmap_atomic(maddr);
            page_cache_release(page);
        }

        len -= bytes;
        buf += bytes;
        addr += bytes;
    }

    return buf - old_buf;
}

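/*
 * Walk the page tables of @mm and report whether @address is currently
 * backed by a present page with a valid pfn: returns 1 if so, 0 otherwise.
 */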
int page_present(struct mm_struct *mm, unsigned long address)
{
    pgd_t *pgd;
    pud_t *pud;
    pmd_t *pmd;
    pte_t *ptep, pte;
    unsigned long pfn;

    pgd = pgd_offset(mm, address);
    if (pgd_none(*pgd) || unlikely(pgd_bad(*pgd)))
        goto out;

    pud = pud_offset(pgd, address);
    if (pud_none(*pud) || unlikely(pud_bad(*pud)))
        goto out;

    pmd = pmd_offset(pud, address);
    if (pmd_none(*pmd) || unlikely(pmd_bad(*pmd)))
        goto out;

    ptep = pte_offset_map(pmd, address);
    if (!ptep)
        goto out;

    pte = *ptep;
    pte_unmap(ptep);
    if (pte_present(pte)) {
        pfn = pte_pfn(pte);
        if (pfn_valid(pfn)) {
            return 1;
        }
    }

out:
    return 0;
}

EXPORT_SYMBOL_GPL(page_present);
EXPORT_SYMBOL_GPL(access_process_vm_atomic);