/*
 *  Dynamic Binary Instrumentation Module based on KProbes
 *  modules/kprobe/dbi_kprobes_deps.c
 *
 * This program is free software; you can redistribute it and/or modify
 * it under the terms of the GNU General Public License as published by
 * the Free Software Foundation; either version 2 of the License, or
 * (at your option) any later version.
 *
 * This program is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 * GNU General Public License for more details.
 *
 * You should have received a copy of the GNU General Public License
 * along with this program; if not, write to the Free Software
 * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
 *
 * Copyright (C) Samsung Electronics, 2006-2010
 *
 * 2008-2009    Alexey Gerenkov <a.gerenkov@samsung.com>: initial implementation
 *              of user-space probes; support of x86/ARM/MIPS for both user and
 *              kernel spaces.
 * 2010         Ekaterina Gorelkina <e.gorelkina@samsung.com>: redesign of the
 *              module to separate the core and arch-specific parts.
 */
#include <linux/module.h>
#include <linux/sched.h>

#include <asm/pgtable.h>

#include "dbi_kprobes_deps.h"
#include "dbi_kdebug.h"

#include <linux/slab.h>
#if LINUX_VERSION_CODE > KERNEL_VERSION(2, 6, 29)
/* redefinition of the kernel's 'pgd_offset_k' macro */
#undef pgd_offset_k
#define pgd_offset_k(addr)	pgd_offset(init_task.active_mm, addr)
#endif /* LINUX_VERSION_CODE > KERNEL_VERSION(2, 6, 29) */

#if (LINUX_VERSION_CODE > KERNEL_VERSION(2, 6, 38))
#ifndef is_zero_pfn

static unsigned long swap_zero_pfn = 0;

#endif /* is_zero_pfn */
#endif /* (LINUX_VERSION_CODE > KERNEL_VERSION(2, 6, 38)) */
#if LINUX_VERSION_CODE > KERNEL_VERSION(2, 6, 36)
static inline void *dbi_kmap_atomic(struct page *page)
{
	return kmap_atomic(page);
}

static inline void dbi_kunmap_atomic(void *kvaddr)
{
	kunmap_atomic(kvaddr);
}
#else /* LINUX_VERSION_CODE > KERNEL_VERSION(2, 6, 36) */
static inline void *dbi_kmap_atomic(struct page *page)
{
	return kmap_atomic(page, KM_USER0);
}

static inline void dbi_kunmap_atomic(void *kvaddr)
{
	kunmap_atomic(kvaddr, KM_USER0);
}
#endif /* LINUX_VERSION_CODE > KERNEL_VERSION(2, 6, 36) */
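
/*
 * Usage sketch (illustrative, not part of the original code): the wrappers
 * above hide the kmap_atomic() signature change that removed the KM_* slot
 * argument, so callers map and unmap a page the same way on every supported
 * kernel, e.g.:
 *
 *	void *kvaddr = dbi_kmap_atomic(page);
 *	memcpy(kbuf, kvaddr + offset, bytes);
 *	dbi_kunmap_atomic(kvaddr);
 *
 * (kbuf, offset and bytes are hypothetical names; see
 * access_process_vm_atomic() below for the real call sites.)
 */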
#if LINUX_VERSION_CODE >= KERNEL_VERSION(3, 9, 0)
DECLARE_MOD_FUNC_DEP(do_mmap_pgoff, unsigned long, struct file *file, unsigned long addr, unsigned long len, unsigned long prot, unsigned long flags, unsigned long pgoff, unsigned long *populate);
DECLARE_MOD_DEP_WRAPPER(do_mmap_pgoff, unsigned long, struct file *file, unsigned long addr, unsigned long len, unsigned long prot, unsigned long flags, unsigned long pgoff, unsigned long *populate)
IMP_MOD_DEP_WRAPPER(do_mmap_pgoff, file, addr, len, prot, flags, pgoff, populate)
#elif LINUX_VERSION_CODE >= KERNEL_VERSION(3, 4, 0) /* LINUX_VERSION_CODE >= KERNEL_VERSION(3, 9, 0) */
DECLARE_MOD_FUNC_DEP(do_mmap_pgoff, unsigned long, struct file *file, unsigned long addr, unsigned long len, unsigned long prot, unsigned long flags, unsigned long pgoff);
DECLARE_MOD_DEP_WRAPPER(do_mmap_pgoff, unsigned long, struct file *file, unsigned long addr, unsigned long len, unsigned long prot, unsigned long flags, unsigned long pgoff)
IMP_MOD_DEP_WRAPPER(do_mmap_pgoff, file, addr, len, prot, flags, pgoff)
#endif /* LINUX_VERSION_CODE >= KERNEL_VERSION(3, 4, 0) */

#if LINUX_VERSION_CODE >= KERNEL_VERSION(3, 4, 0)
EXPORT_SYMBOL_GPL(do_mmap_pgoff);
#endif /* LINUX_VERSION_CODE >= KERNEL_VERSION(3, 4, 0) */
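
/*
 * Sketch of how the DECLARE_MOD_* and IMP_MOD_* dependency macros are
 * expected to fit together (illustrative only -- the authoritative
 * definitions live in dbi_kprobes_deps.h).  For an unexported kernel symbol
 * such as do_mmap_pgoff the pattern is approximately:
 *
 *	static unsigned long (*__ref_do_mmap_pgoff)(struct file *file, ...);
 *
 *	unsigned long do_mmap_pgoff(struct file *file, ...)
 *	{
 *		return __ref_do_mmap_pgoff(file, ...);
 *	}
 *
 * with init_module_dependencies() filling the pointer at load time through
 * a kallsyms-based lookup via INIT_MOD_DEP_VAR().  The '__ref_' prefix is a
 * hypothetical name used only for illustration.
 */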
/* copy_to_user_page */
#ifndef copy_to_user_page
static DECLARE_MOD_FUNC_DEP(copy_to_user_page, void, struct vm_area_struct *vma, struct page *page, unsigned long uaddr, void *dst, const void *src, unsigned long len);
DECLARE_MOD_DEP_WRAPPER(copy_to_user_page, void, struct vm_area_struct *vma, struct page *page, unsigned long uaddr, void *dst, const void *src, unsigned long len)
IMP_MOD_DEP_WRAPPER(copy_to_user_page, vma, page, uaddr, dst, src, len)
#endif /* copy_to_user_page */

static DECLARE_MOD_FUNC_DEP(access_process_vm, int, struct task_struct *tsk, unsigned long addr, void *buf, int len, int write);

static DECLARE_MOD_FUNC_DEP(find_extend_vma, struct vm_area_struct *, struct mm_struct *mm, unsigned long addr);
#if LINUX_VERSION_CODE <= KERNEL_VERSION(2, 6, 30)
#if LINUX_VERSION_CODE > KERNEL_VERSION(2, 6, 18)
static DECLARE_MOD_FUNC_DEP(handle_mm_fault, int, struct mm_struct *mm, struct vm_area_struct *vma, unsigned long address, int write_access);
#endif /* LINUX_VERSION_CODE > KERNEL_VERSION(2, 6, 18) */
#else /* LINUX_VERSION_CODE <= KERNEL_VERSION(2, 6, 30) */
static DECLARE_MOD_FUNC_DEP(handle_mm_fault, int, struct mm_struct *mm, struct vm_area_struct *vma, unsigned long address, unsigned int flags);
#endif /* LINUX_VERSION_CODE <= KERNEL_VERSION(2, 6, 30) */
#if LINUX_VERSION_CODE > KERNEL_VERSION(2, 6, 38)
static DECLARE_MOD_FUNC_DEP(get_gate_vma, struct vm_area_struct *, struct mm_struct *mm);
#else /* LINUX_VERSION_CODE > KERNEL_VERSION(2, 6, 38) */
static DECLARE_MOD_FUNC_DEP(get_gate_vma, struct vm_area_struct *, struct task_struct *tsk);
#endif /* LINUX_VERSION_CODE > KERNEL_VERSION(2, 6, 38) */
#ifdef CONFIG_HUGETLB_PAGE
DECLARE_MOD_FUNC_DEP(follow_hugetlb_page, int, struct mm_struct *mm, \
		struct vm_area_struct *vma, struct page **pages, \
		struct vm_area_struct **vmas, unsigned long *position, int *length, \
		int i, unsigned int write);
#endif /* CONFIG_HUGETLB_PAGE */
#ifdef __HAVE_ARCH_GATE_AREA
#if LINUX_VERSION_CODE > KERNEL_VERSION(2, 6, 38)
DECLARE_MOD_FUNC_DEP(in_gate_area, int, struct mm_struct *mm, unsigned long addr);
#else /* LINUX_VERSION_CODE > KERNEL_VERSION(2, 6, 38) */
DECLARE_MOD_FUNC_DEP(in_gate_area, int, struct task_struct *task, unsigned long addr);
#endif /* LINUX_VERSION_CODE > KERNEL_VERSION(2, 6, 38) */
#endif /* __HAVE_ARCH_GATE_AREA */

#if LINUX_VERSION_CODE > KERNEL_VERSION(2, 6, 38)
static DECLARE_MOD_FUNC_DEP(in_gate_area_no_mm, int, unsigned long addr);
#else /* LINUX_VERSION_CODE > KERNEL_VERSION(2, 6, 38) */
static DECLARE_MOD_FUNC_DEP(in_gate_area_no_task, int, unsigned long addr);
#endif /* LINUX_VERSION_CODE > KERNEL_VERSION(2, 6, 38) */
#if LINUX_VERSION_CODE >= KERNEL_VERSION(3, 9, 0)
static DECLARE_MOD_FUNC_DEP(follow_page_mask, \
		struct page *, struct vm_area_struct *vma, \
		unsigned long address, unsigned int foll_flags, \
		unsigned int *page_mask);
DECLARE_MOD_DEP_WRAPPER(follow_page_mask, struct page *, \
		struct vm_area_struct *vma, \
		unsigned long address, \
		unsigned int foll_flags, \
		unsigned int *page_mask)
IMP_MOD_DEP_WRAPPER(follow_page_mask, vma, address, foll_flags, page_mask)
#else /* LINUX_VERSION_CODE >= KERNEL_VERSION(3, 9, 0) */
static DECLARE_MOD_FUNC_DEP(follow_page, \
		struct page *, struct vm_area_struct *vma, \
		unsigned long address, unsigned int foll_flags);
DECLARE_MOD_DEP_WRAPPER(follow_page, struct page *, \
		struct vm_area_struct *vma, \
		unsigned long address, \
		unsigned int foll_flags)
IMP_MOD_DEP_WRAPPER(follow_page, vma, address, foll_flags)
#endif /* LINUX_VERSION_CODE >= KERNEL_VERSION(3, 9, 0) */
static DECLARE_MOD_FUNC_DEP(__flush_anon_page, \
		void, struct vm_area_struct *vma, struct page *page, \
		unsigned long vmaddr);
static DECLARE_MOD_FUNC_DEP(vm_normal_page, \
		struct page *, struct vm_area_struct *vma, \
		unsigned long addr, pte_t pte);
static DECLARE_MOD_FUNC_DEP(flush_ptrace_access, \
		void, struct vm_area_struct *vma, struct page *page, \
		unsigned long uaddr, void *kaddr, unsigned long len, int write);
#if (LINUX_VERSION_CODE != KERNEL_VERSION(2, 6, 16))
static DECLARE_MOD_FUNC_DEP(put_task_struct, \
		void, struct task_struct *tsk);
#else /* (LINUX_VERSION_CODE != KERNEL_VERSION(2, 6, 16)) */
static DECLARE_MOD_FUNC_DEP(put_task_struct, \
		void, struct rcu_head *rhp);
#endif /* (LINUX_VERSION_CODE != KERNEL_VERSION(2, 6, 16)) */
DECLARE_MOD_DEP_WRAPPER(access_process_vm, int, struct task_struct *tsk, unsigned long addr, void *buf, int len, int write)
IMP_MOD_DEP_WRAPPER(access_process_vm, tsk, addr, buf, len, write)

DECLARE_MOD_DEP_WRAPPER(find_extend_vma, struct vm_area_struct *, struct mm_struct *mm, unsigned long addr)
IMP_MOD_DEP_WRAPPER(find_extend_vma, mm, addr)
#if LINUX_VERSION_CODE <= KERNEL_VERSION(2, 6, 30)
#if LINUX_VERSION_CODE > KERNEL_VERSION(2, 6, 18)
DECLARE_MOD_DEP_WRAPPER(handle_mm_fault, \
		int, struct mm_struct *mm, struct vm_area_struct *vma, unsigned long address, int write_access)
IMP_MOD_DEP_WRAPPER(handle_mm_fault, mm, vma, address, write_access)
#endif /* LINUX_VERSION_CODE > KERNEL_VERSION(2, 6, 18) */
#else /* LINUX_VERSION_CODE <= KERNEL_VERSION(2, 6, 30) */
DECLARE_MOD_DEP_WRAPPER(handle_mm_fault, \
		int, struct mm_struct *mm, struct vm_area_struct *vma, unsigned long address, unsigned int flags)
IMP_MOD_DEP_WRAPPER(handle_mm_fault, mm, vma, address, flags)
#endif /* LINUX_VERSION_CODE <= KERNEL_VERSION(2, 6, 30) */
#if LINUX_VERSION_CODE > KERNEL_VERSION(2, 6, 38)
DECLARE_MOD_DEP_WRAPPER(get_gate_vma, \
		struct vm_area_struct *, struct mm_struct *mm)
IMP_MOD_DEP_WRAPPER(get_gate_vma, mm)
#else /* LINUX_VERSION_CODE > KERNEL_VERSION(2, 6, 38) */
DECLARE_MOD_DEP_WRAPPER(get_gate_vma, \
		struct vm_area_struct *, struct task_struct *tsk)
IMP_MOD_DEP_WRAPPER(get_gate_vma, tsk)
#endif /* LINUX_VERSION_CODE > KERNEL_VERSION(2, 6, 38) */
#ifdef CONFIG_HUGETLB_PAGE
DECLARE_MOD_DEP_WRAPPER(follow_hugetlb_page, int, struct mm_struct *mm, struct vm_area_struct *vma, struct page **pages, struct vm_area_struct **vmas, unsigned long *position, int *length, int i, unsigned int write)
IMP_MOD_DEP_WRAPPER(follow_hugetlb_page, mm, vma, pages, vmas, position, length, i, write)
#endif /* CONFIG_HUGETLB_PAGE */
static inline int dbi_in_gate_area(struct task_struct *task, unsigned long addr)
{
#ifdef __HAVE_ARCH_GATE_AREA
#if LINUX_VERSION_CODE > KERNEL_VERSION(2, 6, 38)
	struct mm_struct *mm = task->mm;
	IMP_MOD_DEP_WRAPPER(in_gate_area, mm, addr)
#else /* LINUX_VERSION_CODE > KERNEL_VERSION(2, 6, 38) */
	IMP_MOD_DEP_WRAPPER(in_gate_area, task, addr)
#endif /* LINUX_VERSION_CODE > KERNEL_VERSION(2, 6, 38) */
#else /* __HAVE_ARCH_GATE_AREA */
	return in_gate_area(task, addr);
#endif /* __HAVE_ARCH_GATE_AREA */
}
#if LINUX_VERSION_CODE > KERNEL_VERSION(2, 6, 38)
DECLARE_MOD_DEP_WRAPPER(in_gate_area_no_mm, int, unsigned long addr)
IMP_MOD_DEP_WRAPPER(in_gate_area_no_mm, addr)
#else /* LINUX_VERSION_CODE > KERNEL_VERSION(2, 6, 38) */
DECLARE_MOD_DEP_WRAPPER(in_gate_area_no_task, int, unsigned long addr)
IMP_MOD_DEP_WRAPPER(in_gate_area_no_task, addr)
#endif /* LINUX_VERSION_CODE > KERNEL_VERSION(2, 6, 38) */
static inline int dbi_in_gate_area_no_xxx(unsigned long addr)
{
#if LINUX_VERSION_CODE > KERNEL_VERSION(2, 6, 38)
	return in_gate_area_no_mm(addr);
#else /* LINUX_VERSION_CODE > KERNEL_VERSION(2, 6, 38) */
	return in_gate_area_no_task(addr);
#endif /* LINUX_VERSION_CODE > KERNEL_VERSION(2, 6, 38) */
}
DECLARE_MOD_DEP_WRAPPER(__flush_anon_page, \
		void, struct vm_area_struct *vma, \
		struct page *page, unsigned long vmaddr)
IMP_MOD_DEP_WRAPPER(__flush_anon_page, vma, page, vmaddr)

DECLARE_MOD_DEP_WRAPPER(vm_normal_page, \
		struct page *, struct vm_area_struct *vma, \
		unsigned long addr, pte_t pte)
IMP_MOD_DEP_WRAPPER(vm_normal_page, vma, addr, pte)

DECLARE_MOD_DEP_WRAPPER(flush_ptrace_access, \
		void, struct vm_area_struct *vma, struct page *page, \
		unsigned long uaddr, void *kaddr, unsigned long len, int write)
IMP_MOD_DEP_WRAPPER(flush_ptrace_access, vma, page, uaddr, kaddr, len, write)
int init_module_dependencies(void)
{

#if LINUX_VERSION_CODE > KERNEL_VERSION(2, 6, 18)
	INIT_MOD_DEP_VAR(handle_mm_fault, handle_mm_fault);
#endif /* LINUX_VERSION_CODE > KERNEL_VERSION(2, 6, 18) */

#ifndef copy_to_user_page
	INIT_MOD_DEP_VAR(copy_to_user_page, copy_to_user_page);
#endif /* copy_to_user_page */

	INIT_MOD_DEP_VAR(find_extend_vma, find_extend_vma);
	INIT_MOD_DEP_VAR(get_gate_vma, get_gate_vma);
	INIT_MOD_DEP_VAR(flush_ptrace_access, flush_ptrace_access);

#ifdef CONFIG_HUGETLB_PAGE
	INIT_MOD_DEP_VAR(follow_hugetlb_page, follow_hugetlb_page);
#endif /* CONFIG_HUGETLB_PAGE */

#ifdef __HAVE_ARCH_GATE_AREA
	INIT_MOD_DEP_VAR(in_gate_area, in_gate_area);
#endif /* __HAVE_ARCH_GATE_AREA */

#if LINUX_VERSION_CODE >= KERNEL_VERSION(3, 9, 0)
	INIT_MOD_DEP_VAR(follow_page_mask, follow_page_mask);
#else /* LINUX_VERSION_CODE >= KERNEL_VERSION(3, 9, 0) */
	INIT_MOD_DEP_VAR(follow_page, follow_page);
#endif /* LINUX_VERSION_CODE >= KERNEL_VERSION(3, 9, 0) */

#if LINUX_VERSION_CODE > KERNEL_VERSION(2, 6, 38)

#ifndef is_zero_pfn
	swap_zero_pfn = page_to_pfn(ZERO_PAGE(0));
#endif /* is_zero_pfn */

	INIT_MOD_DEP_VAR(in_gate_area_no_mm, in_gate_area_no_mm);
#else /* (LINUX_VERSION_CODE > KERNEL_VERSION(2, 6, 38)) */
	INIT_MOD_DEP_VAR(in_gate_area_no_task, in_gate_area_no_task);
#endif /* (LINUX_VERSION_CODE > KERNEL_VERSION(2, 6, 38)) */

	INIT_MOD_DEP_VAR(__flush_anon_page, __flush_anon_page);
	INIT_MOD_DEP_VAR(vm_normal_page, vm_normal_page);
	INIT_MOD_DEP_VAR(access_process_vm, access_process_vm);

#if (LINUX_VERSION_CODE != KERNEL_VERSION(2, 6, 16))
# if (LINUX_VERSION_CODE < KERNEL_VERSION(2, 6, 11))
	INIT_MOD_DEP_VAR(put_task_struct, put_task_struct);
# else /* (LINUX_VERSION_CODE < KERNEL_VERSION(2, 6, 11)) */
	INIT_MOD_DEP_VAR(put_task_struct, __put_task_struct);
# endif /* (LINUX_VERSION_CODE < KERNEL_VERSION(2, 6, 11)) */
#else /* (LINUX_VERSION_CODE != KERNEL_VERSION(2, 6, 16)) */
	INIT_MOD_DEP_VAR(put_task_struct, __put_task_struct_cb);
#endif /* (LINUX_VERSION_CODE != KERNEL_VERSION(2, 6, 16)) */

#if LINUX_VERSION_CODE >= KERNEL_VERSION(3, 4, 0)
	INIT_MOD_DEP_VAR(do_mmap_pgoff, do_mmap_pgoff);
#endif /* LINUX_VERSION_CODE >= KERNEL_VERSION(3, 4, 0) */

	return 0;
}
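
/*
 * Usage sketch (illustrative, not part of the original code): the module's
 * init path is expected to resolve the dependencies above before any of the
 * wrappers are called, e.g.:
 *
 *	static int __init dbi_module_init(void)
 *	{
 *		if (init_module_dependencies() != 0)
 *			return -ESRCH;
 *		...
 *		return 0;
 *	}
 *
 * (dbi_module_init is a hypothetical name used only for this example.)
 */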
#if LINUX_VERSION_CODE <= KERNEL_VERSION(2, 6, 38) /* FIXME: must be < 32 */
#define GUP_FLAGS_WRITE				0x1
#define GUP_FLAGS_FORCE				0x2
#define GUP_FLAGS_IGNORE_VMA_PERMISSIONS	0x4
#define GUP_FLAGS_IGNORE_SIGKILL		0x8
#endif /* LINUX_VERSION_CODE <= KERNEL_VERSION(2, 6, 38) */
#if LINUX_VERSION_CODE > KERNEL_VERSION(2,6,18)
static inline int use_zero_page(struct vm_area_struct *vma)
{
	/*
	 * We don't want to optimize FOLL_ANON for make_pages_present()
	 * when it tries to page in a VM_LOCKED region. As to VM_SHARED,
	 * we want to get the page from the page tables to make sure
	 * that we serialize and update with any other user of that
	 * page.
	 */
	if (vma->vm_flags & (VM_LOCKED | VM_SHARED))
		return 0;
	/*
	 * And if we have a fault routine, it's not an anonymous region.
	 */
	return !vma->vm_ops || !vma->vm_ops->fault;
}
#endif /* LINUX_VERSION_CODE > KERNEL_VERSION(2,6,18) */
#if LINUX_VERSION_CODE > KERNEL_VERSION(2, 6, 38)

#if LINUX_VERSION_CODE >= KERNEL_VERSION(3, 8, 0)

#ifdef __HAVE_COLOR_ZERO_PAGE

static inline int swap_is_zero_pfn(unsigned long pfn)
{
	unsigned long offset_from_zero_pfn = pfn - swap_zero_pfn;
	return offset_from_zero_pfn <= (zero_page_mask >> PAGE_SHIFT);
}

#else /* __HAVE_COLOR_ZERO_PAGE */

static inline int swap_is_zero_pfn(unsigned long pfn)
{
	return pfn == swap_zero_pfn;
}
#endif /* __HAVE_COLOR_ZERO_PAGE */

#else /* LINUX_VERSION_CODE >= KERNEL_VERSION(3, 8, 0) */

static inline int swap_is_zero_pfn(unsigned long pfn)
{
#ifndef is_zero_pfn
	return pfn == swap_zero_pfn;
#else /* is_zero_pfn */
	return is_zero_pfn(pfn);
#endif /* is_zero_pfn */
}

#endif /* LINUX_VERSION_CODE >= KERNEL_VERSION(3, 8, 0) */
static inline int stack_guard_page(struct vm_area_struct *vma, unsigned long addr)
{
	return stack_guard_page_start(vma, addr) ||
	       stack_guard_page_end(vma, addr + PAGE_SIZE);
}
#if LINUX_VERSION_CODE >= KERNEL_VERSION(3, 9, 0)

static inline struct page *follow_page_uprobe(struct vm_area_struct *vma,
	unsigned long address, unsigned int foll_flags)
{
	unsigned int unused_page_mask;
	return follow_page_mask(vma, address, foll_flags, &unused_page_mask);
}
long __get_user_pages_uprobe(struct task_struct *tsk, struct mm_struct *mm,
		unsigned long start, unsigned long nr_pages,
		unsigned int gup_flags, struct page **pages,
		struct vm_area_struct **vmas, int *nonblocking)
{
	long i;
	unsigned long vm_flags;
	unsigned int page_mask;

	if (!nr_pages)
		return 0;

	VM_BUG_ON(!!pages != !!(gup_flags & FOLL_GET));

	/*
	 * Require read or write permissions.
	 * If FOLL_FORCE is set, we only require the "MAY" flags.
	 */
	vm_flags  = (gup_flags & FOLL_WRITE) ?
			(VM_WRITE | VM_MAYWRITE) : (VM_READ | VM_MAYREAD);
	vm_flags &= (gup_flags & FOLL_FORCE) ?
			(VM_MAYREAD | VM_MAYWRITE) : (VM_READ | VM_WRITE);

	/*
	 * If FOLL_FORCE and FOLL_NUMA are both set, handle_mm_fault
	 * would be called on PROT_NONE ranges. We must never invoke
	 * handle_mm_fault on PROT_NONE ranges or the NUMA hinting
	 * page faults would unprotect the PROT_NONE ranges if
	 * _PAGE_NUMA and _PAGE_PROTNONE are sharing the same pte/pmd
	 * bitflag. So to avoid that, don't set FOLL_NUMA if
	 * FOLL_FORCE is set.
	 */
	if (!(gup_flags & FOLL_FORCE))
		gup_flags |= FOLL_NUMA;

	i = 0;

	do {
		struct vm_area_struct *vma;

		vma = find_extend_vma(mm, start);
		if (!vma && dbi_in_gate_area(tsk, start)) {
			unsigned long pg = start & PAGE_MASK;
			pgd_t *pgd;
			pud_t *pud;
			pmd_t *pmd;
			pte_t *pte;

			/* user gate pages are read-only */
			if (gup_flags & FOLL_WRITE)
				return i ? : -EFAULT;
			if (pg > TASK_SIZE)
				pgd = pgd_offset_k(pg);
			else
				pgd = pgd_offset_gate(mm, pg);
			BUG_ON(pgd_none(*pgd));
			pud = pud_offset(pgd, pg);
			BUG_ON(pud_none(*pud));
			pmd = pmd_offset(pud, pg);
			if (pmd_none(*pmd))
				return i ? : -EFAULT;
			VM_BUG_ON(pmd_trans_huge(*pmd));
			pte = pte_offset_map(pmd, pg);
			if (pte_none(*pte)) {
				pte_unmap(pte);
				return i ? : -EFAULT;
			}
			vma = get_gate_vma(mm);
			if (pages) {
				struct page *page;

				page = vm_normal_page(vma, start, *pte);
				if (!page) {
					if (!(gup_flags & FOLL_DUMP) &&
					     swap_is_zero_pfn(pte_pfn(*pte)))
						page = pte_page(*pte);
					else {
						pte_unmap(pte);
						return i ? : -EFAULT;
					}
				}
				pages[i] = page;
				get_page(page);
			}
			pte_unmap(pte);
			page_mask = 0;
			goto next_page;
		}

		if (!vma ||
		    (vma->vm_flags & (VM_IO | VM_PFNMAP)) ||
		    !(vm_flags & vma->vm_flags))
			return i ? : -EFAULT;

		if (is_vm_hugetlb_page(vma)) {
			i = follow_hugetlb_page(mm, vma, pages, vmas,
					&start, &nr_pages, i, gup_flags);
			continue;
		}

		do {
			struct page *page;
			unsigned int foll_flags = gup_flags;
			unsigned int page_increm;

			/*
			 * If we have a pending SIGKILL, don't keep faulting
			 * pages and potentially allocating memory.
			 */
			if (unlikely(fatal_signal_pending(current)))
				return i ? i : -ERESTARTSYS;

			/* cond_resched(); */
			while (!(page = follow_page_mask(vma, start,
						foll_flags, &page_mask))) {
				int ret;
				unsigned int fault_flags = 0;

				/* For mlock, just skip the stack guard page. */
				if (foll_flags & FOLL_MLOCK) {
					if (stack_guard_page(vma, start))
						goto next_page;
				}
				if (foll_flags & FOLL_WRITE)
					fault_flags |= FAULT_FLAG_WRITE;
				if (nonblocking)
					fault_flags |= FAULT_FLAG_ALLOW_RETRY;
				if (foll_flags & FOLL_NOWAIT)
					fault_flags |= (FAULT_FLAG_ALLOW_RETRY | FAULT_FLAG_RETRY_NOWAIT);

				ret = handle_mm_fault(mm, vma, start,
							fault_flags);

				if (ret & VM_FAULT_ERROR) {
					if (ret & VM_FAULT_OOM)
						return i ? i : -ENOMEM;
					if (ret & (VM_FAULT_HWPOISON |
						   VM_FAULT_HWPOISON_LARGE)) {
						if (i)
							return i;
						else if (gup_flags & FOLL_HWPOISON)
							return -EHWPOISON;
						else
							return -EFAULT;
					}
					if (ret & VM_FAULT_SIGBUS)
						return i ? i : -EFAULT;
					BUG();
				}

				if (tsk) {
					if (ret & VM_FAULT_MAJOR)
						tsk->maj_flt++;
					else
						tsk->min_flt++;
				}

				if (ret & VM_FAULT_RETRY) {
					if (nonblocking)
						*nonblocking = 0;
					return i;
				}

				/*
				 * The VM_FAULT_WRITE bit tells us that
				 * do_wp_page has broken COW when necessary,
				 * even if maybe_mkwrite decided not to set
				 * pte_write. We can thus safely do subsequent
				 * page lookups as if they were reads. But only
				 * do so when looping for pte_write is futile:
				 * in some cases userspace may also be wanting
				 * to write to the gotten user page, which a
				 * read fault here might prevent (a readonly
				 * page might get reCOWed by userspace write).
				 */
				if ((ret & VM_FAULT_WRITE) &&
				    !(vma->vm_flags & VM_WRITE))
					foll_flags &= ~FOLL_WRITE;

				/* cond_resched(); */
			}
			if (IS_ERR(page))
				return i ? i : PTR_ERR(page);
			if (pages) {
				pages[i] = page;

				flush_anon_page(vma, page, start);
				flush_dcache_page(page);
				page_mask = 0;
			}
next_page:
			if (vmas) {
				vmas[i] = vma;
				page_mask = 0;
			}
			page_increm = 1 + (~(start >> PAGE_SHIFT) & page_mask);
			if (page_increm > nr_pages)
				page_increm = nr_pages;
			i += page_increm;
			start += page_increm * PAGE_SIZE;
			nr_pages -= page_increm;
		} while (nr_pages && start < vma->vm_end);
	} while (nr_pages);

	return i;
}
#else /* LINUX_VERSION_CODE >= KERNEL_VERSION(3, 9, 0) */

static int __get_user_pages_uprobe(struct task_struct *tsk, struct mm_struct *mm,
			unsigned long start, int nr_pages, unsigned int gup_flags,
			struct page **pages, struct vm_area_struct **vmas,
			int *nonblocking)
{
	int i;
	unsigned long vm_flags;

	if (nr_pages <= 0)
		return 0;

	VM_BUG_ON(!!pages != !!(gup_flags & FOLL_GET));

	/*
	 * Require read or write permissions.
	 * If FOLL_FORCE is set, we only require the "MAY" flags.
	 */
	vm_flags  = (gup_flags & FOLL_WRITE) ?
			(VM_WRITE | VM_MAYWRITE) : (VM_READ | VM_MAYREAD);
	vm_flags &= (gup_flags & FOLL_FORCE) ?
			(VM_MAYREAD | VM_MAYWRITE) : (VM_READ | VM_WRITE);

	i = 0;

	do {
		struct vm_area_struct *vma;

		vma = find_extend_vma(mm, start);
		if (!vma && dbi_in_gate_area_no_xxx(start)) {
			unsigned long pg = start & PAGE_MASK;
			pgd_t *pgd;
			pud_t *pud;
			pmd_t *pmd;
			pte_t *pte;

			/* user gate pages are read-only */
			if (gup_flags & FOLL_WRITE) {
				return i ? : -EFAULT;
			}
			if (pg > TASK_SIZE)
				pgd = pgd_offset_k(pg);
			else
				pgd = pgd_offset_gate(mm, pg);
			BUG_ON(pgd_none(*pgd));
			pud = pud_offset(pgd, pg);
			BUG_ON(pud_none(*pud));
			pmd = pmd_offset(pud, pg);
			if (pmd_none(*pmd)) {
				return i ? : -EFAULT;
			}
			VM_BUG_ON(pmd_trans_huge(*pmd));
			pte = pte_offset_map(pmd, pg);
			if (pte_none(*pte)) {
				pte_unmap(pte);
				return i ? : -EFAULT;
			}
			vma = get_gate_vma(mm);
			if (pages) {
				struct page *page;

				page = vm_normal_page(vma, start, *pte);
				if (!page) {
					if (!(gup_flags & FOLL_DUMP) &&
					     swap_is_zero_pfn(pte_pfn(*pte)))
						page = pte_page(*pte);
					else {
						pte_unmap(pte);
						return i ? : -EFAULT;
					}
				}
				pages[i] = page;
				get_page(page);
			}
			pte_unmap(pte);
			goto next_page;
		}

		if (!vma ||
		    (vma->vm_flags & (VM_IO | VM_PFNMAP)) ||
		    !(vm_flags & vma->vm_flags)) {
			return i ? : -EFAULT;
		}

		if (is_vm_hugetlb_page(vma)) {
			i = follow_hugetlb_page(mm, vma, pages, vmas,
					&start, &nr_pages, i, gup_flags);
			continue;
		}

		do {
			struct page *page;
			unsigned int foll_flags = gup_flags;

			/*
			 * If we have a pending SIGKILL, don't keep faulting
			 * pages and potentially allocating memory.
			 */
			if (unlikely(fatal_signal_pending(current))) {
				return i ? i : -ERESTARTSYS;
			}

			/* cond_resched(); */
			while (!(page = follow_page(vma, start, foll_flags))) {
				int ret;
				unsigned int fault_flags = 0;

				/* For mlock, just skip the stack guard page. */
				if (foll_flags & FOLL_MLOCK) {
					if (stack_guard_page(vma, start))
						goto next_page;
				}
				if (foll_flags & FOLL_WRITE)
					fault_flags |= FAULT_FLAG_WRITE;
				if (nonblocking)
					fault_flags |= FAULT_FLAG_ALLOW_RETRY;
				if (foll_flags & FOLL_NOWAIT)
					fault_flags |= (FAULT_FLAG_ALLOW_RETRY | FAULT_FLAG_RETRY_NOWAIT);

				ret = handle_mm_fault(mm, vma, start,
							fault_flags);

				if (ret & VM_FAULT_ERROR) {
					if (ret & VM_FAULT_OOM) {
						return i ? i : -ENOMEM;
					}
					if (ret & (VM_FAULT_HWPOISON |
						   VM_FAULT_HWPOISON_LARGE)) {
						if (i)
							return i;
						else if (gup_flags & FOLL_HWPOISON) {
							return -EHWPOISON;
						} else {
							return -EFAULT;
						}
					}
					if (ret & VM_FAULT_SIGBUS) {
						return i ? i : -EFAULT;
					}
					BUG();
				}

				if (ret & VM_FAULT_MAJOR)
					tsk->maj_flt++;
				else
					tsk->min_flt++;

				if (ret & VM_FAULT_RETRY) {
					if (nonblocking)
						*nonblocking = 0;
					return i;
				}

				/*
				 * The VM_FAULT_WRITE bit tells us that
				 * do_wp_page has broken COW when necessary,
				 * even if maybe_mkwrite decided not to set
				 * pte_write. We can thus safely do subsequent
				 * page lookups as if they were reads. But only
				 * do so when looping for pte_write is futile:
				 * in some cases userspace may also be wanting
				 * to write to the gotten user page, which a
				 * read fault here might prevent (a readonly
				 * page might get reCOWed by userspace write).
				 */
				if ((ret & VM_FAULT_WRITE) &&
				    !(vma->vm_flags & VM_WRITE))
					foll_flags &= ~FOLL_WRITE;

				/* cond_resched(); */
			}
			if (IS_ERR(page))
				return i ? i : PTR_ERR(page);
			if (pages) {
				pages[i] = page;

				flush_anon_page(vma, page, start);
				flush_dcache_page(page);
			}
next_page:
			if (vmas)
				vmas[i] = vma;
			i++;
			start += PAGE_SIZE;
			nr_pages--;
		} while (nr_pages && start < vma->vm_end);
	} while (nr_pages);

	return i;
}

#endif /* LINUX_VERSION_CODE >= KERNEL_VERSION(3, 9, 0) */
#else /* LINUX_VERSION_CODE > KERNEL_VERSION(2, 6, 38) */

static int __get_user_pages_uprobe(struct task_struct *tsk, struct mm_struct *mm,
		unsigned long start, int len, int flags,
		struct page **pages, struct vm_area_struct **vmas)
{
	int i;
	unsigned int vm_flags = 0;
	int write = !!(flags & GUP_FLAGS_WRITE);
	int force = !!(flags & GUP_FLAGS_FORCE);
	int ignore = !!(flags & GUP_FLAGS_IGNORE_VMA_PERMISSIONS);

	if (len <= 0)
		return 0;

	/*
	 * Require read or write permissions.
	 * If 'force' is set, we only require the "MAY" flags.
	 */
	vm_flags  = write ? (VM_WRITE | VM_MAYWRITE) : (VM_READ | VM_MAYREAD);
	vm_flags &= force ? (VM_MAYREAD | VM_MAYWRITE) : (VM_READ | VM_WRITE);

	i = 0;

	do {
		struct vm_area_struct *vma;
		unsigned int foll_flags;

		vma = find_vma(mm, start);
		if (!vma && dbi_in_gate_area(tsk, start)) {
			unsigned long pg = start & PAGE_MASK;
			struct vm_area_struct *gate_vma = get_gate_vma(tsk);
			pgd_t *pgd;
			pud_t *pud;
			pmd_t *pmd;
			pte_t *pte;

			/* user gate pages are read-only */
			if (!ignore && write)
				return i ? : -EFAULT;
			if (pg > TASK_SIZE)
				pgd = pgd_offset_k(pg);
			else
				pgd = pgd_offset_gate(mm, pg);
			BUG_ON(pgd_none(*pgd));
			pud = pud_offset(pgd, pg);
			BUG_ON(pud_none(*pud));
			pmd = pmd_offset(pud, pg);
			if (pmd_none(*pmd))
				return i ? : -EFAULT;
			pte = pte_offset_map(pmd, pg);
			if (pte_none(*pte)) {
				pte_unmap(pte);
				return i ? : -EFAULT;
			}
			if (pages) {
				struct page *page = vm_normal_page(gate_vma, start, *pte);
				pages[i] = page;
				if (page)
					get_page(page);
			}
			pte_unmap(pte);
			if (vmas)
				vmas[i] = gate_vma;
			i++;
			start += PAGE_SIZE;
			len--;
			continue;
		}

		if (!vma ||
		    (vma->vm_flags & (VM_IO | VM_PFNMAP)) ||
		    (!ignore && !(vm_flags & vma->vm_flags)))
			return i ? : -EFAULT;

		if (is_vm_hugetlb_page(vma)) {
#if LINUX_VERSION_CODE <= KERNEL_VERSION(2,6,18)
			i = follow_hugetlb_page(mm, vma, pages, vmas,
						&start, &len, i);
#else
			i = follow_hugetlb_page(mm, vma, pages, vmas,
						&start, &len, i, write);
#endif
			continue;
		}

		foll_flags = FOLL_TOUCH;
		if (pages)
			foll_flags |= FOLL_GET;

#if LINUX_VERSION_CODE > KERNEL_VERSION(2,6,18)
#if LINUX_VERSION_CODE <= KERNEL_VERSION(2,6,30)
		if (!write && use_zero_page(vma))
			foll_flags |= FOLL_ANON;
#endif
#endif

		do {
			struct page *page;

			if (write)
				foll_flags |= FOLL_WRITE;

			DBPRINTF("pages = %p vma = %p\n", pages, vma);
			while (!(page = follow_page(vma, start, foll_flags))) {
				int ret;
				ret = handle_mm_fault(mm, vma, start,
						foll_flags & FOLL_WRITE);

#if LINUX_VERSION_CODE <= KERNEL_VERSION(2,6,18)
				if (ret & VM_FAULT_WRITE)
					foll_flags &= ~FOLL_WRITE;

				switch (ret & ~VM_FAULT_WRITE) {
				case VM_FAULT_MINOR:
					tsk->min_flt++;
					break;
				case VM_FAULT_MAJOR:
					tsk->maj_flt++;
					break;
				case VM_FAULT_SIGBUS:
					return i ? i : -EFAULT;
				case VM_FAULT_OOM:
					return i ? i : -ENOMEM;
				default:
					BUG();
				}
#else
				if (ret & VM_FAULT_ERROR) {
					if (ret & VM_FAULT_OOM)
						return i ? i : -ENOMEM;
					else if (ret & VM_FAULT_SIGBUS)
						return i ? i : -EFAULT;
					BUG();
				}
				if (ret & VM_FAULT_MAJOR)
					tsk->maj_flt++;
				else
					tsk->min_flt++;

				/*
				 * The VM_FAULT_WRITE bit tells us that
				 * do_wp_page has broken COW when necessary,
				 * even if maybe_mkwrite decided not to set
				 * pte_write. We can thus safely do subsequent
				 * page lookups as if they were reads. But only
				 * do so when looping for pte_write is futile:
				 * in some cases userspace may also be wanting
				 * to write to the gotten user page, which a
				 * read fault here might prevent (a readonly
				 * page might get reCOWed by userspace write).
				 */
				if ((ret & VM_FAULT_WRITE) &&
				    !(vma->vm_flags & VM_WRITE))
					foll_flags &= ~FOLL_WRITE;

				/* cond_resched(); */
#endif
			}

			if (IS_ERR(page))
				return i ? i : PTR_ERR(page);
			if (pages) {
				pages[i] = page;
#if LINUX_VERSION_CODE <= KERNEL_VERSION(2,6,18)
				flush_anon_page(page, start);
#else
				flush_anon_page(vma, page, start);
#endif
				flush_dcache_page(page);
			}
			if (vmas)
				vmas[i] = vma;
			i++;
			start += PAGE_SIZE;
			len--;
		} while (len && start < vma->vm_end);
	} while (len);

	return i;
}

#endif /* LINUX_VERSION_CODE > KERNEL_VERSION(2, 6, 38) */
int get_user_pages_uprobe(struct task_struct *tsk, struct mm_struct *mm,
		unsigned long start, int len, int write, int force,
		struct page **pages, struct vm_area_struct **vmas)
{
#if LINUX_VERSION_CODE >= KERNEL_VERSION(2, 6, 29)
#if LINUX_VERSION_CODE > KERNEL_VERSION(2, 6, 38) /* FIXME: must be >= 32! */
	int flags = FOLL_TOUCH;

	if (pages)
		flags |= FOLL_GET;
	if (write)
		flags |= FOLL_WRITE;
	if (force)
		flags |= FOLL_FORCE;
#else /* LINUX_VERSION_CODE > KERNEL_VERSION(2, 6, 38) */
	int flags = 0;

	if (write)
		flags |= GUP_FLAGS_WRITE;
	if (force)
		flags |= GUP_FLAGS_FORCE;
#endif /* LINUX_VERSION_CODE > KERNEL_VERSION(2, 6, 38) */

	return __get_user_pages_uprobe(tsk, mm,
				start, len, flags,
#if LINUX_VERSION_CODE > KERNEL_VERSION(2, 6, 38)
				pages, vmas, NULL);
#else /* LINUX_VERSION_CODE > KERNEL_VERSION(2, 6, 38) */
				pages, vmas);
#endif /* LINUX_VERSION_CODE > KERNEL_VERSION(2, 6, 38) */
#else /* LINUX_VERSION_CODE >= KERNEL_VERSION(2, 6, 29) */
	return get_user_pages(tsk, mm, start, len, write, force, pages, vmas);
#endif /* LINUX_VERSION_CODE >= KERNEL_VERSION(2, 6, 29) */
}
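
/*
 * Usage sketch (illustrative, not part of the original code): pin a single
 * page of 'task' for reading and release it afterwards, e.g.:
 *
 *	struct page *page;
 *	struct vm_area_struct *vma;
 *
 *	if (get_user_pages_uprobe(task, task->mm, vaddr, 1, 0, 1, &page, &vma) == 1) {
 *		void *kvaddr = dbi_kmap_atomic(page);
 *		... read from kvaddr ...
 *		dbi_kunmap_atomic(kvaddr);
 *		page_cache_release(page);
 *	}
 *
 * This is exactly the pattern access_process_vm_atomic() uses below.
 */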
#define ACCESS_PROCESS_OPTIMIZATION 0

#if ACCESS_PROCESS_OPTIMIZATION

#define GET_STEP_X(LEN, STEP) (((LEN) >= (STEP)) ? (STEP) : (LEN) % (STEP))
#define GET_STEP_4(LEN) GET_STEP_X((LEN), 4)

static void read_data_current(unsigned long addr, void *buf, int len)
{
	int step;
	int pos = 0;

	for (step = GET_STEP_4(len); len; len -= step) {
		switch (GET_STEP_4(len)) {
		case 1:
			get_user(*(u8 *)(buf + pos), (u8 __user *)(addr + pos));
			step = 1;
			break;
		case 2:
		case 3:
			get_user(*(u16 *)(buf + pos), (u16 __user *)(addr + pos));
			step = 2;
			break;
		case 4:
			get_user(*(u32 *)(buf + pos), (u32 __user *)(addr + pos));
			step = 4;
			break;
		}
		pos += step;
	}
}

static void write_data_current(unsigned long addr, void *buf, int len)
{
	int step;
	int pos = 0;

	for (step = GET_STEP_4(len); len; len -= step) {
		switch (GET_STEP_4(len)) {
		case 1:
			put_user(*(u8 *)(buf + pos), (u8 __user *)(addr + pos));
			step = 1;
			break;
		case 2:
		case 3:
			put_user(*(u16 *)(buf + pos), (u16 __user *)(addr + pos));
			step = 2;
			break;
		case 4:
			put_user(*(u32 *)(buf + pos), (u32 __user *)(addr + pos));
			step = 4;
			break;
		}
		pos += step;
	}
}

#endif /* ACCESS_PROCESS_OPTIMIZATION */
int access_process_vm_atomic(struct task_struct *tsk, unsigned long addr, void *buf, int len, int write)
{
	struct mm_struct *mm;
	struct vm_area_struct *vma;
	void *old_buf = buf;

#if ACCESS_PROCESS_OPTIMIZATION
	if (write == 0 && tsk == current) {
		read_data_current(addr, buf, len);
		return len;
	}
#endif

	mm = tsk->mm; /* function 'get_task_mm' is to be called */
	if (!mm)
		return 0;

	/* ignore errors, just check how much was successfully transferred */
	while (len) {
		int bytes, ret, offset;
		void *maddr;
		struct page *page = NULL;

		ret = get_user_pages_uprobe(tsk, mm, addr, 1,
						write, 1, &page, &vma);

		if (ret <= 0) {
			/*
			 * Check if this is a VM_IO | VM_PFNMAP VMA, which
			 * we can access using slightly different code.
			 */
#ifdef CONFIG_HAVE_IOREMAP_PROT
			vma = find_vma(mm, addr);
			if (!vma)
				break;
			if (vma->vm_ops && vma->vm_ops->access)
				ret = vma->vm_ops->access(vma, addr, buf,
							len, write);
			if (ret <= 0)
#endif
				break;
			bytes = ret;
		} else {
			bytes = len;
			offset = addr & (PAGE_SIZE-1);
			if (bytes > PAGE_SIZE-offset)
				bytes = PAGE_SIZE-offset;

			maddr = dbi_kmap_atomic(page);

			if (write) {
				copy_to_user_page(vma, page, addr,
						  maddr + offset, buf, bytes);
				set_page_dirty_lock(page);
			} else {
				copy_from_user_page(vma, page, addr,
						    buf, maddr + offset, bytes);
			}

			dbi_kunmap_atomic(maddr);
			page_cache_release(page);
		}
		len -= bytes;
		buf += bytes;
		addr += bytes;
	}

	return buf - old_buf;
}
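
/*
 * Usage sketch (illustrative, not part of the original code): read a few
 * bytes from another task's address space, e.g.:
 *
 *	unsigned char insn[4];
 *	int copied = access_process_vm_atomic(task, vaddr, insn, sizeof(insn), 0);
 *	if (copied != sizeof(insn))
 *		... handle a partially copied or unmapped address ...
 *
 * Passing write == 1 goes through copy_to_user_page() and marks the page
 * dirty, mirroring the kernel's access_process_vm(); note that, unlike that
 * helper, no mmap_sem is taken here.
 */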
int page_present(struct mm_struct *mm, unsigned long address)
{
	pgd_t *pgd;
	pud_t *pud;
	pmd_t *pmd;
	pte_t *ptep, pte;
	unsigned long pfn;

	pgd = pgd_offset(mm, address);
	if (pgd_none(*pgd) || unlikely(pgd_bad(*pgd)))
		goto out;

	pud = pud_offset(pgd, address);
	if (pud_none(*pud) || unlikely(pud_bad(*pud)))
		goto out;

	pmd = pmd_offset(pud, address);
	if (pmd_none(*pmd) || unlikely(pmd_bad(*pmd)))
		goto out;

	ptep = pte_offset_map(pmd, address);
	if (!ptep)
		goto out;

	pte = *ptep;
	pte_unmap(ptep);
	if (pte_present(pte)) {
		pfn = pte_pfn(pte);
		if (pfn_valid(pfn))
			return 1;
	}

out:
	return 0;
}
EXPORT_SYMBOL_GPL(page_present);
EXPORT_SYMBOL_GPL(access_process_vm_atomic);
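
/*
 * Usage sketch (illustrative, not part of the original code): page_present()
 * lets a caller test whether a user address is currently mapped before
 * attempting a non-faulting access, e.g.:
 *
 *	if (page_present(task->mm, vaddr))
 *		copied = access_process_vm_atomic(task, vaddr, buf, len, 0);
 *	else
 *		... defer the access until the page has been faulted in ...
 */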