/*
 *  Dynamic Binary Instrumentation Module based on KProbes
 *  modules/kprobe/dbi_kprobes_deps.c
 *
 * This program is free software; you can redistribute it and/or modify
 * it under the terms of the GNU General Public License as published by
 * the Free Software Foundation; either version 2 of the License, or
 * (at your option) any later version.
 *
 * This program is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 * GNU General Public License for more details.
 *
 * You should have received a copy of the GNU General Public License
 * along with this program; if not, write to the Free Software
 * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
 *
 * Copyright (C) Samsung Electronics, 2006-2010
 *
 * 2008-2009    Alexey Gerenkov <a.gerenkov@samsung.com>: initial
 *              implementation of user-space probes; support for x86/ARM/MIPS
 *              in both user and kernel space.
 * 2010         Ekaterina Gorelkina <e.gorelkina@samsung.com>: redesigned the
 *              module to separate the core and architecture-specific parts.
 */

#include <linux/module.h>
#include <linux/sched.h>

#include <asm/pgtable.h>

#include "dbi_kprobes_deps.h"
#include "dbi_kdebug.h"

#include <linux/slab.h>

unsigned long sched_addr;
unsigned long fork_addr;
unsigned long exit_addr;

#if LINUX_VERSION_CODE > KERNEL_VERSION(2, 6, 29)
/* redefine the kernel's 'pgd_offset_k' to walk the init task's active mm */
#undef pgd_offset_k
#define pgd_offset_k(addr)	pgd_offset(init_task.active_mm, addr)
#endif /* LINUX_VERSION_CODE > KERNEL_VERSION(2, 6, 29) */

#if (LINUX_VERSION_CODE > KERNEL_VERSION(2, 6, 38))
#ifndef is_zero_pfn
static unsigned long swap_zero_pfn = 0;
#endif /* is_zero_pfn */
#endif /* (LINUX_VERSION_CODE > KERNEL_VERSION(2, 6, 38)) */
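
/*
 * On kernels that lack is_zero_pfn(), swap_zero_pfn caches the PFN of the
 * zero page: init_module_dependencies() fills it in via
 * page_to_pfn(ZERO_PAGE(0)), and swap_is_zero_pfn() below compares
 * against it.
 */
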
#if LINUX_VERSION_CODE > KERNEL_VERSION(2, 6, 36)
static inline void *dbi_kmap_atomic(struct page *page)
{
	return kmap_atomic(page);
}

static inline void dbi_kunmap_atomic(void *kvaddr)
{
	kunmap_atomic(kvaddr);
}
#else /* LINUX_VERSION_CODE > KERNEL_VERSION(2, 6, 36) */
static inline void *dbi_kmap_atomic(struct page *page)
{
	return kmap_atomic(page, KM_USER0);
}

static inline void dbi_kunmap_atomic(void *kvaddr)
{
	kunmap_atomic(kvaddr, KM_USER0);
}
#endif /* LINUX_VERSION_CODE > KERNEL_VERSION(2, 6, 36) */
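
/*
 * The DECLARE_MOD_FUNC_DEP()/DECLARE_MOD_DEP_WRAPPER()/IMP_MOD_DEP_WRAPPER()
 * helpers come from dbi_kprobes_deps.h: they declare a pointer for a kernel
 * symbol that is not exported to modules and generate a wrapper that calls
 * through it. init_module_dependencies() below resolves the pointers at
 * module load time with INIT_MOD_DEP_VAR().
 */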
#if LINUX_VERSION_CODE >= KERNEL_VERSION(3, 4, 0)
DECLARE_MOD_FUNC_DEP(do_mmap_pgoff, unsigned long, struct file *file, unsigned long addr, unsigned long len, unsigned long prot, unsigned long flags, unsigned long pgoff);
DECLARE_MOD_DEP_WRAPPER(do_mmap_pgoff, unsigned long, struct file *file, unsigned long addr, unsigned long len, unsigned long prot, unsigned long flags, unsigned long pgoff)
IMP_MOD_DEP_WRAPPER(do_mmap_pgoff, file, addr, len, prot, flags, pgoff)
#endif /* LINUX_VERSION_CODE >= KERNEL_VERSION(3, 4, 0) */
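
/*
 * Note: do_mmap_pgoff() stopped being exported to modules around 3.4 (the
 * vm_mmap() helpers replaced it for module use), hence the wrapper above.
 */
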
/* copy_to_user_page() is wrapped only when the arch does not provide it as a macro */
#ifndef copy_to_user_page
static DECLARE_MOD_FUNC_DEP(copy_to_user_page, void, struct vm_area_struct *vma, struct page *page, unsigned long uaddr, void *dst, const void *src, unsigned long len);
DECLARE_MOD_DEP_WRAPPER(copy_to_user_page, void, struct vm_area_struct *vma, struct page *page, unsigned long uaddr, void *dst, const void *src, unsigned long len)
IMP_MOD_DEP_WRAPPER(copy_to_user_page, vma, page, uaddr, dst, src, len)
#endif /* copy_to_user_page */

static DECLARE_MOD_FUNC_DEP(access_process_vm, int, struct task_struct *tsk, unsigned long addr, void *buf, int len, int write);

static DECLARE_MOD_FUNC_DEP(find_extend_vma, struct vm_area_struct *, struct mm_struct *mm, unsigned long addr);

#if LINUX_VERSION_CODE <= KERNEL_VERSION(2, 6, 30)
#if LINUX_VERSION_CODE > KERNEL_VERSION(2, 6, 18)
static DECLARE_MOD_FUNC_DEP(handle_mm_fault, int, struct mm_struct *mm, struct vm_area_struct *vma, unsigned long address, int write_access);
#endif /* LINUX_VERSION_CODE > KERNEL_VERSION(2, 6, 18) */
#else /* LINUX_VERSION_CODE <= KERNEL_VERSION(2, 6, 30) */
static DECLARE_MOD_FUNC_DEP(handle_mm_fault, int, struct mm_struct *mm, struct vm_area_struct *vma, unsigned long address, unsigned int flags);
#endif /* LINUX_VERSION_CODE <= KERNEL_VERSION(2, 6, 30) */

#if LINUX_VERSION_CODE > KERNEL_VERSION(2, 6, 38)
static DECLARE_MOD_FUNC_DEP(get_gate_vma, struct vm_area_struct *, struct mm_struct *mm);
#else /* LINUX_VERSION_CODE > KERNEL_VERSION(2, 6, 38) */
static DECLARE_MOD_FUNC_DEP(get_gate_vma, struct vm_area_struct *, struct task_struct *tsk);
#endif /* LINUX_VERSION_CODE > KERNEL_VERSION(2, 6, 38) */

#ifdef CONFIG_HUGETLB_PAGE
DECLARE_MOD_FUNC_DEP(follow_hugetlb_page, int, struct mm_struct *mm, \
		struct vm_area_struct *vma, struct page **pages, \
		struct vm_area_struct **vmas, unsigned long *position, int *length, \
		int i, unsigned int write);
#endif /* CONFIG_HUGETLB_PAGE */

#ifdef __HAVE_ARCH_GATE_AREA
#if LINUX_VERSION_CODE > KERNEL_VERSION(2, 6, 38)
DECLARE_MOD_FUNC_DEP(in_gate_area, int, struct mm_struct *mm, unsigned long addr);
#else /* LINUX_VERSION_CODE > KERNEL_VERSION(2, 6, 38) */
DECLARE_MOD_FUNC_DEP(in_gate_area, int, struct task_struct *task, unsigned long addr);
#endif /* LINUX_VERSION_CODE > KERNEL_VERSION(2, 6, 38) */
#endif /* __HAVE_ARCH_GATE_AREA */

#if LINUX_VERSION_CODE > KERNEL_VERSION(2, 6, 38)
static DECLARE_MOD_FUNC_DEP(in_gate_area_no_mm, int, unsigned long addr);
#else /* LINUX_VERSION_CODE > KERNEL_VERSION(2, 6, 38) */
static DECLARE_MOD_FUNC_DEP(in_gate_area_no_task, int, unsigned long addr);
#endif /* LINUX_VERSION_CODE > KERNEL_VERSION(2, 6, 38) */

static DECLARE_MOD_FUNC_DEP(follow_page, \
		struct page *, struct vm_area_struct *vma, \
		unsigned long address, unsigned int foll_flags);
static DECLARE_MOD_FUNC_DEP(__flush_anon_page, \
		void, struct vm_area_struct *vma, struct page *page, \
		unsigned long vmaddr);
static DECLARE_MOD_FUNC_DEP(vm_normal_page, \
		struct page *, struct vm_area_struct *vma, \
		unsigned long addr, pte_t pte);
static DECLARE_MOD_FUNC_DEP(flush_ptrace_access, \
		void, struct vm_area_struct *vma, struct page *page, \
		unsigned long uaddr, void *kaddr, unsigned long len, int write);

#if (LINUX_VERSION_CODE != KERNEL_VERSION(2, 6, 16))
static DECLARE_MOD_FUNC_DEP(put_task_struct, \
		void, struct task_struct *tsk);
#else /* (LINUX_VERSION_CODE != KERNEL_VERSION(2, 6, 16)) */
static DECLARE_MOD_FUNC_DEP(put_task_struct, \
		void, struct rcu_head *rhp);
#endif /* (LINUX_VERSION_CODE != KERNEL_VERSION(2, 6, 16)) */

DECLARE_MOD_DEP_WRAPPER(access_process_vm, int, struct task_struct *tsk, unsigned long addr, void *buf, int len, int write)
IMP_MOD_DEP_WRAPPER(access_process_vm, tsk, addr, buf, len, write)

DECLARE_MOD_DEP_WRAPPER(find_extend_vma, struct vm_area_struct *, struct mm_struct *mm, unsigned long addr)
IMP_MOD_DEP_WRAPPER(find_extend_vma, mm, addr)

#if LINUX_VERSION_CODE <= KERNEL_VERSION(2, 6, 30)
#if LINUX_VERSION_CODE > KERNEL_VERSION(2, 6, 18)
DECLARE_MOD_DEP_WRAPPER(handle_mm_fault, \
		int, struct mm_struct *mm, struct vm_area_struct *vma, unsigned long address, int write_access)
IMP_MOD_DEP_WRAPPER(handle_mm_fault, mm, vma, address, write_access)
#endif /* LINUX_VERSION_CODE > KERNEL_VERSION(2, 6, 18) */
#else /* LINUX_VERSION_CODE <= KERNEL_VERSION(2, 6, 30) */
DECLARE_MOD_DEP_WRAPPER(handle_mm_fault, \
		int, struct mm_struct *mm, struct vm_area_struct *vma, unsigned long address, unsigned int flags)
IMP_MOD_DEP_WRAPPER(handle_mm_fault, mm, vma, address, flags)
#endif /* LINUX_VERSION_CODE <= KERNEL_VERSION(2, 6, 30) */

#if LINUX_VERSION_CODE > KERNEL_VERSION(2, 6, 38)
DECLARE_MOD_DEP_WRAPPER(get_gate_vma, \
		struct vm_area_struct *, struct mm_struct *mm)
IMP_MOD_DEP_WRAPPER(get_gate_vma, mm)
#else /* LINUX_VERSION_CODE > KERNEL_VERSION(2, 6, 38) */
DECLARE_MOD_DEP_WRAPPER(get_gate_vma, \
		struct vm_area_struct *, struct task_struct *tsk)
IMP_MOD_DEP_WRAPPER(get_gate_vma, tsk)
#endif /* LINUX_VERSION_CODE > KERNEL_VERSION(2, 6, 38) */

#ifdef CONFIG_HUGETLB_PAGE
DECLARE_MOD_DEP_WRAPPER(follow_hugetlb_page, int, struct mm_struct *mm, struct vm_area_struct *vma, struct page **pages, struct vm_area_struct **vmas, unsigned long *position, int *length, int i, unsigned int write)
IMP_MOD_DEP_WRAPPER(follow_hugetlb_page, mm, vma, pages, vmas, position, length, i, write)
#endif /* CONFIG_HUGETLB_PAGE */

static inline int dbi_in_gate_area(struct task_struct *task, unsigned long addr)
{
#ifdef __HAVE_ARCH_GATE_AREA
#if LINUX_VERSION_CODE > KERNEL_VERSION(2, 6, 38)
	struct mm_struct *mm = task->mm;
	IMP_MOD_DEP_WRAPPER(in_gate_area, mm, addr)
#else /* LINUX_VERSION_CODE > KERNEL_VERSION(2, 6, 38) */
	IMP_MOD_DEP_WRAPPER(in_gate_area, task, addr)
#endif /* LINUX_VERSION_CODE > KERNEL_VERSION(2, 6, 38) */
#else /* __HAVE_ARCH_GATE_AREA */
	return in_gate_area(task, addr);
#endif /* __HAVE_ARCH_GATE_AREA */
}

#if LINUX_VERSION_CODE > KERNEL_VERSION(2, 6, 38)
DECLARE_MOD_DEP_WRAPPER(in_gate_area_no_mm, int, unsigned long addr)
IMP_MOD_DEP_WRAPPER(in_gate_area_no_mm, addr)
#else /* LINUX_VERSION_CODE > KERNEL_VERSION(2, 6, 38) */
DECLARE_MOD_DEP_WRAPPER(in_gate_area_no_task, int, unsigned long addr)
IMP_MOD_DEP_WRAPPER(in_gate_area_no_task, addr)
#endif /* LINUX_VERSION_CODE > KERNEL_VERSION(2, 6, 38) */

static inline int dbi_in_gate_area_no_xxx(unsigned long addr)
{
#if LINUX_VERSION_CODE > KERNEL_VERSION(2, 6, 38)
	return in_gate_area_no_mm(addr);
#else /* LINUX_VERSION_CODE > KERNEL_VERSION(2, 6, 38) */
	return in_gate_area_no_task(addr);
#endif /* LINUX_VERSION_CODE > KERNEL_VERSION(2, 6, 38) */
}
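
/*
 * in_gate_area_no_task() was renamed to in_gate_area_no_mm() around 2.6.39
 * (and in_gate_area()/get_gate_vma() switched from a task to an mm
 * argument); the dbi_* helpers above hide that difference from callers.
 */
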
#if (LINUX_VERSION_CODE != KERNEL_VERSION(2, 6, 11))
DECLARE_MOD_DEP_WRAPPER(follow_page, \
		struct page *, struct vm_area_struct *vma, \
		unsigned long address, unsigned int foll_flags)
IMP_MOD_DEP_WRAPPER(follow_page, vma, address, foll_flags)
#endif /* (LINUX_VERSION_CODE != KERNEL_VERSION(2, 6, 11)) */

DECLARE_MOD_DEP_WRAPPER(__flush_anon_page, \
		void, struct vm_area_struct *vma, \
		struct page *page, unsigned long vmaddr)
IMP_MOD_DEP_WRAPPER(__flush_anon_page, vma, page, vmaddr)

DECLARE_MOD_DEP_WRAPPER(vm_normal_page, \
		struct page *, struct vm_area_struct *vma, \
		unsigned long addr, pte_t pte)
IMP_MOD_DEP_WRAPPER(vm_normal_page, vma, addr, pte)

DECLARE_MOD_DEP_WRAPPER(flush_ptrace_access, \
		void, struct vm_area_struct *vma, struct page *page, \
		unsigned long uaddr, void *kaddr, unsigned long len, int write)
IMP_MOD_DEP_WRAPPER(flush_ptrace_access, vma, page, uaddr, kaddr, len, write)

int init_module_dependencies(void)
{
#if LINUX_VERSION_CODE > KERNEL_VERSION(2, 6, 18)
	INIT_MOD_DEP_VAR(handle_mm_fault, handle_mm_fault);
#endif /* LINUX_VERSION_CODE > KERNEL_VERSION(2, 6, 18) */

#ifndef copy_to_user_page
	INIT_MOD_DEP_VAR(copy_to_user_page, copy_to_user_page);
#endif /* copy_to_user_page */

	INIT_MOD_DEP_VAR(find_extend_vma, find_extend_vma);
	INIT_MOD_DEP_VAR(get_gate_vma, get_gate_vma);
	INIT_MOD_DEP_VAR(flush_ptrace_access, flush_ptrace_access);

#ifdef CONFIG_HUGETLB_PAGE
	INIT_MOD_DEP_VAR(follow_hugetlb_page, follow_hugetlb_page);
#endif /* CONFIG_HUGETLB_PAGE */

#ifdef __HAVE_ARCH_GATE_AREA
	INIT_MOD_DEP_VAR(in_gate_area, in_gate_area);
#endif /* __HAVE_ARCH_GATE_AREA */

#if (LINUX_VERSION_CODE > KERNEL_VERSION(2, 6, 38))
#ifndef is_zero_pfn
	swap_zero_pfn = page_to_pfn(ZERO_PAGE(0));
#endif /* is_zero_pfn */

	INIT_MOD_DEP_VAR(in_gate_area_no_mm, in_gate_area_no_mm);
#else /* (LINUX_VERSION_CODE > KERNEL_VERSION(2, 6, 38)) */
	INIT_MOD_DEP_VAR(in_gate_area_no_task, in_gate_area_no_task);
#endif /* (LINUX_VERSION_CODE > KERNEL_VERSION(2, 6, 38)) */

	INIT_MOD_DEP_VAR(follow_page, follow_page);

	INIT_MOD_DEP_VAR(__flush_anon_page, __flush_anon_page);
	INIT_MOD_DEP_VAR(vm_normal_page, vm_normal_page);
	INIT_MOD_DEP_VAR(access_process_vm, access_process_vm);

#if (LINUX_VERSION_CODE != KERNEL_VERSION(2, 6, 16))
# if (LINUX_VERSION_CODE < KERNEL_VERSION(2, 6, 11))
	INIT_MOD_DEP_VAR(put_task_struct, put_task_struct);
# else /* (LINUX_VERSION_CODE < KERNEL_VERSION(2, 6, 11)) */
	INIT_MOD_DEP_VAR(put_task_struct, __put_task_struct);
# endif /* (LINUX_VERSION_CODE < KERNEL_VERSION(2, 6, 11)) */
#else /* (LINUX_VERSION_CODE != KERNEL_VERSION(2, 6, 16)) */
	INIT_MOD_DEP_VAR(put_task_struct, __put_task_struct_cb);
#endif /* (LINUX_VERSION_CODE != KERNEL_VERSION(2, 6, 16)) */

#if (LINUX_VERSION_CODE >= KERNEL_VERSION(3, 4, 0))
	INIT_MOD_DEP_VAR(do_mmap_pgoff, do_mmap_pgoff);
#endif /* (LINUX_VERSION_CODE >= KERNEL_VERSION(3, 4, 0)) */

	return 0;
}
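
/*
 * init_module_dependencies() must run once at module load, before any of
 * the wrappers above is called; until then the underlying symbol pointers
 * are presumably unresolved.
 */
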
#if LINUX_VERSION_CODE <= KERNEL_VERSION(2, 6, 38) /* FIXME: must be < 32 */
#define GUP_FLAGS_WRITE				0x1
#define GUP_FLAGS_FORCE				0x2
#define GUP_FLAGS_IGNORE_VMA_PERMISSIONS	0x4
#define GUP_FLAGS_IGNORE_SIGKILL		0x8
#endif /* LINUX_VERSION_CODE <= KERNEL_VERSION(2, 6, 38) */
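
/*
 * These appear to mirror the kernel-internal GUP_FLAGS_* constants that
 * lived in mm/internal.h before 2.6.32, when get_user_pages() switched to
 * the FOLL_* flag style; the FIXME above notes that the version check
 * should really be "< 32".
 */
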
#if LINUX_VERSION_CODE > KERNEL_VERSION(2,6,18)
static inline int use_zero_page(struct vm_area_struct *vma)
{
	/*
	 * We don't want to optimize FOLL_ANON for make_pages_present()
	 * when it tries to page in a VM_LOCKED region. As to VM_SHARED,
	 * we want to get the page from the page tables to make sure
	 * that we serialize and update with any other user of that
	 * mapping.
	 */
	if (vma->vm_flags & (VM_LOCKED | VM_SHARED))
		return 0;
	/*
	 * And if we have a fault routine, it's not an anonymous region.
	 */
	return !vma->vm_ops || !vma->vm_ops->fault;
}
#endif /* LINUX_VERSION_CODE > KERNEL_VERSION(2,6,18) */

#if LINUX_VERSION_CODE > KERNEL_VERSION(2, 6, 38)

#if LINUX_VERSION_CODE >= KERNEL_VERSION(3, 8, 0)

#ifdef __HAVE_COLOR_ZERO_PAGE

/* with colored zero pages (e.g. s390, MIPS) there is one zero page per
 * cache color, so any PFN within zero_page_mask of the base one counts */
static inline int swap_is_zero_pfn(unsigned long pfn)
{
	unsigned long offset_from_zero_pfn = pfn - swap_zero_pfn;
	return offset_from_zero_pfn <= (zero_page_mask >> PAGE_SHIFT);
}

#else /* __HAVE_COLOR_ZERO_PAGE */

static inline int swap_is_zero_pfn(unsigned long pfn)
{
	return pfn == swap_zero_pfn;
}

#endif /* __HAVE_COLOR_ZERO_PAGE */

#else /* LINUX_VERSION_CODE >= KERNEL_VERSION(3, 8, 0) */

static inline int swap_is_zero_pfn(unsigned long pfn)
{
#ifndef is_zero_pfn
	return pfn == swap_zero_pfn;
#else /* is_zero_pfn */
	return is_zero_pfn(pfn);
#endif /* is_zero_pfn */
}

#endif /* LINUX_VERSION_CODE >= KERNEL_VERSION(3, 8, 0) */
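
/*
 * Mirrors the kernel's stack guard page test (stack_guard_page_start()/
 * stack_guard_page_end(), added around 2.6.39): the guard page at a
 * growable stack's boundary must be skipped rather than faulted in.
 */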
static inline int stack_guard_page(struct vm_area_struct *vma, unsigned long addr)
{
	return stack_guard_page_start(vma, addr) ||
			stack_guard_page_end(vma, addr+PAGE_SIZE);
}
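
/*
 * __get_user_pages_uprobe() is essentially a private copy of the kernel's
 * __get_user_pages() with the cond_resched() calls commented out, so that
 * pages can be looked up from probe handlers without voluntary
 * rescheduling.
 */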
static int __get_user_pages_uprobe(struct task_struct *tsk, struct mm_struct *mm,
		unsigned long start, int nr_pages, unsigned int gup_flags,
		struct page **pages, struct vm_area_struct **vmas,
		int *nonblocking)
{
	int i;
	unsigned long vm_flags;

	if (nr_pages <= 0) {
		return 0;
	}

	VM_BUG_ON(!!pages != !!(gup_flags & FOLL_GET));

	/*
	 * Require read or write permissions.
	 * If FOLL_FORCE is set, we only require the "MAY" flags.
	 */
	vm_flags  = (gup_flags & FOLL_WRITE) ?
			(VM_WRITE | VM_MAYWRITE) : (VM_READ | VM_MAYREAD);
	vm_flags &= (gup_flags & FOLL_FORCE) ?
			(VM_MAYREAD | VM_MAYWRITE) : (VM_READ | VM_WRITE);
	i = 0;

	do {
		struct vm_area_struct *vma;

		vma = find_extend_vma(mm, start);
		if (!vma && dbi_in_gate_area_no_xxx(start)) {
			unsigned long pg = start & PAGE_MASK;
			pgd_t *pgd;
			pud_t *pud;
			pmd_t *pmd;
			pte_t *pte;

			/* user gate pages are read-only */
			if (gup_flags & FOLL_WRITE) {
				return i ? : -EFAULT;
			}
			if (pg > TASK_SIZE)
				pgd = pgd_offset_k(pg);
			else
				pgd = pgd_offset_gate(mm, pg);
			BUG_ON(pgd_none(*pgd));
			pud = pud_offset(pgd, pg);
			BUG_ON(pud_none(*pud));
			pmd = pmd_offset(pud, pg);
			if (pmd_none(*pmd)) {
				return i ? : -EFAULT;
			}
			VM_BUG_ON(pmd_trans_huge(*pmd));
			pte = pte_offset_map(pmd, pg);
			if (pte_none(*pte)) {
				pte_unmap(pte);
				return i ? : -EFAULT;
			}
			vma = get_gate_vma(mm);
			if (pages) {
				struct page *page;

				page = vm_normal_page(vma, start, *pte);
				if (!page) {
					if (!(gup_flags & FOLL_DUMP) &&
							swap_is_zero_pfn(pte_pfn(*pte)))
						page = pte_page(*pte);
					else {
						pte_unmap(pte);
						return i ? : -EFAULT;
					}
				}
				pages[i] = page;
				get_page(page);
			}
			pte_unmap(pte);
			goto next_page;
		}

		if (!vma ||
				(vma->vm_flags & (VM_IO | VM_PFNMAP)) ||
				!(vm_flags & vma->vm_flags)) {
			return i ? : -EFAULT;
		}

		if (is_vm_hugetlb_page(vma)) {
			i = follow_hugetlb_page(mm, vma, pages, vmas,
					&start, &nr_pages, i, gup_flags);
			continue;
		}

		do {
			struct page *page;
			unsigned int foll_flags = gup_flags;

			/*
			 * If we have a pending SIGKILL, don't keep faulting
			 * pages and potentially allocating memory.
			 */
			if (unlikely(fatal_signal_pending(current))) {
				return i ? i : -ERESTARTSYS;
			}

			/* cond_resched(); */
			while (!(page = follow_page(vma, start, foll_flags))) {
				int ret;
				unsigned int fault_flags = 0;

				/* For mlock, just skip the stack guard page. */
				if (foll_flags & FOLL_MLOCK) {
					if (stack_guard_page(vma, start))
						goto next_page;
				}
				if (foll_flags & FOLL_WRITE)
					fault_flags |= FAULT_FLAG_WRITE;
				if (nonblocking)
					fault_flags |= FAULT_FLAG_ALLOW_RETRY;
				if (foll_flags & FOLL_NOWAIT)
					fault_flags |= (FAULT_FLAG_ALLOW_RETRY | FAULT_FLAG_RETRY_NOWAIT);

				ret = handle_mm_fault(mm, vma, start,
						fault_flags);

				if (ret & VM_FAULT_ERROR) {
					if (ret & VM_FAULT_OOM) {
						return i ? i : -ENOMEM;
					}
					if (ret & (VM_FAULT_HWPOISON |
							VM_FAULT_HWPOISON_LARGE)) {
						if (i)
							return i;
						else if (gup_flags & FOLL_HWPOISON)
							return -EHWPOISON;
						else
							return -EFAULT;
					}
					if (ret & VM_FAULT_SIGBUS) {
						return i ? i : -EFAULT;
					}
					BUG();
				}

				if (tsk) {
					if (ret & VM_FAULT_MAJOR)
						tsk->maj_flt++;
					else
						tsk->min_flt++;
				}

				if (ret & VM_FAULT_RETRY) {
					if (nonblocking)
						*nonblocking = 0;
					return i;
				}

				/*
				 * The VM_FAULT_WRITE bit tells us that
				 * do_wp_page has broken COW when necessary,
				 * even if maybe_mkwrite decided not to set
				 * pte_write. We can thus safely do subsequent
				 * page lookups as if they were reads. But only
				 * do so when looping for pte_write is futile:
				 * in some cases userspace may also be wanting
				 * to write to the gotten user page, which a
				 * read fault here might prevent (a readonly
				 * page might get reCOWed by userspace write).
				 */
				if ((ret & VM_FAULT_WRITE) &&
						!(vma->vm_flags & VM_WRITE))
					foll_flags &= ~FOLL_WRITE;

				/* cond_resched(); */
			}
			if (IS_ERR(page)) {
				return i ? i : PTR_ERR(page);
			}
			if (pages) {
				pages[i] = page;

				flush_anon_page(vma, page, start);
				flush_dcache_page(page);
			}
next_page:
			if (vmas)
				vmas[i] = vma;
			i++;
			start += PAGE_SIZE;
			nr_pages--;
		} while (nr_pages && start < vma->vm_end);
	} while (nr_pages);

	return i;
}

#else /* LINUX_VERSION_CODE > KERNEL_VERSION(2, 6, 38) */

/* Pre-2.6.39 variant: takes GUP_FLAGS_* style flags instead of FOLL_*. */
static int __get_user_pages_uprobe(struct task_struct *tsk, struct mm_struct *mm,
		unsigned long start, int len, int flags,
		struct page **pages, struct vm_area_struct **vmas)
{
	int i;
	unsigned int vm_flags = 0;
	int write = !!(flags & GUP_FLAGS_WRITE);
	int force = !!(flags & GUP_FLAGS_FORCE);
	int ignore = !!(flags & GUP_FLAGS_IGNORE_VMA_PERMISSIONS);

	if (len <= 0) {
		return 0;
	}

	/*
	 * Require read or write permissions.
	 * If 'force' is set, we only require the "MAY" flags.
	 */
	vm_flags  = write ? (VM_WRITE | VM_MAYWRITE) : (VM_READ | VM_MAYREAD);
	vm_flags &= force ? (VM_MAYREAD | VM_MAYWRITE) : (VM_READ | VM_WRITE);
	i = 0;

	do {
		struct vm_area_struct *vma;
		unsigned int foll_flags;

		vma = find_vma(mm, start);
		if (!vma && dbi_in_gate_area(tsk, start)) {
			unsigned long pg = start & PAGE_MASK;
			struct vm_area_struct *gate_vma = get_gate_vma(tsk);
			pgd_t *pgd;
			pud_t *pud;
			pmd_t *pmd;
			pte_t *pte;

			/* user gate pages are read-only */
			if (!ignore && write)
				return i ? : -EFAULT;
			if (pg > TASK_SIZE)
				pgd = pgd_offset_k(pg);
			else
				pgd = pgd_offset_gate(mm, pg);
			BUG_ON(pgd_none(*pgd));
			pud = pud_offset(pgd, pg);
			BUG_ON(pud_none(*pud));
			pmd = pmd_offset(pud, pg);
			if (pmd_none(*pmd))
				return i ? : -EFAULT;
			pte = pte_offset_map(pmd, pg);
			if (pte_none(*pte)) {
				pte_unmap(pte);
				return i ? : -EFAULT;
			}
			if (pages) {
				struct page *page = vm_normal_page(gate_vma, start, *pte);
				pages[i] = page;
				if (page)
					get_page(page);
			}
			pte_unmap(pte);
			if (vmas)
				vmas[i] = gate_vma;
			i++;
			start += PAGE_SIZE;
			len--;
			continue;
		}

		if (!vma ||
				(vma->vm_flags & (VM_IO | VM_PFNMAP)) ||
				(!ignore && !(vm_flags & vma->vm_flags)))
			return i ? : -EFAULT;

		if (is_vm_hugetlb_page(vma)) {
#if LINUX_VERSION_CODE <= KERNEL_VERSION(2,6,18)
			i = follow_hugetlb_page(mm, vma, pages, vmas,
					&start, &len, i);
#else /* LINUX_VERSION_CODE <= KERNEL_VERSION(2,6,18) */
			i = follow_hugetlb_page(mm, vma, pages, vmas,
					&start, &len, i, write);
#endif /* LINUX_VERSION_CODE <= KERNEL_VERSION(2,6,18) */
			continue;
		}

		foll_flags = FOLL_TOUCH;
		if (pages)
			foll_flags |= FOLL_GET;

#if LINUX_VERSION_CODE > KERNEL_VERSION(2,6,18)
#if LINUX_VERSION_CODE <= KERNEL_VERSION(2,6,30)
		if (!write && use_zero_page(vma))
			foll_flags |= FOLL_ANON;
#endif /* LINUX_VERSION_CODE <= KERNEL_VERSION(2,6,30) */
#endif /* LINUX_VERSION_CODE > KERNEL_VERSION(2,6,18) */

		do {
			struct page *page;

			if (write)
				foll_flags |= FOLL_WRITE;

			DBPRINTF("pages = %p vma = %p\n", pages, vma);
			while (!(page = follow_page(vma, start, foll_flags))) {
				int ret;

				ret = handle_mm_fault(mm, vma, start,
						foll_flags & FOLL_WRITE);

#if LINUX_VERSION_CODE <= KERNEL_VERSION(2,6,18)
				if (ret & VM_FAULT_WRITE)
					foll_flags &= ~FOLL_WRITE;

				switch (ret & ~VM_FAULT_WRITE) {
				case VM_FAULT_MINOR:
					tsk->min_flt++;
					break;
				case VM_FAULT_MAJOR:
					tsk->maj_flt++;
					break;
				case VM_FAULT_SIGBUS:
					return i ? i : -EFAULT;
				case VM_FAULT_OOM:
					return i ? i : -ENOMEM;
				default:
					BUG();
				}
#else /* LINUX_VERSION_CODE <= KERNEL_VERSION(2,6,18) */
				if (ret & VM_FAULT_ERROR) {
					if (ret & VM_FAULT_OOM)
						return i ? i : -ENOMEM;
					else if (ret & VM_FAULT_SIGBUS)
						return i ? i : -EFAULT;
					BUG();
				}
				if (ret & VM_FAULT_MAJOR)
					tsk->maj_flt++;
				else
					tsk->min_flt++;

				/*
				 * The VM_FAULT_WRITE bit tells us that
				 * do_wp_page has broken COW when necessary,
				 * even if maybe_mkwrite decided not to set
				 * pte_write. We can thus safely do subsequent
				 * page lookups as if they were reads. But only
				 * do so when looping for pte_write is futile:
				 * in some cases userspace may also be wanting
				 * to write to the gotten user page, which a
				 * read fault here might prevent (a readonly
				 * page might get reCOWed by userspace write).
				 */
				if ((ret & VM_FAULT_WRITE) &&
						!(vma->vm_flags & VM_WRITE))
					foll_flags &= ~FOLL_WRITE;
#endif /* LINUX_VERSION_CODE <= KERNEL_VERSION(2,6,18) */
			}

			if (IS_ERR(page))
				return i ? i : PTR_ERR(page);
			if (pages) {
				pages[i] = page;
#if LINUX_VERSION_CODE <= KERNEL_VERSION(2,6,18)
				flush_anon_page(page, start);
#else /* LINUX_VERSION_CODE <= KERNEL_VERSION(2,6,18) */
				flush_anon_page(vma, page, start);
#endif /* LINUX_VERSION_CODE <= KERNEL_VERSION(2,6,18) */
				flush_dcache_page(page);
			}
			if (vmas)
				vmas[i] = vma;
			i++;
			start += PAGE_SIZE;
			len--;
		} while (len && start < vma->vm_end);
	} while (len);

	return i;
}

#endif /* LINUX_VERSION_CODE > KERNEL_VERSION(2, 6, 38) */
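
/*
 * get_user_pages_uprobe() keeps the classic eight-argument
 * get_user_pages() interface: it translates write/force into whatever flag
 * style the running kernel expects and calls the local
 * __get_user_pages_uprobe() copy, falling back to the stock
 * get_user_pages() on kernels older than 2.6.29.
 */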
int get_user_pages_uprobe(struct task_struct *tsk, struct mm_struct *mm,
		unsigned long start, int len, int write, int force,
		struct page **pages, struct vm_area_struct **vmas)
{
#if LINUX_VERSION_CODE >= KERNEL_VERSION(2, 6, 29)
#if LINUX_VERSION_CODE > KERNEL_VERSION(2, 6, 38) /* FIXME: must be >= 32! */
	int flags = FOLL_TOUCH;

	if (pages)
		flags |= FOLL_GET;
	if (write)
		flags |= FOLL_WRITE;
	if (force)
		flags |= FOLL_FORCE;
#else /* LINUX_VERSION_CODE > KERNEL_VERSION(2, 6, 38) */
	int flags = 0;

	if (write)
		flags |= GUP_FLAGS_WRITE;
	if (force)
		flags |= GUP_FLAGS_FORCE;
#endif /* LINUX_VERSION_CODE > KERNEL_VERSION(2, 6, 38) */

	return __get_user_pages_uprobe(tsk, mm,
			start, len, flags,
#if LINUX_VERSION_CODE > KERNEL_VERSION(2, 6, 38)
			pages, vmas, NULL);
#else /* LINUX_VERSION_CODE > KERNEL_VERSION(2, 6, 38) */
			pages, vmas);
#endif /* LINUX_VERSION_CODE > KERNEL_VERSION(2, 6, 38) */
#else /* LINUX_VERSION_CODE >= KERNEL_VERSION(2, 6, 29) */
	return get_user_pages(tsk, mm, start, len, write, force, pages, vmas);
#endif /* LINUX_VERSION_CODE >= KERNEL_VERSION(2, 6, 29) */
}

#define ACCESS_PROCESS_OPTIMIZATION 0

#if ACCESS_PROCESS_OPTIMIZATION

#define GET_STEP_X(LEN, STEP) (((LEN) >= (STEP)) ? (STEP) : (LEN) % (STEP))
#define GET_STEP_4(LEN) GET_STEP_X((LEN), 4)
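
/*
 * GET_STEP_4(len) picks the widest transfer that still fits: 4 bytes while
 * at least four remain, otherwise the 1..3-byte remainder (the switches
 * below round a 3-byte tail down to a 2-byte access plus a 1-byte one).
 */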
static void read_data_current(unsigned long addr, void *buf, int len)
{
	int step;
	int pos = 0;

	for (step = GET_STEP_4(len); len; len -= step) {
		switch (GET_STEP_4(len)) {
		case 1:
			get_user(*(u8 *)(buf + pos), (u8 __user *)(addr + pos));
			step = 1;
			break;

		case 2:
		case 3:
			get_user(*(u16 *)(buf + pos), (u16 __user *)(addr + pos));
			step = 2;
			break;

		case 4:
			get_user(*(u32 *)(buf + pos), (u32 __user *)(addr + pos));
			step = 4;
			break;
		}

		pos += step;
	}
}

static void write_data_current(unsigned long addr, void *buf, int len)
{
	int step;
	int pos = 0;

	for (step = GET_STEP_4(len); len; len -= step) {
		switch (GET_STEP_4(len)) {
		case 1:
			put_user(*(u8 *)(buf + pos), (u8 __user *)(addr + pos));
			step = 1;
			break;

		case 2:
		case 3:
			put_user(*(u16 *)(buf + pos), (u16 __user *)(addr + pos));
			step = 2;
			break;

		case 4:
			put_user(*(u32 *)(buf + pos), (u32 __user *)(addr + pos));
			step = 4;
			break;
		}

		pos += step;
	}
}
#endif /* ACCESS_PROCESS_OPTIMIZATION */
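
/*
 * access_process_vm_atomic() follows the kernel's access_process_vm(), but
 * resolves pages through get_user_pages_uprobe() and maps them with
 * dbi_kmap_atomic(), presumably so it can be called from probe context.
 */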
int access_process_vm_atomic(struct task_struct *tsk, unsigned long addr, void *buf, int len, int write)
{
	struct mm_struct *mm;
	struct vm_area_struct *vma;
	void *old_buf = buf;

#if ACCESS_PROCESS_OPTIMIZATION
	if (write == 0 && tsk == current) {
		read_data_current(addr, buf, len);
		return len;
	}
#endif /* ACCESS_PROCESS_OPTIMIZATION */

	mm = tsk->mm; /* TODO: a reference should be taken via get_task_mm() */
	if (!mm)
		return 0;

	/* ignore errors, just check how much was successfully transferred */
	while (len) {
		int bytes, ret, offset;
		void *maddr;
		struct page *page = NULL;

		ret = get_user_pages_uprobe(tsk, mm, addr, 1,
				write, 1, &page, &vma);

		if (ret <= 0) {
			/*
			 * Check if this is a VM_IO | VM_PFNMAP VMA, which
			 * we can access using slightly different code.
			 */
#ifdef CONFIG_HAVE_IOREMAP_PROT
			vma = find_vma(mm, addr);
			if (!vma)
				break;
			if (vma->vm_ops && vma->vm_ops->access)
				ret = vma->vm_ops->access(vma, addr, buf,
						len, write);
			if (ret <= 0)
#endif /* CONFIG_HAVE_IOREMAP_PROT */
				break;
			bytes = ret;
		} else {
			bytes = len;
			offset = addr & (PAGE_SIZE-1);
			if (bytes > PAGE_SIZE-offset)
				bytes = PAGE_SIZE-offset;

			maddr = dbi_kmap_atomic(page);

			if (write) {
				copy_to_user_page(vma, page, addr,
						maddr + offset, buf, bytes);
				set_page_dirty_lock(page);
			} else {
				copy_from_user_page(vma, page, addr,
						buf, maddr + offset, bytes);
			}

			dbi_kunmap_atomic(maddr);
			page_cache_release(page);
		}

		len -= bytes;
		buf += bytes;
		addr += bytes;
	}

	return buf - old_buf;
}
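
/*
 * page_present() reports whether the page backing 'address' in mm is
 * resident: it walks the pgd/pud/pmd/pte levels by hand and checks the
 * final PTE for presence and a valid PFN, without faulting anything in.
 */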
int page_present(struct mm_struct *mm, unsigned long address)
{
	pgd_t *pgd;
	pud_t *pud;
	pmd_t *pmd;
	pte_t *ptep, pte;
	unsigned long pfn;

	pgd = pgd_offset(mm, address);
	if (pgd_none(*pgd) || unlikely(pgd_bad(*pgd)))
		goto out;

	pud = pud_offset(pgd, address);
	if (pud_none(*pud) || unlikely(pud_bad(*pud)))
		goto out;

	pmd = pmd_offset(pud, address);
	if (pmd_none(*pmd) || unlikely(pmd_bad(*pmd)))
		goto out;

	ptep = pte_offset_map(pmd, address);
	if (!ptep)
		goto out;

	pte = *ptep;
	pte_unmap(ptep);
	if (pte_present(pte)) {
		pfn = pte_pfn(pte);
		if (pfn_valid(pfn)) {
			return 1;
		}
	}

out:
	return 0;
}

EXPORT_SYMBOL_GPL(page_present);
EXPORT_SYMBOL_GPL(access_process_vm_atomic);