Fix for commit 15b31911841bc8ebbd779a658dcde6e8d104e268
[kernel/swap-modules.git] / kprobe / dbi_kprobes_deps.c
/*
 *  Dynamic Binary Instrumentation Module based on KProbes
 *  modules/kprobe/dbi_kprobes_deps.c
 *
 * This program is free software; you can redistribute it and/or modify
 * it under the terms of the GNU General Public License as published by
 * the Free Software Foundation; either version 2 of the License, or
 * (at your option) any later version.
 *
 * This program is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 * GNU General Public License for more details.
 *
 * You should have received a copy of the GNU General Public License
 * along with this program; if not, write to the Free Software
 * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
 *
 * Copyright (C) Samsung Electronics, 2006-2010
 *
 * 2008-2009    Alexey Gerenkov <a.gerenkov@samsung.com>: initial User-Space
 *              Probes implementation; support for x86/ARM/MIPS in both user and kernel space.
 * 2010         Ekaterina Gorelkina <e.gorelkina@samsung.com>: redesigned the module to separate the core and arch parts
 *
 */

#include <linux/module.h>
#include <linux/sched.h>

#include <asm/pgtable.h>

#include "dbi_kprobes_deps.h"
#include "dbi_kdebug.h"


#include <linux/slab.h>
#include <linux/mm.h>

unsigned long sched_addr;
unsigned long fork_addr;

#if LINUX_VERSION_CODE > KERNEL_VERSION(2, 6, 29)
static struct mm_struct* init_mm_ptr;
struct mm_struct init_mm;
#endif


#if LINUX_VERSION_CODE > KERNEL_VERSION(2, 6, 36)
static inline void *dbi_kmap_atomic(struct page *page)
{
        return kmap_atomic(page);
}
static inline void dbi_kunmap_atomic(void *kvaddr)
{
        kunmap_atomic(kvaddr);
}
#else /* LINUX_VERSION_CODE > KERNEL_VERSION(2, 6, 36) */
static inline void *dbi_kmap_atomic(struct page *page)
{
        return kmap_atomic(page, KM_USER0);
}

static inline void dbi_kunmap_atomic(void *kvaddr)
{
        kunmap_atomic(kvaddr, KM_USER0);
}
#endif /* LINUX_VERSION_CODE > KERNEL_VERSION(2, 6, 36) */
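
/*
 * The dbi_kmap_atomic()/dbi_kunmap_atomic() wrappers above exist because
 * kmap_atomic()/kunmap_atomic() lost their KM_USER0 slot argument in 2.6.37;
 * the wrappers hide the signature difference so the callers below
 * (access_process_vm_atomic) can stay version-agnostic.
 */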


#if LINUX_VERSION_CODE >= KERNEL_VERSION(3, 4, 0)
DECLARE_MOD_FUNC_DEP(do_mmap_pgoff, unsigned long, struct file *file, unsigned long addr, unsigned long len, unsigned long prot, unsigned long flags, unsigned long pgoff);
DECLARE_MOD_DEP_WRAPPER(do_mmap_pgoff, unsigned long, struct file *file, unsigned long addr, unsigned long len, unsigned long prot, unsigned long flags, unsigned long pgoff)
IMP_MOD_DEP_WRAPPER(do_mmap_pgoff, file, addr, len, prot, flags, pgoff)
#endif /* LINUX_VERSION_CODE >= KERNEL_VERSION(3, 4, 0) */

/* copy_to_user_page */
#ifndef copy_to_user_page
static DECLARE_MOD_FUNC_DEP(copy_to_user_page, void, struct vm_area_struct *vma, struct page *page, unsigned long uaddr, void *dst, const void *src, unsigned long len);
DECLARE_MOD_DEP_WRAPPER(copy_to_user_page, void, struct vm_area_struct *vma, struct page *page, unsigned long uaddr, void *dst, const void *src, unsigned long len)
IMP_MOD_DEP_WRAPPER(copy_to_user_page, vma, page, uaddr, dst, src, len)
#endif /* copy_to_user_page */


static DECLARE_MOD_FUNC_DEP(access_process_vm, int, struct task_struct * tsk, unsigned long addr, void *buf, int len, int write);

static DECLARE_MOD_FUNC_DEP(find_extend_vma, struct vm_area_struct *, struct mm_struct * mm, unsigned long addr);

#if LINUX_VERSION_CODE <= KERNEL_VERSION(2, 6, 30)
#if LINUX_VERSION_CODE > KERNEL_VERSION(2, 6, 18)
static DECLARE_MOD_FUNC_DEP(handle_mm_fault, int, struct mm_struct *mm, struct vm_area_struct *vma, unsigned long address, int write_access);
#endif
#else
static DECLARE_MOD_FUNC_DEP(handle_mm_fault, int, struct mm_struct *mm, struct vm_area_struct *vma, unsigned long address, unsigned int flags);
#endif /* LINUX_VERSION_CODE <= KERNEL_VERSION(2, 6, 30) */

#if LINUX_VERSION_CODE > KERNEL_VERSION(2, 6, 38)
static DECLARE_MOD_FUNC_DEP(get_gate_vma, struct vm_area_struct *, struct mm_struct *mm);
#else /* LINUX_VERSION_CODE > KERNEL_VERSION(2, 6, 38) */
static DECLARE_MOD_FUNC_DEP(get_gate_vma, struct vm_area_struct *, struct task_struct *tsk);
#endif /* LINUX_VERSION_CODE > KERNEL_VERSION(2, 6, 38) */

#ifdef CONFIG_HUGETLB_PAGE
DECLARE_MOD_FUNC_DEP(follow_hugetlb_page, int, struct mm_struct *mm, \
                struct vm_area_struct *vma, struct page **pages, \
                struct vm_area_struct **vmas, unsigned long *position, int *length, \
                int i, int write);
#endif

#ifdef __HAVE_ARCH_GATE_AREA
#if LINUX_VERSION_CODE > KERNEL_VERSION(2, 6, 38)
DECLARE_MOD_FUNC_DEP(in_gate_area, int, struct mm_struct *mm, unsigned long addr);
#else /* LINUX_VERSION_CODE > KERNEL_VERSION(2, 6, 38) */
DECLARE_MOD_FUNC_DEP(in_gate_area, int, struct task_struct *task, unsigned long addr);
#endif /* LINUX_VERSION_CODE > KERNEL_VERSION(2, 6, 38) */
#endif /* __HAVE_ARCH_GATE_AREA */

#if LINUX_VERSION_CODE > KERNEL_VERSION(2, 6, 38)
static DECLARE_MOD_FUNC_DEP(in_gate_area_no_mm, int, unsigned long addr);
#else /* LINUX_VERSION_CODE > KERNEL_VERSION(2, 6, 38) */
static DECLARE_MOD_FUNC_DEP(in_gate_area_no_task, int, unsigned long addr);
#endif /* LINUX_VERSION_CODE > KERNEL_VERSION(2, 6, 38) */


static DECLARE_MOD_FUNC_DEP(follow_page, \
                struct page *, struct vm_area_struct * vma, \
                unsigned long address, unsigned int foll_flags);
static DECLARE_MOD_FUNC_DEP(__flush_anon_page, \
                void, struct vm_area_struct *vma, struct page *page, \
                unsigned long vmaddr);
static DECLARE_MOD_FUNC_DEP(vm_normal_page, \
                struct page *, struct vm_area_struct *vma, \
                unsigned long addr, pte_t pte);
static DECLARE_MOD_FUNC_DEP(flush_ptrace_access, \
                void, struct vm_area_struct *vma, struct page *page, \
                unsigned long uaddr, void *kaddr, unsigned long len, int write);


#if (LINUX_VERSION_CODE != KERNEL_VERSION(2, 6, 16))
static DECLARE_MOD_FUNC_DEP(put_task_struct, \
                void, struct task_struct *tsk);
#else
static DECLARE_MOD_FUNC_DEP(put_task_struct, \
                void, struct rcu_head * rhp);
#endif

        DECLARE_MOD_DEP_WRAPPER(access_process_vm, int, struct task_struct *tsk, unsigned long addr, void *buf, int len, int write)
IMP_MOD_DEP_WRAPPER (access_process_vm, tsk, addr, buf, len, write)

        DECLARE_MOD_DEP_WRAPPER (find_extend_vma, struct vm_area_struct *, struct mm_struct * mm, unsigned long addr)
IMP_MOD_DEP_WRAPPER (find_extend_vma, mm, addr)

#if LINUX_VERSION_CODE <= KERNEL_VERSION(2, 6, 30)
#if LINUX_VERSION_CODE > KERNEL_VERSION(2, 6, 18)
        DECLARE_MOD_DEP_WRAPPER (handle_mm_fault, \
                        int, struct mm_struct *mm, struct vm_area_struct *vma, unsigned long address, int write_access)
IMP_MOD_DEP_WRAPPER (handle_mm_fault, mm, vma, address, write_access)
#endif
#else
        DECLARE_MOD_DEP_WRAPPER (handle_mm_fault, \
                        int, struct mm_struct *mm, struct vm_area_struct *vma, unsigned long address, unsigned int flags)
IMP_MOD_DEP_WRAPPER (handle_mm_fault, mm, vma, address, flags)
#endif

#if LINUX_VERSION_CODE > KERNEL_VERSION(2, 6, 38)
        DECLARE_MOD_DEP_WRAPPER (get_gate_vma, \
                        struct vm_area_struct *, struct mm_struct *mm)
IMP_MOD_DEP_WRAPPER (get_gate_vma, mm)
#else /* LINUX_VERSION_CODE > KERNEL_VERSION(2, 6, 38) */
        DECLARE_MOD_DEP_WRAPPER (get_gate_vma, \
                        struct vm_area_struct *, struct task_struct *tsk)
IMP_MOD_DEP_WRAPPER (get_gate_vma, tsk)
#endif /* LINUX_VERSION_CODE > KERNEL_VERSION(2, 6, 38) */

#ifdef CONFIG_HUGETLB_PAGE
        DECLARE_MOD_DEP_WRAPPER (follow_hugetlb_page, int, struct mm_struct *mm, struct vm_area_struct *vma, struct page **pages, struct vm_area_struct **vmas, unsigned long *position, int *length, int i, unsigned int write)
        IMP_MOD_DEP_WRAPPER (follow_hugetlb_page, mm, vma, pages, vmas, position, length, i, write)
#endif

static inline int dbi_in_gate_area(struct task_struct *task, unsigned long addr)
{
#ifdef __HAVE_ARCH_GATE_AREA
#if LINUX_VERSION_CODE > KERNEL_VERSION(2, 6, 38)
        struct mm_struct *mm = task->mm;
        IMP_MOD_DEP_WRAPPER (in_gate_area, mm, addr)
#else /* LINUX_VERSION_CODE > KERNEL_VERSION(2, 6, 38) */
        IMP_MOD_DEP_WRAPPER (in_gate_area, task, addr)
#endif /* LINUX_VERSION_CODE > KERNEL_VERSION(2, 6, 38) */
#else /*__HAVE_ARCH_GATE_AREA */
        return in_gate_area(task, addr);
#endif/*__HAVE_ARCH_GATE_AREA */
}


#if LINUX_VERSION_CODE > KERNEL_VERSION(2, 6, 38)
DECLARE_MOD_DEP_WRAPPER(in_gate_area_no_mm, int, unsigned long addr)
IMP_MOD_DEP_WRAPPER(in_gate_area_no_mm, addr)
#else /* LINUX_VERSION_CODE > KERNEL_VERSION(2, 6, 38) */
DECLARE_MOD_DEP_WRAPPER(in_gate_area_no_task, int, unsigned long addr)
IMP_MOD_DEP_WRAPPER(in_gate_area_no_task, addr)
#endif /* LINUX_VERSION_CODE > KERNEL_VERSION(2, 6, 38) */

static inline int dbi_in_gate_area_no_xxx(unsigned long addr)
{
#if LINUX_VERSION_CODE > KERNEL_VERSION(2, 6, 38)
        return in_gate_area_no_mm(addr);
#else /* LINUX_VERSION_CODE > KERNEL_VERSION(2, 6, 38) */
        return in_gate_area_no_task(addr);
#endif /* LINUX_VERSION_CODE > KERNEL_VERSION(2, 6, 38) */
}


#if (LINUX_VERSION_CODE != KERNEL_VERSION(2, 6, 11))
DECLARE_MOD_DEP_WRAPPER (follow_page, \
                        struct page *, struct vm_area_struct * vma, \
                        unsigned long address, unsigned int foll_flags)
IMP_MOD_DEP_WRAPPER (follow_page, vma, address, foll_flags)
#endif
DECLARE_MOD_DEP_WRAPPER (__flush_anon_page, \
                        void, struct vm_area_struct *vma, \
                        struct page *page, unsigned long vmaddr)
IMP_MOD_DEP_WRAPPER (__flush_anon_page, vma, page, vmaddr)

DECLARE_MOD_DEP_WRAPPER(vm_normal_page, \
                        struct page *, struct vm_area_struct *vma, \
                        unsigned long addr, pte_t pte)
IMP_MOD_DEP_WRAPPER (vm_normal_page, vma, addr, pte)

DECLARE_MOD_DEP_WRAPPER(flush_ptrace_access, \
        void, struct vm_area_struct *vma, struct page *page, \
        unsigned long uaddr, void *kaddr, unsigned long len, int write)
IMP_MOD_DEP_WRAPPER(flush_ptrace_access, vma, page, uaddr, kaddr, len, write)
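
/*
 * The DECLARE_MOD_DEP_WRAPPER/IMP_MOD_DEP_WRAPPER pairs above generate thin
 * wrappers around mm helpers that are not exported to modules; the function
 * pointers behind them are filled in by symbol lookup at load time in
 * init_module_dependencies() below (see INIT_MOD_DEP_VAR and swap_ksyms).
 */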


int init_module_dependencies(void)
{

#if LINUX_VERSION_CODE > KERNEL_VERSION(2, 6, 29)
        init_mm_ptr = (struct mm_struct*)swap_ksyms("init_mm");
//      memcmp(init_mm_ptr, &init_mm, sizeof(struct mm_struct));
#endif

#if LINUX_VERSION_CODE > KERNEL_VERSION(2, 6, 18)
        INIT_MOD_DEP_VAR(handle_mm_fault, handle_mm_fault);
#endif

#ifndef copy_to_user_page
        INIT_MOD_DEP_VAR(copy_to_user_page, copy_to_user_page);
#endif /* copy_to_user_page */

        INIT_MOD_DEP_VAR(find_extend_vma, find_extend_vma);
        INIT_MOD_DEP_VAR(get_gate_vma, get_gate_vma);
        INIT_MOD_DEP_VAR(flush_ptrace_access, flush_ptrace_access);

#ifdef CONFIG_HUGETLB_PAGE
        INIT_MOD_DEP_VAR(follow_hugetlb_page, follow_hugetlb_page);
#endif

#ifdef  __HAVE_ARCH_GATE_AREA
        INIT_MOD_DEP_VAR(in_gate_area, in_gate_area);
#endif

#if (LINUX_VERSION_CODE > KERNEL_VERSION(2, 6, 38))
        INIT_MOD_DEP_VAR(in_gate_area_no_mm, in_gate_area_no_mm);
#else /* (LINUX_VERSION_CODE > KERNEL_VERSION(2, 6, 38))  */
        INIT_MOD_DEP_VAR(in_gate_area_no_task, in_gate_area_no_task);
#endif /* (LINUX_VERSION_CODE > KERNEL_VERSION(2, 6, 38))  */

        INIT_MOD_DEP_VAR(follow_page, follow_page);

        INIT_MOD_DEP_VAR(__flush_anon_page, __flush_anon_page);
        INIT_MOD_DEP_VAR(vm_normal_page, vm_normal_page);
        INIT_MOD_DEP_VAR(access_process_vm, access_process_vm);

#if (LINUX_VERSION_CODE != KERNEL_VERSION(2, 6, 16))
# if (LINUX_VERSION_CODE < KERNEL_VERSION(2, 6, 11))
        INIT_MOD_DEP_VAR(put_task_struct, put_task_struct);
# else
        INIT_MOD_DEP_VAR(put_task_struct, __put_task_struct);
# endif
#else /*2.6.16 */
        INIT_MOD_DEP_VAR(put_task_struct, __put_task_struct_cb);
#endif

#if (LINUX_VERSION_CODE >= KERNEL_VERSION(3, 4, 0))
        INIT_MOD_DEP_VAR(do_mmap_pgoff, do_mmap_pgoff);
#endif

        return 0;
}

#if LINUX_VERSION_CODE <= KERNEL_VERSION(2, 6, 38) /* FIXME: must be < 32 */
#define GUP_FLAGS_WRITE                  0x1
#define GUP_FLAGS_FORCE                  0x2
#define GUP_FLAGS_IGNORE_VMA_PERMISSIONS 0x4
#define GUP_FLAGS_IGNORE_SIGKILL         0x8
#endif /* LINUX_VERSION_CODE <= KERNEL_VERSION(2, 6, 38) */
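
/*
 * Note: these GUP_FLAGS_* values mirror the internal get_user_pages() flags
 * that lived in mm/internal.h on roughly 2.6.29-2.6.31 kernels, before the
 * FOLL_* flags took over; the FIXME above records that the version check
 * should really be "< 2.6.32".
 */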

#if LINUX_VERSION_CODE > KERNEL_VERSION(2,6,18)
static inline int use_zero_page(struct vm_area_struct *vma)
{
        /*
         * We don't want to optimize FOLL_ANON for make_pages_present()
         * when it tries to page in a VM_LOCKED region. As to VM_SHARED,
         * we want to get the page from the page tables to make sure
         * that we serialize and update with any other user of that
         * mapping.
         */
        if (vma->vm_flags & (VM_LOCKED | VM_SHARED))
                return 0;
        /*
         * And if we have a fault routine, it's not an anonymous region.
         */
        return !vma->vm_ops || !vma->vm_ops->fault;
}


#if LINUX_VERSION_CODE > KERNEL_VERSION(2, 6, 38)
static unsigned long zero_pfn __read_mostly;

#ifndef is_zero_pfn
static inline int is_zero_pfn(unsigned long pfn)
{
        return pfn == zero_pfn;
}
#endif

static inline int stack_guard_page(struct vm_area_struct *vma, unsigned long addr)
{
        return stack_guard_page_start(vma, addr) ||
               stack_guard_page_end(vma, addr+PAGE_SIZE);
}

static int __get_user_pages_uprobe(struct task_struct *tsk, struct mm_struct *mm,
                     unsigned long start, int nr_pages, unsigned int gup_flags,
                     struct page **pages, struct vm_area_struct **vmas,
                     int *nonblocking)
{
        int i;
        unsigned long vm_flags;

        if (nr_pages <= 0) {
                return 0;
        }

        VM_BUG_ON(!!pages != !!(gup_flags & FOLL_GET));

        /*
         * Require read or write permissions.
         * If FOLL_FORCE is set, we only require the "MAY" flags.
         */
        vm_flags  = (gup_flags & FOLL_WRITE) ?
                        (VM_WRITE | VM_MAYWRITE) : (VM_READ | VM_MAYREAD);
        vm_flags &= (gup_flags & FOLL_FORCE) ?
                        (VM_MAYREAD | VM_MAYWRITE) : (VM_READ | VM_WRITE);
        i = 0;

        do {
                struct vm_area_struct *vma;

                vma = find_extend_vma(mm, start);
                if (!vma && dbi_in_gate_area_no_xxx(start)) {
                        unsigned long pg = start & PAGE_MASK;
                        pgd_t *pgd;
                        pud_t *pud;
                        pmd_t *pmd;
                        pte_t *pte;

                        /* user gate pages are read-only */
                        if (gup_flags & FOLL_WRITE) {
                                return i ? : -EFAULT;
                        }
                        if (pg > TASK_SIZE)
                                pgd = pgd_offset_k(pg);
                        else
                                pgd = pgd_offset_gate(mm, pg);
                        BUG_ON(pgd_none(*pgd));
                        pud = pud_offset(pgd, pg);
                        BUG_ON(pud_none(*pud));
                        pmd = pmd_offset(pud, pg);
                        if (pmd_none(*pmd)) {
                                return i ? : -EFAULT;
                        }
                        VM_BUG_ON(pmd_trans_huge(*pmd));
                        pte = pte_offset_map(pmd, pg);
                        if (pte_none(*pte)) {
                                pte_unmap(pte);
                                return i ? : -EFAULT;
                        }
                        vma = get_gate_vma(mm);
                        if (pages) {
                                struct page *page;

                                page = vm_normal_page(vma, start, *pte);
                                if (!page) {
                                        if (!(gup_flags & FOLL_DUMP) &&
                                             is_zero_pfn(pte_pfn(*pte)))
                                                page = pte_page(*pte);
                                        else {
                                                pte_unmap(pte);
                                                return i ? : -EFAULT;
                                        }
                                }
                                pages[i] = page;
                                get_page(page);
                        }
                        pte_unmap(pte);
                        goto next_page;
                }

                if (!vma ||
                    (vma->vm_flags & (VM_IO | VM_PFNMAP)) ||
                    !(vm_flags & vma->vm_flags)) {
                        return i ? : -EFAULT;
                }

                if (is_vm_hugetlb_page(vma)) {
                        i = follow_hugetlb_page(mm, vma, pages, vmas,
                                        &start, &nr_pages, i, gup_flags);
                        continue;
                }

                do {
                        struct page *page;
                        unsigned int foll_flags = gup_flags;

                        /*
                         * If we have a pending SIGKILL, don't keep faulting
                         * pages and potentially allocating memory.
                         */
                        if (unlikely(fatal_signal_pending(current))) {
                                return i ? i : -ERESTARTSYS;
                        }

                        /* cond_resched(); */
                        while (!(page = follow_page(vma, start, foll_flags))) {
                                int ret;
                                unsigned int fault_flags = 0;

                                /* For mlock, just skip the stack guard page. */
                                if (foll_flags & FOLL_MLOCK) {
                                        if (stack_guard_page(vma, start))
                                                goto next_page;
                                }
                                if (foll_flags & FOLL_WRITE)
                                        fault_flags |= FAULT_FLAG_WRITE;
                                if (nonblocking)
                                        fault_flags |= FAULT_FLAG_ALLOW_RETRY;
                                if (foll_flags & FOLL_NOWAIT)
                                        fault_flags |= (FAULT_FLAG_ALLOW_RETRY | FAULT_FLAG_RETRY_NOWAIT);

                                ret = handle_mm_fault(mm, vma, start,
                                                        fault_flags);

                                if (ret & VM_FAULT_ERROR) {
                                        if (ret & VM_FAULT_OOM) {
                                                return i ? i : -ENOMEM;
                                        }
                                        if (ret & (VM_FAULT_HWPOISON |
                                                   VM_FAULT_HWPOISON_LARGE)) {
                                                if (i) {
                                                        return i;
                                                }
                                                else if (gup_flags & FOLL_HWPOISON) {
                                                        return -EHWPOISON;
                                                }
                                                else {
                                                        return -EFAULT;
                                                }
                                        }
                                        if (ret & VM_FAULT_SIGBUS) {
                                                return i ? i : -EFAULT;
                                        }
                                        BUG();
                                }

                                if (tsk) {
                                        if (ret & VM_FAULT_MAJOR)
                                                tsk->maj_flt++;
                                        else
                                                tsk->min_flt++;
                                }

                                if (ret & VM_FAULT_RETRY) {
                                        if (nonblocking)
                                                *nonblocking = 0;
                                        return i;
                                }

                                /*
                                 * The VM_FAULT_WRITE bit tells us that
                                 * do_wp_page has broken COW when necessary,
                                 * even if maybe_mkwrite decided not to set
                                 * pte_write. We can thus safely do subsequent
                                 * page lookups as if they were reads. But only
                                 * do so when looping for pte_write is futile:
                                 * in some cases userspace may also be wanting
                                 * to write to the gotten user page, which a
                                 * read fault here might prevent (a readonly
                                 * page might get reCOWed by userspace write).
                                 */
                                if ((ret & VM_FAULT_WRITE) &&
                                    !(vma->vm_flags & VM_WRITE))
                                        foll_flags &= ~FOLL_WRITE;

                                /* cond_resched(); */
                        }
                        if (IS_ERR(page)) {
                                return i ? i : PTR_ERR(page);
                        }
                        if (pages) {
                                pages[i] = page;

                                flush_anon_page(vma, page, start);
                                flush_dcache_page(page);
                        }
next_page:
                        if (vmas)
                                vmas[i] = vma;
                        i++;
                        start += PAGE_SIZE;
                        nr_pages--;
                } while (nr_pages && start < vma->vm_end);
        } while (nr_pages);

        return i;
}
#else /* LINUX_VERSION_CODE > KERNEL_VERSION(2, 6, 38) */

static int __get_user_pages_uprobe(struct task_struct *tsk, struct mm_struct *mm,
                     unsigned long start, int len, int flags,
                struct page **pages, struct vm_area_struct **vmas)
{
        int i;
        unsigned int vm_flags = 0;
        int write = !!(flags & GUP_FLAGS_WRITE);
        int force = !!(flags & GUP_FLAGS_FORCE);
        int ignore = !!(flags & GUP_FLAGS_IGNORE_VMA_PERMISSIONS);
        int ignore_sigkill = !!(flags & GUP_FLAGS_IGNORE_SIGKILL);

        if (len <= 0)
                return 0;
        /*
         * Require read or write permissions.
         * If 'force' is set, we only require the "MAY" flags.
         */
        vm_flags  = write ? (VM_WRITE | VM_MAYWRITE) : (VM_READ | VM_MAYREAD);
        vm_flags &= force ? (VM_MAYREAD | VM_MAYWRITE) : (VM_READ | VM_WRITE);
        i = 0;

        do {
                struct vm_area_struct *vma;
                unsigned int foll_flags;

                vma = find_vma(mm, start);
                if (!vma && dbi_in_gate_area(tsk, start)) {
                        unsigned long pg = start & PAGE_MASK;
                        struct vm_area_struct *gate_vma = get_gate_vma(tsk);
                        pgd_t *pgd;
                        pud_t *pud;
                        pmd_t *pmd;
                        pte_t *pte;

                        /* user gate pages are read-only */
                        if (!ignore && write)
                                return i ? : -EFAULT;
                        if (pg > TASK_SIZE)
                                pgd = pgd_offset_k(pg);
                        else
                                pgd = pgd_offset_gate(mm, pg);
                        BUG_ON(pgd_none(*pgd));
                        pud = pud_offset(pgd, pg);
                        BUG_ON(pud_none(*pud));
                        pmd = pmd_offset(pud, pg);
                        if (pmd_none(*pmd))
                                return i ? : -EFAULT;
                        pte = pte_offset_map(pmd, pg);
                        if (pte_none(*pte)) {
                                pte_unmap(pte);
                                return i ? : -EFAULT;
                        }
                        if (pages) {
                                struct page *page = vm_normal_page(gate_vma, start, *pte);
                                pages[i] = page;
                                if (page)
                                        get_page(page);
                        }
                        pte_unmap(pte);
                        if (vmas)
                                vmas[i] = gate_vma;
                        i++;
                        start += PAGE_SIZE;
                        len--;
                        continue;
                }

                if (!vma ||
                    (vma->vm_flags & (VM_IO | VM_PFNMAP)) ||
                    (!ignore && !(vm_flags & vma->vm_flags)))
                        return i ? : -EFAULT;

                if (is_vm_hugetlb_page(vma)) {
#if  LINUX_VERSION_CODE <= KERNEL_VERSION(2,6,18)
                        i = follow_hugetlb_page(mm, vma, pages, vmas,
                                                &start, &len, i);
#else
                        i = follow_hugetlb_page(mm, vma, pages, vmas,
                                                &start, &len, i, write);
#endif
                        continue;
                }

                foll_flags = FOLL_TOUCH;
                if (pages)
                        foll_flags |= FOLL_GET;

#if LINUX_VERSION_CODE > KERNEL_VERSION(2,6,18)
#if LINUX_VERSION_CODE <= KERNEL_VERSION(2,6,30)
                if (!write && use_zero_page(vma))
                  foll_flags |= FOLL_ANON;
#endif
#endif

                do {
                        struct page *page;

#if 0
                        /*
                         * If we have a pending SIGKILL, don't keep faulting
                         * pages and potentially allocating memory, unless
                         * current is handling munlock--e.g., on exit. In
                         * that case, we are not allocating memory.  Rather,
                         * we're only unlocking already resident/mapped pages.
                         */
                        if (unlikely(!ignore_sigkill &&
                                        fatal_signal_pending(current)))
                                return i ? i : -ERESTARTSYS;
#endif

                        if (write)
                                foll_flags |= FOLL_WRITE;


                        //cond_resched();

                        DBPRINTF ("pages = %p vma = %p\n", pages, vma);
                        while (!(page = follow_page(vma, start, foll_flags))) {
                                int ret;
                                ret = handle_mm_fault(mm, vma, start,
                                                foll_flags & FOLL_WRITE);

#if  LINUX_VERSION_CODE <= KERNEL_VERSION(2,6,18)
                                if (ret & VM_FAULT_WRITE)
                                  foll_flags &= ~FOLL_WRITE;

                                switch (ret & ~VM_FAULT_WRITE) {
                                case VM_FAULT_MINOR:
                                  tsk->min_flt++;
                                  break;
                                case VM_FAULT_MAJOR:
                                  tsk->maj_flt++;
                                  break;
                                case VM_FAULT_SIGBUS:
                                  return i ? i : -EFAULT;
                                case VM_FAULT_OOM:
                                  return i ? i : -ENOMEM;
                                default:
                                  BUG();
                                }

#else
                                if (ret & VM_FAULT_ERROR) {
                                  if (ret & VM_FAULT_OOM)
                                    return i ? i : -ENOMEM;
                                  else if (ret & VM_FAULT_SIGBUS)
                                    return i ? i : -EFAULT;
                                  BUG();
                                }
                                if (ret & VM_FAULT_MAJOR)
                                  tsk->maj_flt++;
                                else
                                  tsk->min_flt++;

                                /*
                                 * The VM_FAULT_WRITE bit tells us that
                                 * do_wp_page has broken COW when necessary,
                                 * even if maybe_mkwrite decided not to set
                                 * pte_write. We can thus safely do subsequent
                                 * page lookups as if they were reads. But only
                                 * do so when looping for pte_write is futile:
                                 * in some cases userspace may also be wanting
                                 * to write to the gotten user page, which a
                                 * read fault here might prevent (a readonly
                                 * page might get reCOWed by userspace write).
                                 */
                                if ((ret & VM_FAULT_WRITE) &&
                                    !(vma->vm_flags & VM_WRITE))
                                  foll_flags &= ~FOLL_WRITE;

                                //cond_resched();
#endif

                        }

                        if (IS_ERR(page))
                                return i ? i : PTR_ERR(page);
                        if (pages) {
                                pages[i] = page;

#if  LINUX_VERSION_CODE <= KERNEL_VERSION(2,6,18)
                                flush_anon_page(page, start);
#else
                                flush_anon_page(vma, page, start);
#endif
                                flush_dcache_page(page);
                        }
                        if (vmas)
                                vmas[i] = vma;
                        i++;
                        start += PAGE_SIZE;
                        len--;
                } while (len && start < vma->vm_end);
        } while (len);
        return i;
}
#endif /* LINUX_VERSION_CODE > KERNEL_VERSION(2, 6, 38) */
#endif /* LINUX_VERSION_CODE > KERNEL_VERSION(2, 6, 18) */
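
/*
 * Both __get_user_pages_uprobe() variants above appear to be adapted from the
 * kernel's own __get_user_pages() of the corresponding version, with the
 * cond_resched() points commented out so they can be called from atomic
 * context (see access_process_vm_atomic() below, which maps the target pages
 * with dbi_kmap_atomic()).
 */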

int get_user_pages_uprobe(struct task_struct *tsk, struct mm_struct *mm,
                unsigned long start, int len, int write, int force,
                struct page **pages, struct vm_area_struct **vmas)
{
#if LINUX_VERSION_CODE >= KERNEL_VERSION(2, 6, 29)
#if LINUX_VERSION_CODE > KERNEL_VERSION(2, 6, 38) /* FIXME: must be >= 32! */
        int flags = FOLL_TOUCH;

        if (pages)
                flags |= FOLL_GET;
        if (write)
                flags |= FOLL_WRITE;
        if (force)
                flags |= FOLL_FORCE;
#else /* LINUX_VERSION_CODE > KERNEL_VERSION(2, 6, 38) */
        int flags = 0;

        if (write)
                flags |= GUP_FLAGS_WRITE;
        if (force)
                flags |= GUP_FLAGS_FORCE;
#endif /* LINUX_VERSION_CODE > KERNEL_VERSION(2, 6, 38) */

        return __get_user_pages_uprobe(tsk, mm,
                                start, len, flags,
#if LINUX_VERSION_CODE > KERNEL_VERSION(2, 6, 38)
                                       pages, vmas, NULL);
#else /* LINUX_VERSION_CODE > KERNEL_VERSION(2, 6, 38) */
                                       pages, vmas);
#endif /* LINUX_VERSION_CODE > KERNEL_VERSION(2, 6, 38) */
#else
        return get_user_pages(tsk, mm, start, len, write, force, pages, vmas);
#endif
}

#define ACCESS_PROCESS_OPTIMIZATION 0

#if ACCESS_PROCESS_OPTIMIZATION

#define GET_STEP_X(LEN, STEP) (((LEN) >= (STEP)) ? (STEP) : (LEN) % (STEP))
#define GET_STEP_4(LEN) GET_STEP_X((LEN), 4)

static void read_data_current(unsigned long addr, void *buf, int len)
{
        int step;
        int pos = 0;

        for (step = GET_STEP_4(len); len; len -= step) {
                switch (GET_STEP_4(len)) {
                case 1:
                        get_user(*(u8 *)(buf + pos), (u8 __user *)(addr + pos));
                        step = 1;
                        break;

                case 2:
                case 3:
                        get_user(*(u16 *)(buf + pos), (u16 __user *)(addr + pos));
                        step = 2;
                        break;

                case 4:
                        get_user(*(u32 *)(buf + pos), (u32 __user *)(addr + pos));
                        step = 4;
                        break;
                }

                pos += step;
        }
}

// not working
static void write_data_current(unsigned long addr, void *buf, int len)
{
        int step;
        int pos = 0;

        for (step = GET_STEP_4(len); len; len -= step) {
                switch (GET_STEP_4(len)) {
                case 1:
                        put_user(*(u8 *)(buf + pos), (u8 __user *)(addr + pos));
                        step = 1;
                        break;

                case 2:
                case 3:
                        put_user(*(u16 *)(buf + pos), (u16 __user *)(addr + pos));
                        step = 2;
                        break;

                case 4:
                        put_user(*(u32 *)(buf + pos), (u32 __user *)(addr + pos));
                        step = 4;
                        break;
                }

                pos += step;
        }
}
#endif

int access_process_vm_atomic(struct task_struct *tsk, unsigned long addr, void *buf, int len, int write)
{
        struct mm_struct *mm;
        struct vm_area_struct *vma;
        void *old_buf = buf;

        if (len <= 0) {
                return -1;
        }

#if ACCESS_PROCESS_OPTIMIZATION
        if (write == 0 && tsk == current) {
                read_data_current(addr, buf, len);
                return len;
        }
#endif

        mm = tsk->mm; /* FIXME: should take a reference via get_task_mm()/mmput() */
        if (!mm)
                return 0;

        /* ignore errors, just check how much was successfully transferred */
        while (len) {
                int bytes, ret, offset;
                void *maddr;
                struct page *page = NULL;

                ret = get_user_pages_uprobe(tsk, mm, addr, 1,
                                            write, 1, &page, &vma);

                if (ret <= 0) {
                        /*
                         * Check if this is a VM_IO | VM_PFNMAP VMA, which
                         * we can access using slightly different code.
                         */
#ifdef CONFIG_HAVE_IOREMAP_PROT
                        vma = find_vma(mm, addr);
                        if (!vma)
                                break;
                        if (vma->vm_ops && vma->vm_ops->access)
                                ret = vma->vm_ops->access(vma, addr, buf,
                                                          len, write);
                        if (ret <= 0)
#endif
                                break;
                        bytes = ret;
                } else {
                        bytes = len;
                        offset = addr & (PAGE_SIZE-1);
                        if (bytes > PAGE_SIZE-offset)
                                bytes = PAGE_SIZE-offset;

                        maddr = dbi_kmap_atomic(page);

                        if (write) {
                                copy_to_user_page(vma, page, addr,
                                                  maddr + offset, buf, bytes);
                                set_page_dirty_lock(page);
                        } else {
                                copy_from_user_page(vma, page, addr,
                                                    buf, maddr + offset, bytes);
                        }

                        dbi_kunmap_atomic(maddr);
                        page_cache_release(page);
                }
                len -= bytes;
                buf += bytes;
                addr += bytes;
        }

        return buf - old_buf;
}

int page_present (struct mm_struct *mm, unsigned long address)
{
        pgd_t *pgd;
        pud_t *pud;
        pmd_t *pmd;
        pte_t *ptep, pte;
        unsigned long pfn;

        pgd = pgd_offset(mm, address);
        if (pgd_none(*pgd) || unlikely(pgd_bad(*pgd)))
                goto out;

        pud = pud_offset(pgd, address);
        if (pud_none(*pud) || unlikely(pud_bad(*pud)))
                goto out;

        pmd = pmd_offset(pud, address);
        if (pmd_none(*pmd) || unlikely(pmd_bad(*pmd)))
                goto out;

        ptep = pte_offset_map(pmd, address);
        if (!ptep)
                goto out;

        pte = *ptep;
        pte_unmap(ptep);
        if (pte_present(pte)) {
                pfn = pte_pfn(pte);
                if (pfn_valid(pfn)) {
                        return 1;
                }
        }

out:
        return 0;
}


EXPORT_SYMBOL_GPL (page_present);
EXPORT_SYMBOL_GPL (get_user_pages_uprobe);
EXPORT_SYMBOL_GPL (access_process_vm_atomic);
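
/*
 * Example (illustrative only, not part of this file): a probe handler could
 * use the exported access_process_vm_atomic() to read a 32-bit word from the
 * traced task's address space; 'task' and 'addr' below are assumed to come
 * from the caller, and the return value is the number of bytes copied.
 *
 *      u32 val;
 *      int copied = access_process_vm_atomic(task, addr, &val, sizeof(val), 0);
 *
 *      if (copied != sizeof(val))
 *              return -EFAULT;        // partial read or unmapped address
 */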