1 /*
2  *  Dynamic Binary Instrumentation Module based on KProbes
 *  modules/kprobe/dbi_kprobes_deps.c
4  *
5  * This program is free software; you can redistribute it and/or modify
6  * it under the terms of the GNU General Public License as published by
7  * the Free Software Foundation; either version 2 of the License, or
8  * (at your option) any later version.
9  *
10  * This program is distributed in the hope that it will be useful,
11  * but WITHOUT ANY WARRANTY; without even the implied warranty of
12  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
13  * GNU General Public License for more details.
14  *
15  * You should have received a copy of the GNU General Public License
16  * along with this program; if not, write to the Free Software
17  * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
18  *
19  * Copyright (C) Samsung Electronics, 2006-2010
20  *
21  * 2008-2009    Alexey Gerenkov <a.gerenkov@samsung.com> User-Space
22  *              Probes initial implementation; Support x86/ARM/MIPS for both user and kernel spaces.
23  * 2010         Ekaterina Gorelkina <e.gorelkina@samsung.com>: redesign module for separating core and arch parts
24  *
25  */
26
27 #include <linux/module.h>
28 #include <linux/sched.h>
29
30 #include <asm/pgtable.h>
31
32 #include "dbi_kprobes_deps.h"
33 #include "dbi_kdebug.h"
34
35
36 #include <linux/slab.h>
37 #include <linux/mm.h>
38
39 unsigned long sched_addr;
40 unsigned long fork_addr;
41
42 #if LINUX_VERSION_CODE > KERNEL_VERSION(2, 6, 29)
43 struct mm_struct* init_mm_ptr;
44 struct mm_struct init_mm;
45 #endif
46
47
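/*
 * The DECLARE_MOD_FUNC_DEP / DECLARE_MOD_DEP_WRAPPER / IMP_MOD_DEP_WRAPPER
 * macros (see dbi_kprobes_deps.h) declare function pointers for kernel
 * symbols that are not exported to modules and generate thin wrappers that
 * call through those pointers.  The pointers themselves are resolved at
 * load time by init_module_dependencies() below.
 */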
48 #if LINUX_VERSION_CODE >= KERNEL_VERSION(3, 4, 0)
49 DECLARE_MOD_FUNC_DEP(do_mmap_pgoff, unsigned long, struct file *file, unsigned long addr, unsigned long len, unsigned long prot, unsigned long flags, unsigned long pgoff);
50 DECLARE_MOD_DEP_WRAPPER(do_mmap_pgoff, unsigned long, struct file *file, unsigned long addr, unsigned long len, unsigned long prot, unsigned long flags, unsigned long pgoff)
51 IMP_MOD_DEP_WRAPPER(do_mmap_pgoff, file, addr, len, prot, flags, pgoff)
52 #endif /* LINUX_VERSION_CODE >= KERNEL_VERSION(3, 4, 0) */
53
54 /* copy_to_user_page */
55 #ifndef copy_to_user_page
56 DECLARE_MOD_FUNC_DEP(copy_to_user_page, void, struct vm_area_struct *vma, struct page *page, unsigned long uaddr, void *dst, const void *src, unsigned long len);
57 DECLARE_MOD_DEP_WRAPPER(copy_to_user_page, void, struct vm_area_struct *vma, struct page *page, unsigned long uaddr, void *dst, const void *src, unsigned long len)
58 IMP_MOD_DEP_WRAPPER(copy_to_user_page, vma, page, uaddr, dst, src, len)
59 #endif /* copy_to_user_page */
60
61
62 DECLARE_MOD_FUNC_DEP(access_process_vm, int, struct task_struct * tsk, unsigned long addr, void *buf, int len, int write);
63
64 DECLARE_MOD_FUNC_DEP(find_extend_vma, struct vm_area_struct *, struct mm_struct * mm, unsigned long addr);
65
66 #if LINUX_VERSION_CODE <= KERNEL_VERSION(2, 6, 30)
67 #if LINUX_VERSION_CODE > KERNEL_VERSION(2, 6, 18)
68 DECLARE_MOD_FUNC_DEP(handle_mm_fault, int, struct mm_struct *mm, struct vm_area_struct *vma, unsigned long address, int write_access);
69 #endif
70 #else
71 DECLARE_MOD_FUNC_DEP(handle_mm_fault, int, struct mm_struct *mm, struct vm_area_struct *vma, unsigned long address, unsigned int flags);
72 #endif /* LINUX_VERSION_CODE <= KERNEL_VERSION(2, 6, 30) */
73
74 #if LINUX_VERSION_CODE > KERNEL_VERSION(2, 6, 38)
75 DECLARE_MOD_FUNC_DEP(get_gate_vma, struct vm_area_struct *, struct mm_struct *mm);
76 #else /* LINUX_VERSION_CODE > KERNEL_VERSION(2, 6, 38) */
77 DECLARE_MOD_FUNC_DEP(get_gate_vma, struct vm_area_struct *, struct task_struct *tsk);
78 #endif /* LINUX_VERSION_CODE > KERNEL_VERSION(2, 6, 38) */
79
80 #ifdef CONFIG_HUGETLB_PAGE
81 DECLARE_MOD_FUNC_DEP(follow_hugetlb_page, int, struct mm_struct *mm, struct vm_area_struct *vma, struct page **pages, struct vm_area_struct **vmas, unsigned long *position, int *length, int i, int write);
82 #endif
83
84 #ifdef __HAVE_ARCH_GATE_AREA
85 #if LINUX_VERSION_CODE > KERNEL_VERSION(2, 6, 38)
86 DECLARE_MOD_FUNC_DEP(in_gate_area, int, struct mm_struct *mm, unsigned long addr);
87 #else /* LINUX_VERSION_CODE > KERNEL_VERSION(2, 6, 38) */
88 DECLARE_MOD_FUNC_DEP(in_gate_area, int, struct task_struct *task, unsigned long addr);
89 #endif /* LINUX_VERSION_CODE > KERNEL_VERSION(2, 6, 38) */
90 #endif /* __HAVE_ARCH_GATE_AREA */
91
92 #if LINUX_VERSION_CODE > KERNEL_VERSION(2, 6, 38)
93 DECLARE_MOD_FUNC_DEP(in_gate_area_no_mm, int, unsigned long addr);
94 #else /* LINUX_VERSION_CODE > KERNEL_VERSION(2, 6, 38) */
95 DECLARE_MOD_FUNC_DEP(in_gate_area_no_task, int, unsigned long addr);
96 #endif /* LINUX_VERSION_CODE > KERNEL_VERSION(2, 6, 38) */
97
98
99 DECLARE_MOD_FUNC_DEP(follow_page, \
100                 struct page *, struct vm_area_struct * vma, \
101                 unsigned long address, unsigned int foll_flags);
102 DECLARE_MOD_FUNC_DEP(__flush_anon_page, \
103                 void, struct vm_area_struct *vma, struct page *page, \
104                 unsigned long vmaddr);
105 DECLARE_MOD_FUNC_DEP(vm_normal_page, \
106                 struct page *, struct vm_area_struct *vma, \
107                 unsigned long addr, pte_t pte);
108
109
110 #if (LINUX_VERSION_CODE != KERNEL_VERSION(2, 6, 16))
111 DECLARE_MOD_FUNC_DEP(put_task_struct, \
112                 void, struct task_struct *tsk);
113 #else
114 DECLARE_MOD_FUNC_DEP(put_task_struct, \
115                 void, struct rcu_head * rhp);
116 #endif
117
118         DECLARE_MOD_DEP_WRAPPER(access_process_vm, int, struct task_struct *tsk, unsigned long addr, void *buf, int len, int write)
119 IMP_MOD_DEP_WRAPPER (access_process_vm, tsk, addr, buf, len, write)
120
121         DECLARE_MOD_DEP_WRAPPER (find_extend_vma, struct vm_area_struct *, struct mm_struct * mm, unsigned long addr)
122 IMP_MOD_DEP_WRAPPER (find_extend_vma, mm, addr)
123
124 #if LINUX_VERSION_CODE <= KERNEL_VERSION(2, 6, 30)
125 #if LINUX_VERSION_CODE > KERNEL_VERSION(2, 6, 18)
126         DECLARE_MOD_DEP_WRAPPER (handle_mm_fault, \
127                         int, struct mm_struct *mm, struct vm_area_struct *vma, unsigned long address, int write_access)
128 IMP_MOD_DEP_WRAPPER (handle_mm_fault, mm, vma, address, write_access)
129 #endif
130 #else
131         DECLARE_MOD_DEP_WRAPPER (handle_mm_fault, \
132                         int, struct mm_struct *mm, struct vm_area_struct *vma, unsigned long address, unsigned int flags)
133 IMP_MOD_DEP_WRAPPER (handle_mm_fault, mm, vma, address, flags)
134 #endif
135
136 #if LINUX_VERSION_CODE > KERNEL_VERSION(2, 6, 38)
137         DECLARE_MOD_DEP_WRAPPER (get_gate_vma, \
138                         struct vm_area_struct *, struct mm_struct *mm)
139 IMP_MOD_DEP_WRAPPER (get_gate_vma, mm)
140 #else /* LINUX_VERSION_CODE > KERNEL_VERSION(2, 6, 38) */
141         DECLARE_MOD_DEP_WRAPPER (get_gate_vma, \
142                         struct vm_area_struct *, struct task_struct *tsk)
143 IMP_MOD_DEP_WRAPPER (get_gate_vma, tsk)
144 #endif /* LINUX_VERSION_CODE > KERNEL_VERSION(2, 6, 38) */
145
146 #ifdef CONFIG_HUGETLB_PAGE
147         DECLARE_MOD_DEP_WRAPPER (follow_hugetlb_page, int, struct mm_struct *mm, struct vm_area_struct *vma, struct page **pages, struct vm_area_struct **vmas, unsigned long *position, int *length, int i, unsigned int write)
148         IMP_MOD_DEP_WRAPPER (follow_hugetlb_page, mm, vma, pages, vmas, position, length, i, write)
149 #endif
150
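/*
 * Version-independent wrapper around in_gate_area(): kernels after 2.6.38
 * take an mm_struct, older kernels take a task_struct.
 */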
151 static inline int dbi_in_gate_area(struct task_struct *task, unsigned long addr)
152 {
153 #ifdef __HAVE_ARCH_GATE_AREA
154 #if LINUX_VERSION_CODE > KERNEL_VERSION(2, 6, 38)
155         struct mm_struct *mm = task->mm;
156         IMP_MOD_DEP_WRAPPER (in_gate_area, mm, addr)
157 #else /* LINUX_VERSION_CODE > KERNEL_VERSION(2, 6, 38) */
158         IMP_MOD_DEP_WRAPPER (in_gate_area, task, addr)
159 #endif /* LINUX_VERSION_CODE > KERNEL_VERSION(2, 6, 38) */
160 #else /*__HAVE_ARCH_GATE_AREA */
161         return in_gate_area(task, addr);
162 #endif/*__HAVE_ARCH_GATE_AREA */
163 }
164
165
166 #if LINUX_VERSION_CODE > KERNEL_VERSION(2, 6, 38)
167 DECLARE_MOD_DEP_WRAPPER(in_gate_area_no_mm, int, unsigned long addr)
168 IMP_MOD_DEP_WRAPPER(in_gate_area_no_mm, addr)
169 #else /* LINUX_VERSION_CODE > KERNEL_VERSION(2, 6, 38) */
170 DECLARE_MOD_DEP_WRAPPER(in_gate_area_no_task, int, unsigned long addr)
171 IMP_MOD_DEP_WRAPPER(in_gate_area_no_task, addr)
172 #endif /* LINUX_VERSION_CODE > KERNEL_VERSION(2, 6, 38) */
173
174 static inline int dbi_in_gate_area_no_xxx(unsigned long addr)
175 {
176 #if LINUX_VERSION_CODE > KERNEL_VERSION(2, 6, 38)
177         return in_gate_area_no_mm(addr);
178 #else /* LINUX_VERSION_CODE > KERNEL_VERSION(2, 6, 38) */
179         return in_gate_area_no_task(addr);
180 #endif /* LINUX_VERSION_CODE > KERNEL_VERSION(2, 6, 38) */
181 }
182
183
184 #if (LINUX_VERSION_CODE != KERNEL_VERSION(2, 6, 11))
185         DECLARE_MOD_DEP_WRAPPER (follow_page, \
186                         struct page *, struct vm_area_struct * vma, \
187                         unsigned long address, unsigned int foll_flags)
188 IMP_MOD_DEP_WRAPPER (follow_page, vma, address, foll_flags)
189 #endif
190         DECLARE_MOD_DEP_WRAPPER (__flush_anon_page, \
191                         void, struct vm_area_struct *vma, \
192                         struct page *page, unsigned long vmaddr)
193 IMP_MOD_DEP_WRAPPER (__flush_anon_page, vma, page, vmaddr)
194
195         DECLARE_MOD_DEP_WRAPPER(vm_normal_page, \
196                         struct page *, struct vm_area_struct *vma, \
197                         unsigned long addr, pte_t pte)
198 IMP_MOD_DEP_WRAPPER (vm_normal_page, vma, addr, pte)
199
200
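/*
 * Resolve all of the non-exported kernel symbols declared above.  Must be
 * called once at module load, before any of the wrappers is used.
 */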
int init_module_dependencies(void)
{
#if LINUX_VERSION_CODE > KERNEL_VERSION(2, 6, 29)
	/* init_mm is not exported on these kernels: look up the real one and
	 * copy it into our local instance so that code in this file that
	 * refers to init_mm sees its actual contents. */
	init_mm_ptr = (struct mm_struct *)swap_ksyms("init_mm");
	memcpy(&init_mm, init_mm_ptr, sizeof(struct mm_struct));
#endif
208
209 #if LINUX_VERSION_CODE > KERNEL_VERSION(2, 6, 18)
210         INIT_MOD_DEP_VAR(handle_mm_fault, handle_mm_fault);
211 #endif
212
213 #ifndef copy_to_user_page
214         INIT_MOD_DEP_VAR(copy_to_user_page, copy_to_user_page);
215 #endif /* copy_to_user_page */
216
217         INIT_MOD_DEP_VAR(find_extend_vma, find_extend_vma);
218         INIT_MOD_DEP_VAR(get_gate_vma, get_gate_vma);
219
220 #ifdef CONFIG_HUGETLB_PAGE
221         INIT_MOD_DEP_VAR(follow_hugetlb_page, follow_hugetlb_page);
222 #endif
223
224 #ifdef  __HAVE_ARCH_GATE_AREA
225         INIT_MOD_DEP_VAR(in_gate_area, in_gate_area);
226 #endif
227
228 #if (LINUX_VERSION_CODE > KERNEL_VERSION(2, 6, 38))
229         INIT_MOD_DEP_VAR(in_gate_area_no_mm, in_gate_area_no_mm);
230 #else /* (LINUX_VERSION_CODE > KERNEL_VERSION(2, 6, 38))  */
231         INIT_MOD_DEP_VAR(in_gate_area_no_task, in_gate_area_no_task);
232 #endif /* (LINUX_VERSION_CODE > KERNEL_VERSION(2, 6, 38))  */
233
234         INIT_MOD_DEP_VAR(follow_page, follow_page);
235
236         INIT_MOD_DEP_VAR(__flush_anon_page, __flush_anon_page);
237         INIT_MOD_DEP_VAR(vm_normal_page, vm_normal_page);
238         INIT_MOD_DEP_VAR(access_process_vm, access_process_vm);
239
240 #if (LINUX_VERSION_CODE != KERNEL_VERSION(2, 6, 16))
241 # if (LINUX_VERSION_CODE < KERNEL_VERSION(2, 6, 11))
242         INIT_MOD_DEP_VAR(put_task_struct, put_task_struct);
243 # else
244         INIT_MOD_DEP_VAR(put_task_struct, __put_task_struct);
245 # endif
246 #else /*2.6.16 */
247         INIT_MOD_DEP_VAR(put_task_struct, __put_task_struct_cb);
248 #endif
249
250 #if (LINUX_VERSION_CODE >= KERNEL_VERSION(3, 4, 0))
251         INIT_MOD_DEP_VAR(do_mmap_pgoff, do_mmap_pgoff);
252 #endif
253
254         return 0;
255 }
256
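/*
 * These values mirror the kernel's internal gup flags (mm/internal.h),
 * which are not visible to modules on the kernels where they are needed.
 */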
257 #if LINUX_VERSION_CODE <= KERNEL_VERSION(2, 6, 38) /* FIXME: must be < 32 */
258 #define GUP_FLAGS_WRITE                  0x1
259 #define GUP_FLAGS_FORCE                  0x2
260 #define GUP_FLAGS_IGNORE_VMA_PERMISSIONS 0x4
261 #define GUP_FLAGS_IGNORE_SIGKILL         0x8
262 #endif /* LINUX_VERSION_CODE <= KERNEL_VERSION(2, 6, 38) */
263
264 #if LINUX_VERSION_CODE > KERNEL_VERSION(2,6,18)
265 static inline int use_zero_page(struct vm_area_struct *vma)
266 {
267         /*
268          * We don't want to optimize FOLL_ANON for make_pages_present()
269          * when it tries to page in a VM_LOCKED region. As to VM_SHARED,
270          * we want to get the page from the page tables to make sure
271          * that we serialize and update with any other user of that
272          * mapping.
273          */
274         if (vma->vm_flags & (VM_LOCKED | VM_SHARED))
275                 return 0;
276         /*
277          * And if we have a fault routine, it's not an anonymous region.
278          */
279         return !vma->vm_ops || !vma->vm_ops->fault;
280 }
281
282
283 #if LINUX_VERSION_CODE > KERNEL_VERSION(2, 6, 38)
284 unsigned long zero_pfn __read_mostly;
285
286 #ifndef is_zero_pfn
287 static inline int is_zero_pfn(unsigned long pfn)
288 {
289         return pfn == zero_pfn;
290 }
291 #endif
292
293 static inline int stack_guard_page(struct vm_area_struct *vma, unsigned long addr)
294 {
295         return stack_guard_page_start(vma, addr) ||
296                stack_guard_page_end(vma, addr+PAGE_SIZE);
297 }
298
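/*
 * Variant of the kernel's __get_user_pages() for kernels after 2.6.38,
 * with the cond_resched() calls commented out so that it can be called
 * from the atomic context of a probe handler.
 */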
299 int __get_user_pages_uprobe(struct task_struct *tsk, struct mm_struct *mm,
300                      unsigned long start, int nr_pages, unsigned int gup_flags,
301                      struct page **pages, struct vm_area_struct **vmas,
302                      int *nonblocking)
303 {
304         int i;
305         unsigned long vm_flags;
306
307         if (nr_pages <= 0) {
308                 return 0;
309         }
310
311         VM_BUG_ON(!!pages != !!(gup_flags & FOLL_GET));
312
313         /*
314          * Require read or write permissions.
315          * If FOLL_FORCE is set, we only require the "MAY" flags.
316          */
317         vm_flags  = (gup_flags & FOLL_WRITE) ?
318                         (VM_WRITE | VM_MAYWRITE) : (VM_READ | VM_MAYREAD);
319         vm_flags &= (gup_flags & FOLL_FORCE) ?
320                         (VM_MAYREAD | VM_MAYWRITE) : (VM_READ | VM_WRITE);
321         i = 0;
322
323         do {
324                 struct vm_area_struct *vma;
325
326                 vma = find_extend_vma(mm, start);
327                 if (!vma && dbi_in_gate_area_no_xxx(start)) {
328                         unsigned long pg = start & PAGE_MASK;
329                         pgd_t *pgd;
330                         pud_t *pud;
331                         pmd_t *pmd;
332                         pte_t *pte;
333
334                         /* user gate pages are read-only */
335                         if (gup_flags & FOLL_WRITE) {
336                                 return i ? : -EFAULT;
337                         }
338                         if (pg > TASK_SIZE)
339                                 pgd = pgd_offset_k(pg);
340                         else
341                                 pgd = pgd_offset_gate(mm, pg);
342                         BUG_ON(pgd_none(*pgd));
343                         pud = pud_offset(pgd, pg);
344                         BUG_ON(pud_none(*pud));
345                         pmd = pmd_offset(pud, pg);
346                         if (pmd_none(*pmd)) {
347                                 return i ? : -EFAULT;
348                         }
349                         VM_BUG_ON(pmd_trans_huge(*pmd));
350                         pte = pte_offset_map(pmd, pg);
351                         if (pte_none(*pte)) {
352                                 pte_unmap(pte);
353                                 return i ? : -EFAULT;
354                         }
355                         vma = get_gate_vma(mm);
356                         if (pages) {
357                                 struct page *page;
358
359                                 page = vm_normal_page(vma, start, *pte);
360                                 if (!page) {
361                                         if (!(gup_flags & FOLL_DUMP) &&
362                                              is_zero_pfn(pte_pfn(*pte)))
363                                                 page = pte_page(*pte);
364                                         else {
365                                                 pte_unmap(pte);
366                                                 return i ? : -EFAULT;
367                                         }
368                                 }
369                                 pages[i] = page;
370                                 get_page(page);
371                         }
372                         pte_unmap(pte);
373                         goto next_page;
374                 }
375
376                 if (!vma ||
377                     (vma->vm_flags & (VM_IO | VM_PFNMAP)) ||
378                     !(vm_flags & vma->vm_flags)) {
379                         return i ? : -EFAULT;
380                 }
381
382                 if (is_vm_hugetlb_page(vma)) {
383                         i = follow_hugetlb_page(mm, vma, pages, vmas,
384                                         &start, &nr_pages, i, gup_flags);
385                         continue;
386                 }
387
388                 do {
389                         struct page *page;
390                         unsigned int foll_flags = gup_flags;
391
392                         /*
393                          * If we have a pending SIGKILL, don't keep faulting
394                          * pages and potentially allocating memory.
395                          */
396                         if (unlikely(fatal_signal_pending(current))) {
397                                 return i ? i : -ERESTARTSYS;
398                         }
399
400                         /* cond_resched(); */
401                         while (!(page = follow_page(vma, start, foll_flags))) {
402                                 int ret;
403                                 unsigned int fault_flags = 0;
404
405                                 /* For mlock, just skip the stack guard page. */
406                                 if (foll_flags & FOLL_MLOCK) {
407                                         if (stack_guard_page(vma, start))
408                                                 goto next_page;
409                                 }
410                                 if (foll_flags & FOLL_WRITE)
411                                         fault_flags |= FAULT_FLAG_WRITE;
412                                 if (nonblocking)
413                                         fault_flags |= FAULT_FLAG_ALLOW_RETRY;
414                                 if (foll_flags & FOLL_NOWAIT)
415                                         fault_flags |= (FAULT_FLAG_ALLOW_RETRY | FAULT_FLAG_RETRY_NOWAIT);
416
417                                 ret = handle_mm_fault(mm, vma, start,
418                                                         fault_flags);
419
420                                 if (ret & VM_FAULT_ERROR) {
421                                         if (ret & VM_FAULT_OOM) {
422                                                 return i ? i : -ENOMEM;
423                                         }
424                                         if (ret & (VM_FAULT_HWPOISON |
425                                                    VM_FAULT_HWPOISON_LARGE)) {
426                                                 if (i) {
427                                                         return i;
428                                                 }
429                                                 else if (gup_flags & FOLL_HWPOISON) {
430                                                         return -EHWPOISON;
431                                                 }
432                                                 else {
433                                                         return -EFAULT;
434                                                 }
435                                         }
436                                         if (ret & VM_FAULT_SIGBUS) {
437                                                 return i ? i : -EFAULT;
438                                         }
439                                         BUG();
440                                 }
441
442                                 if (tsk) {
443                                         if (ret & VM_FAULT_MAJOR)
444                                                 tsk->maj_flt++;
445                                         else
446                                                 tsk->min_flt++;
447                                 }
448
449                                 if (ret & VM_FAULT_RETRY) {
450                                         if (nonblocking)
451                                                 *nonblocking = 0;
452                                         return i;
453                                 }
454
455                                 /*
456                                  * The VM_FAULT_WRITE bit tells us that
457                                  * do_wp_page has broken COW when necessary,
458                                  * even if maybe_mkwrite decided not to set
459                                  * pte_write. We can thus safely do subsequent
460                                  * page lookups as if they were reads. But only
461                                  * do so when looping for pte_write is futile:
462                                  * in some cases userspace may also be wanting
463                                  * to write to the gotten user page, which a
464                                  * read fault here might prevent (a readonly
465                                  * page might get reCOWed by userspace write).
466                                  */
467                                 if ((ret & VM_FAULT_WRITE) &&
468                                     !(vma->vm_flags & VM_WRITE))
469                                         foll_flags &= ~FOLL_WRITE;
470
471                                 /* cond_resched(); */
472                         }
473                         if (IS_ERR(page)) {
474                                 return i ? i : PTR_ERR(page);
475                         }
476                         if (pages) {
477                                 pages[i] = page;
478
479                                 flush_anon_page(vma, page, start);
480                                 flush_dcache_page(page);
481                         }
482 next_page:
483                         if (vmas)
484                                 vmas[i] = vma;
485                         i++;
486                         start += PAGE_SIZE;
487                         nr_pages--;
488                 } while (nr_pages && start < vma->vm_end);
489         } while (nr_pages);
490
491         return i;
492 }
493 #else /* LINUX_VERSION_CODE > KERNEL_VERSION(2, 6, 38) */
494
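/*
 * Pre-2.6.38 variant: follows the older __get_user_pages() interface with
 * GUP_FLAGS_* instead of FOLL_*, again with cond_resched() disabled.
 */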
495 int __get_user_pages_uprobe(struct task_struct *tsk, struct mm_struct *mm,
496                      unsigned long start, int len, int flags,
497                 struct page **pages, struct vm_area_struct **vmas)
498 {
499         int i;
500         unsigned int vm_flags = 0;
501         int write = !!(flags & GUP_FLAGS_WRITE);
502         int force = !!(flags & GUP_FLAGS_FORCE);
503         int ignore = !!(flags & GUP_FLAGS_IGNORE_VMA_PERMISSIONS);
504         int ignore_sigkill = !!(flags & GUP_FLAGS_IGNORE_SIGKILL);
505
506         if (len <= 0)
507                 return 0;
508         /*
509          * Require read or write permissions.
510          * If 'force' is set, we only require the "MAY" flags.
511          */
512         vm_flags  = write ? (VM_WRITE | VM_MAYWRITE) : (VM_READ | VM_MAYREAD);
513         vm_flags &= force ? (VM_MAYREAD | VM_MAYWRITE) : (VM_READ | VM_WRITE);
514         i = 0;
515
516         do {
517                 struct vm_area_struct *vma;
518                 unsigned int foll_flags;
519
520                 vma = find_vma(mm, start);
521                 if (!vma && dbi_in_gate_area(tsk, start)) {
522                         unsigned long pg = start & PAGE_MASK;
523                         struct vm_area_struct *gate_vma = get_gate_vma(tsk);
524                         pgd_t *pgd;
525                         pud_t *pud;
526                         pmd_t *pmd;
527                         pte_t *pte;
528
529                         /* user gate pages are read-only */
530                         if (!ignore && write)
531                                 return i ? : -EFAULT;
532                         if (pg > TASK_SIZE)
533                                 pgd = pgd_offset_k(pg);
534                         else
535                                 pgd = pgd_offset_gate(mm, pg);
536                         BUG_ON(pgd_none(*pgd));
537                         pud = pud_offset(pgd, pg);
538                         BUG_ON(pud_none(*pud));
539                         pmd = pmd_offset(pud, pg);
540                         if (pmd_none(*pmd))
541                                 return i ? : -EFAULT;
542                         pte = pte_offset_map(pmd, pg);
543                         if (pte_none(*pte)) {
544                                 pte_unmap(pte);
545                                 return i ? : -EFAULT;
546                         }
547                         if (pages) {
548                                 struct page *page = vm_normal_page(gate_vma, start, *pte);
549                                 pages[i] = page;
550                                 if (page)
551                                         get_page(page);
552                         }
553                         pte_unmap(pte);
554                         if (vmas)
555                                 vmas[i] = gate_vma;
556                         i++;
557                         start += PAGE_SIZE;
558                         len--;
559                         continue;
560                 }
561
562                 if (!vma ||
563                     (vma->vm_flags & (VM_IO | VM_PFNMAP)) ||
564                     (!ignore && !(vm_flags & vma->vm_flags)))
565                         return i ? : -EFAULT;
566
567                 if (is_vm_hugetlb_page(vma)) {
568 #if  LINUX_VERSION_CODE <= KERNEL_VERSION(2,6,18)
569                         i = follow_hugetlb_page(mm, vma, pages, vmas,
570                                                 &start, &len, i);
571 #else
572                         i = follow_hugetlb_page(mm, vma, pages, vmas,
573                                                 &start, &len, i, write);
574 #endif
575                         continue;
576                 }
577
578                 foll_flags = FOLL_TOUCH;
579                 if (pages)
580                         foll_flags |= FOLL_GET;
581
582 #if LINUX_VERSION_CODE > KERNEL_VERSION(2,6,18)
583 #if LINUX_VERSION_CODE <= KERNEL_VERSION(2,6,30)
584                 if (!write && use_zero_page(vma))
585                   foll_flags |= FOLL_ANON;
586 #endif
587 #endif
588
589                 do {
590                         struct page *page;
591
592 #if 0
593                         /*
594                          * If we have a pending SIGKILL, don't keep faulting
595                          * pages and potentially allocating memory, unless
596                          * current is handling munlock--e.g., on exit. In
597                          * that case, we are not allocating memory.  Rather,
598                          * we're only unlocking already resident/mapped pages.
599                          */
600                         if (unlikely(!ignore_sigkill &&
601                                         fatal_signal_pending(current)))
602                                 return i ? i : -ERESTARTSYS;
603 #endif
604
605                         if (write)
606                                 foll_flags |= FOLL_WRITE;
607
608
609                         //cond_resched();
610
611                         DBPRINTF ("pages = %p vma = %p\n", pages, vma);
612                         while (!(page = follow_page(vma, start, foll_flags))) {
613                                 int ret;
614                                 ret = handle_mm_fault(mm, vma, start,
615                                                 foll_flags & FOLL_WRITE);
616
617 #if  LINUX_VERSION_CODE <= KERNEL_VERSION(2,6,18)
618                                 if (ret & VM_FAULT_WRITE)
619                                   foll_flags &= ~FOLL_WRITE;
620
621                                 switch (ret & ~VM_FAULT_WRITE) {
622                                 case VM_FAULT_MINOR:
623                                   tsk->min_flt++;
624                                   break;
625                                 case VM_FAULT_MAJOR:
626                                   tsk->maj_flt++;
627                                   break;
628                                 case VM_FAULT_SIGBUS:
629                                   return i ? i : -EFAULT;
630                                 case VM_FAULT_OOM:
631                                   return i ? i : -ENOMEM;
632                                 default:
633                                   BUG();
634                                 }
635
636 #else
637                                 if (ret & VM_FAULT_ERROR) {
638                                   if (ret & VM_FAULT_OOM)
639                                     return i ? i : -ENOMEM;
640                                   else if (ret & VM_FAULT_SIGBUS)
641                                     return i ? i : -EFAULT;
642                                   BUG();
643                                 }
644                                 if (ret & VM_FAULT_MAJOR)
645                                   tsk->maj_flt++;
646                                 else
647                                   tsk->min_flt++;
648
649                                 /*
650                                  * The VM_FAULT_WRITE bit tells us that
651                                  * do_wp_page has broken COW when necessary,
652                                  * even if maybe_mkwrite decided not to set
653                                  * pte_write. We can thus safely do subsequent
654                                  * page lookups as if they were reads. But only
655                                  * do so when looping for pte_write is futile:
656                                  * in some cases userspace may also be wanting
657                                  * to write to the gotten user page, which a
658                                  * read fault here might prevent (a readonly
659                                  * page might get reCOWed by userspace write).
660                                  */
661                                 if ((ret & VM_FAULT_WRITE) &&
662                                     !(vma->vm_flags & VM_WRITE))
663                                   foll_flags &= ~FOLL_WRITE;
664
665                                 //cond_resched();
666 #endif
667
668                         }
669
670                         if (IS_ERR(page))
671                                 return i ? i : PTR_ERR(page);
672                         if (pages) {
673                                 pages[i] = page;
674
675 #if  LINUX_VERSION_CODE <= KERNEL_VERSION(2,6,18)
676                                 flush_anon_page(page, start);
677 #else
678                                 flush_anon_page(vma, page, start);
679 #endif
680                                 flush_dcache_page(page);
681                         }
682                         if (vmas)
683                                 vmas[i] = vma;
684                         i++;
685                         start += PAGE_SIZE;
686                         len--;
687                 } while (len && start < vma->vm_end);
688         } while (len);
689         return i;
690 }
#endif /* LINUX_VERSION_CODE > KERNEL_VERSION(2, 6, 38) */
#endif /* LINUX_VERSION_CODE > KERNEL_VERSION(2, 6, 18) */
693
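/*
 * Public entry point: translate write/force into the flag format expected
 * by the running kernel and call __get_user_pages_uprobe(); on kernels
 * before 2.6.29 fall back to the exported get_user_pages().
 */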
694 int get_user_pages_uprobe(struct task_struct *tsk, struct mm_struct *mm,
695                 unsigned long start, int len, int write, int force,
696                 struct page **pages, struct vm_area_struct **vmas)
697 {
698 #if LINUX_VERSION_CODE >= KERNEL_VERSION(2, 6, 29)
699 #if LINUX_VERSION_CODE > KERNEL_VERSION(2, 6, 38) /* FIXME: must be >= 32! */
700         int flags = FOLL_TOUCH;
701
702         if (pages)
703                 flags |= FOLL_GET;
704         if (write)
705                 flags |= FOLL_WRITE;
706         if (force)
707                 flags |= FOLL_FORCE;
708 #else /* LINUX_VERSION_CODE > KERNEL_VERSION(2, 6, 38) */
709         int flags = 0;
710
711         if (write)
712                 flags |= GUP_FLAGS_WRITE;
713         if (force)
714                 flags |= GUP_FLAGS_FORCE;
715 #endif /* LINUX_VERSION_CODE > KERNEL_VERSION(2, 6, 38) */
716
717         return __get_user_pages_uprobe(tsk, mm,
718                                 start, len, flags,
719 #if LINUX_VERSION_CODE > KERNEL_VERSION(2, 6, 38)
720                                        pages, vmas, 0);
721 #else /* LINUX_VERSION_CODE > KERNEL_VERSION(2, 6, 38) */
722                                        pages, vmas);
723 #endif /* LINUX_VERSION_CODE > KERNEL_VERSION(2, 6, 38) */
724 #else
725         return get_user_pages(tsk, mm, start, len, write, force, pages, vmas);
726 #endif
727 }
728
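/*
 * Optional fast path for accesses to the current task: copy directly with
 * get_user()/put_user() instead of pinning pages.  Disabled by default;
 * the write path below is marked as not working.
 */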
729 #define ACCESS_PROCESS_OPTIMIZATION 0
730
731 #if ACCESS_PROCESS_OPTIMIZATION
732
733 #define GET_STEP_X(LEN, STEP) (((LEN) >= (STEP)) ? (STEP) : (LEN) % (STEP))
734 #define GET_STEP_4(LEN) GET_STEP_X((LEN), 4)
735
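/*
 * Copy len bytes from current's user space in 4-, 2- or 1-byte accesses:
 * GET_STEP_4() yields the number of bytes left, capped at 4, and the
 * switch below maps that to the access size actually used.
 */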
736 static void read_data_current(unsigned long addr, void *buf, int len)
737 {
738         int step;
739         int pos = 0;
740
741         for (step = GET_STEP_4(len); len; len -= step) {
742                 switch (GET_STEP_4(len)) {
743                 case 1:
                        get_user(*(u8 *)(buf + pos), (u8 __user *)(addr + pos));
745                         step = 1;
746                         break;
747
748                 case 2:
749                 case 3:
                        get_user(*(u16 *)(buf + pos), (u16 __user *)(addr + pos));
751                         step = 2;
752                         break;
753
754                 case 4:
                        get_user(*(u32 *)(buf + pos), (u32 __user *)(addr + pos));
756                         step = 4;
757                         break;
758                 }
759
760                 pos += step;
761         }
762 }
763
764 // not working
765 static void write_data_current(unsigned long addr, void *buf, int len)
766 {
767         int step;
768         int pos = 0;
769
770         for (step = GET_STEP_4(len); len; len -= step) {
771                 switch (GET_STEP_4(len)) {
772                 case 1:
                        put_user(*(u8 *)(buf + pos), (u8 __user *)(addr + pos));
774                         step = 1;
775                         break;
776
777                 case 2:
778                 case 3:
                        put_user(*(u16 *)(buf + pos), (u16 __user *)(addr + pos));
780                         step = 2;
781                         break;
782
783                 case 4:
                        put_user(*(u32 *)(buf + pos), (u32 __user *)(addr + pos));
785                         step = 4;
786                         break;
787                 }
788
789                 pos += step;
790         }
791 }
792 #endif
793
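/*
 * Non-sleeping counterpart of access_process_vm(): copies @len bytes
 * between @buf and the address space of @tsk at @addr, pinning pages via
 * get_user_pages_uprobe() rather than the regular GUP path.  Returns the
 * number of bytes actually transferred.
 *
 * Illustrative use (hypothetical caller, not part of this file):
 *
 *	unsigned long insn;
 *	if (access_process_vm_atomic(task, vaddr, &insn, sizeof(insn), 0)
 *	    != sizeof(insn))
 *		return -EFAULT;	/- read sizeof(insn) bytes from task memory -/
 */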
794 int access_process_vm_atomic(struct task_struct *tsk, unsigned long addr, void *buf, int len, int write)
795 {
796         struct mm_struct *mm;
797         struct vm_area_struct *vma;
798         void *old_buf = buf;
799
800         if (len <= 0) {
801                 return -1;
802         }
803
804 #if ACCESS_PROCESS_OPTIMIZATION
805         if (write == 0 && tsk == current) {
806                 read_data_current(addr, buf, len);
807                 return len;
808         }
809 #endif
810
        mm = tsk->mm; /* TODO: take a reference via get_task_mm()/mmput() */
812         if (!mm)
813                 return 0;
814
815         /* ignore errors, just check how much was successfully transferred */
816         while (len) {
817                 int bytes, ret, offset;
818                 void *maddr;
819                 struct page *page = NULL;
820
821                 ret = get_user_pages_uprobe(tsk, mm, addr, 1,
822                                             write, 1, &page, &vma);
823
824                 if (ret <= 0) {
825                         /*
826                          * Check if this is a VM_IO | VM_PFNMAP VMA, which
827                          * we can access using slightly different code.
828                          */
829 #ifdef CONFIG_HAVE_IOREMAP_PROT
830                         vma = find_vma(mm, addr);
831                         if (!vma)
832                                 break;
833                         if (vma->vm_ops && vma->vm_ops->access)
834                                 ret = vma->vm_ops->access(vma, addr, buf,
835                                                           len, write);
836                         if (ret <= 0)
837 #endif
838                                 break;
839                         bytes = ret;
840                 } else {
841                         bytes = len;
842                         offset = addr & (PAGE_SIZE-1);
843                         if (bytes > PAGE_SIZE-offset)
844                                 bytes = PAGE_SIZE-offset;
845
846                         maddr = kmap_atomic(page);
847
848                         if (write) {
849                                 copy_to_user_page(vma, page, addr,
850                                                   maddr + offset, buf, bytes);
851                                 set_page_dirty_lock(page);
852                         } else {
853                                 copy_from_user_page(vma, page, addr,
854                                                     buf, maddr + offset, bytes);
855                         }
856
857                         kunmap_atomic(maddr);
858                         page_cache_release(page);
859                 }
860                 len -= bytes;
861                 buf += bytes;
862                 addr += bytes;
863         }
864
865         return buf - old_buf;
866 }
867
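/*
 * Walk the page tables of @mm and return 1 if @address is currently backed
 * by a present pte with a valid pfn, 0 otherwise.
 */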
868 int page_present (struct mm_struct *mm, unsigned long address)
869 {
870         pgd_t *pgd;
871         pud_t *pud;
872         pmd_t *pmd;
873         pte_t *ptep, pte;
874         unsigned long pfn;
875
876         pgd = pgd_offset(mm, address);
877         if (pgd_none(*pgd) || unlikely(pgd_bad(*pgd)))
878                 goto out;
879
880         pud = pud_offset(pgd, address);
881         if (pud_none(*pud) || unlikely(pud_bad(*pud)))
882                 goto out;
883
884         pmd = pmd_offset(pud, address);
885         if (pmd_none(*pmd) || unlikely(pmd_bad(*pmd)))
886                 goto out;
887
888         ptep = pte_offset_map(pmd, address);
889         if (!ptep)
890                 goto out;
891
892         pte = *ptep;
893         pte_unmap(ptep);
894         if (pte_present(pte)) {
895                 pfn = pte_pfn(pte);
896                 if (pfn_valid(pfn)) {
897                         return 1;
898                 }
899         }
900
901 out:
902         return 0;
903 }
904
905
EXPORT_SYMBOL_GPL(page_present);
EXPORT_SYMBOL_GPL(get_user_pages_uprobe);
EXPORT_SYMBOL_GPL(access_process_vm_atomic);
909