/*
 *  Dynamic Binary Instrumentation Module based on KProbes
 *  modules/kprobe/dbi_kprobes_deps.c
 *
 * This program is free software; you can redistribute it and/or modify
 * it under the terms of the GNU General Public License as published by
 * the Free Software Foundation; either version 2 of the License, or
 * (at your option) any later version.
 *
 * This program is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 * GNU General Public License for more details.
 *
 * You should have received a copy of the GNU General Public License
 * along with this program; if not, write to the Free Software
 * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
 *
 * Copyright (C) Samsung Electronics, 2006-2013
 *
 * 2008-2009	Alexey Gerenkov <a.gerenkov@samsung.com>: initial
 *		implementation of user-space probes; x86/ARM/MIPS support
 *		for both user and kernel space
 * 2010		Ekaterina Gorelkina <e.gorelkina@samsung.com>: redesign of
 *		the module to separate the core and arch parts
 * 2010-2012	Dmitry Kovalenko <d.kovalenko@samsung.com>,
 *		Nikita Kalyazin <n.kalyazin@samsung.com>:
 *		improvements and bug fixes
 * 2010-2011	Alexander Shirshikov: improvements and bug fixes
 * 2011-2012	Stanislav Andreev <s.andreev@samsung.com>:
 *		improvements and bug fixes
 * 2012		Vitaliy Cherepanov <v.chereapanov@samsung.com>:
 *		improvements and bug fixes
 * 2012-2013	Vasiliy Ulyanov <v.ulyanov@samsung.com>,
 *		Vyacheslav Cherkashin <v.cherkashin@samsung.com>:
 *		improvements and bug fixes
 */
#include <linux/module.h>
#include <linux/sched.h>

#include <asm/pgtable.h>

#include "dbi_kprobes_deps.h"
#include "dbi_kdebug.h"

#include <linux/slab.h>

unsigned long sched_addr;
unsigned long fork_addr;
unsigned long exit_addr;
unsigned long sys_exit_group_addr;
unsigned long do_group_exit_addr;
#if LINUX_VERSION_CODE > KERNEL_VERSION(2, 6, 29)
/* kernel define 'pgd_offset_k' redefinition */
#undef pgd_offset_k
#define pgd_offset_k(addr)	pgd_offset(init_task.active_mm, addr)
#endif

#if (LINUX_VERSION_CODE > KERNEL_VERSION(2, 6, 38))
#ifndef is_zero_pfn
/* PFN of the zero page, cached in init_module_dependencies() */
static unsigned long swap_zero_pfn = 0;
#endif /* is_zero_pfn */
#endif /* (LINUX_VERSION_CODE > KERNEL_VERSION(2, 6, 38)) */
#if LINUX_VERSION_CODE > KERNEL_VERSION(2, 6, 36)
static inline void *dbi_kmap_atomic(struct page *page)
{
	return kmap_atomic(page);
}

static inline void dbi_kunmap_atomic(void *kvaddr)
{
	kunmap_atomic(kvaddr);
}
#else /* LINUX_VERSION_CODE > KERNEL_VERSION(2, 6, 36) */
static inline void *dbi_kmap_atomic(struct page *page)
{
	return kmap_atomic(page, KM_USER0);
}

static inline void dbi_kunmap_atomic(void *kvaddr)
{
	kunmap_atomic(kvaddr, KM_USER0);
}
#endif /* LINUX_VERSION_CODE > KERNEL_VERSION(2, 6, 36) */
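
/*
 * Illustrative note (not taken from the original source): the wrappers above
 * exist because kmap_atomic() lost its KM_USER0 slot argument around 2.6.37,
 * so callers such as access_process_vm_atomic() below can map a pinned page
 * the same way on every supported kernel. A sketch, where page/kbuf/offset/
 * bytes are placeholders:
 *
 *	void *maddr = dbi_kmap_atomic(page);
 *	memcpy(kbuf, maddr + offset, bytes);
 *	dbi_kunmap_atomic(maddr);
 */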
#if LINUX_VERSION_CODE >= KERNEL_VERSION(3, 9, 0)
DECLARE_MOD_FUNC_DEP(do_mmap_pgoff, unsigned long, struct file *file, unsigned long addr, unsigned long len, unsigned long prot, unsigned long flags, unsigned long pgoff, unsigned long *populate);
DECLARE_MOD_DEP_WRAPPER(do_mmap_pgoff, unsigned long, struct file *file, unsigned long addr, unsigned long len, unsigned long prot, unsigned long flags, unsigned long pgoff, unsigned long *populate)
IMP_MOD_DEP_WRAPPER(do_mmap_pgoff, file, addr, len, prot, flags, pgoff, populate)
#elif LINUX_VERSION_CODE >= KERNEL_VERSION(3, 4, 0) /* LINUX_VERSION_CODE >= KERNEL_VERSION(3, 9, 0) */
DECLARE_MOD_FUNC_DEP(do_mmap_pgoff, unsigned long, struct file *file, unsigned long addr, unsigned long len, unsigned long prot, unsigned long flags, unsigned long pgoff);
DECLARE_MOD_DEP_WRAPPER(do_mmap_pgoff, unsigned long, struct file *file, unsigned long addr, unsigned long len, unsigned long prot, unsigned long flags, unsigned long pgoff)
IMP_MOD_DEP_WRAPPER(do_mmap_pgoff, file, addr, len, prot, flags, pgoff)
#endif /* LINUX_VERSION_CODE >= KERNEL_VERSION(3, 4, 0) */

/* copy_to_user_page */
#ifndef copy_to_user_page
static DECLARE_MOD_FUNC_DEP(copy_to_user_page, void, struct vm_area_struct *vma, struct page *page, unsigned long uaddr, void *dst, const void *src, unsigned long len);
DECLARE_MOD_DEP_WRAPPER(copy_to_user_page, void, struct vm_area_struct *vma, struct page *page, unsigned long uaddr, void *dst, const void *src, unsigned long len)
IMP_MOD_DEP_WRAPPER(copy_to_user_page, vma, page, uaddr, dst, src, len)
#endif /* copy_to_user_page */

static DECLARE_MOD_FUNC_DEP(access_process_vm, int, struct task_struct *tsk, unsigned long addr, void *buf, int len, int write);

static DECLARE_MOD_FUNC_DEP(find_extend_vma, struct vm_area_struct *, struct mm_struct *mm, unsigned long addr);
#if LINUX_VERSION_CODE <= KERNEL_VERSION(2, 6, 30)
#if LINUX_VERSION_CODE > KERNEL_VERSION(2, 6, 18)
static DECLARE_MOD_FUNC_DEP(handle_mm_fault, int, struct mm_struct *mm, struct vm_area_struct *vma, unsigned long address, int write_access);
#endif /* LINUX_VERSION_CODE > KERNEL_VERSION(2, 6, 18) */
#else /* LINUX_VERSION_CODE <= KERNEL_VERSION(2, 6, 30) */
static DECLARE_MOD_FUNC_DEP(handle_mm_fault, int, struct mm_struct *mm, struct vm_area_struct *vma, unsigned long address, unsigned int flags);
#endif /* LINUX_VERSION_CODE <= KERNEL_VERSION(2, 6, 30) */
#if LINUX_VERSION_CODE > KERNEL_VERSION(2, 6, 38)
static DECLARE_MOD_FUNC_DEP(get_gate_vma, struct vm_area_struct *, struct mm_struct *mm);
#else /* LINUX_VERSION_CODE > KERNEL_VERSION(2, 6, 38) */
static DECLARE_MOD_FUNC_DEP(get_gate_vma, struct vm_area_struct *, struct task_struct *tsk);
#endif /* LINUX_VERSION_CODE > KERNEL_VERSION(2, 6, 38) */
#ifdef CONFIG_HUGETLB_PAGE
DECLARE_MOD_FUNC_DEP(follow_hugetlb_page, int, struct mm_struct *mm, \
		struct vm_area_struct *vma, struct page **pages, \
		struct vm_area_struct **vmas, unsigned long *position, int *length, \
		int i, unsigned int write);
#endif /* CONFIG_HUGETLB_PAGE */
#ifdef __HAVE_ARCH_GATE_AREA
#if LINUX_VERSION_CODE > KERNEL_VERSION(2, 6, 38)
DECLARE_MOD_FUNC_DEP(in_gate_area, int, struct mm_struct *mm, unsigned long addr);
#else /* LINUX_VERSION_CODE > KERNEL_VERSION(2, 6, 38) */
DECLARE_MOD_FUNC_DEP(in_gate_area, int, struct task_struct *task, unsigned long addr);
#endif /* LINUX_VERSION_CODE > KERNEL_VERSION(2, 6, 38) */
#endif /* __HAVE_ARCH_GATE_AREA */

#if LINUX_VERSION_CODE > KERNEL_VERSION(2, 6, 38)
static DECLARE_MOD_FUNC_DEP(in_gate_area_no_mm, int, unsigned long addr);
#else /* LINUX_VERSION_CODE > KERNEL_VERSION(2, 6, 38) */
static DECLARE_MOD_FUNC_DEP(in_gate_area_no_task, int, unsigned long addr);
#endif /* LINUX_VERSION_CODE > KERNEL_VERSION(2, 6, 38) */
#if LINUX_VERSION_CODE >= KERNEL_VERSION(3, 9, 0)
static DECLARE_MOD_FUNC_DEP(follow_page_mask, \
		struct page *, struct vm_area_struct *vma, \
		unsigned long address, unsigned int foll_flags, \
		unsigned int *page_mask);
DECLARE_MOD_DEP_WRAPPER(follow_page_mask, struct page *, \
		struct vm_area_struct *vma, \
		unsigned long address, \
		unsigned int foll_flags, \
		unsigned int *page_mask)
IMP_MOD_DEP_WRAPPER(follow_page_mask, vma, address, foll_flags, page_mask)
#else /* LINUX_VERSION_CODE >= KERNEL_VERSION(3, 9, 0) */
static DECLARE_MOD_FUNC_DEP(follow_page, \
		struct page *, struct vm_area_struct *vma, \
		unsigned long address, unsigned int foll_flags);
DECLARE_MOD_DEP_WRAPPER(follow_page, struct page *, \
		struct vm_area_struct *vma, \
		unsigned long address, \
		unsigned int foll_flags)
IMP_MOD_DEP_WRAPPER(follow_page, vma, address, foll_flags)
#endif /* LINUX_VERSION_CODE >= KERNEL_VERSION(3, 9, 0) */

static DECLARE_MOD_FUNC_DEP(__flush_anon_page, \
		void, struct vm_area_struct *vma, struct page *page, \
		unsigned long vmaddr);
static DECLARE_MOD_FUNC_DEP(vm_normal_page, \
		struct page *, struct vm_area_struct *vma, \
		unsigned long addr, pte_t pte);
static DECLARE_MOD_FUNC_DEP(flush_ptrace_access, \
		void, struct vm_area_struct *vma, struct page *page, \
		unsigned long uaddr, void *kaddr, unsigned long len, int write);
#if (LINUX_VERSION_CODE != KERNEL_VERSION(2, 6, 16))
static DECLARE_MOD_FUNC_DEP(put_task_struct, \
		void, struct task_struct *tsk);
#else /* (LINUX_VERSION_CODE != KERNEL_VERSION(2, 6, 16)) */
static DECLARE_MOD_FUNC_DEP(put_task_struct, \
		void, struct rcu_head *rhp);
#endif /* (LINUX_VERSION_CODE != KERNEL_VERSION(2, 6, 16)) */
DECLARE_MOD_DEP_WRAPPER(access_process_vm, int, struct task_struct *tsk, unsigned long addr, void *buf, int len, int write)
IMP_MOD_DEP_WRAPPER(access_process_vm, tsk, addr, buf, len, write)

DECLARE_MOD_DEP_WRAPPER(find_extend_vma, struct vm_area_struct *, struct mm_struct *mm, unsigned long addr)
IMP_MOD_DEP_WRAPPER(find_extend_vma, mm, addr)
#if LINUX_VERSION_CODE <= KERNEL_VERSION(2, 6, 30)
#if LINUX_VERSION_CODE > KERNEL_VERSION(2, 6, 18)
DECLARE_MOD_DEP_WRAPPER(handle_mm_fault, \
		int, struct mm_struct *mm, struct vm_area_struct *vma, unsigned long address, int write_access)
IMP_MOD_DEP_WRAPPER(handle_mm_fault, mm, vma, address, write_access)
#endif /* LINUX_VERSION_CODE > KERNEL_VERSION(2, 6, 18) */
#else /* LINUX_VERSION_CODE <= KERNEL_VERSION(2, 6, 30) */
DECLARE_MOD_DEP_WRAPPER(handle_mm_fault, \
		int, struct mm_struct *mm, struct vm_area_struct *vma, unsigned long address, unsigned int flags)
IMP_MOD_DEP_WRAPPER(handle_mm_fault, mm, vma, address, flags)
#endif /* LINUX_VERSION_CODE <= KERNEL_VERSION(2, 6, 30) */
#if LINUX_VERSION_CODE > KERNEL_VERSION(2, 6, 38)
DECLARE_MOD_DEP_WRAPPER(get_gate_vma, \
		struct vm_area_struct *, struct mm_struct *mm)
IMP_MOD_DEP_WRAPPER(get_gate_vma, mm)
#else /* LINUX_VERSION_CODE > KERNEL_VERSION(2, 6, 38) */
DECLARE_MOD_DEP_WRAPPER(get_gate_vma, \
		struct vm_area_struct *, struct task_struct *tsk)
IMP_MOD_DEP_WRAPPER(get_gate_vma, tsk)
#endif /* LINUX_VERSION_CODE > KERNEL_VERSION(2, 6, 38) */
#ifdef CONFIG_HUGETLB_PAGE
DECLARE_MOD_DEP_WRAPPER(follow_hugetlb_page, int, struct mm_struct *mm, struct vm_area_struct *vma, struct page **pages, struct vm_area_struct **vmas, unsigned long *position, int *length, int i, unsigned int write)
IMP_MOD_DEP_WRAPPER(follow_hugetlb_page, mm, vma, pages, vmas, position, length, i, write)
#endif /* CONFIG_HUGETLB_PAGE */
static inline int dbi_in_gate_area(struct task_struct *task, unsigned long addr)
{
#ifdef __HAVE_ARCH_GATE_AREA
#if LINUX_VERSION_CODE > KERNEL_VERSION(2, 6, 38)
	struct mm_struct *mm = task->mm;
	IMP_MOD_DEP_WRAPPER(in_gate_area, mm, addr)
#else /* LINUX_VERSION_CODE > KERNEL_VERSION(2, 6, 38) */
	IMP_MOD_DEP_WRAPPER(in_gate_area, task, addr)
#endif /* LINUX_VERSION_CODE > KERNEL_VERSION(2, 6, 38) */
#else /* __HAVE_ARCH_GATE_AREA */
	return in_gate_area(task, addr);
#endif /* __HAVE_ARCH_GATE_AREA */
}
#if LINUX_VERSION_CODE > KERNEL_VERSION(2, 6, 38)
DECLARE_MOD_DEP_WRAPPER(in_gate_area_no_mm, int, unsigned long addr)
IMP_MOD_DEP_WRAPPER(in_gate_area_no_mm, addr)
#else /* LINUX_VERSION_CODE > KERNEL_VERSION(2, 6, 38) */
DECLARE_MOD_DEP_WRAPPER(in_gate_area_no_task, int, unsigned long addr)
IMP_MOD_DEP_WRAPPER(in_gate_area_no_task, addr)
#endif /* LINUX_VERSION_CODE > KERNEL_VERSION(2, 6, 38) */
static inline int dbi_in_gate_area_no_xxx(unsigned long addr)
{
#if LINUX_VERSION_CODE > KERNEL_VERSION(2, 6, 38)
	return in_gate_area_no_mm(addr);
#else /* LINUX_VERSION_CODE > KERNEL_VERSION(2, 6, 38) */
	return in_gate_area_no_task(addr);
#endif /* LINUX_VERSION_CODE > KERNEL_VERSION(2, 6, 38) */
}
DECLARE_MOD_DEP_WRAPPER(__flush_anon_page, \
		void, struct vm_area_struct *vma, \
		struct page *page, unsigned long vmaddr)
IMP_MOD_DEP_WRAPPER(__flush_anon_page, vma, page, vmaddr)

DECLARE_MOD_DEP_WRAPPER(vm_normal_page, \
		struct page *, struct vm_area_struct *vma, \
		unsigned long addr, pte_t pte)
IMP_MOD_DEP_WRAPPER(vm_normal_page, vma, addr, pte)

DECLARE_MOD_DEP_WRAPPER(flush_ptrace_access, \
		void, struct vm_area_struct *vma, struct page *page, \
		unsigned long uaddr, void *kaddr, unsigned long len, int write)
IMP_MOD_DEP_WRAPPER(flush_ptrace_access, vma, page, uaddr, kaddr, len, write)
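
/*
 * Illustrative note (an assumption, not taken from the original sources): the
 * DECLARE_MOD_FUNC_DEP / DECLARE_MOD_DEP_WRAPPER / IMP_MOD_DEP_WRAPPER /
 * INIT_MOD_DEP_VAR macros are defined in dbi_kprobes_deps.h and give this
 * module access to kernel functions that are not exported to modules: a
 * function pointer is declared, resolved at init time in
 * init_module_dependencies(), and a wrapper with the original signature
 * calls through it. Roughly (names hypothetical):
 *
 *	static int (*__ref_access_process_vm)(struct task_struct *tsk,
 *			unsigned long addr, void *buf, int len, int write);
 *
 *	int access_process_vm(struct task_struct *tsk, unsigned long addr,
 *			      void *buf, int len, int write)
 *	{
 *		return __ref_access_process_vm(tsk, addr, buf, len, write);
 *	}
 */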
int init_module_dependencies(void)
{

#if LINUX_VERSION_CODE > KERNEL_VERSION(2, 6, 18)
	INIT_MOD_DEP_VAR(handle_mm_fault, handle_mm_fault);
#endif

#ifndef copy_to_user_page
	INIT_MOD_DEP_VAR(copy_to_user_page, copy_to_user_page);
#endif /* copy_to_user_page */

	INIT_MOD_DEP_VAR(find_extend_vma, find_extend_vma);
	INIT_MOD_DEP_VAR(get_gate_vma, get_gate_vma);
	INIT_MOD_DEP_VAR(flush_ptrace_access, flush_ptrace_access);

#ifdef CONFIG_HUGETLB_PAGE
	INIT_MOD_DEP_VAR(follow_hugetlb_page, follow_hugetlb_page);
#endif /* CONFIG_HUGETLB_PAGE */

#ifdef __HAVE_ARCH_GATE_AREA
	INIT_MOD_DEP_VAR(in_gate_area, in_gate_area);
#endif /* __HAVE_ARCH_GATE_AREA */

#if LINUX_VERSION_CODE >= KERNEL_VERSION(3, 9, 0)
	INIT_MOD_DEP_VAR(follow_page_mask, follow_page_mask);
#else /* LINUX_VERSION_CODE >= KERNEL_VERSION(3, 9, 0) */
	INIT_MOD_DEP_VAR(follow_page, follow_page);
#endif /* LINUX_VERSION_CODE >= KERNEL_VERSION(3, 9, 0) */

#if LINUX_VERSION_CODE > KERNEL_VERSION(2, 6, 38)

#ifndef is_zero_pfn
	swap_zero_pfn = page_to_pfn(ZERO_PAGE(0));
#endif /* is_zero_pfn */

	INIT_MOD_DEP_VAR(in_gate_area_no_mm, in_gate_area_no_mm);
#else /* (LINUX_VERSION_CODE > KERNEL_VERSION(2, 6, 38)) */
	INIT_MOD_DEP_VAR(in_gate_area_no_task, in_gate_area_no_task);
#endif /* (LINUX_VERSION_CODE > KERNEL_VERSION(2, 6, 38)) */

	INIT_MOD_DEP_VAR(__flush_anon_page, __flush_anon_page);
	INIT_MOD_DEP_VAR(vm_normal_page, vm_normal_page);
	INIT_MOD_DEP_VAR(access_process_vm, access_process_vm);

#if (LINUX_VERSION_CODE != KERNEL_VERSION(2, 6, 16))
# if (LINUX_VERSION_CODE < KERNEL_VERSION(2, 6, 11))
	INIT_MOD_DEP_VAR(put_task_struct, put_task_struct);
# else /* (LINUX_VERSION_CODE < KERNEL_VERSION(2, 6, 11)) */
	INIT_MOD_DEP_VAR(put_task_struct, __put_task_struct);
# endif /* (LINUX_VERSION_CODE < KERNEL_VERSION(2, 6, 11)) */
#else /* (LINUX_VERSION_CODE != KERNEL_VERSION(2, 6, 16)) */
	INIT_MOD_DEP_VAR(put_task_struct, __put_task_struct_cb);
#endif /* (LINUX_VERSION_CODE != KERNEL_VERSION(2, 6, 16)) */

#if LINUX_VERSION_CODE >= KERNEL_VERSION(3, 4, 0)
	INIT_MOD_DEP_VAR(do_mmap_pgoff, do_mmap_pgoff);
#endif /* LINUX_VERSION_CODE >= KERNEL_VERSION(3, 4, 0) */

	return 0;
}
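
/*
 * Usage sketch (illustrative, not part of the original file): the probe core
 * is expected to call init_module_dependencies() once, before any of the
 * wrappers above are used, and to abort its own initialization on failure.
 * A hypothetical caller:
 *
 *	static int __init probe_core_init(void)
 *	{
 *		int ret = init_module_dependencies();
 *		if (ret)
 *			return ret;
 *		return 0;
 *	}
 */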
#if LINUX_VERSION_CODE <= KERNEL_VERSION(2, 6, 38) /* FIXME: must be < 32 */
#define GUP_FLAGS_WRITE				0x1
#define GUP_FLAGS_FORCE				0x2
#define GUP_FLAGS_IGNORE_VMA_PERMISSIONS	0x4
#define GUP_FLAGS_IGNORE_SIGKILL		0x8
#endif /* LINUX_VERSION_CODE <= KERNEL_VERSION(2, 6, 38) */
#if LINUX_VERSION_CODE > KERNEL_VERSION(2, 6, 18)
static inline int use_zero_page(struct vm_area_struct *vma)
{
	/*
	 * We don't want to optimize FOLL_ANON for make_pages_present()
	 * when it tries to page in a VM_LOCKED region. As to VM_SHARED,
	 * we want to get the page from the page tables to make sure
	 * that we serialize and update with any other user of that
	 * mapping.
	 */
	if (vma->vm_flags & (VM_LOCKED | VM_SHARED))
		return 0;
	/*
	 * And if we have a fault routine, it's not an anonymous region.
	 */
	return !vma->vm_ops || !vma->vm_ops->fault;
}
#endif /* LINUX_VERSION_CODE > KERNEL_VERSION(2, 6, 18) */
#if LINUX_VERSION_CODE > KERNEL_VERSION(2, 6, 38)

#if LINUX_VERSION_CODE >= KERNEL_VERSION(3, 8, 0)

#ifdef __HAVE_COLOR_ZERO_PAGE

static inline int swap_is_zero_pfn(unsigned long pfn)
{
	unsigned long offset_from_zero_pfn = pfn - swap_zero_pfn;
	return offset_from_zero_pfn <= (zero_page_mask >> PAGE_SHIFT);
}

#else /* __HAVE_COLOR_ZERO_PAGE */

static inline int swap_is_zero_pfn(unsigned long pfn)
{
	return pfn == swap_zero_pfn;
}

#endif /* __HAVE_COLOR_ZERO_PAGE */

#else /* LINUX_VERSION_CODE >= KERNEL_VERSION(3, 8, 0) */

static inline int swap_is_zero_pfn(unsigned long pfn)
{
#ifndef is_zero_pfn
	return pfn == swap_zero_pfn;
#else /* is_zero_pfn */
	return is_zero_pfn(pfn);
#endif /* is_zero_pfn */
}

#endif /* LINUX_VERSION_CODE >= KERNEL_VERSION(3, 8, 0) */
static inline int stack_guard_page(struct vm_area_struct *vma, unsigned long addr)
{
	return stack_guard_page_start(vma, addr) ||
			stack_guard_page_end(vma, addr + PAGE_SIZE);
}
#if LINUX_VERSION_CODE >= KERNEL_VERSION(3, 9, 0)

static inline struct page *follow_page_uprobe(struct vm_area_struct *vma,
	unsigned long address, unsigned int foll_flags)
{
	unsigned int unused_page_mask;
	return follow_page_mask(vma, address, foll_flags, &unused_page_mask);
}

long __get_user_pages_uprobe(struct task_struct *tsk, struct mm_struct *mm,
		unsigned long start, unsigned long nr_pages,
		unsigned int gup_flags, struct page **pages,
		struct vm_area_struct **vmas, int *nonblocking)
{
	long i;
	unsigned long vm_flags;
	unsigned int page_mask;

	if (!nr_pages)
		return 0;

	VM_BUG_ON(!!pages != !!(gup_flags & FOLL_GET));

	/*
	 * Require read or write permissions.
	 * If FOLL_FORCE is set, we only require the "MAY" flags.
	 */
	vm_flags  = (gup_flags & FOLL_WRITE) ?
			(VM_WRITE | VM_MAYWRITE) : (VM_READ | VM_MAYREAD);
	vm_flags &= (gup_flags & FOLL_FORCE) ?
			(VM_MAYREAD | VM_MAYWRITE) : (VM_READ | VM_WRITE);

	/*
	 * If FOLL_FORCE and FOLL_NUMA are both set, handle_mm_fault
	 * would be called on PROT_NONE ranges. We must never invoke
	 * handle_mm_fault on PROT_NONE ranges or the NUMA hinting
	 * page faults would unprotect the PROT_NONE ranges if
	 * _PAGE_NUMA and _PAGE_PROTNONE are sharing the same pte/pmd
	 * bitflag. So to avoid that, don't set FOLL_NUMA if
	 * FOLL_FORCE is set.
	 */
	if (!(gup_flags & FOLL_FORCE))
		gup_flags |= FOLL_NUMA;

	i = 0;

	do {
		struct vm_area_struct *vma;

		vma = find_extend_vma(mm, start);
		if (!vma && dbi_in_gate_area(tsk, start)) {
			unsigned long pg = start & PAGE_MASK;
			pgd_t *pgd;
			pud_t *pud;
			pmd_t *pmd;
			pte_t *pte;

			/* user gate pages are read-only */
			if (gup_flags & FOLL_WRITE)
				return i ? : -EFAULT;
			if (pg > TASK_SIZE)
				pgd = pgd_offset_k(pg);
			else
				pgd = pgd_offset_gate(mm, pg);
			BUG_ON(pgd_none(*pgd));
			pud = pud_offset(pgd, pg);
			BUG_ON(pud_none(*pud));
			pmd = pmd_offset(pud, pg);
			if (pmd_none(*pmd))
				return i ? : -EFAULT;
			VM_BUG_ON(pmd_trans_huge(*pmd));
			pte = pte_offset_map(pmd, pg);
			if (pte_none(*pte)) {
				pte_unmap(pte);
				return i ? : -EFAULT;
			}
			vma = get_gate_vma(mm);
			if (pages) {
				struct page *page;

				page = vm_normal_page(vma, start, *pte);
				if (!page) {
					if (!(gup_flags & FOLL_DUMP) &&
					     swap_is_zero_pfn(pte_pfn(*pte)))
						page = pte_page(*pte);
					else {
						pte_unmap(pte);
						return i ? : -EFAULT;
					}
				}
				pages[i] = page;
				get_page(page);
			}
			pte_unmap(pte);
			page_mask = 0;
			goto next_page;
		}

		if (!vma ||
		    (vma->vm_flags & (VM_IO | VM_PFNMAP)) ||
		    !(vm_flags & vma->vm_flags))
			return i ? : -EFAULT;

		if (is_vm_hugetlb_page(vma)) {
			i = follow_hugetlb_page(mm, vma, pages, vmas,
					&start, &nr_pages, i, gup_flags);
			continue;
		}

		do {
			struct page *page;
			unsigned int foll_flags = gup_flags;
			unsigned int page_increm;

			/*
			 * If we have a pending SIGKILL, don't keep faulting
			 * pages and potentially allocating memory.
			 */
			if (unlikely(fatal_signal_pending(current)))
				return i ? i : -ERESTARTSYS;

			/* cond_resched(); */
			while (!(page = follow_page_mask(vma, start,
						foll_flags, &page_mask))) {
				int ret;
				unsigned int fault_flags = 0;

				/* For mlock, just skip the stack guard page. */
				if (foll_flags & FOLL_MLOCK) {
					if (stack_guard_page(vma, start))
						goto next_page;
				}
				if (foll_flags & FOLL_WRITE)
					fault_flags |= FAULT_FLAG_WRITE;
				if (nonblocking)
					fault_flags |= FAULT_FLAG_ALLOW_RETRY;
				if (foll_flags & FOLL_NOWAIT)
					fault_flags |= (FAULT_FLAG_ALLOW_RETRY | FAULT_FLAG_RETRY_NOWAIT);

				ret = handle_mm_fault(mm, vma, start,
							fault_flags);

				if (ret & VM_FAULT_ERROR) {
					if (ret & VM_FAULT_OOM)
						return i ? i : -ENOMEM;
					if (ret & (VM_FAULT_HWPOISON |
						   VM_FAULT_HWPOISON_LARGE)) {
						if (i)
							return i;
						else if (gup_flags & FOLL_HWPOISON)
							return -EHWPOISON;
						else
							return -EFAULT;
					}
					if (ret & VM_FAULT_SIGBUS)
						return i ? i : -EFAULT;
					BUG();
				}

				if (tsk) {
					if (ret & VM_FAULT_MAJOR)
						tsk->maj_flt++;
					else
						tsk->min_flt++;
				}

				if (ret & VM_FAULT_RETRY) {
					if (nonblocking)
						*nonblocking = 0;
					return i;
				}

				/*
				 * The VM_FAULT_WRITE bit tells us that
				 * do_wp_page has broken COW when necessary,
				 * even if maybe_mkwrite decided not to set
				 * pte_write. We can thus safely do subsequent
				 * page lookups as if they were reads. But only
				 * do so when looping for pte_write is futile:
				 * in some cases userspace may also be wanting
				 * to write to the gotten user page, which a
				 * read fault here might prevent (a readonly
				 * page might get reCOWed by userspace write).
				 */
				if ((ret & VM_FAULT_WRITE) &&
				    !(vma->vm_flags & VM_WRITE))
					foll_flags &= ~FOLL_WRITE;

				/* cond_resched(); */
			}
			if (IS_ERR(page))
				return i ? i : PTR_ERR(page);
			if (pages) {
				pages[i] = page;

				flush_anon_page(vma, page, start);
				flush_dcache_page(page);
				page_mask = 0;
			}
next_page:
			if (vmas) {
				vmas[i] = vma;
				page_mask = 0;
			}
			page_increm = 1 + (~(start >> PAGE_SHIFT) & page_mask);
			if (page_increm > nr_pages)
				page_increm = nr_pages;
			i += page_increm;
			start += page_increm * PAGE_SIZE;
			nr_pages -= page_increm;
		} while (nr_pages && start < vma->vm_end);
	} while (nr_pages);

	return i;
}
#else /* LINUX_VERSION_CODE >= KERNEL_VERSION(3, 9, 0) */

static int __get_user_pages_uprobe(struct task_struct *tsk, struct mm_struct *mm,
			unsigned long start, int nr_pages, unsigned int gup_flags,
			struct page **pages, struct vm_area_struct **vmas,
			int *nonblocking)
{
	int i;
	unsigned long vm_flags;

	if (nr_pages <= 0) {
		return 0;
	}

	VM_BUG_ON(!!pages != !!(gup_flags & FOLL_GET));

	/*
	 * Require read or write permissions.
	 * If FOLL_FORCE is set, we only require the "MAY" flags.
	 */
	vm_flags  = (gup_flags & FOLL_WRITE) ?
			(VM_WRITE | VM_MAYWRITE) : (VM_READ | VM_MAYREAD);
	vm_flags &= (gup_flags & FOLL_FORCE) ?
			(VM_MAYREAD | VM_MAYWRITE) : (VM_READ | VM_WRITE);

	i = 0;

	do {
		struct vm_area_struct *vma;

		vma = find_extend_vma(mm, start);
		if (!vma && dbi_in_gate_area_no_xxx(start)) {
			unsigned long pg = start & PAGE_MASK;
			pgd_t *pgd;
			pud_t *pud;
			pmd_t *pmd;
			pte_t *pte;

			/* user gate pages are read-only */
			if (gup_flags & FOLL_WRITE) {
				return i ? : -EFAULT;
			}
			if (pg > TASK_SIZE)
				pgd = pgd_offset_k(pg);
			else
				pgd = pgd_offset_gate(mm, pg);
			BUG_ON(pgd_none(*pgd));
			pud = pud_offset(pgd, pg);
			BUG_ON(pud_none(*pud));
			pmd = pmd_offset(pud, pg);
			if (pmd_none(*pmd)) {
				return i ? : -EFAULT;
			}
			VM_BUG_ON(pmd_trans_huge(*pmd));
			pte = pte_offset_map(pmd, pg);
			if (pte_none(*pte)) {
				pte_unmap(pte);
				return i ? : -EFAULT;
			}
			vma = get_gate_vma(mm);
			if (pages) {
				struct page *page;

				page = vm_normal_page(vma, start, *pte);
				if (!page) {
					if (!(gup_flags & FOLL_DUMP) &&
						swap_is_zero_pfn(pte_pfn(*pte)))
						page = pte_page(*pte);
					else {
						pte_unmap(pte);
						return i ? : -EFAULT;
					}
				}
				pages[i] = page;
				get_page(page);
			}
			pte_unmap(pte);
			goto next_page;
		}

		if (!vma ||
			(vma->vm_flags & (VM_IO | VM_PFNMAP)) ||
			!(vm_flags & vma->vm_flags)) {
			return i ? : -EFAULT;
		}

		if (is_vm_hugetlb_page(vma)) {
			i = follow_hugetlb_page(mm, vma, pages, vmas,
					&start, &nr_pages, i, gup_flags);
			continue;
		}

		do {
			struct page *page;
			unsigned int foll_flags = gup_flags;

			/*
			 * If we have a pending SIGKILL, don't keep faulting
			 * pages and potentially allocating memory.
			 */
			if (unlikely(fatal_signal_pending(current))) {
				return i ? i : -ERESTARTSYS;
			}

			/* cond_resched(); */
			while (!(page = follow_page(vma, start, foll_flags))) {
				int ret;
				unsigned int fault_flags = 0;

				/* For mlock, just skip the stack guard page. */
				if (foll_flags & FOLL_MLOCK) {
					if (stack_guard_page(vma, start))
						goto next_page;
				}
				if (foll_flags & FOLL_WRITE)
					fault_flags |= FAULT_FLAG_WRITE;
				if (nonblocking)
					fault_flags |= FAULT_FLAG_ALLOW_RETRY;
				if (foll_flags & FOLL_NOWAIT)
					fault_flags |= (FAULT_FLAG_ALLOW_RETRY | FAULT_FLAG_RETRY_NOWAIT);

				ret = handle_mm_fault(mm, vma, start,
							fault_flags);

				if (ret & VM_FAULT_ERROR) {
					if (ret & VM_FAULT_OOM) {
						return i ? i : -ENOMEM;
					}
					if (ret & (VM_FAULT_HWPOISON |
						   VM_FAULT_HWPOISON_LARGE)) {
						if (i) {
							return i;
						}
						else if (gup_flags & FOLL_HWPOISON) {
							return -EHWPOISON;
						}
						else {
							return -EFAULT;
						}
					}
					if (ret & VM_FAULT_SIGBUS) {
						return i ? i : -EFAULT;
					}
					BUG();
				}

				if (tsk) {
					if (ret & VM_FAULT_MAJOR)
						tsk->maj_flt++;
					else
						tsk->min_flt++;
				}

				if (ret & VM_FAULT_RETRY) {
					if (nonblocking)
						*nonblocking = 0;
					return i;
				}

				/*
				 * The VM_FAULT_WRITE bit tells us that
				 * do_wp_page has broken COW when necessary,
				 * even if maybe_mkwrite decided not to set
				 * pte_write. We can thus safely do subsequent
				 * page lookups as if they were reads. But only
				 * do so when looping for pte_write is futile:
				 * in some cases userspace may also be wanting
				 * to write to the gotten user page, which a
				 * read fault here might prevent (a readonly
				 * page might get reCOWed by userspace write).
				 */
				if ((ret & VM_FAULT_WRITE) &&
				    !(vma->vm_flags & VM_WRITE))
					foll_flags &= ~FOLL_WRITE;

				/* cond_resched(); */
			}
			if (IS_ERR(page)) {
				return i ? i : PTR_ERR(page);
			}
			if (pages) {
				pages[i] = page;

				flush_anon_page(vma, page, start);
				flush_dcache_page(page);
			}
next_page:
			if (vmas)
				vmas[i] = vma;
			i++;
			start += PAGE_SIZE;
			nr_pages--;
		} while (nr_pages && start < vma->vm_end);
	} while (nr_pages);

	return i;
}

#endif /* LINUX_VERSION_CODE >= KERNEL_VERSION(3, 9, 0) */
#else /* LINUX_VERSION_CODE > KERNEL_VERSION(2, 6, 38) */

static int __get_user_pages_uprobe(struct task_struct *tsk, struct mm_struct *mm,
		unsigned long start, int len, int flags,
		struct page **pages, struct vm_area_struct **vmas)
{
	int i;
	unsigned int vm_flags = 0;
	int write = !!(flags & GUP_FLAGS_WRITE);
	int force = !!(flags & GUP_FLAGS_FORCE);
	int ignore = !!(flags & GUP_FLAGS_IGNORE_VMA_PERMISSIONS);

	if (len <= 0)
		return 0;

	/*
	 * Require read or write permissions.
	 * If 'force' is set, we only require the "MAY" flags.
	 */
	vm_flags  = write ? (VM_WRITE | VM_MAYWRITE) : (VM_READ | VM_MAYREAD);
	vm_flags &= force ? (VM_MAYREAD | VM_MAYWRITE) : (VM_READ | VM_WRITE);

	i = 0;

	do {
		struct vm_area_struct *vma;
		unsigned int foll_flags;

		vma = find_vma(mm, start);
		if (!vma && dbi_in_gate_area(tsk, start)) {
			unsigned long pg = start & PAGE_MASK;
			struct vm_area_struct *gate_vma = get_gate_vma(tsk);
			pgd_t *pgd;
			pud_t *pud;
			pmd_t *pmd;
			pte_t *pte;

			/* user gate pages are read-only */
			if (!ignore && write)
				return i ? : -EFAULT;
			if (pg > TASK_SIZE)
				pgd = pgd_offset_k(pg);
			else
				pgd = pgd_offset_gate(mm, pg);
			BUG_ON(pgd_none(*pgd));
			pud = pud_offset(pgd, pg);
			BUG_ON(pud_none(*pud));
			pmd = pmd_offset(pud, pg);
			if (pmd_none(*pmd))
				return i ? : -EFAULT;
			pte = pte_offset_map(pmd, pg);
			if (pte_none(*pte)) {
				pte_unmap(pte);
				return i ? : -EFAULT;
			}
			if (pages) {
				struct page *page = vm_normal_page(gate_vma, start, *pte);
				pages[i] = page;
				if (page)
					get_page(page);
			}
			pte_unmap(pte);
			if (vmas)
				vmas[i] = gate_vma;
			i++;
			start += PAGE_SIZE;
			len--;
			continue;
		}

		if (!vma ||
		    (vma->vm_flags & (VM_IO | VM_PFNMAP)) ||
		    (!ignore && !(vm_flags & vma->vm_flags)))
			return i ? : -EFAULT;

		if (is_vm_hugetlb_page(vma)) {
#if LINUX_VERSION_CODE <= KERNEL_VERSION(2,6,18)
			i = follow_hugetlb_page(mm, vma, pages, vmas,
						&start, &len, i);
#else /* LINUX_VERSION_CODE <= KERNEL_VERSION(2,6,18) */
			i = follow_hugetlb_page(mm, vma, pages, vmas,
						&start, &len, i, write);
#endif /* LINUX_VERSION_CODE <= KERNEL_VERSION(2,6,18) */
			continue;
		}

		foll_flags = FOLL_TOUCH;
		if (pages)
			foll_flags |= FOLL_GET;

#if LINUX_VERSION_CODE > KERNEL_VERSION(2,6,18)
#if LINUX_VERSION_CODE <= KERNEL_VERSION(2,6,30)
		if (!write && use_zero_page(vma))
			foll_flags |= FOLL_ANON;
#endif /* LINUX_VERSION_CODE <= KERNEL_VERSION(2,6,30) */
#endif /* LINUX_VERSION_CODE > KERNEL_VERSION(2,6,18) */

		do {
			struct page *page;

			if (write)
				foll_flags |= FOLL_WRITE;

			/* cond_resched(); */

			DBPRINTF("pages = %p vma = %p\n", pages, vma);
			while (!(page = follow_page(vma, start, foll_flags))) {
				int ret;
				ret = handle_mm_fault(mm, vma, start,
						foll_flags & FOLL_WRITE);

#if LINUX_VERSION_CODE <= KERNEL_VERSION(2,6,18)
				if (ret & VM_FAULT_WRITE)
					foll_flags &= ~FOLL_WRITE;

				switch (ret & ~VM_FAULT_WRITE) {
				case VM_FAULT_MINOR:
					tsk->min_flt++;
					break;
				case VM_FAULT_MAJOR:
					tsk->maj_flt++;
					break;
				case VM_FAULT_SIGBUS:
					return i ? i : -EFAULT;
				case VM_FAULT_OOM:
					return i ? i : -ENOMEM;
				default:
					BUG();
				}
#else /* LINUX_VERSION_CODE <= KERNEL_VERSION(2,6,18) */
				if (ret & VM_FAULT_ERROR) {
					if (ret & VM_FAULT_OOM)
						return i ? i : -ENOMEM;
					else if (ret & VM_FAULT_SIGBUS)
						return i ? i : -EFAULT;
					BUG();
				}
				if (ret & VM_FAULT_MAJOR)
					tsk->maj_flt++;
				else
					tsk->min_flt++;

				/*
				 * The VM_FAULT_WRITE bit tells us that
				 * do_wp_page has broken COW when necessary,
				 * even if maybe_mkwrite decided not to set
				 * pte_write. We can thus safely do subsequent
				 * page lookups as if they were reads. But only
				 * do so when looping for pte_write is futile:
				 * in some cases userspace may also be wanting
				 * to write to the gotten user page, which a
				 * read fault here might prevent (a readonly
				 * page might get reCOWed by userspace write).
				 */
				if ((ret & VM_FAULT_WRITE) &&
				    !(vma->vm_flags & VM_WRITE))
					foll_flags &= ~FOLL_WRITE;

				/* cond_resched(); */
#endif /* LINUX_VERSION_CODE <= KERNEL_VERSION(2,6,18) */
			}

			if (IS_ERR(page))
				return i ? i : PTR_ERR(page);
			if (pages) {
				pages[i] = page;

#if LINUX_VERSION_CODE <= KERNEL_VERSION(2,6,18)
				flush_anon_page(page, start);
#else /* LINUX_VERSION_CODE <= KERNEL_VERSION(2,6,18) */
				flush_anon_page(vma, page, start);
#endif /* LINUX_VERSION_CODE <= KERNEL_VERSION(2,6,18) */
				flush_dcache_page(page);
			}
			if (vmas)
				vmas[i] = vma;
			i++;
			start += PAGE_SIZE;
			len--;
		} while (len && start < vma->vm_end);
	} while (len);

	return i;
}

#endif /* LINUX_VERSION_CODE > KERNEL_VERSION(2, 6, 38) */
int get_user_pages_uprobe(struct task_struct *tsk, struct mm_struct *mm,
		unsigned long start, int len, int write, int force,
		struct page **pages, struct vm_area_struct **vmas)
{
#if LINUX_VERSION_CODE >= KERNEL_VERSION(2, 6, 29)
#if LINUX_VERSION_CODE > KERNEL_VERSION(2, 6, 38) /* FIXME: must be >= 32! */
	int flags = FOLL_TOUCH;

	if (pages)
		flags |= FOLL_GET;
	if (write)
		flags |= FOLL_WRITE;
	if (force)
		flags |= FOLL_FORCE;
#else /* LINUX_VERSION_CODE > KERNEL_VERSION(2, 6, 38) */
	int flags = 0;

	if (write)
		flags |= GUP_FLAGS_WRITE;
	if (force)
		flags |= GUP_FLAGS_FORCE;
#endif /* LINUX_VERSION_CODE > KERNEL_VERSION(2, 6, 38) */

	return __get_user_pages_uprobe(tsk, mm,
				start, len, flags,
#if LINUX_VERSION_CODE > KERNEL_VERSION(2, 6, 38)
				pages, vmas, NULL);
#else /* LINUX_VERSION_CODE > KERNEL_VERSION(2, 6, 38) */
				pages, vmas);
#endif /* LINUX_VERSION_CODE > KERNEL_VERSION(2, 6, 38) */
#else /* LINUX_VERSION_CODE >= KERNEL_VERSION(2, 6, 29) */
	return get_user_pages(tsk, mm, start, len, write, force, pages, vmas);
#endif /* LINUX_VERSION_CODE >= KERNEL_VERSION(2, 6, 29) */
}
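
/*
 * Usage sketch (illustrative, variable names hypothetical): pinning a single
 * page of a traced task, much as access_process_vm_atomic() below does.
 * The fifth and sixth arguments are 'write' and 'force':
 *
 *	struct page *page;
 *	struct vm_area_struct *vma;
 *	int ret = get_user_pages_uprobe(task, task->mm, addr, 1, 0, 1,
 *					&page, &vma);
 *	if (ret > 0) {
 *		... read through a mapping of 'page' ...
 *		page_cache_release(page);
 *	}
 */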
#define ACCESS_PROCESS_OPTIMIZATION 0

#if ACCESS_PROCESS_OPTIMIZATION

#define GET_STEP_X(LEN, STEP) (((LEN) >= (STEP)) ? (STEP) : (LEN) % (STEP))
#define GET_STEP_4(LEN) GET_STEP_X((LEN), 4)

static void read_data_current(unsigned long addr, void *buf, int len)
{
	int step;
	int pos = 0;

	for (step = GET_STEP_4(len); len; len -= step) {
		switch (GET_STEP_4(len)) {
		case 1:
			get_user(*(u8 *)(buf + pos), (unsigned long *)(addr + pos));
			step = 1;
			break;
		case 2:
		case 3:
			get_user(*(u16 *)(buf + pos), (unsigned long *)(addr + pos));
			step = 2;
			break;
		case 4:
			get_user(*(u32 *)(buf + pos), (unsigned long *)(addr + pos));
			step = 4;
			break;
		}

		pos += step;
	}
}

static void write_data_current(unsigned long addr, void *buf, int len)
{
	int step;
	int pos = 0;

	for (step = GET_STEP_4(len); len; len -= step) {
		switch (GET_STEP_4(len)) {
		case 1:
			put_user(*(u8 *)(buf + pos), (unsigned long *)(addr + pos));
			step = 1;
			break;
		case 2:
		case 3:
			put_user(*(u16 *)(buf + pos), (unsigned long *)(addr + pos));
			step = 2;
			break;
		case 4:
			put_user(*(u32 *)(buf + pos), (unsigned long *)(addr + pos));
			step = 4;
			break;
		}

		pos += step;
	}
}
#endif /* ACCESS_PROCESS_OPTIMIZATION */
int access_process_vm_atomic(struct task_struct *tsk, unsigned long addr, void *buf, int len, int write)
{
	struct mm_struct *mm;
	struct vm_area_struct *vma;
	void *old_buf = buf;

#if ACCESS_PROCESS_OPTIMIZATION
	if (write == 0 && tsk == current) {
		read_data_current(addr, buf, len);
		return len;
	}
#endif

	mm = tsk->mm; /* function 'get_task_mm' is to be called */
	if (!mm)
		return 0;

	/* ignore errors, just check how much was successfully transferred */
	while (len) {
		int bytes, ret, offset;
		void *maddr;
		struct page *page = NULL;

		ret = get_user_pages_uprobe(tsk, mm, addr, 1,
						write, 1, &page, &vma);

		if (ret <= 0) {
			/*
			 * Check if this is a VM_IO | VM_PFNMAP VMA, which
			 * we can access using slightly different code.
			 */
#ifdef CONFIG_HAVE_IOREMAP_PROT
			vma = find_vma(mm, addr);
			if (!vma)
				break;
			if (vma->vm_ops && vma->vm_ops->access)
				ret = vma->vm_ops->access(vma, addr, buf,
							  len, write);
			if (ret <= 0)
#endif
				break;
			bytes = ret;
		} else {
			bytes = len;
			offset = addr & (PAGE_SIZE - 1);
			if (bytes > PAGE_SIZE - offset)
				bytes = PAGE_SIZE - offset;

			maddr = dbi_kmap_atomic(page);

			if (write) {
				copy_to_user_page(vma, page, addr,
						  maddr + offset, buf, bytes);
				set_page_dirty_lock(page);
			} else {
				copy_from_user_page(vma, page, addr,
						    buf, maddr + offset, bytes);
			}

			dbi_kunmap_atomic(maddr);
			page_cache_release(page);
		}
		len -= bytes;
		buf += bytes;
		addr += bytes;
	}

	return buf - old_buf;
}
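
/*
 * Usage sketch (illustrative, names hypothetical): access_process_vm_atomic()
 * mirrors access_process_vm() but goes through the helpers above instead of
 * the exported API, which lets probe handlers read a traced task's memory:
 *
 *	unsigned long val;
 *	if (access_process_vm_atomic(task, uaddr, &val, sizeof(val), 0)
 *			!= sizeof(val))
 *		return -EFAULT;
 */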
int page_present(struct mm_struct *mm, unsigned long address)
{
	pgd_t *pgd;
	pud_t *pud;
	pmd_t *pmd;
	pte_t *ptep, pte;
	unsigned long pfn;

	pgd = pgd_offset(mm, address);
	if (pgd_none(*pgd) || unlikely(pgd_bad(*pgd)))
		goto out;

	pud = pud_offset(pgd, address);
	if (pud_none(*pud) || unlikely(pud_bad(*pud)))
		goto out;

	pmd = pmd_offset(pud, address);
	if (pmd_none(*pmd) || unlikely(pmd_bad(*pmd)))
		goto out;

	ptep = pte_offset_map(pmd, address);
	if (!ptep)
		goto out;

	pte = *ptep;
	pte_unmap(ptep);
	if (pte_present(pte)) {
		pfn = pte_pfn(pte);
		if (pfn_valid(pfn)) {
			return 1;
		}
	}

out:
	return 0;
}
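
/*
 * Usage sketch (illustrative): page_present() lets callers check that a user
 * address is currently backed by a valid page before touching it:
 *
 *	if (!page_present(task->mm, uaddr & PAGE_MASK))
 *		return -EFAULT;
 */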
EXPORT_SYMBOL_GPL(page_present);
EXPORT_SYMBOL_GPL(get_user_pages_uprobe);
EXPORT_SYMBOL_GPL(access_process_vm_atomic);