/*
 *  Dynamic Binary Instrumentation Module based on KProbes
 *  modules/kprobe/dbi_kprobes_deps.c
 *
 * This program is free software; you can redistribute it and/or modify
 * it under the terms of the GNU General Public License as published by
 * the Free Software Foundation; either version 2 of the License, or
 * (at your option) any later version.
 *
 * This program is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 * GNU General Public License for more details.
 *
 * You should have received a copy of the GNU General Public License
 * along with this program; if not, write to the Free Software
 * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
 *
 * Copyright (C) Samsung Electronics, 2006-2010
 *
 * 2008-2009    Alexey Gerenkov <a.gerenkov@samsung.com>: initial implementation
 *              of user-space probes; support of x86/ARM/MIPS for both user and
 *              kernel spaces.
 * 2010         Ekaterina Gorelkina <e.gorelkina@samsung.com>: redesign of the
 *              module to separate the core and arch-specific parts.
 */
#include <linux/module.h>
#include <linux/sched.h>

#include <asm/pgtable.h>

#include "dbi_kprobes_deps.h"
#include "dbi_kdebug.h"

#include <linux/slab.h>
#if LINUX_VERSION_CODE > KERNEL_VERSION(2, 6, 29)
/* redefinition of the kernel's 'pgd_offset_k' macro */
#undef pgd_offset_k
#define pgd_offset_k(addr)	pgd_offset(init_task.active_mm, addr)
#endif /* LINUX_VERSION_CODE > KERNEL_VERSION(2, 6, 29) */

#if (LINUX_VERSION_CODE > KERNEL_VERSION(2, 6, 38))
#ifndef is_zero_pfn

static unsigned long swap_zero_pfn = 0;

#endif /* is_zero_pfn */
#endif /* (LINUX_VERSION_CODE > KERNEL_VERSION(2, 6, 38)) */
#if LINUX_VERSION_CODE > KERNEL_VERSION(2, 6, 36)
static inline void *dbi_kmap_atomic(struct page *page)
{
	return kmap_atomic(page);
}

static inline void dbi_kunmap_atomic(void *kvaddr)
{
	kunmap_atomic(kvaddr);
}
#else /* LINUX_VERSION_CODE > KERNEL_VERSION(2, 6, 36) */
static inline void *dbi_kmap_atomic(struct page *page)
{
	return kmap_atomic(page, KM_USER0);
}

static inline void dbi_kunmap_atomic(void *kvaddr)
{
	kunmap_atomic(kvaddr, KM_USER0);
}
#endif /* LINUX_VERSION_CODE > KERNEL_VERSION(2, 6, 36) */
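
/*
 * Usage sketch (illustrative, not part of the original code): the wrappers
 * above hide the kmap_atomic() signature change that removed the KM_* slot
 * argument, so callers map and unmap a page the same way on every supported
 * kernel, e.g.:
 *
 *	void *kvaddr = dbi_kmap_atomic(page);
 *	memcpy(kbuf, kvaddr + offset, bytes);
 *	dbi_kunmap_atomic(kvaddr);
 *
 * (kbuf, offset and bytes are hypothetical names; see
 * access_process_vm_atomic() below for the real call sites.)
 */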
#if LINUX_VERSION_CODE >= KERNEL_VERSION(3, 9, 0)
DECLARE_MOD_FUNC_DEP(do_mmap_pgoff, unsigned long, struct file *file, unsigned long addr, unsigned long len, unsigned long prot, unsigned long flags, unsigned long pgoff, unsigned long *populate);
DECLARE_MOD_DEP_WRAPPER(do_mmap_pgoff, unsigned long, struct file *file, unsigned long addr, unsigned long len, unsigned long prot, unsigned long flags, unsigned long pgoff, unsigned long *populate)
IMP_MOD_DEP_WRAPPER(do_mmap_pgoff, file, addr, len, prot, flags, pgoff, populate)
#elif LINUX_VERSION_CODE >= KERNEL_VERSION(3, 4, 0) /* LINUX_VERSION_CODE >= KERNEL_VERSION(3, 9, 0) */
DECLARE_MOD_FUNC_DEP(do_mmap_pgoff, unsigned long, struct file *file, unsigned long addr, unsigned long len, unsigned long prot, unsigned long flags, unsigned long pgoff);
DECLARE_MOD_DEP_WRAPPER(do_mmap_pgoff, unsigned long, struct file *file, unsigned long addr, unsigned long len, unsigned long prot, unsigned long flags, unsigned long pgoff)
IMP_MOD_DEP_WRAPPER(do_mmap_pgoff, file, addr, len, prot, flags, pgoff)
#endif /* LINUX_VERSION_CODE >= KERNEL_VERSION(3, 4, 0) */

#if LINUX_VERSION_CODE >= KERNEL_VERSION(3, 4, 0)
EXPORT_SYMBOL_GPL(do_mmap_pgoff);
#endif /* LINUX_VERSION_CODE >= KERNEL_VERSION(3, 4, 0) */
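
/*
 * Sketch of how the DECLARE_MOD_* and IMP_MOD_* dependency macros are
 * expected to fit together (illustrative only -- the authoritative
 * definitions live in dbi_kprobes_deps.h).  For an unexported kernel symbol
 * such as do_mmap_pgoff the pattern is approximately:
 *
 *	static unsigned long (*__ref_do_mmap_pgoff)(struct file *file, ...);
 *
 *	unsigned long do_mmap_pgoff(struct file *file, ...)
 *	{
 *		return __ref_do_mmap_pgoff(file, ...);
 *	}
 *
 * with init_module_dependencies() filling the pointer at load time through
 * a kallsyms-based lookup via INIT_MOD_DEP_VAR().  The '__ref_' prefix is a
 * hypothetical name used only for illustration.
 */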
/* copy_to_user_page */
#ifndef copy_to_user_page
static DECLARE_MOD_FUNC_DEP(copy_to_user_page, void, struct vm_area_struct *vma, struct page *page, unsigned long uaddr, void *dst, const void *src, unsigned long len);
DECLARE_MOD_DEP_WRAPPER(copy_to_user_page, void, struct vm_area_struct *vma, struct page *page, unsigned long uaddr, void *dst, const void *src, unsigned long len)
IMP_MOD_DEP_WRAPPER(copy_to_user_page, vma, page, uaddr, dst, src, len)
#endif /* copy_to_user_page */

static DECLARE_MOD_FUNC_DEP(access_process_vm, int, struct task_struct *tsk, unsigned long addr, void *buf, int len, int write);

static DECLARE_MOD_FUNC_DEP(find_extend_vma, struct vm_area_struct *, struct mm_struct *mm, unsigned long addr);
#if LINUX_VERSION_CODE <= KERNEL_VERSION(2, 6, 30)
#if LINUX_VERSION_CODE > KERNEL_VERSION(2, 6, 18)
static DECLARE_MOD_FUNC_DEP(handle_mm_fault, int, struct mm_struct *mm, struct vm_area_struct *vma, unsigned long address, int write_access);
#endif /* LINUX_VERSION_CODE > KERNEL_VERSION(2, 6, 18) */
#else /* LINUX_VERSION_CODE <= KERNEL_VERSION(2, 6, 30) */
static DECLARE_MOD_FUNC_DEP(handle_mm_fault, int, struct mm_struct *mm, struct vm_area_struct *vma, unsigned long address, unsigned int flags);
#endif /* LINUX_VERSION_CODE <= KERNEL_VERSION(2, 6, 30) */
#if LINUX_VERSION_CODE > KERNEL_VERSION(2, 6, 38)
static DECLARE_MOD_FUNC_DEP(get_gate_vma, struct vm_area_struct *, struct mm_struct *mm);
#else /* LINUX_VERSION_CODE > KERNEL_VERSION(2, 6, 38) */
static DECLARE_MOD_FUNC_DEP(get_gate_vma, struct vm_area_struct *, struct task_struct *tsk);
#endif /* LINUX_VERSION_CODE > KERNEL_VERSION(2, 6, 38) */
#ifdef CONFIG_HUGETLB_PAGE
DECLARE_MOD_FUNC_DEP(follow_hugetlb_page, int, struct mm_struct *mm, \
		struct vm_area_struct *vma, struct page **pages, \
		struct vm_area_struct **vmas, unsigned long *position, int *length, \
		int i, unsigned int write);
#endif /* CONFIG_HUGETLB_PAGE */
#ifdef __HAVE_ARCH_GATE_AREA
#if LINUX_VERSION_CODE > KERNEL_VERSION(2, 6, 38)
DECLARE_MOD_FUNC_DEP(in_gate_area, int, struct mm_struct *mm, unsigned long addr);
#else /* LINUX_VERSION_CODE > KERNEL_VERSION(2, 6, 38) */
DECLARE_MOD_FUNC_DEP(in_gate_area, int, struct task_struct *task, unsigned long addr);
#endif /* LINUX_VERSION_CODE > KERNEL_VERSION(2, 6, 38) */
#endif /* __HAVE_ARCH_GATE_AREA */

#if LINUX_VERSION_CODE > KERNEL_VERSION(2, 6, 38)
static DECLARE_MOD_FUNC_DEP(in_gate_area_no_mm, int, unsigned long addr);
#else /* LINUX_VERSION_CODE > KERNEL_VERSION(2, 6, 38) */
static DECLARE_MOD_FUNC_DEP(in_gate_area_no_task, int, unsigned long addr);
#endif /* LINUX_VERSION_CODE > KERNEL_VERSION(2, 6, 38) */
#if LINUX_VERSION_CODE >= KERNEL_VERSION(3, 9, 0)
static DECLARE_MOD_FUNC_DEP(follow_page_mask, \
		struct page *, struct vm_area_struct *vma, \
		unsigned long address, unsigned int foll_flags, \
		unsigned int *page_mask);
DECLARE_MOD_DEP_WRAPPER(follow_page_mask, struct page *, \
		struct vm_area_struct *vma, \
		unsigned long address, \
		unsigned int foll_flags, \
		unsigned int *page_mask)
IMP_MOD_DEP_WRAPPER(follow_page_mask, vma, address, foll_flags, page_mask)
#else /* LINUX_VERSION_CODE >= KERNEL_VERSION(3, 9, 0) */
static DECLARE_MOD_FUNC_DEP(follow_page, \
		struct page *, struct vm_area_struct *vma, \
		unsigned long address, unsigned int foll_flags);
DECLARE_MOD_DEP_WRAPPER(follow_page, struct page *, \
		struct vm_area_struct *vma, \
		unsigned long address, \
		unsigned int foll_flags)
IMP_MOD_DEP_WRAPPER(follow_page, vma, address, foll_flags)
#endif /* LINUX_VERSION_CODE >= KERNEL_VERSION(3, 9, 0) */
static DECLARE_MOD_FUNC_DEP(__flush_anon_page, \
		void, struct vm_area_struct *vma, struct page *page, \
		unsigned long vmaddr);
static DECLARE_MOD_FUNC_DEP(vm_normal_page, \
		struct page *, struct vm_area_struct *vma, \
		unsigned long addr, pte_t pte);
static DECLARE_MOD_FUNC_DEP(flush_ptrace_access, \
		void, struct vm_area_struct *vma, struct page *page, \
		unsigned long uaddr, void *kaddr, unsigned long len, int write);
#if (LINUX_VERSION_CODE != KERNEL_VERSION(2, 6, 16))
static DECLARE_MOD_FUNC_DEP(put_task_struct, \
		void, struct task_struct *tsk);
#else /* (LINUX_VERSION_CODE != KERNEL_VERSION(2, 6, 16)) */
static DECLARE_MOD_FUNC_DEP(put_task_struct, \
		void, struct rcu_head *rhp);
#endif /* (LINUX_VERSION_CODE != KERNEL_VERSION(2, 6, 16)) */
DECLARE_MOD_DEP_WRAPPER(access_process_vm, int, struct task_struct *tsk, unsigned long addr, void *buf, int len, int write)
IMP_MOD_DEP_WRAPPER(access_process_vm, tsk, addr, buf, len, write)

DECLARE_MOD_DEP_WRAPPER(find_extend_vma, struct vm_area_struct *, struct mm_struct *mm, unsigned long addr)
IMP_MOD_DEP_WRAPPER(find_extend_vma, mm, addr)
#if LINUX_VERSION_CODE <= KERNEL_VERSION(2, 6, 30)
#if LINUX_VERSION_CODE > KERNEL_VERSION(2, 6, 18)
DECLARE_MOD_DEP_WRAPPER(handle_mm_fault, \
		int, struct mm_struct *mm, struct vm_area_struct *vma, unsigned long address, int write_access)
IMP_MOD_DEP_WRAPPER(handle_mm_fault, mm, vma, address, write_access)
#endif /* LINUX_VERSION_CODE > KERNEL_VERSION(2, 6, 18) */
#else /* LINUX_VERSION_CODE <= KERNEL_VERSION(2, 6, 30) */
DECLARE_MOD_DEP_WRAPPER(handle_mm_fault, \
		int, struct mm_struct *mm, struct vm_area_struct *vma, unsigned long address, unsigned int flags)
IMP_MOD_DEP_WRAPPER(handle_mm_fault, mm, vma, address, flags)
#endif /* LINUX_VERSION_CODE <= KERNEL_VERSION(2, 6, 30) */
#if LINUX_VERSION_CODE > KERNEL_VERSION(2, 6, 38)
DECLARE_MOD_DEP_WRAPPER(get_gate_vma, \
		struct vm_area_struct *, struct mm_struct *mm)
IMP_MOD_DEP_WRAPPER(get_gate_vma, mm)
#else /* LINUX_VERSION_CODE > KERNEL_VERSION(2, 6, 38) */
DECLARE_MOD_DEP_WRAPPER(get_gate_vma, \
		struct vm_area_struct *, struct task_struct *tsk)
IMP_MOD_DEP_WRAPPER(get_gate_vma, tsk)
#endif /* LINUX_VERSION_CODE > KERNEL_VERSION(2, 6, 38) */
#ifdef CONFIG_HUGETLB_PAGE
DECLARE_MOD_DEP_WRAPPER(follow_hugetlb_page, int, struct mm_struct *mm, struct vm_area_struct *vma, struct page **pages, struct vm_area_struct **vmas, unsigned long *position, int *length, int i, unsigned int write)
IMP_MOD_DEP_WRAPPER(follow_hugetlb_page, mm, vma, pages, vmas, position, length, i, write)
#endif /* CONFIG_HUGETLB_PAGE */
static inline int dbi_in_gate_area(struct task_struct *task, unsigned long addr)
{
#ifdef __HAVE_ARCH_GATE_AREA
#if LINUX_VERSION_CODE > KERNEL_VERSION(2, 6, 38)
	struct mm_struct *mm = task->mm;
	IMP_MOD_DEP_WRAPPER(in_gate_area, mm, addr)
#else /* LINUX_VERSION_CODE > KERNEL_VERSION(2, 6, 38) */
	IMP_MOD_DEP_WRAPPER(in_gate_area, task, addr)
#endif /* LINUX_VERSION_CODE > KERNEL_VERSION(2, 6, 38) */
#else /* __HAVE_ARCH_GATE_AREA */
	return in_gate_area(task, addr);
#endif /* __HAVE_ARCH_GATE_AREA */
}
#if LINUX_VERSION_CODE > KERNEL_VERSION(2, 6, 38)
DECLARE_MOD_DEP_WRAPPER(in_gate_area_no_mm, int, unsigned long addr)
IMP_MOD_DEP_WRAPPER(in_gate_area_no_mm, addr)
#else /* LINUX_VERSION_CODE > KERNEL_VERSION(2, 6, 38) */
DECLARE_MOD_DEP_WRAPPER(in_gate_area_no_task, int, unsigned long addr)
IMP_MOD_DEP_WRAPPER(in_gate_area_no_task, addr)
#endif /* LINUX_VERSION_CODE > KERNEL_VERSION(2, 6, 38) */
static inline int dbi_in_gate_area_no_xxx(unsigned long addr)
{
#if LINUX_VERSION_CODE > KERNEL_VERSION(2, 6, 38)
	return in_gate_area_no_mm(addr);
#else /* LINUX_VERSION_CODE > KERNEL_VERSION(2, 6, 38) */
	return in_gate_area_no_task(addr);
#endif /* LINUX_VERSION_CODE > KERNEL_VERSION(2, 6, 38) */
}
DECLARE_MOD_DEP_WRAPPER(__flush_anon_page, \
		void, struct vm_area_struct *vma, \
		struct page *page, unsigned long vmaddr)
IMP_MOD_DEP_WRAPPER(__flush_anon_page, vma, page, vmaddr)

DECLARE_MOD_DEP_WRAPPER(vm_normal_page, \
		struct page *, struct vm_area_struct *vma, \
		unsigned long addr, pte_t pte)
IMP_MOD_DEP_WRAPPER(vm_normal_page, vma, addr, pte)

DECLARE_MOD_DEP_WRAPPER(flush_ptrace_access, \
		void, struct vm_area_struct *vma, struct page *page, \
		unsigned long uaddr, void *kaddr, unsigned long len, int write)
IMP_MOD_DEP_WRAPPER(flush_ptrace_access, vma, page, uaddr, kaddr, len, write)
int init_module_dependencies(void)
{

#if LINUX_VERSION_CODE > KERNEL_VERSION(2, 6, 18)
	INIT_MOD_DEP_VAR(handle_mm_fault, handle_mm_fault);
#endif /* LINUX_VERSION_CODE > KERNEL_VERSION(2, 6, 18) */

#ifndef copy_to_user_page
	INIT_MOD_DEP_VAR(copy_to_user_page, copy_to_user_page);
#endif /* copy_to_user_page */

	INIT_MOD_DEP_VAR(find_extend_vma, find_extend_vma);
	INIT_MOD_DEP_VAR(get_gate_vma, get_gate_vma);
	INIT_MOD_DEP_VAR(flush_ptrace_access, flush_ptrace_access);

#ifdef CONFIG_HUGETLB_PAGE
	INIT_MOD_DEP_VAR(follow_hugetlb_page, follow_hugetlb_page);
#endif /* CONFIG_HUGETLB_PAGE */

#ifdef __HAVE_ARCH_GATE_AREA
	INIT_MOD_DEP_VAR(in_gate_area, in_gate_area);
#endif /* __HAVE_ARCH_GATE_AREA */

#if LINUX_VERSION_CODE >= KERNEL_VERSION(3, 9, 0)
	INIT_MOD_DEP_VAR(follow_page_mask, follow_page_mask);
#else /* LINUX_VERSION_CODE >= KERNEL_VERSION(3, 9, 0) */
	INIT_MOD_DEP_VAR(follow_page, follow_page);
#endif /* LINUX_VERSION_CODE >= KERNEL_VERSION(3, 9, 0) */

#if LINUX_VERSION_CODE > KERNEL_VERSION(2, 6, 38)

#ifndef is_zero_pfn
	swap_zero_pfn = page_to_pfn(ZERO_PAGE(0));
#endif /* is_zero_pfn */

	INIT_MOD_DEP_VAR(in_gate_area_no_mm, in_gate_area_no_mm);
#else /* (LINUX_VERSION_CODE > KERNEL_VERSION(2, 6, 38)) */
	INIT_MOD_DEP_VAR(in_gate_area_no_task, in_gate_area_no_task);
#endif /* (LINUX_VERSION_CODE > KERNEL_VERSION(2, 6, 38)) */

	INIT_MOD_DEP_VAR(__flush_anon_page, __flush_anon_page);
	INIT_MOD_DEP_VAR(vm_normal_page, vm_normal_page);
	INIT_MOD_DEP_VAR(access_process_vm, access_process_vm);

#if (LINUX_VERSION_CODE != KERNEL_VERSION(2, 6, 16))
# if (LINUX_VERSION_CODE < KERNEL_VERSION(2, 6, 11))
	INIT_MOD_DEP_VAR(put_task_struct, put_task_struct);
# else /* (LINUX_VERSION_CODE < KERNEL_VERSION(2, 6, 11)) */
	INIT_MOD_DEP_VAR(put_task_struct, __put_task_struct);
# endif /* (LINUX_VERSION_CODE < KERNEL_VERSION(2, 6, 11)) */
#else /* (LINUX_VERSION_CODE != KERNEL_VERSION(2, 6, 16)) */
	INIT_MOD_DEP_VAR(put_task_struct, __put_task_struct_cb);
#endif /* (LINUX_VERSION_CODE != KERNEL_VERSION(2, 6, 16)) */

#if LINUX_VERSION_CODE >= KERNEL_VERSION(3, 4, 0)
	INIT_MOD_DEP_VAR(do_mmap_pgoff, do_mmap_pgoff);
#endif /* LINUX_VERSION_CODE >= KERNEL_VERSION(3, 4, 0) */

	return 0;
}
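
/*
 * Usage sketch (illustrative, not part of the original code): the module's
 * init path is expected to resolve the dependencies above before any of the
 * wrappers are called, e.g.:
 *
 *	static int __init dbi_module_init(void)
 *	{
 *		if (init_module_dependencies() != 0)
 *			return -ESRCH;
 *		...
 *		return 0;
 *	}
 *
 * (dbi_module_init is a hypothetical name used only for this example.)
 */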
#if LINUX_VERSION_CODE <= KERNEL_VERSION(2, 6, 38) /* FIXME: must be < 32 */
#define GUP_FLAGS_WRITE				0x1
#define GUP_FLAGS_FORCE				0x2
#define GUP_FLAGS_IGNORE_VMA_PERMISSIONS	0x4
#define GUP_FLAGS_IGNORE_SIGKILL		0x8
#endif /* LINUX_VERSION_CODE <= KERNEL_VERSION(2, 6, 38) */
#if LINUX_VERSION_CODE > KERNEL_VERSION(2,6,18)
static inline int use_zero_page(struct vm_area_struct *vma)
{
	/*
	 * We don't want to optimize FOLL_ANON for make_pages_present()
	 * when it tries to page in a VM_LOCKED region. As to VM_SHARED,
	 * we want to get the page from the page tables to make sure
	 * that we serialize and update with any other user of that
	 * page.
	 */
	if (vma->vm_flags & (VM_LOCKED | VM_SHARED))
		return 0;
	/*
	 * And if we have a fault routine, it's not an anonymous region.
	 */
	return !vma->vm_ops || !vma->vm_ops->fault;
}
#endif /* LINUX_VERSION_CODE > KERNEL_VERSION(2,6,18) */
#if LINUX_VERSION_CODE > KERNEL_VERSION(2, 6, 38)

#if LINUX_VERSION_CODE >= KERNEL_VERSION(3, 8, 0)

#ifdef __HAVE_COLOR_ZERO_PAGE

static inline int swap_is_zero_pfn(unsigned long pfn)
{
	unsigned long offset_from_zero_pfn = pfn - swap_zero_pfn;
	return offset_from_zero_pfn <= (zero_page_mask >> PAGE_SHIFT);
}

#else /* __HAVE_COLOR_ZERO_PAGE */

static inline int swap_is_zero_pfn(unsigned long pfn)
{
	return pfn == swap_zero_pfn;
}
#endif /* __HAVE_COLOR_ZERO_PAGE */

#else /* LINUX_VERSION_CODE >= KERNEL_VERSION(3, 8, 0) */

static inline int swap_is_zero_pfn(unsigned long pfn)
{
#ifndef is_zero_pfn
	return pfn == swap_zero_pfn;
#else /* is_zero_pfn */
	return is_zero_pfn(pfn);
#endif /* is_zero_pfn */
}

#endif /* LINUX_VERSION_CODE >= KERNEL_VERSION(3, 8, 0) */
static inline int stack_guard_page(struct vm_area_struct *vma, unsigned long addr)
{
	return stack_guard_page_start(vma, addr) ||
	       stack_guard_page_end(vma, addr + PAGE_SIZE);
}
#if LINUX_VERSION_CODE >= KERNEL_VERSION(3, 9, 0)

static inline struct page *follow_page_uprobe(struct vm_area_struct *vma,
	unsigned long address, unsigned int foll_flags)
{
	unsigned int unused_page_mask;
	return follow_page_mask(vma, address, foll_flags, &unused_page_mask);
}
long __get_user_pages_uprobe(struct task_struct *tsk, struct mm_struct *mm,
		unsigned long start, unsigned long nr_pages,
		unsigned int gup_flags, struct page **pages,
		struct vm_area_struct **vmas, int *nonblocking)
{
	long i;
	unsigned long vm_flags;
	unsigned int page_mask;

	if (!nr_pages)
		return 0;

	VM_BUG_ON(!!pages != !!(gup_flags & FOLL_GET));

	/*
	 * Require read or write permissions.
	 * If FOLL_FORCE is set, we only require the "MAY" flags.
	 */
	vm_flags  = (gup_flags & FOLL_WRITE) ?
			(VM_WRITE | VM_MAYWRITE) : (VM_READ | VM_MAYREAD);
	vm_flags &= (gup_flags & FOLL_FORCE) ?
			(VM_MAYREAD | VM_MAYWRITE) : (VM_READ | VM_WRITE);

	/*
	 * If FOLL_FORCE and FOLL_NUMA are both set, handle_mm_fault
	 * would be called on PROT_NONE ranges. We must never invoke
	 * handle_mm_fault on PROT_NONE ranges or the NUMA hinting
	 * page faults would unprotect the PROT_NONE ranges if
	 * _PAGE_NUMA and _PAGE_PROTNONE are sharing the same pte/pmd
	 * bitflag. So to avoid that, don't set FOLL_NUMA if
	 * FOLL_FORCE is set.
	 */
	if (!(gup_flags & FOLL_FORCE))
		gup_flags |= FOLL_NUMA;

	i = 0;

	do {
		struct vm_area_struct *vma;

		vma = find_extend_vma(mm, start);
		if (!vma && dbi_in_gate_area(tsk, start)) {
			unsigned long pg = start & PAGE_MASK;
			pgd_t *pgd;
			pud_t *pud;
			pmd_t *pmd;
			pte_t *pte;

			/* user gate pages are read-only */
			if (gup_flags & FOLL_WRITE)
				return i ? : -EFAULT;
			if (pg > TASK_SIZE)
				pgd = pgd_offset_k(pg);
			else
				pgd = pgd_offset_gate(mm, pg);
			BUG_ON(pgd_none(*pgd));
			pud = pud_offset(pgd, pg);
			BUG_ON(pud_none(*pud));
			pmd = pmd_offset(pud, pg);
			if (pmd_none(*pmd))
				return i ? : -EFAULT;
			VM_BUG_ON(pmd_trans_huge(*pmd));
			pte = pte_offset_map(pmd, pg);
			if (pte_none(*pte)) {
				pte_unmap(pte);
				return i ? : -EFAULT;
			}
			vma = get_gate_vma(mm);
			if (pages) {
				struct page *page;

				page = vm_normal_page(vma, start, *pte);
				if (!page) {
					if (!(gup_flags & FOLL_DUMP) &&
					     swap_is_zero_pfn(pte_pfn(*pte)))
						page = pte_page(*pte);
					else {
						pte_unmap(pte);
						return i ? : -EFAULT;
					}
				}
				pages[i] = page;
				get_page(page);
			}
			pte_unmap(pte);
			page_mask = 0;
			goto next_page;
		}

		if (!vma ||
		    (vma->vm_flags & (VM_IO | VM_PFNMAP)) ||
		    !(vm_flags & vma->vm_flags))
			return i ? : -EFAULT;

		if (is_vm_hugetlb_page(vma)) {
			i = follow_hugetlb_page(mm, vma, pages, vmas,
					&start, &nr_pages, i, gup_flags);
			continue;
		}

		do {
			struct page *page;
			unsigned int foll_flags = gup_flags;
			unsigned int page_increm;

			/*
			 * If we have a pending SIGKILL, don't keep faulting
			 * pages and potentially allocating memory.
			 */
			if (unlikely(fatal_signal_pending(current)))
				return i ? i : -ERESTARTSYS;

			/* cond_resched(); */
			while (!(page = follow_page_mask(vma, start,
						foll_flags, &page_mask))) {
				int ret;
				unsigned int fault_flags = 0;

				/* For mlock, just skip the stack guard page. */
				if (foll_flags & FOLL_MLOCK) {
					if (stack_guard_page(vma, start))
						goto next_page;
				}
				if (foll_flags & FOLL_WRITE)
					fault_flags |= FAULT_FLAG_WRITE;
				if (nonblocking)
					fault_flags |= FAULT_FLAG_ALLOW_RETRY;
				if (foll_flags & FOLL_NOWAIT)
					fault_flags |= (FAULT_FLAG_ALLOW_RETRY | FAULT_FLAG_RETRY_NOWAIT);

				ret = handle_mm_fault(mm, vma, start,
							fault_flags);

				if (ret & VM_FAULT_ERROR) {
					if (ret & VM_FAULT_OOM)
						return i ? i : -ENOMEM;
					if (ret & (VM_FAULT_HWPOISON |
						   VM_FAULT_HWPOISON_LARGE)) {
						if (i)
							return i;
						else if (gup_flags & FOLL_HWPOISON)
							return -EHWPOISON;
						else
							return -EFAULT;
					}
					if (ret & VM_FAULT_SIGBUS)
						return i ? i : -EFAULT;
					BUG();
				}

				if (tsk) {
					if (ret & VM_FAULT_MAJOR)
						tsk->maj_flt++;
					else
						tsk->min_flt++;
				}

				if (ret & VM_FAULT_RETRY) {
					if (nonblocking)
						*nonblocking = 0;
					return i;
				}

				/*
				 * The VM_FAULT_WRITE bit tells us that
				 * do_wp_page has broken COW when necessary,
				 * even if maybe_mkwrite decided not to set
				 * pte_write. We can thus safely do subsequent
				 * page lookups as if they were reads. But only
				 * do so when looping for pte_write is futile:
				 * in some cases userspace may also be wanting
				 * to write to the gotten user page, which a
				 * read fault here might prevent (a readonly
				 * page might get reCOWed by userspace write).
				 */
				if ((ret & VM_FAULT_WRITE) &&
				    !(vma->vm_flags & VM_WRITE))
					foll_flags &= ~FOLL_WRITE;

				/* cond_resched(); */
			}
			if (IS_ERR(page))
				return i ? i : PTR_ERR(page);
			if (pages) {
				pages[i] = page;

				flush_anon_page(vma, page, start);
				flush_dcache_page(page);
				page_mask = 0;
			}
next_page:
			if (vmas) {
				vmas[i] = vma;
				page_mask = 0;
			}
			page_increm = 1 + (~(start >> PAGE_SHIFT) & page_mask);
			if (page_increm > nr_pages)
				page_increm = nr_pages;
			i += page_increm;
			start += page_increm * PAGE_SIZE;
			nr_pages -= page_increm;
		} while (nr_pages && start < vma->vm_end);
	} while (nr_pages);

	return i;
}
#else /* LINUX_VERSION_CODE >= KERNEL_VERSION(3, 9, 0) */

static int __get_user_pages_uprobe(struct task_struct *tsk, struct mm_struct *mm,
			unsigned long start, int nr_pages, unsigned int gup_flags,
			struct page **pages, struct vm_area_struct **vmas,
			int *nonblocking)
{
	int i;
	unsigned long vm_flags;

	if (nr_pages <= 0)
		return 0;

	VM_BUG_ON(!!pages != !!(gup_flags & FOLL_GET));

	/*
	 * Require read or write permissions.
	 * If FOLL_FORCE is set, we only require the "MAY" flags.
	 */
	vm_flags  = (gup_flags & FOLL_WRITE) ?
			(VM_WRITE | VM_MAYWRITE) : (VM_READ | VM_MAYREAD);
	vm_flags &= (gup_flags & FOLL_FORCE) ?
			(VM_MAYREAD | VM_MAYWRITE) : (VM_READ | VM_WRITE);

	i = 0;

	do {
		struct vm_area_struct *vma;

		vma = find_extend_vma(mm, start);
		if (!vma && dbi_in_gate_area_no_xxx(start)) {
			unsigned long pg = start & PAGE_MASK;
			pgd_t *pgd;
			pud_t *pud;
			pmd_t *pmd;
			pte_t *pte;

			/* user gate pages are read-only */
			if (gup_flags & FOLL_WRITE) {
				return i ? : -EFAULT;
			}
			if (pg > TASK_SIZE)
				pgd = pgd_offset_k(pg);
			else
				pgd = pgd_offset_gate(mm, pg);
			BUG_ON(pgd_none(*pgd));
			pud = pud_offset(pgd, pg);
			BUG_ON(pud_none(*pud));
			pmd = pmd_offset(pud, pg);
			if (pmd_none(*pmd)) {
				return i ? : -EFAULT;
			}
			VM_BUG_ON(pmd_trans_huge(*pmd));
			pte = pte_offset_map(pmd, pg);
			if (pte_none(*pte)) {
				pte_unmap(pte);
				return i ? : -EFAULT;
			}
			vma = get_gate_vma(mm);
			if (pages) {
				struct page *page;

				page = vm_normal_page(vma, start, *pte);
				if (!page) {
					if (!(gup_flags & FOLL_DUMP) &&
					     swap_is_zero_pfn(pte_pfn(*pte)))
						page = pte_page(*pte);
					else {
						pte_unmap(pte);
						return i ? : -EFAULT;
					}
				}
				pages[i] = page;
				get_page(page);
			}
			pte_unmap(pte);
			goto next_page;
		}

		if (!vma ||
		    (vma->vm_flags & (VM_IO | VM_PFNMAP)) ||
		    !(vm_flags & vma->vm_flags)) {
			return i ? : -EFAULT;
		}

		if (is_vm_hugetlb_page(vma)) {
			i = follow_hugetlb_page(mm, vma, pages, vmas,
					&start, &nr_pages, i, gup_flags);
			continue;
		}

		do {
			struct page *page;
			unsigned int foll_flags = gup_flags;

			/*
			 * If we have a pending SIGKILL, don't keep faulting
			 * pages and potentially allocating memory.
			 */
			if (unlikely(fatal_signal_pending(current))) {
				return i ? i : -ERESTARTSYS;
			}

			/* cond_resched(); */
			while (!(page = follow_page(vma, start, foll_flags))) {
				int ret;
				unsigned int fault_flags = 0;

				/* For mlock, just skip the stack guard page. */
				if (foll_flags & FOLL_MLOCK) {
					if (stack_guard_page(vma, start))
						goto next_page;
				}
				if (foll_flags & FOLL_WRITE)
					fault_flags |= FAULT_FLAG_WRITE;
				if (nonblocking)
					fault_flags |= FAULT_FLAG_ALLOW_RETRY;
				if (foll_flags & FOLL_NOWAIT)
					fault_flags |= (FAULT_FLAG_ALLOW_RETRY | FAULT_FLAG_RETRY_NOWAIT);

				ret = handle_mm_fault(mm, vma, start,
							fault_flags);

				if (ret & VM_FAULT_ERROR) {
					if (ret & VM_FAULT_OOM) {
						return i ? i : -ENOMEM;
					}
					if (ret & (VM_FAULT_HWPOISON |
						   VM_FAULT_HWPOISON_LARGE)) {
						if (i)
							return i;
						else if (gup_flags & FOLL_HWPOISON) {
							return -EHWPOISON;
						} else {
							return -EFAULT;
						}
					}
					if (ret & VM_FAULT_SIGBUS) {
						return i ? i : -EFAULT;
					}
					BUG();
				}

				if (ret & VM_FAULT_MAJOR)
					tsk->maj_flt++;
				else
					tsk->min_flt++;

				if (ret & VM_FAULT_RETRY) {
					if (nonblocking)
						*nonblocking = 0;
					return i;
				}

				/*
				 * The VM_FAULT_WRITE bit tells us that
				 * do_wp_page has broken COW when necessary,
				 * even if maybe_mkwrite decided not to set
				 * pte_write. We can thus safely do subsequent
				 * page lookups as if they were reads. But only
				 * do so when looping for pte_write is futile:
				 * in some cases userspace may also be wanting
				 * to write to the gotten user page, which a
				 * read fault here might prevent (a readonly
				 * page might get reCOWed by userspace write).
				 */
				if ((ret & VM_FAULT_WRITE) &&
				    !(vma->vm_flags & VM_WRITE))
					foll_flags &= ~FOLL_WRITE;

				/* cond_resched(); */
			}
			if (IS_ERR(page))
				return i ? i : PTR_ERR(page);
			if (pages) {
				pages[i] = page;

				flush_anon_page(vma, page, start);
				flush_dcache_page(page);
			}
next_page:
			if (vmas)
				vmas[i] = vma;
			i++;
			start += PAGE_SIZE;
			nr_pages--;
		} while (nr_pages && start < vma->vm_end);
	} while (nr_pages);

	return i;
}

#endif /* LINUX_VERSION_CODE >= KERNEL_VERSION(3, 9, 0) */
#else /* LINUX_VERSION_CODE > KERNEL_VERSION(2, 6, 38) */

static int __get_user_pages_uprobe(struct task_struct *tsk, struct mm_struct *mm,
		unsigned long start, int len, int flags,
		struct page **pages, struct vm_area_struct **vmas)
{
	int i;
	unsigned int vm_flags = 0;
	int write = !!(flags & GUP_FLAGS_WRITE);
	int force = !!(flags & GUP_FLAGS_FORCE);
	int ignore = !!(flags & GUP_FLAGS_IGNORE_VMA_PERMISSIONS);

	if (len <= 0)
		return 0;

	/*
	 * Require read or write permissions.
	 * If 'force' is set, we only require the "MAY" flags.
	 */
	vm_flags  = write ? (VM_WRITE | VM_MAYWRITE) : (VM_READ | VM_MAYREAD);
	vm_flags &= force ? (VM_MAYREAD | VM_MAYWRITE) : (VM_READ | VM_WRITE);

	i = 0;

	do {
		struct vm_area_struct *vma;
		unsigned int foll_flags;

		vma = find_vma(mm, start);
		if (!vma && dbi_in_gate_area(tsk, start)) {
			unsigned long pg = start & PAGE_MASK;
			struct vm_area_struct *gate_vma = get_gate_vma(tsk);
			pgd_t *pgd;
			pud_t *pud;
			pmd_t *pmd;
			pte_t *pte;

			/* user gate pages are read-only */
			if (!ignore && write)
				return i ? : -EFAULT;
			if (pg > TASK_SIZE)
				pgd = pgd_offset_k(pg);
			else
				pgd = pgd_offset_gate(mm, pg);
			BUG_ON(pgd_none(*pgd));
			pud = pud_offset(pgd, pg);
			BUG_ON(pud_none(*pud));
			pmd = pmd_offset(pud, pg);
			if (pmd_none(*pmd))
				return i ? : -EFAULT;
			pte = pte_offset_map(pmd, pg);
			if (pte_none(*pte)) {
				pte_unmap(pte);
				return i ? : -EFAULT;
			}
			if (pages) {
				struct page *page = vm_normal_page(gate_vma, start, *pte);
				pages[i] = page;
				if (page)
					get_page(page);
			}
			pte_unmap(pte);
			if (vmas)
				vmas[i] = gate_vma;
			i++;
			start += PAGE_SIZE;
			len--;
			continue;
		}

		if (!vma ||
		    (vma->vm_flags & (VM_IO | VM_PFNMAP)) ||
		    (!ignore && !(vm_flags & vma->vm_flags)))
			return i ? : -EFAULT;

		if (is_vm_hugetlb_page(vma)) {
#if LINUX_VERSION_CODE <= KERNEL_VERSION(2,6,18)
			i = follow_hugetlb_page(mm, vma, pages, vmas,
						&start, &len, i);
#else
			i = follow_hugetlb_page(mm, vma, pages, vmas,
						&start, &len, i, write);
#endif
			continue;
		}

		foll_flags = FOLL_TOUCH;
		if (pages)
			foll_flags |= FOLL_GET;

#if LINUX_VERSION_CODE > KERNEL_VERSION(2,6,18)
#if LINUX_VERSION_CODE <= KERNEL_VERSION(2,6,30)
		if (!write && use_zero_page(vma))
			foll_flags |= FOLL_ANON;
#endif
#endif

		do {
			struct page *page;

			if (write)
				foll_flags |= FOLL_WRITE;

			DBPRINTF("pages = %p vma = %p\n", pages, vma);
			while (!(page = follow_page(vma, start, foll_flags))) {
				int ret;
				ret = handle_mm_fault(mm, vma, start,
						foll_flags & FOLL_WRITE);

#if LINUX_VERSION_CODE <= KERNEL_VERSION(2,6,18)
				if (ret & VM_FAULT_WRITE)
					foll_flags &= ~FOLL_WRITE;

				switch (ret & ~VM_FAULT_WRITE) {
				case VM_FAULT_MINOR:
					tsk->min_flt++;
					break;
				case VM_FAULT_MAJOR:
					tsk->maj_flt++;
					break;
				case VM_FAULT_SIGBUS:
					return i ? i : -EFAULT;
				case VM_FAULT_OOM:
					return i ? i : -ENOMEM;
				default:
					BUG();
				}
#else
				if (ret & VM_FAULT_ERROR) {
					if (ret & VM_FAULT_OOM)
						return i ? i : -ENOMEM;
					else if (ret & VM_FAULT_SIGBUS)
						return i ? i : -EFAULT;
					BUG();
				}
				if (ret & VM_FAULT_MAJOR)
					tsk->maj_flt++;
				else
					tsk->min_flt++;

				/*
				 * The VM_FAULT_WRITE bit tells us that
				 * do_wp_page has broken COW when necessary,
				 * even if maybe_mkwrite decided not to set
				 * pte_write. We can thus safely do subsequent
				 * page lookups as if they were reads. But only
				 * do so when looping for pte_write is futile:
				 * in some cases userspace may also be wanting
				 * to write to the gotten user page, which a
				 * read fault here might prevent (a readonly
				 * page might get reCOWed by userspace write).
				 */
				if ((ret & VM_FAULT_WRITE) &&
				    !(vma->vm_flags & VM_WRITE))
					foll_flags &= ~FOLL_WRITE;

				/* cond_resched(); */
#endif
			}

			if (IS_ERR(page))
				return i ? i : PTR_ERR(page);
			if (pages) {
				pages[i] = page;
#if LINUX_VERSION_CODE <= KERNEL_VERSION(2,6,18)
				flush_anon_page(page, start);
#else
				flush_anon_page(vma, page, start);
#endif
				flush_dcache_page(page);
			}
			if (vmas)
				vmas[i] = vma;
			i++;
			start += PAGE_SIZE;
			len--;
		} while (len && start < vma->vm_end);
	} while (len);

	return i;
}

#endif /* LINUX_VERSION_CODE > KERNEL_VERSION(2, 6, 38) */
int get_user_pages_uprobe(struct task_struct *tsk, struct mm_struct *mm,
		unsigned long start, int len, int write, int force,
		struct page **pages, struct vm_area_struct **vmas)
{
#if LINUX_VERSION_CODE >= KERNEL_VERSION(2, 6, 29)
#if LINUX_VERSION_CODE > KERNEL_VERSION(2, 6, 38) /* FIXME: must be >= 32! */
	int flags = FOLL_TOUCH;

	if (pages)
		flags |= FOLL_GET;
	if (write)
		flags |= FOLL_WRITE;
	if (force)
		flags |= FOLL_FORCE;
#else /* LINUX_VERSION_CODE > KERNEL_VERSION(2, 6, 38) */
	int flags = 0;

	if (write)
		flags |= GUP_FLAGS_WRITE;
	if (force)
		flags |= GUP_FLAGS_FORCE;
#endif /* LINUX_VERSION_CODE > KERNEL_VERSION(2, 6, 38) */

	return __get_user_pages_uprobe(tsk, mm,
				start, len, flags,
#if LINUX_VERSION_CODE > KERNEL_VERSION(2, 6, 38)
				pages, vmas, NULL);
#else /* LINUX_VERSION_CODE > KERNEL_VERSION(2, 6, 38) */
				pages, vmas);
#endif /* LINUX_VERSION_CODE > KERNEL_VERSION(2, 6, 38) */
#else /* LINUX_VERSION_CODE >= KERNEL_VERSION(2, 6, 29) */
	return get_user_pages(tsk, mm, start, len, write, force, pages, vmas);
#endif /* LINUX_VERSION_CODE >= KERNEL_VERSION(2, 6, 29) */
}
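
/*
 * Usage sketch (illustrative, not part of the original code): pin a single
 * page of 'task' for reading and release it afterwards, e.g.:
 *
 *	struct page *page;
 *	struct vm_area_struct *vma;
 *
 *	if (get_user_pages_uprobe(task, task->mm, vaddr, 1, 0, 1, &page, &vma) == 1) {
 *		void *kvaddr = dbi_kmap_atomic(page);
 *		... read from kvaddr ...
 *		dbi_kunmap_atomic(kvaddr);
 *		page_cache_release(page);
 *	}
 *
 * This is exactly the pattern access_process_vm_atomic() uses below.
 */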
#define ACCESS_PROCESS_OPTIMIZATION 0

#if ACCESS_PROCESS_OPTIMIZATION

#define GET_STEP_X(LEN, STEP) (((LEN) >= (STEP)) ? (STEP) : (LEN) % (STEP))
#define GET_STEP_4(LEN) GET_STEP_X((LEN), 4)

static void read_data_current(unsigned long addr, void *buf, int len)
{
	int step;
	int pos = 0;

	for (step = GET_STEP_4(len); len; len -= step) {
		switch (GET_STEP_4(len)) {
		case 1:
			get_user(*(u8 *)(buf + pos), (u8 __user *)(addr + pos));
			step = 1;
			break;
		case 2:
		case 3:
			get_user(*(u16 *)(buf + pos), (u16 __user *)(addr + pos));
			step = 2;
			break;
		case 4:
			get_user(*(u32 *)(buf + pos), (u32 __user *)(addr + pos));
			step = 4;
			break;
		}
		pos += step;
	}
}

static void write_data_current(unsigned long addr, void *buf, int len)
{
	int step;
	int pos = 0;

	for (step = GET_STEP_4(len); len; len -= step) {
		switch (GET_STEP_4(len)) {
		case 1:
			put_user(*(u8 *)(buf + pos), (u8 __user *)(addr + pos));
			step = 1;
			break;
		case 2:
		case 3:
			put_user(*(u16 *)(buf + pos), (u16 __user *)(addr + pos));
			step = 2;
			break;
		case 4:
			put_user(*(u32 *)(buf + pos), (u32 __user *)(addr + pos));
			step = 4;
			break;
		}
		pos += step;
	}
}

#endif /* ACCESS_PROCESS_OPTIMIZATION */
int access_process_vm_atomic(struct task_struct *tsk, unsigned long addr, void *buf, int len, int write)
{
	struct mm_struct *mm;
	struct vm_area_struct *vma;
	void *old_buf = buf;

#if ACCESS_PROCESS_OPTIMIZATION
	if (write == 0 && tsk == current) {
		read_data_current(addr, buf, len);
		return len;
	}
#endif

	mm = tsk->mm; /* function 'get_task_mm' is to be called */
	if (!mm)
		return 0;

	/* ignore errors, just check how much was successfully transferred */
	while (len) {
		int bytes, ret, offset;
		void *maddr;
		struct page *page = NULL;

		ret = get_user_pages_uprobe(tsk, mm, addr, 1,
						write, 1, &page, &vma);

		if (ret <= 0) {
			/*
			 * Check if this is a VM_IO | VM_PFNMAP VMA, which
			 * we can access using slightly different code.
			 */
#ifdef CONFIG_HAVE_IOREMAP_PROT
			vma = find_vma(mm, addr);
			if (!vma)
				break;
			if (vma->vm_ops && vma->vm_ops->access)
				ret = vma->vm_ops->access(vma, addr, buf,
							len, write);
			if (ret <= 0)
#endif
				break;
			bytes = ret;
		} else {
			bytes = len;
			offset = addr & (PAGE_SIZE-1);
			if (bytes > PAGE_SIZE-offset)
				bytes = PAGE_SIZE-offset;

			maddr = dbi_kmap_atomic(page);

			if (write) {
				copy_to_user_page(vma, page, addr,
						  maddr + offset, buf, bytes);
				set_page_dirty_lock(page);
			} else {
				copy_from_user_page(vma, page, addr,
						    buf, maddr + offset, bytes);
			}

			dbi_kunmap_atomic(maddr);
			page_cache_release(page);
		}
		len -= bytes;
		buf += bytes;
		addr += bytes;
	}

	return buf - old_buf;
}
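
/*
 * Usage sketch (illustrative, not part of the original code): read a few
 * bytes from another task's address space, e.g.:
 *
 *	unsigned char insn[4];
 *	int copied = access_process_vm_atomic(task, vaddr, insn, sizeof(insn), 0);
 *	if (copied != sizeof(insn))
 *		... handle a partially copied or unmapped address ...
 *
 * Passing write == 1 goes through copy_to_user_page() and marks the page
 * dirty, mirroring the kernel's access_process_vm(); note that, unlike that
 * helper, no mmap_sem is taken here.
 */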
int page_present(struct mm_struct *mm, unsigned long address)
{
	pgd_t *pgd;
	pud_t *pud;
	pmd_t *pmd;
	pte_t *ptep, pte;
	unsigned long pfn;

	pgd = pgd_offset(mm, address);
	if (pgd_none(*pgd) || unlikely(pgd_bad(*pgd)))
		goto out;

	pud = pud_offset(pgd, address);
	if (pud_none(*pud) || unlikely(pud_bad(*pud)))
		goto out;

	pmd = pmd_offset(pud, address);
	if (pmd_none(*pmd) || unlikely(pmd_bad(*pmd)))
		goto out;

	ptep = pte_offset_map(pmd, address);
	if (!ptep)
		goto out;

	pte = *ptep;
	pte_unmap(ptep);
	if (pte_present(pte)) {
		pfn = pte_pfn(pte);
		if (pfn_valid(pfn))
			return 1;
	}

out:
	return 0;
}
EXPORT_SYMBOL_GPL(page_present);
EXPORT_SYMBOL_GPL(access_process_vm_atomic);
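
/*
 * Usage sketch (illustrative, not part of the original code): page_present()
 * lets a caller test whether a user address is currently mapped before
 * attempting a non-faulting access, e.g.:
 *
 *	if (page_present(task->mm, vaddr))
 *		copied = access_process_vm_atomic(task, vaddr, buf, len, 0);
 *	else
 *		... defer the access until the page has been faulted in ...
 */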