/*
 *  Dynamic Binary Instrumentation Module based on KProbes
 *  modules/kprobe/dbi_kprobes_deps.h
 *
 * This program is free software; you can redistribute it and/or modify
 * it under the terms of the GNU General Public License as published by
 * the Free Software Foundation; either version 2 of the License, or
 * (at your option) any later version.
 *
 * This program is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 * GNU General Public License for more details.
 *
 * You should have received a copy of the GNU General Public License
 * along with this program; if not, write to the Free Software
 * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
 *
 * Copyright (C) Samsung Electronics, 2006-2010
 *
 * 2008-2009    Alexey Gerenkov <a.gerenkov@samsung.com> User-Space
 *              Probes initial implementation; Support x86/ARM/MIPS for both user and kernel spaces.
 * 2010         Ekaterina Gorelkina <e.gorelkina@samsung.com>: redesign module for separating core and arch parts
 */
#include <linux/module.h>
#include <linux/sched.h>

#include <asm/pgtable.h>

#include "dbi_kprobes_deps.h"
#include "dbi_kdebug.h"

#include <linux/slab.h>

unsigned long sched_addr;
unsigned long fork_addr;
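
/*
 * This file resolves kernel functions that are not exported to modules.
 * Judging by their use below, the DECLARE_MOD_FUNC_DEP /
 * DECLARE_MOD_DEP_WRAPPER / IMP_MOD_DEP_WRAPPER / INIT_MOD_DEP_VAR macros
 * (defined in dbi_kprobes_deps.h) declare a function-pointer variable for
 * each dependency, wrap it in a local function with the original signature,
 * and fill the pointer at load time via a kallsyms-style lookup
 * (swap_ksyms() in init_module_dependencies() below).
 */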
#if LINUX_VERSION_CODE > KERNEL_VERSION(2, 6, 29)
struct mm_struct *init_mm_ptr;
struct mm_struct init_mm;
#endif /* LINUX_VERSION_CODE > KERNEL_VERSION(2, 6, 29) */
#if LINUX_VERSION_CODE >= KERNEL_VERSION(3, 4, 0)
DECLARE_MOD_FUNC_DEP(do_mmap_pgoff, unsigned long, struct file *file, unsigned long addr, unsigned long len, unsigned long prot, unsigned long flags, unsigned long pgoff);
DECLARE_MOD_DEP_WRAPPER(do_mmap_pgoff, unsigned long, struct file *file, unsigned long addr, unsigned long len, unsigned long prot, unsigned long flags, unsigned long pgoff)
IMP_MOD_DEP_WRAPPER(do_mmap_pgoff, file, addr, len, prot, flags, pgoff)
#endif /* LINUX_VERSION_CODE >= KERNEL_VERSION(3, 4, 0) */

/* copy_to_user_page */
#ifndef copy_to_user_page
DECLARE_MOD_FUNC_DEP(copy_to_user_page, void, struct vm_area_struct *vma, struct page *page, unsigned long uaddr, void *dst, const void *src, unsigned long len);
DECLARE_MOD_DEP_WRAPPER(copy_to_user_page, void, struct vm_area_struct *vma, struct page *page, unsigned long uaddr, void *dst, const void *src, unsigned long len)
IMP_MOD_DEP_WRAPPER(copy_to_user_page, vma, page, uaddr, dst, src, len)
#endif /* copy_to_user_page */

DECLARE_MOD_FUNC_DEP(access_process_vm, int, struct task_struct *tsk, unsigned long addr, void *buf, int len, int write);

DECLARE_MOD_FUNC_DEP(find_extend_vma, struct vm_area_struct *, struct mm_struct *mm, unsigned long addr);
#if LINUX_VERSION_CODE <= KERNEL_VERSION(2, 6, 30)
#if LINUX_VERSION_CODE > KERNEL_VERSION(2, 6, 18)
DECLARE_MOD_FUNC_DEP(handle_mm_fault, int, struct mm_struct *mm, struct vm_area_struct *vma, unsigned long address, int write_access);
#endif
#else /* LINUX_VERSION_CODE <= KERNEL_VERSION(2, 6, 30) */
DECLARE_MOD_FUNC_DEP(handle_mm_fault, int, struct mm_struct *mm, struct vm_area_struct *vma, unsigned long address, unsigned int flags);
#endif /* LINUX_VERSION_CODE <= KERNEL_VERSION(2, 6, 30) */
#if LINUX_VERSION_CODE > KERNEL_VERSION(2, 6, 38)
DECLARE_MOD_FUNC_DEP(get_gate_vma, struct vm_area_struct *, struct mm_struct *mm);
#else /* LINUX_VERSION_CODE > KERNEL_VERSION(2, 6, 38) */
DECLARE_MOD_FUNC_DEP(get_gate_vma, struct vm_area_struct *, struct task_struct *tsk);
#endif /* LINUX_VERSION_CODE > KERNEL_VERSION(2, 6, 38) */
#ifdef CONFIG_HUGETLB_PAGE
DECLARE_MOD_FUNC_DEP(follow_hugetlb_page, int, struct mm_struct *mm, struct vm_area_struct *vma, struct page **pages, struct vm_area_struct **vmas, unsigned long *position, int *length, int i, int write);
#endif /* CONFIG_HUGETLB_PAGE */
#ifdef __HAVE_ARCH_GATE_AREA
#if LINUX_VERSION_CODE > KERNEL_VERSION(2, 6, 38)
DECLARE_MOD_FUNC_DEP(in_gate_area, int, struct mm_struct *mm, unsigned long addr);
#else /* LINUX_VERSION_CODE > KERNEL_VERSION(2, 6, 38) */
DECLARE_MOD_FUNC_DEP(in_gate_area, int, struct task_struct *task, unsigned long addr);
#endif /* LINUX_VERSION_CODE > KERNEL_VERSION(2, 6, 38) */
#endif /* __HAVE_ARCH_GATE_AREA */
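
/*
 * 2.6.39 moved the gate-area helpers from task_struct to mm_struct
 * (get_gate_vma(), in_gate_area()) and renamed in_gate_area_no_task()
 * to in_gate_area_no_mm(); the KERNEL_VERSION(2, 6, 38) splits above
 * and below track that change.
 */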
#if LINUX_VERSION_CODE > KERNEL_VERSION(2, 6, 38)
DECLARE_MOD_FUNC_DEP(in_gate_area_no_mm, int, unsigned long addr);
#else /* LINUX_VERSION_CODE > KERNEL_VERSION(2, 6, 38) */
DECLARE_MOD_FUNC_DEP(in_gate_area_no_task, int, unsigned long addr);
#endif /* LINUX_VERSION_CODE > KERNEL_VERSION(2, 6, 38) */
DECLARE_MOD_FUNC_DEP(follow_page, \
		struct page *, struct vm_area_struct *vma, \
		unsigned long address, unsigned int foll_flags);
DECLARE_MOD_FUNC_DEP(__flush_anon_page, \
		void, struct vm_area_struct *vma, struct page *page, \
		unsigned long vmaddr);
DECLARE_MOD_FUNC_DEP(vm_normal_page, \
		struct page *, struct vm_area_struct *vma, \
		unsigned long addr, pte_t pte);
#if (LINUX_VERSION_CODE != KERNEL_VERSION(2, 6, 16))
DECLARE_MOD_FUNC_DEP(put_task_struct, \
		void, struct task_struct *tsk);
#else
DECLARE_MOD_FUNC_DEP(put_task_struct, \
		void, struct rcu_head *rhp);
#endif
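
/*
 * Wrappers. Each DECLARE_MOD_DEP_WRAPPER/IMP_MOD_DEP_WRAPPER pair below
 * appears to emit a local definition of the named function that simply
 * forwards its arguments through the pointer resolved at load time, so
 * the rest of the module can call e.g. access_process_vm() by its usual
 * name even though the kernel does not export it.
 */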
DECLARE_MOD_DEP_WRAPPER(access_process_vm, int, struct task_struct *tsk, unsigned long addr, void *buf, int len, int write)
IMP_MOD_DEP_WRAPPER(access_process_vm, tsk, addr, buf, len, write)

DECLARE_MOD_DEP_WRAPPER(find_extend_vma, struct vm_area_struct *, struct mm_struct *mm, unsigned long addr)
IMP_MOD_DEP_WRAPPER(find_extend_vma, mm, addr)
#if LINUX_VERSION_CODE <= KERNEL_VERSION(2, 6, 30)
#if LINUX_VERSION_CODE > KERNEL_VERSION(2, 6, 18)
DECLARE_MOD_DEP_WRAPPER(handle_mm_fault, \
		int, struct mm_struct *mm, struct vm_area_struct *vma, unsigned long address, int write_access)
IMP_MOD_DEP_WRAPPER(handle_mm_fault, mm, vma, address, write_access)
#endif
#else /* LINUX_VERSION_CODE <= KERNEL_VERSION(2, 6, 30) */
DECLARE_MOD_DEP_WRAPPER(handle_mm_fault, \
		int, struct mm_struct *mm, struct vm_area_struct *vma, unsigned long address, unsigned int flags)
IMP_MOD_DEP_WRAPPER(handle_mm_fault, mm, vma, address, flags)
#endif /* LINUX_VERSION_CODE <= KERNEL_VERSION(2, 6, 30) */
#if LINUX_VERSION_CODE > KERNEL_VERSION(2, 6, 38)
DECLARE_MOD_DEP_WRAPPER(get_gate_vma, \
		struct vm_area_struct *, struct mm_struct *mm)
IMP_MOD_DEP_WRAPPER(get_gate_vma, mm)
#else /* LINUX_VERSION_CODE > KERNEL_VERSION(2, 6, 38) */
DECLARE_MOD_DEP_WRAPPER(get_gate_vma, \
		struct vm_area_struct *, struct task_struct *tsk)
IMP_MOD_DEP_WRAPPER(get_gate_vma, tsk)
#endif /* LINUX_VERSION_CODE > KERNEL_VERSION(2, 6, 38) */
#ifdef CONFIG_HUGETLB_PAGE
DECLARE_MOD_DEP_WRAPPER(follow_hugetlb_page, int, struct mm_struct *mm, struct vm_area_struct *vma, struct page **pages, struct vm_area_struct **vmas, unsigned long *position, int *length, int i, unsigned int write)
IMP_MOD_DEP_WRAPPER(follow_hugetlb_page, mm, vma, pages, vmas, position, length, i, write)
#endif /* CONFIG_HUGETLB_PAGE */
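
/*
 * in_gate_area() lost its task_struct argument in 2.6.39; this helper
 * keeps a single task-based call site for both kernel generations.
 */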
static inline int dbi_in_gate_area(struct task_struct *task, unsigned long addr)
{
#ifdef __HAVE_ARCH_GATE_AREA
#if LINUX_VERSION_CODE > KERNEL_VERSION(2, 6, 38)
	struct mm_struct *mm = task->mm;
	IMP_MOD_DEP_WRAPPER(in_gate_area, mm, addr)
#else /* LINUX_VERSION_CODE > KERNEL_VERSION(2, 6, 38) */
	IMP_MOD_DEP_WRAPPER(in_gate_area, task, addr)
#endif /* LINUX_VERSION_CODE > KERNEL_VERSION(2, 6, 38) */
#else /* __HAVE_ARCH_GATE_AREA */
	return in_gate_area(task, addr);
#endif /* __HAVE_ARCH_GATE_AREA */
}
#if LINUX_VERSION_CODE > KERNEL_VERSION(2, 6, 38)
DECLARE_MOD_DEP_WRAPPER(in_gate_area_no_mm, int, unsigned long addr)
IMP_MOD_DEP_WRAPPER(in_gate_area_no_mm, addr)
#else /* LINUX_VERSION_CODE > KERNEL_VERSION(2, 6, 38) */
DECLARE_MOD_DEP_WRAPPER(in_gate_area_no_task, int, unsigned long addr)
IMP_MOD_DEP_WRAPPER(in_gate_area_no_task, addr)
#endif /* LINUX_VERSION_CODE > KERNEL_VERSION(2, 6, 38) */
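
/*
 * Same idea for the mm-less check: dispatch to in_gate_area_no_mm() or
 * the older in_gate_area_no_task(), whichever this kernel provides.
 */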
static inline int dbi_in_gate_area_no_xxx(unsigned long addr)
{
#if LINUX_VERSION_CODE > KERNEL_VERSION(2, 6, 38)
	return in_gate_area_no_mm(addr);
#else /* LINUX_VERSION_CODE > KERNEL_VERSION(2, 6, 38) */
	return in_gate_area_no_task(addr);
#endif /* LINUX_VERSION_CODE > KERNEL_VERSION(2, 6, 38) */
}
#if (LINUX_VERSION_CODE != KERNEL_VERSION(2, 6, 11))
DECLARE_MOD_DEP_WRAPPER(follow_page, \
		struct page *, struct vm_area_struct *vma, \
		unsigned long address, unsigned int foll_flags)
IMP_MOD_DEP_WRAPPER(follow_page, vma, address, foll_flags)
#endif

DECLARE_MOD_DEP_WRAPPER(__flush_anon_page, \
		void, struct vm_area_struct *vma, \
		struct page *page, unsigned long vmaddr)
IMP_MOD_DEP_WRAPPER(__flush_anon_page, vma, page, vmaddr)

DECLARE_MOD_DEP_WRAPPER(vm_normal_page, \
		struct page *, struct vm_area_struct *vma, \
		unsigned long addr, pte_t pte)
IMP_MOD_DEP_WRAPPER(vm_normal_page, vma, addr, pte)
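
/*
 * Resolve every dependency declared above. Called once at module load;
 * returns 0 on success. INIT_MOD_DEP_VAR(dep, name) presumably looks up
 * the symbol "name" and stores it in the pointer behind "dep".
 */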
int init_module_dependencies(void)
{

#if LINUX_VERSION_CODE > KERNEL_VERSION(2, 6, 29)
	init_mm_ptr = (struct mm_struct *)swap_ksyms("init_mm");
	/* init_mm is not exported here: take a local copy of the kernel's */
	memcpy(&init_mm, init_mm_ptr, sizeof(struct mm_struct));
#endif /* LINUX_VERSION_CODE > KERNEL_VERSION(2, 6, 29) */

#if LINUX_VERSION_CODE > KERNEL_VERSION(2, 6, 18)
	INIT_MOD_DEP_VAR(handle_mm_fault, handle_mm_fault);
#endif /* LINUX_VERSION_CODE > KERNEL_VERSION(2, 6, 18) */

#ifndef copy_to_user_page
	INIT_MOD_DEP_VAR(copy_to_user_page, copy_to_user_page);
#endif /* copy_to_user_page */

	INIT_MOD_DEP_VAR(find_extend_vma, find_extend_vma);
	INIT_MOD_DEP_VAR(get_gate_vma, get_gate_vma);

#ifdef CONFIG_HUGETLB_PAGE
	INIT_MOD_DEP_VAR(follow_hugetlb_page, follow_hugetlb_page);
#endif /* CONFIG_HUGETLB_PAGE */

#ifdef __HAVE_ARCH_GATE_AREA
	INIT_MOD_DEP_VAR(in_gate_area, in_gate_area);
#endif /* __HAVE_ARCH_GATE_AREA */

#if (LINUX_VERSION_CODE > KERNEL_VERSION(2, 6, 38))
	INIT_MOD_DEP_VAR(in_gate_area_no_mm, in_gate_area_no_mm);
#else /* (LINUX_VERSION_CODE > KERNEL_VERSION(2, 6, 38)) */
	INIT_MOD_DEP_VAR(in_gate_area_no_task, in_gate_area_no_task);
#endif /* (LINUX_VERSION_CODE > KERNEL_VERSION(2, 6, 38)) */

	INIT_MOD_DEP_VAR(follow_page, follow_page);

	INIT_MOD_DEP_VAR(__flush_anon_page, __flush_anon_page);
	INIT_MOD_DEP_VAR(vm_normal_page, vm_normal_page);
	INIT_MOD_DEP_VAR(access_process_vm, access_process_vm);

#if (LINUX_VERSION_CODE != KERNEL_VERSION(2, 6, 16))
# if (LINUX_VERSION_CODE < KERNEL_VERSION(2, 6, 11))
	INIT_MOD_DEP_VAR(put_task_struct, put_task_struct);
# else
	INIT_MOD_DEP_VAR(put_task_struct, __put_task_struct);
# endif
#else /* 2.6.16 */
	INIT_MOD_DEP_VAR(put_task_struct, __put_task_struct_cb);
#endif

#if (LINUX_VERSION_CODE >= KERNEL_VERSION(3, 4, 0))
	INIT_MOD_DEP_VAR(do_mmap_pgoff, do_mmap_pgoff);
#endif /* LINUX_VERSION_CODE >= KERNEL_VERSION(3, 4, 0) */

	return 0;
}
#if LINUX_VERSION_CODE <= KERNEL_VERSION(2, 6, 38) /* FIXME: must be < 32 */
#define GUP_FLAGS_WRITE				0x1
#define GUP_FLAGS_FORCE				0x2
#define GUP_FLAGS_IGNORE_VMA_PERMISSIONS	0x4
#define GUP_FLAGS_IGNORE_SIGKILL		0x8
#endif /* LINUX_VERSION_CODE <= KERNEL_VERSION(2, 6, 38) */
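
/*
 * use_zero_page() below appears to be carried over from mm/memory.c of
 * the 2.6.2x kernels, where follow_page() still honoured FOLL_ANON for
 * read faults that could be satisfied by the shared zero page.
 */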
#if LINUX_VERSION_CODE > KERNEL_VERSION(2, 6, 18)
static inline int use_zero_page(struct vm_area_struct *vma)
{
	/*
	 * We don't want to optimize FOLL_ANON for make_pages_present()
	 * when it tries to page in a VM_LOCKED region. As to VM_SHARED,
	 * we want to get the page from the page tables to make sure
	 * that we serialize and update with any other user of that
	 * mapping.
	 */
	if (vma->vm_flags & (VM_LOCKED | VM_SHARED))
		return 0;
	/*
	 * And if we have a fault routine, it's not an anonymous region.
	 */
	return !vma->vm_ops || !vma->vm_ops->fault;
}
#endif /* LINUX_VERSION_CODE > KERNEL_VERSION(2, 6, 18) */
#if LINUX_VERSION_CODE > KERNEL_VERSION(2, 6, 38)
unsigned long zero_pfn __read_mostly;

static inline int is_zero_pfn(unsigned long pfn)
{
	return pfn == zero_pfn;
}

static inline int stack_guard_page(struct vm_area_struct *vma, unsigned long addr)
{
	return stack_guard_page_start(vma, addr) ||
	       stack_guard_page_end(vma, addr + PAGE_SIZE);
}
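
/*
 * __get_user_pages_uprobe() is, as far as the version checks suggest, a
 * local copy of __get_user_pages() from mm/memory.c of the 2.6.39-era
 * kernels, with the gate-area tests routed through the dbi_* helpers
 * above so it works on non-exported symbols. Unlike the original it
 * never calls cond_resched() (see the commented-out calls), so it stays
 * usable from atomic context.
 */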
int __get_user_pages_uprobe(struct task_struct *tsk, struct mm_struct *mm,
			unsigned long start, int nr_pages, unsigned int gup_flags,
			struct page **pages, struct vm_area_struct **vmas,
			int *nonblocking)
{
	int i;
	unsigned long vm_flags;

	if (nr_pages <= 0) {
		return 0;
	}

	VM_BUG_ON(!!pages != !!(gup_flags & FOLL_GET));

	/*
	 * Require read or write permissions.
	 * If FOLL_FORCE is set, we only require the "MAY" flags.
	 */
	vm_flags = (gup_flags & FOLL_WRITE) ?
			(VM_WRITE | VM_MAYWRITE) : (VM_READ | VM_MAYREAD);
	vm_flags &= (gup_flags & FOLL_FORCE) ?
			(VM_MAYREAD | VM_MAYWRITE) : (VM_READ | VM_WRITE);
	i = 0;

	do {
		struct vm_area_struct *vma;

		vma = find_extend_vma(mm, start);
		if (!vma && dbi_in_gate_area_no_xxx(start)) {
			unsigned long pg = start & PAGE_MASK;
			pgd_t *pgd;
			pud_t *pud;
			pmd_t *pmd;
			pte_t *pte;

			/* user gate pages are read-only */
			if (gup_flags & FOLL_WRITE) {
				return i ? : -EFAULT;
			}
			if (pg > TASK_SIZE)
				pgd = pgd_offset_k(pg);
			else
				pgd = pgd_offset_gate(mm, pg);
			BUG_ON(pgd_none(*pgd));
			pud = pud_offset(pgd, pg);
			BUG_ON(pud_none(*pud));
			pmd = pmd_offset(pud, pg);
			if (pmd_none(*pmd)) {
				return i ? : -EFAULT;
			}
			VM_BUG_ON(pmd_trans_huge(*pmd));
			pte = pte_offset_map(pmd, pg);
			if (pte_none(*pte)) {
				pte_unmap(pte);
				return i ? : -EFAULT;
			}
			vma = get_gate_vma(mm);
			if (pages) {
				struct page *page;

				page = vm_normal_page(vma, start, *pte);
				if (!page) {
					if (!(gup_flags & FOLL_DUMP) &&
					     is_zero_pfn(pte_pfn(*pte)))
						page = pte_page(*pte);
					else {
						pte_unmap(pte);
						return i ? : -EFAULT;
					}
				}
				pages[i] = page;
				get_page(page);
			}
			pte_unmap(pte);
			goto next_page;
		}

		if (!vma ||
		    (vma->vm_flags & (VM_IO | VM_PFNMAP)) ||
		    !(vm_flags & vma->vm_flags)) {
			return i ? : -EFAULT;
		}

		if (is_vm_hugetlb_page(vma)) {
			i = follow_hugetlb_page(mm, vma, pages, vmas,
					&start, &nr_pages, i, gup_flags);
			continue;
		}

		do {
			struct page *page;
			unsigned int foll_flags = gup_flags;

			/*
			 * If we have a pending SIGKILL, don't keep faulting
			 * pages and potentially allocating memory.
			 */
			if (unlikely(fatal_signal_pending(current))) {
				return i ? i : -ERESTARTSYS;
			}

			/* cond_resched(); */
			while (!(page = follow_page(vma, start, foll_flags))) {
				int ret;
				unsigned int fault_flags = 0;

				/* For mlock, just skip the stack guard page. */
				if (foll_flags & FOLL_MLOCK) {
					if (stack_guard_page(vma, start))
						goto next_page;
				}
				if (foll_flags & FOLL_WRITE)
					fault_flags |= FAULT_FLAG_WRITE;
				if (nonblocking)
					fault_flags |= FAULT_FLAG_ALLOW_RETRY;
				if (foll_flags & FOLL_NOWAIT)
					fault_flags |= (FAULT_FLAG_ALLOW_RETRY | FAULT_FLAG_RETRY_NOWAIT);

				ret = handle_mm_fault(mm, vma, start,
							fault_flags);

				if (ret & VM_FAULT_ERROR) {
					if (ret & VM_FAULT_OOM) {
						return i ? i : -ENOMEM;
					}
					if (ret & (VM_FAULT_HWPOISON |
						   VM_FAULT_HWPOISON_LARGE)) {
						if (i)
							return i;
						else if (gup_flags & FOLL_HWPOISON)
							return -EHWPOISON;
						else
							return -EFAULT;
					}
					if (ret & VM_FAULT_SIGBUS) {
						return i ? i : -EFAULT;
					}
					BUG();
				}

				if (tsk) {
					if (ret & VM_FAULT_MAJOR)
						tsk->maj_flt++;
					else
						tsk->min_flt++;
				}

				if (ret & VM_FAULT_RETRY) {
					if (nonblocking)
						*nonblocking = 0;
					return i;
				}

				/*
				 * The VM_FAULT_WRITE bit tells us that
				 * do_wp_page has broken COW when necessary,
				 * even if maybe_mkwrite decided not to set
				 * pte_write. We can thus safely do subsequent
				 * page lookups as if they were reads. But only
				 * do so when looping for pte_write is futile:
				 * in some cases userspace may also be wanting
				 * to write to the gotten user page, which a
				 * read fault here might prevent (a readonly
				 * page might get reCOWed by userspace write).
				 */
				if ((ret & VM_FAULT_WRITE) &&
				    !(vma->vm_flags & VM_WRITE))
					foll_flags &= ~FOLL_WRITE;

				/* cond_resched(); */
			}
			if (IS_ERR(page)) {
				return i ? i : PTR_ERR(page);
			}
			if (pages) {
				pages[i] = page;

				flush_anon_page(vma, page, start);
				flush_dcache_page(page);
			}
next_page:
			if (vmas)
				vmas[i] = vma;
			i++;
			start += PAGE_SIZE;
			nr_pages--;
		} while (nr_pages && start < vma->vm_end);
	} while (nr_pages);

	return i;
}
#else /* LINUX_VERSION_CODE > KERNEL_VERSION(2, 6, 38) */
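
/*
 * Pre-2.6.39 variant, modelled on the __get_user_pages() of those
 * kernels: permissions arrive as GUP_FLAGS_* rather than FOLL_*, and
 * the gate-area check still needs the task_struct.
 */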
int __get_user_pages_uprobe(struct task_struct *tsk, struct mm_struct *mm,
			unsigned long start, int len, int flags,
			struct page **pages, struct vm_area_struct **vmas)
{
	int i;
	unsigned int vm_flags = 0;
	int write = !!(flags & GUP_FLAGS_WRITE);
	int force = !!(flags & GUP_FLAGS_FORCE);
	int ignore = !!(flags & GUP_FLAGS_IGNORE_VMA_PERMISSIONS);
	int ignore_sigkill = !!(flags & GUP_FLAGS_IGNORE_SIGKILL);

	if (len <= 0)
		return 0;
	/*
	 * Require read or write permissions.
	 * If 'force' is set, we only require the "MAY" flags.
	 */
	vm_flags = write ? (VM_WRITE | VM_MAYWRITE) : (VM_READ | VM_MAYREAD);
	vm_flags &= force ? (VM_MAYREAD | VM_MAYWRITE) : (VM_READ | VM_WRITE);
	i = 0;

	do {
		struct vm_area_struct *vma;
		unsigned int foll_flags;

		vma = find_vma(mm, start);
		if (!vma && dbi_in_gate_area(tsk, start)) {
			unsigned long pg = start & PAGE_MASK;
			struct vm_area_struct *gate_vma = get_gate_vma(tsk);
			pgd_t *pgd;
			pud_t *pud;
			pmd_t *pmd;
			pte_t *pte;

			/* user gate pages are read-only */
			if (!ignore && write)
				return i ? : -EFAULT;
			if (pg > TASK_SIZE)
				pgd = pgd_offset_k(pg);
			else
				pgd = pgd_offset_gate(mm, pg);
			BUG_ON(pgd_none(*pgd));
			pud = pud_offset(pgd, pg);
			BUG_ON(pud_none(*pud));
			pmd = pmd_offset(pud, pg);
			if (pmd_none(*pmd))
				return i ? : -EFAULT;
			pte = pte_offset_map(pmd, pg);
			if (pte_none(*pte)) {
				pte_unmap(pte);
				return i ? : -EFAULT;
			}
			if (pages) {
				struct page *page = vm_normal_page(gate_vma, start, *pte);
				pages[i] = page;
				if (page)
					get_page(page);
			}
			pte_unmap(pte);
			if (vmas)
				vmas[i] = gate_vma;
			i++;
			start += PAGE_SIZE;
			len--;
			continue;
		}

		if (!vma ||
		    (vma->vm_flags & (VM_IO | VM_PFNMAP)) ||
		    (!ignore && !(vm_flags & vma->vm_flags)))
			return i ? : -EFAULT;

		if (is_vm_hugetlb_page(vma)) {
#if LINUX_VERSION_CODE <= KERNEL_VERSION(2, 6, 18)
			i = follow_hugetlb_page(mm, vma, pages, vmas,
					&start, &len, i);
#else
			i = follow_hugetlb_page(mm, vma, pages, vmas,
					&start, &len, i, write);
#endif
			continue;
		}

		foll_flags = FOLL_TOUCH;
		if (pages)
			foll_flags |= FOLL_GET;
#if LINUX_VERSION_CODE > KERNEL_VERSION(2, 6, 18)
#if LINUX_VERSION_CODE <= KERNEL_VERSION(2, 6, 30)
		if (!write && use_zero_page(vma))
			foll_flags |= FOLL_ANON;
#endif
#endif

		do {
			struct page *page;

			/*
			 * If we have a pending SIGKILL, don't keep faulting
			 * pages and potentially allocating memory, unless
			 * current is handling munlock--e.g., on exit. In
			 * that case, we are not allocating memory. Rather,
			 * we're only unlocking already resident/mapped pages.
			 */
			if (unlikely(!ignore_sigkill &&
					fatal_signal_pending(current)))
				return i ? i : -ERESTARTSYS;

			if (write)
				foll_flags |= FOLL_WRITE;

			/* cond_resched(); */

			DBPRINTF("pages = %p vma = %p\n", pages, vma);
			while (!(page = follow_page(vma, start, foll_flags))) {
				int ret;
				ret = handle_mm_fault(mm, vma, start,
						foll_flags & FOLL_WRITE);

#if LINUX_VERSION_CODE <= KERNEL_VERSION(2, 6, 18)
				if (ret & VM_FAULT_WRITE)
					foll_flags &= ~FOLL_WRITE;

				switch (ret & ~VM_FAULT_WRITE) {
				case VM_FAULT_MINOR:
					tsk->min_flt++;
					break;
				case VM_FAULT_MAJOR:
					tsk->maj_flt++;
					break;
				case VM_FAULT_SIGBUS:
					return i ? i : -EFAULT;
				case VM_FAULT_OOM:
					return i ? i : -ENOMEM;
				default:
					BUG();
				}
#else
				if (ret & VM_FAULT_ERROR) {
					if (ret & VM_FAULT_OOM)
						return i ? i : -ENOMEM;
					else if (ret & VM_FAULT_SIGBUS)
						return i ? i : -EFAULT;
					BUG();
				}
				if (ret & VM_FAULT_MAJOR)
					tsk->maj_flt++;
				else
					tsk->min_flt++;
#endif
				/*
				 * The VM_FAULT_WRITE bit tells us that
				 * do_wp_page has broken COW when necessary,
				 * even if maybe_mkwrite decided not to set
				 * pte_write. We can thus safely do subsequent
				 * page lookups as if they were reads. But only
				 * do so when looping for pte_write is futile:
				 * in some cases userspace may also be wanting
				 * to write to the gotten user page, which a
				 * read fault here might prevent (a readonly
				 * page might get reCOWed by userspace write).
				 */
				if ((ret & VM_FAULT_WRITE) &&
				    !(vma->vm_flags & VM_WRITE))
					foll_flags &= ~FOLL_WRITE;

				/* cond_resched(); */
			}
			if (IS_ERR(page))
				return i ? i : PTR_ERR(page);
			if (pages) {
				pages[i] = page;
#if LINUX_VERSION_CODE <= KERNEL_VERSION(2, 6, 18)
				flush_anon_page(page, start);
#else
				flush_anon_page(vma, page, start);
#endif
				flush_dcache_page(page);
			}
			if (vmas)
				vmas[i] = vma;
			i++;
			start += PAGE_SIZE;
			len--;
		} while (len && start < vma->vm_end);
	} while (len);

	return i;
}
#endif /* LINUX_VERSION_CODE > KERNEL_VERSION(2, 6, 38) */
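
/*
 * Translate the classic get_user_pages() argument convention (write and
 * force as ints) into whatever flag set __get_user_pages_uprobe() expects
 * on this kernel. Before 2.6.29 the stock get_user_pages() is used
 * directly.
 */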
int get_user_pages_uprobe(struct task_struct *tsk, struct mm_struct *mm,
		unsigned long start, int len, int write, int force,
		struct page **pages, struct vm_area_struct **vmas)
{
#if LINUX_VERSION_CODE >= KERNEL_VERSION(2, 6, 29)
#if LINUX_VERSION_CODE > KERNEL_VERSION(2, 6, 38) /* FIXME: must be >= 32! */
	int flags = FOLL_TOUCH;

	if (pages)
		flags |= FOLL_GET;
	if (write)
		flags |= FOLL_WRITE;
	if (force)
		flags |= FOLL_FORCE;
#else /* LINUX_VERSION_CODE > KERNEL_VERSION(2, 6, 38) */
	int flags = 0;

	if (write)
		flags |= GUP_FLAGS_WRITE;
	if (force)
		flags |= GUP_FLAGS_FORCE;
#endif /* LINUX_VERSION_CODE > KERNEL_VERSION(2, 6, 38) */

	return __get_user_pages_uprobe(tsk, mm,
				start, len, flags,
#if LINUX_VERSION_CODE > KERNEL_VERSION(2, 6, 38)
				pages, vmas, NULL);
#else /* LINUX_VERSION_CODE > KERNEL_VERSION(2, 6, 38) */
				pages, vmas);
#endif /* LINUX_VERSION_CODE > KERNEL_VERSION(2, 6, 38) */
#else /* LINUX_VERSION_CODE >= KERNEL_VERSION(2, 6, 29) */
	return get_user_pages(tsk, mm, start, len, write, force, pages, vmas);
#endif /* LINUX_VERSION_CODE >= KERNEL_VERSION(2, 6, 29) */
}
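
/*
 * access_process_vm_atomic() follows the kernel's access_process_vm(),
 * but goes through get_user_pages_uprobe() and leaves mmap_sem alone
 * (note the commented-out down_read/up_read), presumably so it can be
 * used from contexts that must not block on the semaphore. Returns the
 * number of bytes transferred.
 */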
int access_process_vm_atomic(struct task_struct *tsk, unsigned long addr, void *buf, int len, int write)
{
	struct mm_struct *mm;
	struct vm_area_struct *vma;
	void *old_buf = buf;

	mm = get_task_mm(tsk);
	if (!mm)
		return 0;

	/* down_read(&mm->mmap_sem); */
	/* ignore errors, just check how much was successfully transferred */
	while (len) {
		int bytes, ret, offset;
		void *maddr;
		struct page *page = NULL;

		ret = get_user_pages_uprobe(tsk, mm, addr, 1,
						write, 1, &page, &vma);

		if (ret <= 0) {
			/*
			 * Check if this is a VM_IO | VM_PFNMAP VMA, which
			 * we can access using slightly different code.
			 */
#ifdef CONFIG_HAVE_IOREMAP_PROT
			vma = find_vma(mm, addr);
			if (!vma)
				break;
			if (vma->vm_ops && vma->vm_ops->access)
				ret = vma->vm_ops->access(vma, addr, buf,
							  len, write);
			if (ret <= 0)
#endif
				break;
			bytes = ret;
		} else {
			bytes = len;
			offset = addr & (PAGE_SIZE-1);
			if (bytes > PAGE_SIZE-offset)
				bytes = PAGE_SIZE-offset;

			maddr = kmap(page);
			if (write) {
				copy_to_user_page(vma, page, addr,
						  maddr + offset, buf, bytes);
				set_page_dirty_lock(page);
			} else {
				copy_from_user_page(vma, page, addr,
						    buf, maddr + offset, bytes);
			}
			kunmap(page);
			page_cache_release(page);
		}
		len -= bytes;
		buf += bytes;
		addr += bytes;
	}
	/* up_read(&mm->mmap_sem); */
	mmput(mm);

	return buf - old_buf;
}
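
/*
 * page_present() reports whether the page backing a user address is
 * resident: it walks pgd/pud/pmd/pte by hand and returns 1 only for a
 * present pte with a valid pfn, without faulting anything in.
 */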
int page_present(struct mm_struct *mm, unsigned long address)
{
	pgd_t *pgd;
	pud_t *pud;
	pmd_t *pmd;
	pte_t *ptep, pte;
	unsigned long pfn;

	pgd = pgd_offset(mm, address);
	if (pgd_none(*pgd) || unlikely(pgd_bad(*pgd)))
		goto out;

	pud = pud_offset(pgd, address);
	if (pud_none(*pud) || unlikely(pud_bad(*pud)))
		goto out;

	pmd = pmd_offset(pud, address);
	if (pmd_none(*pmd) || unlikely(pmd_bad(*pmd)))
		goto out;

	ptep = pte_offset_map(pmd, address);
	if (!ptep)
		goto out;

	pte = *ptep;
	pte_unmap(ptep);
	if (pte_present(pte)) {
		pfn = pte_pfn(pte);
		if (pfn_valid(pfn)) {
			return 1;
		}
	}

out:
	return 0;
}
EXPORT_SYMBOL_GPL(page_present);
EXPORT_SYMBOL_GPL(get_user_pages_uprobe);
EXPORT_SYMBOL_GPL(access_process_vm_atomic);
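
/*
 * Example (illustrative only, not part of the module): reading one word
 * of a traced task's memory from a probe handler, where the stock
 * access_process_vm() must not be used:
 *
 *	unsigned long val;
 *
 *	if (access_process_vm_atomic(task, addr, &val, sizeof(val), 0) !=
 *	    sizeof(val))
 *		return -EFAULT;
 */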