Fix "addresses instead of names" problem for Library Only Instrumentation
[kernel/swap-modules.git] / kprobe / dbi_kprobes_deps.c
/*
 *  Dynamic Binary Instrumentation Module based on KProbes
 *  modules/kprobe/dbi_kprobes_deps.c
 *
 * This program is free software; you can redistribute it and/or modify
 * it under the terms of the GNU General Public License as published by
 * the Free Software Foundation; either version 2 of the License, or
 * (at your option) any later version.
 *
 * This program is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 * GNU General Public License for more details.
 *
 * You should have received a copy of the GNU General Public License
 * along with this program; if not, write to the Free Software
 * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
 *
 * Copyright (C) Samsung Electronics, 2006-2010
 *
 * 2008-2009    Alexey Gerenkov <a.gerenkov@samsung.com>: User-Space
 *              Probes initial implementation; support for x86/ARM/MIPS in both user and kernel space.
 * 2010         Ekaterina Gorelkina <e.gorelkina@samsung.com>: redesign of the module to separate core and arch parts.
 *
 */

#include <linux/module.h>
#include <linux/sched.h>
/*
 * Pulled in explicitly for find_vma()/get_user_pages(), kmap()/kunmap(),
 * page_cache_release() and memcpy(); dbi_kprobes_deps.h may already
 * provide them transitively, in which case these are harmless.
 */
#include <linux/mm.h>
#include <linux/highmem.h>
#include <linux/pagemap.h>
#include <linux/string.h>

#include <asm/pgtable.h>

#include "dbi_kprobes_deps.h"
#include "dbi_kdebug.h"

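/*
 * Resolved addresses of the scheduler and fork entry points.  Nothing in
 * this file assigns them; as far as this file is concerned they are filled
 * in by the probe core elsewhere in the module.
 */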
unsigned int *sched_addr;
unsigned int *fork_addr;

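/*
 * These mirror the kernel-internal GUP_FLAGS_* values (mm/internal.h in
 * this kernel generation), which are not visible to modules, so they are
 * duplicated here for __get_user_pages_uprobe() below.
 */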
#define GUP_FLAGS_WRITE                  0x1
#define GUP_FLAGS_FORCE                  0x2
#define GUP_FLAGS_IGNORE_VMA_PERMISSIONS 0x4
#define GUP_FLAGS_IGNORE_SIGKILL         0x8

#if LINUX_VERSION_CODE > KERNEL_VERSION(2, 6, 29)
struct mm_struct *init_mm_ptr;
struct mm_struct init_mm;
#endif
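
/*
 * The DECLARE_MOD_*_DEP/DECLARE_MOD_CB_DEP macros below come from
 * dbi_kprobes_deps.h.  As used here they declare storage for pointers to
 * kernel functions that are not exported to modules; the pointers are
 * resolved at runtime through kallsyms_search() in
 * init_module_dependencies() rather than bound by the module linker.
 */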
DECLARE_MOD_CB_DEP(kallsyms_search, unsigned long, const char *name);

DECLARE_MOD_FUNC_DEP(access_process_vm, int, struct task_struct *tsk, unsigned long addr, void *buf, int len, int write);

DECLARE_MOD_FUNC_DEP(find_extend_vma, struct vm_area_struct *, struct mm_struct *mm, unsigned long addr);

#if LINUX_VERSION_CODE <= KERNEL_VERSION(2, 6, 30)
#if LINUX_VERSION_CODE > KERNEL_VERSION(2, 6, 18)
DECLARE_MOD_FUNC_DEP(handle_mm_fault, int, struct mm_struct *mm, struct vm_area_struct *vma, unsigned long address, int write_access);
#endif
#else
DECLARE_MOD_FUNC_DEP(handle_mm_fault, int, struct mm_struct *mm, struct vm_area_struct *vma, unsigned long address, unsigned int flags);
#endif /* LINUX_VERSION_CODE <= KERNEL_VERSION(2, 6, 30) */

DECLARE_MOD_FUNC_DEP(get_gate_vma, struct vm_area_struct *, struct task_struct *tsk);
DECLARE_MOD_FUNC_DEP(in_gate_area_no_task, int, unsigned long addr);
DECLARE_MOD_FUNC_DEP(follow_page, \
                struct page *, struct vm_area_struct *vma, \
                unsigned long address, unsigned int foll_flags);
DECLARE_MOD_FUNC_DEP(__flush_anon_page, \
                void, struct vm_area_struct *vma, struct page *page, \
                unsigned long vmaddr);
DECLARE_MOD_FUNC_DEP(vm_normal_page, \
                struct page *, struct vm_area_struct *vma, \
                unsigned long addr, pte_t pte);

DECLARE_MOD_FUNC_DEP(flush_ptrace_access, \
                void, struct vm_area_struct *vma, struct page *page, \
                unsigned long uaddr, void *kaddr, unsigned long len, int write);

#if (LINUX_VERSION_CODE != KERNEL_VERSION(2, 6, 16))
DECLARE_MOD_FUNC_DEP(put_task_struct, \
                void, struct task_struct *tsk);
#else
DECLARE_MOD_FUNC_DEP(put_task_struct, \
                void, struct rcu_head *rhp);
#endif

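/*
 * DECLARE_MOD_DEP_WRAPPER/IMP_MOD_DEP_WRAPPER (also from
 * dbi_kprobes_deps.h) emit wrapper functions with the original kernel
 * signatures that forward their arguments to the pointers resolved above,
 * so the rest of the module can keep calling, e.g., access_process_vm()
 * by name.
 */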
DECLARE_MOD_DEP_WRAPPER(access_process_vm, int, struct task_struct *tsk, unsigned long addr, void *buf, int len, int write)
IMP_MOD_DEP_WRAPPER(access_process_vm, tsk, addr, buf, len, write)

DECLARE_MOD_DEP_WRAPPER(find_extend_vma, struct vm_area_struct *, struct mm_struct *mm, unsigned long addr)
IMP_MOD_DEP_WRAPPER(find_extend_vma, mm, addr)

#if LINUX_VERSION_CODE <= KERNEL_VERSION(2, 6, 30)
#if LINUX_VERSION_CODE > KERNEL_VERSION(2, 6, 18)
DECLARE_MOD_DEP_WRAPPER(handle_mm_fault, \
                int, struct mm_struct *mm, struct vm_area_struct *vma, unsigned long address, int write_access)
IMP_MOD_DEP_WRAPPER(handle_mm_fault, mm, vma, address, write_access)
#endif
#else
DECLARE_MOD_DEP_WRAPPER(handle_mm_fault, \
                int, struct mm_struct *mm, struct vm_area_struct *vma, unsigned long address, unsigned int flags)
IMP_MOD_DEP_WRAPPER(handle_mm_fault, mm, vma, address, flags)
#endif

DECLARE_MOD_DEP_WRAPPER(get_gate_vma, \
                struct vm_area_struct *, struct task_struct *tsk)
IMP_MOD_DEP_WRAPPER(get_gate_vma, tsk)

DECLARE_MOD_DEP_WRAPPER(in_gate_area_no_task, int, unsigned long addr)
IMP_MOD_DEP_WRAPPER(in_gate_area_no_task, addr)

DECLARE_MOD_DEP_WRAPPER(follow_page, \
                struct page *, struct vm_area_struct *vma, \
                unsigned long address, unsigned int foll_flags)
IMP_MOD_DEP_WRAPPER(follow_page, vma, address, foll_flags)

DECLARE_MOD_DEP_WRAPPER(__flush_anon_page, \
                void, struct vm_area_struct *vma, \
                struct page *page, unsigned long vmaddr)
IMP_MOD_DEP_WRAPPER(__flush_anon_page, vma, page, vmaddr)

DECLARE_MOD_DEP_WRAPPER(vm_normal_page, \
                struct page *, struct vm_area_struct *vma, \
                unsigned long addr, pte_t pte)
IMP_MOD_DEP_WRAPPER(vm_normal_page, vma, addr, pte)

DECLARE_MOD_DEP_WRAPPER(flush_ptrace_access, \
                void, struct vm_area_struct *vma, struct page *page, \
                unsigned long uaddr, void *kaddr, unsigned long len, int write)
IMP_MOD_DEP_WRAPPER(flush_ptrace_access, vma, page, uaddr, kaddr, len, write)

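/*
 * Resolve every dependency declared above.  Each INIT_MOD_DEP_VAR() looks
 * the symbol up by name (through the kallsyms callback) and stores the
 * resulting address in the corresponding pointer; the put_task_struct
 * case also picks the symbol name that matches the running kernel version.
 */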
int init_module_dependencies(void)
{
#if LINUX_VERSION_CODE > KERNEL_VERSION(2, 6, 29)
	/* init_mm is no longer exported to modules here, so take a snapshot
	 * of the kernel's copy into the module-local placeholder. */
	init_mm_ptr = (struct mm_struct *)kallsyms_search("init_mm");
	memcpy(&init_mm, init_mm_ptr, sizeof(struct mm_struct));
#endif

#if LINUX_VERSION_CODE > KERNEL_VERSION(2, 6, 18)
	INIT_MOD_DEP_VAR(handle_mm_fault, handle_mm_fault);
#endif

	INIT_MOD_DEP_VAR(flush_ptrace_access, flush_ptrace_access);
	INIT_MOD_DEP_VAR(find_extend_vma, find_extend_vma);
	INIT_MOD_DEP_VAR(get_gate_vma, get_gate_vma);
	INIT_MOD_DEP_VAR(in_gate_area_no_task, in_gate_area_no_task);
	INIT_MOD_DEP_VAR(follow_page, follow_page);
	INIT_MOD_DEP_VAR(__flush_anon_page, __flush_anon_page);
	INIT_MOD_DEP_VAR(vm_normal_page, vm_normal_page);
	INIT_MOD_DEP_VAR(access_process_vm, access_process_vm);

#if (LINUX_VERSION_CODE != KERNEL_VERSION(2, 6, 16))
# if (LINUX_VERSION_CODE < KERNEL_VERSION(2, 6, 11))
	INIT_MOD_DEP_VAR(put_task_struct, put_task_struct);
# else
	INIT_MOD_DEP_VAR(put_task_struct, __put_task_struct);
# endif
#else /* 2.6.16 */
	INIT_MOD_DEP_VAR(put_task_struct, __put_task_struct_cb);
#endif

	return 0;
}
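
/*
 * use_zero_page() below mirrors the kernel's internal helper of the same
 * name (static in mm/memory.c on the kernel versions this branch targets),
 * which decides whether FOLL_ANON may be used for a read-only lookup.
 */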
#if LINUX_VERSION_CODE > KERNEL_VERSION(2,6,18)
static inline int use_zero_page(struct vm_area_struct *vma)
{
	/*
	 * We don't want to optimize FOLL_ANON for make_pages_present()
	 * when it tries to page in a VM_LOCKED region. As to VM_SHARED,
	 * we want to get the page from the page tables to make sure
	 * that we serialize and update with any other user of that
	 * mapping.
	 */
	if (vma->vm_flags & (VM_LOCKED | VM_SHARED))
		return 0;
	/*
	 * And if we have a fault routine, it's not an anonymous region.
	 */
	return !vma->vm_ops || !vma->vm_ops->fault;
}

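/*
 * Local adaptation of the kernel's __get_user_pages() of this era.  The
 * sleeping points are disabled on purpose: the cond_resched() calls are
 * commented out and the fatal-signal bail-out is compiled out with #if 0,
 * so the routine can be used to pin pages from probe context.  Apart from
 * that it follows the original logic: handle gate pages and huge pages,
 * and fault pages in via handle_mm_fault() until follow_page() succeeds.
 */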
int __get_user_pages_uprobe(struct task_struct *tsk, struct mm_struct *mm,
		unsigned long start, int len, int flags,
		struct page **pages, struct vm_area_struct **vmas)
{
	int i;
	unsigned int vm_flags = 0;
	int write = !!(flags & GUP_FLAGS_WRITE);
	int force = !!(flags & GUP_FLAGS_FORCE);
	int ignore = !!(flags & GUP_FLAGS_IGNORE_VMA_PERMISSIONS);
	int ignore_sigkill = !!(flags & GUP_FLAGS_IGNORE_SIGKILL);

	if (len <= 0)
		return 0;
	/*
	 * Require read or write permissions.
	 * If 'force' is set, we only require the "MAY" flags.
	 */
	vm_flags  = write ? (VM_WRITE | VM_MAYWRITE) : (VM_READ | VM_MAYREAD);
	vm_flags &= force ? (VM_MAYREAD | VM_MAYWRITE) : (VM_READ | VM_WRITE);
	i = 0;

	do {
		struct vm_area_struct *vma;
		unsigned int foll_flags;

		//vma = find_extend_vma(mm, start);
		vma = find_vma(mm, start);
		if (!vma && in_gate_area(tsk, start)) {
			unsigned long pg = start & PAGE_MASK;
			struct vm_area_struct *gate_vma = get_gate_vma(tsk);
			pgd_t *pgd;
			pud_t *pud;
			pmd_t *pmd;
			pte_t *pte;

			/* user gate pages are read-only */
			if (!ignore && write)
				return i ? : -EFAULT;
			if (pg > TASK_SIZE)
				pgd = pgd_offset_k(pg);
			else
				pgd = pgd_offset_gate(mm, pg);
			BUG_ON(pgd_none(*pgd));
			pud = pud_offset(pgd, pg);
			BUG_ON(pud_none(*pud));
			pmd = pmd_offset(pud, pg);
			if (pmd_none(*pmd))
				return i ? : -EFAULT;
			pte = pte_offset_map(pmd, pg);
			if (pte_none(*pte)) {
				pte_unmap(pte);
				return i ? : -EFAULT;
			}
			if (pages) {
				struct page *page = vm_normal_page(gate_vma, start, *pte);
				pages[i] = page;
				if (page)
					get_page(page);
			}
			pte_unmap(pte);
			if (vmas)
				vmas[i] = gate_vma;
			i++;
			start += PAGE_SIZE;
			len--;
			continue;
		}

		if (!vma ||
		    (vma->vm_flags & (VM_IO | VM_PFNMAP)) ||
		    (!ignore && !(vm_flags & vma->vm_flags)))
			return i ? : -EFAULT;

		if (is_vm_hugetlb_page(vma)) {
#if LINUX_VERSION_CODE <= KERNEL_VERSION(2,6,18)
			i = follow_hugetlb_page(mm, vma, pages, vmas,
						&start, &len, i);
#else
			i = follow_hugetlb_page(mm, vma, pages, vmas,
						&start, &len, i, write);
#endif
			continue;
		}

		foll_flags = FOLL_TOUCH;
		if (pages)
			foll_flags |= FOLL_GET;

#if LINUX_VERSION_CODE > KERNEL_VERSION(2,6,18)
#if LINUX_VERSION_CODE <= KERNEL_VERSION(2,6,30)
		if (!write && use_zero_page(vma))
			foll_flags |= FOLL_ANON;
#endif
#endif

		do {
			struct page *page;

#if 0
			/*
			 * If we have a pending SIGKILL, don't keep faulting
			 * pages and potentially allocating memory, unless
			 * current is handling munlock--e.g., on exit. In
			 * that case, we are not allocating memory.  Rather,
			 * we're only unlocking already resident/mapped pages.
			 */
			if (unlikely(!ignore_sigkill &&
					fatal_signal_pending(current)))
				return i ? i : -ERESTARTSYS;
#endif

			if (write)
				foll_flags |= FOLL_WRITE;

			//cond_resched();

			DBPRINTF("pages = %p vma = %p\n", pages, vma);
			while (!(page = follow_page(vma, start, foll_flags))) {
				int ret;
				ret = handle_mm_fault(mm, vma, start,
						foll_flags & FOLL_WRITE);

#if LINUX_VERSION_CODE <= KERNEL_VERSION(2,6,18)
				if (ret & VM_FAULT_WRITE)
					foll_flags &= ~FOLL_WRITE;

				switch (ret & ~VM_FAULT_WRITE) {
				case VM_FAULT_MINOR:
					tsk->min_flt++;
					break;
				case VM_FAULT_MAJOR:
					tsk->maj_flt++;
					break;
				case VM_FAULT_SIGBUS:
					return i ? i : -EFAULT;
				case VM_FAULT_OOM:
					return i ? i : -ENOMEM;
				default:
					BUG();
				}
#else
				if (ret & VM_FAULT_ERROR) {
					if (ret & VM_FAULT_OOM)
						return i ? i : -ENOMEM;
					else if (ret & VM_FAULT_SIGBUS)
						return i ? i : -EFAULT;
					BUG();
				}
				if (ret & VM_FAULT_MAJOR)
					tsk->maj_flt++;
				else
					tsk->min_flt++;

				/*
				 * The VM_FAULT_WRITE bit tells us that
				 * do_wp_page has broken COW when necessary,
				 * even if maybe_mkwrite decided not to set
				 * pte_write. We can thus safely do subsequent
				 * page lookups as if they were reads. But only
				 * do so when looping for pte_write is futile:
				 * in some cases userspace may also be wanting
				 * to write to the gotten user page, which a
				 * read fault here might prevent (a readonly
				 * page might get reCOWed by userspace write).
				 */
				if ((ret & VM_FAULT_WRITE) &&
				    !(vma->vm_flags & VM_WRITE))
					foll_flags &= ~FOLL_WRITE;

				//cond_resched();
#endif
			}

			if (IS_ERR(page))
				return i ? i : PTR_ERR(page);
			if (pages) {
				pages[i] = page;

#if LINUX_VERSION_CODE <= KERNEL_VERSION(2,6,18)
				flush_anon_page(page, start);
#else
				flush_anon_page(vma, page, start);
#endif
				flush_dcache_page(page);
			}
			if (vmas)
				vmas[i] = vma;
			i++;
			start += PAGE_SIZE;
			len--;
		} while (len && start < vma->vm_end);
	} while (len);
	return i;
}
#endif

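/*
 * Front end mirroring get_user_pages(): on kernels where the flags-based
 * helper exists (>= 2.6.29 here) translate write/force into GUP_FLAGS_*
 * and use the non-sleeping copy above, otherwise fall back to the stock
 * get_user_pages().
 */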
int get_user_pages_uprobe(struct task_struct *tsk, struct mm_struct *mm,
		unsigned long start, int len, int write, int force,
		struct page **pages, struct vm_area_struct **vmas)
{
#if LINUX_VERSION_CODE >= KERNEL_VERSION(2, 6, 29)
	int flags = 0;

	if (write)
		flags |= GUP_FLAGS_WRITE;
	if (force)
		flags |= GUP_FLAGS_FORCE;

	return __get_user_pages_uprobe(tsk, mm,
				start, len, flags,
				pages, vmas);
#else
	return get_user_pages(tsk, mm, start, len, write, force, pages, vmas);
#endif
}

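/*
 * access_process_vm_atomic() follows the kernel's access_process_vm()
 * line by line, but takes no mmap_sem (the down_read()/up_read() pair is
 * deliberately commented out) and pins pages with the non-sleeping
 * get_user_pages_uprobe() above, so that probe handlers can copy data to
 * and from another task's address space.
 */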
int access_process_vm_atomic(struct task_struct *tsk, unsigned long addr, void *buf, int len, int write)
{
	struct mm_struct *mm;
	struct vm_area_struct *vma;
	void *old_buf = buf;

	mm = get_task_mm(tsk);
	if (!mm)
		return 0;

	//down_read(&mm->mmap_sem);
	/* ignore errors, just check how much was successfully transferred */
	while (len) {
		int bytes, ret, offset;
		void *maddr;
		struct page *page = NULL;

		ret = get_user_pages_uprobe(tsk, mm, addr, 1,
				write, 1, &page, &vma);
		if (ret <= 0) {
			/*
			 * Check if this is a VM_IO | VM_PFNMAP VMA, which
			 * we can access using slightly different code.
			 */
#ifdef CONFIG_HAVE_IOREMAP_PROT
			vma = find_vma(mm, addr);
			if (!vma)
				break;
			if (vma->vm_ops && vma->vm_ops->access)
				ret = vma->vm_ops->access(vma, addr, buf,
							  len, write);
			if (ret <= 0)
#endif
				break;
			bytes = ret;
		} else {
			bytes = len;
			offset = addr & (PAGE_SIZE-1);
			if (bytes > PAGE_SIZE-offset)
				bytes = PAGE_SIZE-offset;

			maddr = kmap(page);
			if (write) {
				copy_to_user_page(vma, page, addr,
						  maddr + offset, buf, bytes);
				set_page_dirty_lock(page);
			} else {
				copy_from_user_page(vma, page, addr,
						    buf, maddr + offset, bytes);
			}
			kunmap(page);
			page_cache_release(page);
		}
		len -= bytes;
		buf += bytes;
		addr += bytes;
	}
	//up_read(&mm->mmap_sem);
	mmput(mm);

	return buf - old_buf;
}

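/*
 * page_present() reports whether a user address in 'mm' is currently
 * backed by a present page: it walks pgd -> pud -> pmd -> pte by hand and
 * checks pte_present()/pfn_valid(), never faulting anything in.
 */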
int page_present(struct mm_struct *mm, unsigned long address)
{
	pgd_t *pgd;
	pud_t *pud;
	pmd_t *pmd;
	pte_t *ptep, pte;
	unsigned long pfn;

	pgd = pgd_offset(mm, address);
	if (pgd_none(*pgd) || unlikely(pgd_bad(*pgd)))
		goto out;

	pud = pud_offset(pgd, address);
	if (pud_none(*pud) || unlikely(pud_bad(*pud)))
		goto out;

	pmd = pmd_offset(pud, address);
	if (pmd_none(*pmd) || unlikely(pmd_bad(*pmd)))
		goto out;

	ptep = pte_offset_map(pmd, address);
	if (!ptep)
		goto out;

	pte = *ptep;
	pte_unmap(ptep);
	if (pte_present(pte)) {
		pfn = pte_pfn(pte);
		if (pfn_valid(pfn))
			return 1;
	}

out:
	return 0;
}

EXPORT_SYMBOL_GPL(page_present);
EXPORT_SYMBOL_GPL(get_user_pages_uprobe);
EXPORT_SYMBOL_GPL(access_process_vm_atomic);
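
/*
 * Usage sketch (hypothetical caller, not part of this file): a probe
 * handler that needs a few bytes of the traced task's user memory without
 * sleeping could do something like
 *
 *	char buf[16];
 *	int n = access_process_vm_atomic(task, user_addr, buf, sizeof(buf), 0);
 *	if (n == sizeof(buf))
 *		...use buf...
 *
 * where 'task' and 'user_addr' come from the probe's own context.
 */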