Fix kernel dump during app instrumentation for Tegra 250 (multi-core ARM)
author    Ekaterina Gorelkina <ekaterina@ekaterina-desktop.(none)>
          Thu, 15 Jul 2010 13:33:55 +0000 (17:33 +0400)
committer Ekaterina Gorelkina <ekaterina@ekaterina-desktop.(none)>
          Thu, 15 Jul 2010 13:33:55 +0000 (17:33 +0400)
kprobe/kprobes.c
kprobe/kprobes.h
kprobe/kprobes_arch.c

diff --git a/kprobe/kprobes.c b/kprobe/kprobes.c
index 7e5a524..3848a7e 100644
@@ -820,6 +820,7 @@ valid_p:
 int __kprobes
 register_ujprobe (struct task_struct *task, struct mm_struct *mm, struct jprobe *jp, int atomic)
 {
+       int ret = 0;
 #ifdef _DEBUG
        gSilent = 0;
 #endif
@@ -827,7 +828,7 @@ register_ujprobe (struct task_struct *task, struct mm_struct *mm, struct jprobe
        jp->kp.pre_handler = setjmp_pre_handler;
        jp->kp.break_handler = longjmp_break_handler;
        
-       int ret = __register_uprobe (&jp->kp, task, atomic,
+       ret = __register_uprobe (&jp->kp, task, atomic,
                                    (unsigned long) __builtin_return_address (0));
 
 #ifdef _DEBUG
@@ -1302,57 +1303,270 @@ unregister_all_uprobes (struct task_struct *task, int atomic)
        purge_garbage_uslots(task, atomic);
 }
 
-#if 0
+
+#define GUP_FLAGS_WRITE                  0x1
+#define GUP_FLAGS_FORCE                  0x2
+#define GUP_FLAGS_IGNORE_VMA_PERMISSIONS 0x4
+#define GUP_FLAGS_IGNORE_SIGKILL         0x8
+
+
+static inline int use_zero_page(struct vm_area_struct *vma)
+{
+       /*
+        * We don't want to optimize FOLL_ANON for make_pages_present()
+        * when it tries to page in a VM_LOCKED region. As to VM_SHARED,
+        * we want to get the page from the page tables to make sure
+        * that we serialize and update with any other user of that
+        * mapping.
+        */
+       if (vma->vm_flags & (VM_LOCKED | VM_SHARED))
+               return 0;
+       /*
+        * And if we have a fault routine, it's not an anonymous region.
+        */
+       return !vma->vm_ops || !vma->vm_ops->fault;
+}
+
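+/*
+ * Local copy of the kernel's __get_user_pages(): the cond_resched() calls are
+ * disabled and find_extend_vma() is replaced with find_vma(), so the page walk
+ * can be performed from atomic context.
+ */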
+int __get_user_pages_uprobe(struct task_struct *tsk, struct mm_struct *mm,
+                    unsigned long start, int len, int flags,
+               struct page **pages, struct vm_area_struct **vmas)
+{
+       int i;
+       unsigned int vm_flags = 0;
+       int write = !!(flags & GUP_FLAGS_WRITE);
+       int force = !!(flags & GUP_FLAGS_FORCE);
+       int ignore = !!(flags & GUP_FLAGS_IGNORE_VMA_PERMISSIONS);
+       int ignore_sigkill = !!(flags & GUP_FLAGS_IGNORE_SIGKILL);
+
+       if (len <= 0)
+               return 0;
+       /* 
+        * Require read or write permissions.
+        * If 'force' is set, we only require the "MAY" flags.
+        */
+       vm_flags  = write ? (VM_WRITE | VM_MAYWRITE) : (VM_READ | VM_MAYREAD);
+       vm_flags &= force ? (VM_MAYREAD | VM_MAYWRITE) : (VM_READ | VM_WRITE);
+       i = 0;
+
+       do {
+               struct vm_area_struct *vma;
+               unsigned int foll_flags;
+
+               //vma = find_extend_vma(mm, start);
+               vma = find_vma(mm, start);
+               if (!vma && in_gate_area(tsk, start)) {
+                       unsigned long pg = start & PAGE_MASK;
+                       struct vm_area_struct *gate_vma = get_gate_vma(tsk);
+                       pgd_t *pgd;
+                       pud_t *pud;
+                       pmd_t *pmd;
+                       pte_t *pte;
+
+                       /* user gate pages are read-only */
+                       if (!ignore && write)
+                               return i ? : -EFAULT;
+                       if (pg > TASK_SIZE)
+                               pgd = pgd_offset_k(pg);
+                       else
+                               pgd = pgd_offset_gate(mm, pg);
+                       BUG_ON(pgd_none(*pgd));
+                       pud = pud_offset(pgd, pg);
+                       BUG_ON(pud_none(*pud));
+                       pmd = pmd_offset(pud, pg);
+                       if (pmd_none(*pmd))
+                               return i ? : -EFAULT;
+                       pte = pte_offset_map(pmd, pg);
+                       if (pte_none(*pte)) {
+                               pte_unmap(pte);
+                               return i ? : -EFAULT;
+                       }
+                       if (pages) {
+                               struct page *page = vm_normal_page(gate_vma, start, *pte);
+                               pages[i] = page;
+                               if (page)
+                                       get_page(page);
+                       }
+                       pte_unmap(pte);
+                       if (vmas)
+                               vmas[i] = gate_vma;
+                       i++;
+                       start += PAGE_SIZE;
+                       len--;
+                       continue;
+               }
+
+               if (!vma ||
+                   (vma->vm_flags & (VM_IO | VM_PFNMAP)) ||
+                   (!ignore && !(vm_flags & vma->vm_flags)))
+                       return i ? : -EFAULT;
+
+               if (is_vm_hugetlb_page(vma)) {
+                       i = follow_hugetlb_page(mm, vma, pages, vmas,
+                                               &start, &len, i, write);
+                       continue;
+               }
+
+               foll_flags = FOLL_TOUCH;
+               if (pages)
+                       foll_flags |= FOLL_GET;
+               if (!write && use_zero_page(vma))
+                       foll_flags |= FOLL_ANON;
+
+               do {
+                       struct page *page;
+
+                       /*
+                        * If we have a pending SIGKILL, don't keep faulting
+                        * pages and potentially allocating memory, unless
+                        * current is handling munlock--e.g., on exit. In
+                        * that case, we are not allocating memory.  Rather,
+                        * we're only unlocking already resident/mapped pages.
+                        */
+                       if (unlikely(!ignore_sigkill &&
+                                       fatal_signal_pending(current)))
+                               return i ? i : -ERESTARTSYS;
+
+                       if (write)
+                               foll_flags |= FOLL_WRITE;
+
+
+                       //cond_resched();
+
+                       DBPRINTF ("pages = %p vma = %p\n", pages, vma);
+                       while (!(page = follow_page(vma, start, foll_flags))) {
+                               int ret;
+                               ret = handle_mm_fault(mm, vma, start,
+                                               foll_flags & FOLL_WRITE);
+                               if (ret & VM_FAULT_ERROR) {
+                                       if (ret & VM_FAULT_OOM)
+                                               return i ? i : -ENOMEM;
+                                       else if (ret & VM_FAULT_SIGBUS)
+                                               return i ? i : -EFAULT;
+                                       BUG();
+                               }
+                               if (ret & VM_FAULT_MAJOR)
+                                       tsk->maj_flt++;
+                               else
+                                       tsk->min_flt++;
+
+                               /*
+                                * The VM_FAULT_WRITE bit tells us that
+                                * do_wp_page has broken COW when necessary,
+                                * even if maybe_mkwrite decided not to set
+                                * pte_write. We can thus safely do subsequent
+                                * page lookups as if they were reads. But only
+                                * do so when looping for pte_write is futile:
+                                * in some cases userspace may also be wanting
+                                * to write to the gotten user page, which a
+                                * read fault here might prevent (a readonly
+                                * page might get reCOWed by userspace write).
+                                */
+                               if ((ret & VM_FAULT_WRITE) &&
+                                   !(vma->vm_flags & VM_WRITE))
+                                       foll_flags &= ~FOLL_WRITE;
+
+                               //cond_resched();
+                       }
+                       if (IS_ERR(page))
+                               return i ? i : PTR_ERR(page);
+                       if (pages) {
+                               pages[i] = page;
+
+                               flush_anon_page(vma, page, start);
+                               flush_dcache_page(page);
+                       }
+                       if (vmas)
+                               vmas[i] = vma;
+                       i++;
+                       start += PAGE_SIZE;
+                       len--;
+               } while (len && start < vma->vm_end);
+       } while (len);
+       return i;
+}
+
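+/*
+ * Same calling convention as get_user_pages(), but forwards to the atomic-safe
+ * __get_user_pages_uprobe() above.
+ */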
+int get_user_pages_uprobe(struct task_struct *tsk, struct mm_struct *mm,
+               unsigned long start, int len, int write, int force,
+               struct page **pages, struct vm_area_struct **vmas)
+{
+       int flags = 0;
+
+       if (write)
+               flags |= GUP_FLAGS_WRITE;
+       if (force)
+               flags |= GUP_FLAGS_FORCE;
+
+       return __get_user_pages_uprobe(tsk, mm,
+                               start, len, flags,
+                               pages, vmas);
+}
+
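+/*
+ * Variant of access_process_vm() built on get_user_pages_uprobe(): copies data
+ * to/from the target task one page at a time and returns the number of bytes
+ * transferred.
+ */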
 int
-access_process_vm (struct task_struct *tsk, unsigned long addr, void *buf, int len, int write)
+access_process_vm_atomic (struct task_struct *tsk, unsigned long addr, void *buf, int len, int write)
 {
-       struct mm_struct *mm;
+       struct mm_struct *mm;
        struct vm_area_struct *vma;
-       struct page *page;
        void *old_buf = buf;
 
-       mm = get_task_mm (tsk);
+       mm = get_task_mm(tsk);
        if (!mm)
                return 0;
 
-       down_read (&mm->mmap_sem);
-       /* ignore errors, just check how much was sucessfully transfered */
-       while (len)
-       {
+       down_read(&mm->mmap_sem);
+       /* ignore errors, just check how much was successfully transferred */
+       while (len) {
                int bytes, ret, offset;
                void *maddr;
-
-               ret = get_user_pages (tsk, mm, addr, 1, write, 1, &page, &vma);
-               if (ret <= 0)
-                       break;
-
-               bytes = len;
-               offset = addr & (PAGE_SIZE - 1);
-               if (bytes > PAGE_SIZE - offset)
-                       bytes = PAGE_SIZE - offset;
-
-               maddr = kmap (page);    //, KM_USER0);
-               if (write)
-               {
-                       copy_to_user_page (vma, page, addr, maddr + offset, buf, bytes);
-                       set_page_dirty_lock (page);
-               }
-               else
-               {
-                       copy_from_user_page (vma, page, addr, buf, maddr + offset, bytes);
+               struct page *page = NULL;
+
+               ret = get_user_pages_uprobe(tsk, mm, addr, 1,
+                               write, 1, &page, &vma);
+               if (ret <= 0) {
+                       /*
+                        * Check if this is a VM_IO | VM_PFNMAP VMA, which
+                        * we can access using slightly different code.
+                        */
+#ifdef CONFIG_HAVE_IOREMAP_PROT
+                       vma = find_vma(mm, addr);
+                       if (!vma)
+                               break;
+                       if (vma->vm_ops && vma->vm_ops->access)
+                               ret = vma->vm_ops->access(vma, addr, buf,
+                                                         len, write);
+                       if (ret <= 0)
+#endif
+                               break;
+                       bytes = ret;
+               } else {
+                       bytes = len;
+                       offset = addr & (PAGE_SIZE-1);
+                       if (bytes > PAGE_SIZE-offset)
+                               bytes = PAGE_SIZE-offset;
+
+                       maddr = kmap(page);
+                       if (write) {
+                               copy_to_user_page(vma, page, addr,
+                                                 maddr + offset, buf, bytes);
+                               set_page_dirty_lock(page);
+                       } else {
+                               copy_from_user_page(vma, page, addr,
+                                                   buf, maddr + offset, bytes);
+                       }
+                       kunmap(page);
+                       page_cache_release(page);
                }
-               kunmap (page);  //, KM_USER0);
-               page_cache_release (page);
                len -= bytes;
                buf += bytes;
                addr += bytes;
        }
-       up_read (&mm->mmap_sem);
-       mmput (mm);
+       up_read(&mm->mmap_sem);
+       mmput(mm);
 
        return buf - old_buf;
 }
-#endif
 
 #ifdef CONFIG_DEBUG_FS
 const char *(*__real_kallsyms_lookup) (unsigned long addr, unsigned long *symbolsize, unsigned long *offset, char **modname, char *namebuf);
@@ -1732,7 +1946,7 @@ EXPORT_SYMBOL_GPL (unregister_kretprobe);
 EXPORT_SYMBOL_GPL (register_uretprobe);
 EXPORT_SYMBOL_GPL (unregister_uretprobe);
 EXPORT_SYMBOL_GPL (unregister_all_uprobes);
-//EXPORT_SYMBOL_GPL (access_process_vm_atomic);
+EXPORT_SYMBOL_GPL (access_process_vm_atomic);
 #if LINUX_VERSION_CODE != KERNEL_VERSION(2,6,23)
 EXPORT_SYMBOL_GPL (access_process_vm);
 #endif
@@ -1741,4 +1955,4 @@ EXPORT_SYMBOL_GPL (is_page_present);
 #else
 EXPORT_SYMBOL_GPL (page_present);
 #endif
-//EXPORT_SYMBOL_GPL(get_user_pages_atomic);
+
diff --git a/kprobe/kprobes.h b/kprobe/kprobes.h
index 8ca0ca1..82b0235 100644
@@ -246,8 +246,8 @@ void set_normalized_timeval (struct timeval *tv, time_t sec, suseconds_t usec);
 kprobe_opcode_t *get_insn_slot (struct task_struct *task, int atomic);
 void free_insn_slot (struct hlist_head *page_list, struct task_struct *task, kprobe_opcode_t *slot, int dirty);
 
-//int access_process_vm_atomic(struct task_struct *tsk, unsigned long addr, void *buf, int len, int write);
-#define access_process_vm_atomic       access_process_vm
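+/* Now a real function (implemented in kprobes.c) instead of an alias for
+ * access_process_vm(); used through the read/write_proc_vm_atomic macros below. */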
+int access_process_vm_atomic(struct task_struct *tsk, unsigned long addr, void *buf, int len, int write);
+
 #define read_proc_vm_atomic(tsk, addr, buf, len)       access_process_vm_atomic(tsk, addr, buf, len, 0)
 #define write_proc_vm_atomic(tsk, addr, buf, len)      access_process_vm_atomic(tsk, addr, buf, len, 1)
 int page_present (struct mm_struct *mm, unsigned long addr);
diff --git a/kprobe/kprobes_arch.c b/kprobe/kprobes_arch.c
index 46c2e19..e3bfc03 100644
@@ -1572,6 +1572,12 @@ collect_one_slot (struct hlist_head *page_list, struct task_struct *task,
                else
                {
                        if(task){
+// E. G.: This code triggers a kernel dump ("scheduling while atomic") because it can
+// reschedule while running in atomic context. As a workaround it is commented out.
+// This leaks memory in the instrumented process, but instrumentation itself keeps
+// working correctly. A proper fix is planned as part of the KProbe redesign aimed at
+// better supportability and performance.
+#if 0
                                //printk("collect_one_slot %p/%d\n", task, task->pid);
                                mm = get_task_mm (task);
                                if (mm){                        
@@ -1580,6 +1586,8 @@ collect_one_slot (struct hlist_head *page_list, struct task_struct *task,
                                        up_write (&mm->mmap_sem);
                                        mmput(mm);
                                }
+#endif
+                               kip->insns = NULL; // workaround: leak the slot instead of unmapping it here
                                kip->tgid = 0;
                        }
                        else {
@@ -2314,7 +2322,7 @@ setjmp_pre_handler (struct kprobe *p, struct pt_regs *regs)
                        rcu_read_unlock();
                }
                if (pre_entry)
-                       p->ss_addr = pre_entry (jp->priv_arg, regs);
+                       p->ss_addr = (void *)pre_entry (jp->priv_arg, regs);
                if (entry){
 # if defined(CONFIG_MIPS)
                        entry (regs->regs[4], regs->regs[5], regs->regs[6], regs->regs[7], regs->regs[8], regs->regs[9]);
@@ -3080,6 +3088,17 @@ static struct jprobe do_exit_p =
 // persistent deps
 DECLARE_MOD_CB_DEP(kallsyms_search, unsigned long, const char *name);
 DECLARE_MOD_FUNC_DEP(access_process_vm, int, struct task_struct * tsk, unsigned long addr, void *buf, int len, int write);
+
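+// Unexported kernel symbols needed by the new __get_user_pages_uprobe() path;
+// declared here and resolved at init time in arch_init_kprobes().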
+DECLARE_MOD_FUNC_DEP(find_extend_vma, struct vm_area_struct *, struct mm_struct * mm, unsigned long addr);
+DECLARE_MOD_FUNC_DEP(handle_mm_fault, int, struct mm_struct *mm, struct vm_area_struct *vma, unsigned long address, int write_access);
+DECLARE_MOD_FUNC_DEP(get_gate_vma, struct vm_area_struct *, struct task_struct *tsk);
+DECLARE_MOD_FUNC_DEP(in_gate_area_no_task, int, unsigned long addr);
+DECLARE_MOD_FUNC_DEP(follow_page, struct page *, struct vm_area_struct * vma, unsigned long address, unsigned int foll_flags);
+DECLARE_MOD_FUNC_DEP(__flush_anon_page, void, struct vm_area_struct *vma, struct page *page, unsigned long vmaddr);
+DECLARE_MOD_FUNC_DEP(vm_normal_page, struct page *, struct vm_area_struct *vma, unsigned long addr, pte_t pte);
+DECLARE_MOD_FUNC_DEP(flush_ptrace_access, void, struct vm_area_struct *vma, struct page *page, unsigned long uaddr, void *kaddr, unsigned long len, int write);
+
+
 // deps controled by config macros
 #ifdef KERNEL_HAS_ISPAGEPRESENT
 DECLARE_MOD_FUNC_DEP(is_page_present, int, struct mm_struct * mm, unsigned long address);
@@ -3128,6 +3147,32 @@ DECLARE_MOD_FUNC_DEP(put_task_struct, void, struct rcu_head * rhp);
 // persistent deps
 DECLARE_MOD_DEP_WRAPPER(access_process_vm, int, struct task_struct *tsk, unsigned long addr, void *buf, int len, int write)
 IMP_MOD_DEP_WRAPPER(access_process_vm, tsk, addr, buf, len, write)
+
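+// Wrappers with the original names that dispatch to the dependencies declared above.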
+DECLARE_MOD_DEP_WRAPPER (find_extend_vma, struct vm_area_struct *, struct mm_struct * mm, unsigned long addr)
+IMP_MOD_DEP_WRAPPER (find_extend_vma, mm, addr)
+
+DECLARE_MOD_DEP_WRAPPER (handle_mm_fault, int, struct mm_struct *mm, struct vm_area_struct *vma, unsigned long address, int write_access)
+IMP_MOD_DEP_WRAPPER (handle_mm_fault, mm, vma, address, write_access)
+
+DECLARE_MOD_DEP_WRAPPER (get_gate_vma, struct vm_area_struct *, struct task_struct *tsk)
+IMP_MOD_DEP_WRAPPER (get_gate_vma, tsk)
+
+DECLARE_MOD_DEP_WRAPPER (in_gate_area_no_task, int, unsigned long addr)
+IMP_MOD_DEP_WRAPPER (in_gate_area_no_task, addr)
+
+DECLARE_MOD_DEP_WRAPPER (follow_page, struct page *, struct vm_area_struct * vma, unsigned long address, unsigned int foll_flags)
+IMP_MOD_DEP_WRAPPER (follow_page, vma, address, foll_flags)
+
+DECLARE_MOD_DEP_WRAPPER (__flush_anon_page, void, struct vm_area_struct *vma, struct page *page, unsigned long vmaddr)
+IMP_MOD_DEP_WRAPPER (__flush_anon_page, vma, page, vmaddr)
+
+DECLARE_MOD_DEP_WRAPPER(vm_normal_page, struct page *, struct vm_area_struct *vma, unsigned long addr, pte_t pte)
+IMP_MOD_DEP_WRAPPER (vm_normal_page, vma, addr, pte)
+
+DECLARE_MOD_DEP_WRAPPER (flush_ptrace_access, void, struct vm_area_struct *vma, struct page *page, unsigned long uaddr, void *kaddr, unsigned long len, int write)
+IMP_MOD_DEP_WRAPPER (flush_ptrace_access, vma, page, uaddr, kaddr, len, write)
+
+
 // deps controled by config macros
 #ifdef KERNEL_HAS_ISPAGEPRESENT
 int is_page_present (struct mm_struct *mm, unsigned long address)
@@ -3219,6 +3264,15 @@ int __init arch_init_kprobes (void)
  
        sched_addr = (kprobe_opcode_t *)kallsyms_search("__switch_to");//"schedule");
        fork_addr = (kprobe_opcode_t *)kallsyms_search("do_fork");
+
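+       /* Resolve the mm-related symbols required by the new atomic page-access code. */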
+       INIT_MOD_DEP_VAR(handle_mm_fault, handle_mm_fault);
+       INIT_MOD_DEP_VAR(flush_ptrace_access, flush_ptrace_access);
+       INIT_MOD_DEP_VAR(find_extend_vma, find_extend_vma);
+       INIT_MOD_DEP_VAR(get_gate_vma, get_gate_vma);
+       INIT_MOD_DEP_VAR(in_gate_area_no_task, in_gate_area_no_task);
+       INIT_MOD_DEP_VAR(follow_page, follow_page);
+       INIT_MOD_DEP_VAR(__flush_anon_page, __flush_anon_page);
+       INIT_MOD_DEP_VAR(vm_normal_page, vm_normal_page);
  
        INIT_MOD_DEP_VAR(access_process_vm, access_process_vm);
 #ifdef KERNEL_HAS_ISPAGEPRESENT