Fix kernel dump issue for tegra (problem with native kernel access_process_vm internals).
[kernel/swap-modules.git] / kprobe / dbi_kprobes_deps.c
/*
 *  Dynamic Binary Instrumentation Module based on KProbes
 *  modules/kprobe/dbi_kprobes_deps.c
 *
 * This program is free software; you can redistribute it and/or modify
 * it under the terms of the GNU General Public License as published by
 * the Free Software Foundation; either version 2 of the License, or
 * (at your option) any later version.
 *
 * This program is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 * GNU General Public License for more details.
 *
 * You should have received a copy of the GNU General Public License
 * along with this program; if not, write to the Free Software
 * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
 *
 * Copyright (C) Samsung Electronics, 2006-2010
 *
 * 2008-2009    Alexey Gerenkov <a.gerenkov@samsung.com>: initial user-space
 *              probes implementation; x86/ARM/MIPS support for both user and kernel spaces.
 * 2010         Ekaterina Gorelkina <e.gorelkina@samsung.com>: redesign of the module to separate core and arch parts
 *
 */

#include <linux/module.h>
#include <linux/sched.h>

#include <asm/pgtable.h>

#include "dbi_kprobes_deps.h"
#include "dbi_kdebug.h"


unsigned int *sched_addr;
unsigned int *fork_addr;

#define GUP_FLAGS_WRITE                  0x1
#define GUP_FLAGS_FORCE                  0x2
#define GUP_FLAGS_IGNORE_VMA_PERMISSIONS 0x4
#define GUP_FLAGS_IGNORE_SIGKILL         0x8

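/*
 * Declarations for kernel-internal functions that this module needs but
 * that are not exported to modules; they are bound to the real kernel
 * symbols at runtime in init_module_dependencies() below.
 */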
DECLARE_MOD_CB_DEP(kallsyms_search, unsigned long, const char *name);

DECLARE_MOD_FUNC_DEP(access_process_vm, int, struct task_struct *tsk, unsigned long addr, void *buf, int len, int write);

DECLARE_MOD_FUNC_DEP(find_extend_vma, struct vm_area_struct *, struct mm_struct *mm, unsigned long addr);

#if LINUX_VERSION_CODE <= KERNEL_VERSION(2, 6, 30)
DECLARE_MOD_FUNC_DEP(handle_mm_fault, int, struct mm_struct *mm, struct vm_area_struct *vma, unsigned long address, int write_access);
#else
DECLARE_MOD_FUNC_DEP(handle_mm_fault, int, struct mm_struct *mm, struct vm_area_struct *vma, unsigned long address, unsigned int flags);
#endif /* LINUX_VERSION_CODE <= KERNEL_VERSION(2, 6, 30) */

DECLARE_MOD_FUNC_DEP(get_gate_vma, struct vm_area_struct *, struct task_struct *tsk);
DECLARE_MOD_FUNC_DEP(in_gate_area_no_task, int, unsigned long addr);
DECLARE_MOD_FUNC_DEP(follow_page, \
		struct page *, struct vm_area_struct *vma, \
		unsigned long address, unsigned int foll_flags);
DECLARE_MOD_FUNC_DEP(__flush_anon_page, \
		void, struct vm_area_struct *vma, struct page *page, \
		unsigned long vmaddr);
DECLARE_MOD_FUNC_DEP(vm_normal_page, \
		struct page *, struct vm_area_struct *vma, \
		unsigned long addr, pte_t pte);

DECLARE_MOD_FUNC_DEP(flush_ptrace_access, \
		void, struct vm_area_struct *vma, struct page *page, \
		unsigned long uaddr, void *kaddr, unsigned long len, int write);

#if (LINUX_VERSION_CODE != KERNEL_VERSION(2, 6, 16))
DECLARE_MOD_FUNC_DEP(put_task_struct, \
		void, struct task_struct *tsk);
#else
DECLARE_MOD_FUNC_DEP(put_task_struct, \
		void, struct rcu_head *rhp);
#endif

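/*
 * Wrappers with the original kernel signatures that simply forward their
 * arguments to the dynamically resolved functions declared above.
 */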
DECLARE_MOD_DEP_WRAPPER(access_process_vm, int, struct task_struct *tsk, unsigned long addr, void *buf, int len, int write)
IMP_MOD_DEP_WRAPPER(access_process_vm, tsk, addr, buf, len, write)

DECLARE_MOD_DEP_WRAPPER(find_extend_vma, struct vm_area_struct *, struct mm_struct *mm, unsigned long addr)
IMP_MOD_DEP_WRAPPER(find_extend_vma, mm, addr)

#if LINUX_VERSION_CODE <= KERNEL_VERSION(2, 6, 30)
DECLARE_MOD_DEP_WRAPPER(handle_mm_fault, \
		int, struct mm_struct *mm, struct vm_area_struct *vma, unsigned long address, int write_access)
IMP_MOD_DEP_WRAPPER(handle_mm_fault, mm, vma, address, write_access)
#else
DECLARE_MOD_DEP_WRAPPER(handle_mm_fault, \
		int, struct mm_struct *mm, struct vm_area_struct *vma, unsigned long address, unsigned int flags)
IMP_MOD_DEP_WRAPPER(handle_mm_fault, mm, vma, address, flags)
#endif

DECLARE_MOD_DEP_WRAPPER(get_gate_vma, \
		struct vm_area_struct *, struct task_struct *tsk)
IMP_MOD_DEP_WRAPPER(get_gate_vma, tsk)

DECLARE_MOD_DEP_WRAPPER(in_gate_area_no_task, int, unsigned long addr)
IMP_MOD_DEP_WRAPPER(in_gate_area_no_task, addr)

DECLARE_MOD_DEP_WRAPPER(follow_page, \
		struct page *, struct vm_area_struct *vma, \
		unsigned long address, unsigned int foll_flags)
IMP_MOD_DEP_WRAPPER(follow_page, vma, address, foll_flags)

DECLARE_MOD_DEP_WRAPPER(__flush_anon_page, \
		void, struct vm_area_struct *vma, \
		struct page *page, unsigned long vmaddr)
IMP_MOD_DEP_WRAPPER(__flush_anon_page, vma, page, vmaddr)

DECLARE_MOD_DEP_WRAPPER(vm_normal_page, \
		struct page *, struct vm_area_struct *vma, \
		unsigned long addr, pte_t pte)
IMP_MOD_DEP_WRAPPER(vm_normal_page, vma, addr, pte)

DECLARE_MOD_DEP_WRAPPER(flush_ptrace_access, \
		void, struct vm_area_struct *vma, struct page *page, \
		unsigned long uaddr, void *kaddr, unsigned long len, int write)
IMP_MOD_DEP_WRAPPER(flush_ptrace_access, vma, page, uaddr, kaddr, len, write)

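/*
 * Resolve every declared dependency to the corresponding kernel symbol.
 * put_task_struct is special-cased because the underlying symbol differs
 * between kernel versions (__put_task_struct vs. __put_task_struct_cb).
 */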
int init_module_dependencies(void)
{
	INIT_MOD_DEP_VAR(handle_mm_fault, handle_mm_fault);
	INIT_MOD_DEP_VAR(flush_ptrace_access, flush_ptrace_access);
	INIT_MOD_DEP_VAR(find_extend_vma, find_extend_vma);
	INIT_MOD_DEP_VAR(get_gate_vma, get_gate_vma);
	INIT_MOD_DEP_VAR(in_gate_area_no_task, in_gate_area_no_task);
	INIT_MOD_DEP_VAR(follow_page, follow_page);
	INIT_MOD_DEP_VAR(__flush_anon_page, __flush_anon_page);
	INIT_MOD_DEP_VAR(vm_normal_page, vm_normal_page);
	INIT_MOD_DEP_VAR(access_process_vm, access_process_vm);

#if (LINUX_VERSION_CODE != KERNEL_VERSION(2, 6, 16))
# if (LINUX_VERSION_CODE < KERNEL_VERSION(2, 6, 11))
	INIT_MOD_DEP_VAR(put_task_struct, put_task_struct);
# else
	INIT_MOD_DEP_VAR(put_task_struct, __put_task_struct);
# endif
#else /* 2.6.16 */
	INIT_MOD_DEP_VAR(put_task_struct, __put_task_struct_cb);
#endif

	return 0;
}

#if LINUX_VERSION_CODE > KERNEL_VERSION(2,6,18)
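/*
 * Local copy of the static use_zero_page() helper from mm/memory.c:
 * decides whether FOLL_ANON may be used for a given VMA.
 */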
static inline int use_zero_page(struct vm_area_struct *vma)
{
	/*
	 * We don't want to optimize FOLL_ANON for make_pages_present()
	 * when it tries to page in a VM_LOCKED region. As to VM_SHARED,
	 * we want to get the page from the page tables to make sure
	 * that we serialize and update with any other user of that
	 * mapping.
	 */
	if (vma->vm_flags & (VM_LOCKED | VM_SHARED))
		return 0;
	/*
	 * And if we have a fault routine, it's not an anonymous region.
	 */
	return !vma->vm_ops || !vma->vm_ops->fault;
}

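/*
 * Version of __get_user_pages() derived from mm/memory.c.  The
 * cond_resched() calls and the fatal-signal check are left out so that
 * the function does not sleep when used from the *_atomic paths below.
 */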
int __get_user_pages_uprobe(struct task_struct *tsk, struct mm_struct *mm,
		unsigned long start, int len, int flags,
		struct page **pages, struct vm_area_struct **vmas)
{
	int i;
	unsigned int vm_flags = 0;
	int write = !!(flags & GUP_FLAGS_WRITE);
	int force = !!(flags & GUP_FLAGS_FORCE);
	int ignore = !!(flags & GUP_FLAGS_IGNORE_VMA_PERMISSIONS);
	int ignore_sigkill = !!(flags & GUP_FLAGS_IGNORE_SIGKILL);

	if (len <= 0)
		return 0;
	/*
	 * Require read or write permissions.
	 * If 'force' is set, we only require the "MAY" flags.
	 */
	vm_flags  = write ? (VM_WRITE | VM_MAYWRITE) : (VM_READ | VM_MAYREAD);
	vm_flags &= force ? (VM_MAYREAD | VM_MAYWRITE) : (VM_READ | VM_WRITE);
	i = 0;

	do {
		struct vm_area_struct *vma;
		unsigned int foll_flags;

		/* find_vma() is used here instead of find_extend_vma() */
		vma = find_vma(mm, start);
		if (!vma && in_gate_area(tsk, start)) {
			unsigned long pg = start & PAGE_MASK;
			struct vm_area_struct *gate_vma = get_gate_vma(tsk);
			pgd_t *pgd;
			pud_t *pud;
			pmd_t *pmd;
			pte_t *pte;

			/* user gate pages are read-only */
			if (!ignore && write)
				return i ? : -EFAULT;
			if (pg > TASK_SIZE)
				pgd = pgd_offset_k(pg);
			else
				pgd = pgd_offset_gate(mm, pg);
			BUG_ON(pgd_none(*pgd));
			pud = pud_offset(pgd, pg);
			BUG_ON(pud_none(*pud));
			pmd = pmd_offset(pud, pg);
			if (pmd_none(*pmd))
				return i ? : -EFAULT;
			pte = pte_offset_map(pmd, pg);
			if (pte_none(*pte)) {
				pte_unmap(pte);
				return i ? : -EFAULT;
			}
			if (pages) {
				struct page *page = vm_normal_page(gate_vma, start, *pte);
				pages[i] = page;
				if (page)
					get_page(page);
			}
			pte_unmap(pte);
			if (vmas)
				vmas[i] = gate_vma;
			i++;
			start += PAGE_SIZE;
			len--;
			continue;
		}

		if (!vma ||
		    (vma->vm_flags & (VM_IO | VM_PFNMAP)) ||
		    (!ignore && !(vm_flags & vma->vm_flags)))
			return i ? : -EFAULT;

		if (is_vm_hugetlb_page(vma)) {
#if LINUX_VERSION_CODE <= KERNEL_VERSION(2,6,18)
			i = follow_hugetlb_page(mm, vma, pages, vmas,
						&start, &len, i);
#else
			i = follow_hugetlb_page(mm, vma, pages, vmas,
						&start, &len, i, write);
#endif
			continue;
		}

		foll_flags = FOLL_TOUCH;
		if (pages)
			foll_flags |= FOLL_GET;

#if LINUX_VERSION_CODE > KERNEL_VERSION(2,6,18)
#if LINUX_VERSION_CODE <= KERNEL_VERSION(2,6,30)
		if (!write && use_zero_page(vma))
			foll_flags |= FOLL_ANON;
#endif
#endif

		do {
			struct page *page;

#if 0
			/*
			 * If we have a pending SIGKILL, don't keep faulting
			 * pages and potentially allocating memory, unless
			 * current is handling munlock--e.g., on exit. In
			 * that case, we are not allocating memory.  Rather,
			 * we're only unlocking already resident/mapped pages.
			 */
			if (unlikely(!ignore_sigkill &&
					fatal_signal_pending(current)))
				return i ? i : -ERESTARTSYS;
#endif

			if (write)
				foll_flags |= FOLL_WRITE;

			/* no cond_resched() here: this path must not sleep */

			DBPRINTF("pages = %p vma = %p\n", pages, vma);
			while (!(page = follow_page(vma, start, foll_flags))) {
				int ret;
				ret = handle_mm_fault(mm, vma, start,
						foll_flags & FOLL_WRITE);

#if LINUX_VERSION_CODE <= KERNEL_VERSION(2,6,18)
				if (ret & VM_FAULT_WRITE)
					foll_flags &= ~FOLL_WRITE;

				switch (ret & ~VM_FAULT_WRITE) {
				case VM_FAULT_MINOR:
					tsk->min_flt++;
					break;
				case VM_FAULT_MAJOR:
					tsk->maj_flt++;
					break;
				case VM_FAULT_SIGBUS:
					return i ? i : -EFAULT;
				case VM_FAULT_OOM:
					return i ? i : -ENOMEM;
				default:
					BUG();
				}
#else
				if (ret & VM_FAULT_ERROR) {
					if (ret & VM_FAULT_OOM)
						return i ? i : -ENOMEM;
					else if (ret & VM_FAULT_SIGBUS)
						return i ? i : -EFAULT;
					BUG();
				}
				if (ret & VM_FAULT_MAJOR)
					tsk->maj_flt++;
				else
					tsk->min_flt++;

				/*
				 * The VM_FAULT_WRITE bit tells us that
				 * do_wp_page has broken COW when necessary,
				 * even if maybe_mkwrite decided not to set
				 * pte_write. We can thus safely do subsequent
				 * page lookups as if they were reads. But only
				 * do so when looping for pte_write is futile:
				 * in some cases userspace may also be wanting
				 * to write to the gotten user page, which a
				 * read fault here might prevent (a readonly
				 * page might get reCOWed by userspace write).
				 */
				if ((ret & VM_FAULT_WRITE) &&
				    !(vma->vm_flags & VM_WRITE))
					foll_flags &= ~FOLL_WRITE;

				/* no cond_resched() here either */
#endif
			}

			if (IS_ERR(page))
				return i ? i : PTR_ERR(page);
			if (pages) {
				pages[i] = page;

#if LINUX_VERSION_CODE <= KERNEL_VERSION(2,6,18)
				flush_anon_page(page, start);
#else
				flush_anon_page(vma, page, start);
#endif
				flush_dcache_page(page);
			}
			if (vmas)
				vmas[i] = vma;
			i++;
			start += PAGE_SIZE;
			len--;
		} while (len && start < vma->vm_end);
	} while (len);
	return i;
}
#endif /* LINUX_VERSION_CODE > KERNEL_VERSION(2,6,18) */


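/*
 * get_user_pages() replacement: on kernels >= 2.6.29 go through the local
 * __get_user_pages_uprobe(); on older kernels fall back to the stock
 * get_user_pages().
 */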
int get_user_pages_uprobe(struct task_struct *tsk, struct mm_struct *mm,
		unsigned long start, int len, int write, int force,
		struct page **pages, struct vm_area_struct **vmas)
{
#if LINUX_VERSION_CODE >= KERNEL_VERSION(2, 6, 29)
	int flags = 0;

	if (write)
		flags |= GUP_FLAGS_WRITE;
	if (force)
		flags |= GUP_FLAGS_FORCE;

	return __get_user_pages_uprobe(tsk, mm,
				start, len, flags,
				pages, vmas);
#else
	return get_user_pages(tsk, mm, start, len, write, force, pages, vmas);
#endif
}

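/*
 * Counterpart of access_process_vm() that pins pages through
 * get_user_pages_uprobe() instead of the native kernel path, so that user
 * memory can be read or written without relying on access_process_vm()
 * internals (see the Tegra kernel dump issue in the commit message).
 */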
int access_process_vm_atomic(struct task_struct *tsk, unsigned long addr, void *buf, int len, int write)
{
	struct mm_struct *mm;
	struct vm_area_struct *vma;
	void *old_buf = buf;

	mm = get_task_mm(tsk);
	if (!mm)
		return 0;

	down_read(&mm->mmap_sem);
	/* ignore errors, just check how much was successfully transferred */
	while (len) {
		int bytes, ret, offset;
		void *maddr;
		struct page *page = NULL;

		ret = get_user_pages_uprobe(tsk, mm, addr, 1,
				write, 1, &page, &vma);
		if (ret <= 0) {
			/*
			 * Check if this is a VM_IO | VM_PFNMAP VMA, which
			 * we can access using slightly different code.
			 */
#ifdef CONFIG_HAVE_IOREMAP_PROT
			vma = find_vma(mm, addr);
			if (!vma)
				break;
			if (vma->vm_ops && vma->vm_ops->access)
				ret = vma->vm_ops->access(vma, addr, buf,
							  len, write);
			if (ret <= 0)
#endif
				break;
			bytes = ret;
		} else {
			bytes = len;
			offset = addr & (PAGE_SIZE-1);
			if (bytes > PAGE_SIZE-offset)
				bytes = PAGE_SIZE-offset;

			maddr = kmap(page);
			if (write) {
				copy_to_user_page(vma, page, addr,
						  maddr + offset, buf, bytes);
				set_page_dirty_lock(page);
			} else {
				copy_from_user_page(vma, page, addr,
						    buf, maddr + offset, bytes);
			}
			kunmap(page);
			page_cache_release(page);
		}
		len -= bytes;
		buf += bytes;
		addr += bytes;
	}
	up_read(&mm->mmap_sem);
	mmput(mm);

	return buf - old_buf;
}

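/*
 * Walk the page tables for @address and report whether the backing page
 * is present and maps to a valid pfn.
 */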
int page_present(struct mm_struct *mm, unsigned long address)
{
	pgd_t *pgd;
	pud_t *pud;
	pmd_t *pmd;
	pte_t *ptep, pte;
	unsigned long pfn;

	pgd = pgd_offset(mm, address);
	if (pgd_none(*pgd) || unlikely(pgd_bad(*pgd)))
		goto out;

	pud = pud_offset(pgd, address);
	if (pud_none(*pud) || unlikely(pud_bad(*pud)))
		goto out;

	pmd = pmd_offset(pud, address);
	if (pmd_none(*pmd) || unlikely(pmd_bad(*pmd)))
		goto out;

	ptep = pte_offset_map(pmd, address);
	if (!ptep)
		goto out;

	pte = *ptep;
	pte_unmap(ptep);
	if (pte_present(pte)) {
		pfn = pte_pfn(pte);
		if (pfn_valid(pfn))
			return 1;
	}

out:
	return 0;
}


EXPORT_SYMBOL_GPL(page_present);
EXPORT_SYMBOL_GPL(get_user_pages_uprobe);
EXPORT_SYMBOL_GPL(access_process_vm_atomic);