Fix tiny bugs for Beagle/SELP.
[kernel/swap-modules.git] / kprobe / dbi_kprobes_deps.c
/*
 *  Dynamic Binary Instrumentation Module based on KProbes
 *  modules/kprobe/dbi_kprobes_deps.c
 *
 * This program is free software; you can redistribute it and/or modify
 * it under the terms of the GNU General Public License as published by
 * the Free Software Foundation; either version 2 of the License, or
 * (at your option) any later version.
 *
 * This program is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 * GNU General Public License for more details.
 *
 * You should have received a copy of the GNU General Public License
 * along with this program; if not, write to the Free Software
 * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
 *
 * Copyright (C) Samsung Electronics, 2006-2010
 *
 * 2008-2009    Alexey Gerenkov <a.gerenkov@samsung.com>: initial implementation
 *              of user-space probes; x86/ARM/MIPS support for both user and kernel space.
 * 2010         Ekaterina Gorelkina <e.gorelkina@samsung.com>: redesigned the module
 *              to separate the core and architecture-specific parts.
 */
#include <linux/module.h>
#include <linux/sched.h>

#include <asm/pgtable.h>

#include "dbi_kprobes_deps.h"
#include "dbi_kdebug.h"

#include <linux/slab.h>
#include <linux/mm.h>

unsigned int *sched_addr;
unsigned int *fork_addr;


#define GUP_FLAGS_WRITE                  0x1
#define GUP_FLAGS_FORCE                  0x2
#define GUP_FLAGS_IGNORE_VMA_PERMISSIONS 0x4
#define GUP_FLAGS_IGNORE_SIGKILL         0x8

#if LINUX_VERSION_CODE > KERNEL_VERSION(2, 6, 29)
struct mm_struct *init_mm_ptr;
struct mm_struct init_mm;
#endif

DECLARE_MOD_CB_DEP(kallsyms_search, unsigned long, const char *name);
DECLARE_MOD_FUNC_DEP(access_process_vm, int, struct task_struct *tsk, unsigned long addr, void *buf, int len, int write);
#if LINUX_VERSION_CODE >= KERNEL_VERSION(2, 6, 34)
DECLARE_MOD_FUNC_DEP(copy_to_user_page, void, struct vm_area_struct *vma, struct page *page, unsigned long uaddr, void *dst, const void *src, unsigned long len);
#endif

DECLARE_MOD_FUNC_DEP(find_extend_vma, struct vm_area_struct *, struct mm_struct *mm, unsigned long addr);

#if LINUX_VERSION_CODE <= KERNEL_VERSION(2, 6, 30)
#if LINUX_VERSION_CODE > KERNEL_VERSION(2, 6, 18)
DECLARE_MOD_FUNC_DEP(handle_mm_fault, int, struct mm_struct *mm, struct vm_area_struct *vma, unsigned long address, int write_access);
#endif
#else
DECLARE_MOD_FUNC_DEP(handle_mm_fault, int, struct mm_struct *mm, struct vm_area_struct *vma, unsigned long address, unsigned int flags);
#endif /* LINUX_VERSION_CODE <= KERNEL_VERSION(2, 6, 30) */

DECLARE_MOD_FUNC_DEP(get_gate_vma, struct vm_area_struct *, struct task_struct *tsk);

#ifdef CONFIG_HUGETLB_PAGE
DECLARE_MOD_FUNC_DEP(follow_hugetlb_page, int, struct mm_struct *mm, struct vm_area_struct *vma, struct page **pages, struct vm_area_struct **vmas, unsigned long *position, int *length, int i, int write);
#endif

#ifdef __HAVE_ARCH_GATE_AREA
DECLARE_MOD_FUNC_DEP(in_gate_area, int, struct task_struct *tsk, unsigned long addr);
#else
DECLARE_MOD_FUNC_DEP(in_gate_area_no_task, int, unsigned long addr);
#endif

DECLARE_MOD_FUNC_DEP(follow_page, \
		struct page *, struct vm_area_struct *vma, \
		unsigned long address, unsigned int foll_flags);
DECLARE_MOD_FUNC_DEP(__flush_anon_page, \
		void, struct vm_area_struct *vma, struct page *page, \
		unsigned long vmaddr);
DECLARE_MOD_FUNC_DEP(vm_normal_page, \
		struct page *, struct vm_area_struct *vma, \
		unsigned long addr, pte_t pte);

DECLARE_MOD_FUNC_DEP(flush_ptrace_access, \
		void, struct vm_area_struct *vma, struct page *page, \
		unsigned long uaddr, void *kaddr, unsigned long len, int write);

#if (LINUX_VERSION_CODE != KERNEL_VERSION(2, 6, 16))
DECLARE_MOD_FUNC_DEP(put_task_struct, \
		void, struct task_struct *tsk);
#else
DECLARE_MOD_FUNC_DEP(put_task_struct, \
		void, struct rcu_head *rhp);
#endif

DECLARE_MOD_DEP_WRAPPER(access_process_vm, int, struct task_struct *tsk, unsigned long addr, void *buf, int len, int write)
IMP_MOD_DEP_WRAPPER(access_process_vm, tsk, addr, buf, len, write)

DECLARE_MOD_DEP_WRAPPER(find_extend_vma, struct vm_area_struct *, struct mm_struct *mm, unsigned long addr)
IMP_MOD_DEP_WRAPPER(find_extend_vma, mm, addr)

#if LINUX_VERSION_CODE <= KERNEL_VERSION(2, 6, 30)
#if LINUX_VERSION_CODE > KERNEL_VERSION(2, 6, 18)
DECLARE_MOD_DEP_WRAPPER(handle_mm_fault, \
		int, struct mm_struct *mm, struct vm_area_struct *vma, unsigned long address, int write_access)
IMP_MOD_DEP_WRAPPER(handle_mm_fault, mm, vma, address, write_access)
#endif
#else
DECLARE_MOD_DEP_WRAPPER(handle_mm_fault, \
		int, struct mm_struct *mm, struct vm_area_struct *vma, unsigned long address, unsigned int flags)
IMP_MOD_DEP_WRAPPER(handle_mm_fault, mm, vma, address, flags)
#endif

DECLARE_MOD_DEP_WRAPPER(get_gate_vma, \
		struct vm_area_struct *, struct task_struct *tsk)
IMP_MOD_DEP_WRAPPER(get_gate_vma, tsk)

#ifdef CONFIG_HUGETLB_PAGE
DECLARE_MOD_DEP_WRAPPER(follow_hugetlb_page, int, struct mm_struct *mm, struct vm_area_struct *vma, struct page **pages, struct vm_area_struct **vmas, unsigned long *position, int *length, int i, int write)
IMP_MOD_DEP_WRAPPER(follow_hugetlb_page, mm, vma, pages, vmas, position, length, i, write)
#endif

#ifdef __HAVE_ARCH_GATE_AREA
DECLARE_MOD_DEP_WRAPPER(in_gate_area, int, struct task_struct *tsk, unsigned long addr)
IMP_MOD_DEP_WRAPPER(in_gate_area, tsk, addr)
#else
DECLARE_MOD_DEP_WRAPPER(in_gate_area_no_task, int, unsigned long addr)
IMP_MOD_DEP_WRAPPER(in_gate_area_no_task, addr)
#endif

#if (LINUX_VERSION_CODE != KERNEL_VERSION(2, 6, 11))
DECLARE_MOD_DEP_WRAPPER(follow_page, \
		struct page *, struct vm_area_struct *vma, \
		unsigned long address, unsigned int foll_flags)
IMP_MOD_DEP_WRAPPER(follow_page, vma, address, foll_flags)
#endif

DECLARE_MOD_DEP_WRAPPER(__flush_anon_page, \
		void, struct vm_area_struct *vma, \
		struct page *page, unsigned long vmaddr)
IMP_MOD_DEP_WRAPPER(__flush_anon_page, vma, page, vmaddr)

DECLARE_MOD_DEP_WRAPPER(vm_normal_page, \
		struct page *, struct vm_area_struct *vma, \
		unsigned long addr, pte_t pte)
IMP_MOD_DEP_WRAPPER(vm_normal_page, vma, addr, pte)

DECLARE_MOD_DEP_WRAPPER(flush_ptrace_access, \
		void, struct vm_area_struct *vma, struct page *page, \
		unsigned long uaddr, void *kaddr, unsigned long len, int write)
IMP_MOD_DEP_WRAPPER(flush_ptrace_access, vma, page, uaddr, kaddr, len, write)

#if LINUX_VERSION_CODE >= KERNEL_VERSION(2, 6, 34)
DECLARE_MOD_DEP_WRAPPER(copy_to_user_page, void, struct vm_area_struct *vma, struct page *page, unsigned long uaddr, void *dst, const void *src, unsigned long len)
IMP_MOD_DEP_WRAPPER(copy_to_user_page, vma, page, uaddr, dst, src, len)
#endif

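/*
 * How the pieces above fit together (illustrative sketch, not part of the
 * original code): each unexported kernel symbol is handled in three steps.
 * The name "foo_symbol" and its arguments below are hypothetical.
 *
 *	// 1. Declare a function-pointer holder for the symbol:
 *	DECLARE_MOD_FUNC_DEP(foo_symbol, int, struct task_struct *tsk);
 *
 *	// 2. Emit a wrapper with the real signature that calls through
 *	//    the stored pointer:
 *	DECLARE_MOD_DEP_WRAPPER(foo_symbol, int, struct task_struct *tsk)
 *	IMP_MOD_DEP_WRAPPER(foo_symbol, tsk)
 *
 *	// 3. Resolve the address at init time via kallsyms, as done in
 *	//    init_module_dependencies() below:
 *	INIT_MOD_DEP_VAR(foo_symbol, foo_symbol);
 */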
int init_module_dependencies(void)
{

#if LINUX_VERSION_CODE > KERNEL_VERSION(2, 6, 29)
	/* init_mm is not exported here; look it up and keep a local copy */
	init_mm_ptr = (struct mm_struct *)kallsyms_search("init_mm");
	memcpy(&init_mm, init_mm_ptr, sizeof(struct mm_struct));
#endif

#if LINUX_VERSION_CODE > KERNEL_VERSION(2, 6, 18)
	INIT_MOD_DEP_VAR(handle_mm_fault, handle_mm_fault);
#endif

	INIT_MOD_DEP_VAR(flush_ptrace_access, flush_ptrace_access);
	INIT_MOD_DEP_VAR(find_extend_vma, find_extend_vma);
	INIT_MOD_DEP_VAR(get_gate_vma, get_gate_vma);

#ifdef CONFIG_HUGETLB_PAGE
	INIT_MOD_DEP_VAR(follow_hugetlb_page, follow_hugetlb_page);
#endif

#ifdef __HAVE_ARCH_GATE_AREA
	INIT_MOD_DEP_VAR(in_gate_area, in_gate_area);
#else
	INIT_MOD_DEP_VAR(in_gate_area_no_task, in_gate_area_no_task);
#endif
	INIT_MOD_DEP_VAR(follow_page, follow_page);

	INIT_MOD_DEP_VAR(__flush_anon_page, __flush_anon_page);
	INIT_MOD_DEP_VAR(vm_normal_page, vm_normal_page);
	INIT_MOD_DEP_VAR(access_process_vm, access_process_vm);

#if (LINUX_VERSION_CODE != KERNEL_VERSION(2, 6, 16))
# if (LINUX_VERSION_CODE < KERNEL_VERSION(2, 6, 11))
	INIT_MOD_DEP_VAR(put_task_struct, put_task_struct);
# else
	INIT_MOD_DEP_VAR(put_task_struct, __put_task_struct);
# endif
#else /* 2.6.16 */
	INIT_MOD_DEP_VAR(put_task_struct, __put_task_struct_cb);
#endif

// for 2.6.32.9-iboot (tegra-froyo)
#if (LINUX_VERSION_CODE >= KERNEL_VERSION(2, 6, 34))
	INIT_MOD_DEP_VAR(copy_to_user_page, copy_to_user_page);
#endif

	return 0;
}

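/*
 * Usage sketch (illustrative, assuming the surrounding module layout):
 * init_module_dependencies() must run before any of the wrapped symbols
 * above are used, e.g. from the module init callback.  "dbi_init" is a
 * hypothetical name.
 *
 *	static int __init dbi_init(void)
 *	{
 *		if (init_module_dependencies() != 0)
 *			return -EINVAL;
 *		// ... register probes, etc. ...
 *		return 0;
 *	}
 */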

#if LINUX_VERSION_CODE > KERNEL_VERSION(2, 6, 18)
static inline int use_zero_page(struct vm_area_struct *vma)
{
	/*
	 * We don't want to optimize FOLL_ANON for make_pages_present()
	 * when it tries to page in a VM_LOCKED region. As to VM_SHARED,
	 * we want to get the page from the page tables to make sure
	 * that we serialize and update with any other user of that
	 * mapping.
	 */
	if (vma->vm_flags & (VM_LOCKED | VM_SHARED))
		return 0;
	/*
	 * And if we have a fault routine, it's not an anonymous region.
	 */
	return !vma->vm_ops || !vma->vm_ops->fault;
}

int __get_user_pages_uprobe(struct task_struct *tsk, struct mm_struct *mm,
		unsigned long start, int len, int flags,
		struct page **pages, struct vm_area_struct **vmas)
{
	int i;
	unsigned int vm_flags = 0;
	int write = !!(flags & GUP_FLAGS_WRITE);
	int force = !!(flags & GUP_FLAGS_FORCE);
	int ignore = !!(flags & GUP_FLAGS_IGNORE_VMA_PERMISSIONS);
	int ignore_sigkill = !!(flags & GUP_FLAGS_IGNORE_SIGKILL);

	if (len <= 0)
		return 0;
	/*
	 * Require read or write permissions.
	 * If 'force' is set, we only require the "MAY" flags.
	 */
	vm_flags  = write ? (VM_WRITE | VM_MAYWRITE) : (VM_READ | VM_MAYREAD);
	vm_flags &= force ? (VM_MAYREAD | VM_MAYWRITE) : (VM_READ | VM_WRITE);
	i = 0;

	do {
		struct vm_area_struct *vma;
		unsigned int foll_flags;

		vma = find_vma(mm, start);
		if (!vma && in_gate_area(tsk, start)) {
			unsigned long pg = start & PAGE_MASK;
			struct vm_area_struct *gate_vma = get_gate_vma(tsk);
			pgd_t *pgd;
			pud_t *pud;
			pmd_t *pmd;
			pte_t *pte;

			/* user gate pages are read-only */
			if (!ignore && write)
				return i ? : -EFAULT;
			if (pg > TASK_SIZE)
				pgd = pgd_offset_k(pg);
			else
				pgd = pgd_offset_gate(mm, pg);
			BUG_ON(pgd_none(*pgd));
			pud = pud_offset(pgd, pg);
			BUG_ON(pud_none(*pud));
			pmd = pmd_offset(pud, pg);
			if (pmd_none(*pmd))
				return i ? : -EFAULT;
			pte = pte_offset_map(pmd, pg);
			if (pte_none(*pte)) {
				pte_unmap(pte);
				return i ? : -EFAULT;
			}
			if (pages) {
				struct page *page = vm_normal_page(gate_vma, start, *pte);
				pages[i] = page;
				if (page)
					get_page(page);
			}
			pte_unmap(pte);
			if (vmas)
				vmas[i] = gate_vma;
			i++;
			start += PAGE_SIZE;
			len--;
			continue;
		}

		if (!vma ||
		    (vma->vm_flags & (VM_IO | VM_PFNMAP)) ||
		    (!ignore && !(vm_flags & vma->vm_flags)))
			return i ? : -EFAULT;

		if (is_vm_hugetlb_page(vma)) {
#if LINUX_VERSION_CODE <= KERNEL_VERSION(2, 6, 18)
			i = follow_hugetlb_page(mm, vma, pages, vmas,
						&start, &len, i);
#else
			i = follow_hugetlb_page(mm, vma, pages, vmas,
						&start, &len, i, write);
#endif
			continue;
		}

		foll_flags = FOLL_TOUCH;
		if (pages)
			foll_flags |= FOLL_GET;

#if LINUX_VERSION_CODE > KERNEL_VERSION(2, 6, 18)
#if LINUX_VERSION_CODE <= KERNEL_VERSION(2, 6, 30)
		if (!write && use_zero_page(vma))
			foll_flags |= FOLL_ANON;
#endif
#endif

		do {
			struct page *page;

#if 0
			/*
			 * If we have a pending SIGKILL, don't keep faulting
			 * pages and potentially allocating memory, unless
			 * current is handling munlock--e.g., on exit. In
			 * that case, we are not allocating memory.  Rather,
			 * we're only unlocking already resident/mapped pages.
			 */
			if (unlikely(!ignore_sigkill &&
					fatal_signal_pending(current)))
				return i ? i : -ERESTARTSYS;
#endif

			if (write)
				foll_flags |= FOLL_WRITE;

			//cond_resched();

			DBPRINTF("pages = %p vma = %p\n", pages, vma);
			while (!(page = follow_page(vma, start, foll_flags))) {
				int ret;

				ret = handle_mm_fault(mm, vma, start,
						foll_flags & FOLL_WRITE);

#if LINUX_VERSION_CODE <= KERNEL_VERSION(2, 6, 18)
				if (ret & VM_FAULT_WRITE)
					foll_flags &= ~FOLL_WRITE;

				switch (ret & ~VM_FAULT_WRITE) {
				case VM_FAULT_MINOR:
					tsk->min_flt++;
					break;
				case VM_FAULT_MAJOR:
					tsk->maj_flt++;
					break;
				case VM_FAULT_SIGBUS:
					return i ? i : -EFAULT;
				case VM_FAULT_OOM:
					return i ? i : -ENOMEM;
				default:
					BUG();
				}
#else
				if (ret & VM_FAULT_ERROR) {
					if (ret & VM_FAULT_OOM)
						return i ? i : -ENOMEM;
					else if (ret & VM_FAULT_SIGBUS)
						return i ? i : -EFAULT;
					BUG();
				}
				if (ret & VM_FAULT_MAJOR)
					tsk->maj_flt++;
				else
					tsk->min_flt++;

				/*
				 * The VM_FAULT_WRITE bit tells us that
				 * do_wp_page has broken COW when necessary,
				 * even if maybe_mkwrite decided not to set
				 * pte_write. We can thus safely do subsequent
				 * page lookups as if they were reads. But only
				 * do so when looping for pte_write is futile:
				 * in some cases userspace may also be wanting
				 * to write to the gotten user page, which a
				 * read fault here might prevent (a readonly
				 * page might get reCOWed by userspace write).
				 */
				if ((ret & VM_FAULT_WRITE) &&
				    !(vma->vm_flags & VM_WRITE))
					foll_flags &= ~FOLL_WRITE;

				//cond_resched();
#endif
			}

			if (IS_ERR(page))
				return i ? i : PTR_ERR(page);
			if (pages) {
				pages[i] = page;

#if LINUX_VERSION_CODE <= KERNEL_VERSION(2, 6, 18)
				flush_anon_page(page, start);
#else
				flush_anon_page(vma, page, start);
#endif
				flush_dcache_page(page);
			}
			if (vmas)
				vmas[i] = vma;
			i++;
			start += PAGE_SIZE;
			len--;
		} while (len && start < vma->vm_end);
	} while (len);
	return i;
}
#endif


int get_user_pages_uprobe(struct task_struct *tsk, struct mm_struct *mm,
		unsigned long start, int len, int write, int force,
		struct page **pages, struct vm_area_struct **vmas)
{
#if LINUX_VERSION_CODE >= KERNEL_VERSION(2, 6, 29)
	int flags = 0;

	if (write)
		flags |= GUP_FLAGS_WRITE;
	if (force)
		flags |= GUP_FLAGS_FORCE;

	return __get_user_pages_uprobe(tsk, mm,
				start, len, flags,
				pages, vmas);
#else
	return get_user_pages(tsk, mm, start, len, write, force, pages, vmas);
#endif
}
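
/*
 * Usage sketch (illustrative, not from the original source): pinning a
 * single page of a traced task for read access, mirroring how
 * access_process_vm_atomic() below uses this helper.  "task" and "vaddr"
 * are hypothetical caller-supplied values.
 *
 *	struct page *page;
 *	struct vm_area_struct *vma;
 *	struct mm_struct *mm = get_task_mm(task);
 *
 *	if (mm) {
 *		if (get_user_pages_uprobe(task, mm, vaddr, 1, 0, 1,
 *					  &page, &vma) == 1) {
 *			// kmap() and read the page here, then drop the ref:
 *			page_cache_release(page);
 *		}
 *		mmput(mm);
 *	}
 */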

int access_process_vm_atomic(struct task_struct *tsk, unsigned long addr, void *buf, int len, int write)
{
	struct mm_struct *mm;
	struct vm_area_struct *vma;
	void *old_buf = buf;

	mm = get_task_mm(tsk);
	if (!mm)
		return 0;

	/* down_read(&mm->mmap_sem); */
	/* ignore errors, just check how much was successfully transferred */
	while (len) {
		int bytes, ret, offset;
		void *maddr;
		struct page *page = NULL;

		ret = get_user_pages_uprobe(tsk, mm, addr, 1,
				write, 1, &page, &vma);

		if (ret <= 0) {
			/*
			 * Check if this is a VM_IO | VM_PFNMAP VMA, which
			 * we can access using slightly different code.
			 */
#ifdef CONFIG_HAVE_IOREMAP_PROT
			vma = find_vma(mm, addr);
			if (!vma)
				break;
			if (vma->vm_ops && vma->vm_ops->access)
				ret = vma->vm_ops->access(vma, addr, buf,
							  len, write);
			if (ret <= 0)
#endif
				break;
			bytes = ret;
		} else {
			bytes = len;
			offset = addr & (PAGE_SIZE-1);
			if (bytes > PAGE_SIZE-offset)
				bytes = PAGE_SIZE-offset;

			maddr = kmap(page);
			if (write) {
				copy_to_user_page(vma, page, addr,
						  maddr + offset, buf, bytes);
				set_page_dirty_lock(page);
			} else {
				copy_from_user_page(vma, page, addr,
						    buf, maddr + offset, bytes);
			}
			kunmap(page);
			page_cache_release(page);
		}
		len -= bytes;
		buf += bytes;
		addr += bytes;
	}
	/* up_read(&mm->mmap_sem); */
	mmput(mm);

	return buf - old_buf;
}
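
/*
 * Usage sketch (illustrative, not from the original source): reading one
 * word of a traced task's address space without taking mmap_sem.  "task"
 * and "insn_addr" are hypothetical names supplied by the caller.
 *
 *	unsigned long insn;
 *
 *	if (access_process_vm_atomic(task, insn_addr, &insn,
 *				     sizeof(insn), 0) != sizeof(insn))
 *		DBPRINTF("failed to read word at %lx\n", insn_addr);
 */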

int page_present(struct mm_struct *mm, unsigned long address)
{
	pgd_t *pgd;
	pud_t *pud;
	pmd_t *pmd;
	pte_t *ptep, pte;
	unsigned long pfn;

	pgd = pgd_offset(mm, address);
	if (pgd_none(*pgd) || unlikely(pgd_bad(*pgd)))
		goto out;

	pud = pud_offset(pgd, address);
	if (pud_none(*pud) || unlikely(pud_bad(*pud)))
		goto out;

	pmd = pmd_offset(pud, address);
	if (pmd_none(*pmd) || unlikely(pmd_bad(*pmd)))
		goto out;

	ptep = pte_offset_map(pmd, address);
	if (!ptep)
		goto out;

	pte = *ptep;
	pte_unmap(ptep);
	if (pte_present(pte)) {
		pfn = pte_pfn(pte);
		if (pfn_valid(pfn))
			return 1;
	}

out:
	return 0;
}
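
/*
 * Usage sketch (illustrative, not from the original source): check whether
 * a user address is currently mapped before attempting a non-faulting
 * access.  "task" and "uaddr" are hypothetical caller-supplied values.
 *
 *	struct mm_struct *mm = get_task_mm(task);
 *
 *	if (mm) {
 *		if (page_present(mm, uaddr))
 *			DBPRINTF("page at %lx is resident\n", uaddr);
 *		mmput(mm);
 *	}
 */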

EXPORT_SYMBOL_GPL(page_present);
EXPORT_SYMBOL_GPL(get_user_pages_uprobe);
EXPORT_SYMBOL_GPL(access_process_vm_atomic);