/*
 *  Dynamic Binary Instrumentation Module based on KProbes
 *  modules/kprobe/dbi_kprobes_deps.c
 *
 * This program is free software; you can redistribute it and/or modify
 * it under the terms of the GNU General Public License as published by
 * the Free Software Foundation; either version 2 of the License, or
 * (at your option) any later version.
 *
 * This program is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 * GNU General Public License for more details.
 *
 * You should have received a copy of the GNU General Public License
 * along with this program; if not, write to the Free Software
 * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
 *
 * Copyright (C) Samsung Electronics, 2006-2010
 *
 * 2008-2009    Alexey Gerenkov <a.gerenkov@samsung.com>: User-Space Probes
 *              initial implementation; support for x86/ARM/MIPS in both user and kernel space.
 * 2010         Ekaterina Gorelkina <e.gorelkina@samsung.com>: redesign module for separating core and arch parts
 *
 */
27
28 #include <linux/module.h>
29
30 #include "dbi_kprobes_deps.h"
31 #include "dbi_kdebug.h"
32
33
34 unsigned int *sched_addr;
35 unsigned int *fork_addr;
36
37
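/*
 * Local copies of the internal get_user_pages() flag values.  These appear
 * to mirror the GUP_FLAGS_* constants from mm/internal.h of ~2.6.29-2.6.31
 * kernels, which are not exported to modules.
 */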
#define GUP_FLAGS_WRITE                  0x1
#define GUP_FLAGS_FORCE                  0x2
#define GUP_FLAGS_IGNORE_VMA_PERMISSIONS 0x4
#define GUP_FLAGS_IGNORE_SIGKILL         0x8

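/*
 * The DECLARE_MOD_*() / IMP_MOD_DEP_WRAPPER() / INIT_MOD_DEP_VAR() macros
 * come from dbi_kprobes_deps.h.  As used here they appear to declare, for
 * each kernel symbol that is not exported to modules, a function pointer
 * plus a wrapper with the original signature that calls through that
 * pointer; init_module_dependencies() below fills in the pointers,
 * presumably by resolving the symbol names at run time (see the
 * kallsyms_search callback).
 */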
DECLARE_MOD_CB_DEP(kallsyms_search, unsigned long, const char *name);

DECLARE_MOD_FUNC_DEP(access_process_vm, int, struct task_struct * tsk, unsigned long addr, void *buf, int len, int write);

DECLARE_MOD_FUNC_DEP(find_extend_vma, struct vm_area_struct *, struct mm_struct * mm, unsigned long addr);

#if LINUX_VERSION_CODE <= KERNEL_VERSION(2, 6, 30)
DECLARE_MOD_FUNC_DEP(handle_mm_fault, int, struct mm_struct *mm, struct vm_area_struct *vma, unsigned long address, int write_access);
#else
DECLARE_MOD_FUNC_DEP(handle_mm_fault, int, struct mm_struct *mm, struct vm_area_struct *vma, unsigned long address, unsigned int flags);
#endif /* LINUX_VERSION_CODE <= KERNEL_VERSION(2, 6, 30) */

DECLARE_MOD_FUNC_DEP(get_gate_vma, struct vm_area_struct *, struct task_struct *tsk);
DECLARE_MOD_FUNC_DEP(in_gate_area_no_task, int, unsigned long addr);
DECLARE_MOD_FUNC_DEP(follow_page, \
		struct page *, struct vm_area_struct * vma, \
		unsigned long address, unsigned int foll_flags);
DECLARE_MOD_FUNC_DEP(__flush_anon_page, \
		void, struct vm_area_struct *vma, struct page *page, \
		unsigned long vmaddr);
DECLARE_MOD_FUNC_DEP(vm_normal_page, \
		struct page *, struct vm_area_struct *vma, \
		unsigned long addr, pte_t pte);

DECLARE_MOD_FUNC_DEP(flush_ptrace_access, \
		void, struct vm_area_struct *vma, struct page *page, \
		unsigned long uaddr, void *kaddr, unsigned long len, int write);


#if (LINUX_VERSION_CODE != KERNEL_VERSION(2, 6, 16))
DECLARE_MOD_FUNC_DEP(put_task_struct, \
		void, struct task_struct *tsk);
#else
DECLARE_MOD_FUNC_DEP(put_task_struct, \
		void, struct rcu_head * rhp);
#endif

DECLARE_MOD_DEP_WRAPPER(access_process_vm, int, struct task_struct *tsk, unsigned long addr, void *buf, int len, int write)
IMP_MOD_DEP_WRAPPER(access_process_vm, tsk, addr, buf, len, write)

DECLARE_MOD_DEP_WRAPPER(find_extend_vma, struct vm_area_struct *, struct mm_struct * mm, unsigned long addr)
IMP_MOD_DEP_WRAPPER(find_extend_vma, mm, addr)

#if LINUX_VERSION_CODE <= KERNEL_VERSION(2, 6, 30)
DECLARE_MOD_DEP_WRAPPER(handle_mm_fault, \
		int, struct mm_struct *mm, struct vm_area_struct *vma, unsigned long address, int write_access)
IMP_MOD_DEP_WRAPPER(handle_mm_fault, mm, vma, address, write_access)
#else
DECLARE_MOD_DEP_WRAPPER(handle_mm_fault, \
		int, struct mm_struct *mm, struct vm_area_struct *vma, unsigned long address, unsigned int flags)
IMP_MOD_DEP_WRAPPER(handle_mm_fault, mm, vma, address, flags)
#endif

DECLARE_MOD_DEP_WRAPPER(get_gate_vma, \
		struct vm_area_struct *, struct task_struct *tsk)
IMP_MOD_DEP_WRAPPER(get_gate_vma, tsk)

DECLARE_MOD_DEP_WRAPPER(in_gate_area_no_task, int, unsigned long addr)
IMP_MOD_DEP_WRAPPER(in_gate_area_no_task, addr)

DECLARE_MOD_DEP_WRAPPER(follow_page, \
		struct page *, struct vm_area_struct * vma, \
		unsigned long address, unsigned int foll_flags)
IMP_MOD_DEP_WRAPPER(follow_page, vma, address, foll_flags)

DECLARE_MOD_DEP_WRAPPER(__flush_anon_page, \
		void, struct vm_area_struct *vma, \
		struct page *page, unsigned long vmaddr)
IMP_MOD_DEP_WRAPPER(__flush_anon_page, vma, page, vmaddr)

DECLARE_MOD_DEP_WRAPPER(vm_normal_page, \
		struct page *, struct vm_area_struct *vma, \
		unsigned long addr, pte_t pte)
IMP_MOD_DEP_WRAPPER(vm_normal_page, vma, addr, pte)

DECLARE_MOD_DEP_WRAPPER(flush_ptrace_access, \
		void, struct vm_area_struct *vma, struct page *page, \
		unsigned long uaddr, void *kaddr, unsigned long len, int write)
IMP_MOD_DEP_WRAPPER(flush_ptrace_access, vma, page, uaddr, kaddr, len, write)

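/*
 * Resolve all of the run-time dependencies declared above.  Intended to be
 * called once, before any of the wrappers in this file are used.  Always
 * returns 0 in the current implementation.
 */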
int init_module_dependencies(void)
{
	INIT_MOD_DEP_VAR(handle_mm_fault, handle_mm_fault);
	INIT_MOD_DEP_VAR(flush_ptrace_access, flush_ptrace_access);
	INIT_MOD_DEP_VAR(find_extend_vma, find_extend_vma);
	INIT_MOD_DEP_VAR(get_gate_vma, get_gate_vma);
	INIT_MOD_DEP_VAR(in_gate_area_no_task, in_gate_area_no_task);
	INIT_MOD_DEP_VAR(follow_page, follow_page);
	INIT_MOD_DEP_VAR(__flush_anon_page, __flush_anon_page);
	INIT_MOD_DEP_VAR(vm_normal_page, vm_normal_page);
	INIT_MOD_DEP_VAR(access_process_vm, access_process_vm);

#if (LINUX_VERSION_CODE != KERNEL_VERSION(2, 6, 16))
# if (LINUX_VERSION_CODE < KERNEL_VERSION(2, 6, 11))
	INIT_MOD_DEP_VAR(put_task_struct, put_task_struct);
# else
	INIT_MOD_DEP_VAR(put_task_struct, __put_task_struct);
# endif
#else /* 2.6.16 */
	INIT_MOD_DEP_VAR(put_task_struct, __put_task_struct_cb);
#endif

	return 0;
}
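
/*
 * Illustrative usage only (not part of this file): a client module would
 * typically resolve the dependencies from its init routine before arming
 * any probes.  The names below are hypothetical.
 *
 *	static int __init my_probe_module_init(void)
 *	{
 *		int ret = init_module_dependencies();
 *		if (ret)
 *			return ret;
 *		return register_my_probes();
 *	}
 */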


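/*
 * This appears to be a local copy of the static use_zero_page() helper from
 * mm/memory.c of pre-2.6.31 kernels (the only kernels where FOLL_ANON is
 * defined); it is only used under the version check further below.
 */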
static inline int use_zero_page(struct vm_area_struct *vma)
{
	/*
	 * We don't want to optimize FOLL_ANON for make_pages_present()
	 * when it tries to page in a VM_LOCKED region. As to VM_SHARED,
	 * we want to get the page from the page tables to make sure
	 * that we serialize and update with any other user of that
	 * mapping.
	 */
	if (vma->vm_flags & (VM_LOCKED | VM_SHARED))
		return 0;
	/*
	 * And if we have a fault routine, it's not an anonymous region.
	 */
	return !vma->vm_ops || !vma->vm_ops->fault;
}

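/*
 * This appears to be a copy of the kernel's internal __get_user_pages()
 * (circa 2.6.29/2.6.30) with the cond_resched() calls commented out and
 * find_extend_vma() replaced by find_vma(), presumably to avoid those side
 * effects when called from probe context.  Returns the number of pages
 * pinned, or a negative errno if nothing was pinned.
 */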
int __get_user_pages_uprobe(struct task_struct *tsk, struct mm_struct *mm,
		unsigned long start, int len, int flags,
		struct page **pages, struct vm_area_struct **vmas)
{
	int i;
	unsigned int vm_flags = 0;
	int write = !!(flags & GUP_FLAGS_WRITE);
	int force = !!(flags & GUP_FLAGS_FORCE);
	int ignore = !!(flags & GUP_FLAGS_IGNORE_VMA_PERMISSIONS);
	int ignore_sigkill = !!(flags & GUP_FLAGS_IGNORE_SIGKILL);

	if (len <= 0)
		return 0;
	/*
	 * Require read or write permissions.
	 * If 'force' is set, we only require the "MAY" flags.
	 */
	vm_flags  = write ? (VM_WRITE | VM_MAYWRITE) : (VM_READ | VM_MAYREAD);
	vm_flags &= force ? (VM_MAYREAD | VM_MAYWRITE) : (VM_READ | VM_WRITE);
	i = 0;

	do {
		struct vm_area_struct *vma;
		unsigned int foll_flags;

		//vma = find_extend_vma(mm, start);
		vma = find_vma(mm, start);
		if (!vma && in_gate_area(tsk, start)) {
			unsigned long pg = start & PAGE_MASK;
			struct vm_area_struct *gate_vma = get_gate_vma(tsk);
			pgd_t *pgd;
			pud_t *pud;
			pmd_t *pmd;
			pte_t *pte;

			/* user gate pages are read-only */
			if (!ignore && write)
				return i ? : -EFAULT;
			if (pg > TASK_SIZE)
				pgd = pgd_offset_k(pg);
			else
				pgd = pgd_offset_gate(mm, pg);
			BUG_ON(pgd_none(*pgd));
			pud = pud_offset(pgd, pg);
			BUG_ON(pud_none(*pud));
			pmd = pmd_offset(pud, pg);
			if (pmd_none(*pmd))
				return i ? : -EFAULT;
			pte = pte_offset_map(pmd, pg);
			if (pte_none(*pte)) {
				pte_unmap(pte);
				return i ? : -EFAULT;
			}
			if (pages) {
				struct page *page = vm_normal_page(gate_vma, start, *pte);
				pages[i] = page;
				if (page)
					get_page(page);
			}
			pte_unmap(pte);
			if (vmas)
				vmas[i] = gate_vma;
			i++;
			start += PAGE_SIZE;
			len--;
			continue;
		}

		if (!vma ||
				(vma->vm_flags & (VM_IO | VM_PFNMAP)) ||
				(!ignore && !(vm_flags & vma->vm_flags)))
			return i ? : -EFAULT;

		if (is_vm_hugetlb_page(vma)) {
			i = follow_hugetlb_page(mm, vma, pages, vmas,
					&start, &len, i, write);
			continue;
		}

		foll_flags = FOLL_TOUCH;
		if (pages)
			foll_flags |= FOLL_GET;

#if LINUX_VERSION_CODE <= KERNEL_VERSION(2, 6, 30)
		if (!write && use_zero_page(vma))
			foll_flags |= FOLL_ANON;
#endif

		do {
			struct page *page;

			/*
			 * If we have a pending SIGKILL, don't keep faulting
			 * pages and potentially allocating memory, unless
			 * current is handling munlock--e.g., on exit. In
			 * that case, we are not allocating memory.  Rather,
			 * we're only unlocking already resident/mapped pages.
			 */
			if (unlikely(!ignore_sigkill &&
						fatal_signal_pending(current)))
				return i ? i : -ERESTARTSYS;

			if (write)
				foll_flags |= FOLL_WRITE;

			//cond_resched();

			DBPRINTF("pages = %p vma = %p\n", pages, vma);
			while (!(page = follow_page(vma, start, foll_flags))) {
				int ret;
				ret = handle_mm_fault(mm, vma, start,
						foll_flags & FOLL_WRITE);
				if (ret & VM_FAULT_ERROR) {
					if (ret & VM_FAULT_OOM)
						return i ? i : -ENOMEM;
					else if (ret & VM_FAULT_SIGBUS)
						return i ? i : -EFAULT;
					BUG();
				}
				if (ret & VM_FAULT_MAJOR)
					tsk->maj_flt++;
				else
					tsk->min_flt++;

				/*
				 * The VM_FAULT_WRITE bit tells us that
				 * do_wp_page has broken COW when necessary,
				 * even if maybe_mkwrite decided not to set
				 * pte_write. We can thus safely do subsequent
				 * page lookups as if they were reads. But only
				 * do so when looping for pte_write is futile:
				 * in some cases userspace may also be wanting
				 * to write to the gotten user page, which a
				 * read fault here might prevent (a readonly
				 * page might get reCOWed by userspace write).
				 */
				if ((ret & VM_FAULT_WRITE) &&
						!(vma->vm_flags & VM_WRITE))
					foll_flags &= ~FOLL_WRITE;

				//cond_resched();
			}
			if (IS_ERR(page))
				return i ? i : PTR_ERR(page);
			if (pages) {
				pages[i] = page;

				flush_anon_page(vma, page, start);
				flush_dcache_page(page);
			}
			if (vmas)
				vmas[i] = vma;
			i++;
			start += PAGE_SIZE;
			len--;
		} while (len && start < vma->vm_end);
	} while (len);

	return i;
}

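/*
 * Wrapper that selects between the local __get_user_pages_uprobe() copy
 * (2.6.29 and newer, where the flag-based internal interface exists) and
 * the exported get_user_pages() on older kernels.
 */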
int get_user_pages_uprobe(struct task_struct *tsk, struct mm_struct *mm,
		unsigned long start, int len, int write, int force,
		struct page **pages, struct vm_area_struct **vmas)
{
#if LINUX_VERSION_CODE >= KERNEL_VERSION(2, 6, 29)
	int flags = 0;

	if (write)
		flags |= GUP_FLAGS_WRITE;
	if (force)
		flags |= GUP_FLAGS_FORCE;

	return __get_user_pages_uprobe(tsk, mm,
			start, len, flags,
			pages, vmas);
#else
	return get_user_pages(tsk, mm,
			      start, len, write, force,
			      pages, vmas);
#endif
}

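/*
 * Read from or write to another process' address space.  Mirrors the
 * kernel's access_process_vm(), but goes through get_user_pages_uprobe()
 * above instead of the stock get_user_pages().  Returns the number of bytes
 * actually transferred (0 if the task's mm could not be grabbed).
 *
 * Illustrative call (hypothetical caller, not part of this file):
 *
 *	unsigned long insn;
 *	if (access_process_vm_atomic(task, probe_addr, &insn,
 *				     sizeof(insn), 0) != sizeof(insn))
 *		return -EFAULT;
 */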
int access_process_vm_atomic(struct task_struct *tsk, unsigned long addr, void *buf, int len, int write)
{
	struct mm_struct *mm;
	struct vm_area_struct *vma;
	void *old_buf = buf;

	mm = get_task_mm(tsk);
	if (!mm)
		return 0;

	down_read(&mm->mmap_sem);
	/* ignore errors, just check how much was successfully transferred */
	while (len) {
		int bytes, ret, offset;
		void *maddr;
		struct page *page = NULL;

		ret = get_user_pages_uprobe(tsk, mm, addr, 1,
				write, 1, &page, &vma);
		if (ret <= 0) {
			/*
			 * Check if this is a VM_IO | VM_PFNMAP VMA, which
			 * we can access using slightly different code.
			 */
#ifdef CONFIG_HAVE_IOREMAP_PROT
			vma = find_vma(mm, addr);
			if (!vma)
				break;
			if (vma->vm_ops && vma->vm_ops->access)
				ret = vma->vm_ops->access(vma, addr, buf,
						len, write);
			if (ret <= 0)
#endif
				break;
			bytes = ret;
		} else {
			bytes = len;
			offset = addr & (PAGE_SIZE-1);
			if (bytes > PAGE_SIZE-offset)
				bytes = PAGE_SIZE-offset;

			maddr = kmap(page);
			if (write) {
				copy_to_user_page(vma, page, addr,
						maddr + offset, buf, bytes);
				set_page_dirty_lock(page);
			} else {
				copy_from_user_page(vma, page, addr,
						buf, maddr + offset, bytes);
			}
			kunmap(page);
			page_cache_release(page);
		}
		len -= bytes;
		buf += bytes;
		addr += bytes;
	}
	up_read(&mm->mmap_sem);
	mmput(mm);

	return buf - old_buf;
}

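/*
 * Walk the page tables of @mm and report whether a page is currently
 * present (mapped) at @addr.  Returns 1 if the PTE is present, 0 otherwise.
 * The walk is done under mmap_sem (read) and mm->page_table_lock.
 */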
int page_present(struct mm_struct *mm, unsigned long addr)
{
	pgd_t *pgd;
	pmd_t *pmd;
	pte_t *pte;
	int ret = 0;
#if LINUX_VERSION_CODE >= KERNEL_VERSION(2, 6, 11)
	pud_t *pud;
#endif

	down_read(&mm->mmap_sem);
	spin_lock(&mm->page_table_lock);

	pgd = pgd_offset(mm, addr);
	if ((pgd != NULL) && pgd_present(*pgd)) {
#if LINUX_VERSION_CODE >= KERNEL_VERSION(2, 6, 11)
		pud = pud_offset(pgd, addr);
		if ((pud != NULL) && pud_present(*pud)) {
			pmd = pmd_offset(pud, addr);
#else
		/* no pud level before 2.6.11: go straight from pgd to pmd */
		{
			pmd = pmd_offset(pgd, addr);
#endif
			if ((pmd != NULL) && pmd_present(*pmd)) {
				pte = pte_offset_map(pmd, addr);
				if ((pte != NULL) && pte_present(*pte))
					ret = 1;
				pte_unmap(pte);
			}
		}
	}

	spin_unlock(&mm->page_table_lock);
	up_read(&mm->mmap_sem);

	return ret;
}


EXPORT_SYMBOL_GPL(access_process_vm_atomic);
EXPORT_SYMBOL_GPL(page_present);
EXPORT_SYMBOL_GPL(get_user_pages_uprobe);