97daf85279d0c612dda884f48dba54dfa8b3f502
[kernel/swap-modules.git] / uprobe / arch / x86 / swap-asm / swap_uprobes.c
1 /**
2  * uprobe/arch/asm-x86/swap_uprobes.c
3  * @author Alexey Gerenkov <a.gerenkov@samsung.com> User-Space Probes initial
4  * implementation; Support x86/ARM/MIPS for both user and kernel spaces.
5  * @author Ekaterina Gorelkina <e.gorelkina@samsung.com>: redesign module for
6  * separating core and arch parts
7  *
8  * @section LICENSE
9  *
10  * This program is free software; you can redistribute it and/or modify
11  * it under the terms of the GNU General Public License as published by
12  * the Free Software Foundation; either version 2 of the License, or
13  * (at your option) any later version.
14  *
15  * This program is distributed in the hope that it will be useful,
16  * but WITHOUT ANY WARRANTY; without even the implied warranty of
17  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
18  * GNU General Public License for more details.
19  *
20  * You should have received a copy of the GNU General Public License
21  * along with this program; if not, write to the Free Software
22  * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
23  *
24  * @section COPYRIGHT
25  *
26  * Copyright (C) Samsung Electronics, 2006-2010
27  *
28  * @section DESCRIPTION
29  *
30  * Arch-dependent uprobe interface implementation for x86.
31  */
32
33
34 #include <linux/kdebug.h>
35
36 #include <kprobe/swap_slots.h>
37 #include <kprobe/swap_td_raw.h>
38 #include <uprobe/swap_uprobes.h>
39
40 #include "swap_uprobes.h"
41
42
/*
 * Per-task state used to defer a probe handler: filled in by
 * exceptions_handler() and consumed by __up_handler().
 */
struct save_context {
	struct pt_regs save_regs;	/* copy of the registers taken at trap time */
	struct pt_regs *ptr_regs;	/* location of the live register frame */
	unsigned long val;		/* not referenced anywhere in this file */
	int (*handler)(struct uprobe *, struct pt_regs *); /* deferred handler */
};
49
/**
 * @struct uprobe_ctlblk
 * @brief Uprobe control block
 *
 * One instance per task; obtained through current_ucb().
 */
struct uprobe_ctlblk {
	unsigned long flags;            /**< Flags */
	struct uprobe *p;               /**< Pointer to the uprobe */

	struct save_context ctx;	/**< Deferred-handler context */
};
60
61
/*
 * Handle for the per-task raw storage that holds struct uprobe_ctlblk;
 * registered in swap_arch_init_uprobes() and released in
 * swap_arch_exit_uprobes().
 */
static struct td_raw td_raw;
63
64
65 static unsigned long trampoline_addr(struct uprobe *up)
66 {
67         return (unsigned long)(up->ainsn.insn +
68                                UPROBES_TRAMP_RET_BREAK_IDX);
69 }
70
71 unsigned long arch_tramp_by_ri(struct uretprobe_instance *ri)
72 {
73         return trampoline_addr(&ri->rp->up);
74 }
75
76 static struct uprobe_ctlblk *current_ucb(void)
77 {
78         return (struct uprobe_ctlblk *)swap_td_raw(&td_raw, current);
79 }
80
81 static struct save_context *current_ctx(void)
82 {
83         return &current_ucb()->ctx;
84 }
85
86 static struct uprobe *get_current_probe(void)
87 {
88         return current_ucb()->p;
89 }
90
91 static void set_current_probe(struct uprobe *p)
92 {
93         current_ucb()->p = p;
94 }
95
96 static void save_current_flags(struct pt_regs *regs)
97 {
98         current_ucb()->flags = regs->flags;
99 }
100
101 static void restore_current_flags(struct pt_regs *regs, unsigned long flags)
102 {
103         regs->flags &= ~IF_MASK;
104         regs->flags |= flags & IF_MASK;
105 }
106
107 /**
108  * @brief Prepares uprobe for x86.
109  *
110  * @param up Pointer to the uprobe.
111  * @return 0 on success,\n
112  * -1 on error.
113  */
114 int arch_prepare_uprobe(struct uprobe *p)
115 {
116         struct task_struct *task = p->task;
117         u8 tramp[UPROBES_TRAMP_LEN + BP_INSN_SIZE];     /* BP for uretprobe */
118         enum { call_relative_opcode = 0xe8 };
119
120         if (!read_proc_vm_atomic(task, (unsigned long)p->addr,
121                                  tramp, MAX_INSN_SIZE)) {
122                 printk(KERN_ERR "failed to read memory %p!\n", p->addr);
123                 return -EINVAL;
124         }
125         /* TODO: this is a workaround */
126         if (tramp[0] == call_relative_opcode) {
127                 printk(KERN_INFO "cannot install probe: 1st instruction is call\n");
128                 return -EINVAL;
129         }
130
131         tramp[UPROBES_TRAMP_RET_BREAK_IDX] = BREAKPOINT_INSTRUCTION;
132
133         p->opcode = tramp[0];
134         p->ainsn.boostable = swap_can_boost(tramp) ? 0 : -1;
135
136         p->ainsn.insn = swap_slot_alloc(p->sm);
137         if (p->ainsn.insn == NULL) {
138                 printk(KERN_ERR "trampoline out of memory\n");
139                 return -ENOMEM;
140         }
141
142         if (!write_proc_vm_atomic(task, (unsigned long)p->ainsn.insn,
143                                   tramp, sizeof(tramp))) {
144                 swap_slot_free(p->sm, p->ainsn.insn);
145                 printk(KERN_INFO "failed to write memory %p!\n", tramp);
146                 return -EINVAL;
147         }
148
149         /* for uretprobe */
150         add_uprobe_table(p);
151
152         return 0;
153 }
154
155 /**
156  * @brief Jump pre-handler.
157  *
158  * @param p Pointer to the uprobe.
159  * @param regs Pointer to CPU register data.
160  * @return 0.
161  */
162 int setjmp_upre_handler(struct uprobe *p, struct pt_regs *regs)
163 {
164         struct ujprobe *jp = container_of(p, struct ujprobe, up);
165         entry_point_t entry = (entry_point_t)jp->entry;
166         unsigned long args[6];
167
168         /* FIXME some user space apps crash if we clean interrupt bit */
169         /* regs->EREG(flags) &= ~IF_MASK; */
170 #if LINUX_VERSION_CODE >= KERNEL_VERSION(2, 6, 18)
171         trace_hardirqs_off();
172 #endif
173
174         /* read first 6 args from stack */
175         if (!read_proc_vm_atomic(current, regs->EREG(sp) + 4,
176                                  args, sizeof(args)))
177                 printk(KERN_WARNING
178                        "failed to read user space func arguments %lx!\n",
179                        regs->sp + 4);
180
181         if (entry)
182                 entry(args[0], args[1], args[2], args[3], args[4], args[5]);
183         else
184                 arch_ujprobe_return();
185
186         return 0;
187 }
188
189 /**
190  * @brief Prepares uretprobe for x86.
191  *
192  * @param ri Pointer to the uretprobe instance.
193  * @param regs Pointer to CPU register data.
194  * @return Void.
195  */
196 int arch_prepare_uretprobe(struct uretprobe_instance *ri, struct pt_regs *regs)
197 {
198         /* Replace the return addr with trampoline addr */
199         unsigned long ra = trampoline_addr(&ri->rp->up);
200         unsigned long ret_addr;
201         ri->sp = (kprobe_opcode_t *)regs->sp;
202
203         if (get_user(ret_addr, (unsigned long *)regs->sp)) {
204                 pr_err("failed to read user space func ra %lx addr=%p!\n",
205                        regs->sp, ri->rp->up.addr);
206                 return -EINVAL;
207         }
208
209         if (put_user(ra, (unsigned long *)regs->sp)) {
210                 pr_err("failed to write user space func ra %lx!\n", regs->sp);
211                 return -EINVAL;
212         }
213
214         ri->ret_addr = (uprobe_opcode_t *)ret_addr;
215
216         return 0;
217 }
218
219 static bool get_long(struct task_struct *task,
220                      unsigned long vaddr, unsigned long *val)
221 {
222         return sizeof(*val) != read_proc_vm_atomic(task, vaddr,
223                                                    val, sizeof(*val));
224 }
225
226 static bool put_long(struct task_struct *task,
227                      unsigned long vaddr, unsigned long *val)
228 {
229         return sizeof(*val) != write_proc_vm_atomic(task, vaddr,
230                                                     val, sizeof(*val));
231 }
232
233 /**
234  * @brief Disarms uretprobe on x86 arch.
235  *
236  * @param ri Pointer to the uretprobe instance.
237  * @param task Pointer to the task for which the probe.
238  * @return 0 on success,\n
239  * negative error code on error.
240  */
241 int arch_disarm_urp_inst(struct uretprobe_instance *ri,
242                          struct task_struct *task)
243 {
244         unsigned long ret_addr;
245         unsigned long sp = (unsigned long)ri->sp;
246         unsigned long tramp_addr = trampoline_addr(&ri->rp->up);
247
248         if (get_long(task, sp, &ret_addr)) {
249                 printk(KERN_INFO "---> %s (%d/%d): failed to read stack from %08lx\n",
250                        task->comm, task->tgid, task->pid, sp);
251                 return -EFAULT;
252         }
253
254         if (tramp_addr == ret_addr) {
255                 if (put_long(task, sp, (unsigned long *)&ri->ret_addr)) {
256                         printk(KERN_INFO "---> %s (%d/%d): failed to write "
257                                "orig_ret_addr to %08lx",
258                                task->comm, task->tgid, task->pid, sp);
259                         return -EFAULT;
260                 }
261         } else {
262                 printk(KERN_INFO "---> %s (%d/%d): trampoline NOT found at sp = %08lx\n",
263                        task->comm, task->tgid, task->pid, sp);
264                 return -ENOENT;
265         }
266
267         return 0;
268 }
269
/**
 * @brief Gets trampoline address.
 *
 * @param p Pointer to the uprobe.
 * @param regs Pointer to CPU register data (not used on x86).
 * @return Trampoline address.
 */
unsigned long arch_get_trampoline_addr(struct uprobe *p, struct pt_regs *regs)
{
	unsigned long addr = trampoline_addr(p);

	return addr;
}
281
282 /**
283  * @brief Restores return address.
284  *
285  * @param orig_ret_addr Original return address.
286  * @param regs Pointer to CPU register data.
287  * @return Void.
288  */
289 void arch_set_orig_ret_addr(unsigned long orig_ret_addr, struct pt_regs *regs)
290 {
291         regs->EREG(ip) = orig_ret_addr;
292 }
293
294 /**
295  * @brief Removes uprobe.
296  *
297  * @param up Pointer to the target uprobe.
298  * @return Void.
299  */
300 void arch_remove_uprobe(struct uprobe *p)
301 {
302         swap_slot_free(p->sm, p->ainsn.insn);
303 }
304
305 int arch_arm_uprobe(struct uprobe *p)
306 {
307         int ret;
308         uprobe_opcode_t insn = BREAKPOINT_INSTRUCTION;
309         unsigned long vaddr = (unsigned long)p->addr;
310
311         ret = write_proc_vm_atomic(p->task, vaddr, &insn, sizeof(insn));
312         if (!ret) {
313                 pr_err("arch_arm_uprobe: failed to write memory tgid=%u vaddr=%08lx\n",
314                        p->task->tgid, vaddr);
315
316                 return -EACCES;
317         }
318
319         return 0;
320 }
321
322 void arch_disarm_uprobe(struct uprobe *p, struct task_struct *task)
323 {
324         int ret;
325         unsigned long vaddr = (unsigned long)p->addr;
326
327         ret = write_proc_vm_atomic(task, vaddr, &p->opcode, sizeof(p->opcode));
328         if (!ret) {
329                 pr_err("arch_disarm_uprobe: failed to write memory tgid=%u, vaddr=%08lx\n",
330                        task->tgid, vaddr);
331         }
332 }
333
334 static void set_user_jmp_op(void *from, void *to)
335 {
336         struct __arch_jmp_op {
337                 char op;
338                 long raddr;
339         } __packed jop;
340
341         jop.raddr = (long)(to) - ((long)(from) + 5);
342         jop.op = RELATIVEJUMP_INSTRUCTION;
343
344         if (put_user(jop.op, (char *)from) ||
345             put_user(jop.raddr, (long *)(from + 1)))
346                 pr_err("failed to write jump opcode to user space %p\n", from);
347 }
348
349 static void resume_execution(struct uprobe *p,
350                              struct pt_regs *regs,
351                              unsigned long flags)
352 {
353         unsigned long *tos, tos_dword = 0;
354         unsigned long copy_eip = (unsigned long)p->ainsn.insn;
355         unsigned long orig_eip = (unsigned long)p->addr;
356         uprobe_opcode_t insns[2];
357
358         regs->EREG(flags) &= ~TF_MASK;
359
360         tos = (unsigned long *)&tos_dword;
361         if (get_user(tos_dword, (unsigned long *)regs->sp)) {
362                 pr_err("failed to read from user space sp=%lx!\n", regs->sp);
363                 return;
364         }
365
366         if (get_user(*(unsigned short *)insns, (unsigned short *)p->ainsn.insn)) {
367                 pr_err("failed to read first 2 opcodes %p!\n", p->ainsn.insn);
368                 return;
369         }
370
371         switch (insns[0]) {
372         case 0x9c: /* pushfl */
373                 *tos &= ~(TF_MASK | IF_MASK);
374                 *tos |= flags & (TF_MASK | IF_MASK);
375                 break;
376         case 0xc2: /* iret/ret/lret */
377         case 0xc3:
378         case 0xca:
379         case 0xcb:
380         case 0xcf:
381         case 0xea: /* jmp absolute -- eip is correct */
382                 /* eip is already adjusted, no more changes required */
383                 p->ainsn.boostable = 1;
384                 goto no_change;
385         case 0xe8: /* call relative - Fix return addr */
386                 *tos = orig_eip + (*tos - copy_eip);
387                 break;
388         case 0x9a: /* call absolute -- same as call absolute, indirect */
389                 *tos = orig_eip + (*tos - copy_eip);
390
391                 if (put_user(tos_dword, (unsigned long *)regs->sp)) {
392                         pr_err("failed to write dword to sp=%lx\n", regs->sp);
393                         return;
394                 }
395
396                 goto no_change;
397         case 0xff:
398                 if ((insns[1] & 0x30) == 0x10) {
399                         /*
400                          * call absolute, indirect
401                          * Fix return addr; eip is correct.
402                          * But this is not boostable
403                          */
404                         *tos = orig_eip + (*tos - copy_eip);
405
406                         if (put_user(tos_dword, (unsigned long *)regs->sp)) {
407                                 pr_err("failed to write dword to sp=%lx\n", regs->sp);
408                                 return;
409                         }
410
411                         goto no_change;
412                 } else if (((insns[1] & 0x31) == 0x20) || /* jmp near, absolute
413                                                            * indirect */
414                            ((insns[1] & 0x31) == 0x21)) {
415                         /* jmp far, absolute indirect */
416                         /* eip is correct. And this is boostable */
417                         p->ainsn.boostable = 1;
418                         goto no_change;
419                 }
420         case 0xf3:
421                 if (insns[1] == 0xc3)
422                         /* repz ret special handling: no more changes */
423                         goto no_change;
424                 break;
425         default:
426                 break;
427         }
428
429         if (put_user(tos_dword, (unsigned long *)regs->sp)) {
430                 pr_err("failed to write dword to sp=%lx\n", regs->sp);
431                 return;
432         }
433
434         if (p->ainsn.boostable == 0) {
435                 if ((regs->EREG(ip) > copy_eip) && (regs->EREG(ip) - copy_eip) +
436                     5 < MAX_INSN_SIZE) {
437                         /*
438                          * These instructions can be executed directly if it
439                          * jumps back to correct address.
440                          */
441                         set_user_jmp_op((void *) regs->EREG(ip),
442                                         (void *)orig_eip +
443                                         (regs->EREG(ip) - copy_eip));
444                         p->ainsn.boostable = 1;
445                 } else {
446                         p->ainsn.boostable = -1;
447                 }
448         }
449
450         regs->EREG(ip) = orig_eip + (regs->EREG(ip) - copy_eip);
451
452 no_change:
453         return;
454 }
455
456 static void prepare_tramp(struct uprobe *p, struct pt_regs *regs)
457 {
458         regs->ip = (unsigned long)p->ainsn.insn;
459 }
460
461 static void prepare_ss(struct pt_regs *regs)
462 {
463         /* set single step mode */
464         regs->flags |= TF_MASK;
465         regs->flags &= ~IF_MASK;
466 }
467
468
/*
 * Address of the kernel symbol "resume_userspace", looked up with
 * swap_ksyms() in swap_arch_init_uprobes(); jump target of __up_handler().
 */
static unsigned long resume_userspace_addr;
470
/*
 * C part of the deferred handler, entered from the up_handler asm stub
 * after exceptions_handler() redirected the trap return to it.
 *
 * Puts the saved register frame back, runs the handler stored in the
 * per-task context and then jumps to resume_userspace_addr; it never
 * returns to its caller.
 */
static void __used __up_handler(void)
{
	struct pt_regs *regs = current_ctx()->ptr_regs;
	struct thread_info *tinfo = current_thread_info();
	struct uprobe *p = current_ucb()->p;

	/* restore KS regs */
	/* undoes the modifications exceptions_handler() made to the frame */
	*regs = current_ctx()->save_regs;

	/* call handler */
	current_ctx()->handler(p, regs);

	/* resume_userspace */
	/*
	 * %esp is pointed at the register frame and %ebp is loaded with the
	 * thread_info pointer before the jump.
	 * NOTE(review): presumably this is the register state the kernel's
	 * resume_userspace entry code expects -- confirm against the target
	 * kernel version.
	 */
	asm volatile (
		"movl %0, %%esp\n"
		"movl %1, %%ebp\n"
		"jmpl *%2\n"
		: /* No outputs. */
		: "r" (regs), "r" (tinfo) , "r" (resume_userspace_addr)
	);
}
492
/*
 * Assembly entry stub installed as the trap return address by
 * exceptions_handler(); it lowers the stack pointer and tail-jumps into
 * __up_handler().
 */
void up_handler(void);
__asm(
	"up_handler:\n"
	/* move %esp down by 0x300 bytes to make some free space (skip regs) */
	"sub $0x300, %esp\n"
	"jmp __up_handler\n"
);
500
501 static int exceptions_handler(struct pt_regs *regs,
502                               int (*handler)(struct uprobe *, struct pt_regs *))
503 {
504         /* save regs */
505         current_ctx()->save_regs = *regs;
506         current_ctx()->ptr_regs = regs;
507
508         /* set handler */
509         current_ctx()->handler = handler;
510
511         /* setup regs to return to KS */
512         regs->ip = (unsigned long)up_handler;
513         regs->ds = __USER_DS;
514         regs->es = __USER_DS;
515         regs->fs = __KERNEL_PERCPU;
516         regs->cs = __KERNEL_CS | get_kernel_rpl();
517         regs->gs = 0;
518         regs->flags = X86_EFLAGS_IF | X86_EFLAGS_FIXED;
519
520         return 1;
521 }
522
523 static int uprobe_handler_retprobe(struct uprobe *p, struct pt_regs *regs)
524 {
525         int ret;
526
527         ret = trampoline_uprobe_handler(p, regs);
528         set_current_probe(NULL);
529         put_up(p);
530
531         return ret;
532 }
533
534 static int uprobe_handler_part2(struct uprobe *p, struct pt_regs *regs)
535 {
536         if (!p->pre_handler(p, regs)) {
537                 prepare_tramp(p, regs);
538                 if (p->ainsn.boostable == 1 && !p->post_handler)
539                         goto exit_and_put_up;
540
541                 save_current_flags(regs);
542                 set_current_probe(p);
543                 prepare_ss(regs);
544
545                 return 1;
546         }
547
548 exit_and_put_up:
549         set_current_probe(NULL);
550         put_up(p);
551         return 1;
552 }
553
554 static int uprobe_handler_atomic(struct pt_regs *regs)
555 {
556         pid_t tgid = current->tgid;
557         unsigned long vaddr = regs->ip - 1;
558         struct uprobe *p = get_uprobe((void *)vaddr, tgid);
559
560         if (p) {
561                 get_up(p);
562                 if (p->pre_handler) {
563                         set_current_probe(p);
564                         exceptions_handler(regs, uprobe_handler_part2);
565                 } else {
566                         uprobe_handler_part2(p, regs);
567                 }
568         } else {
569                 unsigned long tramp_vaddr;
570
571                 tramp_vaddr = vaddr - UPROBES_TRAMP_RET_BREAK_IDX;
572                 p = get_uprobe_by_insn_slot((void *)tramp_vaddr, tgid, regs);
573                 if (p == NULL) {
574                         pr_info("no_uprobe\n");
575                         return 0;
576                 }
577
578                 set_current_probe(p);
579                 get_up(p);
580                 exceptions_handler(regs, uprobe_handler_retprobe);
581         }
582
583         return 1;
584 }
585
586 static int post_uprobe_handler(struct uprobe *p, struct pt_regs *regs)
587 {
588         unsigned long flags = current_ucb()->flags;
589
590         resume_execution(p, regs, flags);
591         restore_current_flags(regs, flags);
592
593         /* reset current probe */
594         set_current_probe(NULL);
595         put_up(p);
596
597         return 1;
598 }
599
600 static int post_uprobe_handler_atomic(struct pt_regs *regs)
601 {
602         struct uprobe *p = get_current_probe();
603
604         if (p) {
605                 exceptions_handler(regs, post_uprobe_handler);
606         } else {
607                 pr_info("task[%u %u %s] current uprobe is not found\n",
608                         current->tgid, current->pid, current->comm);
609         }
610
611         return !!p;
612 }
613
614 static int uprobe_exceptions_notify(struct notifier_block *self,
615                                     unsigned long val, void *data)
616 {
617         struct die_args *args = (struct die_args *)data;
618         int ret = NOTIFY_DONE;
619
620         if (args->regs == NULL || !swap_user_mode(args->regs))
621                 return ret;
622
623         switch (val) {
624 #ifdef CONFIG_KPROBES
625         case DIE_INT3:
626 #else
627         case DIE_TRAP:
628 #endif
629                 if (uprobe_handler_atomic(args->regs))
630                         ret = NOTIFY_STOP;
631                 break;
632         case DIE_DEBUG:
633                 if (post_uprobe_handler_atomic(args->regs))
634                         ret = NOTIFY_STOP;
635                 break;
636         default:
637                 break;
638         }
639
640         return ret;
641 }
642
/*
 * Die-notifier block for uprobe_exceptions_notify(); registered in
 * swap_arch_init_uprobes() with the maximum priority value.
 */
static struct notifier_block uprobe_exceptions_nb = {
	.notifier_call = uprobe_exceptions_notify,
	.priority = INT_MAX
};
647
/* Search state passed through for_each_uprobe() by uprobe_is_valid(). */
struct up_valid_struct {
	struct uprobe *p;	/* probe pointer being looked for */
	bool found;		/* set to true once p has been seen */
};
652
653 static int __uprobe_is_valid(struct uprobe *p, void *data)
654 {
655         struct up_valid_struct *valid = (struct up_valid_struct *)data;
656
657         if (valid->p == p) {
658                 valid->found = true;
659                 return 1;
660         }
661
662         return 0;
663 }
664
665 static bool uprobe_is_valid(struct uprobe *p)
666 {
667         struct up_valid_struct valid = {
668                 .p = p,
669                 .found = false,
670         };
671
672         for_each_uprobe(__uprobe_is_valid, (void *)&valid);
673
674         return valid.found;
675 }
676
677 static int do_exit_handler(struct kprobe *kp, struct pt_regs *regs)
678 {
679         struct uprobe *p;
680
681         p = get_current_probe();
682         if (p && uprobe_is_valid(p)) {
683                 set_current_probe(NULL);
684                 put_up(p);
685         }
686
687         return 0;
688 }
689
/*
 * Kprobe running do_exit_handler(); its address is filled in with the
 * "do_exit" symbol in swap_arch_init_uprobes().
 */
static struct kprobe kp_do_exit = {
	.pre_handler = do_exit_handler
};
693
694 /**
695  * @brief Registers notify.
696  *
697  * @return register_die_notifier result.
698  */
699 int swap_arch_init_uprobes(void)
700 {
701         int ret;
702         const char *sym;
703
704         sym = "resume_userspace";
705         resume_userspace_addr = swap_ksyms(sym);
706         if (resume_userspace_addr == 0)
707                 goto not_found;
708
709         sym = "do_exit";
710         kp_do_exit.addr = swap_ksyms(sym);
711         if (kp_do_exit.addr == 0)
712                 goto not_found;
713
714         ret = swap_td_raw_reg(&td_raw, sizeof(struct uprobe_ctlblk));
715         if (ret)
716                 return ret;
717
718         ret = register_die_notifier(&uprobe_exceptions_nb);
719         if (ret)
720                 goto unreg_td;
721
722         ret = swap_register_kprobe(&kp_do_exit);
723         if (ret)
724                 goto unreg_exeption;
725
726         return 0;
727
728 unreg_exeption:
729         unregister_die_notifier(&uprobe_exceptions_nb);
730 unreg_td:
731         swap_td_raw_unreg(&td_raw);
732         return ret;
733
734 not_found:
735         pr_err("symbol '%s' not found\n", sym);
736         return -ESRCH;
737 }
738
739 /**
740  * @brief Unregisters notify.
741  *
742  * @return Void.
743  */
744 void swap_arch_exit_uprobes(void)
745 {
746         swap_unregister_kprobe(&kp_do_exit);
747         unregister_die_notifier(&uprobe_exceptions_nb);
748         swap_td_raw_unreg(&td_raw);
749 }
750