4aae5f1563ae6b3a5f04f2f5584e83131b57c77b
[kernel/swap-modules.git] / uprobe / arch / x86 / swap-asm / swap_uprobes.c
1 /**
2  * uprobe/arch/asm-x86/swap_uprobes.c
3  * @author Alexey Gerenkov <a.gerenkov@samsung.com> User-Space Probes initial
4  * implementation; Support x86/ARM/MIPS for both user and kernel spaces.
5  * @author Ekaterina Gorelkina <e.gorelkina@samsung.com>: redesign module for
6  * separating core and arch parts
7  *
8  * @section LICENSE
9  *
10  * This program is free software; you can redistribute it and/or modify
11  * it under the terms of the GNU General Public License as published by
12  * the Free Software Foundation; either version 2 of the License, or
13  * (at your option) any later version.
14  *
15  * This program is distributed in the hope that it will be useful,
16  * but WITHOUT ANY WARRANTY; without even the implied warranty of
17  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
18  * GNU General Public License for more details.
19  *
20  * You should have received a copy of the GNU General Public License
21  * along with this program; if not, write to the Free Software
22  * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
23  *
24  * @section COPYRIGHT
25  *
26  * Copyright (C) Samsung Electronics, 2006-2010
27  *
28  * @section DESCRIPTION
29  *
30  * Arch-dependent uprobe interface implementation for x86.
31  */
32
33
34 #include <linux/kdebug.h>
35
36 #include <kprobe/swap_slots.h>
37 #include <kprobe/swap_td_raw.h>
38 #include <uprobe/swap_uprobes.h>
39
40 #include "swap_uprobes.h"
41
42
/**
 * @struct uprobe_ctlblk
 * @brief Per-task uprobe control block.
 *
 * One instance is looked up for the current task through the td_raw handle.
 * It carries state from the breakpoint handler to the single-step handler.
 */
struct uprobe_ctlblk {
        /** CPU flags captured from pt_regs when the breakpoint was hit */
        unsigned long flags;
        /** Uprobe currently being single-stepped, NULL/0 when none */
        struct uprobe *p;
};
51
52
53 static struct td_raw td_raw;
54
55
56 static unsigned long trampoline_addr(struct uprobe *up)
57 {
58         return (unsigned long)(up->ainsn.insn +
59                                UPROBES_TRAMP_RET_BREAK_IDX);
60 }
61
62 unsigned long arch_tramp_by_ri(struct uretprobe_instance *ri)
63 {
64         return trampoline_addr(&ri->rp->up);
65 }
66
67 static struct uprobe_ctlblk *current_ucb(void)
68 {
69         return (struct uprobe_ctlblk *)swap_td_raw(&td_raw, current);
70 }
71
72 static struct uprobe *get_current_probe(void)
73 {
74         return current_ucb()->p;
75 }
76
77 static void set_current_probe(struct uprobe *p)
78 {
79         current_ucb()->p = p;
80 }
81
82 static void save_current_flags(struct pt_regs *regs)
83 {
84         current_ucb()->flags = regs->flags;
85 }
86
87 static void restore_current_flags(struct pt_regs *regs, unsigned long flags)
88 {
89         regs->flags &= ~IF_MASK;
90         regs->flags |= flags & IF_MASK;
91 }
92
93 /**
94  * @brief Prepares uprobe for x86.
95  *
96  * @param up Pointer to the uprobe.
97  * @return 0 on success,\n
98  * -1 on error.
99  */
100 int arch_prepare_uprobe(struct uprobe *p)
101 {
102         struct task_struct *task = p->task;
103         u8 *tramp = p->atramp.tramp;
104         enum { call_relative_opcode = 0xe8 };
105
106         if (!read_proc_vm_atomic(task, (unsigned long)p->addr,
107                                  tramp, MAX_INSN_SIZE)) {
108                 printk(KERN_ERR "failed to read memory %p!\n", p->addr);
109                 return -EINVAL;
110         }
111         /* TODO: this is a workaround */
112         if (tramp[0] == call_relative_opcode) {
113                 printk(KERN_INFO "cannot install probe: 1st instruction is call\n");
114                 return -EINVAL;
115         }
116
117         tramp[UPROBES_TRAMP_RET_BREAK_IDX] = BREAKPOINT_INSTRUCTION;
118
119         /* TODO: remove dual info */
120         p->opcode = tramp[0];
121
122         p->ainsn.boostable = swap_can_boost(tramp) ? 0 : -1;
123
124         p->ainsn.insn = swap_slot_alloc(p->sm);
125         if (p->ainsn.insn == NULL) {
126                 printk(KERN_ERR "trampoline out of memory\n");
127                 return -ENOMEM;
128         }
129
130         if (!write_proc_vm_atomic(task, (unsigned long)p->ainsn.insn,
131                                   tramp, sizeof(p->atramp.tramp))) {
132                 swap_slot_free(p->sm, p->ainsn.insn);
133                 printk(KERN_INFO "failed to write memory %p!\n", tramp);
134                 return -EINVAL;
135         }
136
137         /* for uretprobe */
138         add_uprobe_table(p);
139
140         return 0;
141 }
142
143 /**
144  * @brief Jump pre-handler.
145  *
146  * @param p Pointer to the uprobe.
147  * @param regs Pointer to CPU register data.
148  * @return 0.
149  */
150 int setjmp_upre_handler(struct uprobe *p, struct pt_regs *regs)
151 {
152         struct ujprobe *jp = container_of(p, struct ujprobe, up);
153         uprobe_pre_entry_handler_t pre_entry =
154                 (uprobe_pre_entry_handler_t)jp->pre_entry;
155         entry_point_t entry = (entry_point_t)jp->entry;
156         unsigned long args[6];
157
158         /* FIXME some user space apps crash if we clean interrupt bit */
159         /* regs->EREG(flags) &= ~IF_MASK; */
160 #if LINUX_VERSION_CODE >= KERNEL_VERSION(2, 6, 18)
161         trace_hardirqs_off();
162 #endif
163
164         /* read first 6 args from stack */
165         if (!read_proc_vm_atomic(current, regs->EREG(sp) + 4,
166                                  args, sizeof(args)))
167                 printk(KERN_WARNING
168                        "failed to read user space func arguments %lx!\n",
169                        regs->sp + 4);
170
171         if (pre_entry)
172                 p->ss_addr[smp_processor_id()] = (uprobe_opcode_t *)
173                                                  pre_entry(jp->priv_arg, regs);
174
175         if (entry)
176                 entry(args[0], args[1], args[2], args[3], args[4], args[5]);
177         else
178                 arch_ujprobe_return();
179
180         return 0;
181 }
182
183 /**
184  * @brief Prepares uretprobe for x86.
185  *
186  * @param ri Pointer to the uretprobe instance.
187  * @param regs Pointer to CPU register data.
188  * @return Void.
189  */
190 int arch_prepare_uretprobe(struct uretprobe_instance *ri, struct pt_regs *regs)
191 {
192         /* Replace the return addr with trampoline addr */
193         unsigned long ra = trampoline_addr(&ri->rp->up);
194         unsigned long ret_addr;
195         ri->sp = (kprobe_opcode_t *)regs->sp;
196
197         if (get_user(ret_addr, (unsigned long *)regs->sp)) {
198                 pr_err("failed to read user space func ra %lx addr=%p!\n",
199                        regs->sp, ri->rp->up.addr);
200                 return -EINVAL;
201         }
202
203         if (put_user(ra, (unsigned long *)regs->sp)) {
204                 pr_err("failed to write user space func ra %lx!\n", regs->sp);
205                 return -EINVAL;
206         }
207
208         ri->ret_addr = (uprobe_opcode_t *)ret_addr;
209
210         return 0;
211 }
212
/*
 * Read one unsigned long from @vaddr in @task into @val.
 * Returns true on FAILURE (the read did not return sizeof(*val)),
 * false on success.
 */
static bool get_long(struct task_struct *task,
                     unsigned long vaddr, unsigned long *val)
{
        if (read_proc_vm_atomic(task, vaddr, val, sizeof(*val)) ==
            sizeof(*val))
                return false;

        return true;
}
219
/*
 * Write the unsigned long at @val to @vaddr in @task.
 * Returns true on FAILURE (the write did not return sizeof(*val)),
 * false on success.
 */
static bool put_long(struct task_struct *task,
                     unsigned long vaddr, unsigned long *val)
{
        if (write_proc_vm_atomic(task, vaddr, val, sizeof(*val)) ==
            sizeof(*val))
                return false;

        return true;
}
226
227 /**
228  * @brief Disarms uretprobe on x86 arch.
229  *
230  * @param ri Pointer to the uretprobe instance.
231  * @param task Pointer to the task for which the probe.
232  * @return 0 on success,\n
233  * negative error code on error.
234  */
235 int arch_disarm_urp_inst(struct uretprobe_instance *ri,
236                          struct task_struct *task, unsigned long tr)
237 {
238         unsigned long ret_addr;
239         unsigned long sp = (unsigned long)ri->sp;
240         unsigned long tramp_addr;
241
242         if (tr == 0)
243                 tramp_addr = arch_tramp_by_ri(ri);
244         else
245                 tramp_addr = tr; /* ri - invalid */
246
247         if (get_long(task, sp, &ret_addr)) {
248                 printk(KERN_INFO "---> %s (%d/%d): failed to read stack from %08lx\n",
249                        task->comm, task->tgid, task->pid, sp);
250                 return -EFAULT;
251         }
252
253         if (tramp_addr == ret_addr) {
254                 if (put_long(task, sp, (unsigned long *)&ri->ret_addr)) {
255                         printk(KERN_INFO "---> %s (%d/%d): failed to write "
256                                "orig_ret_addr to %08lx",
257                                task->comm, task->tgid, task->pid, sp);
258                         return -EFAULT;
259                 }
260         } else {
261                 printk(KERN_INFO "---> %s (%d/%d): trampoline NOT found at sp = %08lx\n",
262                        task->comm, task->tgid, task->pid, sp);
263                 return -ENOENT;
264         }
265
266         return 0;
267 }
268
/**
 * @brief Gets trampoline address.
 *
 * Thin public wrapper around trampoline_addr().
 *
 * @param p Pointer to the uprobe.
 * @param regs Pointer to CPU register data (not used here).
 * @return Trampoline address.
 */
unsigned long arch_get_trampoline_addr(struct uprobe *p, struct pt_regs *regs)
{
        unsigned long tramp = trampoline_addr(p);

        return tramp;
}
280
281 /**
282  * @brief Restores return address.
283  *
284  * @param orig_ret_addr Original return address.
285  * @param regs Pointer to CPU register data.
286  * @return Void.
287  */
288 void arch_set_orig_ret_addr(unsigned long orig_ret_addr, struct pt_regs *regs)
289 {
290         regs->EREG(ip) = orig_ret_addr;
291 }
292
293 /**
294  * @brief Removes uprobe.
295  *
296  * @param up Pointer to the target uprobe.
297  * @return Void.
298  */
299 void arch_remove_uprobe(struct uprobe *p)
300 {
301         swap_slot_free(p->sm, p->ainsn.insn);
302 }
303
304 int arch_arm_uprobe(struct uprobe *p)
305 {
306         int ret;
307         uprobe_opcode_t insn = BREAKPOINT_INSTRUCTION;
308         unsigned long vaddr = (unsigned long)p->addr;
309
310         ret = write_proc_vm_atomic(p->task, vaddr, &insn, sizeof(insn));
311         if (!ret) {
312                 pr_err("arch_arm_uprobe: failed to write memory tgid=%u vaddr=%08lx\n",
313                        p->task->tgid, vaddr);
314
315                 return -EACCES;
316         }
317
318         return 0;
319 }
320
321 void arch_disarm_uprobe(struct uprobe *p, struct task_struct *task)
322 {
323         int ret;
324         unsigned long vaddr = (unsigned long)p->addr;
325
326         ret = write_proc_vm_atomic(task, vaddr, &p->opcode, sizeof(p->opcode));
327         if (!ret) {
328                 pr_err("arch_disarm_uprobe: failed to write memory tgid=%u, vaddr=%08lx\n",
329                        task->tgid, vaddr);
330         }
331 }
332
333 static void set_user_jmp_op(void *from, void *to)
334 {
335         struct __arch_jmp_op {
336                 char op;
337                 long raddr;
338         } __packed jop;
339
340         jop.raddr = (long)(to) - ((long)(from) + 5);
341         jop.op = RELATIVEJUMP_INSTRUCTION;
342
343         if (put_user(jop.op, (char *)from) ||
344             put_user(jop.raddr, (long *)(from + 1)))
345                 pr_err("failed to write jump opcode to user space %p\n", from);
346 }
347
348 static void resume_execution(struct uprobe *p,
349                              struct pt_regs *regs,
350                              unsigned long flags)
351 {
352         unsigned long *tos, tos_dword = 0;
353         unsigned long copy_eip = (unsigned long)p->ainsn.insn;
354         unsigned long orig_eip = (unsigned long)p->addr;
355         uprobe_opcode_t insns[2];
356
357         regs->EREG(flags) &= ~TF_MASK;
358
359         tos = (unsigned long *)&tos_dword;
360         if (get_user(tos_dword, (unsigned long *)regs->sp)) {
361                 pr_err("failed to read from user space sp=%lx!\n", regs->sp);
362                 return;
363         }
364
365         if (get_user(*(unsigned short *)insns, (unsigned short *)p->ainsn.insn)) {
366                 pr_err("failed to read first 2 opcodes %p!\n", p->ainsn.insn);
367                 return;
368         }
369
370         switch (insns[0]) {
371         case 0x9c: /* pushfl */
372                 *tos &= ~(TF_MASK | IF_MASK);
373                 *tos |= flags & (TF_MASK | IF_MASK);
374                 break;
375         case 0xc2: /* iret/ret/lret */
376         case 0xc3:
377         case 0xca:
378         case 0xcb:
379         case 0xcf:
380         case 0xea: /* jmp absolute -- eip is correct */
381                 /* eip is already adjusted, no more changes required */
382                 p->ainsn.boostable = 1;
383                 goto no_change;
384         case 0xe8: /* call relative - Fix return addr */
385                 *tos = orig_eip + (*tos - copy_eip);
386                 break;
387         case 0x9a: /* call absolute -- same as call absolute, indirect */
388                 *tos = orig_eip + (*tos - copy_eip);
389
390                 if (put_user(tos_dword, (unsigned long *)regs->sp)) {
391                         pr_err("failed to write dword to sp=%lx\n", regs->sp);
392                         return;
393                 }
394
395                 goto no_change;
396         case 0xff:
397                 if ((insns[1] & 0x30) == 0x10) {
398                         /*
399                          * call absolute, indirect
400                          * Fix return addr; eip is correct.
401                          * But this is not boostable
402                          */
403                         *tos = orig_eip + (*tos - copy_eip);
404
405                         if (put_user(tos_dword, (unsigned long *)regs->sp)) {
406                                 pr_err("failed to write dword to sp=%lx\n", regs->sp);
407                                 return;
408                         }
409
410                         goto no_change;
411                 } else if (((insns[1] & 0x31) == 0x20) || /* jmp near, absolute
412                                                            * indirect */
413                            ((insns[1] & 0x31) == 0x21)) {
414                         /* jmp far, absolute indirect */
415                         /* eip is correct. And this is boostable */
416                         p->ainsn.boostable = 1;
417                         goto no_change;
418                 }
419         case 0xf3:
420                 if (insns[1] == 0xc3)
421                         /* repz ret special handling: no more changes */
422                         goto no_change;
423                 break;
424         default:
425                 break;
426         }
427
428         if (put_user(tos_dword, (unsigned long *)regs->sp)) {
429                 pr_err("failed to write dword to sp=%lx\n", regs->sp);
430                 return;
431         }
432
433         if (p->ainsn.boostable == 0) {
434                 if ((regs->EREG(ip) > copy_eip) && (regs->EREG(ip) - copy_eip) +
435                     5 < MAX_INSN_SIZE) {
436                         /*
437                          * These instructions can be executed directly if it
438                          * jumps back to correct address.
439                          */
440                         set_user_jmp_op((void *) regs->EREG(ip),
441                                         (void *)orig_eip +
442                                         (regs->EREG(ip) - copy_eip));
443                         p->ainsn.boostable = 1;
444                 } else {
445                         p->ainsn.boostable = -1;
446                 }
447         }
448
449         regs->EREG(ip) = orig_eip + (regs->EREG(ip) - copy_eip);
450
451 no_change:
452         return;
453 }
454
455 static bool prepare_ss_addr(struct uprobe *p, struct pt_regs *regs)
456 {
457         unsigned long *ss_addr = (long *)&p->ss_addr[smp_processor_id()];
458
459         if (*ss_addr) {
460                 regs->ip = *ss_addr;
461                 *ss_addr = 0;
462                 return true;
463         } else {
464                 regs->ip = (unsigned long)p->ainsn.insn;
465                 return false;
466         }
467 }
468
469 static void prepare_ss(struct pt_regs *regs)
470 {
471         /* set single step mode */
472         regs->flags |= TF_MASK;
473         regs->flags &= ~IF_MASK;
474 }
475
476 static int uprobe_handler(struct pt_regs *regs)
477 {
478         struct uprobe *p;
479         uprobe_opcode_t *addr;
480         struct task_struct *task = current;
481         pid_t tgid = task->tgid;
482
483         save_current_flags(regs);
484
485         addr = (uprobe_opcode_t *)(regs->EREG(ip) - sizeof(uprobe_opcode_t));
486         p = get_uprobe(addr, tgid);
487
488         if (p == NULL) {
489                 void *tramp_addr = (void *)addr - UPROBES_TRAMP_RET_BREAK_IDX;
490
491                 p = get_uprobe_by_insn_slot(tramp_addr, tgid, regs);
492                 if (p == NULL) {
493                         printk(KERN_INFO "no_uprobe\n");
494                         return 0;
495                 }
496
497                 trampoline_uprobe_handler(p, regs);
498                 return 1;
499         } else {
500                 if (!p->pre_handler || !p->pre_handler(p, regs)) {
501                         if (p->ainsn.boostable == 1 && !p->post_handler) {
502                                 prepare_ss_addr(p, regs);
503                                 return 1;
504                         }
505
506                         if (prepare_ss_addr(p, regs) == false) {
507                                 set_current_probe(p);
508                                 prepare_ss(regs);
509                         }
510                 }
511         }
512
513         return 1;
514 }
515
516 static int post_uprobe_handler(struct pt_regs *regs)
517 {
518         struct uprobe *p = get_current_probe();
519         unsigned long flags = current_ucb()->flags;
520
521         if (p == NULL) {
522                 printk("task[%u %u %s] current uprobe is not found\n",
523                        current->tgid, current->pid, current->comm);
524                 return 0;
525         }
526
527         resume_execution(p, regs, flags);
528         restore_current_flags(regs, flags);
529
530         /* clean stack */
531         current_ucb()->p = 0;
532         current_ucb()->flags = 0;
533
534         return 1;
535 }
536
537 static int uprobe_exceptions_notify(struct notifier_block *self,
538                                     unsigned long val, void *data)
539 {
540         struct die_args *args = (struct die_args *)data;
541         int ret = NOTIFY_DONE;
542
543         if (args->regs == NULL || !user_mode_vm(args->regs))
544                 return ret;
545
546         switch (val) {
547 #ifdef CONFIG_KPROBES
548         case DIE_INT3:
549 #else
550         case DIE_TRAP:
551 #endif
552                 if (uprobe_handler(args->regs))
553                         ret = NOTIFY_STOP;
554                 break;
555         case DIE_DEBUG:
556                 if (post_uprobe_handler(args->regs))
557                         ret = NOTIFY_STOP;
558                 break;
559         default:
560                 break;
561         }
562
563         return ret;
564 }
565
566 static struct notifier_block uprobe_exceptions_nb = {
567         .notifier_call = uprobe_exceptions_notify,
568         .priority = INT_MAX
569 };
570
571 /**
572  * @brief Registers notify.
573  *
574  * @return register_die_notifier result.
575  */
576 int swap_arch_init_uprobes(void)
577 {
578         int ret;
579
580         ret = swap_td_raw_reg(&td_raw, sizeof(struct uprobe_ctlblk));
581         if (ret)
582                 return ret;
583
584         ret = register_die_notifier(&uprobe_exceptions_nb);
585         if (ret)
586                 swap_td_raw_unreg(&td_raw);
587
588         return ret;
589 }
590
591 /**
592  * @brief Unregisters notify.
593  *
594  * @return Void.
595  */
596 void swap_arch_exit_uprobes(void)
597 {
598         unregister_die_notifier(&uprobe_exceptions_nb);
599         swap_td_raw_unreg(&td_raw);
600 }
601