perf/x86: Add perf text poke events for kprobes
author Adrian Hunter <adrian.hunter@intel.com>
Tue, 12 May 2020 12:19:12 +0000 (15:19 +0300)
committer Peter Zijlstra <peterz@infradead.org>
Mon, 15 Jun 2020 12:09:49 +0000 (14:09 +0200)
Add perf text poke events for kprobes. That includes:

 - the replaced instruction(s) which are executed out-of-line
   i.e. arch_copy_kprobe() and arch_remove_kprobe()

 - the INT3 that activates the kprobe
   i.e. arch_arm_kprobe() and arch_disarm_kprobe()

 - optimised kprobe function
   i.e. arch_prepare_optimized_kprobe() and
      __arch_remove_optimized_kprobe()

 - optimised kprobe
   i.e. arch_optimize_kprobes() and arch_unoptimize_kprobe()

Resulting in 8 possible text_poke events:

 0:  NULL -> probe.ainsn.insn (if ainsn.boostable && !kp.post_handler)
                                        arch_copy_kprobe()

 1:  old0 -> INT3                       arch_arm_kprobe()

 // boosted kprobe active

 2:  NULL -> optprobe_trampoline        arch_prepare_optimized_kprobe()

 3:  INT3,old1,old2,old3,old4 -> JMP32  arch_optimize_kprobes()

 // optprobe active

 4:  JMP32 -> INT3,old1,old2,old3,old4

 // optprobe disabled and kprobe active (this sometimes goes back to 3)
                                        arch_unoptimize_kprobe()

 5:  optprobe_trampoline -> NULL        arch_remove_optimized_kprobe()

 // boosted kprobe active

 6:  INT3 -> old0                       arch_disarm_kprobe()

 7:  probe.ainsn.insn -> NULL (if ainsn.boostable && !kp.post_handler)
                                        arch_remove_kprobe()

Signed-off-by: Adrian Hunter <adrian.hunter@intel.com>
Signed-off-by: Peter Zijlstra (Intel) <peterz@infradead.org>
Acked-by: Peter Zijlstra (Intel) <peterz@infradead.org>
Acked-by: Masami Hiramatsu <mhiramat@kernel.org>
Link: https://lkml.kernel.org/r/20200512121922.8997-6-adrian.hunter@intel.com
arch/x86/include/asm/kprobes.h
arch/x86/kernel/kprobes/core.c
arch/x86/kernel/kprobes/opt.c

index 073eb7a..143bc9a 100644 (file)
@@ -66,6 +66,8 @@ struct arch_specific_insn {
         */
        bool boostable;
        bool if_modifier;
+       /* Number of bytes of text poked */
+       int tp_len;
 };
 
 struct arch_optimized_insn {
index 3bafe1b..bcc53c0 100644 (file)
@@ -33,6 +33,7 @@
 #include <linux/hardirq.h>
 #include <linux/preempt.h>
 #include <linux/sched/debug.h>
+#include <linux/perf_event.h>
 #include <linux/extable.h>
 #include <linux/kdebug.h>
 #include <linux/kallsyms.h>
@@ -471,6 +472,9 @@ static int arch_copy_kprobe(struct kprobe *p)
        /* Also, displacement change doesn't affect the first byte */
        p->opcode = buf[0];
 
+       p->ainsn.tp_len = len;
+       perf_event_text_poke(p->ainsn.insn, NULL, 0, buf, len);
+
        /* OK, write back the instruction(s) into ROX insn buffer */
        text_poke(p->ainsn.insn, buf, len);
 
@@ -502,12 +506,18 @@ int arch_prepare_kprobe(struct kprobe *p)
 
 void arch_arm_kprobe(struct kprobe *p)
 {
-       text_poke(p->addr, ((unsigned char []){INT3_INSN_OPCODE}), 1);
+       u8 int3 = INT3_INSN_OPCODE;
+
+       text_poke(p->addr, &int3, 1);
        text_poke_sync();
+       perf_event_text_poke(p->addr, &p->opcode, 1, &int3, 1);
 }
 
 void arch_disarm_kprobe(struct kprobe *p)
 {
+       u8 int3 = INT3_INSN_OPCODE;
+
+       perf_event_text_poke(p->addr, &int3, 1, &p->opcode, 1);
        text_poke(p->addr, &p->opcode, 1);
        text_poke_sync();
 }
@@ -515,6 +525,9 @@ void arch_disarm_kprobe(struct kprobe *p)
 void arch_remove_kprobe(struct kprobe *p)
 {
        if (p->ainsn.insn) {
+               /* Record the perf event before freeing the slot */
+               perf_event_text_poke(p->ainsn.insn, p->ainsn.insn,
+                                    p->ainsn.tp_len, NULL, 0);
                free_insn_slot(p->ainsn.insn, p->ainsn.boostable);
                p->ainsn.insn = NULL;
        }
index 321c199..3239b6a 100644 (file)
@@ -6,6 +6,7 @@
  * Copyright (C) Hitachi Ltd., 2012
  */
 #include <linux/kprobes.h>
+#include <linux/perf_event.h>
 #include <linux/ptrace.h>
 #include <linux/string.h>
 #include <linux/slab.h>
@@ -352,8 +353,15 @@ int arch_within_optimized_kprobe(struct optimized_kprobe *op,
 static
 void __arch_remove_optimized_kprobe(struct optimized_kprobe *op, int dirty)
 {
-       if (op->optinsn.insn) {
-               free_optinsn_slot(op->optinsn.insn, dirty);
+       u8 *slot = op->optinsn.insn;
+       if (slot) {
+               int len = TMPL_END_IDX + op->optinsn.size + JMP32_INSN_SIZE;
+
+               /* Record the perf event before freeing the slot */
+               if (dirty)
+                       perf_event_text_poke(slot, slot, len, NULL, 0);
+
+               free_optinsn_slot(slot, dirty);
                op->optinsn.insn = NULL;
                op->optinsn.size = 0;
        }
@@ -424,8 +432,15 @@ int arch_prepare_optimized_kprobe(struct optimized_kprobe *op,
                           (u8 *)op->kp.addr + op->optinsn.size);
        len += JMP32_INSN_SIZE;
 
+       /*
+        * Note len = TMPL_END_IDX + op->optinsn.size + JMP32_INSN_SIZE is also
+        * used in __arch_remove_optimized_kprobe().
+        */
+
        /* We have to use text_poke() for instruction buffer because it is RO */
+       perf_event_text_poke(slot, NULL, 0, buf, len);
        text_poke(slot, buf, len);
+
        ret = 0;
 out:
        kfree(buf);
@@ -477,10 +492,23 @@ void arch_optimize_kprobes(struct list_head *oplist)
  */
 void arch_unoptimize_kprobe(struct optimized_kprobe *op)
 {
-       arch_arm_kprobe(&op->kp);
-       text_poke(op->kp.addr + INT3_INSN_SIZE,
-                 op->optinsn.copied_insn, DISP32_SIZE);
+       u8 new[JMP32_INSN_SIZE] = { INT3_INSN_OPCODE, };
+       u8 old[JMP32_INSN_SIZE];
+       u8 *addr = op->kp.addr;
+
+       memcpy(old, op->kp.addr, JMP32_INSN_SIZE);
+       memcpy(new + INT3_INSN_SIZE,
+              op->optinsn.copied_insn,
+              JMP32_INSN_SIZE - INT3_INSN_SIZE);
+
+       text_poke(addr, new, INT3_INSN_SIZE);
        text_poke_sync();
+       text_poke(addr + INT3_INSN_SIZE,
+                 new + INT3_INSN_SIZE,
+                 JMP32_INSN_SIZE - INT3_INSN_SIZE);
+       text_poke_sync();
+
+       perf_event_text_poke(op->kp.addr, old, JMP32_INSN_SIZE, new, JMP32_INSN_SIZE);
 }
 
 /*