kernel/bpf/trampoline.c
// SPDX-License-Identifier: GPL-2.0-only
/* Copyright (c) 2019 Facebook */
#include <linux/hash.h>
#include <linux/bpf.h>
#include <linux/filter.h>
#include <linux/ftrace.h>
#include <linux/rbtree_latch.h>
#include <linux/perf_event.h>
#include <linux/btf.h>
#include <linux/rcupdate_trace.h>
#include <linux/rcupdate_wait.h>
#include <linux/static_call.h>
#include <linux/bpf_verifier.h>
#include <linux/bpf_lsm.h>
#include <linux/delay.h>

/* dummy _ops. The verifier will operate on target program's ops. */
const struct bpf_verifier_ops bpf_extension_verifier_ops = {
};
const struct bpf_prog_ops bpf_extension_prog_ops = {
};

/* btf_vmlinux has ~22k attachable functions. 1k htab is enough. */
#define TRAMPOLINE_HASH_BITS 10
#define TRAMPOLINE_TABLE_SIZE (1 << TRAMPOLINE_HASH_BITS)

static struct hlist_head trampoline_table[TRAMPOLINE_TABLE_SIZE];

/* serializes access to trampoline_table */
static DEFINE_MUTEX(trampoline_mutex);

#ifdef CONFIG_DYNAMIC_FTRACE_WITH_DIRECT_CALLS
static int bpf_trampoline_update(struct bpf_trampoline *tr, bool lock_direct_mutex);

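/* ops_func callback for the trampoline's ftrace_ops. ftrace invokes it when
 * another IPMODIFY user (e.g. livepatch) shows up on or leaves the same
 * function, so the trampoline can switch in or out of the
 * BPF_TRAMP_F_SHARE_IPMODIFY mode.
 */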
static int bpf_tramp_ftrace_ops_func(struct ftrace_ops *ops, enum ftrace_ops_cmd cmd)
{
        struct bpf_trampoline *tr = ops->private;
        int ret = 0;

        if (cmd == FTRACE_OPS_CMD_ENABLE_SHARE_IPMODIFY_SELF) {
                /* This is called inside register_ftrace_direct(), so
                 * tr->mutex is already locked.
                 */
                lockdep_assert_held_once(&tr->mutex);

                /* Instead of updating the trampoline here, we propagate
                 * -EAGAIN to register_ftrace_direct(). Then we can
                 * retry register_ftrace_direct() after updating the
                 * trampoline.
                 */
                if ((tr->flags & BPF_TRAMP_F_CALL_ORIG) &&
                    !(tr->flags & BPF_TRAMP_F_ORIG_STACK)) {
                        if (WARN_ON_ONCE(tr->flags & BPF_TRAMP_F_SHARE_IPMODIFY))
                                return -EBUSY;

                        tr->flags |= BPF_TRAMP_F_SHARE_IPMODIFY;
                        return -EAGAIN;
                }

                return 0;
        }

        /* The normal locking order is
         *    tr->mutex => direct_mutex (ftrace.c) => ftrace_lock (ftrace.c)
         *
         * The following two commands are called from
         *
         *   prepare_direct_functions_for_ipmodify
         *   cleanup_direct_functions_after_ipmodify
         *
         * In both cases, direct_mutex is already locked. Use
         * mutex_trylock(&tr->mutex) to avoid deadlock in race condition
         * (something else is making changes to this same trampoline).
         */
        if (!mutex_trylock(&tr->mutex)) {
                /* sleep 1 ms to make sure whatever is holding tr->mutex makes
                 * some progress.
                 */
                msleep(1);
                return -EAGAIN;
        }

        switch (cmd) {
        case FTRACE_OPS_CMD_ENABLE_SHARE_IPMODIFY_PEER:
                tr->flags |= BPF_TRAMP_F_SHARE_IPMODIFY;

                if ((tr->flags & BPF_TRAMP_F_CALL_ORIG) &&
                    !(tr->flags & BPF_TRAMP_F_ORIG_STACK))
                        ret = bpf_trampoline_update(tr, false /* lock_direct_mutex */);
                break;
        case FTRACE_OPS_CMD_DISABLE_SHARE_IPMODIFY_PEER:
                tr->flags &= ~BPF_TRAMP_F_SHARE_IPMODIFY;

                if (tr->flags & BPF_TRAMP_F_ORIG_STACK)
                        ret = bpf_trampoline_update(tr, false /* lock_direct_mutex */);
                break;
        default:
                ret = -EINVAL;
                break;
        }

        mutex_unlock(&tr->mutex);
        return ret;
}
#endif

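/* Return true if this prog/attach type combination runs through a BPF
 * trampoline: fentry/fexit/fmod_ret tracing programs and LSM_MAC programs.
 */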
bool bpf_prog_has_trampoline(const struct bpf_prog *prog)
{
        enum bpf_attach_type eatype = prog->expected_attach_type;
        enum bpf_prog_type ptype = prog->type;

        return (ptype == BPF_PROG_TYPE_TRACING &&
                (eatype == BPF_TRACE_FENTRY || eatype == BPF_TRACE_FEXIT ||
                 eatype == BPF_MODIFY_RETURN)) ||
                (ptype == BPF_PROG_TYPE_LSM && eatype == BPF_LSM_MAC);
}

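/* Publish a trampoline image page in kallsyms and notify perf via a
 * KSYMBOL record so profilers can symbolize it; bpf_image_ksym_del()
 * undoes both.
 */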
void bpf_image_ksym_add(void *data, struct bpf_ksym *ksym)
{
        ksym->start = (unsigned long) data;
        ksym->end = ksym->start + PAGE_SIZE;
        bpf_ksym_add(ksym);
        perf_event_ksymbol(PERF_RECORD_KSYMBOL_TYPE_BPF, ksym->start,
                           PAGE_SIZE, false, ksym->name);
}

void bpf_image_ksym_del(struct bpf_ksym *ksym)
{
        bpf_ksym_del(ksym);
        perf_event_ksymbol(PERF_RECORD_KSYMBOL_TYPE_BPF, ksym->start,
                           PAGE_SIZE, true, ksym->name);
}

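/* Find the trampoline for @key in trampoline_table and take a reference,
 * or allocate, initialize and hash a new one. Returns NULL on allocation
 * failure.
 */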
static struct bpf_trampoline *bpf_trampoline_lookup(u64 key)
{
        struct bpf_trampoline *tr;
        struct hlist_head *head;
        int i;

        mutex_lock(&trampoline_mutex);
        head = &trampoline_table[hash_64(key, TRAMPOLINE_HASH_BITS)];
        hlist_for_each_entry(tr, head, hlist) {
                if (tr->key == key) {
                        refcount_inc(&tr->refcnt);
                        goto out;
                }
        }
        tr = kzalloc(sizeof(*tr), GFP_KERNEL);
        if (!tr)
                goto out;
#ifdef CONFIG_DYNAMIC_FTRACE_WITH_DIRECT_CALLS
        tr->fops = kzalloc(sizeof(struct ftrace_ops), GFP_KERNEL);
        if (!tr->fops) {
                kfree(tr);
                tr = NULL;
                goto out;
        }
        tr->fops->private = tr;
        tr->fops->ops_func = bpf_tramp_ftrace_ops_func;
#endif

        tr->key = key;
        INIT_HLIST_NODE(&tr->hlist);
        hlist_add_head(&tr->hlist, head);
        refcount_set(&tr->refcnt, 1);
        mutex_init(&tr->mutex);
        for (i = 0; i < BPF_TRAMP_MAX; i++)
                INIT_HLIST_HEAD(&tr->progs_hlist[i]);
out:
        mutex_unlock(&trampoline_mutex);
        return tr;
}

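/* unregister_fentry/modify_fentry/register_fentry patch the attach point at
 * tr->func.addr: ftrace-managed locations go through the ftrace direct-call
 * API, anything else is text-poked directly.
 */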
static int unregister_fentry(struct bpf_trampoline *tr, void *old_addr)
{
        void *ip = tr->func.addr;
        int ret;

        if (tr->func.ftrace_managed)
                ret = unregister_ftrace_direct(tr->fops, (long)old_addr, false);
        else
                ret = bpf_arch_text_poke(ip, BPF_MOD_CALL, old_addr, NULL);

        return ret;
}

static int modify_fentry(struct bpf_trampoline *tr, void *old_addr, void *new_addr,
                         bool lock_direct_mutex)
{
        void *ip = tr->func.addr;
        int ret;

        if (tr->func.ftrace_managed) {
                if (lock_direct_mutex)
                        ret = modify_ftrace_direct(tr->fops, (long)new_addr);
                else
                        ret = modify_ftrace_direct_nolock(tr->fops, (long)new_addr);
        } else {
                ret = bpf_arch_text_poke(ip, BPF_MOD_CALL, old_addr, new_addr);
        }
        return ret;
}

/* first time registering */
static int register_fentry(struct bpf_trampoline *tr, void *new_addr)
{
        void *ip = tr->func.addr;
        unsigned long faddr;
        int ret;

        faddr = ftrace_location((unsigned long)ip);
        if (faddr) {
                if (!tr->fops)
                        return -ENOTSUPP;
                tr->func.ftrace_managed = true;
        }

        if (tr->func.ftrace_managed) {
                ftrace_set_filter_ip(tr->fops, (unsigned long)ip, 0, 1);
                ret = register_ftrace_direct(tr->fops, (long)new_addr);
        } else {
                ret = bpf_arch_text_poke(ip, BPF_MOD_CALL, NULL, new_addr);
        }

        return ret;
}

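/* Snapshot the links attached to @tr into a kcalloc'ed bpf_tramp_links array
 * (one entry per kind), count them in *total and record in *ip_arg whether
 * any prog calls bpf_get_func_ip(). Called with tr->mutex held; the caller
 * must kfree() the result.
 */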
static struct bpf_tramp_links *
bpf_trampoline_get_progs(const struct bpf_trampoline *tr, int *total, bool *ip_arg)
{
        struct bpf_tramp_link *link;
        struct bpf_tramp_links *tlinks;
        struct bpf_tramp_link **links;
        int kind;

        *total = 0;
        tlinks = kcalloc(BPF_TRAMP_MAX, sizeof(*tlinks), GFP_KERNEL);
        if (!tlinks)
                return ERR_PTR(-ENOMEM);

        for (kind = 0; kind < BPF_TRAMP_MAX; kind++) {
                tlinks[kind].nr_links = tr->progs_cnt[kind];
                *total += tr->progs_cnt[kind];
                links = tlinks[kind].links;

                hlist_for_each_entry(link, &tr->progs_hlist[kind], tramp_hlist) {
                        *ip_arg |= link->link.prog->call_get_func_ip;
                        *links++ = link;
                }
        }
        return tlinks;
}

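/* Final teardown step for a trampoline image: runs from a workqueue, drops
 * the ksym, frees the executable page and its modmem charge, and frees the
 * bpf_tramp_image itself after another RCU grace period.
 */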
static void __bpf_tramp_image_put_deferred(struct work_struct *work)
{
        struct bpf_tramp_image *im;

        im = container_of(work, struct bpf_tramp_image, work);
        bpf_image_ksym_del(&im->ksym);
        bpf_jit_free_exec(im->image);
        bpf_jit_uncharge_modmem(PAGE_SIZE);
        percpu_ref_exit(&im->pcref);
        kfree_rcu(im, rcu);
}

/* callback, fexit step 3 or fentry step 2 */
static void __bpf_tramp_image_put_rcu(struct rcu_head *rcu)
{
        struct bpf_tramp_image *im;

        im = container_of(rcu, struct bpf_tramp_image, rcu);
        INIT_WORK(&im->work, __bpf_tramp_image_put_deferred);
        schedule_work(&im->work);
}

/* callback, fexit step 2. Called after percpu_ref_kill confirms. */
static void __bpf_tramp_image_release(struct percpu_ref *pcref)
{
        struct bpf_tramp_image *im;

        im = container_of(pcref, struct bpf_tramp_image, pcref);
        call_rcu_tasks(&im->rcu, __bpf_tramp_image_put_rcu);
}

/* callback, fexit or fentry step 1 */
static void __bpf_tramp_image_put_rcu_tasks(struct rcu_head *rcu)
{
        struct bpf_tramp_image *im;

        im = container_of(rcu, struct bpf_tramp_image, rcu);
        if (im->ip_after_call)
                /* the case of fmod_ret/fexit trampoline and CONFIG_PREEMPTION=y */
                percpu_ref_kill(&im->pcref);
        else
                /* the case of fentry trampoline */
                call_rcu_tasks(&im->rcu, __bpf_tramp_image_put_rcu);
}

static void bpf_tramp_image_put(struct bpf_tramp_image *im)
{
        /* The trampoline image that calls original function is using:
         * rcu_read_lock_trace to protect sleepable bpf progs
         * rcu_read_lock to protect normal bpf progs
         * percpu_ref to protect trampoline itself
         * rcu tasks to protect trampoline asm not covered by percpu_ref
         * (which are few asm insns before __bpf_tramp_enter and
         *  after __bpf_tramp_exit)
         *
         * The trampoline is unreachable before bpf_tramp_image_put().
         *
         * First, patch the trampoline to avoid calling into fexit progs.
         * The progs will be freed even if the original function is still
         * executing or sleeping.
         * In case of CONFIG_PREEMPT=y use call_rcu_tasks() to wait on
         * first few asm instructions to execute and call into
         * __bpf_tramp_enter->percpu_ref_get.
         * Then use percpu_ref_kill to wait for the trampoline and the original
         * function to finish.
         * Then use call_rcu_tasks() to make sure few asm insns in
         * the trampoline epilogue are done as well.
         *
         * In !PREEMPT case the task that got interrupted in the first asm
         * insns won't go through an RCU quiescent state which the
         * percpu_ref_kill will be waiting for. Hence the first
         * call_rcu_tasks() is not necessary.
         */
        if (im->ip_after_call) {
                int err = bpf_arch_text_poke(im->ip_after_call, BPF_MOD_JUMP,
                                             NULL, im->ip_epilogue);
                WARN_ON(err);
                if (IS_ENABLED(CONFIG_PREEMPTION))
                        call_rcu_tasks(&im->rcu, __bpf_tramp_image_put_rcu_tasks);
                else
                        percpu_ref_kill(&im->pcref);
                return;
        }

        /* The trampoline without fexit and fmod_ret progs doesn't call original
         * function and doesn't use percpu_ref.
         * Use call_rcu_tasks_trace() to wait for sleepable progs to finish.
         * Then use call_rcu_tasks() to wait for the rest of trampoline asm
         * and normal progs.
         */
        call_rcu_tasks_trace(&im->rcu, __bpf_tramp_image_put_rcu_tasks);
}

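/* Allocate a one-page executable image for the trampoline, charge it against
 * the JIT modmem limit, initialize its percpu_ref and publish it in kallsyms
 * as bpf_trampoline_<key>_<idx>.
 */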
static struct bpf_tramp_image *bpf_tramp_image_alloc(u64 key, u32 idx)
{
        struct bpf_tramp_image *im;
        struct bpf_ksym *ksym;
        void *image;
        int err = -ENOMEM;

        im = kzalloc(sizeof(*im), GFP_KERNEL);
        if (!im)
                goto out;

        err = bpf_jit_charge_modmem(PAGE_SIZE);
        if (err)
                goto out_free_im;

        err = -ENOMEM;
        im->image = image = bpf_jit_alloc_exec(PAGE_SIZE);
        if (!image)
                goto out_uncharge;
        set_vm_flush_reset_perms(image);

        err = percpu_ref_init(&im->pcref, __bpf_tramp_image_release, 0, GFP_KERNEL);
        if (err)
                goto out_free_image;

        ksym = &im->ksym;
        INIT_LIST_HEAD_RCU(&ksym->lnode);
        snprintf(ksym->name, KSYM_NAME_LEN, "bpf_trampoline_%llu_%u", key, idx);
        bpf_image_ksym_add(image, ksym);
        return im;

out_free_image:
        bpf_jit_free_exec(im->image);
out_uncharge:
        bpf_jit_uncharge_modmem(PAGE_SIZE);
out_free_im:
        kfree(im);
out:
        return ERR_PTR(err);
}

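/* Regenerate the trampoline image for the current set of attached links and
 * swap it in via register/modify/unregister_fentry. Under
 * CONFIG_DYNAMIC_FTRACE_WITH_DIRECT_CALLS this may retry once with
 * BPF_TRAMP_F_ORIG_STACK set when the target is shared with an IPMODIFY
 * user. Called with tr->mutex held.
 */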
static int bpf_trampoline_update(struct bpf_trampoline *tr, bool lock_direct_mutex)
{
        struct bpf_tramp_image *im;
        struct bpf_tramp_links *tlinks;
        u32 orig_flags = tr->flags;
        bool ip_arg = false;
        int err, total;

        tlinks = bpf_trampoline_get_progs(tr, &total, &ip_arg);
        if (IS_ERR(tlinks))
                return PTR_ERR(tlinks);

        if (total == 0) {
                err = unregister_fentry(tr, tr->cur_image->image);
                bpf_tramp_image_put(tr->cur_image);
                tr->cur_image = NULL;
                tr->selector = 0;
                goto out;
        }

        im = bpf_tramp_image_alloc(tr->key, tr->selector);
        if (IS_ERR(im)) {
                err = PTR_ERR(im);
                goto out;
        }

        /* clear all bits except SHARE_IPMODIFY */
        tr->flags &= BPF_TRAMP_F_SHARE_IPMODIFY;

        if (tlinks[BPF_TRAMP_FEXIT].nr_links ||
            tlinks[BPF_TRAMP_MODIFY_RETURN].nr_links) {
                /* NOTE: BPF_TRAMP_F_RESTORE_REGS and BPF_TRAMP_F_SKIP_FRAME
                 * should not be set together.
                 */
                tr->flags |= BPF_TRAMP_F_CALL_ORIG | BPF_TRAMP_F_SKIP_FRAME;
        } else {
                tr->flags |= BPF_TRAMP_F_RESTORE_REGS;
        }

        if (ip_arg)
                tr->flags |= BPF_TRAMP_F_IP_ARG;

#ifdef CONFIG_DYNAMIC_FTRACE_WITH_DIRECT_CALLS
again:
        if ((tr->flags & BPF_TRAMP_F_SHARE_IPMODIFY) &&
            (tr->flags & BPF_TRAMP_F_CALL_ORIG))
                tr->flags |= BPF_TRAMP_F_ORIG_STACK;
#endif

        err = arch_prepare_bpf_trampoline(im, im->image, im->image + PAGE_SIZE,
                                          &tr->func.model, tr->flags, tlinks,
                                          tr->func.addr);
        if (err < 0)
                goto out;

        set_memory_rox((long)im->image, 1);

        WARN_ON(tr->cur_image && tr->selector == 0);
        WARN_ON(!tr->cur_image && tr->selector);
        if (tr->cur_image)
                /* progs already running at this address */
                err = modify_fentry(tr, tr->cur_image->image, im->image, lock_direct_mutex);
        else
                /* first time registering */
                err = register_fentry(tr, im->image);

#ifdef CONFIG_DYNAMIC_FTRACE_WITH_DIRECT_CALLS
        if (err == -EAGAIN) {
                /* -EAGAIN from bpf_tramp_ftrace_ops_func. Now
                 * BPF_TRAMP_F_SHARE_IPMODIFY is set, we can generate the
                 * trampoline again, and retry register.
                 */
                /* reset fops->func and fops->trampoline for re-register */
                tr->fops->func = NULL;
                tr->fops->trampoline = 0;

                /* reset im->image memory attr for arch_prepare_bpf_trampoline */
                set_memory_nx((long)im->image, 1);
                set_memory_rw((long)im->image, 1);
                goto again;
        }
#endif
        if (err)
                goto out;

        if (tr->cur_image)
                bpf_tramp_image_put(tr->cur_image);
        tr->cur_image = im;
        tr->selector++;
out:
        /* If any error happens, restore previous flags */
        if (err)
                tr->flags = orig_flags;
        kfree(tlinks);
        return err;
}

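/* Map a program's expected_attach_type to the trampoline slot it occupies.
 * LSM hooks that return void are treated as fexit since there is no return
 * value to modify.
 */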
static enum bpf_tramp_prog_type bpf_attach_type_to_tramp(struct bpf_prog *prog)
{
        switch (prog->expected_attach_type) {
        case BPF_TRACE_FENTRY:
                return BPF_TRAMP_FENTRY;
        case BPF_MODIFY_RETURN:
                return BPF_TRAMP_MODIFY_RETURN;
        case BPF_TRACE_FEXIT:
                return BPF_TRAMP_FEXIT;
        case BPF_LSM_MAC:
                if (!prog->aux->attach_func_proto->type)
                        /* The function returns void, we cannot modify its
                         * return value.
                         */
                        return BPF_TRAMP_FEXIT;
                else
                        return BPF_TRAMP_MODIFY_RETURN;
        default:
                return BPF_TRAMP_REPLACE;
        }
}

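/* Attach @link to @tr with tr->mutex held. Extension progs replace the
 * target entirely and are mutually exclusive with fentry/fexit; all other
 * kinds are added to their list and the trampoline is regenerated. The link
 * is rolled back if the update fails.
 */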
static int __bpf_trampoline_link_prog(struct bpf_tramp_link *link, struct bpf_trampoline *tr)
{
        enum bpf_tramp_prog_type kind;
        struct bpf_tramp_link *link_exiting;
        int err = 0;
        int cnt = 0, i;

        kind = bpf_attach_type_to_tramp(link->link.prog);
        if (tr->extension_prog)
                /* cannot attach fentry/fexit if extension prog is attached.
                 * cannot overwrite extension prog either.
                 */
                return -EBUSY;

        for (i = 0; i < BPF_TRAMP_MAX; i++)
                cnt += tr->progs_cnt[i];

        if (kind == BPF_TRAMP_REPLACE) {
                /* Cannot attach extension if fentry/fexit are in use. */
                if (cnt)
                        return -EBUSY;
                tr->extension_prog = link->link.prog;
                return bpf_arch_text_poke(tr->func.addr, BPF_MOD_JUMP, NULL,
                                          link->link.prog->bpf_func);
        }
        if (cnt >= BPF_MAX_TRAMP_LINKS)
                return -E2BIG;
        if (!hlist_unhashed(&link->tramp_hlist))
                /* prog already linked */
                return -EBUSY;
        hlist_for_each_entry(link_exiting, &tr->progs_hlist[kind], tramp_hlist) {
                if (link_exiting->link.prog != link->link.prog)
                        continue;
                /* prog already linked */
                return -EBUSY;
        }

        hlist_add_head(&link->tramp_hlist, &tr->progs_hlist[kind]);
        tr->progs_cnt[kind]++;
        err = bpf_trampoline_update(tr, true /* lock_direct_mutex */);
        if (err) {
                hlist_del_init(&link->tramp_hlist);
                tr->progs_cnt[kind]--;
        }
        return err;
}

int bpf_trampoline_link_prog(struct bpf_tramp_link *link, struct bpf_trampoline *tr)
{
        int err;

        mutex_lock(&tr->mutex);
        err = __bpf_trampoline_link_prog(link, tr);
        mutex_unlock(&tr->mutex);
        return err;
}

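/* Detach @link from @tr with tr->mutex held and regenerate the trampoline;
 * extension progs are simply unpatched from the target.
 */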
static int __bpf_trampoline_unlink_prog(struct bpf_tramp_link *link, struct bpf_trampoline *tr)
{
        enum bpf_tramp_prog_type kind;
        int err;

        kind = bpf_attach_type_to_tramp(link->link.prog);
        if (kind == BPF_TRAMP_REPLACE) {
                WARN_ON_ONCE(!tr->extension_prog);
                err = bpf_arch_text_poke(tr->func.addr, BPF_MOD_JUMP,
                                         tr->extension_prog->bpf_func, NULL);
                tr->extension_prog = NULL;
                return err;
        }
        hlist_del_init(&link->tramp_hlist);
        tr->progs_cnt[kind]--;
        return bpf_trampoline_update(tr, true /* lock_direct_mutex */);
}

/* bpf_trampoline_unlink_prog() should never fail. */
int bpf_trampoline_unlink_prog(struct bpf_tramp_link *link, struct bpf_trampoline *tr)
{
        int err;

        mutex_lock(&tr->mutex);
        err = __bpf_trampoline_unlink_prog(link, tr);
        mutex_unlock(&tr->mutex);
        return err;
}

#if defined(CONFIG_CGROUP_BPF) && defined(CONFIG_BPF_LSM)
static void bpf_shim_tramp_link_release(struct bpf_link *link)
{
        struct bpf_shim_tramp_link *shim_link =
                container_of(link, struct bpf_shim_tramp_link, link.link);

        /* paired with 'shim_link->trampoline = tr' in bpf_trampoline_link_cgroup_shim */
        if (!shim_link->trampoline)
                return;

        WARN_ON_ONCE(bpf_trampoline_unlink_prog(&shim_link->link, shim_link->trampoline));
        bpf_trampoline_put(shim_link->trampoline);
}

static void bpf_shim_tramp_link_dealloc(struct bpf_link *link)
{
        struct bpf_shim_tramp_link *shim_link =
                container_of(link, struct bpf_shim_tramp_link, link.link);

        kfree(shim_link);
}

static const struct bpf_link_ops bpf_shim_tramp_link_lops = {
        .release = bpf_shim_tramp_link_release,
        .dealloc = bpf_shim_tramp_link_dealloc,
};

static struct bpf_shim_tramp_link *cgroup_shim_alloc(const struct bpf_prog *prog,
                                                     bpf_func_t bpf_func,
                                                     int cgroup_atype)
{
        struct bpf_shim_tramp_link *shim_link = NULL;
        struct bpf_prog *p;

        shim_link = kzalloc(sizeof(*shim_link), GFP_USER);
        if (!shim_link)
                return NULL;

        p = bpf_prog_alloc(1, 0);
        if (!p) {
                kfree(shim_link);
                return NULL;
        }

        p->jited = false;
        p->bpf_func = bpf_func;

        p->aux->cgroup_atype = cgroup_atype;
        p->aux->attach_func_proto = prog->aux->attach_func_proto;
        p->aux->attach_btf_id = prog->aux->attach_btf_id;
        p->aux->attach_btf = prog->aux->attach_btf;
        btf_get(p->aux->attach_btf);
        p->type = BPF_PROG_TYPE_LSM;
        p->expected_attach_type = BPF_LSM_MAC;
        bpf_prog_inc(p);
        bpf_link_init(&shim_link->link.link, BPF_LINK_TYPE_UNSPEC,
                      &bpf_shim_tramp_link_lops, p);
        bpf_cgroup_atype_get(p->aux->attach_btf_id, cgroup_atype);

        return shim_link;
}

static struct bpf_shim_tramp_link *cgroup_shim_find(struct bpf_trampoline *tr,
                                                    bpf_func_t bpf_func)
{
        struct bpf_tramp_link *link;
        int kind;

        for (kind = 0; kind < BPF_TRAMP_MAX; kind++) {
                hlist_for_each_entry(link, &tr->progs_hlist[kind], tramp_hlist) {
                        struct bpf_prog *p = link->link.prog;

                        if (p->bpf_func == bpf_func)
                                return container_of(link, struct bpf_shim_tramp_link, link);
                }
        }

        return NULL;
}

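/* Attach (or reuse) the shim program that dispatches BPF_LSM_CGROUP progs on
 * the trampoline for @prog's attach_btf_id; the shim entry point is picked
 * by bpf_lsm_find_cgroup_shim().
 */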
int bpf_trampoline_link_cgroup_shim(struct bpf_prog *prog,
                                    int cgroup_atype)
{
        struct bpf_shim_tramp_link *shim_link = NULL;
        struct bpf_attach_target_info tgt_info = {};
        struct bpf_trampoline *tr;
        bpf_func_t bpf_func;
        u64 key;
        int err;

        err = bpf_check_attach_target(NULL, prog, NULL,
                                      prog->aux->attach_btf_id,
                                      &tgt_info);
        if (err)
                return err;

        key = bpf_trampoline_compute_key(NULL, prog->aux->attach_btf,
                                         prog->aux->attach_btf_id);

        bpf_lsm_find_cgroup_shim(prog, &bpf_func);
        tr = bpf_trampoline_get(key, &tgt_info);
        if (!tr)
                return -ENOMEM;

        mutex_lock(&tr->mutex);

        shim_link = cgroup_shim_find(tr, bpf_func);
        if (shim_link) {
                /* Reusing existing shim attached by the other program. */
                bpf_link_inc(&shim_link->link.link);

                mutex_unlock(&tr->mutex);
                bpf_trampoline_put(tr); /* bpf_trampoline_get above */
                return 0;
        }

        /* Allocate and install new shim. */

        shim_link = cgroup_shim_alloc(prog, bpf_func, cgroup_atype);
        if (!shim_link) {
                err = -ENOMEM;
                goto err;
        }

        err = __bpf_trampoline_link_prog(&shim_link->link, tr);
        if (err)
                goto err;

        shim_link->trampoline = tr;
        /* note, we're still holding tr refcnt from above */

        mutex_unlock(&tr->mutex);

        return 0;
err:
        mutex_unlock(&tr->mutex);

        if (shim_link)
                bpf_link_put(&shim_link->link.link);

        /* have to release tr while _not_ holding its mutex */
        bpf_trampoline_put(tr); /* bpf_trampoline_get above */

        return err;
}

void bpf_trampoline_unlink_cgroup_shim(struct bpf_prog *prog)
{
        struct bpf_shim_tramp_link *shim_link = NULL;
        struct bpf_trampoline *tr;
        bpf_func_t bpf_func;
        u64 key;

        key = bpf_trampoline_compute_key(NULL, prog->aux->attach_btf,
                                         prog->aux->attach_btf_id);

        bpf_lsm_find_cgroup_shim(prog, &bpf_func);
        tr = bpf_trampoline_lookup(key);
        if (WARN_ON_ONCE(!tr))
                return;

        mutex_lock(&tr->mutex);
        shim_link = cgroup_shim_find(tr, bpf_func);
        mutex_unlock(&tr->mutex);

        if (shim_link)
                bpf_link_put(&shim_link->link.link);

        bpf_trampoline_put(tr); /* bpf_trampoline_lookup above */
}
#endif

struct bpf_trampoline *bpf_trampoline_get(u64 key,
                                          struct bpf_attach_target_info *tgt_info)
{
        struct bpf_trampoline *tr;

        tr = bpf_trampoline_lookup(key);
        if (!tr)
                return NULL;

        mutex_lock(&tr->mutex);
        if (tr->func.addr)
                goto out;

        memcpy(&tr->func.model, &tgt_info->fmodel, sizeof(tgt_info->fmodel));
        tr->func.addr = (void *)tgt_info->tgt_addr;
out:
        mutex_unlock(&tr->mutex);
        return tr;
}

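/* Drop a reference on @tr. On the last put the trampoline must have no progs
 * attached; it is unhashed and freed together with its ftrace_ops.
 */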
void bpf_trampoline_put(struct bpf_trampoline *tr)
{
        int i;

        if (!tr)
                return;
        mutex_lock(&trampoline_mutex);
        if (!refcount_dec_and_test(&tr->refcnt))
                goto out;
        WARN_ON_ONCE(mutex_is_locked(&tr->mutex));

        for (i = 0; i < BPF_TRAMP_MAX; i++)
                if (WARN_ON_ONCE(!hlist_empty(&tr->progs_hlist[i])))
                        goto out;

        /* This code will be executed even when the last bpf_tramp_image
         * is alive. All progs are detached from the trampoline and the
         * trampoline image is patched with jmp into epilogue to skip
         * fexit progs. The fentry-only trampoline will be freed via
         * multiple rcu callbacks.
         */
        hlist_del(&tr->hlist);
        if (tr->fops) {
                ftrace_free_filter(tr->fops);
                kfree(tr->fops);
        }
        kfree(tr);
out:
        mutex_unlock(&trampoline_mutex);
}

#define NO_START_TIME 1
static __always_inline u64 notrace bpf_prog_start_time(void)
{
        u64 start = NO_START_TIME;

        if (static_branch_unlikely(&bpf_stats_enabled_key)) {
                start = sched_clock();
                if (unlikely(!start))
                        start = NO_START_TIME;
        }
        return start;
}

/* The logic is similar to bpf_prog_run(), but with an explicit
 * rcu_read_lock() and migrate_disable() which are required
 * for the trampoline. The macro is split into
 * call __bpf_prog_enter
 * call prog->bpf_func
 * call __bpf_prog_exit
 *
 * __bpf_prog_enter returns:
 * 0 - skip execution of the bpf prog
 * 1 - execute bpf prog
 * [2..MAX_U64] - execute bpf prog and record execution time.
 *     This is start time.
 */
static u64 notrace __bpf_prog_enter_recur(struct bpf_prog *prog, struct bpf_tramp_run_ctx *run_ctx)
        __acquires(RCU)
{
        rcu_read_lock();
        migrate_disable();

        run_ctx->saved_run_ctx = bpf_set_run_ctx(&run_ctx->run_ctx);

        if (unlikely(this_cpu_inc_return(*(prog->active)) != 1)) {
                bpf_prog_inc_misses_counter(prog);
                return 0;
        }
        return bpf_prog_start_time();
}

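/* Fold one run into the prog's per-CPU stats (count and nanoseconds), but
 * only when BPF stats are enabled and a valid start timestamp was recorded.
 */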
static void notrace update_prog_stats(struct bpf_prog *prog,
                                      u64 start)
{
        struct bpf_prog_stats *stats;

        if (static_branch_unlikely(&bpf_stats_enabled_key) &&
            /* static_key could be enabled in __bpf_prog_enter*
             * and disabled in __bpf_prog_exit*.
             * And vice versa.
             * Hence check that 'start' is valid.
             */
            start > NO_START_TIME) {
                unsigned long flags;

                stats = this_cpu_ptr(prog->stats);
                flags = u64_stats_update_begin_irqsave(&stats->syncp);
                u64_stats_inc(&stats->cnt);
                u64_stats_add(&stats->nsecs, sched_clock() - start);
                u64_stats_update_end_irqrestore(&stats->syncp, flags);
        }
}

static void notrace __bpf_prog_exit_recur(struct bpf_prog *prog, u64 start,
                                          struct bpf_tramp_run_ctx *run_ctx)
        __releases(RCU)
{
        bpf_reset_run_ctx(run_ctx->saved_run_ctx);

        update_prog_stats(prog, start);
        this_cpu_dec(*(prog->active));
        migrate_enable();
        rcu_read_unlock();
}

static u64 notrace __bpf_prog_enter_lsm_cgroup(struct bpf_prog *prog,
                                               struct bpf_tramp_run_ctx *run_ctx)
        __acquires(RCU)
{
        /* Runtime stats are exported via actual BPF_LSM_CGROUP
         * programs, not the shims.
         */
        rcu_read_lock();
        migrate_disable();

        run_ctx->saved_run_ctx = bpf_set_run_ctx(&run_ctx->run_ctx);

        return NO_START_TIME;
}

static void notrace __bpf_prog_exit_lsm_cgroup(struct bpf_prog *prog, u64 start,
                                               struct bpf_tramp_run_ctx *run_ctx)
        __releases(RCU)
{
        bpf_reset_run_ctx(run_ctx->saved_run_ctx);

        migrate_enable();
        rcu_read_unlock();
}

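/* Sleepable variants: protected by rcu_read_lock_trace() instead of
 * rcu_read_lock(); the *_recur pair additionally uses the per-CPU active
 * counter and returns 0 to skip the prog on recursion.
 */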
u64 notrace __bpf_prog_enter_sleepable_recur(struct bpf_prog *prog,
                                             struct bpf_tramp_run_ctx *run_ctx)
{
        rcu_read_lock_trace();
        migrate_disable();
        might_fault();

        if (unlikely(this_cpu_inc_return(*(prog->active)) != 1)) {
                bpf_prog_inc_misses_counter(prog);
                return 0;
        }

        run_ctx->saved_run_ctx = bpf_set_run_ctx(&run_ctx->run_ctx);

        return bpf_prog_start_time();
}

void notrace __bpf_prog_exit_sleepable_recur(struct bpf_prog *prog, u64 start,
                                             struct bpf_tramp_run_ctx *run_ctx)
{
        bpf_reset_run_ctx(run_ctx->saved_run_ctx);

        update_prog_stats(prog, start);
        this_cpu_dec(*(prog->active));
        migrate_enable();
        rcu_read_unlock_trace();
}

static u64 notrace __bpf_prog_enter_sleepable(struct bpf_prog *prog,
                                              struct bpf_tramp_run_ctx *run_ctx)
{
        rcu_read_lock_trace();
        migrate_disable();
        might_fault();

        run_ctx->saved_run_ctx = bpf_set_run_ctx(&run_ctx->run_ctx);

        return bpf_prog_start_time();
}

static void notrace __bpf_prog_exit_sleepable(struct bpf_prog *prog, u64 start,
                                              struct bpf_tramp_run_ctx *run_ctx)
{
        bpf_reset_run_ctx(run_ctx->saved_run_ctx);

        update_prog_stats(prog, start);
        migrate_enable();
        rcu_read_unlock_trace();
}

static u64 notrace __bpf_prog_enter(struct bpf_prog *prog,
                                    struct bpf_tramp_run_ctx *run_ctx)
        __acquires(RCU)
{
        rcu_read_lock();
        migrate_disable();

        run_ctx->saved_run_ctx = bpf_set_run_ctx(&run_ctx->run_ctx);

        return bpf_prog_start_time();
}

static void notrace __bpf_prog_exit(struct bpf_prog *prog, u64 start,
                                    struct bpf_tramp_run_ctx *run_ctx)
        __releases(RCU)
{
        bpf_reset_run_ctx(run_ctx->saved_run_ctx);

        update_prog_stats(prog, start);
        migrate_enable();
        rcu_read_unlock();
}

void notrace __bpf_tramp_enter(struct bpf_tramp_image *tr)
{
        percpu_ref_get(&tr->pcref);
}

void notrace __bpf_tramp_exit(struct bpf_tramp_image *tr)
{
        percpu_ref_put(&tr->pcref);
}

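/* Pick the enter/exit helper pair the JIT emits around a prog, based on
 * sleepability, recursion protection and BPF_LSM_CGROUP.
 */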
bpf_trampoline_enter_t bpf_trampoline_enter(const struct bpf_prog *prog)
{
        bool sleepable = prog->aux->sleepable;

        if (bpf_prog_check_recur(prog))
                return sleepable ? __bpf_prog_enter_sleepable_recur :
                        __bpf_prog_enter_recur;

        if (resolve_prog_type(prog) == BPF_PROG_TYPE_LSM &&
            prog->expected_attach_type == BPF_LSM_CGROUP)
                return __bpf_prog_enter_lsm_cgroup;

        return sleepable ? __bpf_prog_enter_sleepable : __bpf_prog_enter;
}

bpf_trampoline_exit_t bpf_trampoline_exit(const struct bpf_prog *prog)
{
        bool sleepable = prog->aux->sleepable;

        if (bpf_prog_check_recur(prog))
                return sleepable ? __bpf_prog_exit_sleepable_recur :
                        __bpf_prog_exit_recur;

        if (resolve_prog_type(prog) == BPF_PROG_TYPE_LSM &&
            prog->expected_attach_type == BPF_LSM_CGROUP)
                return __bpf_prog_exit_lsm_cgroup;

        return sleepable ? __bpf_prog_exit_sleepable : __bpf_prog_exit;
}

int __weak
arch_prepare_bpf_trampoline(struct bpf_tramp_image *tr, void *image, void *image_end,
                            const struct btf_func_model *m, u32 flags,
                            struct bpf_tramp_links *tlinks,
                            void *orig_call)
{
        return -ENOTSUPP;
}

static int __init init_trampolines(void)
{
        int i;

        for (i = 0; i < TRAMPOLINE_TABLE_SIZE; i++)
                INIT_HLIST_HEAD(&trampoline_table[i]);
        return 0;
}
late_initcall(init_trampolines);