bpf: Avoid taking spinlock in bpf_task_storage_get if potential deadlock is detected
[platform/kernel/linux-starfive.git] / kernel / bpf / bpf_task_storage.c
1 // SPDX-License-Identifier: GPL-2.0
2 /*
3  * Copyright (c) 2020 Facebook
4  * Copyright 2020 Google LLC.
5  */
6
7 #include <linux/pid.h>
8 #include <linux/sched.h>
9 #include <linux/rculist.h>
10 #include <linux/list.h>
11 #include <linux/hash.h>
12 #include <linux/types.h>
13 #include <linux/spinlock.h>
14 #include <linux/bpf.h>
15 #include <linux/bpf_local_storage.h>
16 #include <linux/filter.h>
17 #include <uapi/linux/btf.h>
18 #include <linux/btf_ids.h>
19 #include <linux/fdtable.h>
20 #include <linux/rcupdate_trace.h>
21
/* Cache object for task storage maps; task_storage_map_alloc() and
 * task_storage_map_free() obtain and release a map's cache_idx from it.
 */
DEFINE_BPF_STORAGE_CACHE(task_cache);

/* Per-CPU counter bumped by bpf_task_storage_lock()/_trylock() and dropped
 * by bpf_task_storage_unlock().  bpf_task_storage_trylock() treats any value
 * other than 1 after its own increment as "already busy on this CPU".
 */
static DEFINE_PER_CPU(int, bpf_task_storage_busy);
25
/* Unconditionally mark task storage as busy on the current CPU.
 *
 * Migration is disabled first so that the per-CPU increment and the
 * matching decrement in bpf_task_storage_unlock() hit the same counter.
 */
static void bpf_task_storage_lock(void)
{
        migrate_disable();
        this_cpu_inc(bpf_task_storage_busy);
}
31
/* Undo bpf_task_storage_lock() or a successful bpf_task_storage_trylock():
 * drop the per-CPU busy count, then re-enable migration.
 */
static void bpf_task_storage_unlock(void)
{
        this_cpu_dec(bpf_task_storage_busy);
        migrate_enable();
}
37
38 static bool bpf_task_storage_trylock(void)
39 {
40         migrate_disable();
41         if (unlikely(this_cpu_inc_return(bpf_task_storage_busy) != 1)) {
42                 this_cpu_dec(bpf_task_storage_busy);
43                 migrate_enable();
44                 return false;
45         }
46         return true;
47 }
48
49 static struct bpf_local_storage __rcu **task_storage_ptr(void *owner)
50 {
51         struct task_struct *task = owner;
52
53         return &task->bpf_storage;
54 }
55
56 static struct bpf_local_storage_data *
57 task_storage_lookup(struct task_struct *task, struct bpf_map *map,
58                     bool cacheit_lockit)
59 {
60         struct bpf_local_storage *task_storage;
61         struct bpf_local_storage_map *smap;
62
63         task_storage =
64                 rcu_dereference_check(task->bpf_storage, bpf_rcu_lock_held());
65         if (!task_storage)
66                 return NULL;
67
68         smap = (struct bpf_local_storage_map *)map;
69         return bpf_local_storage_lookup(task_storage, smap, cacheit_lockit);
70 }
71
72 void bpf_task_storage_free(struct task_struct *task)
73 {
74         struct bpf_local_storage_elem *selem;
75         struct bpf_local_storage *local_storage;
76         bool free_task_storage = false;
77         struct hlist_node *n;
78         unsigned long flags;
79
80         rcu_read_lock();
81
82         local_storage = rcu_dereference(task->bpf_storage);
83         if (!local_storage) {
84                 rcu_read_unlock();
85                 return;
86         }
87
88         /* Neither the bpf_prog nor the bpf-map's syscall
89          * could be modifying the local_storage->list now.
90          * Thus, no elem can be added-to or deleted-from the
91          * local_storage->list by the bpf_prog or by the bpf-map's syscall.
92          *
93          * It is racing with bpf_local_storage_map_free() alone
94          * when unlinking elem from the local_storage->list and
95          * the map's bucket->list.
96          */
97         bpf_task_storage_lock();
98         raw_spin_lock_irqsave(&local_storage->lock, flags);
99         hlist_for_each_entry_safe(selem, n, &local_storage->list, snode) {
100                 /* Always unlink from map before unlinking from
101                  * local_storage.
102                  */
103                 bpf_selem_unlink_map(selem);
104                 free_task_storage = bpf_selem_unlink_storage_nolock(
105                         local_storage, selem, false, false);
106         }
107         raw_spin_unlock_irqrestore(&local_storage->lock, flags);
108         bpf_task_storage_unlock();
109         rcu_read_unlock();
110
111         /* free_task_storage should always be true as long as
112          * local_storage->list was non-empty.
113          */
114         if (free_task_storage)
115                 kfree_rcu(local_storage, rcu);
116 }
117
118 static void *bpf_pid_task_storage_lookup_elem(struct bpf_map *map, void *key)
119 {
120         struct bpf_local_storage_data *sdata;
121         struct task_struct *task;
122         unsigned int f_flags;
123         struct pid *pid;
124         int fd, err;
125
126         fd = *(int *)key;
127         pid = pidfd_get_pid(fd, &f_flags);
128         if (IS_ERR(pid))
129                 return ERR_CAST(pid);
130
131         /* We should be in an RCU read side critical section, it should be safe
132          * to call pid_task.
133          */
134         WARN_ON_ONCE(!rcu_read_lock_held());
135         task = pid_task(pid, PIDTYPE_PID);
136         if (!task) {
137                 err = -ENOENT;
138                 goto out;
139         }
140
141         bpf_task_storage_lock();
142         sdata = task_storage_lookup(task, map, true);
143         bpf_task_storage_unlock();
144         put_pid(pid);
145         return sdata ? sdata->data : NULL;
146 out:
147         put_pid(pid);
148         return ERR_PTR(err);
149 }
150
151 static int bpf_pid_task_storage_update_elem(struct bpf_map *map, void *key,
152                                             void *value, u64 map_flags)
153 {
154         struct bpf_local_storage_data *sdata;
155         struct task_struct *task;
156         unsigned int f_flags;
157         struct pid *pid;
158         int fd, err;
159
160         fd = *(int *)key;
161         pid = pidfd_get_pid(fd, &f_flags);
162         if (IS_ERR(pid))
163                 return PTR_ERR(pid);
164
165         /* We should be in an RCU read side critical section, it should be safe
166          * to call pid_task.
167          */
168         WARN_ON_ONCE(!rcu_read_lock_held());
169         task = pid_task(pid, PIDTYPE_PID);
170         if (!task) {
171                 err = -ENOENT;
172                 goto out;
173         }
174
175         bpf_task_storage_lock();
176         sdata = bpf_local_storage_update(
177                 task, (struct bpf_local_storage_map *)map, value, map_flags,
178                 GFP_ATOMIC);
179         bpf_task_storage_unlock();
180
181         err = PTR_ERR_OR_ZERO(sdata);
182 out:
183         put_pid(pid);
184         return err;
185 }
186
187 static int task_storage_delete(struct task_struct *task, struct bpf_map *map)
188 {
189         struct bpf_local_storage_data *sdata;
190
191         sdata = task_storage_lookup(task, map, false);
192         if (!sdata)
193                 return -ENOENT;
194
195         bpf_selem_unlink(SELEM(sdata), true);
196
197         return 0;
198 }
199
200 static int bpf_pid_task_storage_delete_elem(struct bpf_map *map, void *key)
201 {
202         struct task_struct *task;
203         unsigned int f_flags;
204         struct pid *pid;
205         int fd, err;
206
207         fd = *(int *)key;
208         pid = pidfd_get_pid(fd, &f_flags);
209         if (IS_ERR(pid))
210                 return PTR_ERR(pid);
211
212         /* We should be in an RCU read side critical section, it should be safe
213          * to call pid_task.
214          */
215         WARN_ON_ONCE(!rcu_read_lock_held());
216         task = pid_task(pid, PIDTYPE_PID);
217         if (!task) {
218                 err = -ENOENT;
219                 goto out;
220         }
221
222         bpf_task_storage_lock();
223         err = task_storage_delete(task, map);
224         bpf_task_storage_unlock();
225 out:
226         put_pid(pid);
227         return err;
228 }
229
230 /* Called by bpf_task_storage_get*() helpers */
231 static void *__bpf_task_storage_get(struct bpf_map *map,
232                                     struct task_struct *task, void *value,
233                                     u64 flags, gfp_t gfp_flags, bool nobusy)
234 {
235         struct bpf_local_storage_data *sdata;
236
237         sdata = task_storage_lookup(task, map, nobusy);
238         if (sdata)
239                 return sdata->data;
240
241         /* only allocate new storage, when the task is refcounted */
242         if (refcount_read(&task->usage) &&
243             (flags & BPF_LOCAL_STORAGE_GET_F_CREATE) && nobusy) {
244                 sdata = bpf_local_storage_update(
245                         task, (struct bpf_local_storage_map *)map, value,
246                         BPF_NOEXIST, gfp_flags);
247                 return IS_ERR(sdata) ? NULL : sdata->data;
248         }
249
250         return NULL;
251 }
252
253 /* *gfp_flags* is a hidden argument provided by the verifier */
254 BPF_CALL_5(bpf_task_storage_get_recur, struct bpf_map *, map, struct task_struct *,
255            task, void *, value, u64, flags, gfp_t, gfp_flags)
256 {
257         bool nobusy;
258         void *data;
259
260         WARN_ON_ONCE(!bpf_rcu_lock_held());
261         if (flags & ~BPF_LOCAL_STORAGE_GET_F_CREATE || !task)
262                 return (unsigned long)NULL;
263
264         nobusy = bpf_task_storage_trylock();
265         data = __bpf_task_storage_get(map, task, value, flags,
266                                       gfp_flags, nobusy);
267         if (nobusy)
268                 bpf_task_storage_unlock();
269         return (unsigned long)data;
270 }
271
272 BPF_CALL_2(bpf_task_storage_delete_recur, struct bpf_map *, map, struct task_struct *,
273            task)
274 {
275         int ret;
276
277         WARN_ON_ONCE(!bpf_rcu_lock_held());
278         if (!task)
279                 return -EINVAL;
280
281         if (!bpf_task_storage_trylock())
282                 return -EBUSY;
283
284         /* This helper must only be called from places where the lifetime of the task
285          * is guaranteed. Either by being refcounted or by being protected
286          * by an RCU read-side critical section.
287          */
288         ret = task_storage_delete(task, map);
289         bpf_task_storage_unlock();
290         return ret;
291 }
292
/* map_get_next_key callback: always fails with -ENOTSUPP; none of the
 * arguments are used.
 */
static int notsupp_get_next_key(struct bpf_map *map, void *key, void *next_key)
{
        return -ENOTSUPP;
}
297
298 static struct bpf_map *task_storage_map_alloc(union bpf_attr *attr)
299 {
300         struct bpf_local_storage_map *smap;
301
302         smap = bpf_local_storage_map_alloc(attr);
303         if (IS_ERR(smap))
304                 return ERR_CAST(smap);
305
306         smap->cache_idx = bpf_local_storage_cache_idx_get(&task_cache);
307         return &smap->map;
308 }
309
310 static void task_storage_map_free(struct bpf_map *map)
311 {
312         struct bpf_local_storage_map *smap;
313
314         smap = (struct bpf_local_storage_map *)map;
315         bpf_local_storage_cache_idx_free(&task_cache, smap->cache_idx);
316         bpf_local_storage_map_free(smap, &bpf_task_storage_busy);
317 }
318
/* BTF id of struct bpf_local_storage_map, referenced by .map_btf_id below. */
BTF_ID_LIST_SINGLE(task_storage_map_btf_ids, struct, bpf_local_storage_map)
/* Map operations for task storage maps.  The lookup/update/delete element
 * callbacks are the pidfd-keyed variants defined in this file; key iteration
 * is not supported.
 */
const struct bpf_map_ops task_storage_map_ops = {
        .map_meta_equal = bpf_map_meta_equal,
        .map_alloc_check = bpf_local_storage_map_alloc_check,
        .map_alloc = task_storage_map_alloc,
        .map_free = task_storage_map_free,
        .map_get_next_key = notsupp_get_next_key,
        .map_lookup_elem = bpf_pid_task_storage_lookup_elem,
        .map_update_elem = bpf_pid_task_storage_update_elem,
        .map_delete_elem = bpf_pid_task_storage_delete_elem,
        .map_check_btf = bpf_local_storage_map_check_btf,
        .map_btf_id = &task_storage_map_btf_ids[0],
        .map_owner_storage_ptr = task_storage_ptr,
};
333
/* Helper prototype for bpf_task_storage_get_recur(): map, task (BTF task
 * pointer), optional initial value, flags.  Only four argument types are
 * declared here although the helper is defined with five parameters.
 */
const struct bpf_func_proto bpf_task_storage_get_recur_proto = {
        .func = bpf_task_storage_get_recur,
        .gpl_only = false,
        .ret_type = RET_PTR_TO_MAP_VALUE_OR_NULL,
        .arg1_type = ARG_CONST_MAP_PTR,
        .arg2_type = ARG_PTR_TO_BTF_ID,
        .arg2_btf_id = &btf_tracing_ids[BTF_TRACING_TYPE_TASK],
        .arg3_type = ARG_PTR_TO_MAP_VALUE_OR_NULL,
        .arg4_type = ARG_ANYTHING,
};
344
/* Helper prototype for bpf_task_storage_delete_recur(): takes a map and a
 * BTF task pointer and returns an integer status.
 */
const struct bpf_func_proto bpf_task_storage_delete_recur_proto = {
        .func = bpf_task_storage_delete_recur,
        .gpl_only = false,
        .ret_type = RET_INTEGER,
        .arg1_type = ARG_CONST_MAP_PTR,
        .arg2_type = ARG_PTR_TO_BTF_ID,
        .arg2_btf_id = &btf_tracing_ids[BTF_TRACING_TYPE_TASK],
};