kernel/bpf/bpf_local_storage.c
// SPDX-License-Identifier: GPL-2.0
/* Copyright (c) 2019 Facebook  */
#include <linux/rculist.h>
#include <linux/list.h>
#include <linux/hash.h>
#include <linux/types.h>
#include <linux/spinlock.h>
#include <linux/bpf.h>
#include <linux/btf_ids.h>
#include <linux/bpf_local_storage.h>
#include <net/sock.h>
#include <uapi/linux/sock_diag.h>
#include <uapi/linux/btf.h>
#include <linux/rcupdate.h>
#include <linux/rcupdate_trace.h>
#include <linux/rcupdate_wait.h>

#define BPF_LOCAL_STORAGE_CREATE_FLAG_MASK (BPF_F_NO_PREALLOC | BPF_F_CLONE)

static struct bpf_local_storage_map_bucket *
select_bucket(struct bpf_local_storage_map *smap,
              struct bpf_local_storage_elem *selem)
{
        return &smap->buckets[hash_ptr(selem, smap->bucket_log)];
}
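
/* Example: the bucket is derived from the selem pointer itself (not from
 * a user-visible key), so an elem always hashes to the same bucket for
 * its whole lifetime.  This is what lets bpf_selem_unlink_map() find the
 * bucket again later with only the selem in hand:
 *
 *      b = select_bucket(smap, selem);
 */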

static int mem_charge(struct bpf_local_storage_map *smap, void *owner, u32 size)
{
        struct bpf_map *map = &smap->map;

        if (!map->ops->map_local_storage_charge)
                return 0;

        return map->ops->map_local_storage_charge(smap, owner, size);
}

static void mem_uncharge(struct bpf_local_storage_map *smap, void *owner,
                         u32 size)
{
        struct bpf_map *map = &smap->map;

        if (map->ops->map_local_storage_uncharge)
                map->ops->map_local_storage_uncharge(smap, owner, size);
}

static struct bpf_local_storage __rcu **
owner_storage(struct bpf_local_storage_map *smap, void *owner)
{
        struct bpf_map *map = &smap->map;

        return map->ops->map_owner_storage_ptr(owner);
}

static bool selem_linked_to_storage(const struct bpf_local_storage_elem *selem)
{
        return !hlist_unhashed(&selem->snode);
}

static bool selem_linked_to_map(const struct bpf_local_storage_elem *selem)
{
        return !hlist_unhashed(&selem->map_node);
}

struct bpf_local_storage_elem *
bpf_selem_alloc(struct bpf_local_storage_map *smap, void *owner,
                void *value, bool charge_mem, gfp_t gfp_flags)
{
        struct bpf_local_storage_elem *selem;

        if (charge_mem && mem_charge(smap, owner, smap->elem_size))
                return NULL;

        selem = bpf_map_kzalloc(&smap->map, smap->elem_size,
                                gfp_flags | __GFP_NOWARN);
        if (selem) {
                if (value)
                        copy_map_value(&smap->map, SDATA(selem)->data, value);
                return selem;
        }

        if (charge_mem)
                mem_uncharge(smap, owner, smap->elem_size);

        return NULL;
}

void bpf_local_storage_free_rcu(struct rcu_head *rcu)
{
        struct bpf_local_storage *local_storage;

        /* If RCU Tasks Trace grace period implies RCU grace period, do
         * kfree(), else do kfree_rcu().
         */
        local_storage = container_of(rcu, struct bpf_local_storage, rcu);
        if (rcu_trace_implies_rcu_gp())
                kfree(local_storage);
        else
                kfree_rcu(local_storage, rcu);
}

static void bpf_selem_free_rcu(struct rcu_head *rcu)
{
        struct bpf_local_storage_elem *selem;

        selem = container_of(rcu, struct bpf_local_storage_elem, rcu);
        if (rcu_trace_implies_rcu_gp())
                kfree(selem);
        else
                kfree_rcu(selem, rcu);
}

/* local_storage->lock must be held and selem->local_storage == local_storage.
 * The caller must ensure selem->smap is still valid to be
 * dereferenced for its smap->elem_size and smap->cache_idx.
 */
static bool bpf_selem_unlink_storage_nolock(struct bpf_local_storage *local_storage,
                                            struct bpf_local_storage_elem *selem,
                                            bool uncharge_mem, bool use_trace_rcu)
{
        struct bpf_local_storage_map *smap;
        bool free_local_storage;
        void *owner;

        smap = rcu_dereference_check(SDATA(selem)->smap, bpf_rcu_lock_held());
        owner = local_storage->owner;

        /* All uncharging on the owner must be done first.
         * The owner may be freed once the last selem is unlinked
         * from local_storage.
         */
        if (uncharge_mem)
                mem_uncharge(smap, owner, smap->elem_size);

        free_local_storage = hlist_is_singular_node(&selem->snode,
                                                    &local_storage->list);
        if (free_local_storage) {
                mem_uncharge(smap, owner, sizeof(struct bpf_local_storage));
                local_storage->owner = NULL;

                /* After this RCU_INIT, owner may be freed and cannot be used */
                RCU_INIT_POINTER(*owner_storage(smap, owner), NULL);

                /* local_storage is not freed now.  local_storage->lock is
                 * still held and raw_spin_unlock_bh(&local_storage->lock)
                 * will be done by the caller.
                 *
                 * Although the unlock will be done under
                 * rcu_read_lock(), it is more intuitive to
                 * read if the freeing of the storage is done
                 * after the raw_spin_unlock_bh(&local_storage->lock).
                 *
                 * Hence, a "bool free_local_storage" is returned
                 * to the caller, which then frees the storage after
                 * all the RCU grace periods have expired.
                 */
        }
        hlist_del_init_rcu(&selem->snode);
        if (rcu_access_pointer(local_storage->cache[smap->cache_idx]) ==
            SDATA(selem))
                RCU_INIT_POINTER(local_storage->cache[smap->cache_idx], NULL);

        if (use_trace_rcu)
                call_rcu_tasks_trace(&selem->rcu, bpf_selem_free_rcu);
        else
                kfree_rcu(selem, rcu);

        return free_local_storage;
}

static void __bpf_selem_unlink_storage(struct bpf_local_storage_elem *selem,
                                       bool use_trace_rcu)
{
        struct bpf_local_storage *local_storage;
        bool free_local_storage = false;
        unsigned long flags;

        if (unlikely(!selem_linked_to_storage(selem)))
                /* selem has already been unlinked from sk */
                return;

        local_storage = rcu_dereference_check(selem->local_storage,
                                              bpf_rcu_lock_held());
        raw_spin_lock_irqsave(&local_storage->lock, flags);
        if (likely(selem_linked_to_storage(selem)))
                free_local_storage = bpf_selem_unlink_storage_nolock(
                        local_storage, selem, true, use_trace_rcu);
        raw_spin_unlock_irqrestore(&local_storage->lock, flags);

        if (free_local_storage) {
                if (use_trace_rcu)
                        call_rcu_tasks_trace(&local_storage->rcu,
                                     bpf_local_storage_free_rcu);
                else
                        kfree_rcu(local_storage, rcu);
        }
}

void bpf_selem_link_storage_nolock(struct bpf_local_storage *local_storage,
                                   struct bpf_local_storage_elem *selem)
{
        RCU_INIT_POINTER(selem->local_storage, local_storage);
        hlist_add_head_rcu(&selem->snode, &local_storage->list);
}

void bpf_selem_unlink_map(struct bpf_local_storage_elem *selem)
{
        struct bpf_local_storage_map *smap;
        struct bpf_local_storage_map_bucket *b;
        unsigned long flags;

        if (unlikely(!selem_linked_to_map(selem)))
                /* selem has already been unlinked from smap */
                return;

        smap = rcu_dereference_check(SDATA(selem)->smap, bpf_rcu_lock_held());
        b = select_bucket(smap, selem);
        raw_spin_lock_irqsave(&b->lock, flags);
        if (likely(selem_linked_to_map(selem)))
                hlist_del_init_rcu(&selem->map_node);
        raw_spin_unlock_irqrestore(&b->lock, flags);
}

void bpf_selem_link_map(struct bpf_local_storage_map *smap,
                        struct bpf_local_storage_elem *selem)
{
        struct bpf_local_storage_map_bucket *b = select_bucket(smap, selem);
        unsigned long flags;

        raw_spin_lock_irqsave(&b->lock, flags);
        RCU_INIT_POINTER(SDATA(selem)->smap, smap);
        hlist_add_head_rcu(&selem->map_node, &b->list);
        raw_spin_unlock_irqrestore(&b->lock, flags);
}

void bpf_selem_unlink(struct bpf_local_storage_elem *selem, bool use_trace_rcu)
{
        /* Always unlink from map before unlinking from local_storage
         * because selem will be freed after it is successfully unlinked
         * from the local_storage.
         */
        bpf_selem_unlink_map(selem);
        __bpf_selem_unlink_storage(selem, use_trace_rcu);
}

/* If cacheit_lockit is false, this lookup function is lockless */
struct bpf_local_storage_data *
bpf_local_storage_lookup(struct bpf_local_storage *local_storage,
                         struct bpf_local_storage_map *smap,
                         bool cacheit_lockit)
{
        struct bpf_local_storage_data *sdata;
        struct bpf_local_storage_elem *selem;

        /* Fast path (cache hit) */
        sdata = rcu_dereference_check(local_storage->cache[smap->cache_idx],
                                      bpf_rcu_lock_held());
        if (sdata && rcu_access_pointer(sdata->smap) == smap)
                return sdata;

        /* Slow path (cache miss) */
        hlist_for_each_entry_rcu(selem, &local_storage->list, snode,
                                  rcu_read_lock_trace_held())
                if (rcu_access_pointer(SDATA(selem)->smap) == smap)
                        break;

        if (!selem)
                return NULL;

        sdata = SDATA(selem);
        if (cacheit_lockit) {
                unsigned long flags;

                /* spinlock is needed to avoid racing with the
                 * parallel delete.  Otherwise, publishing an already
                 * deleted sdata to the cache will become a use-after-free
                 * problem in the next bpf_local_storage_lookup().
                 */
                raw_spin_lock_irqsave(&local_storage->lock, flags);
                if (selem_linked_to_storage(selem))
                        rcu_assign_pointer(local_storage->cache[smap->cache_idx],
                                           sdata);
                raw_spin_unlock_irqrestore(&local_storage->lock, flags);
        }

        return sdata;
}
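
/* Example (sketch): a typical caller, e.g. the sk_storage lookup path in
 * net/core/bpf_sk_storage.c, first dereferences the owner's storage
 * pointer under RCU and then does the cached lookup, roughly:
 *
 *      sk_storage = rcu_dereference_check(sk->sk_bpf_storage,
 *                                         bpf_rcu_lock_held());
 *      if (!sk_storage)
 *              return NULL;
 *      return bpf_local_storage_lookup(sk_storage, smap, cacheit_lockit);
 */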

static int check_flags(const struct bpf_local_storage_data *old_sdata,
                       u64 map_flags)
{
        if (old_sdata && (map_flags & ~BPF_F_LOCK) == BPF_NOEXIST)
                /* elem already exists */
                return -EEXIST;

        if (!old_sdata && (map_flags & ~BPF_F_LOCK) == BPF_EXIST)
                /* elem doesn't exist, cannot update it */
                return -ENOENT;

        return 0;
}
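
/* Flag semantics enforced by check_flags(), with BPF_F_LOCK masked out:
 *
 *      map_flags       old_sdata exists?       result
 *      BPF_ANY         yes or no               0 (create or update)
 *      BPF_NOEXIST     no                      0 (create)
 *      BPF_NOEXIST     yes                     -EEXIST
 *      BPF_EXIST       yes                     0 (update)
 *      BPF_EXIST       no                      -ENOENT
 */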

int bpf_local_storage_alloc(void *owner,
                            struct bpf_local_storage_map *smap,
                            struct bpf_local_storage_elem *first_selem,
                            gfp_t gfp_flags)
{
        struct bpf_local_storage *prev_storage, *storage;
        struct bpf_local_storage **owner_storage_ptr;
        int err;

        err = mem_charge(smap, owner, sizeof(*storage));
        if (err)
                return err;

        storage = bpf_map_kzalloc(&smap->map, sizeof(*storage),
                                  gfp_flags | __GFP_NOWARN);
        if (!storage) {
                err = -ENOMEM;
                goto uncharge;
        }

        INIT_HLIST_HEAD(&storage->list);
        raw_spin_lock_init(&storage->lock);
        storage->owner = owner;

        bpf_selem_link_storage_nolock(storage, first_selem);
        bpf_selem_link_map(smap, first_selem);

        owner_storage_ptr =
                (struct bpf_local_storage **)owner_storage(smap, owner);
        /* Publish storage to the owner.
         * Instead of using any lock of the kernel object (i.e. owner),
         * cmpxchg will work with any kernel object regardless of the
         * running context (bh, irq, etc.).
         *
         * From now on, the owner->storage pointer (e.g. sk->sk_bpf_storage)
         * is protected by the storage->lock.  Hence, when freeing
         * the owner->storage, the storage->lock must be held before
         * setting owner->storage ptr to NULL.
         */
        prev_storage = cmpxchg(owner_storage_ptr, NULL, storage);
        if (unlikely(prev_storage)) {
                bpf_selem_unlink_map(first_selem);
                err = -EAGAIN;
                goto uncharge;

                /* Note that even though first_selem was linked to smap's
                 * bucket->list, it can be freed immediately
                 * (instead of kfree_rcu) because
                 * bpf_local_storage_map_free() does a
                 * synchronize_rcu_mult (waiting for both sleepable and
                 * normal programs) before walking the bucket->list.
                 * Hence, no one is accessing selem from the
                 * bucket->list under rcu_read_lock().
                 */
        }

        return 0;

uncharge:
        kfree(storage);
        mem_uncharge(smap, owner, sizeof(*storage));
        return err;
}
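
/* Example: two contexts racing to create the very first selem for the
 * same owner may both allocate a storage, but only one cmpxchg() can
 * move *owner_storage_ptr from NULL to its storage.  The loser unlinks
 * its first_selem from the map and returns -EAGAIN:
 *
 *      CPU0                                    CPU1
 *      cmpxchg(ptr, NULL, s0) -> NULL (wins)
 *                                              cmpxchg(ptr, NULL, s1) -> s0
 *                                              unlink, kfree(s1), -EAGAIN
 */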

/* sk cannot be going away because it is linking new elem
 * to sk->sk_bpf_storage. (i.e. sk->sk_refcnt cannot be 0).
 * Otherwise, it will become a leak (and other memory issues
 * during map destruction).
 */
struct bpf_local_storage_data *
bpf_local_storage_update(void *owner, struct bpf_local_storage_map *smap,
                         void *value, u64 map_flags, gfp_t gfp_flags)
{
        struct bpf_local_storage_data *old_sdata = NULL;
        struct bpf_local_storage_elem *selem = NULL;
        struct bpf_local_storage *local_storage;
        unsigned long flags;
        int err;

        /* BPF_EXIST and BPF_NOEXIST cannot be both set */
        if (unlikely((map_flags & ~BPF_F_LOCK) > BPF_EXIST) ||
            /* BPF_F_LOCK can only be used in a value with spin_lock */
            unlikely((map_flags & BPF_F_LOCK) &&
                     !btf_record_has_field(smap->map.record, BPF_SPIN_LOCK)))
                return ERR_PTR(-EINVAL);

        if (gfp_flags == GFP_KERNEL && (map_flags & ~BPF_F_LOCK) != BPF_NOEXIST)
                return ERR_PTR(-EINVAL);

        local_storage = rcu_dereference_check(*owner_storage(smap, owner),
                                              bpf_rcu_lock_held());
        if (!local_storage || hlist_empty(&local_storage->list)) {
                /* Very first elem for the owner */
                err = check_flags(NULL, map_flags);
                if (err)
                        return ERR_PTR(err);

                selem = bpf_selem_alloc(smap, owner, value, true, gfp_flags);
                if (!selem)
                        return ERR_PTR(-ENOMEM);

                err = bpf_local_storage_alloc(owner, smap, selem, gfp_flags);
                if (err) {
                        kfree(selem);
                        mem_uncharge(smap, owner, smap->elem_size);
                        return ERR_PTR(err);
                }

                return SDATA(selem);
        }

        if ((map_flags & BPF_F_LOCK) && !(map_flags & BPF_NOEXIST)) {
                /* Hoping to find an old_sdata to do inline update
                 * such that it can avoid taking the local_storage->lock
                 * and changing the lists.
                 */
                old_sdata =
                        bpf_local_storage_lookup(local_storage, smap, false);
                err = check_flags(old_sdata, map_flags);
                if (err)
                        return ERR_PTR(err);
                if (old_sdata && selem_linked_to_storage(SELEM(old_sdata))) {
                        copy_map_value_locked(&smap->map, old_sdata->data,
                                              value, false);
                        return old_sdata;
                }
        }

        if (gfp_flags == GFP_KERNEL) {
                selem = bpf_selem_alloc(smap, owner, value, true, gfp_flags);
                if (!selem)
                        return ERR_PTR(-ENOMEM);
        }

        raw_spin_lock_irqsave(&local_storage->lock, flags);

        /* Recheck local_storage->list under local_storage->lock */
        if (unlikely(hlist_empty(&local_storage->list))) {
                /* A parallel del is happening and local_storage is going
                 * away.  It has just been checked before, so very
                 * unlikely.  Return instead of retry to keep things
                 * simple.
                 */
                err = -EAGAIN;
                goto unlock_err;
        }

        old_sdata = bpf_local_storage_lookup(local_storage, smap, false);
        err = check_flags(old_sdata, map_flags);
        if (err)
                goto unlock_err;

        if (old_sdata && (map_flags & BPF_F_LOCK)) {
                copy_map_value_locked(&smap->map, old_sdata->data, value,
                                      false);
                selem = SELEM(old_sdata);
                goto unlock;
        }

        if (gfp_flags != GFP_KERNEL) {
                /* local_storage->lock is held.  Hence, we are sure
                 * we can unlink and uncharge the old_sdata successfully
                 * later.  Hence, instead of charging the new selem now
                 * and then uncharge the old selem later (which may cause
                 * a potential but unnecessary charge failure), avoid taking
                 * a charge at all here (the "!old_sdata" check) and the
                 * old_sdata will not be uncharged later during
                 * bpf_selem_unlink_storage_nolock().
                 */
                selem = bpf_selem_alloc(smap, owner, value, !old_sdata, gfp_flags);
                if (!selem) {
                        err = -ENOMEM;
                        goto unlock_err;
                }
        }

        /* First, link the new selem to the map */
        bpf_selem_link_map(smap, selem);

        /* Second, link (and publish) the new selem to local_storage */
        bpf_selem_link_storage_nolock(local_storage, selem);

        /* Third, remove old selem, SELEM(old_sdata) */
        if (old_sdata) {
                bpf_selem_unlink_map(SELEM(old_sdata));
                bpf_selem_unlink_storage_nolock(local_storage, SELEM(old_sdata),
                                                false, true);
        }

unlock:
        raw_spin_unlock_irqrestore(&local_storage->lock, flags);
        return SDATA(selem);

unlock_err:
        raw_spin_unlock_irqrestore(&local_storage->lock, flags);
        if (selem) {
                mem_uncharge(smap, owner, smap->elem_size);
                kfree(selem);
        }
        return ERR_PTR(err);
}

static u16 bpf_local_storage_cache_idx_get(struct bpf_local_storage_cache *cache)
{
        u64 min_usage = U64_MAX;
        u16 i, res = 0;

        spin_lock(&cache->idx_lock);

        for (i = 0; i < BPF_LOCAL_STORAGE_CACHE_SIZE; i++) {
                if (cache->idx_usage_counts[i] < min_usage) {
                        min_usage = cache->idx_usage_counts[i];
                        res = i;

                        /* Found a free cache_idx */
                        if (!min_usage)
                                break;
                }
        }
        cache->idx_usage_counts[res]++;

        spin_unlock(&cache->idx_lock);

        return res;
}
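
/* Example: with idx_usage_counts = { 2, 0, 1, ... }, the scan above stops
 * at i == 1 (min_usage == 0, a free slot), bumps its count and returns 1.
 * If no count is zero, the least-used index is reused, so creating more
 * maps than BPF_LOCAL_STORAGE_CACHE_SIZE only lowers the cache hit rate
 * in bpf_local_storage_lookup() instead of failing.
 */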

static void bpf_local_storage_cache_idx_free(struct bpf_local_storage_cache *cache,
                                             u16 idx)
{
        spin_lock(&cache->idx_lock);
        cache->idx_usage_counts[idx]--;
        spin_unlock(&cache->idx_lock);
}

int bpf_local_storage_map_alloc_check(union bpf_attr *attr)
{
        if (attr->map_flags & ~BPF_LOCAL_STORAGE_CREATE_FLAG_MASK ||
            !(attr->map_flags & BPF_F_NO_PREALLOC) ||
            attr->max_entries ||
            attr->key_size != sizeof(int) || !attr->value_size ||
            /* Enforce BTF for userspace sk dumping */
            !attr->btf_key_type_id || !attr->btf_value_type_id)
                return -EINVAL;

        if (!bpf_capable())
                return -EPERM;

        if (attr->value_size > BPF_LOCAL_STORAGE_MAX_VALUE_SIZE)
                return -E2BIG;

        return 0;
}
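
/* Example (BPF program side, libbpf conventions): a map definition that
 * passes the checks above -- BPF_F_NO_PREALLOC set, no max_entries, an
 * int key and BTF-described types.  "struct my_val" and "my_sk_storage"
 * are illustrative placeholders:
 *
 *      struct {
 *              __uint(type, BPF_MAP_TYPE_SK_STORAGE);
 *              __uint(map_flags, BPF_F_NO_PREALLOC);
 *              __type(key, int);
 *              __type(value, struct my_val);
 *      } my_sk_storage SEC(".maps");
 */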

static struct bpf_local_storage_map *__bpf_local_storage_map_alloc(union bpf_attr *attr)
{
        struct bpf_local_storage_map *smap;
        unsigned int i;
        u32 nbuckets;

        smap = bpf_map_area_alloc(sizeof(*smap), NUMA_NO_NODE);
        if (!smap)
                return ERR_PTR(-ENOMEM);
        bpf_map_init_from_attr(&smap->map, attr);

        nbuckets = roundup_pow_of_two(num_possible_cpus());
        /* Use at least 2 buckets, select_bucket() is undefined behavior with 1 bucket */
        nbuckets = max_t(u32, 2, nbuckets);
        smap->bucket_log = ilog2(nbuckets);

        smap->buckets = bpf_map_kvcalloc(&smap->map, sizeof(*smap->buckets),
                                         nbuckets, GFP_USER | __GFP_NOWARN);
        if (!smap->buckets) {
                bpf_map_area_free(smap);
                return ERR_PTR(-ENOMEM);
        }

        for (i = 0; i < nbuckets; i++) {
                INIT_HLIST_HEAD(&smap->buckets[i].list);
                raw_spin_lock_init(&smap->buckets[i].lock);
        }

        smap->elem_size = offsetof(struct bpf_local_storage_elem,
                                   sdata.data[attr->value_size]);

        return smap;
}
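
/* Example: on a machine with 6 possible CPUs and attr->value_size == 8,
 * nbuckets = roundup_pow_of_two(6) = 8 and bucket_log = 3, while
 * elem_size = offsetof(struct bpf_local_storage_elem, sdata.data[8]),
 * i.e. the fixed selem header plus 8 bytes of map value stored inline.
 */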

int bpf_local_storage_map_check_btf(const struct bpf_map *map,
                                    const struct btf *btf,
                                    const struct btf_type *key_type,
                                    const struct btf_type *value_type)
{
        u32 int_data;

        if (BTF_INFO_KIND(key_type->info) != BTF_KIND_INT)
                return -EINVAL;

        int_data = *(u32 *)(key_type + 1);
        if (BTF_INT_BITS(int_data) != 32 || BTF_INT_OFFSET(int_data))
                return -EINVAL;

        return 0;
}

bool bpf_local_storage_unlink_nolock(struct bpf_local_storage *local_storage)
{
        struct bpf_local_storage_elem *selem;
        bool free_storage = false;
        struct hlist_node *n;

        /* Neither the bpf_prog nor the bpf_map's syscall
         * could be modifying the local_storage->list now.
         * Thus, no elem can be added to or deleted from the
         * local_storage->list by the bpf_prog or by the bpf_map's syscall.
         *
         * It is racing with bpf_local_storage_map_free() alone
         * when unlinking elem from the local_storage->list and
         * the map's bucket->list.
         */
        hlist_for_each_entry_safe(selem, n, &local_storage->list, snode) {
                /* Always unlink from map before unlinking from
                 * local_storage.
                 */
                bpf_selem_unlink_map(selem);
                /* If local_storage list has only one element, the
                 * bpf_selem_unlink_storage_nolock() will return true.
                 * Otherwise, it will return false.  This loop intends to
                 * remove all of the local storage, so the last iteration
                 * will set free_storage to true.
                 */
                free_storage = bpf_selem_unlink_storage_nolock(
                        local_storage, selem, false, false);
        }

        return free_storage;
}

struct bpf_map *
bpf_local_storage_map_alloc(union bpf_attr *attr,
                            struct bpf_local_storage_cache *cache)
{
        struct bpf_local_storage_map *smap;

        smap = __bpf_local_storage_map_alloc(attr);
        if (IS_ERR(smap))
                return ERR_CAST(smap);

        smap->cache_idx = bpf_local_storage_cache_idx_get(cache);
        return &smap->map;
}

void bpf_local_storage_map_free(struct bpf_map *map,
                                struct bpf_local_storage_cache *cache,
                                int __percpu *busy_counter)
{
        struct bpf_local_storage_map_bucket *b;
        struct bpf_local_storage_elem *selem;
        struct bpf_local_storage_map *smap;
        unsigned int i;

        smap = (struct bpf_local_storage_map *)map;
        bpf_local_storage_cache_idx_free(cache, smap->cache_idx);

        /* Note that this map might be concurrently cloned from
         * bpf_sk_storage_clone. Wait for any existing bpf_sk_storage_clone
         * RCU read section to finish before proceeding. New RCU
         * read sections should be prevented via bpf_map_inc_not_zero.
         */
        synchronize_rcu();

        /* bpf prog and the userspace can no longer access this map
         * now.  No new selem (of this map) can be added
         * to the owner->storage or to the map bucket's list.
         *
         * The elem of this map can be cleaned up here
         * or when the storage is freed e.g.
         * by bpf_sk_storage_free() during __sk_destruct().
         */
        for (i = 0; i < (1U << smap->bucket_log); i++) {
                b = &smap->buckets[i];

                rcu_read_lock();
                /* No one is adding to b->list now */
                while ((selem = hlist_entry_safe(
                                rcu_dereference_raw(hlist_first_rcu(&b->list)),
                                struct bpf_local_storage_elem, map_node))) {
                        if (busy_counter) {
                                migrate_disable();
                                this_cpu_inc(*busy_counter);
                        }
                        bpf_selem_unlink(selem, false);
                        if (busy_counter) {
                                this_cpu_dec(*busy_counter);
                                migrate_enable();
                        }
                        cond_resched_rcu();
                }
                rcu_read_unlock();
        }

        /* While freeing the storage we may still need to access the map.
         *
         * e.g. when bpf_sk_storage_free() has unlinked selem from the map
         * which then made the above while((selem = ...)) loop
         * exit immediately.
         *
         * However, while freeing the storage one still needs to access the
         * smap->elem_size to do the uncharging in
         * bpf_selem_unlink_storage_nolock().
         *
         * Hence, wait another rcu grace period for the storage to be freed.
         */
        synchronize_rcu();

        kvfree(smap->buckets);
        bpf_map_area_free(smap);
}