Merge branch 'for-joerg/batched-unmap' of git://git.kernel.org/pub/scm/linux/kernel...
[platform/kernel/linux-rpi.git] / kernel / audit_tree.c
1 // SPDX-License-Identifier: GPL-2.0
2 #include "audit.h"
3 #include <linux/fsnotify_backend.h>
4 #include <linux/namei.h>
5 #include <linux/mount.h>
6 #include <linux/kthread.h>
7 #include <linux/refcount.h>
8 #include <linux/slab.h>
9
10 struct audit_tree;
11 struct audit_chunk;
12
13 struct audit_tree {
14         refcount_t count;
15         int goner;
16         struct audit_chunk *root;
17         struct list_head chunks;
18         struct list_head rules;
19         struct list_head list;
20         struct list_head same_root;
21         struct rcu_head head;
22         char pathname[];
23 };
24
25 struct audit_chunk {
26         struct list_head hash;
27         unsigned long key;
28         struct fsnotify_mark *mark;
29         struct list_head trees;         /* with root here */
30         int count;
31         atomic_long_t refs;
32         struct rcu_head head;
33         struct node {
34                 struct list_head list;
35                 struct audit_tree *owner;
36                 unsigned index;         /* index; upper bit indicates 'will prune' */
37         } owners[];
38 };
39
40 struct audit_tree_mark {
41         struct fsnotify_mark mark;
42         struct audit_chunk *chunk;
43 };
44
45 static LIST_HEAD(tree_list);
46 static LIST_HEAD(prune_list);
47 static struct task_struct *prune_thread;
48
49 /*
50  * One struct chunk is attached to each inode of interest through
51  * audit_tree_mark (fsnotify mark). We replace struct chunk on tagging /
52  * untagging, the mark is stable as long as there is chunk attached. The
53  * association between mark and chunk is protected by hash_lock and
54  * audit_tree_group->mark_mutex. Thus as long as we hold
55  * audit_tree_group->mark_mutex and check that the mark is alive by
56  * FSNOTIFY_MARK_FLAG_ATTACHED flag check, we are sure the mark points to
57  * the current chunk.
58  *
59  * Rules have pointer to struct audit_tree.
60  * Rules have struct list_head rlist forming a list of rules over
61  * the same tree.
62  * References to struct chunk are collected at audit_inode{,_child}()
63  * time and used in AUDIT_TREE rule matching.
64  * These references are dropped at the same time we are calling
65  * audit_free_names(), etc.
66  *
67  * Cyclic lists galore:
68  * tree.chunks anchors chunk.owners[].list                      hash_lock
69  * tree.rules anchors rule.rlist                                audit_filter_mutex
70  * chunk.trees anchors tree.same_root                           hash_lock
71  * chunk.hash is a hash with middle bits of watch.inode as
72  * a hash function.                                             RCU, hash_lock
73  *
74  * tree is refcounted; one reference for "some rules on rules_list refer to
75  * it", one for each chunk with pointer to it.
76  *
77  * chunk is refcounted by embedded .refs. Mark associated with the chunk holds
78  * one chunk reference. This reference is dropped either when a mark is going
79  * to be freed (corresponding inode goes away) or when chunk attached to the
80  * mark gets replaced. This reference must be dropped using
81  * audit_mark_put_chunk() to make sure the reference is dropped only after RCU
82  * grace period as it protects RCU readers of the hash table.
83  *
84  * node.index allows to get from node.list to containing chunk.
85  * MSB of that sucker is stolen to mark taggings that we might have to
86  * revert - several operations have very unpleasant cleanup logics and
87  * that makes a difference.  Some.
88  */
89
90 static struct fsnotify_group *audit_tree_group;
91 static struct kmem_cache *audit_tree_mark_cachep __read_mostly;
92
93 static struct audit_tree *alloc_tree(const char *s)
94 {
95         struct audit_tree *tree;
96
97         tree = kmalloc(sizeof(struct audit_tree) + strlen(s) + 1, GFP_KERNEL);
98         if (tree) {
99                 refcount_set(&tree->count, 1);
100                 tree->goner = 0;
101                 INIT_LIST_HEAD(&tree->chunks);
102                 INIT_LIST_HEAD(&tree->rules);
103                 INIT_LIST_HEAD(&tree->list);
104                 INIT_LIST_HEAD(&tree->same_root);
105                 tree->root = NULL;
106                 strcpy(tree->pathname, s);
107         }
108         return tree;
109 }
110
111 static inline void get_tree(struct audit_tree *tree)
112 {
113         refcount_inc(&tree->count);
114 }
115
116 static inline void put_tree(struct audit_tree *tree)
117 {
118         if (refcount_dec_and_test(&tree->count))
119                 kfree_rcu(tree, head);
120 }
121
122 /* to avoid bringing the entire thing in audit.h */
123 const char *audit_tree_path(struct audit_tree *tree)
124 {
125         return tree->pathname;
126 }
127
128 static void free_chunk(struct audit_chunk *chunk)
129 {
130         int i;
131
132         for (i = 0; i < chunk->count; i++) {
133                 if (chunk->owners[i].owner)
134                         put_tree(chunk->owners[i].owner);
135         }
136         kfree(chunk);
137 }
138
139 void audit_put_chunk(struct audit_chunk *chunk)
140 {
141         if (atomic_long_dec_and_test(&chunk->refs))
142                 free_chunk(chunk);
143 }
144
145 static void __put_chunk(struct rcu_head *rcu)
146 {
147         struct audit_chunk *chunk = container_of(rcu, struct audit_chunk, head);
148         audit_put_chunk(chunk);
149 }
150
151 /*
152  * Drop reference to the chunk that was held by the mark. This is the reference
153  * that gets dropped after we've removed the chunk from the hash table and we
154  * use it to make sure chunk cannot be freed before RCU grace period expires.
155  */
156 static void audit_mark_put_chunk(struct audit_chunk *chunk)
157 {
158         call_rcu(&chunk->head, __put_chunk);
159 }
160
161 static inline struct audit_tree_mark *audit_mark(struct fsnotify_mark *mark)
162 {
163         return container_of(mark, struct audit_tree_mark, mark);
164 }
165
166 static struct audit_chunk *mark_chunk(struct fsnotify_mark *mark)
167 {
168         return audit_mark(mark)->chunk;
169 }
170
171 static void audit_tree_destroy_watch(struct fsnotify_mark *mark)
172 {
173         kmem_cache_free(audit_tree_mark_cachep, audit_mark(mark));
174 }
175
176 static struct fsnotify_mark *alloc_mark(void)
177 {
178         struct audit_tree_mark *amark;
179
180         amark = kmem_cache_zalloc(audit_tree_mark_cachep, GFP_KERNEL);
181         if (!amark)
182                 return NULL;
183         fsnotify_init_mark(&amark->mark, audit_tree_group);
184         amark->mark.mask = FS_IN_IGNORED;
185         return &amark->mark;
186 }
187
188 static struct audit_chunk *alloc_chunk(int count)
189 {
190         struct audit_chunk *chunk;
191         size_t size;
192         int i;
193
194         size = offsetof(struct audit_chunk, owners) + count * sizeof(struct node);
195         chunk = kzalloc(size, GFP_KERNEL);
196         if (!chunk)
197                 return NULL;
198
199         INIT_LIST_HEAD(&chunk->hash);
200         INIT_LIST_HEAD(&chunk->trees);
201         chunk->count = count;
202         atomic_long_set(&chunk->refs, 1);
203         for (i = 0; i < count; i++) {
204                 INIT_LIST_HEAD(&chunk->owners[i].list);
205                 chunk->owners[i].index = i;
206         }
207         return chunk;
208 }
209
210 enum {HASH_SIZE = 128};
211 static struct list_head chunk_hash_heads[HASH_SIZE];
212 static __cacheline_aligned_in_smp DEFINE_SPINLOCK(hash_lock);
213
214 /* Function to return search key in our hash from inode. */
215 static unsigned long inode_to_key(const struct inode *inode)
216 {
217         /* Use address pointed to by connector->obj as the key */
218         return (unsigned long)&inode->i_fsnotify_marks;
219 }
220
221 static inline struct list_head *chunk_hash(unsigned long key)
222 {
223         unsigned long n = key / L1_CACHE_BYTES;
224         return chunk_hash_heads + n % HASH_SIZE;
225 }
226
227 /* hash_lock & mark->group->mark_mutex is held by caller */
228 static void insert_hash(struct audit_chunk *chunk)
229 {
230         struct list_head *list;
231
232         /*
233          * Make sure chunk is fully initialized before making it visible in the
234          * hash. Pairs with a data dependency barrier in READ_ONCE() in
235          * audit_tree_lookup().
236          */
237         smp_wmb();
238         WARN_ON_ONCE(!chunk->key);
239         list = chunk_hash(chunk->key);
240         list_add_rcu(&chunk->hash, list);
241 }
242
243 /* called under rcu_read_lock */
244 struct audit_chunk *audit_tree_lookup(const struct inode *inode)
245 {
246         unsigned long key = inode_to_key(inode);
247         struct list_head *list = chunk_hash(key);
248         struct audit_chunk *p;
249
250         list_for_each_entry_rcu(p, list, hash) {
251                 /*
252                  * We use a data dependency barrier in READ_ONCE() to make sure
253                  * the chunk we see is fully initialized.
254                  */
255                 if (READ_ONCE(p->key) == key) {
256                         atomic_long_inc(&p->refs);
257                         return p;
258                 }
259         }
260         return NULL;
261 }
262
263 bool audit_tree_match(struct audit_chunk *chunk, struct audit_tree *tree)
264 {
265         int n;
266         for (n = 0; n < chunk->count; n++)
267                 if (chunk->owners[n].owner == tree)
268                         return true;
269         return false;
270 }
271
272 /* tagging and untagging inodes with trees */
273
274 static struct audit_chunk *find_chunk(struct node *p)
275 {
276         int index = p->index & ~(1U<<31);
277         p -= index;
278         return container_of(p, struct audit_chunk, owners[0]);
279 }
280
281 static void replace_mark_chunk(struct fsnotify_mark *mark,
282                                struct audit_chunk *chunk)
283 {
284         struct audit_chunk *old;
285
286         assert_spin_locked(&hash_lock);
287         old = mark_chunk(mark);
288         audit_mark(mark)->chunk = chunk;
289         if (chunk)
290                 chunk->mark = mark;
291         if (old)
292                 old->mark = NULL;
293 }
294
295 static void replace_chunk(struct audit_chunk *new, struct audit_chunk *old)
296 {
297         struct audit_tree *owner;
298         int i, j;
299
300         new->key = old->key;
301         list_splice_init(&old->trees, &new->trees);
302         list_for_each_entry(owner, &new->trees, same_root)
303                 owner->root = new;
304         for (i = j = 0; j < old->count; i++, j++) {
305                 if (!old->owners[j].owner) {
306                         i--;
307                         continue;
308                 }
309                 owner = old->owners[j].owner;
310                 new->owners[i].owner = owner;
311                 new->owners[i].index = old->owners[j].index - j + i;
312                 if (!owner) /* result of earlier fallback */
313                         continue;
314                 get_tree(owner);
315                 list_replace_init(&old->owners[j].list, &new->owners[i].list);
316         }
317         replace_mark_chunk(old->mark, new);
318         /*
319          * Make sure chunk is fully initialized before making it visible in the
320          * hash. Pairs with a data dependency barrier in READ_ONCE() in
321          * audit_tree_lookup().
322          */
323         smp_wmb();
324         list_replace_rcu(&old->hash, &new->hash);
325 }
326
327 static void remove_chunk_node(struct audit_chunk *chunk, struct node *p)
328 {
329         struct audit_tree *owner = p->owner;
330
331         if (owner->root == chunk) {
332                 list_del_init(&owner->same_root);
333                 owner->root = NULL;
334         }
335         list_del_init(&p->list);
336         p->owner = NULL;
337         put_tree(owner);
338 }
339
340 static int chunk_count_trees(struct audit_chunk *chunk)
341 {
342         int i;
343         int ret = 0;
344
345         for (i = 0; i < chunk->count; i++)
346                 if (chunk->owners[i].owner)
347                         ret++;
348         return ret;
349 }
350
351 static void untag_chunk(struct audit_chunk *chunk, struct fsnotify_mark *mark)
352 {
353         struct audit_chunk *new;
354         int size;
355
356         mutex_lock(&audit_tree_group->mark_mutex);
357         /*
358          * mark_mutex stabilizes chunk attached to the mark so we can check
359          * whether it didn't change while we've dropped hash_lock.
360          */
361         if (!(mark->flags & FSNOTIFY_MARK_FLAG_ATTACHED) ||
362             mark_chunk(mark) != chunk)
363                 goto out_mutex;
364
365         size = chunk_count_trees(chunk);
366         if (!size) {
367                 spin_lock(&hash_lock);
368                 list_del_init(&chunk->trees);
369                 list_del_rcu(&chunk->hash);
370                 replace_mark_chunk(mark, NULL);
371                 spin_unlock(&hash_lock);
372                 fsnotify_detach_mark(mark);
373                 mutex_unlock(&audit_tree_group->mark_mutex);
374                 audit_mark_put_chunk(chunk);
375                 fsnotify_free_mark(mark);
376                 return;
377         }
378
379         new = alloc_chunk(size);
380         if (!new)
381                 goto out_mutex;
382
383         spin_lock(&hash_lock);
384         /*
385          * This has to go last when updating chunk as once replace_chunk() is
386          * called, new RCU readers can see the new chunk.
387          */
388         replace_chunk(new, chunk);
389         spin_unlock(&hash_lock);
390         mutex_unlock(&audit_tree_group->mark_mutex);
391         audit_mark_put_chunk(chunk);
392         return;
393
394 out_mutex:
395         mutex_unlock(&audit_tree_group->mark_mutex);
396 }
397
398 /* Call with group->mark_mutex held, releases it */
399 static int create_chunk(struct inode *inode, struct audit_tree *tree)
400 {
401         struct fsnotify_mark *mark;
402         struct audit_chunk *chunk = alloc_chunk(1);
403
404         if (!chunk) {
405                 mutex_unlock(&audit_tree_group->mark_mutex);
406                 return -ENOMEM;
407         }
408
409         mark = alloc_mark();
410         if (!mark) {
411                 mutex_unlock(&audit_tree_group->mark_mutex);
412                 kfree(chunk);
413                 return -ENOMEM;
414         }
415
416         if (fsnotify_add_inode_mark_locked(mark, inode, 0)) {
417                 mutex_unlock(&audit_tree_group->mark_mutex);
418                 fsnotify_put_mark(mark);
419                 kfree(chunk);
420                 return -ENOSPC;
421         }
422
423         spin_lock(&hash_lock);
424         if (tree->goner) {
425                 spin_unlock(&hash_lock);
426                 fsnotify_detach_mark(mark);
427                 mutex_unlock(&audit_tree_group->mark_mutex);
428                 fsnotify_free_mark(mark);
429                 fsnotify_put_mark(mark);
430                 kfree(chunk);
431                 return 0;
432         }
433         replace_mark_chunk(mark, chunk);
434         chunk->owners[0].index = (1U << 31);
435         chunk->owners[0].owner = tree;
436         get_tree(tree);
437         list_add(&chunk->owners[0].list, &tree->chunks);
438         if (!tree->root) {
439                 tree->root = chunk;
440                 list_add(&tree->same_root, &chunk->trees);
441         }
442         chunk->key = inode_to_key(inode);
443         /*
444          * Inserting into the hash table has to go last as once we do that RCU
445          * readers can see the chunk.
446          */
447         insert_hash(chunk);
448         spin_unlock(&hash_lock);
449         mutex_unlock(&audit_tree_group->mark_mutex);
450         /*
451          * Drop our initial reference. When mark we point to is getting freed,
452          * we get notification through ->freeing_mark callback and cleanup
453          * chunk pointing to this mark.
454          */
455         fsnotify_put_mark(mark);
456         return 0;
457 }
458
459 /* the first tagged inode becomes root of tree */
460 static int tag_chunk(struct inode *inode, struct audit_tree *tree)
461 {
462         struct fsnotify_mark *mark;
463         struct audit_chunk *chunk, *old;
464         struct node *p;
465         int n;
466
467         mutex_lock(&audit_tree_group->mark_mutex);
468         mark = fsnotify_find_mark(&inode->i_fsnotify_marks, audit_tree_group);
469         if (!mark)
470                 return create_chunk(inode, tree);
471
472         /*
473          * Found mark is guaranteed to be attached and mark_mutex protects mark
474          * from getting detached and thus it makes sure there is chunk attached
475          * to the mark.
476          */
477         /* are we already there? */
478         spin_lock(&hash_lock);
479         old = mark_chunk(mark);
480         for (n = 0; n < old->count; n++) {
481                 if (old->owners[n].owner == tree) {
482                         spin_unlock(&hash_lock);
483                         mutex_unlock(&audit_tree_group->mark_mutex);
484                         fsnotify_put_mark(mark);
485                         return 0;
486                 }
487         }
488         spin_unlock(&hash_lock);
489
490         chunk = alloc_chunk(old->count + 1);
491         if (!chunk) {
492                 mutex_unlock(&audit_tree_group->mark_mutex);
493                 fsnotify_put_mark(mark);
494                 return -ENOMEM;
495         }
496
497         spin_lock(&hash_lock);
498         if (tree->goner) {
499                 spin_unlock(&hash_lock);
500                 mutex_unlock(&audit_tree_group->mark_mutex);
501                 fsnotify_put_mark(mark);
502                 kfree(chunk);
503                 return 0;
504         }
505         p = &chunk->owners[chunk->count - 1];
506         p->index = (chunk->count - 1) | (1U<<31);
507         p->owner = tree;
508         get_tree(tree);
509         list_add(&p->list, &tree->chunks);
510         if (!tree->root) {
511                 tree->root = chunk;
512                 list_add(&tree->same_root, &chunk->trees);
513         }
514         /*
515          * This has to go last when updating chunk as once replace_chunk() is
516          * called, new RCU readers can see the new chunk.
517          */
518         replace_chunk(chunk, old);
519         spin_unlock(&hash_lock);
520         mutex_unlock(&audit_tree_group->mark_mutex);
521         fsnotify_put_mark(mark); /* pair to fsnotify_find_mark */
522         audit_mark_put_chunk(old);
523
524         return 0;
525 }
526
527 static void audit_tree_log_remove_rule(struct audit_context *context,
528                                        struct audit_krule *rule)
529 {
530         struct audit_buffer *ab;
531
532         if (!audit_enabled)
533                 return;
534         ab = audit_log_start(context, GFP_KERNEL, AUDIT_CONFIG_CHANGE);
535         if (unlikely(!ab))
536                 return;
537         audit_log_format(ab, "op=remove_rule dir=");
538         audit_log_untrustedstring(ab, rule->tree->pathname);
539         audit_log_key(ab, rule->filterkey);
540         audit_log_format(ab, " list=%d res=1", rule->listnr);
541         audit_log_end(ab);
542 }
543
544 static void kill_rules(struct audit_context *context, struct audit_tree *tree)
545 {
546         struct audit_krule *rule, *next;
547         struct audit_entry *entry;
548
549         list_for_each_entry_safe(rule, next, &tree->rules, rlist) {
550                 entry = container_of(rule, struct audit_entry, rule);
551
552                 list_del_init(&rule->rlist);
553                 if (rule->tree) {
554                         /* not a half-baked one */
555                         audit_tree_log_remove_rule(context, rule);
556                         if (entry->rule.exe)
557                                 audit_remove_mark(entry->rule.exe);
558                         rule->tree = NULL;
559                         list_del_rcu(&entry->list);
560                         list_del(&entry->rule.list);
561                         call_rcu(&entry->rcu, audit_free_rule_rcu);
562                 }
563         }
564 }
565
566 /*
567  * Remove tree from chunks. If 'tagged' is set, remove tree only from tagged
568  * chunks. The function expects tagged chunks are all at the beginning of the
569  * chunks list.
570  */
571 static void prune_tree_chunks(struct audit_tree *victim, bool tagged)
572 {
573         spin_lock(&hash_lock);
574         while (!list_empty(&victim->chunks)) {
575                 struct node *p;
576                 struct audit_chunk *chunk;
577                 struct fsnotify_mark *mark;
578
579                 p = list_first_entry(&victim->chunks, struct node, list);
580                 /* have we run out of marked? */
581                 if (tagged && !(p->index & (1U<<31)))
582                         break;
583                 chunk = find_chunk(p);
584                 mark = chunk->mark;
585                 remove_chunk_node(chunk, p);
586                 /* Racing with audit_tree_freeing_mark()? */
587                 if (!mark)
588                         continue;
589                 fsnotify_get_mark(mark);
590                 spin_unlock(&hash_lock);
591
592                 untag_chunk(chunk, mark);
593                 fsnotify_put_mark(mark);
594
595                 spin_lock(&hash_lock);
596         }
597         spin_unlock(&hash_lock);
598         put_tree(victim);
599 }
600
601 /*
602  * finish killing struct audit_tree
603  */
604 static void prune_one(struct audit_tree *victim)
605 {
606         prune_tree_chunks(victim, false);
607 }
608
609 /* trim the uncommitted chunks from tree */
610
611 static void trim_marked(struct audit_tree *tree)
612 {
613         struct list_head *p, *q;
614         spin_lock(&hash_lock);
615         if (tree->goner) {
616                 spin_unlock(&hash_lock);
617                 return;
618         }
619         /* reorder */
620         for (p = tree->chunks.next; p != &tree->chunks; p = q) {
621                 struct node *node = list_entry(p, struct node, list);
622                 q = p->next;
623                 if (node->index & (1U<<31)) {
624                         list_del_init(p);
625                         list_add(p, &tree->chunks);
626                 }
627         }
628         spin_unlock(&hash_lock);
629
630         prune_tree_chunks(tree, true);
631
632         spin_lock(&hash_lock);
633         if (!tree->root && !tree->goner) {
634                 tree->goner = 1;
635                 spin_unlock(&hash_lock);
636                 mutex_lock(&audit_filter_mutex);
637                 kill_rules(audit_context(), tree);
638                 list_del_init(&tree->list);
639                 mutex_unlock(&audit_filter_mutex);
640                 prune_one(tree);
641         } else {
642                 spin_unlock(&hash_lock);
643         }
644 }
645
646 static void audit_schedule_prune(void);
647
648 /* called with audit_filter_mutex */
649 int audit_remove_tree_rule(struct audit_krule *rule)
650 {
651         struct audit_tree *tree;
652         tree = rule->tree;
653         if (tree) {
654                 spin_lock(&hash_lock);
655                 list_del_init(&rule->rlist);
656                 if (list_empty(&tree->rules) && !tree->goner) {
657                         tree->root = NULL;
658                         list_del_init(&tree->same_root);
659                         tree->goner = 1;
660                         list_move(&tree->list, &prune_list);
661                         rule->tree = NULL;
662                         spin_unlock(&hash_lock);
663                         audit_schedule_prune();
664                         return 1;
665                 }
666                 rule->tree = NULL;
667                 spin_unlock(&hash_lock);
668                 return 1;
669         }
670         return 0;
671 }
672
673 static int compare_root(struct vfsmount *mnt, void *arg)
674 {
675         return inode_to_key(d_backing_inode(mnt->mnt_root)) ==
676                (unsigned long)arg;
677 }
678
679 void audit_trim_trees(void)
680 {
681         struct list_head cursor;
682
683         mutex_lock(&audit_filter_mutex);
684         list_add(&cursor, &tree_list);
685         while (cursor.next != &tree_list) {
686                 struct audit_tree *tree;
687                 struct path path;
688                 struct vfsmount *root_mnt;
689                 struct node *node;
690                 int err;
691
692                 tree = container_of(cursor.next, struct audit_tree, list);
693                 get_tree(tree);
694                 list_del(&cursor);
695                 list_add(&cursor, &tree->list);
696                 mutex_unlock(&audit_filter_mutex);
697
698                 err = kern_path(tree->pathname, 0, &path);
699                 if (err)
700                         goto skip_it;
701
702                 root_mnt = collect_mounts(&path);
703                 path_put(&path);
704                 if (IS_ERR(root_mnt))
705                         goto skip_it;
706
707                 spin_lock(&hash_lock);
708                 list_for_each_entry(node, &tree->chunks, list) {
709                         struct audit_chunk *chunk = find_chunk(node);
710                         /* this could be NULL if the watch is dying else where... */
711                         node->index |= 1U<<31;
712                         if (iterate_mounts(compare_root,
713                                            (void *)(chunk->key),
714                                            root_mnt))
715                                 node->index &= ~(1U<<31);
716                 }
717                 spin_unlock(&hash_lock);
718                 trim_marked(tree);
719                 drop_collected_mounts(root_mnt);
720 skip_it:
721                 put_tree(tree);
722                 mutex_lock(&audit_filter_mutex);
723         }
724         list_del(&cursor);
725         mutex_unlock(&audit_filter_mutex);
726 }
727
728 int audit_make_tree(struct audit_krule *rule, char *pathname, u32 op)
729 {
730
731         if (pathname[0] != '/' ||
732             rule->listnr != AUDIT_FILTER_EXIT ||
733             op != Audit_equal ||
734             rule->inode_f || rule->watch || rule->tree)
735                 return -EINVAL;
736         rule->tree = alloc_tree(pathname);
737         if (!rule->tree)
738                 return -ENOMEM;
739         return 0;
740 }
741
742 void audit_put_tree(struct audit_tree *tree)
743 {
744         put_tree(tree);
745 }
746
747 static int tag_mount(struct vfsmount *mnt, void *arg)
748 {
749         return tag_chunk(d_backing_inode(mnt->mnt_root), arg);
750 }
751
752 /*
753  * That gets run when evict_chunk() ends up needing to kill audit_tree.
754  * Runs from a separate thread.
755  */
756 static int prune_tree_thread(void *unused)
757 {
758         for (;;) {
759                 if (list_empty(&prune_list)) {
760                         set_current_state(TASK_INTERRUPTIBLE);
761                         schedule();
762                 }
763
764                 audit_ctl_lock();
765                 mutex_lock(&audit_filter_mutex);
766
767                 while (!list_empty(&prune_list)) {
768                         struct audit_tree *victim;
769
770                         victim = list_entry(prune_list.next,
771                                         struct audit_tree, list);
772                         list_del_init(&victim->list);
773
774                         mutex_unlock(&audit_filter_mutex);
775
776                         prune_one(victim);
777
778                         mutex_lock(&audit_filter_mutex);
779                 }
780
781                 mutex_unlock(&audit_filter_mutex);
782                 audit_ctl_unlock();
783         }
784         return 0;
785 }
786
787 static int audit_launch_prune(void)
788 {
789         if (prune_thread)
790                 return 0;
791         prune_thread = kthread_run(prune_tree_thread, NULL,
792                                 "audit_prune_tree");
793         if (IS_ERR(prune_thread)) {
794                 pr_err("cannot start thread audit_prune_tree");
795                 prune_thread = NULL;
796                 return -ENOMEM;
797         }
798         return 0;
799 }
800
801 /* called with audit_filter_mutex */
802 int audit_add_tree_rule(struct audit_krule *rule)
803 {
804         struct audit_tree *seed = rule->tree, *tree;
805         struct path path;
806         struct vfsmount *mnt;
807         int err;
808
809         rule->tree = NULL;
810         list_for_each_entry(tree, &tree_list, list) {
811                 if (!strcmp(seed->pathname, tree->pathname)) {
812                         put_tree(seed);
813                         rule->tree = tree;
814                         list_add(&rule->rlist, &tree->rules);
815                         return 0;
816                 }
817         }
818         tree = seed;
819         list_add(&tree->list, &tree_list);
820         list_add(&rule->rlist, &tree->rules);
821         /* do not set rule->tree yet */
822         mutex_unlock(&audit_filter_mutex);
823
824         if (unlikely(!prune_thread)) {
825                 err = audit_launch_prune();
826                 if (err)
827                         goto Err;
828         }
829
830         err = kern_path(tree->pathname, 0, &path);
831         if (err)
832                 goto Err;
833         mnt = collect_mounts(&path);
834         path_put(&path);
835         if (IS_ERR(mnt)) {
836                 err = PTR_ERR(mnt);
837                 goto Err;
838         }
839
840         get_tree(tree);
841         err = iterate_mounts(tag_mount, tree, mnt);
842         drop_collected_mounts(mnt);
843
844         if (!err) {
845                 struct node *node;
846                 spin_lock(&hash_lock);
847                 list_for_each_entry(node, &tree->chunks, list)
848                         node->index &= ~(1U<<31);
849                 spin_unlock(&hash_lock);
850         } else {
851                 trim_marked(tree);
852                 goto Err;
853         }
854
855         mutex_lock(&audit_filter_mutex);
856         if (list_empty(&rule->rlist)) {
857                 put_tree(tree);
858                 return -ENOENT;
859         }
860         rule->tree = tree;
861         put_tree(tree);
862
863         return 0;
864 Err:
865         mutex_lock(&audit_filter_mutex);
866         list_del_init(&tree->list);
867         list_del_init(&tree->rules);
868         put_tree(tree);
869         return err;
870 }
871
872 int audit_tag_tree(char *old, char *new)
873 {
874         struct list_head cursor, barrier;
875         int failed = 0;
876         struct path path1, path2;
877         struct vfsmount *tagged;
878         int err;
879
880         err = kern_path(new, 0, &path2);
881         if (err)
882                 return err;
883         tagged = collect_mounts(&path2);
884         path_put(&path2);
885         if (IS_ERR(tagged))
886                 return PTR_ERR(tagged);
887
888         err = kern_path(old, 0, &path1);
889         if (err) {
890                 drop_collected_mounts(tagged);
891                 return err;
892         }
893
894         mutex_lock(&audit_filter_mutex);
895         list_add(&barrier, &tree_list);
896         list_add(&cursor, &barrier);
897
898         while (cursor.next != &tree_list) {
899                 struct audit_tree *tree;
900                 int good_one = 0;
901
902                 tree = container_of(cursor.next, struct audit_tree, list);
903                 get_tree(tree);
904                 list_del(&cursor);
905                 list_add(&cursor, &tree->list);
906                 mutex_unlock(&audit_filter_mutex);
907
908                 err = kern_path(tree->pathname, 0, &path2);
909                 if (!err) {
910                         good_one = path_is_under(&path1, &path2);
911                         path_put(&path2);
912                 }
913
914                 if (!good_one) {
915                         put_tree(tree);
916                         mutex_lock(&audit_filter_mutex);
917                         continue;
918                 }
919
920                 failed = iterate_mounts(tag_mount, tree, tagged);
921                 if (failed) {
922                         put_tree(tree);
923                         mutex_lock(&audit_filter_mutex);
924                         break;
925                 }
926
927                 mutex_lock(&audit_filter_mutex);
928                 spin_lock(&hash_lock);
929                 if (!tree->goner) {
930                         list_del(&tree->list);
931                         list_add(&tree->list, &tree_list);
932                 }
933                 spin_unlock(&hash_lock);
934                 put_tree(tree);
935         }
936
937         while (barrier.prev != &tree_list) {
938                 struct audit_tree *tree;
939
940                 tree = container_of(barrier.prev, struct audit_tree, list);
941                 get_tree(tree);
942                 list_del(&tree->list);
943                 list_add(&tree->list, &barrier);
944                 mutex_unlock(&audit_filter_mutex);
945
946                 if (!failed) {
947                         struct node *node;
948                         spin_lock(&hash_lock);
949                         list_for_each_entry(node, &tree->chunks, list)
950                                 node->index &= ~(1U<<31);
951                         spin_unlock(&hash_lock);
952                 } else {
953                         trim_marked(tree);
954                 }
955
956                 put_tree(tree);
957                 mutex_lock(&audit_filter_mutex);
958         }
959         list_del(&barrier);
960         list_del(&cursor);
961         mutex_unlock(&audit_filter_mutex);
962         path_put(&path1);
963         drop_collected_mounts(tagged);
964         return failed;
965 }
966
967
968 static void audit_schedule_prune(void)
969 {
970         wake_up_process(prune_thread);
971 }
972
973 /*
974  * ... and that one is done if evict_chunk() decides to delay until the end
975  * of syscall.  Runs synchronously.
976  */
977 void audit_kill_trees(struct audit_context *context)
978 {
979         struct list_head *list = &context->killed_trees;
980
981         audit_ctl_lock();
982         mutex_lock(&audit_filter_mutex);
983
984         while (!list_empty(list)) {
985                 struct audit_tree *victim;
986
987                 victim = list_entry(list->next, struct audit_tree, list);
988                 kill_rules(context, victim);
989                 list_del_init(&victim->list);
990
991                 mutex_unlock(&audit_filter_mutex);
992
993                 prune_one(victim);
994
995                 mutex_lock(&audit_filter_mutex);
996         }
997
998         mutex_unlock(&audit_filter_mutex);
999         audit_ctl_unlock();
1000 }
1001
1002 /*
1003  *  Here comes the stuff asynchronous to auditctl operations
1004  */
1005
1006 static void evict_chunk(struct audit_chunk *chunk)
1007 {
1008         struct audit_tree *owner;
1009         struct list_head *postponed = audit_killed_trees();
1010         int need_prune = 0;
1011         int n;
1012
1013         mutex_lock(&audit_filter_mutex);
1014         spin_lock(&hash_lock);
1015         while (!list_empty(&chunk->trees)) {
1016                 owner = list_entry(chunk->trees.next,
1017                                    struct audit_tree, same_root);
1018                 owner->goner = 1;
1019                 owner->root = NULL;
1020                 list_del_init(&owner->same_root);
1021                 spin_unlock(&hash_lock);
1022                 if (!postponed) {
1023                         kill_rules(audit_context(), owner);
1024                         list_move(&owner->list, &prune_list);
1025                         need_prune = 1;
1026                 } else {
1027                         list_move(&owner->list, postponed);
1028                 }
1029                 spin_lock(&hash_lock);
1030         }
1031         list_del_rcu(&chunk->hash);
1032         for (n = 0; n < chunk->count; n++)
1033                 list_del_init(&chunk->owners[n].list);
1034         spin_unlock(&hash_lock);
1035         mutex_unlock(&audit_filter_mutex);
1036         if (need_prune)
1037                 audit_schedule_prune();
1038 }
1039
1040 static int audit_tree_handle_event(struct fsnotify_group *group,
1041                                    struct inode *to_tell,
1042                                    u32 mask, const void *data, int data_type,
1043                                    const struct qstr *file_name, u32 cookie,
1044                                    struct fsnotify_iter_info *iter_info)
1045 {
1046         return 0;
1047 }
1048
1049 static void audit_tree_freeing_mark(struct fsnotify_mark *mark,
1050                                     struct fsnotify_group *group)
1051 {
1052         struct audit_chunk *chunk;
1053
1054         mutex_lock(&mark->group->mark_mutex);
1055         spin_lock(&hash_lock);
1056         chunk = mark_chunk(mark);
1057         replace_mark_chunk(mark, NULL);
1058         spin_unlock(&hash_lock);
1059         mutex_unlock(&mark->group->mark_mutex);
1060         if (chunk) {
1061                 evict_chunk(chunk);
1062                 audit_mark_put_chunk(chunk);
1063         }
1064
1065         /*
1066          * We are guaranteed to have at least one reference to the mark from
1067          * either the inode or the caller of fsnotify_destroy_mark().
1068          */
1069         BUG_ON(refcount_read(&mark->refcnt) < 1);
1070 }
1071
1072 static const struct fsnotify_ops audit_tree_ops = {
1073         .handle_event = audit_tree_handle_event,
1074         .freeing_mark = audit_tree_freeing_mark,
1075         .free_mark = audit_tree_destroy_watch,
1076 };
1077
1078 static int __init audit_tree_init(void)
1079 {
1080         int i;
1081
1082         audit_tree_mark_cachep = KMEM_CACHE(audit_tree_mark, SLAB_PANIC);
1083
1084         audit_tree_group = fsnotify_alloc_group(&audit_tree_ops);
1085         if (IS_ERR(audit_tree_group))
1086                 audit_panic("cannot initialize fsnotify group for rectree watches");
1087
1088         for (i = 0; i < HASH_SIZE; i++)
1089                 INIT_LIST_HEAD(&chunk_hash_heads[i]);
1090
1091         return 0;
1092 }
1093 __initcall(audit_tree_init);