landlock: Reduce the maximum number of layers to 16
[platform/kernel/linux-rpi.git] / security / landlock / fs.c
1 // SPDX-License-Identifier: GPL-2.0-only
2 /*
3  * Landlock LSM - Filesystem management and hooks
4  *
5  * Copyright © 2016-2020 Mickaël Salaün <mic@digikod.net>
6  * Copyright © 2018-2020 ANSSI
7  */
8
9 #include <linux/atomic.h>
10 #include <linux/bitops.h>
11 #include <linux/bits.h>
12 #include <linux/compiler_types.h>
13 #include <linux/dcache.h>
14 #include <linux/err.h>
15 #include <linux/fs.h>
16 #include <linux/init.h>
17 #include <linux/kernel.h>
18 #include <linux/limits.h>
19 #include <linux/list.h>
20 #include <linux/lsm_hooks.h>
21 #include <linux/mount.h>
22 #include <linux/namei.h>
23 #include <linux/path.h>
24 #include <linux/rcupdate.h>
25 #include <linux/spinlock.h>
26 #include <linux/stat.h>
27 #include <linux/types.h>
28 #include <linux/wait_bit.h>
29 #include <linux/workqueue.h>
30 #include <uapi/linux/landlock.h>
31
32 #include "common.h"
33 #include "cred.h"
34 #include "fs.h"
35 #include "limits.h"
36 #include "object.h"
37 #include "ruleset.h"
38 #include "setup.h"
39
40 /* Underlying object management */
41
42 static void release_inode(struct landlock_object *const object)
43         __releases(object->lock)
44 {
45         struct inode *const inode = object->underobj;
46         struct super_block *sb;
47
48         if (!inode) {
49                 spin_unlock(&object->lock);
50                 return;
51         }
52
53         /*
54          * Protects against concurrent use by hook_sb_delete() of the reference
55          * to the underlying inode.
56          */
57         object->underobj = NULL;
58         /*
59          * Makes sure that if the filesystem is concurrently unmounted,
60          * hook_sb_delete() will wait for us to finish iput().
61          */
62         sb = inode->i_sb;
63         atomic_long_inc(&landlock_superblock(sb)->inode_refs);
64         spin_unlock(&object->lock);
65         /*
66          * Because object->underobj was not NULL, hook_sb_delete() and
67          * get_inode_object() guarantee that it is safe to reset
68          * landlock_inode(inode)->object while it is not NULL.  It is therefore
69          * not necessary to lock inode->i_lock.
70          */
71         rcu_assign_pointer(landlock_inode(inode)->object, NULL);
72         /*
73          * Now, new rules can safely be tied to @inode with get_inode_object().
74          */
75
76         iput(inode);
77         if (atomic_long_dec_and_test(&landlock_superblock(sb)->inode_refs))
78                 wake_up_var(&landlock_superblock(sb)->inode_refs);
79 }
80
81 static const struct landlock_object_underops landlock_fs_underops = {
82         .release = release_inode
83 };
84
85 /* Ruleset management */
86
87 static struct landlock_object *get_inode_object(struct inode *const inode)
88 {
89         struct landlock_object *object, *new_object;
90         struct landlock_inode_security *inode_sec = landlock_inode(inode);
91
92         rcu_read_lock();
93 retry:
94         object = rcu_dereference(inode_sec->object);
95         if (object) {
96                 if (likely(refcount_inc_not_zero(&object->usage))) {
97                         rcu_read_unlock();
98                         return object;
99                 }
100                 /*
101                  * We are racing with release_inode(), the object is going
102                  * away.  Wait for release_inode(), then retry.
103                  */
104                 spin_lock(&object->lock);
105                 spin_unlock(&object->lock);
106                 goto retry;
107         }
108         rcu_read_unlock();
109
110         /*
111          * If there is no object tied to @inode, then create a new one (without
112          * holding any locks).
113          */
114         new_object = landlock_create_object(&landlock_fs_underops, inode);
115         if (IS_ERR(new_object))
116                 return new_object;
117
118         /*
119          * Protects against concurrent calls to get_inode_object() or
120          * hook_sb_delete().
121          */
122         spin_lock(&inode->i_lock);
123         if (unlikely(rcu_access_pointer(inode_sec->object))) {
124                 /* Someone else just created the object, bail out and retry. */
125                 spin_unlock(&inode->i_lock);
126                 kfree(new_object);
127
128                 rcu_read_lock();
129                 goto retry;
130         }
131
132         /*
133          * @inode will be released by hook_sb_delete() on its superblock
134          * shutdown, or by release_inode() when no more ruleset references the
135          * related object.
136          */
137         ihold(inode);
138         rcu_assign_pointer(inode_sec->object, new_object);
139         spin_unlock(&inode->i_lock);
140         return new_object;
141 }
142
/* Every access right that is meaningful for a file (i.e. a non-directory). */
/* clang-format off */
#define ACCESS_FILE ( \
        LANDLOCK_ACCESS_FS_READ_FILE | \
        LANDLOCK_ACCESS_FS_WRITE_FILE | \
        LANDLOCK_ACCESS_FS_EXECUTE)
/* clang-format on */
150
151 /*
152  * @path: Should have been checked by get_path_from_fd().
153  */
154 int landlock_append_fs_rule(struct landlock_ruleset *const ruleset,
155                             const struct path *const path,
156                             access_mask_t access_rights)
157 {
158         int err;
159         struct landlock_object *object;
160
161         /* Files only get access rights that make sense. */
162         if (!d_is_dir(path->dentry) &&
163             (access_rights | ACCESS_FILE) != ACCESS_FILE)
164                 return -EINVAL;
165         if (WARN_ON_ONCE(ruleset->num_layers != 1))
166                 return -EINVAL;
167
168         /* Transforms relative access rights to absolute ones. */
169         access_rights |= LANDLOCK_MASK_ACCESS_FS & ~ruleset->fs_access_masks[0];
170         object = get_inode_object(d_backing_inode(path->dentry));
171         if (IS_ERR(object))
172                 return PTR_ERR(object);
173         mutex_lock(&ruleset->lock);
174         err = landlock_insert_rule(ruleset, object, access_rights);
175         mutex_unlock(&ruleset->lock);
176         /*
177          * No need to check for an error because landlock_insert_rule()
178          * increments the refcount for the new object if needed.
179          */
180         landlock_put_object(object);
181         return err;
182 }
183
184 /* Access-control management */
185
186 static inline layer_mask_t
187 unmask_layers(const struct landlock_ruleset *const domain,
188               const struct path *const path, const access_mask_t access_request,
189               layer_mask_t layer_mask)
190 {
191         const struct landlock_rule *rule;
192         const struct inode *inode;
193         size_t i;
194
195         if (d_is_negative(path->dentry))
196                 /* Ignore nonexistent leafs. */
197                 return layer_mask;
198         inode = d_backing_inode(path->dentry);
199         rcu_read_lock();
200         rule = landlock_find_rule(
201                 domain, rcu_dereference(landlock_inode(inode)->object));
202         rcu_read_unlock();
203         if (!rule)
204                 return layer_mask;
205
206         /*
207          * An access is granted if, for each policy layer, at least one rule
208          * encountered on the pathwalk grants the requested accesses,
209          * regardless of their position in the layer stack.  We must then check
210          * the remaining layers for each inode, from the first added layer to
211          * the last one.
212          */
213         for (i = 0; i < rule->num_layers; i++) {
214                 const struct landlock_layer *const layer = &rule->layers[i];
215                 const layer_mask_t layer_bit = BIT_ULL(layer->level - 1);
216
217                 /* Checks that the layer grants access to the full request. */
218                 if ((layer->access & access_request) == access_request) {
219                         layer_mask &= ~layer_bit;
220
221                         if (layer_mask == 0)
222                                 return layer_mask;
223                 }
224         }
225         return layer_mask;
226 }
227
228 static int check_access_path(const struct landlock_ruleset *const domain,
229                              const struct path *const path,
230                              const access_mask_t access_request)
231 {
232         bool allowed = false;
233         struct path walker_path;
234         layer_mask_t layer_mask;
235         size_t i;
236
237         if (!access_request)
238                 return 0;
239         if (WARN_ON_ONCE(!domain || !path))
240                 return 0;
241         /*
242          * Allows access to pseudo filesystems that will never be mountable
243          * (e.g. sockfs, pipefs), but can still be reachable through
244          * /proc/<pid>/fd/<file-descriptor> .
245          */
246         if ((path->dentry->d_sb->s_flags & SB_NOUSER) ||
247             (d_is_positive(path->dentry) &&
248              unlikely(IS_PRIVATE(d_backing_inode(path->dentry)))))
249                 return 0;
250         if (WARN_ON_ONCE(domain->num_layers < 1))
251                 return -EACCES;
252
253         /* Saves all layers handling a subset of requested accesses. */
254         layer_mask = 0;
255         for (i = 0; i < domain->num_layers; i++) {
256                 if (domain->fs_access_masks[i] & access_request)
257                         layer_mask |= BIT_ULL(i);
258         }
259         /* An access request not handled by the domain is allowed. */
260         if (layer_mask == 0)
261                 return 0;
262
263         walker_path = *path;
264         path_get(&walker_path);
265         /*
266          * We need to walk through all the hierarchy to not miss any relevant
267          * restriction.
268          */
269         while (true) {
270                 struct dentry *parent_dentry;
271
272                 layer_mask = unmask_layers(domain, &walker_path, access_request,
273                                            layer_mask);
274                 if (layer_mask == 0) {
275                         /* Stops when a rule from each layer grants access. */
276                         allowed = true;
277                         break;
278                 }
279
280 jump_up:
281                 if (walker_path.dentry == walker_path.mnt->mnt_root) {
282                         if (follow_up(&walker_path)) {
283                                 /* Ignores hidden mount points. */
284                                 goto jump_up;
285                         } else {
286                                 /*
287                                  * Stops at the real root.  Denies access
288                                  * because not all layers have granted access.
289                                  */
290                                 allowed = false;
291                                 break;
292                         }
293                 }
294                 if (unlikely(IS_ROOT(walker_path.dentry))) {
295                         /*
296                          * Stops at disconnected root directories.  Only allows
297                          * access to internal filesystems (e.g. nsfs, which is
298                          * reachable through /proc/<pid>/ns/<namespace>).
299                          */
300                         allowed = !!(walker_path.mnt->mnt_flags & MNT_INTERNAL);
301                         break;
302                 }
303                 parent_dentry = dget_parent(walker_path.dentry);
304                 dput(walker_path.dentry);
305                 walker_path.dentry = parent_dentry;
306         }
307         path_put(&walker_path);
308         return allowed ? 0 : -EACCES;
309 }
310
311 static inline int current_check_access_path(const struct path *const path,
312                                             const access_mask_t access_request)
313 {
314         const struct landlock_ruleset *const dom =
315                 landlock_get_current_domain();
316
317         if (!dom)
318                 return 0;
319         return check_access_path(dom, path, access_request);
320 }
321
322 /* Inode hooks */
323
324 static void hook_inode_free_security(struct inode *const inode)
325 {
326         /*
327          * All inodes must already have been untied from their object by
328          * release_inode() or hook_sb_delete().
329          */
330         WARN_ON_ONCE(landlock_inode(inode)->object);
331 }
332
333 /* Super-block hooks */
334
335 /*
336  * Release the inodes used in a security policy.
337  *
338  * Cf. fsnotify_unmount_inodes() and invalidate_inodes()
339  */
340 static void hook_sb_delete(struct super_block *const sb)
341 {
342         struct inode *inode, *prev_inode = NULL;
343
344         if (!landlock_initialized)
345                 return;
346
347         spin_lock(&sb->s_inode_list_lock);
348         list_for_each_entry(inode, &sb->s_inodes, i_sb_list) {
349                 struct landlock_object *object;
350
351                 /* Only handles referenced inodes. */
352                 if (!atomic_read(&inode->i_count))
353                         continue;
354
355                 /*
356                  * Protects against concurrent modification of inode (e.g.
357                  * from get_inode_object()).
358                  */
359                 spin_lock(&inode->i_lock);
360                 /*
361                  * Checks I_FREEING and I_WILL_FREE  to protect against a race
362                  * condition when release_inode() just called iput(), which
363                  * could lead to a NULL dereference of inode->security or a
364                  * second call to iput() for the same Landlock object.  Also
365                  * checks I_NEW because such inode cannot be tied to an object.
366                  */
367                 if (inode->i_state & (I_FREEING | I_WILL_FREE | I_NEW)) {
368                         spin_unlock(&inode->i_lock);
369                         continue;
370                 }
371
372                 rcu_read_lock();
373                 object = rcu_dereference(landlock_inode(inode)->object);
374                 if (!object) {
375                         rcu_read_unlock();
376                         spin_unlock(&inode->i_lock);
377                         continue;
378                 }
379                 /* Keeps a reference to this inode until the next loop walk. */
380                 __iget(inode);
381                 spin_unlock(&inode->i_lock);
382
383                 /*
384                  * If there is no concurrent release_inode() ongoing, then we
385                  * are in charge of calling iput() on this inode, otherwise we
386                  * will just wait for it to finish.
387                  */
388                 spin_lock(&object->lock);
389                 if (object->underobj == inode) {
390                         object->underobj = NULL;
391                         spin_unlock(&object->lock);
392                         rcu_read_unlock();
393
394                         /*
395                          * Because object->underobj was not NULL,
396                          * release_inode() and get_inode_object() guarantee
397                          * that it is safe to reset
398                          * landlock_inode(inode)->object while it is not NULL.
399                          * It is therefore not necessary to lock inode->i_lock.
400                          */
401                         rcu_assign_pointer(landlock_inode(inode)->object, NULL);
402                         /*
403                          * At this point, we own the ihold() reference that was
404                          * originally set up by get_inode_object() and the
405                          * __iget() reference that we just set in this loop
406                          * walk.  Therefore the following call to iput() will
407                          * not sleep nor drop the inode because there is now at
408                          * least two references to it.
409                          */
410                         iput(inode);
411                 } else {
412                         spin_unlock(&object->lock);
413                         rcu_read_unlock();
414                 }
415
416                 if (prev_inode) {
417                         /*
418                          * At this point, we still own the __iget() reference
419                          * that we just set in this loop walk.  Therefore we
420                          * can drop the list lock and know that the inode won't
421                          * disappear from under us until the next loop walk.
422                          */
423                         spin_unlock(&sb->s_inode_list_lock);
424                         /*
425                          * We can now actually put the inode reference from the
426                          * previous loop walk, which is not needed anymore.
427                          */
428                         iput(prev_inode);
429                         cond_resched();
430                         spin_lock(&sb->s_inode_list_lock);
431                 }
432                 prev_inode = inode;
433         }
434         spin_unlock(&sb->s_inode_list_lock);
435
436         /* Puts the inode reference from the last loop walk, if any. */
437         if (prev_inode)
438                 iput(prev_inode);
439         /* Waits for pending iput() in release_inode(). */
440         wait_var_event(&landlock_superblock(sb)->inode_refs,
441                        !atomic_long_read(&landlock_superblock(sb)->inode_refs));
442 }
443
444 /*
445  * Because a Landlock security policy is defined according to the filesystem
446  * topology (i.e. the mount namespace), changing it may grant access to files
447  * not previously allowed.
448  *
449  * To make it simple, deny any filesystem topology modification by landlocked
450  * processes.  Non-landlocked processes may still change the namespace of a
451  * landlocked process, but this kind of threat must be handled by a system-wide
452  * access-control security policy.
453  *
454  * This could be lifted in the future if Landlock can safely handle mount
455  * namespace updates requested by a landlocked process.  Indeed, we could
456  * update the current domain (which is currently read-only) by taking into
457  * account the accesses of the source and the destination of a new mount point.
458  * However, it would also require to make all the child domains dynamically
459  * inherit these new constraints.  Anyway, for backward compatibility reasons,
460  * a dedicated user space option would be required (e.g. as a ruleset flag).
461  */
462 static int hook_sb_mount(const char *const dev_name,
463                          const struct path *const path, const char *const type,
464                          const unsigned long flags, void *const data)
465 {
466         if (!landlock_get_current_domain())
467                 return 0;
468         return -EPERM;
469 }
470
471 static int hook_move_mount(const struct path *const from_path,
472                            const struct path *const to_path)
473 {
474         if (!landlock_get_current_domain())
475                 return 0;
476         return -EPERM;
477 }
478
479 /*
480  * Removing a mount point may reveal a previously hidden file hierarchy, which
481  * may then grant access to files, which may have previously been forbidden.
482  */
483 static int hook_sb_umount(struct vfsmount *const mnt, const int flags)
484 {
485         if (!landlock_get_current_domain())
486                 return 0;
487         return -EPERM;
488 }
489
490 static int hook_sb_remount(struct super_block *const sb, void *const mnt_opts)
491 {
492         if (!landlock_get_current_domain())
493                 return 0;
494         return -EPERM;
495 }
496
497 /*
498  * pivot_root(2), like mount(2), changes the current mount namespace.  It must
499  * then be forbidden for a landlocked process.
500  *
501  * However, chroot(2) may be allowed because it only changes the relative root
502  * directory of the current process.  Moreover, it can be used to restrict the
503  * view of the filesystem.
504  */
505 static int hook_sb_pivotroot(const struct path *const old_path,
506                              const struct path *const new_path)
507 {
508         if (!landlock_get_current_domain())
509                 return 0;
510         return -EPERM;
511 }
512
513 /* Path hooks */
514
515 static inline access_mask_t get_mode_access(const umode_t mode)
516 {
517         switch (mode & S_IFMT) {
518         case S_IFLNK:
519                 return LANDLOCK_ACCESS_FS_MAKE_SYM;
520         case 0:
521                 /* A zero mode translates to S_IFREG. */
522         case S_IFREG:
523                 return LANDLOCK_ACCESS_FS_MAKE_REG;
524         case S_IFDIR:
525                 return LANDLOCK_ACCESS_FS_MAKE_DIR;
526         case S_IFCHR:
527                 return LANDLOCK_ACCESS_FS_MAKE_CHAR;
528         case S_IFBLK:
529                 return LANDLOCK_ACCESS_FS_MAKE_BLOCK;
530         case S_IFIFO:
531                 return LANDLOCK_ACCESS_FS_MAKE_FIFO;
532         case S_IFSOCK:
533                 return LANDLOCK_ACCESS_FS_MAKE_SOCK;
534         default:
535                 WARN_ON_ONCE(1);
536                 return 0;
537         }
538 }
539
540 /*
541  * Creating multiple links or renaming may lead to privilege escalations if not
542  * handled properly.  Indeed, we must be sure that the source doesn't gain more
543  * privileges by being accessible from the destination.  This is getting more
544  * complex when dealing with multiple layers.  The whole picture can be seen as
545  * a multilayer partial ordering problem.  A future version of Landlock will
546  * deal with that.
547  */
548 static int hook_path_link(struct dentry *const old_dentry,
549                           const struct path *const new_dir,
550                           struct dentry *const new_dentry)
551 {
552         const struct landlock_ruleset *const dom =
553                 landlock_get_current_domain();
554
555         if (!dom)
556                 return 0;
557         /* The mount points are the same for old and new paths, cf. EXDEV. */
558         if (old_dentry->d_parent != new_dir->dentry)
559                 /* Gracefully forbids reparenting. */
560                 return -EXDEV;
561         if (unlikely(d_is_negative(old_dentry)))
562                 return -ENOENT;
563         return check_access_path(
564                 dom, new_dir,
565                 get_mode_access(d_backing_inode(old_dentry)->i_mode));
566 }
567
568 static inline access_mask_t maybe_remove(const struct dentry *const dentry)
569 {
570         if (d_is_negative(dentry))
571                 return 0;
572         return d_is_dir(dentry) ? LANDLOCK_ACCESS_FS_REMOVE_DIR :
573                                   LANDLOCK_ACCESS_FS_REMOVE_FILE;
574 }
575
576 static int hook_path_rename(const struct path *const old_dir,
577                             struct dentry *const old_dentry,
578                             const struct path *const new_dir,
579                             struct dentry *const new_dentry)
580 {
581         const struct landlock_ruleset *const dom =
582                 landlock_get_current_domain();
583
584         if (!dom)
585                 return 0;
586         /* The mount points are the same for old and new paths, cf. EXDEV. */
587         if (old_dir->dentry != new_dir->dentry)
588                 /* Gracefully forbids reparenting. */
589                 return -EXDEV;
590         if (unlikely(d_is_negative(old_dentry)))
591                 return -ENOENT;
592         /* RENAME_EXCHANGE is handled because directories are the same. */
593         return check_access_path(
594                 dom, old_dir,
595                 maybe_remove(old_dentry) | maybe_remove(new_dentry) |
596                         get_mode_access(d_backing_inode(old_dentry)->i_mode));
597 }
598
599 static int hook_path_mkdir(const struct path *const dir,
600                            struct dentry *const dentry, const umode_t mode)
601 {
602         return current_check_access_path(dir, LANDLOCK_ACCESS_FS_MAKE_DIR);
603 }
604
605 static int hook_path_mknod(const struct path *const dir,
606                            struct dentry *const dentry, const umode_t mode,
607                            const unsigned int dev)
608 {
609         const struct landlock_ruleset *const dom =
610                 landlock_get_current_domain();
611
612         if (!dom)
613                 return 0;
614         return check_access_path(dom, dir, get_mode_access(mode));
615 }
616
617 static int hook_path_symlink(const struct path *const dir,
618                              struct dentry *const dentry,
619                              const char *const old_name)
620 {
621         return current_check_access_path(dir, LANDLOCK_ACCESS_FS_MAKE_SYM);
622 }
623
624 static int hook_path_unlink(const struct path *const dir,
625                             struct dentry *const dentry)
626 {
627         return current_check_access_path(dir, LANDLOCK_ACCESS_FS_REMOVE_FILE);
628 }
629
630 static int hook_path_rmdir(const struct path *const dir,
631                            struct dentry *const dentry)
632 {
633         return current_check_access_path(dir, LANDLOCK_ACCESS_FS_REMOVE_DIR);
634 }
635
636 /* File hooks */
637
638 static inline access_mask_t get_file_access(const struct file *const file)
639 {
640         access_mask_t access = 0;
641
642         if (file->f_mode & FMODE_READ) {
643                 /* A directory can only be opened in read mode. */
644                 if (S_ISDIR(file_inode(file)->i_mode))
645                         return LANDLOCK_ACCESS_FS_READ_DIR;
646                 access = LANDLOCK_ACCESS_FS_READ_FILE;
647         }
648         if (file->f_mode & FMODE_WRITE)
649                 access |= LANDLOCK_ACCESS_FS_WRITE_FILE;
650         /* __FMODE_EXEC is indeed part of f_flags, not f_mode. */
651         if (file->f_flags & __FMODE_EXEC)
652                 access |= LANDLOCK_ACCESS_FS_EXECUTE;
653         return access;
654 }
655
656 static int hook_file_open(struct file *const file)
657 {
658         const struct landlock_ruleset *const dom =
659                 landlock_get_current_domain();
660
661         if (!dom)
662                 return 0;
663         /*
664          * Because a file may be opened with O_PATH, get_file_access() may
665          * return 0.  This case will be handled with a future Landlock
666          * evolution.
667          */
668         return check_access_path(dom, &file->f_path, get_file_access(file));
669 }
670
/*
 * LSM hooks implemented in this file, registered by landlock_add_fs_hooks().
 */
static struct security_hook_list landlock_hooks[] __lsm_ro_after_init = {
        /* Inode hooks */
        LSM_HOOK_INIT(inode_free_security, hook_inode_free_security),

        /* Super-block hooks */
        LSM_HOOK_INIT(sb_delete, hook_sb_delete),
        LSM_HOOK_INIT(sb_mount, hook_sb_mount),
        LSM_HOOK_INIT(move_mount, hook_move_mount),
        LSM_HOOK_INIT(sb_umount, hook_sb_umount),
        LSM_HOOK_INIT(sb_remount, hook_sb_remount),
        LSM_HOOK_INIT(sb_pivotroot, hook_sb_pivotroot),

        /* Path hooks */
        LSM_HOOK_INIT(path_link, hook_path_link),
        LSM_HOOK_INIT(path_rename, hook_path_rename),
        LSM_HOOK_INIT(path_mkdir, hook_path_mkdir),
        LSM_HOOK_INIT(path_mknod, hook_path_mknod),
        LSM_HOOK_INIT(path_symlink, hook_path_symlink),
        LSM_HOOK_INIT(path_unlink, hook_path_unlink),
        LSM_HOOK_INIT(path_rmdir, hook_path_rmdir),

        /* File hooks */
        LSM_HOOK_INIT(file_open, hook_file_open),
};
691
692 __init void landlock_add_fs_hooks(void)
693 {
694         security_add_hooks(landlock_hooks, ARRAY_SIZE(landlock_hooks),
695                            LANDLOCK_NAME);
696 }