// SPDX-License-Identifier: GPL-2.0-only
/*
 * Landlock LSM - Filesystem management and hooks
 *
 * Copyright © 2016-2020 Mickaël Salaün <mic@digikod.net>
 * Copyright © 2018-2020 ANSSI
 */
#include <linux/atomic.h>
#include <linux/bitops.h>
#include <linux/bits.h>
#include <linux/compiler_types.h>
#include <linux/dcache.h>
#include <linux/err.h>
#include <linux/fs.h>
#include <linux/init.h>
#include <linux/kernel.h>
#include <linux/limits.h>
#include <linux/list.h>
#include <linux/lsm_hooks.h>
#include <linux/mount.h>
#include <linux/namei.h>
#include <linux/path.h>
#include <linux/rcupdate.h>
#include <linux/slab.h>
#include <linux/spinlock.h>
#include <linux/stat.h>
#include <linux/types.h>
#include <linux/wait_bit.h>
#include <linux/workqueue.h>
#include <uapi/linux/landlock.h>
/* Underlying object management */
42 static void release_inode(struct landlock_object *const object)
43 __releases(object->lock)
45 struct inode *const inode = object->underobj;
46 struct super_block *sb;
49 spin_unlock(&object->lock);
54 * Protects against concurrent use by hook_sb_delete() of the reference
55 * to the underlying inode.
57 object->underobj = NULL;
59 * Makes sure that if the filesystem is concurrently unmounted,
60 * hook_sb_delete() will wait for us to finish iput().
63 atomic_long_inc(&landlock_superblock(sb)->inode_refs);
64 spin_unlock(&object->lock);
66 * Because object->underobj was not NULL, hook_sb_delete() and
67 * get_inode_object() guarantee that it is safe to reset
68 * landlock_inode(inode)->object while it is not NULL. It is therefore
69 * not necessary to lock inode->i_lock.
71 rcu_assign_pointer(landlock_inode(inode)->object, NULL);
73 * Now, new rules can safely be tied to @inode with get_inode_object().
77 if (atomic_long_dec_and_test(&landlock_superblock(sb)->inode_refs))
78 wake_up_var(&landlock_superblock(sb)->inode_refs);
81 static const struct landlock_object_underops landlock_fs_underops = {
82 .release = release_inode
/* Ruleset management */
87 static struct landlock_object *get_inode_object(struct inode *const inode)
89 struct landlock_object *object, *new_object;
90 struct landlock_inode_security *inode_sec = landlock_inode(inode);
94 object = rcu_dereference(inode_sec->object);
96 if (likely(refcount_inc_not_zero(&object->usage))) {
101 * We are racing with release_inode(), the object is going
102 * away. Wait for release_inode(), then retry.
104 spin_lock(&object->lock);
105 spin_unlock(&object->lock);
111 * If there is no object tied to @inode, then create a new one (without
112 * holding any locks).
114 new_object = landlock_create_object(&landlock_fs_underops, inode);
115 if (IS_ERR(new_object))
119 * Protects against concurrent calls to get_inode_object() or
122 spin_lock(&inode->i_lock);
123 if (unlikely(rcu_access_pointer(inode_sec->object))) {
124 /* Someone else just created the object, bail out and retry. */
125 spin_unlock(&inode->i_lock);
133 * @inode will be released by hook_sb_delete() on its superblock
134 * shutdown, or by release_inode() when no more ruleset references the
138 rcu_assign_pointer(inode_sec->object, new_object);
139 spin_unlock(&inode->i_lock);
/* All access rights that can be tied to files. */
/* clang-format off */
#define ACCESS_FILE ( \
	LANDLOCK_ACCESS_FS_EXECUTE | \
	LANDLOCK_ACCESS_FS_WRITE_FILE | \
	LANDLOCK_ACCESS_FS_READ_FILE)
/* clang-format on */
152 * @path: Should have been checked by get_path_from_fd().
154 int landlock_append_fs_rule(struct landlock_ruleset *const ruleset,
155 const struct path *const path,
156 access_mask_t access_rights)
159 struct landlock_object *object;
161 /* Files only get access rights that make sense. */
162 if (!d_is_dir(path->dentry) &&
163 (access_rights | ACCESS_FILE) != ACCESS_FILE)
165 if (WARN_ON_ONCE(ruleset->num_layers != 1))
168 /* Transforms relative access rights to absolute ones. */
169 access_rights |= LANDLOCK_MASK_ACCESS_FS & ~ruleset->fs_access_masks[0];
170 object = get_inode_object(d_backing_inode(path->dentry));
172 return PTR_ERR(object);
173 mutex_lock(&ruleset->lock);
174 err = landlock_insert_rule(ruleset, object, access_rights);
175 mutex_unlock(&ruleset->lock);
177 * No need to check for an error because landlock_insert_rule()
178 * increments the refcount for the new object if needed.
180 landlock_put_object(object);
/* Access-control management */
187 * The lifetime of the returned rule is tied to @domain.
189 * Returns NULL if no rule is found or if @dentry is negative.
191 static inline const struct landlock_rule *
192 find_rule(const struct landlock_ruleset *const domain,
193 const struct dentry *const dentry)
195 const struct landlock_rule *rule;
196 const struct inode *inode;
198 /* Ignores nonexistent leafs. */
199 if (d_is_negative(dentry))
202 inode = d_backing_inode(dentry);
204 rule = landlock_find_rule(
205 domain, rcu_dereference(landlock_inode(inode)->object));
211 * @layer_masks is read and may be updated according to the access request and
214 * Returns true if the request is allowed (i.e. relevant layer masks for the
215 * request are empty).
218 unmask_layers(const struct landlock_rule *const rule,
219 const access_mask_t access_request,
220 layer_mask_t (*const layer_masks)[LANDLOCK_NUM_ACCESS_FS])
224 if (!access_request || !layer_masks)
230 * An access is granted if, for each policy layer, at least one rule
231 * encountered on the pathwalk grants the requested access,
232 * regardless of its position in the layer stack. We must then check
233 * the remaining layers for each inode, from the first added layer to
234 * the last one. When there is multiple requested accesses, for each
235 * policy layer, the full set of requested accesses may not be granted
236 * by only one rule, but by the union (binary OR) of multiple rules.
237 * E.g. /a/b <execute> + /a <read> => /a/b <execute + read>
239 for (layer_level = 0; layer_level < rule->num_layers; layer_level++) {
240 const struct landlock_layer *const layer =
241 &rule->layers[layer_level];
242 const layer_mask_t layer_bit = BIT_ULL(layer->level - 1);
243 const unsigned long access_req = access_request;
244 unsigned long access_bit;
248 * Records in @layer_masks which layer grants access to each
252 for_each_set_bit(access_bit, &access_req,
253 ARRAY_SIZE(*layer_masks)) {
254 if (layer->access & BIT_ULL(access_bit))
255 (*layer_masks)[access_bit] &= ~layer_bit;
256 is_empty = is_empty && !(*layer_masks)[access_bit];
264 static int check_access_path(const struct landlock_ruleset *const domain,
265 const struct path *const path,
266 const access_mask_t access_request)
268 layer_mask_t layer_masks[LANDLOCK_NUM_ACCESS_FS] = {};
269 bool allowed = false, has_access = false;
270 struct path walker_path;
275 if (WARN_ON_ONCE(!domain || !path))
278 * Allows access to pseudo filesystems that will never be mountable
279 * (e.g. sockfs, pipefs), but can still be reachable through
280 * /proc/<pid>/fd/<file-descriptor> .
282 if ((path->dentry->d_sb->s_flags & SB_NOUSER) ||
283 (d_is_positive(path->dentry) &&
284 unlikely(IS_PRIVATE(d_backing_inode(path->dentry)))))
286 if (WARN_ON_ONCE(domain->num_layers < 1))
289 /* Saves all layers handling a subset of requested accesses. */
290 for (i = 0; i < domain->num_layers; i++) {
291 const unsigned long access_req = access_request;
292 unsigned long access_bit;
294 for_each_set_bit(access_bit, &access_req,
295 ARRAY_SIZE(layer_masks)) {
296 if (domain->fs_access_masks[i] & BIT_ULL(access_bit)) {
297 layer_masks[access_bit] |= BIT_ULL(i);
302 /* An access request not handled by the domain is allowed. */
307 path_get(&walker_path);
309 * We need to walk through all the hierarchy to not miss any relevant
313 struct dentry *parent_dentry;
315 allowed = unmask_layers(find_rule(domain, walker_path.dentry),
316 access_request, &layer_masks);
318 /* Stops when a rule from each layer grants access. */
322 if (walker_path.dentry == walker_path.mnt->mnt_root) {
323 if (follow_up(&walker_path)) {
324 /* Ignores hidden mount points. */
328 * Stops at the real root. Denies access
329 * because not all layers have granted access.
335 if (unlikely(IS_ROOT(walker_path.dentry))) {
337 * Stops at disconnected root directories. Only allows
338 * access to internal filesystems (e.g. nsfs, which is
339 * reachable through /proc/<pid>/ns/<namespace>).
341 allowed = !!(walker_path.mnt->mnt_flags & MNT_INTERNAL);
344 parent_dentry = dget_parent(walker_path.dentry);
345 dput(walker_path.dentry);
346 walker_path.dentry = parent_dentry;
348 path_put(&walker_path);
349 return allowed ? 0 : -EACCES;
352 static inline int current_check_access_path(const struct path *const path,
353 const access_mask_t access_request)
355 const struct landlock_ruleset *const dom =
356 landlock_get_current_domain();
360 return check_access_path(dom, path, access_request);
365 static void hook_inode_free_security(struct inode *const inode)
368 * All inodes must already have been untied from their object by
369 * release_inode() or hook_sb_delete().
371 WARN_ON_ONCE(landlock_inode(inode)->object);
/* Super-block hooks */
377 * Release the inodes used in a security policy.
379 * Cf. fsnotify_unmount_inodes() and invalidate_inodes()
381 static void hook_sb_delete(struct super_block *const sb)
383 struct inode *inode, *prev_inode = NULL;
385 if (!landlock_initialized)
388 spin_lock(&sb->s_inode_list_lock);
389 list_for_each_entry(inode, &sb->s_inodes, i_sb_list) {
390 struct landlock_object *object;
392 /* Only handles referenced inodes. */
393 if (!atomic_read(&inode->i_count))
397 * Protects against concurrent modification of inode (e.g.
398 * from get_inode_object()).
400 spin_lock(&inode->i_lock);
402 * Checks I_FREEING and I_WILL_FREE to protect against a race
403 * condition when release_inode() just called iput(), which
404 * could lead to a NULL dereference of inode->security or a
405 * second call to iput() for the same Landlock object. Also
406 * checks I_NEW because such inode cannot be tied to an object.
408 if (inode->i_state & (I_FREEING | I_WILL_FREE | I_NEW)) {
409 spin_unlock(&inode->i_lock);
414 object = rcu_dereference(landlock_inode(inode)->object);
417 spin_unlock(&inode->i_lock);
420 /* Keeps a reference to this inode until the next loop walk. */
422 spin_unlock(&inode->i_lock);
425 * If there is no concurrent release_inode() ongoing, then we
426 * are in charge of calling iput() on this inode, otherwise we
427 * will just wait for it to finish.
429 spin_lock(&object->lock);
430 if (object->underobj == inode) {
431 object->underobj = NULL;
432 spin_unlock(&object->lock);
436 * Because object->underobj was not NULL,
437 * release_inode() and get_inode_object() guarantee
438 * that it is safe to reset
439 * landlock_inode(inode)->object while it is not NULL.
440 * It is therefore not necessary to lock inode->i_lock.
442 rcu_assign_pointer(landlock_inode(inode)->object, NULL);
444 * At this point, we own the ihold() reference that was
445 * originally set up by get_inode_object() and the
446 * __iget() reference that we just set in this loop
447 * walk. Therefore the following call to iput() will
448 * not sleep nor drop the inode because there is now at
449 * least two references to it.
453 spin_unlock(&object->lock);
459 * At this point, we still own the __iget() reference
460 * that we just set in this loop walk. Therefore we
461 * can drop the list lock and know that the inode won't
462 * disappear from under us until the next loop walk.
464 spin_unlock(&sb->s_inode_list_lock);
466 * We can now actually put the inode reference from the
467 * previous loop walk, which is not needed anymore.
471 spin_lock(&sb->s_inode_list_lock);
475 spin_unlock(&sb->s_inode_list_lock);
477 /* Puts the inode reference from the last loop walk, if any. */
480 /* Waits for pending iput() in release_inode(). */
481 wait_var_event(&landlock_superblock(sb)->inode_refs,
482 !atomic_long_read(&landlock_superblock(sb)->inode_refs));
486 * Because a Landlock security policy is defined according to the filesystem
487 * topology (i.e. the mount namespace), changing it may grant access to files
488 * not previously allowed.
490 * To make it simple, deny any filesystem topology modification by landlocked
491 * processes. Non-landlocked processes may still change the namespace of a
492 * landlocked process, but this kind of threat must be handled by a system-wide
493 * access-control security policy.
495 * This could be lifted in the future if Landlock can safely handle mount
496 * namespace updates requested by a landlocked process. Indeed, we could
497 * update the current domain (which is currently read-only) by taking into
498 * account the accesses of the source and the destination of a new mount point.
499 * However, it would also require to make all the child domains dynamically
500 * inherit these new constraints. Anyway, for backward compatibility reasons,
501 * a dedicated user space option would be required (e.g. as a ruleset flag).
503 static int hook_sb_mount(const char *const dev_name,
504 const struct path *const path, const char *const type,
505 const unsigned long flags, void *const data)
507 if (!landlock_get_current_domain())
512 static int hook_move_mount(const struct path *const from_path,
513 const struct path *const to_path)
515 if (!landlock_get_current_domain())
521 * Removing a mount point may reveal a previously hidden file hierarchy, which
522 * may then grant access to files, which may have previously been forbidden.
524 static int hook_sb_umount(struct vfsmount *const mnt, const int flags)
526 if (!landlock_get_current_domain())
531 static int hook_sb_remount(struct super_block *const sb, void *const mnt_opts)
533 if (!landlock_get_current_domain())
539 * pivot_root(2), like mount(2), changes the current mount namespace. It must
540 * then be forbidden for a landlocked process.
542 * However, chroot(2) may be allowed because it only changes the relative root
543 * directory of the current process. Moreover, it can be used to restrict the
544 * view of the filesystem.
546 static int hook_sb_pivotroot(const struct path *const old_path,
547 const struct path *const new_path)
549 if (!landlock_get_current_domain())
556 static inline access_mask_t get_mode_access(const umode_t mode)
558 switch (mode & S_IFMT) {
560 return LANDLOCK_ACCESS_FS_MAKE_SYM;
562 /* A zero mode translates to S_IFREG. */
564 return LANDLOCK_ACCESS_FS_MAKE_REG;
566 return LANDLOCK_ACCESS_FS_MAKE_DIR;
568 return LANDLOCK_ACCESS_FS_MAKE_CHAR;
570 return LANDLOCK_ACCESS_FS_MAKE_BLOCK;
572 return LANDLOCK_ACCESS_FS_MAKE_FIFO;
574 return LANDLOCK_ACCESS_FS_MAKE_SOCK;
582 * Creating multiple links or renaming may lead to privilege escalations if not
583 * handled properly. Indeed, we must be sure that the source doesn't gain more
584 * privileges by being accessible from the destination. This is getting more
585 * complex when dealing with multiple layers. The whole picture can be seen as
586 * a multilayer partial ordering problem. A future version of Landlock will
589 static int hook_path_link(struct dentry *const old_dentry,
590 const struct path *const new_dir,
591 struct dentry *const new_dentry)
593 const struct landlock_ruleset *const dom =
594 landlock_get_current_domain();
598 /* The mount points are the same for old and new paths, cf. EXDEV. */
599 if (old_dentry->d_parent != new_dir->dentry)
600 /* Gracefully forbids reparenting. */
602 if (unlikely(d_is_negative(old_dentry)))
604 return check_access_path(
606 get_mode_access(d_backing_inode(old_dentry)->i_mode));
609 static inline access_mask_t maybe_remove(const struct dentry *const dentry)
611 if (d_is_negative(dentry))
613 return d_is_dir(dentry) ? LANDLOCK_ACCESS_FS_REMOVE_DIR :
614 LANDLOCK_ACCESS_FS_REMOVE_FILE;
617 static int hook_path_rename(const struct path *const old_dir,
618 struct dentry *const old_dentry,
619 const struct path *const new_dir,
620 struct dentry *const new_dentry)
622 const struct landlock_ruleset *const dom =
623 landlock_get_current_domain();
627 /* The mount points are the same for old and new paths, cf. EXDEV. */
628 if (old_dir->dentry != new_dir->dentry)
629 /* Gracefully forbids reparenting. */
631 if (unlikely(d_is_negative(old_dentry)))
633 /* RENAME_EXCHANGE is handled because directories are the same. */
634 return check_access_path(
636 maybe_remove(old_dentry) | maybe_remove(new_dentry) |
637 get_mode_access(d_backing_inode(old_dentry)->i_mode));
640 static int hook_path_mkdir(const struct path *const dir,
641 struct dentry *const dentry, const umode_t mode)
643 return current_check_access_path(dir, LANDLOCK_ACCESS_FS_MAKE_DIR);
646 static int hook_path_mknod(const struct path *const dir,
647 struct dentry *const dentry, const umode_t mode,
648 const unsigned int dev)
650 const struct landlock_ruleset *const dom =
651 landlock_get_current_domain();
655 return check_access_path(dom, dir, get_mode_access(mode));
658 static int hook_path_symlink(const struct path *const dir,
659 struct dentry *const dentry,
660 const char *const old_name)
662 return current_check_access_path(dir, LANDLOCK_ACCESS_FS_MAKE_SYM);
665 static int hook_path_unlink(const struct path *const dir,
666 struct dentry *const dentry)
668 return current_check_access_path(dir, LANDLOCK_ACCESS_FS_REMOVE_FILE);
671 static int hook_path_rmdir(const struct path *const dir,
672 struct dentry *const dentry)
674 return current_check_access_path(dir, LANDLOCK_ACCESS_FS_REMOVE_DIR);
679 static inline access_mask_t get_file_access(const struct file *const file)
681 access_mask_t access = 0;
683 if (file->f_mode & FMODE_READ) {
684 /* A directory can only be opened in read mode. */
685 if (S_ISDIR(file_inode(file)->i_mode))
686 return LANDLOCK_ACCESS_FS_READ_DIR;
687 access = LANDLOCK_ACCESS_FS_READ_FILE;
689 if (file->f_mode & FMODE_WRITE)
690 access |= LANDLOCK_ACCESS_FS_WRITE_FILE;
691 /* __FMODE_EXEC is indeed part of f_flags, not f_mode. */
692 if (file->f_flags & __FMODE_EXEC)
693 access |= LANDLOCK_ACCESS_FS_EXECUTE;
697 static int hook_file_open(struct file *const file)
699 const struct landlock_ruleset *const dom =
700 landlock_get_current_domain();
705 * Because a file may be opened with O_PATH, get_file_access() may
706 * return 0. This case will be handled with a future Landlock
709 return check_access_path(dom, &file->f_path, get_file_access(file));
712 static struct security_hook_list landlock_hooks[] __lsm_ro_after_init = {
713 LSM_HOOK_INIT(inode_free_security, hook_inode_free_security),
715 LSM_HOOK_INIT(sb_delete, hook_sb_delete),
716 LSM_HOOK_INIT(sb_mount, hook_sb_mount),
717 LSM_HOOK_INIT(move_mount, hook_move_mount),
718 LSM_HOOK_INIT(sb_umount, hook_sb_umount),
719 LSM_HOOK_INIT(sb_remount, hook_sb_remount),
720 LSM_HOOK_INIT(sb_pivotroot, hook_sb_pivotroot),
722 LSM_HOOK_INIT(path_link, hook_path_link),
723 LSM_HOOK_INIT(path_rename, hook_path_rename),
724 LSM_HOOK_INIT(path_mkdir, hook_path_mkdir),
725 LSM_HOOK_INIT(path_mknod, hook_path_mknod),
726 LSM_HOOK_INIT(path_symlink, hook_path_symlink),
727 LSM_HOOK_INIT(path_unlink, hook_path_unlink),
728 LSM_HOOK_INIT(path_rmdir, hook_path_rmdir),
730 LSM_HOOK_INIT(file_open, hook_file_open),
733 __init void landlock_add_fs_hooks(void)
735 security_add_hooks(landlock_hooks, ARRAY_SIZE(landlock_hooks),