2 * device backend utilities
4 * Copyright (C) 2004 Jana Saout <jana@saout.de>
5 * Copyright (C) 2004-2007 Clemens Fruhwirth <clemens@endorphin.org>
6 * Copyright (C) 2009-2021 Red Hat, Inc. All rights reserved.
7 * Copyright (C) 2009-2021 Milan Broz
9 * This program is free software; you can redistribute it and/or
10 * modify it under the terms of the GNU General Public License
11 * as published by the Free Software Foundation; either version 2
12 * of the License, or (at your option) any later version.
14 * This program is distributed in the hope that it will be useful,
15 * but WITHOUT ANY WARRANTY; without even the implied warranty of
16 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
17 * GNU General Public License for more details.
19 * You should have received a copy of the GNU General Public License
20 * along with this program; if not, write to the Free Software
21 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
28 #include <sys/types.h>
30 #include <sys/ioctl.h>
33 #ifdef HAVE_SYS_SYSMACROS_H
34 # include <sys/sysmacros.h> /* for major, minor */
36 #ifdef HAVE_SYS_STATVFS_H
37 # include <sys/statvfs.h>
40 #include "utils_device_locking.h"
	/* Metadata lock handle for this device (see utils_device_locking.h). */
	struct crypt_lock_handle *lh;

	unsigned int o_direct:1;	/* device can be accessed with direct-io */
	unsigned int init_done:1; /* path is bdev or loop already initialized */
/*
 * Block size to use for I/O on a regular file (image) backing fd.
 * Prefers the filesystem-reported block size, capped at page size.
 * NOTE(review): declaration of buf and the fallback return are elided
 * in this excerpt.
 */
static size_t device_fs_block_size_fd(int fd)
	size_t page_size = crypt_getpagesize();

#ifdef HAVE_SYS_STATVFS_H
	/*
	 * NOTE: some filesystems (NFS) returns bogus blocksize (1MB).
	 * Page-size io should always work and avoids increasing IO beyond aligned LUKS header.
	 */
	if (!fstatvfs(fd, &buf) && buf.f_bsize && buf.f_bsize <= page_size)
		return (size_t)buf.f_bsize;
/*
 * Detect block size for fd: filesystem block size for regular files,
 * logical sector size (BLKSSZGET) for block devices.
 * Optionally reports the minimal safely-readable size via *min_size.
 */
static size_t device_block_size_fd(int fd, size_t *min_size)
	if (fstat(fd, &st) < 0)

	if (S_ISREG(st.st_mode))
		bsize = device_fs_block_size_fd(fd);
	/* block device: ask the kernel for the logical sector size */
		if (ioctl(fd, BLKSSZGET, &arg) < 0)
			/* fallback when the ioctl is not supported */
			bsize = crypt_getpagesize();

	if (S_ISREG(st.st_mode)) {
		/* file can be empty as well */
		if (st.st_size > (ssize_t)bsize)
		*min_size = st.st_size;
	/* block device must have at least one block */
/* Memory alignment required for (direct-io) buffers used on this fd. */
static size_t device_alignment_fd(int devfd)
	long alignment = DEFAULT_MEM_ALIGNMENT;

#ifdef _PC_REC_XFER_ALIGN
	alignment = fpathconf(devfd, _PC_REC_XFER_ALIGN);
	/* fpathconf can report -1 (unsupported/unlimited); use the default then */
	alignment = DEFAULT_MEM_ALIGNMENT;

	return (size_t)alignment;
/*
 * Try one block-wise read from devfd using the detected block size and
 * alignment, to verify the device is actually readable with these settings.
 */
static int device_read_test(int devfd)
	size_t minsize = 0, blocksize, alignment;

	blocksize = device_block_size_fd(devfd, &minsize);
	alignment = device_alignment_fd(devfd);

	/* Cannot test without valid geometry. */
	if (!blocksize || !alignment)

	/* Never read more than the local test buffer. */
	if (minsize > sizeof(buffer))
		minsize = sizeof(buffer);

	if (read_blockwise(devfd, blocksize, alignment, buffer, minsize) == (ssize_t)minsize)

	/* Do not leak possibly sensitive on-disk data from the stack. */
	crypt_safe_memzero(buffer, sizeof(buffer));
/*
 * The direct-io is always preferred. The header is usually mapped to the same
 * device and can be accessed when the rest of device is mapped to data device.
 * Using direct-io ensures that we do not mess with data in cache.
 * (But proper alignment should prevent this in the first place.)
 * The read test is needed to detect broken configurations (seen with remote
 * block devices) that allow open with direct-io but then fails on read.
 */
static int device_ready(struct crypt_device *cd, struct device *device)
	int devfd = -1, r = 0;

	if (device->o_direct) {
		log_dbg(cd, "Trying to open and read device %s with direct-io.",
			device_path(device));
		/* Assume no direct-io; re-enabled only if the read test passes. */
		device->o_direct = 0;
		devfd = open(device_path(device), O_RDONLY | O_DIRECT);

		if (device_read_test(devfd) == 0) {
			device->o_direct = 1;

	/* Fallback path: retry without O_DIRECT. */
	log_dbg(cd, "Trying to open device %s without direct-io.",
		device_path(device));
	devfd = open(device_path(device), O_RDONLY);

	log_err(cd, _("Device %s does not exist or access denied."),
		device_path(device));

	if (fstat(devfd, &st) < 0)
	else if (!S_ISBLK(st.st_mode))
		/* a regular file may later be mapped via loop; other types fail */
		r = S_ISREG(st.st_mode) ? -ENOTBLK : -EINVAL;

	log_err(cd, _("Device %s is not compatible."),
		device_path(device));

	/* Allow only increase (loop device) */
	tmp_size = device_alignment_fd(devfd);
	if (tmp_size > device->alignment)
		device->alignment = tmp_size;

	tmp_size = device_block_size_fd(devfd, NULL);
	if (tmp_size > device->block_size)
		device->block_size = tmp_size;
/*
 * Open device path while the metadata lock is held and verify the opened
 * fd still corresponds to the locked resource (guards against races).
 */
static int _open_locked(struct crypt_device *cd, struct device *device, int flags)
	log_dbg(cd, "Opening locked device %s", device_path(device));

	/* Write access is not allowed while only a shared (read) lock is held. */
	if ((flags & O_ACCMODE) != O_RDONLY && device_locked_readonly(device->lh)) {
		log_dbg(cd, "Cannot open locked device %s in write mode. Read lock held.", device_path(device));

	fd = open(device_path(device), flags);

	if (device_locked_verify(cd, fd, device->lh)) {
		/* fd doesn't correspond to a locked resource */
		log_dbg(cd, "Failed to verify lock resource for device %s.", device_path(device));
248 * Common wrapper for device sync.
250 void device_sync(struct crypt_device *cd, struct device *device)
252 if (!device || device->dev_fd < 0)
255 if (fsync(device->dev_fd) == -1)
256 log_dbg(cd, "Cannot sync device %s.", device_path(device));
/*
 * Open device fd, reusing already-cached fds where possible.
 *
 * in non-locked mode returns always fd or -1
 * in locked mode returns opened fd or one of:
 * -EAGAIN : requested write mode while device being locked in via shared lock
 * -EINVAL : invalid lock fd state
 * -1 : all other errors
 */
static int device_open_internal(struct crypt_device *cd, struct device *device, int flags)
	if (device->o_direct)

	access = flags & O_ACCMODE;
	/* write-only is promoted to read-write internally */
	if (access == O_WRONLY)

	/* Reuse a cached fd matching the requested access mode. */
	if (access == O_RDONLY && device->ro_dev_fd >= 0) {
		log_dbg(cd, "Reusing open r%c fd on device %s", 'o', device_path(device));
		return device->ro_dev_fd;
	} else if (access == O_RDWR && device->dev_fd >= 0) {
		log_dbg(cd, "Reusing open r%c fd on device %s", 'w', device_path(device));
		return device->dev_fd;

	if (device_locked(device->lh))
		devfd = _open_locked(cd, device, flags);
	/* not locked: plain open(2) */
		devfd = open(device_path(device), flags);

	log_dbg(cd, "Cannot open device %s%s.",
		access != O_RDONLY ? " for write" : "");

	/* Cache the new fd for later reuse. */
	if (access == O_RDONLY)
		device->ro_dev_fd = devfd;
		device->dev_fd = devfd;
307 int device_open(struct crypt_device *cd, struct device *device, int flags)
312 assert(!device_locked(device->lh));
313 return device_open_internal(cd, device, flags);
/*
 * Open device with an additional exclusive (O_EXCL) handle held, so other
 * users (mount, other mappings) are blocked from the block device.
 */
int device_open_excl(struct crypt_device *cd, struct device *device, int flags)
	assert(!device_locked(device->lh));

	if (device->dev_fd_excl < 0) {
		path = device_path(device);
		/* O_EXCL only has the blocking semantics on block devices. */
		if (!S_ISBLK(st.st_mode))
			log_dbg(cd, "%s is not a block device. Can't open in exclusive mode.",

		/* open(2) with O_EXCL (w/o O_CREAT) on regular file is undefined behaviour according to man page */
		/* coverity[toctou] */
		device->dev_fd_excl = open(path, O_RDONLY | O_EXCL);
		if (device->dev_fd_excl < 0)
			return errno == EBUSY ? -EBUSY : device->dev_fd_excl;
		/* Re-check the opened fd to close the stat/open race window. */
		if (fstat(device->dev_fd_excl, &st) || !S_ISBLK(st.st_mode)) {
			log_dbg(cd, "%s is not a block device. Can't open in exclusive mode.",
			close(device->dev_fd_excl);
			device->dev_fd_excl = -1;
			log_dbg(cd, "Device %s is blocked for exclusive open.", path);

	return device_open_internal(cd, device, flags);
352 void device_release_excl(struct crypt_device *cd, struct device *device)
354 if (device && device->dev_fd_excl >= 0) {
355 if (close(device->dev_fd_excl))
356 log_dbg(cd, "Failed to release exclusive handle on device %s.",
357 device_path(device));
359 log_dbg(cd, "Closed exclusive fd for %s.", device_path(device));
360 device->dev_fd_excl = -1;
364 int device_open_locked(struct crypt_device *cd, struct device *device, int flags)
369 assert(!crypt_metadata_locking_enabled() || device_locked(device->lh));
370 return device_open_internal(cd, device, flags);
/* Avoid any read from device, expects direct-io to work. */
int device_alloc_no_check(struct device **device, const char *path)
	dev = malloc(sizeof(struct device));

	memset(dev, 0, sizeof(struct device));
	dev->path = strdup(path);
	/* No exclusive fd is open yet. */
	dev->dev_fd_excl = -1;
/*
 * Allocate the device context and probe it; a regular file is accepted
 * here and mapped to a loop device later on first real use.
 */
int device_alloc(struct crypt_device *cd, struct device **device, const char *path)
	r = device_alloc_no_check(&dev, path);

	r = device_ready(cd, dev);
	} else if (r == -ENOTBLK) {
		/* alloc loop later */
/* Release all resources (fds, loop device, paths) held by the context. */
void device_free(struct crypt_device *cd, struct device *device)
	device_close(cd, device);

	if (device->dev_fd_excl != -1) {
		log_dbg(cd, "Closed exclusive fd for %s.", device_path(device));
		close(device->dev_fd_excl);

	if (device->loop_fd != -1) {
		log_dbg(cd, "Closed loop %s (%s).", device->path, device->file_path);
		close(device->loop_fd);

	/* Freeing a device that still holds a metadata lock is a caller bug. */
	assert(!device_locked(device->lh));

	free(device->file_path);
453 /* Get block device path */
454 const char *device_block_path(const struct device *device)
456 if (!device || !device->init_done)
462 /* Get device-mapper name of device (if possible) */
463 const char *device_dm_name(const struct device *device)
465 const char *dmdir = dm_get_dir();
466 size_t dmdir_len = strlen(dmdir);
468 if (!device || !device->init_done)
471 if (strncmp(device->path, dmdir, dmdir_len))
474 return &device->path[dmdir_len+1];
477 /* Get path to device / file */
478 const char *device_path(const struct device *device)
483 if (device->file_path)
484 return device->file_path;
/* block device topology ioctls, introduced in 2.6.32 */
#define BLKIOMIN _IO(0x12,120)    /* minimum I/O size */
#define BLKIOOPT _IO(0x12,121)    /* optimal I/O size */
#define BLKALIGNOFF _IO(0x12,122) /* alignment offset in bytes */
/*
 * Query block device topology (minimum/optimal I/O size, alignment offset)
 * and compute the required data alignment in bytes, falling back to
 * default_alignment when topology info is unavailable or bogus.
 */
void device_topology_alignment(struct crypt_device *cd,
	struct device *device,
	unsigned long *required_alignment, /* bytes */
	unsigned long *alignment_offset,   /* bytes */
	unsigned long default_alignment)
	int dev_alignment_offset = 0;
	unsigned int min_io_size = 0, opt_io_size = 0;
	unsigned long temp_alignment = 0;

	*required_alignment = default_alignment;
	*alignment_offset = 0;

	if (!device || !device->path) //FIXME
	fd = open(device->path, O_RDONLY);

	/* minimum io size */
	if (ioctl(fd, BLKIOMIN, &min_io_size) == -1) {
		log_dbg(cd, "Topology info for %s not supported, using default offset %lu bytes.",
			device->path, default_alignment);

	/* optimal io size */
	if (ioctl(fd, BLKIOOPT, &opt_io_size) == -1)
		opt_io_size = min_io_size;

	/* alignment offset, bogus -1 means misaligned/unknown */
	if (ioctl(fd, BLKALIGNOFF, &dev_alignment_offset) == -1 || dev_alignment_offset < 0)
		dev_alignment_offset = 0;
	*alignment_offset = (unsigned long)dev_alignment_offset;

	temp_alignment = (unsigned long)min_io_size;

	/*
	 * Ignore bogus opt-io that could break alignment.
	 * Also real opt_io_size should be aligned to minimal page size (4k).
	 * Some bogus USB enclosures reports wrong data here.
	 */
	if ((temp_alignment < (unsigned long)opt_io_size) &&
	    !((unsigned long)opt_io_size % temp_alignment) && !MISALIGNED_4K(opt_io_size))
		temp_alignment = (unsigned long)opt_io_size;
	else if (opt_io_size && (opt_io_size != min_io_size))
		log_err(cd, _("Ignoring bogus optimal-io size for data device (%u bytes)."), opt_io_size);

	/* If calculated alignment is multiple of default, keep default */
	if (temp_alignment && (default_alignment % temp_alignment))
		*required_alignment = temp_alignment;

	log_dbg(cd, "Topology: IO (%u/%u), offset = %lu; Required alignment is %lu bytes.",
		min_io_size, opt_io_size, *alignment_offset, *required_alignment);
/* Block size for the device, detected once and then cached in the context. */
size_t device_block_size(struct crypt_device *cd, struct device *device)
	if (device->block_size)
		return device->block_size;

	/* For loop-backed devices probe the backing file, otherwise the path. */
	fd = open(device->file_path ?: device->path, O_RDONLY);
	device->block_size = device_block_size_fd(fd, NULL);

	if (!device->block_size)
		log_dbg(cd, "Cannot get block size for device %s.", device_path(device));

	return device->block_size;
/*
 * Query the kernel read-ahead setting (BLKRAGET) for the device.
 * Returns 1 on success (value stored in *read_ahead), 0 otherwise.
 */
int device_read_ahead(struct device *device, uint32_t *read_ahead)
	long read_ahead_long;

	if ((fd = open(device->path, O_RDONLY)) < 0)

	r = ioctl(fd, BLKRAGET, &read_ahead_long) ? 0 : 1;

	*read_ahead = (uint32_t) read_ahead_long;
/* Get data size in bytes */
int device_size(struct device *device, uint64_t *size)
	int devfd, r = -EINVAL;

	devfd = open(device->path, O_RDONLY);

	if (fstat(devfd, &st) < 0)

	if (S_ISREG(st.st_mode)) {
		/* regular file: size comes from stat */
		*size = (uint64_t)st.st_size;
	} else if (ioctl(devfd, BLKGETSIZE64, size) >= 0)
/* For a file, allocate the required space */
int device_fallocate(struct device *device, uint64_t size)
	int devfd, r = -EINVAL;

	devfd = open(device_path(device), O_RDWR);

	/* Only regular files are grown; already-large files need no allocation. */
	if (!fstat(devfd, &st) && S_ISREG(st.st_mode) &&
	    ((uint64_t)st.st_size >= size || !posix_fallocate(devfd, 0, size))) {
		/* A loop device mapped over the file must be resized to match. */
		if (device->file_path && crypt_loop_resize(device->path))
/*
 * Check the device provides at least req_offset bytes; for header files
 * (falloc set) try to grow the file to the required size first.
 */
int device_check_size(struct crypt_device *cd,
	struct device *device,
	uint64_t req_offset, int falloc)
	if (device_size(device, &dev_size)) {
		log_dbg(cd, "Cannot get device size for device %s.", device_path(device));

	log_dbg(cd, "Device size %" PRIu64 ", offset %" PRIu64 ".", dev_size, req_offset);

	if (req_offset > dev_size) {
		/* If it is header file, increase its size */
		if (falloc && !device_fallocate(device, req_offset))

		log_err(cd, _("Device %s is too small. Need at least %" PRIu64 " bytes."),
			device_path(device), req_offset);
/*
 * Probe the device: detect read-only state and size (in 512B sectors).
 * DEV_EXCL check opens block devices with O_EXCL so a busy (mounted or
 * mapped) device is detected via EBUSY.
 */
static int device_info(struct crypt_device *cd,
	struct device *device,
	enum devcheck device_check,
	int *readonly, uint64_t *size)
	int fd = -1, r, flags = 0, real_readonly;

	if (stat(device->path, &st) < 0) {

	/* never wipe header on mounted device */
	if (device_check == DEV_EXCL && S_ISBLK(st.st_mode))

	/* Try to open read-write to check whether it is a read-only device */
	/* coverity[toctou] */
	fd = open(device->path, O_RDWR | flags);
	if (fd == -1 && errno == EROFS) {
		fd = open(device->path, O_RDONLY | flags);

	/* EBUSY with O_EXCL means the device is already in use. */
	if (fd == -1 && device_check == DEV_EXCL && errno == EBUSY) {

	r = errno ? -errno : -EINVAL;

	if (S_ISREG(st.st_mode)) {
		//FIXME: add readonly check
		real_size = (uint64_t)st.st_size;
		real_size >>= SECTOR_SHIFT;

	/* If the device can be opened read-write, i.e. readonly is still 0, then
	 * check whether BLKROGET says that it is read-only. E.g. read-only loop
	 * devices may be opened read-write but are read-only according to BLKROGET
	 */
	if (real_readonly == 0 && (r = ioctl(fd, BLKROGET, &real_readonly)) < 0)

	r = ioctl(fd, BLKGETSIZE64, &real_size);
	real_size >>= SECTOR_SHIFT;

	*readonly = real_readonly;

	log_err(cd, _("Cannot use device %s which is in use "
		      "(already mapped or mounted)."), device_path(device));

	log_err(cd, _("Cannot use device %s, permission denied."), device_path(device));

	log_err(cd, _("Cannot get info about device %s."), device_path(device));
761 int device_check_access(struct crypt_device *cd,
762 struct device *device,
763 enum devcheck device_check)
765 return device_info(cd, device, device_check, NULL, NULL);
/*
 * Map a file-backed device path onto a loop device on first use.
 * Requires root; on success device->path points to the loop device
 * and device->file_path keeps the original file path.
 */
static int device_internal_prepare(struct crypt_device *cd, struct device *device)
	char *loop_device = NULL, *file_path = NULL;
	int r, loop_fd, readonly = 0;

	/* Already mapped: nothing to do. */
	if (device->init_done)

	if (getuid() || geteuid()) {
		log_err(cd, _("Cannot use a loopback device, "
			"running as non-root user."));

	log_dbg(cd, "Allocating a free loop device.");

	/* Keep the loop open, detached on last close. */
	loop_fd = crypt_loop_attach(&loop_device, device->path, 0, 1, &readonly);
		log_err(cd, _("Attaching loopback device failed "
			"(loop device with autoclear flag is required)."));

	/* Swap paths: device->path is now the loop device. */
	file_path = device->path;
	device->path = loop_device;

	r = device_ready(cd, device);
		/* Roll back path swap and detach the loop on failure. */
		device->path = file_path;
		crypt_loop_detach(loop_device);

	device->loop_fd = loop_fd;
	device->file_path = file_path;
	device->init_done = 1;
/*
 * Validate requested offset/size against the real device size (sectors)
 * and adjust *size; sets CRYPT_ACTIVATE_READONLY in *flags when the
 * backing device is read-only.
 */
int device_block_adjust(struct crypt_device *cd,
	struct device *device,
	enum devcheck device_check,
	uint64_t device_offset,
	int r, real_readonly;

	/* Attach loop device for file-backed paths first. */
	r = device_internal_prepare(cd, device);

	r = device_info(cd, device, device_check, &real_readonly, &real_size);

	if (device_offset >= real_size) {
		log_err(cd, _("Requested offset is beyond real size of device %s."),
			device_path(device));

	/* Size not specified by caller: use the whole rest of the device. */
	if (size && !*size) {
		log_err(cd, _("Device %s has zero size."), device_path(device));
		*size -= device_offset;

	/* in case of size is set by parameter */
	if (size && ((real_size - device_offset) < *size)) {
		log_dbg(cd, "Device %s: offset = %" PRIu64 " requested size = %" PRIu64
			", backing device size = %" PRIu64,
			device->path, device_offset, *size, real_size);
		log_err(cd, _("Device %s is too small."), device_path(device));

	if (flags && real_readonly)
		*flags |= CRYPT_ACTIVATE_READONLY;

	log_dbg(cd, "Calculated device size is %" PRIu64" sectors (%s), offset %" PRIu64 ".",
		*size, real_readonly ? "RO" : "RW", device_offset);
/*
 * Round size up to the nearest multiple of block.
 * block must be non-zero; size 0 rounds to 0.
 */
size_t size_round_up(size_t size, size_t block)
{
	size_t s = (size + (block - 1)) / block;

	return s * block;
}
871 void device_disable_direct_io(struct device *device)
874 device->o_direct = 0;
877 int device_direct_io(const struct device *device)
879 return device ? device->o_direct : 0;
/*
 * Compare two paths by what they reference on disk:
 * returns 1 when identical, 0 when distinct, -EINVAL when stat fails.
 * Block devices compare by device number, regular files by inode+fs.
 */
static int device_compare_path(const char *path1, const char *path2)
{
	struct stat st_path1, st_path2;

	if (stat(path1, &st_path1 ) < 0 || stat(path2, &st_path2 ) < 0)
		return -EINVAL;

	/* Two block device nodes are the same device iff st_rdev matches. */
	if (S_ISBLK(st_path1.st_mode) && S_ISBLK(st_path2.st_mode))
		return (st_path1.st_rdev == st_path2.st_rdev) ? 1 : 0;

	/* Two regular files are the same iff inode and filesystem match. */
	if (S_ISREG(st_path1.st_mode) && S_ISREG(st_path2.st_mode))
		return (st_path1.st_ino == st_path2.st_ino &&
			st_path1.st_dev == st_path2.st_dev) ? 1 : 0;

	/* Mixed or other file types are never considered identical. */
	return 0;
}
/*
 * Compare two device contexts: 1 when identical, 0 when not.
 * Falls back to stat-based comparison when the path strings differ.
 */
int device_is_identical(struct device *device1, struct device *device2)
{
	if (!device1 || !device2)
		return 0;

	if (device1 == device2)
		return 1;

	/* Identical path strings are trivially the same device. */
	if (!strcmp(device_path(device1), device_path(device2)))
		return 1;

	return device_compare_path(device_path(device1), device_path(device2));
}
/*
 * Query whether the underlying block device is rotational (spinning disk).
 * NOTE(review): the error return values for stat failure / non-block
 * device are elided in this excerpt.
 */
int device_is_rotational(struct device *device)
	if (stat(device_path(device), &st) < 0)

	/* Rotational flag only makes sense for block devices. */
	if (!S_ISBLK(st.st_mode))

	return crypt_dev_is_rotational(major(st.st_rdev), minor(st.st_rdev));
929 size_t device_alignment(struct device *device)
936 if (!device->alignment) {
937 devfd = open(device_path(device), O_RDONLY);
939 device->alignment = device_alignment_fd(devfd);
944 return device->alignment;
947 void device_set_lock_handle(struct device *device, struct crypt_lock_handle *h)
953 struct crypt_lock_handle *device_get_lock_handle(struct device *device)
955 return device ? device->lh : NULL;
/*
 * Take a shared (read) metadata lock; no-op when locking is disabled.
 * Returns 0 on success, -EBUSY when the lock cannot be acquired.
 */
int device_read_lock(struct crypt_device *cd, struct device *device)
{
	if (!device || !crypt_metadata_locking_enabled())
		return 0;

	if (device_read_lock_internal(cd, device))
		return -EBUSY;

	return 0;
}
969 int device_write_lock(struct crypt_device *cd, struct device *device)
971 if (!device || !crypt_metadata_locking_enabled())
974 assert(!device_locked(device->lh) || !device_locked_readonly(device->lh));
976 return device_write_lock_internal(cd, device);
979 void device_read_unlock(struct crypt_device *cd, struct device *device)
981 if (!device || !crypt_metadata_locking_enabled())
984 assert(device_locked(device->lh));
986 device_unlock_internal(cd, device);
989 void device_write_unlock(struct crypt_device *cd, struct device *device)
991 if (!device || !crypt_metadata_locking_enabled())
994 assert(device_locked(device->lh) && !device_locked_readonly(device->lh));
996 device_unlock_internal(cd, device);
999 bool device_is_locked(struct device *device)
1001 return device ? device_locked(device->lh) : 0;
/* Close and invalidate the cached read-only and read-write fds. */
void device_close(struct crypt_device *cd, struct device *device)
	if (device->ro_dev_fd != -1) {
		log_dbg(cd, "Closing read only fd for %s.", device_path(device));
		if (close(device->ro_dev_fd))
			log_dbg(cd, "Failed to close read only fd for %s.", device_path(device));
		/* Invalidate even when close() reported an error. */
		device->ro_dev_fd = -1;

	if (device->dev_fd != -1) {
		log_dbg(cd, "Closing read write fd for %s.", device_path(device));
		if (close(device->dev_fd))
			log_dbg(cd, "Failed to close read write fd for %s.", device_path(device));
		device->dev_fd = -1;