/*
 * device backend utilities
 *
 * Copyright (C) 2004 Jana Saout <jana@saout.de>
 * Copyright (C) 2004-2007 Clemens Fruhwirth <clemens@endorphin.org>
 * Copyright (C) 2009-2023 Red Hat, Inc. All rights reserved.
 * Copyright (C) 2009-2023 Milan Broz
 *
 * This program is free software; you can redistribute it and/or
 * modify it under the terms of the GNU General Public License
 * as published by the Free Software Foundation; either version 2
 * of the License, or (at your option) any later version.
 *
 * This program is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
 * GNU General Public License for more details.
 *
 * You should have received a copy of the GNU General Public License
 * along with this program; if not, write to the Free Software
 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
 */
#include <string.h>
#include <stdlib.h>
#include <errno.h>
#include <fcntl.h>
#include <unistd.h>
#include <assert.h>
#include <sys/types.h>
#include <sys/stat.h>
#include <sys/ioctl.h>
#include <linux/fs.h>
#ifdef HAVE_SYS_SYSMACROS_H
# include <sys/sysmacros.h>     /* for major, minor */
#endif
#ifdef HAVE_SYS_STATVFS_H
# include <sys/statvfs.h>
#endif
#include "internal.h"
#include "utils_device_locking.h"
struct device {
	char *path;

	char *file_path;
	int loop_fd;

	int ro_dev_fd;
	int dev_fd;
	int dev_fd_excl;

	struct crypt_lock_handle *lh;

	unsigned int o_direct:1;
	unsigned int init_done:1; /* path is bdev or loop already initialized */

	/* cached values */
	size_t alignment;
	size_t block_size;
	size_t loop_block_size;
};
static size_t device_fs_block_size_fd(int fd)
{
	size_t page_size = crypt_getpagesize();

#ifdef HAVE_SYS_STATVFS_H
	struct statvfs buf;

	/*
	 * NOTE: some filesystems (NFS) return a bogus blocksize (1MB).
	 * Page-size io should always work and avoids increasing IO beyond aligned LUKS header.
	 */
	if (!fstatvfs(fd, &buf) && buf.f_bsize && buf.f_bsize <= page_size)
		return (size_t)buf.f_bsize;
#endif
	return page_size;
}
static size_t device_block_size_fd(int fd, size_t *min_size)
{
	struct stat st;
	size_t bsize;
	int arg;

	if (fstat(fd, &st) < 0)
		return 0;

	if (S_ISREG(st.st_mode))
		bsize = device_fs_block_size_fd(fd);
	else {
		if (ioctl(fd, BLKSSZGET, &arg) < 0)
			bsize = crypt_getpagesize();
		else
			bsize = (size_t)arg;
	}

	if (!min_size)
		return bsize;

	if (S_ISREG(st.st_mode)) {
		/* file can be empty as well */
		if (st.st_size > (ssize_t)bsize)
			*min_size = bsize;
		else
			*min_size = st.st_size;
	} else {
		/* block device must have at least one block */
		*min_size = bsize;
	}

	return bsize;
}
static size_t device_block_phys_size_fd(int fd)
{
	struct stat st;
	int arg;
	size_t bsize = SECTOR_SIZE;

	if (fstat(fd, &st) < 0)
		return bsize;

	if (S_ISREG(st.st_mode))
		bsize = MAX_SECTOR_SIZE;
	else if (ioctl(fd, BLKPBSZGET, &arg) >= 0)
		bsize = (size_t)arg;

	return bsize;
}
static size_t device_alignment_fd(int devfd)
{
	long alignment = DEFAULT_MEM_ALIGNMENT;

#ifdef _PC_REC_XFER_ALIGN
	alignment = fpathconf(devfd, _PC_REC_XFER_ALIGN);
	if (alignment < 0)
		alignment = DEFAULT_MEM_ALIGNMENT;
#endif
	return (size_t)alignment;
}
static int device_read_test(int devfd)
{
	char buffer[512];
	int r = -EIO;
	size_t minsize = 0, blocksize, alignment;

	blocksize = device_block_size_fd(devfd, &minsize);
	alignment = device_alignment_fd(devfd);

	if (!blocksize || !alignment)
		return -EINVAL;

	if (minsize == 0)
		return 0;

	if (minsize > sizeof(buffer))
		minsize = sizeof(buffer);

	if (read_blockwise(devfd, blocksize, alignment, buffer, minsize) == (ssize_t)minsize)
		r = 0;

	crypt_safe_memzero(buffer, sizeof(buffer));
	return r;
}
/*
 * Direct-io is always preferred. The header is usually mapped to the same
 * device and can be accessed when the rest of the device is mapped to the
 * data device. Using direct-io ensures that we do not mess with data in
 * the cache. (But proper alignment should prevent this in the first place.)
 * The read test is needed to detect broken configurations (seen with remote
 * block devices) that allow open with direct-io but then fail on read.
 */
static int device_ready(struct crypt_device *cd, struct device *device)
{
	int devfd = -1, r = 0;
	struct stat st;
	size_t tmp_size;

	if (!device)
		return -EINVAL;

	if (device->o_direct) {
		log_dbg(cd, "Trying to open and read device %s with direct-io.",
			device_path(device));
		device->o_direct = 0;
		devfd = open(device_path(device), O_RDONLY | O_DIRECT);
		if (devfd >= 0) {
			if (device_read_test(devfd) == 0) {
				device->o_direct = 1;
			} else {
				close(devfd);
				devfd = -1;
			}
		}
	}

	if (devfd < 0) {
		log_dbg(cd, "Trying to open device %s without direct-io.",
			device_path(device));
		devfd = open(device_path(device), O_RDONLY);
	}

	if (devfd < 0) {
		log_err(cd, _("Device %s does not exist or access denied."),
			device_path(device));
		return -EINVAL;
	}

	if (fstat(devfd, &st) < 0)
		r = -EINVAL;
	else if (!S_ISBLK(st.st_mode))
		r = S_ISREG(st.st_mode) ? -ENOTBLK : -EINVAL;
	if (r == -EINVAL) {
		log_err(cd, _("Device %s is not compatible."),
			device_path(device));
		close(devfd);
		return r;
	}

	/* Allow only increase (loop device) */
	tmp_size = device_alignment_fd(devfd);
	if (tmp_size > device->alignment)
		device->alignment = tmp_size;

	tmp_size = device_block_size_fd(devfd, NULL);
	if (tmp_size > device->block_size)
		device->block_size = tmp_size;

	close(devfd);
	return r;
}
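/*
 * Illustrative sketch (not part of the original file): the same
 * open-with-O_DIRECT-then-verify-read pattern, standalone. Direct-io needs
 * an aligned buffer and length; the 4096 alignment/size here is an assumed
 * common case, and devices smaller than 4096 bytes would fail this probe.
 */
#if 0
static int o_direct_usable(const char *path)
{
	void *buf = NULL;
	int fd, ok = 0;

	fd = open(path, O_RDONLY | O_DIRECT);
	if (fd < 0)
		return 0;	/* open itself refused direct-io */

	/* a read that succeeds proves direct-io really works here */
	if (!posix_memalign(&buf, 4096, 4096) && read(fd, buf, 4096) == 4096)
		ok = 1;

	free(buf);
	close(fd);
	return ok;
}
#endif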
static int _open_locked(struct crypt_device *cd, struct device *device, int flags)
{
	int fd;

	assert(device);

	log_dbg(cd, "Opening locked device %s", device_path(device));

	if ((flags & O_ACCMODE) != O_RDONLY && device_locked_readonly(device->lh)) {
		log_dbg(cd, "Cannot open locked device %s in write mode. Read lock held.", device_path(device));
		return -EAGAIN;
	}

	fd = open(device_path(device), flags);
	if (fd < 0)
		return fd;

	if (device_locked_verify(cd, fd, device->lh)) {
		/* fd doesn't correspond to a locked resource */
		close(fd);
		log_dbg(cd, "Failed to verify lock resource for device %s.", device_path(device));
		return -EINVAL;
	}

	return fd;
}
/*
 * Common wrapper for device sync.
 */
void device_sync(struct crypt_device *cd, struct device *device)
{
	if (!device || device->dev_fd < 0)
		return;

	if (fsync(device->dev_fd) == -1)
		log_dbg(cd, "Cannot sync device %s.", device_path(device));
}
/*
 * In non-locked mode always returns fd or -1.
 *
 * In locked mode returns the opened fd or one of:
 * -EAGAIN : write mode requested while the device is locked via a shared (read) lock
 * -EINVAL : invalid lock fd state
 * -1      : all other errors
 */
static int device_open_internal(struct crypt_device *cd, struct device *device, int flags)
{
	int access, devfd;

	if (device->o_direct)
		flags |= O_DIRECT;

	access = flags & O_ACCMODE;
	if (access == O_WRONLY)
		access = O_RDWR;

	if (access == O_RDONLY && device->ro_dev_fd >= 0) {
		log_dbg(cd, "Reusing open r%c fd on device %s", 'o', device_path(device));
		return device->ro_dev_fd;
	} else if (access == O_RDWR && device->dev_fd >= 0) {
		log_dbg(cd, "Reusing open r%c fd on device %s", 'w', device_path(device));
		return device->dev_fd;
	}

	if (device_locked(device->lh))
		devfd = _open_locked(cd, device, flags);
	else
		devfd = open(device_path(device), flags);

	if (devfd < 0) {
		log_dbg(cd, "Cannot open device %s%s.",
			device_path(device),
			access != O_RDONLY ? " for write" : "");
		return devfd;
	}

	if (access == O_RDONLY)
		device->ro_dev_fd = devfd;
	else
		device->dev_fd = devfd;

	return devfd;
}
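/*
 * Illustrative sketch (not part of the original file): how a caller can act
 * on the contract documented above. device_open_locked() routes through
 * device_open_internal(); the retry-on--EAGAIN policy shown here is an
 * assumption about the caller, not library behavior.
 */
#if 0
int fd = device_open_locked(cd, device, O_RDWR);
if (fd == -EAGAIN) {
	/* shared (read) lock held: drop it, take a write lock, then retry */
} else if (fd < 0) {
	/* -EINVAL (bad lock state) or -1 (plain open failure) */
}
#endif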
int device_open(struct crypt_device *cd, struct device *device, int flags)
{
	if (!device)
		return -EINVAL;

	assert(!device_locked(device->lh));
	return device_open_internal(cd, device, flags);
}
int device_open_excl(struct crypt_device *cd, struct device *device, int flags)
{
	const char *path;
	struct stat st;

	if (!device)
		return -EINVAL;

	assert(!device_locked(device->lh));

	if (device->dev_fd_excl < 0) {
		path = device_path(device);
		if (stat(path, &st))
			return -EINVAL;
		if (!S_ISBLK(st.st_mode))
			log_dbg(cd, "%s is not a block device. Can't open in exclusive mode.",
				path);
		else {
			/* open(2) with O_EXCL (w/o O_CREAT) on a regular file is undefined behaviour according to the man page */
			/* coverity[toctou] */
			device->dev_fd_excl = open(path, O_RDONLY | O_EXCL); /* lgtm[cpp/toctou-race-condition] */
			if (device->dev_fd_excl < 0)
				return errno == EBUSY ? -EBUSY : device->dev_fd_excl;
			if (fstat(device->dev_fd_excl, &st) || !S_ISBLK(st.st_mode)) {
				log_dbg(cd, "%s is not a block device. Can't open in exclusive mode.",
					path);
				close(device->dev_fd_excl);
				device->dev_fd_excl = -1;
			} else
				log_dbg(cd, "Device %s is blocked for exclusive open.", path);
		}
	}

	return device_open_internal(cd, device, flags);
}
void device_release_excl(struct crypt_device *cd, struct device *device)
{
	if (device && device->dev_fd_excl >= 0) {
		if (close(device->dev_fd_excl))
			log_dbg(cd, "Failed to release exclusive handle on device %s.",
				device_path(device));
		else
			log_dbg(cd, "Closed exclusive fd for %s.", device_path(device));
		device->dev_fd_excl = -1;
	}
}
int device_open_locked(struct crypt_device *cd, struct device *device, int flags)
{
	if (!device)
		return -EINVAL;

	assert(!crypt_metadata_locking_enabled() || device_locked(device->lh));
	return device_open_internal(cd, device, flags);
}
/* Avoid any read from device, expects direct-io to work. */
int device_alloc_no_check(struct device **device, const char *path)
{
	struct device *dev;

	if (!path) {
		*device = NULL;
		return 0;
	}

	dev = malloc(sizeof(struct device));
	if (!dev)
		return -ENOMEM;

	memset(dev, 0, sizeof(struct device));
	dev->path = strdup(path);
	if (!dev->path) {
		free(dev);
		return -ENOMEM;
	}
	dev->loop_fd = -1;
	dev->ro_dev_fd = -1;
	dev->dev_fd = -1;
	dev->dev_fd_excl = -1;
	dev->o_direct = 1;

	*device = dev;
	return 0;
}
int device_alloc(struct crypt_device *cd, struct device **device, const char *path)
{
	struct device *dev;
	int r;

	r = device_alloc_no_check(&dev, path);
	if (r < 0)
		return r;

	if (dev) {
		r = device_ready(cd, dev);
		if (!r) {
			dev->init_done = 1;
		} else if (r == -ENOTBLK) {
			/* alloc loop later */
		} else if (r < 0) {
			free(dev->path);
			free(dev);
			return -ENOTBLK;
		}
	}

	*device = dev;
	return 0;
}
void device_free(struct crypt_device *cd, struct device *device)
{
	if (!device)
		return;

	device_close(cd, device);

	if (device->dev_fd_excl != -1) {
		log_dbg(cd, "Closed exclusive fd for %s.", device_path(device));
		close(device->dev_fd_excl);
	}

	if (device->loop_fd != -1) {
		log_dbg(cd, "Closed loop %s (%s).", device->path, device->file_path);
		close(device->loop_fd);
	}

	assert(!device_locked(device->lh));

	free(device->file_path);
	free(device->path);
	free(device);
}
/* Get block device path */
const char *device_block_path(const struct device *device)
{
	if (!device || !device->init_done)
		return NULL;

	return device->path;
}

/* Get device-mapper name of device (if possible) */
const char *device_dm_name(const struct device *device)
{
	const char *dmdir = dm_get_dir();
	size_t dmdir_len = strlen(dmdir);

	if (!device || !device->init_done)
		return NULL;

	if (strncmp(device->path, dmdir, dmdir_len))
		return NULL;

	return &device->path[dmdir_len+1];
}

/* Get path to device / file */
const char *device_path(const struct device *device)
{
	if (!device)
		return NULL;

	if (device->file_path)
		return device->file_path;

	return device->path;
}
/* block device topology ioctls, introduced in 2.6.32 */
#ifndef BLKIOMIN
#define BLKIOMIN    _IO(0x12,120)
#define BLKIOOPT    _IO(0x12,121)
#define BLKALIGNOFF _IO(0x12,122)
#endif
void device_topology_alignment(struct crypt_device *cd,
			       struct device *device,
			       unsigned long *required_alignment, /* bytes */
			       unsigned long *alignment_offset,   /* bytes */
			       unsigned long default_alignment)
{
	int dev_alignment_offset = 0;
	unsigned int min_io_size = 0, opt_io_size = 0;
	unsigned long temp_alignment = 0;
	int fd;

	*required_alignment = default_alignment;
	*alignment_offset = 0;

	if (!device || !device->path) //FIXME
		return;

	fd = open(device->path, O_RDONLY);
	if (fd == -1)
		return;

	/* minimum io size */
	if (ioctl(fd, BLKIOMIN, &min_io_size) == -1) {
		log_dbg(cd, "Topology info for %s not supported, using default offset %lu bytes.",
			device->path, default_alignment);
		goto out;
	}

	/* optimal io size */
	if (ioctl(fd, BLKIOOPT, &opt_io_size) == -1)
		opt_io_size = min_io_size;

	/* alignment offset; bogus -1 means misaligned/unknown */
	if (ioctl(fd, BLKALIGNOFF, &dev_alignment_offset) == -1 || dev_alignment_offset < 0)
		dev_alignment_offset = 0;
	*alignment_offset = (unsigned long)dev_alignment_offset;

	temp_alignment = (unsigned long)min_io_size;

	/*
	 * Ignore a bogus opt-io size that could break the alignment.
	 * A real opt_io_size should also be aligned to the minimal page size (4k);
	 * some bogus USB enclosures report wrong data here.
	 */
	if ((temp_alignment < (unsigned long)opt_io_size) &&
	    !((unsigned long)opt_io_size % temp_alignment) && !MISALIGNED_4K(opt_io_size))
		temp_alignment = (unsigned long)opt_io_size;
	else if (opt_io_size && (opt_io_size != min_io_size))
		log_err(cd, _("Ignoring bogus optimal-io size for data device (%u bytes)."), opt_io_size);

	/* If the default alignment is already a multiple of the calculated one, keep the default */
	if (temp_alignment && (default_alignment % temp_alignment))
		*required_alignment = temp_alignment;

	log_dbg(cd, "Topology: IO (%u/%u), offset = %lu; Required alignment is %lu bytes.",
		min_io_size, opt_io_size, *alignment_offset, *required_alignment);
out:
	(void)close(fd);
}
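/*
 * Illustrative sketch (not part of the original file): querying the same
 * topology values directly with the standard block ioctls. On a typical
 * 512e disk this prints something like "IO min/opt: 4096/0, offset: 0";
 * actual values vary per device.
 */
#if 0
#include <stdio.h>
#include <fcntl.h>
#include <unistd.h>
#include <sys/ioctl.h>
#include <linux/fs.h>

int main(int argc, char **argv)
{
	unsigned int min_io = 0, opt_io = 0;
	int off = 0, fd;

	if (argc < 2 || (fd = open(argv[1], O_RDONLY)) < 0)
		return 1;

	ioctl(fd, BLKIOMIN, &min_io);    /* minimum io size */
	ioctl(fd, BLKIOOPT, &opt_io);    /* optimal io size */
	ioctl(fd, BLKALIGNOFF, &off);    /* alignment offset */
	printf("IO min/opt: %u/%u, offset: %d\n", min_io, opt_io, off);
	close(fd);
	return 0;
}
#endif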
size_t device_block_size(struct crypt_device *cd, struct device *device)
{
	int fd;

	if (!device)
		return 0;

	if (device->block_size)
		return device->block_size;

	fd = open(device->file_path ?: device->path, O_RDONLY);
	if (fd >= 0) {
		device->block_size = device_block_size_fd(fd, NULL);
		close(fd);
	}

	if (!device->block_size)
		log_dbg(cd, "Cannot get block size for device %s.", device_path(device));

	return device->block_size;
}
size_t device_optimal_encryption_sector_size(struct crypt_device *cd, struct device *device)
{
	int fd;
	size_t phys_block_size;

	if (!device)
		return SECTOR_SIZE;

	fd = open(device->file_path ?: device->path, O_RDONLY);
	if (fd < 0) {
		log_dbg(cd, "Cannot get optimal encryption sector size for device %s.", device_path(device));
		return SECTOR_SIZE;
	}

	/* cache device block size */
	device->block_size = device_block_size_fd(fd, NULL);
	if (!device->block_size) {
		close(fd);
		log_dbg(cd, "Cannot get block size for device %s.", device_path(device));
		return SECTOR_SIZE;
	}

	if (device->block_size >= MAX_SECTOR_SIZE) {
		close(fd);
		return MISALIGNED(device->block_size, MAX_SECTOR_SIZE) ? SECTOR_SIZE : MAX_SECTOR_SIZE;
	}

	phys_block_size = device_block_phys_size_fd(fd);
	close(fd);

	if (device->block_size >= phys_block_size ||
	    phys_block_size <= SECTOR_SIZE ||
	    phys_block_size > MAX_SECTOR_SIZE ||
	    MISALIGNED(phys_block_size, device->block_size))
		return device->block_size;

	return phys_block_size;
}
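/*
 * Illustrative walk-through (not part of the original file) of the decision
 * above, assuming MAX_SECTOR_SIZE is 4096:
 *   logical  512 / physical 4096 (512e disk)  -> 4096 (prefer physical size)
 *   logical  512 / physical  512              ->  512 (nothing to gain)
 *   logical 4096 / physical anything          -> 4096 (logical already maximal)
 *   logical  512 / physical 3072 (bogus)      ->  512 (misaligned, distrust it)
 */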
int device_read_ahead(struct device *device, uint32_t *read_ahead)
{
	int fd, r = 0;
	long read_ahead_long;

	if (!device)
		return 0;

	if ((fd = open(device->path, O_RDONLY)) < 0)
		return 0;

	r = ioctl(fd, BLKRAGET, &read_ahead_long) ? 0 : 1;
	close(fd);

	if (r)
		*read_ahead = (uint32_t) read_ahead_long;

	return r;
}
/* Get data size in bytes */
int device_size(struct device *device, uint64_t *size)
{
	struct stat st;
	int devfd, r = -EINVAL;

	if (!device)
		return -EINVAL;

	devfd = open(device->path, O_RDONLY);
	if (devfd == -1)
		return -EINVAL;

	if (fstat(devfd, &st) < 0)
		goto out;

	if (S_ISREG(st.st_mode)) {
		*size = (uint64_t)st.st_size;
		r = 0;
	} else if (ioctl(devfd, BLKGETSIZE64, size) >= 0)
		r = 0;
out:
	close(devfd);
	return r;
}
/* For a file, allocate the required space */
int device_fallocate(struct device *device, uint64_t size)
{
	struct stat st;
	int devfd, r = -EINVAL;

	devfd = open(device_path(device), O_RDWR);
	if (devfd == -1)
		return -EINVAL;

	if (!fstat(devfd, &st) && S_ISREG(st.st_mode) &&
	    ((uint64_t)st.st_size >= size || !posix_fallocate(devfd, 0, size))) {
		r = 0;
		if (device->file_path && crypt_loop_resize(device->path))
			r = -EINVAL;
	}

	close(devfd);
	return r;
}
int device_check_size(struct crypt_device *cd,
		      struct device *device,
		      uint64_t req_offset, int falloc)
{
	uint64_t dev_size;

	if (device_size(device, &dev_size)) {
		log_dbg(cd, "Cannot get device size for device %s.", device_path(device));
		return -EIO;
	}

	log_dbg(cd, "Device size %" PRIu64 ", offset %" PRIu64 ".", dev_size, req_offset);

	if (req_offset > dev_size) {
		/* If it is a header file, increase its size */
		if (falloc && !device_fallocate(device, req_offset))
			return 0;

		log_err(cd, _("Device %s is too small. Need at least %" PRIu64 " bytes."),
			device_path(device), req_offset);
		return -EINVAL;
	}

	return 0;
}
static int device_info(struct crypt_device *cd,
		       struct device *device,
		       enum devcheck device_check,
		       int *readonly, uint64_t *size)
{
	struct stat st;
	int fd = -1, r, flags = 0, real_readonly = 0;
	uint64_t real_size = 0;

	if (!device)
		return -ENOTBLK;

	if (stat(device->path, &st) < 0) {
		r = -EINVAL;
		goto out;
	}

	/* never wipe header on mounted device */
	if (device_check == DEV_EXCL && S_ISBLK(st.st_mode))
		flags |= O_EXCL;

	/* Try to open read-write to check whether it is a read-only device */
	/* coverity[toctou] */
	fd = open(device->path, O_RDWR | flags);
	if (fd == -1 && errno == EROFS) {
		real_readonly = 1;
		fd = open(device->path, O_RDONLY | flags);
	}

	if (fd == -1 && device_check == DEV_EXCL && errno == EBUSY) {
		r = -EBUSY;
		goto out;
	}

	if (fd == -1) {
		r = errno ? -errno : -EINVAL;
		goto out;
	}

	r = 0;
	if (S_ISREG(st.st_mode)) {
		//FIXME: add readonly check
		real_size = (uint64_t)st.st_size;
		real_size >>= SECTOR_SHIFT;
	} else {
		/* If the device can be opened read-write, i.e. readonly is still 0, then
		 * check whether BLKROGET says that it is read-only. E.g. read-only loop
		 * devices may be opened read-write but are read-only according to BLKROGET.
		 */
		if (real_readonly == 0 && (r = ioctl(fd, BLKROGET, &real_readonly)) < 0)
			goto out;

		r = ioctl(fd, BLKGETSIZE64, &real_size);
		if (r >= 0)
			real_size >>= SECTOR_SHIFT;
	}
out:
	if (fd != -1)
		close(fd);

	switch (r) {
	case 0:
		if (readonly)
			*readonly = real_readonly;
		if (size)
			*size = real_size;
		break;
	case -EBUSY:
		log_err(cd, _("Cannot use device %s which is in use "
			      "(already mapped or mounted)."), device_path(device));
		break;
	case -EACCES:
		log_err(cd, _("Cannot use device %s, permission denied."), device_path(device));
		break;
	default:
		log_err(cd, _("Cannot get info about device %s."), device_path(device));
		r = -EINVAL;
	}

	return r;
}
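/*
 * Illustrative sketch (not part of the original file): the two-step
 * read-only probe used above, standalone. EROFS on open(O_RDWR) catches
 * read-only media; BLKROGET additionally catches devices (e.g. read-only
 * loop devices) that still accept a read-write open.
 */
#if 0
static int probe_readonly(const char *path)
{
	int ro = 0, fd = open(path, O_RDWR);

	if (fd < 0 && errno == EROFS) {
		ro = 1;
		fd = open(path, O_RDONLY);
	}
	if (fd < 0)
		return -1;
	if (!ro)
		(void)ioctl(fd, BLKROGET, &ro); /* may still report read-only */
	close(fd);
	return ro;
}
#endif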
int device_check_access(struct crypt_device *cd,
			struct device *device,
			enum devcheck device_check)
{
	return device_info(cd, device, device_check, NULL, NULL);
}
static int device_internal_prepare(struct crypt_device *cd, struct device *device)
{
	char *loop_device = NULL, *file_path = NULL;
	int r, loop_fd, readonly = 0;

	if (device->init_done)
		return 0;

	if (getuid() || geteuid()) {
		log_err(cd, _("Cannot use a loopback device, "
			      "running as non-root user."));
		return -ENOTSUP;
	}

	log_dbg(cd, "Allocating a free loop device (block size: %zu).",
		device->loop_block_size ?: SECTOR_SIZE);

	/* Keep the loop open, detached on last close. */
	loop_fd = crypt_loop_attach(&loop_device, device->path, 0, 1, &readonly, device->loop_block_size);
	if (loop_fd == -1) {
		log_err(cd, _("Attaching loopback device failed "
			      "(loop device with autoclear flag is required)."));
		free(loop_device);
		return -EINVAL;
	}

	file_path = device->path;
	device->path = loop_device;

	r = device_ready(cd, device);
	if (r < 0) {
		device->path = file_path;
		crypt_loop_detach(loop_device);
		free(loop_device);
		return r;
	}

	log_dbg(cd, "Attached loop device block size is %zu bytes.", device_block_size_fd(loop_fd, NULL));

	device->loop_fd = loop_fd;
	device->file_path = file_path;
	device->init_done = 1;

	return 0;
}
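/*
 * Illustrative note (not part of the original file): after a successful
 * prepare, device->path holds the attached /dev/loopN node while
 * device->file_path keeps the original file name. Since device_path()
 * prefers file_path, users keep seeing the file they passed in, while
 * all I/O goes to the loop block device.
 */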
int device_block_adjust(struct crypt_device *cd,
			struct device *device,
			enum devcheck device_check,
			uint64_t device_offset,
			uint64_t *size,
			uint32_t *flags)
{
	int r, real_readonly;
	uint64_t real_size;

	if (!device)
		return -ENOTBLK;

	r = device_internal_prepare(cd, device);
	if (r)
		return r;

	r = device_info(cd, device, device_check, &real_readonly, &real_size);
	if (r)
		return r;

	if (device_offset >= real_size) {
		log_err(cd, _("Requested offset is beyond real size of device %s."),
			device_path(device));
		return -EINVAL;
	}

	if (size && !*size) {
		*size = real_size;
		if (!*size) {
			log_err(cd, _("Device %s has zero size."), device_path(device));
			return -ENOTBLK;
		}
		*size -= device_offset;
	}

	/* in case size is set by parameter */
	if (size && ((real_size - device_offset) < *size)) {
		log_dbg(cd, "Device %s: offset = %" PRIu64 " requested size = %" PRIu64
			", backing device size = %" PRIu64,
			device->path, device_offset, *size, real_size);
		log_err(cd, _("Device %s is too small."), device_path(device));
		return -EINVAL;
	}

	if (flags && real_readonly)
		*flags |= CRYPT_ACTIVATE_READONLY;

	if (size)
		log_dbg(cd, "Calculated device size is %" PRIu64" sectors (%s), offset %" PRIu64 ".",
			*size, real_readonly ? "RO" : "RW", device_offset);
	return 0;
}
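/*
 * Worked example (not part of the original file), all values in 512-byte
 * sectors: a backing device of 2097152 sectors (1 GiB) with device_offset
 * 4096. With *size == 0 the size is auto-computed: 2097152 - 4096 = 2093056.
 * A caller-requested *size of 2097000 would fail the check above, since
 * 2097152 - 4096 < 2097000.
 */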
size_t size_round_up(size_t size, size_t block)
{
	size_t s = (size + (block - 1)) / block;

	return s * block;
}
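/*
 * Example (not part of the original file): size_round_up(2000, 512) computes
 * (2000 + 511) / 512 = 4 in integer arithmetic, then 4 * 512 = 2048. Exact
 * multiples are unchanged: size_round_up(2048, 512) == 2048.
 */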
void device_disable_direct_io(struct device *device)
{
	if (device)
		device->o_direct = 0;
}

int device_direct_io(const struct device *device)
{
	return device ? device->o_direct : 0;
}
static int device_compare_path(const char *path1, const char *path2)
{
	struct stat st_path1, st_path2;

	if (stat(path1, &st_path1) < 0 || stat(path2, &st_path2) < 0)
		return -EINVAL;

	if (S_ISBLK(st_path1.st_mode) && S_ISBLK(st_path2.st_mode))
		return (st_path1.st_rdev == st_path2.st_rdev) ? 1 : 0;

	if (S_ISREG(st_path1.st_mode) && S_ISREG(st_path2.st_mode))
		return (st_path1.st_ino == st_path2.st_ino &&
			st_path1.st_dev == st_path2.st_dev) ? 1 : 0;

	return 0;
}

int device_is_identical(struct device *device1, struct device *device2)
{
	if (!device1 || !device2)
		return 0;

	if (device1 == device2)
		return 1;

	if (!strcmp(device_path(device1), device_path(device2)))
		return 1;

	return device_compare_path(device_path(device1), device_path(device2));
}
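/*
 * Illustrative note (not part of the original file): two different path
 * strings can still name the same device. E.g. /dev/disk/by-uuid/<uuid> is
 * a symlink to a node such as /dev/sda1; the strcmp fast path above fails,
 * but stat() resolves both to the same st_rdev, so device_compare_path()
 * returns 1.
 */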
int device_is_rotational(struct device *device)
{
	struct stat st;

	if (!device)
		return -EINVAL;

	if (stat(device_path(device), &st) < 0)
		return -EINVAL;

	if (!S_ISBLK(st.st_mode))
		return 0;

	return crypt_dev_is_rotational(major(st.st_rdev), minor(st.st_rdev));
}
size_t device_alignment(struct device *device)
{
	int devfd;

	if (!device)
		return 0;

	if (!device->alignment) {
		devfd = open(device_path(device), O_RDONLY);
		if (devfd != -1) {
			device->alignment = device_alignment_fd(devfd);
			close(devfd);
		}
	}

	return device->alignment;
}
void device_set_lock_handle(struct device *device, struct crypt_lock_handle *h)
{
	if (device)
		device->lh = h;
}

struct crypt_lock_handle *device_get_lock_handle(struct device *device)
{
	return device ? device->lh : NULL;
}
int device_read_lock(struct crypt_device *cd, struct device *device)
{
	if (!device || !crypt_metadata_locking_enabled())
		return 0;

	if (device_read_lock_internal(cd, device))
		return -EBUSY;

	return 0;
}

int device_write_lock(struct crypt_device *cd, struct device *device)
{
	if (!device || !crypt_metadata_locking_enabled())
		return 0;

	assert(!device_locked(device->lh) || !device_locked_readonly(device->lh));

	return device_write_lock_internal(cd, device);
}

void device_read_unlock(struct crypt_device *cd, struct device *device)
{
	if (!device || !crypt_metadata_locking_enabled())
		return;

	assert(device_locked(device->lh));

	device_unlock_internal(cd, device);
}

void device_write_unlock(struct crypt_device *cd, struct device *device)
{
	if (!device || !crypt_metadata_locking_enabled())
		return;

	assert(device_locked(device->lh) && !device_locked_readonly(device->lh));

	device_unlock_internal(cd, device);
}
bool device_is_locked(struct device *device)
{
	return device ? device_locked(device->lh) : 0;
}
void device_close(struct crypt_device *cd, struct device *device)
{
	if (!device)
		return;

	if (device->ro_dev_fd != -1) {
		log_dbg(cd, "Closing read only fd for %s.", device_path(device));
		if (close(device->ro_dev_fd))
			log_dbg(cd, "Failed to close read only fd for %s.", device_path(device));
		device->ro_dev_fd = -1;
	}

	if (device->dev_fd != -1) {
		log_dbg(cd, "Closing read write fd for %s.", device_path(device));
		if (close(device->dev_fd))
			log_dbg(cd, "Failed to close read write fd for %s.", device_path(device));
		device->dev_fd = -1;
	}
}
void device_set_block_size(struct device *device, size_t size)
{
	if (!device)
		return;

	device->loop_block_size = size;
}