2 * device backend utilities
4 * Copyright (C) 2004 Jana Saout <jana@saout.de>
5 * Copyright (C) 2004-2007 Clemens Fruhwirth <clemens@endorphin.org>
6 * Copyright (C) 2009-2020 Red Hat, Inc. All rights reserved.
7 * Copyright (C) 2009-2020 Milan Broz
9 * This program is free software; you can redistribute it and/or
10 * modify it under the terms of the GNU General Public License
11 * as published by the Free Software Foundation; either version 2
12 * of the License, or (at your option) any later version.
14 * This program is distributed in the hope that it will be useful,
15 * but WITHOUT ANY WARRANTY; without even the implied warranty of
16 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
17 * GNU General Public License for more details.
19 * You should have received a copy of the GNU General Public License
20 * along with this program; if not, write to the Free Software
21 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
28 #include <sys/types.h>
30 #include <sys/ioctl.h>
33 #ifdef HAVE_SYS_SYSMACROS_H
34 # include <sys/sysmacros.h> /* for major, minor */
36 #ifdef HAVE_SYS_STATVFS_H
37 # include <sys/statvfs.h>
40 #include "utils_device_locking.h"
/* NOTE(review): fragment of the device struct; neighboring fields are elided in this excerpt. */
52 struct crypt_lock_handle *lh; /* metadata lock handle (see utils_device_locking.h) */
54 unsigned int o_direct:1; /* direct-io open verified usable — set/cleared in device_ready() */
55 unsigned int init_done:1; /* path is bdev or loop already initialized */
/*
 * Block size to use for IO on a regular file: the backing filesystem
 * block size, capped at page size (bogus statvfs values are rejected).
 * NOTE(review): several lines of this function are elided in this excerpt
 * (statvfs buffer declaration, fallback return).
 */
62 static size_t device_fs_block_size_fd(int fd)
64 size_t page_size = crypt_getpagesize();
66 #ifdef HAVE_SYS_STATVFS_H
70 * NOTE: some filesystems (NFS) returns bogus blocksize (1MB).
71 * Page-size io should always work and avoids increasing IO beyond aligned LUKS header.
73 if (!fstatvfs(fd, &buf) && buf.f_bsize && buf.f_bsize <= page_size)
74 return (size_t)buf.f_bsize;
/*
 * Minimal IO block size for fd: filesystem block size for regular files,
 * logical sector size (BLKSSZGET) for block devices, page size as fallback.
 * Optionally reports the minimal readable size through *min_size.
 * NOTE(review): lines elided in this excerpt (declarations, returns).
 */
79 static size_t device_block_size_fd(int fd, size_t *min_size)
85 if (fstat(fd, &st) < 0)
88 if (S_ISREG(st.st_mode))
89 bsize = device_fs_block_size_fd(fd);
91 if (ioctl(fd, BLKSSZGET, &arg) < 0)
92 bsize = crypt_getpagesize();
100 if (S_ISREG(st.st_mode)) {
101 /* file can be empty as well */
102 if (st.st_size > (ssize_t)bsize)
105 *min_size = st.st_size;
107 /* block device must have at least one block */
/*
 * Preferred memory alignment for IO buffers on devfd, queried via
 * fpathconf(_PC_REC_XFER_ALIGN) with DEFAULT_MEM_ALIGNMENT fallback.
 */
114 static size_t device_alignment_fd(int devfd)
116 long alignment = DEFAULT_MEM_ALIGNMENT;
118 #ifdef _PC_REC_XFER_ALIGN
119 alignment = fpathconf(devfd, _PC_REC_XFER_ALIGN);
/* fpathconf failed or unsupported — fall back (guard condition elided in this excerpt) */
121 alignment = DEFAULT_MEM_ALIGNMENT;
123 return (size_t)alignment;
/*
 * Try a small aligned blockwise read to verify the fd is actually readable
 * (detects devices that accept O_DIRECT open but then fail on read).
 * Returns 0 on success — see the caller in device_ready().
 * NOTE(review): buffer declaration and error paths elided in this excerpt.
 */
126 static int device_read_test(int devfd)
130 size_t minsize = 0, blocksize, alignment;
132 blocksize = device_block_size_fd(devfd, &minsize);
133 alignment = device_alignment_fd(devfd);
135 if (!blocksize || !alignment)
/* never read more than the scratch buffer holds */
141 if (minsize > sizeof(buffer))
142 minsize = sizeof(buffer);
144 if (read_blockwise(devfd, blocksize, alignment, buffer, minsize) == (ssize_t)minsize)
/* wipe the scratch buffer — it may have held on-disk (header) data */
147 crypt_safe_memzero(buffer, sizeof(buffer));
152 * The direct-io is always preferred. The header is usually mapped to the same
153 * device and can be accessed when the rest of device is mapped to data device.
154 * Using direct-io ensures that we do not mess with data in cache.
155 * (But proper alignment should prevent this in the first place.)
156 * The read test is needed to detect broken configurations (seen with remote
157 * block devices) that allow open with direct-io but then fail on read.
/*
 * Probe a device for use: prefer an O_DIRECT open (verified by a read test),
 * fall back to buffered IO, validate the node type, and record the best
 * known alignment/block size on the device handle.
 * NOTE(review): several lines (fd checks, close, return) are elided in this excerpt.
 */
159 static int device_ready(struct crypt_device *cd, struct device *device)
161 int devfd = -1, r = 0;
165 if (device->o_direct) {
166 log_dbg(cd, "Trying to open and read device %s with direct-io.",
167 device_path(device));
/* assume direct-io is broken until the read test proves otherwise */
168 device->o_direct = 0;
169 devfd = open(device_path(device), O_RDONLY | O_DIRECT);
171 if (device_read_test(devfd) == 0) {
172 device->o_direct = 1;
/* direct-io failed or was not requested — retry with buffered IO */
181 log_dbg(cd, "Trying to open device %s without direct-io.",
182 device_path(device));
183 devfd = open(device_path(device), O_RDONLY);
187 log_err(cd, _("Device %s does not exist or access denied."),
188 device_path(device));
/* only block devices are directly usable; regular files signal -ENOTBLK (loop needed) */
192 if (fstat(devfd, &st) < 0)
194 else if (!S_ISBLK(st.st_mode))
195 r = S_ISREG(st.st_mode) ? -ENOTBLK : -EINVAL;
197 log_err(cd, _("Device %s is not compatible."),
198 device_path(device));
203 /* Allow only increase (loop device) */
204 tmp_size = device_alignment_fd(devfd);
205 if (tmp_size > device->alignment)
206 device->alignment = tmp_size;
208 tmp_size = device_block_size_fd(devfd, NULL);
209 if (tmp_size > device->block_size)
210 device->block_size = tmp_size;
/*
 * Open a device protected by a metadata lock: refuse write opens while only
 * a shared (read) lock is held, and verify the opened fd still corresponds
 * to the locked resource (lock file could have been replaced underneath).
 * NOTE(review): return statements and cleanup elided in this excerpt.
 */
216 static int _open_locked(struct crypt_device *cd, struct device *device, int flags)
220 log_dbg(cd, "Opening locked device %s", device_path(device));
222 if ((flags & O_ACCMODE) != O_RDONLY && device_locked_readonly(device->lh)) {
223 log_dbg(cd, "Cannot open locked device %s in write mode. Read lock held.", device_path(device));
227 fd = open(device_path(device), flags);
231 if (device_locked_verify(cd, fd, device->lh)) {
232 /* fd doesn't correspond to a locked resource */
234 log_dbg(cd, "Failed to verify lock resource for device %s.", device_path(device));
242 * Common wrapper for device sync: fsync the cached read-write fd, if any; failure is only logged.
244 void device_sync(struct crypt_device *cd, struct device *device)
246 if (!device || device->dev_fd < 0)
249 if (fsync(device->dev_fd) == -1)
250 log_dbg(cd, "Cannot sync device %s.", device_path(device));
254 * in non-locked mode returns always fd or -1
257 * opened fd or one of:
258 * -EAGAIN : write mode requested while the device is locked via a shared (read) lock
259 * -EINVAL : invalid lock fd state
260 * -1 : all other errors
/*
 * Open the device with the requested access mode, reusing an already-open
 * cached fd when possible and honoring an active metadata lock.
 * On success the fd is cached on the device handle and returned.
 * NOTE(review): O_DIRECT flag handling and the error return are elided
 * in this excerpt.
 */
262 static int device_open_internal(struct crypt_device *cd, struct device *device, int flags)
266 if (device->o_direct)
269 access = flags & O_ACCMODE;
/* presumably write-only is promoted to read-write here — body elided, verify against full source */
270 if (access == O_WRONLY)
273 if (access == O_RDONLY && device->ro_dev_fd >= 0) {
274 log_dbg(cd, "Reusing open r%c fd on device %s", 'o', device_path(device));
275 return device->ro_dev_fd;
276 } else if (access == O_RDWR && device->dev_fd >= 0) {
277 log_dbg(cd, "Reusing open r%c fd on device %s", 'w', device_path(device));
278 return device->dev_fd;
281 if (device_locked(device->lh))
282 devfd = _open_locked(cd, device, flags);
284 devfd = open(device_path(device), flags);
287 log_dbg(cd, "Cannot open device %s%s.",
289 access != O_RDONLY ? " for write" : "");
/* cache the fd for later reuse */
293 if (access == O_RDONLY)
294 device->ro_dev_fd = devfd;
296 device->dev_fd = devfd;
/* Open a device that must NOT be under a metadata lock. */
301 int device_open(struct crypt_device *cd, struct device *device, int flags)
303 assert(!device_locked(device->lh));
304 return device_open_internal(cd, device, flags);
/*
 * Open a block device while holding an O_EXCL "blocker" fd, preventing
 * concurrent exclusive opens (e.g. mounts) for the lifetime of dev_fd_excl.
 * NOTE(review): declarations, stat() call and error paths elided in this excerpt.
 */
307 int device_open_excl(struct crypt_device *cd, struct device *device, int flags)
315 assert(!device_locked(device->lh));
317 if (device->dev_fd_excl < 0) {
318 path = device_path(device);
321 if (!S_ISBLK(st.st_mode))
322 log_dbg(cd, "%s is not a block device. Can't open in exclusive mode.",
325 /* open(2) with O_EXCL (w/o O_CREAT) on regular file is undefined behaviour according to man page */
326 /* coverity[toctou] */
327 device->dev_fd_excl = open(path, O_RDONLY | O_EXCL);
328 if (device->dev_fd_excl < 0)
329 return errno == EBUSY ? -EBUSY : device->dev_fd_excl;
/* re-check on the opened fd: path may have been swapped for a non-bdev (TOCTOU) */
330 if (fstat(device->dev_fd_excl, &st) || !S_ISBLK(st.st_mode)) {
331 log_dbg(cd, "%s is not a block device. Can't open in exclusive mode.",
333 close(device->dev_fd_excl);
334 device->dev_fd_excl = -1;
336 log_dbg(cd, "Device %s is blocked for exclusive open.", path);
340 return device_open_internal(cd, device, flags);
/* Drop the exclusive blocker fd taken by device_open_excl(). */
343 void device_release_excl(struct crypt_device *cd, struct device *device)
345 if (device && device->dev_fd_excl >= 0) {
346 if (close(device->dev_fd_excl))
347 log_dbg(cd, "Failed to release exclusive handle on device %s.",
348 device_path(device));
350 log_dbg(cd, "Closed exclusive fd for %s.", device_path(device));
351 device->dev_fd_excl = -1;
/* Open a device that MUST be under a metadata lock (when locking is enabled). */
355 int device_open_locked(struct crypt_device *cd, struct device *device, int flags)
357 assert(!crypt_metadata_locking_enabled() || device_locked(device->lh));
358 return device_open_internal(cd, device, flags);
361 /* Avoid any read from device, expects direct-io to work. */
/* Allocate a device handle for path without probing it. Caller owns the result (device_free). */
362 int device_alloc_no_check(struct device **device, const char *path)
371 dev = malloc(sizeof(struct device));
375 memset(dev, 0, sizeof(struct device));
376 dev->path = strdup(path);
/* no exclusive blocker fd yet */
384 dev->dev_fd_excl = -1;
/*
 * Allocate a device handle and probe it via device_ready(); a regular
 * file (-ENOTBLK) is accepted and attached to a loop device later.
 * NOTE(review): error handling and return elided in this excerpt.
 */
391 int device_alloc(struct crypt_device *cd, struct device **device, const char *path)
396 r = device_alloc_no_check(&dev, path);
401 r = device_ready(cd, dev);
404 } else if (r == -ENOTBLK) {
405 /* alloc loop later */
/* Release a device handle: close cached fds, blocker fd and loop fd, free owned strings. */
417 void device_free(struct crypt_device *cd, struct device *device)
422 device_close(cd, device);
424 if (device->dev_fd_excl != -1) {
425 log_dbg(cd, "Closed exclusive fd for %s.", device_path(device));
426 close(device->dev_fd_excl);
429 if (device->loop_fd != -1) {
430 log_dbg(cd, "Closed loop %s (%s).", device->path, device->file_path);
431 close(device->loop_fd);
434 assert(!device_locked(device->lh));
436 free(device->file_path);
441 /* Get block device path (only valid once the bdev/loop is initialized) */
442 const char *device_block_path(const struct device *device)
444 if (!device || !device->init_done)
450 /* Get device-mapper name of device (if possible) */
451 const char *device_dm_name(const struct device *device)
453 const char *dmdir = dm_get_dir();
454 size_t dmdir_len = strlen(dmdir);
456 if (!device || !device->init_done)
/* path must live under the device-mapper directory to have a dm name */
459 if (strncmp(device->path, dmdir, dmdir_len))
/* skip the directory prefix and its trailing '/' */
462 return &device->path[dmdir_len+1];
465 /* Get path to device / file (prefers the original file path over the loop path) */
466 const char *device_path(const struct device *device)
471 if (device->file_path)
472 return device->file_path;
477 /* block device topology ioctls, introduced in 2.6.32 */
479 #define BLKIOMIN _IO(0x12,120)
480 #define BLKIOOPT _IO(0x12,121)
481 #define BLKALIGNOFF _IO(0x12,122)
/*
 * Query block-device topology (minimum/optimal IO size, alignment offset)
 * and derive the required data alignment; falls back to default_alignment
 * when topology ioctls are unsupported.
 * NOTE(review): fd declaration, open-failure path and close are elided
 * in this excerpt.
 */
484 void device_topology_alignment(struct crypt_device *cd,
485 struct device *device,
486 unsigned long *required_alignment, /* bytes */
487 unsigned long *alignment_offset, /* bytes */
488 unsigned long default_alignment)
490 int dev_alignment_offset = 0;
491 unsigned int min_io_size = 0, opt_io_size = 0;
492 unsigned long temp_alignment = 0;
495 *required_alignment = default_alignment;
496 *alignment_offset = 0;
498 if (!device || !device->path) //FIXME
501 fd = open(device->path, O_RDONLY);
505 /* minimum io size */
506 if (ioctl(fd, BLKIOMIN, &min_io_size) == -1) {
507 log_dbg(cd, "Topology info for %s not supported, using default offset %lu bytes.",
508 device->path, default_alignment);
512 /* optimal io size */
513 if (ioctl(fd, BLKIOOPT, &opt_io_size) == -1)
514 opt_io_size = min_io_size;
516 /* alignment offset, bogus -1 means misaligned/unknown */
517 if (ioctl(fd, BLKALIGNOFF, &dev_alignment_offset) == -1 || dev_alignment_offset < 0)
518 dev_alignment_offset = 0;
519 *alignment_offset = (unsigned long)dev_alignment_offset;
521 temp_alignment = (unsigned long)min_io_size;
523 /* Ignore bogus opt-io that could break alignment */
524 if ((temp_alignment < (unsigned long)opt_io_size) &&
525 !((unsigned long)opt_io_size % temp_alignment))
526 temp_alignment = (unsigned long)opt_io_size;
528 /* If calculated alignment is multiple of default, keep default */
529 if (temp_alignment && (default_alignment % temp_alignment))
530 *required_alignment = temp_alignment;
532 log_dbg(cd, "Topology: IO (%u/%u), offset = %lu; Required alignment is %lu bytes.",
533 min_io_size, opt_io_size, *alignment_offset, *required_alignment);
/*
 * Return (and lazily cache) the IO block size for the device;
 * 0 with a debug log when it cannot be determined.
 * NOTE(review): fd checks and close elided in this excerpt.
 */
538 size_t device_block_size(struct crypt_device *cd, struct device *device)
545 if (device->block_size)
546 return device->block_size;
/* prefer the backing file when this is a loop mapping */
548 fd = open(device->file_path ?: device->path, O_RDONLY);
550 device->block_size = device_block_size_fd(fd, NULL);
554 if (!device->block_size)
555 log_dbg(cd, "Cannot get block size for device %s.", device_path(device));
557 return device->block_size;
/*
 * Fetch the kernel read-ahead setting via BLKRAGET.
 * Returns 1 on success (value stored in *read_ahead), 0 otherwise.
 * NOTE(review): declarations and close elided in this excerpt.
 */
560 int device_read_ahead(struct device *device, uint32_t *read_ahead)
563 long read_ahead_long;
568 if ((fd = open(device->path, O_RDONLY)) < 0)
571 r = ioctl(fd, BLKRAGET, &read_ahead_long) ? 0 : 1;
575 *read_ahead = (uint32_t) read_ahead_long;
580 /* Get data size in bytes: st_size for regular files, BLKGETSIZE64 for block devices */
581 int device_size(struct device *device, uint64_t *size)
584 int devfd, r = -EINVAL;
586 devfd = open(device->path, O_RDONLY);
590 if (fstat(devfd, &st) < 0)
593 if (S_ISREG(st.st_mode)) {
594 *size = (uint64_t)st.st_size;
596 } else if (ioctl(devfd, BLKGETSIZE64, size) >= 0)
603 /* For a file, allocate the required space (no-op if the file is already large enough) */
604 int device_fallocate(struct device *device, uint64_t size)
607 int devfd, r = -EINVAL;
609 devfd = open(device_path(device), O_RDWR);
613 if (!fstat(devfd, &st) && S_ISREG(st.st_mode) &&
614 ((uint64_t)st.st_size >= size || !posix_fallocate(devfd, 0, size))) {
/* if mapped through loop, the loop device must be resized to see the new size */
616 if (device->file_path && crypt_loop_resize(device->path))
/*
 * Verify the device can hold req_offset bytes; optionally grow a header
 * file via device_fallocate() when falloc is set.
 * NOTE(review): returns and closing braces elided in this excerpt.
 */
624 int device_check_size(struct crypt_device *cd,
625 struct device *device,
626 uint64_t req_offset, int falloc)
630 if (device_size(device, &dev_size)) {
631 log_dbg(cd, "Cannot get device size for device %s.", device_path(device));
635 log_dbg(cd, "Device size %" PRIu64 ", offset %" PRIu64 ".", dev_size, req_offset);
637 if (req_offset > dev_size) {
638 /* If it is header file, increase its size */
639 if (falloc && !device_fallocate(device, req_offset))
642 log_err(cd, _("Device %s is too small. Need at least %" PRIu64 " bytes."),
643 device_path(device), req_offset);
/*
 * Collect read-only status and size (in 512-byte sectors) for a device,
 * optionally enforcing exclusive (unused/unmounted) access via DEV_EXCL.
 * readonly and size output pointers may be NULL.
 * NOTE(review): many lines (declarations, returns, close, error labels)
 * are elided in this excerpt.
 */
650 static int device_info(struct crypt_device *cd,
651 struct device *device,
652 enum devcheck device_check,
653 int *readonly, uint64_t *size)
656 int fd = -1, r, flags = 0, real_readonly;
665 if (stat(device->path, &st) < 0) {
670 /* never wipe header on mounted device */
671 if (device_check == DEV_EXCL && S_ISBLK(st.st_mode))
674 /* Try to open read-write to check whether it is a read-only device */
675 /* coverity[toctou] */
676 fd = open(device->path, O_RDWR | flags);
677 if (fd == -1 && errno == EROFS) {
679 fd = open(device->path, O_RDONLY | flags);
682 if (fd == -1 && device_check == DEV_EXCL && errno == EBUSY) {
688 r = errno ? -errno : -EINVAL;
693 if (S_ISREG(st.st_mode)) {
694 //FIXME: add readonly check
695 real_size = (uint64_t)st.st_size;
696 real_size >>= SECTOR_SHIFT;
698 /* If the device can be opened read-write, i.e. readonly is still 0, then
699 * check whether BLKROGET says that it is read-only. E.g. read-only loop
700 * devices may be opened read-write but are read-only according to BLKROGET
702 if (real_readonly == 0 && (r = ioctl(fd, BLKROGET, &real_readonly)) < 0)
705 r = ioctl(fd, BLKGETSIZE64, &real_size);
707 real_size >>= SECTOR_SHIFT;
718 *readonly = real_readonly;
/* error reporting — reached via elided goto labels */
723 log_err(cd, _("Cannot use device %s which is in use "
724 "(already mapped or mounted)."), device_path(device));
727 log_err(cd, _("Cannot use device %s, permission denied."), device_path(device));
730 log_err(cd, _("Cannot get info about device %s."), device_path(device));
/* Thin wrapper: run the device_info() checks, discard size/readonly results. */
737 int device_check_access(struct crypt_device *cd,
738 struct device *device,
739 enum devcheck device_check)
741 return device_info(cd, device, device_check, NULL, NULL);
/*
 * Lazily attach a regular file to an autoclear loop device the first time
 * real block access is needed; on success device->path points to the loop
 * device and device->file_path keeps the original file path.
 * Requires root (loop attach). NOTE(review): returns elided in this excerpt.
 */
744 static int device_internal_prepare(struct crypt_device *cd, struct device *device)
746 char *loop_device = NULL, *file_path = NULL;
747 int r, loop_fd, readonly = 0;
749 if (device->init_done)
752 if (getuid() || geteuid()) {
753 log_err(cd, _("Cannot use a loopback device, "
754 "running as non-root user."));
758 log_dbg(cd, "Allocating a free loop device.");
760 /* Keep the loop open, detached on last close. */
761 loop_fd = crypt_loop_attach(&loop_device, device->path, 0, 1, &readonly);
763 log_err(cd, _("Attaching loopback device failed "
764 "(loop device with autoclear flag is required)."));
/* swap path to the loop device; keep the file path for later */
769 file_path = device->path;
770 device->path = loop_device;
772 r = device_ready(cd, device);
/* probe failed — restore original path and detach the loop again */
774 device->path = file_path;
775 crypt_loop_detach(loop_device);
780 device->loop_fd = loop_fd;
781 device->file_path = file_path;
782 device->init_done = 1;
/*
 * Validate/compute the usable data size (in sectors) behind device_offset:
 * prepares loop mapping if needed, runs device_info() checks, fills *size
 * when zero, rejects offsets/sizes beyond the real device, and sets
 * CRYPT_ACTIVATE_READONLY in *flags for read-only devices.
 * NOTE(review): parameter list tail, declarations and returns are elided
 * in this excerpt.
 */
787 int device_block_adjust(struct crypt_device *cd,
788 struct device *device,
789 enum devcheck device_check,
790 uint64_t device_offset,
794 int r, real_readonly;
800 r = device_internal_prepare(cd, device);
804 r = device_info(cd, device, device_check, &real_readonly, &real_size);
808 if (device_offset >= real_size) {
809 log_err(cd, _("Requested offset is beyond real size of device %s."),
810 device_path(device));
/* size == 0 means "use the rest of the device" */
814 if (size && !*size) {
817 log_err(cd, _("Device %s has zero size."), device_path(device));
820 *size -= device_offset;
823 /* in case of size is set by parameter */
824 if (size && ((real_size - device_offset) < *size)) {
825 log_dbg(cd, "Device %s: offset = %" PRIu64 " requested size = %" PRIu64
826 ", backing device size = %" PRIu64,
827 device->path, device_offset, *size, real_size);
828 log_err(cd, _("Device %s is too small."), device_path(device));
832 if (flags && real_readonly)
833 *flags |= CRYPT_ACTIVATE_READONLY;
836 log_dbg(cd, "Calculated device size is %" PRIu64" sectors (%s), offset %" PRIu64 ".",
837 *size, real_readonly ? "RO" : "RW", device_offset);
841 size_t size_round_up(size_t size, size_t block)
843 size_t s = (size + (block - 1)) / block;
/* Force buffered IO on this device (clears the direct-io flag). */
847 void device_disable_direct_io(struct device *device)
849 device->o_direct = 0;
/* Report whether direct-io is in use for this device. */
852 int device_direct_io(const struct device *device)
854 return device->o_direct;
/* Resolve the device number (st_rdev) of the underlying block device node. */
857 static dev_t device_devno(const struct device *device)
861 if (stat(device->path, &st) || !S_ISBLK(st.st_mode))
/*
 * Do the two handles refer to the same device? Compares device numbers
 * when both are initialized, paths otherwise.
 * NOTE(review): return values elided in this excerpt.
 */
867 int device_is_identical(struct device *device1, struct device *device2)
869 if (!device1 || !device2)
872 if (device1 == device2)
875 if (device1->init_done && device2->init_done)
876 return (device_devno(device1) == device_devno(device2));
/* only one side initialized — cannot be the same underlying device */
877 else if (device1->init_done || device2->init_done)
880 if (!strcmp(device_path(device1), device_path(device2)))
/* Report whether the backing disk is rotational (per sysfs via helper). */
886 int device_is_rotational(struct device *device)
890 if (stat(device_path(device), &st) < 0)
893 if (!S_ISBLK(st.st_mode))
896 return crypt_dev_is_rotational(major(st.st_rdev), minor(st.st_rdev));
/* Return (and lazily cache) the required IO buffer alignment for the device. */
899 size_t device_alignment(struct device *device)
903 if (!device->alignment) {
904 devfd = open(device_path(device), O_RDONLY);
906 device->alignment = device_alignment_fd(devfd);
911 return device->alignment;
/* Attach a metadata lock handle to the device (bodies elided in this excerpt). */
914 void device_set_lock_handle(struct device *device, struct crypt_lock_handle *h)
/* Return the metadata lock handle attached to the device. */
919 struct crypt_lock_handle *device_get_lock_handle(struct device *device)
/* Take a shared (read) metadata lock; no-op when locking is disabled. */
924 int device_read_lock(struct crypt_device *cd, struct device *device)
926 if (!crypt_metadata_locking_enabled())
929 if (device_read_lock_internal(cd, device))
/* Take an exclusive (write) metadata lock; must not already hold a read lock. */
935 int device_write_lock(struct crypt_device *cd, struct device *device)
937 if (!crypt_metadata_locking_enabled())
940 assert(!device_locked(device->lh) || !device_locked_readonly(device->lh));
942 return device_write_lock_internal(cd, device);
/* Release a previously taken read lock. */
945 void device_read_unlock(struct crypt_device *cd, struct device *device)
947 if (!crypt_metadata_locking_enabled())
950 assert(device_locked(device->lh));
952 device_unlock_internal(cd, device);
/* Release a previously taken write lock. */
955 void device_write_unlock(struct crypt_device *cd, struct device *device)
957 if (!crypt_metadata_locking_enabled())
960 assert(device_locked(device->lh) && !device_locked_readonly(device->lh));
962 device_unlock_internal(cd, device);
/* Report whether any metadata lock is currently held on the device. */
965 bool device_is_locked(struct device *device)
967 return device ? device_locked(device->lh) : 0;
/*
 * Close both cached fds (read-only and read-write) on the device handle.
 * NOTE(review): this excerpt ends mid-function; the tail (dev_fd reset,
 * closing brace) is not visible here.
 */
970 void device_close(struct crypt_device *cd, struct device *device)
975 if (device->ro_dev_fd != -1) {
976 log_dbg(cd, "Closing read only fd for %s.", device_path(device));
977 if (close(device->ro_dev_fd))
978 log_dbg(cd, "Failed to close read only fd for %s.", device_path(device));
979 device->ro_dev_fd = -1;
982 if (device->dev_fd != -1) {
983 log_dbg(cd, "Closing read write fd for %s.", device_path(device));
984 if (close(device->dev_fd))
985 log_dbg(cd, "Failed to close read write fd for %s.", device_path(device));