From 38c1ddbde6c6593e7c4bc17bde87232b7c577e7b Mon Sep 17 00:00:00 2001 From: Jiaqi Yan Date: Thu, 13 Jul 2023 00:18:32 +0000 Subject: [PATCH] hugetlbfs: improve read HWPOISON hugepage When a hugepage contains HWPOISON pages, read() fails to read any byte of the hugepage and returns -EIO, although many bytes in the HWPOISON hugepage are readable. Improve this by allowing hugetlbfs_read_iter returns as many bytes as possible. For a requested range [offset, offset + len) that contains HWPOISON page, return [offset, first HWPOISON page addr); the next read attempt will fail and return -EIO. Link: https://lkml.kernel.org/r/20230713001833.3778937-4-jiaqiyan@google.com Signed-off-by: Jiaqi Yan Reviewed-by: Mike Kravetz Reviewed-by: Naoya Horiguchi Cc: James Houghton Cc: Miaohe Lin Cc: Muchun Song Cc: Yang Shi Signed-off-by: Andrew Morton --- fs/hugetlbfs/inode.c | 57 ++++++++++++++++++++++++++++++++++++++++++++++------ 1 file changed, 51 insertions(+), 6 deletions(-) diff --git a/fs/hugetlbfs/inode.c b/fs/hugetlbfs/inode.c index 7b17ccf..e7611ae 100644 --- a/fs/hugetlbfs/inode.c +++ b/fs/hugetlbfs/inode.c @@ -283,6 +283,41 @@ hugetlb_get_unmapped_area(struct file *file, unsigned long addr, #endif /* + * Someone wants to read @bytes from a HWPOISON hugetlb @page from @offset. + * Returns the maximum number of bytes one can read without touching the 1st raw + * HWPOISON subpage. + * + * The implementation borrows the iteration logic from copy_page_to_iter*. + */ +static size_t adjust_range_hwpoison(struct page *page, size_t offset, size_t bytes) +{ + size_t n = 0; + size_t res = 0; + + /* First subpage to start the loop. */ + page += offset / PAGE_SIZE; + offset %= PAGE_SIZE; + while (1) { + if (is_raw_hwpoison_page_in_hugepage(page)) + break; + + /* Safe to read n bytes without touching HWPOISON subpage. */ + n = min(bytes, (size_t)PAGE_SIZE - offset); + res += n; + bytes -= n; + if (!bytes || !n) + break; + offset += n; + if (offset == PAGE_SIZE) { + page++; + offset = 0; + } + } + + return res; +} + +/* * Support for read() - Find the page attached to f_mapping and copy out the * data. This provides functionality similar to filemap_read(). */ @@ -300,7 +335,7 @@ static ssize_t hugetlbfs_read_iter(struct kiocb *iocb, struct iov_iter *to) while (iov_iter_count(to)) { struct page *page; - size_t nr, copied; + size_t nr, copied, want; /* nr is the maximum number of bytes to copy from this page */ nr = huge_page_size(h); @@ -328,16 +363,26 @@ static ssize_t hugetlbfs_read_iter(struct kiocb *iocb, struct iov_iter *to) } else { unlock_page(page); - if (PageHWPoison(page)) { - put_page(page); - retval = -EIO; - break; + if (!PageHWPoison(page)) + want = nr; + else { + /* + * Adjust how many bytes safe to read without + * touching the 1st raw HWPOISON subpage after + * offset. + */ + want = adjust_range_hwpoison(page, offset, nr); + if (want == 0) { + put_page(page); + retval = -EIO; + break; + } } /* * We have the page, copy it to user space buffer. */ - copied = copy_page_to_iter(page, offset, nr, to); + copied = copy_page_to_iter(page, offset, want, to); put_page(page); } offset += copied; -- 2.7.4