Merge branches 'acpi-ec' and 'acpi-resource'
[platform/kernel/linux-rpi.git] / fs / ceph / addr.c
index 59cbfb8..f486307 100644 (file)
@@ -18,6 +18,7 @@
 #include "mds_client.h"
 #include "cache.h"
 #include "metric.h"
+#include "crypto.h"
 #include <linux/ceph/osd_client.h>
 #include <linux/ceph/striper.h>
 
@@ -242,11 +243,13 @@ static bool ceph_netfs_clamp_length(struct netfs_io_subrequest *subreq)
 
 static void finish_netfs_read(struct ceph_osd_request *req)
 {
-       struct ceph_fs_client *fsc = ceph_inode_to_client(req->r_inode);
+       struct inode *inode = req->r_inode;
+       struct ceph_fs_client *fsc = ceph_inode_to_client(inode);
        struct ceph_osd_data *osd_data = osd_req_op_extent_osd_data(req, 0);
        struct netfs_io_subrequest *subreq = req->r_priv;
-       int num_pages;
+       struct ceph_osd_req_op *op = &req->r_ops[0];
        int err = req->r_result;
+       bool sparse = (op->op == CEPH_OSD_OP_SPARSE_READ);
 
        ceph_update_read_metrics(&fsc->mdsc->metric, req->r_start_latency,
                                 req->r_end_latency, osd_data->length, err);
@@ -260,14 +263,29 @@ static void finish_netfs_read(struct ceph_osd_request *req)
        else if (err == -EBLOCKLISTED)
                fsc->blocklisted = true;
 
-       if (err >= 0 && err < subreq->len)
-               __set_bit(NETFS_SREQ_CLEAR_TAIL, &subreq->flags);
+       if (err >= 0) {
+               if (sparse && err > 0)
+                       err = ceph_sparse_ext_map_end(op);
+               if (err < subreq->len)
+                       __set_bit(NETFS_SREQ_CLEAR_TAIL, &subreq->flags);
+               if (IS_ENCRYPTED(inode) && err > 0) {
+                       err = ceph_fscrypt_decrypt_extents(inode,
+                                       osd_data->pages, subreq->start,
+                                       op->extent.sparse_ext,
+                                       op->extent.sparse_ext_cnt);
+                       if (err > subreq->len)
+                               err = subreq->len;
+               }
+       }
 
+       if (osd_data->type == CEPH_OSD_DATA_TYPE_PAGES) {
+               ceph_put_page_vector(osd_data->pages,
+                                    calc_pages_for(osd_data->alignment,
+                                       osd_data->length), false);
+       }
        netfs_subreq_terminated(subreq, err, false);
-
-       num_pages = calc_pages_for(osd_data->alignment, osd_data->length);
-       ceph_put_page_vector(osd_data->pages, num_pages, false);
        iput(req->r_inode);
+       ceph_dec_osd_stopping_blocker(fsc->mdsc);
 }
 
 static bool ceph_netfs_issue_op_inline(struct netfs_io_subrequest *subreq)
@@ -334,10 +352,10 @@ static void ceph_netfs_issue_read(struct netfs_io_subrequest *subreq)
        struct ceph_osd_request *req = NULL;
        struct ceph_vino vino = ceph_vino(inode);
        struct iov_iter iter;
-       struct page **pages;
-       size_t page_off;
        int err = 0;
        u64 len = subreq->len;
+       bool sparse = IS_ENCRYPTED(inode) || ceph_test_mount_opt(fsc, SPARSEREAD);
+       u64 off = subreq->start;
 
        if (ceph_inode_is_shutdown(inode)) {
                err = -EIO;
@@ -347,8 +365,10 @@ static void ceph_netfs_issue_read(struct netfs_io_subrequest *subreq)
        if (ceph_has_inline_data(ci) && ceph_netfs_issue_op_inline(subreq))
                return;
 
-       req = ceph_osdc_new_request(&fsc->client->osdc, &ci->i_layout, vino, subreq->start, &len,
-                       0, 1, CEPH_OSD_OP_READ,
+       ceph_fscrypt_adjust_off_and_len(inode, &off, &len);
+
+       req = ceph_osdc_new_request(&fsc->client->osdc, &ci->i_layout, vino,
+                       off, &len, 0, 1, sparse ? CEPH_OSD_OP_SPARSE_READ : CEPH_OSD_OP_READ,
                        CEPH_OSD_FLAG_READ | fsc->client->osdc.client->options->read_from_replica,
                        NULL, ci->i_truncate_seq, ci->i_truncate_size, false);
        if (IS_ERR(req)) {
@@ -357,20 +377,48 @@ static void ceph_netfs_issue_read(struct netfs_io_subrequest *subreq)
                goto out;
        }
 
+       if (sparse) {
+               err = ceph_alloc_sparse_ext_map(&req->r_ops[0]);
+               if (err)
+                       goto out;
+       }
+
        dout("%s: pos=%llu orig_len=%zu len=%llu\n", __func__, subreq->start, subreq->len, len);
+
        iov_iter_xarray(&iter, ITER_DEST, &rreq->mapping->i_pages, subreq->start, len);
-       err = iov_iter_get_pages_alloc2(&iter, &pages, len, &page_off);
-       if (err < 0) {
-               dout("%s: iov_ter_get_pages_alloc returned %d\n", __func__, err);
-               goto out;
-       }
 
-       /* should always give us a page-aligned read */
-       WARN_ON_ONCE(page_off);
-       len = err;
-       err = 0;
+       /*
+        * FIXME: For now, use CEPH_OSD_DATA_TYPE_PAGES instead of _ITER for
+        * encrypted inodes. We'd need infrastructure that handles an iov_iter
+        * instead of page arrays, and we don't have that as of yet. Once the
+        * dust settles on the write helpers and encrypt/decrypt routines for
+        * netfs, we should be able to rework this.
+        */
+       if (IS_ENCRYPTED(inode)) {
+               struct page **pages;
+               size_t page_off;
+
+               err = iov_iter_get_pages_alloc2(&iter, &pages, len, &page_off);
+               if (err < 0) {
+                       dout("%s: iov_iter_get_pages_alloc2 returned %d\n",
+                            __func__, err);
+                       goto out;
+               }
+
+               /* should always give us a page-aligned read */
+               WARN_ON_ONCE(page_off);
+               len = err;
+               err = 0;
 
-       osd_req_op_extent_osd_data_pages(req, 0, pages, len, 0, false, false);
+               osd_req_op_extent_osd_data_pages(req, 0, pages, len, 0, false,
+                                                false);
+       } else {
+               osd_req_op_extent_osd_iter(req, 0, &iter);
+       }
+       if (!ceph_inc_osd_stopping_blocker(fsc->mdsc)) {
+               err = -EIO;
+               goto out;
+       }
        req->r_callback = finish_netfs_read;
        req->r_priv = subreq;
        req->r_inode = inode;
@@ -571,10 +619,12 @@ static u64 get_writepages_data_length(struct inode *inode,
                                      struct page *page, u64 start)
 {
        struct ceph_inode_info *ci = ceph_inode(inode);
-       struct ceph_snap_context *snapc = page_snap_context(page);
+       struct ceph_snap_context *snapc;
        struct ceph_cap_snap *capsnap = NULL;
        u64 end = i_size_read(inode);
+       u64 ret;
 
+       snapc = page_snap_context(ceph_fscrypt_pagecache_page(page));
        if (snapc != ci->i_head_snapc) {
                bool found = false;
                spin_lock(&ci->i_ceph_lock);
@@ -589,9 +639,12 @@ static u64 get_writepages_data_length(struct inode *inode,
                spin_unlock(&ci->i_ceph_lock);
                WARN_ON(!found);
        }
-       if (end > page_offset(page) + thp_size(page))
-               end = page_offset(page) + thp_size(page);
-       return end > start ? end - start : 0;
+       if (end > ceph_fscrypt_page_offset(page) + thp_size(page))
+               end = ceph_fscrypt_page_offset(page) + thp_size(page);
+       ret = end > start ? end - start : 0;
+       if (ret && fscrypt_is_bounce_page(page))
+               ret = round_up(ret, CEPH_FSCRYPT_BLOCK_SIZE);
+       return ret;
 }
 
 /*
@@ -610,10 +663,12 @@ static int writepage_nounlock(struct page *page, struct writeback_control *wbc)
        loff_t page_off = page_offset(page);
        int err;
        loff_t len = thp_size(page);
+       loff_t wlen;
        struct ceph_writeback_ctl ceph_wbc;
        struct ceph_osd_client *osdc = &fsc->client->osdc;
        struct ceph_osd_request *req;
        bool caching = ceph_is_cache_enabled(inode);
+       struct page *bounce_page = NULL;
 
        dout("writepage %p idx %lu\n", page, page->index);
 
@@ -649,31 +704,51 @@ static int writepage_nounlock(struct page *page, struct writeback_control *wbc)
        if (ceph_wbc.i_size < page_off + len)
                len = ceph_wbc.i_size - page_off;
 
+       wlen = IS_ENCRYPTED(inode) ? round_up(len, CEPH_FSCRYPT_BLOCK_SIZE) : len;
        dout("writepage %p page %p index %lu on %llu~%llu snapc %p seq %lld\n",
-            inode, page, page->index, page_off, len, snapc, snapc->seq);
+            inode, page, page->index, page_off, wlen, snapc, snapc->seq);
 
        if (atomic_long_inc_return(&fsc->writeback_count) >
            CONGESTION_ON_THRESH(fsc->mount_options->congestion_kb))
                fsc->write_congested = true;
 
-       req = ceph_osdc_new_request(osdc, &ci->i_layout, ceph_vino(inode), page_off, &len, 0, 1,
-                                   CEPH_OSD_OP_WRITE, CEPH_OSD_FLAG_WRITE, snapc,
-                                   ceph_wbc.truncate_seq, ceph_wbc.truncate_size,
-                                   true);
+       req = ceph_osdc_new_request(osdc, &ci->i_layout, ceph_vino(inode),
+                                   page_off, &wlen, 0, 1, CEPH_OSD_OP_WRITE,
+                                   CEPH_OSD_FLAG_WRITE, snapc,
+                                   ceph_wbc.truncate_seq,
+                                   ceph_wbc.truncate_size, true);
        if (IS_ERR(req)) {
                redirty_page_for_writepage(wbc, page);
                return PTR_ERR(req);
        }
 
+       if (wlen < len)
+               len = wlen;
+
        set_page_writeback(page);
        if (caching)
                ceph_set_page_fscache(page);
        ceph_fscache_write_to_cache(inode, page_off, len, caching);
 
+       if (IS_ENCRYPTED(inode)) {
+               bounce_page = fscrypt_encrypt_pagecache_blocks(page,
+                                                   CEPH_FSCRYPT_BLOCK_SIZE, 0,
+                                                   GFP_NOFS);
+               if (IS_ERR(bounce_page)) {
+                       redirty_page_for_writepage(wbc, page);
+                       end_page_writeback(page);
+                       ceph_osdc_put_request(req);
+                       return PTR_ERR(bounce_page);
+               }
+       }
+
        /* it may be a short write due to an object boundary */
        WARN_ON_ONCE(len > thp_size(page));
-       osd_req_op_extent_osd_data_pages(req, 0, &page, len, 0, false, false);
-       dout("writepage %llu~%llu (%llu bytes)\n", page_off, len, len);
+       osd_req_op_extent_osd_data_pages(req, 0,
+                       bounce_page ? &bounce_page : &page, wlen, 0,
+                       false, false);
+       dout("writepage %llu~%llu (%llu bytes, %sencrypted)\n",
+            page_off, len, wlen, IS_ENCRYPTED(inode) ? "" : "not ");
 
        req->r_mtime = inode->i_mtime;
        ceph_osdc_start_request(osdc, req);
@@ -681,7 +756,7 @@ static int writepage_nounlock(struct page *page, struct writeback_control *wbc)
 
        ceph_update_write_metrics(&fsc->mdsc->metric, req->r_start_latency,
                                  req->r_end_latency, len, err);
-
+       fscrypt_free_bounce_page(bounce_page);
        ceph_osdc_put_request(req);
        if (err == 0)
                err = len;
@@ -800,6 +875,11 @@ static void writepages_finish(struct ceph_osd_request *req)
                total_pages += num_pages;
                for (j = 0; j < num_pages; j++) {
                        page = osd_data->pages[j];
+                       if (fscrypt_is_bounce_page(page)) {
+                               page = fscrypt_pagecache_page(page);
+                               fscrypt_free_bounce_page(osd_data->pages[j]);
+                               osd_data->pages[j] = page;
+                       }
                        BUG_ON(!page);
                        WARN_ON(!PageUptodate(page));
 
@@ -835,6 +915,7 @@ static void writepages_finish(struct ceph_osd_request *req)
        else
                kfree(osd_data->pages);
        ceph_osdc_put_request(req);
+       ceph_dec_osd_stopping_blocker(fsc->mdsc);
 }
 
 /*
@@ -1070,9 +1151,28 @@ get_more_pages:
                                    fsc->mount_options->congestion_kb))
                                fsc->write_congested = true;
 
-                       pages[locked_pages++] = page;
-                       fbatch.folios[i] = NULL;
+                       if (IS_ENCRYPTED(inode)) {
+                               pages[locked_pages] =
+                                       fscrypt_encrypt_pagecache_blocks(page,
+                                               PAGE_SIZE, 0,
+                                               locked_pages ? GFP_NOWAIT : GFP_NOFS);
+                               if (IS_ERR(pages[locked_pages])) {
+                                       if (PTR_ERR(pages[locked_pages]) == -EINVAL)
+                                               pr_err("%s: inode->i_blkbits=%hhu\n",
+                                                       __func__, inode->i_blkbits);
+                                       /* better not fail on first page! */
+                                       BUG_ON(locked_pages == 0);
+                                       pages[locked_pages] = NULL;
+                                       redirty_page_for_writepage(wbc, page);
+                                       unlock_page(page);
+                                       break;
+                               }
+                               ++locked_pages;
+                       } else {
+                               pages[locked_pages++] = page;
+                       }
 
+                       fbatch.folios[i] = NULL;
                        len += thp_size(page);
                }
 
@@ -1100,7 +1200,7 @@ get_more_pages:
                }
 
 new_request:
-               offset = page_offset(pages[0]);
+               offset = ceph_fscrypt_page_offset(pages[0]);
                len = wsize;
 
                req = ceph_osdc_new_request(&fsc->client->osdc,
@@ -1121,9 +1221,13 @@ new_request:
                                                ceph_wbc.truncate_size, true);
                        BUG_ON(IS_ERR(req));
                }
-               BUG_ON(len < page_offset(pages[locked_pages - 1]) +
-                            thp_size(page) - offset);
+               BUG_ON(len < ceph_fscrypt_page_offset(pages[locked_pages - 1]) +
+                            thp_size(pages[locked_pages - 1]) - offset);
 
+               if (!ceph_inc_osd_stopping_blocker(fsc->mdsc)) {
+                       rc = -EIO;
+                       goto release_folios;
+               }
                req->r_callback = writepages_finish;
                req->r_inode = inode;
 
@@ -1132,7 +1236,9 @@ new_request:
                data_pages = pages;
                op_idx = 0;
                for (i = 0; i < locked_pages; i++) {
-                       u64 cur_offset = page_offset(pages[i]);
+                       struct page *page = ceph_fscrypt_pagecache_page(pages[i]);
+
+                       u64 cur_offset = page_offset(page);
                        /*
                         * Discontinuity in page range? Ceph can handle that by just passing
                         * multiple extents in the write op.
@@ -1161,9 +1267,9 @@ new_request:
                                op_idx++;
                        }
 
-                       set_page_writeback(pages[i]);
+                       set_page_writeback(page);
                        if (caching)
-                               ceph_set_page_fscache(pages[i]);
+                               ceph_set_page_fscache(page);
                        len += thp_size(page);
                }
                ceph_fscache_write_to_cache(inode, offset, len, caching);
@@ -1179,8 +1285,16 @@ new_request:
                                                         offset);
                        len = max(len, min_len);
                }
+               if (IS_ENCRYPTED(inode))
+                       len = round_up(len, CEPH_FSCRYPT_BLOCK_SIZE);
+
                dout("writepages got pages at %llu~%llu\n", offset, len);
 
+               if (IS_ENCRYPTED(inode) &&
+                   ((offset | len) & ~CEPH_FSCRYPT_BLOCK_MASK))
+                       pr_warn("%s: bad encrypted write offset=%lld len=%llu\n",
+                               __func__, offset, len);
+
                osd_req_op_extent_osd_data_pages(req, op_idx, data_pages, len,
                                                 0, from_pool, false);
                osd_req_op_extent_update(req, op_idx, len);