smb3: allow parallelizing decryption of reads
author Steve French <stfrench@microsoft.com>
Sat, 7 Sep 2019 06:09:49 +0000 (01:09 -0500)
committer Steve French <stfrench@microsoft.com>
Mon, 16 Sep 2019 16:43:38 +0000 (11:43 -0500)
Decrypting large reads on encrypted shares can be slow (e.g. adding
multiple milliseconds per read on non-GCM capable servers or
when mounting with dialects prior to SMB3.1.1), so allow parallelizing
read decryption by launching worker threads.

Testing against Samba on localhost showed a 25% improvement.
Testing against a remote server showed a very large improvement when
more than one 'cp' command was run at the same time.

Signed-off-by: Steve French <stfrench@microsoft.com>
Signed-off-by: Ronnie Sahlberg <lsahlber@redhat.com>
fs/cifs/cifsfs.c
fs/cifs/cifsglob.h
fs/cifs/smb2ops.c

index de90e66..b0ea332 100644 (file)
@@ -109,6 +109,7 @@ extern mempool_t *cifs_req_poolp;
 extern mempool_t *cifs_mid_poolp;
 
 struct workqueue_struct        *cifsiod_wq;
+struct workqueue_struct        *decrypt_wq;
 struct workqueue_struct        *cifsoplockd_wq;
 __u32 cifs_lock_secret;
 
@@ -1499,11 +1500,22 @@ init_cifs(void)
                goto out_clean_proc;
        }
 
+       /*
+        * BB Consider setting limit!=0 maybe to min(num_of_cores - 1, 3) so we
+        * don't launch too many worker threads
+        */
+       decrypt_wq = alloc_workqueue("smb3decryptd",
+                                    WQ_FREEZABLE|WQ_MEM_RECLAIM, 0);
+       if (!decrypt_wq) {
+               rc = -ENOMEM;
+               goto out_destroy_cifsiod_wq;
+       }
+
        cifsoplockd_wq = alloc_workqueue("cifsoplockd",
                                         WQ_FREEZABLE|WQ_MEM_RECLAIM, 0);
        if (!cifsoplockd_wq) {
                rc = -ENOMEM;
-               goto out_destroy_cifsiod_wq;
+               goto out_destroy_decrypt_wq;
        }
 
        rc = cifs_fscache_register();
@@ -1569,6 +1581,8 @@ out_unreg_fscache:
        cifs_fscache_unregister();
 out_destroy_cifsoplockd_wq:
        destroy_workqueue(cifsoplockd_wq);
+out_destroy_decrypt_wq:
+       destroy_workqueue(decrypt_wq);
 out_destroy_cifsiod_wq:
        destroy_workqueue(cifsiod_wq);
 out_clean_proc:
@@ -1595,6 +1609,7 @@ exit_cifs(void)
        cifs_destroy_inodecache();
        cifs_fscache_unregister();
        destroy_workqueue(cifsoplockd_wq);
+       destroy_workqueue(decrypt_wq);
        destroy_workqueue(cifsiod_wq);
        cifs_proc_clean();
 }
index 1f53dee..d66106a 100644 (file)
@@ -1892,6 +1892,7 @@ void cifs_queue_oplock_break(struct cifsFileInfo *cfile);
 
 extern const struct slow_work_ops cifs_oplock_break_ops;
 extern struct workqueue_struct *cifsiod_wq;
+extern struct workqueue_struct *decrypt_wq;
 extern struct workqueue_struct *cifsoplockd_wq;
 extern __u32 cifs_lock_secret;
 
index 83b02d7..c742844 100644 (file)
@@ -4017,8 +4017,58 @@ handle_read_data(struct TCP_Server_Info *server, struct mid_q_entry *mid,
        return length;
 }
 
+struct smb2_decrypt_work {     /* one read whose decryption is offloaded */
+       struct work_struct decrypt;     /* work item; handler uses container_of() */
+       struct TCP_Server_Info *server; /* connection the read arrived on */
+       struct page **ppages;   /* page array; worker puts pages, kfrees array */
+       char *buf;      /* small buffer taken over from server->smallbuf */
+       unsigned int npages;    /* number of entries in ppages */
+       unsigned int len;       /* length passed to decrypt/handle_read_data */
+};
+
+
+/*
+ * Workqueue handler for an offloaded encrypted read: decrypt it, hand the data
+ * to the matching mid (when one is found), then free pages, buffer and dw.
+ */
+static void smb2_decrypt_offload(struct work_struct *work)
+{
+       struct smb2_decrypt_work *dw = container_of(work,
+                               struct smb2_decrypt_work, decrypt);
+       int i, rc;
+       struct mid_q_entry *mid;
+
+       rc = decrypt_raw_data(dw->server, dw->buf, dw->server->vals->read_rsp_size,
+                             dw->ppages, dw->npages, dw->len);
+       if (rc) {
+               cifs_dbg(VFS, "error decrypting rc=%d\n", rc);
+               goto free_pages;
+       }
+       dw->server->lstrp = jiffies;
+       mid = smb2_find_mid(dw->server, dw->buf);
+       if (mid == NULL) {
+               cifs_dbg(FYI, "mid not found\n");
+       } else {
+               mid->decrypted = true;
+               rc = handle_read_data(dw->server, mid, dw->buf,
+                                     dw->server->vals->read_rsp_size,
+                                     dw->ppages, dw->npages, dw->len);
+               mid->callback(mid);     /* only valid when a mid was found */
+               cifs_mid_q_entry_release(mid);
+       }
+
+free_pages:
+       for (i = dw->npages-1; i >= 0; i--)
+               put_page(dw->ppages[i]);
+       kfree(dw->ppages);
+       cifs_small_buf_release(dw->buf);
+       kfree(dw);      /* allocated by the submitter; nothing else frees it */
+}
+
+
 static int
-receive_encrypted_read(struct TCP_Server_Info *server, struct mid_q_entry **mid)
+receive_encrypted_read(struct TCP_Server_Info *server, struct mid_q_entry **mid,
+                      int *num_mids)
 {
        char *buf = server->smallbuf;
        struct smb2_transform_hdr *tr_hdr = (struct smb2_transform_hdr *)buf;
@@ -4028,7 +4078,9 @@ receive_encrypted_read(struct TCP_Server_Info *server, struct mid_q_entry **mid)
        unsigned int buflen = server->pdu_size;
        int rc;
        int i = 0;
+       struct smb2_decrypt_work *dw;
 
+       *num_mids = 1;
        len = min_t(unsigned int, buflen, server->vals->read_rsp_size +
                sizeof(struct smb2_transform_hdr)) - HEADER_SIZE(server) + 1;
 
@@ -4064,6 +4116,32 @@ receive_encrypted_read(struct TCP_Server_Info *server, struct mid_q_entry **mid)
        if (rc)
                goto free_pages;
 
+       /*
+        * For large reads, offload to different thread for better performance,
+        * use more cores decrypting which can be expensive
+        */
+
+       /* TODO: make the size limit to enable decrypt offload configurable */
+       if (server->pdu_size > (512 * 1024)) {
+               dw = kmalloc(sizeof(struct smb2_decrypt_work), GFP_KERNEL);
+               if (dw == NULL)
+                       goto non_offloaded_decrypt;
+
+               dw->buf = server->smallbuf;
+               server->smallbuf = (char *)cifs_small_buf_get();
+
+               INIT_WORK(&dw->decrypt, smb2_decrypt_offload);
+
+               dw->npages = npages;
+               dw->server = server;
+               dw->ppages = pages;
+               dw->len = len;
+               queue_work(cifsiod_wq, &dw->decrypt);
+               *num_mids = 0; /* worker thread takes care of finding mid */
+               return -1;
+       }
+
+non_offloaded_decrypt:
        rc = decrypt_raw_data(server, buf, server->vals->read_rsp_size,
                              pages, npages, len);
        if (rc)
@@ -4210,8 +4288,7 @@ smb3_receive_transform(struct TCP_Server_Info *server,
 
        /* TODO: add support for compounds containing READ. */
        if (pdu_length > CIFSMaxBufSize + MAX_HEADER_SIZE(server)) {
-               *num_mids = 1;
-               return receive_encrypted_read(server, &mids[0]);
+               return receive_encrypted_read(server, &mids[0], num_mids);
        }
 
        return receive_encrypted_standard(server, mids, bufs, num_mids);