libceph: Partially revert changes to support MSG_SPLICE_PAGES
[platform/kernel/linux-starfive.git] / net / ceph / messenger_v2.c
1 // SPDX-License-Identifier: GPL-2.0
2 /*
3  * Ceph msgr2 protocol implementation
4  *
5  * Copyright (C) 2020 Ilya Dryomov <idryomov@gmail.com>
6  */
7
8 #include <linux/ceph/ceph_debug.h>
9
10 #include <crypto/aead.h>
11 #include <crypto/algapi.h>  /* for crypto_memneq() */
12 #include <crypto/hash.h>
13 #include <crypto/sha2.h>
14 #include <linux/bvec.h>
15 #include <linux/crc32c.h>
16 #include <linux/net.h>
17 #include <linux/scatterlist.h>
18 #include <linux/socket.h>
19 #include <linux/sched/mm.h>
20 #include <net/sock.h>
21 #include <net/tcp.h>
22
23 #include <linux/ceph/ceph_features.h>
24 #include <linux/ceph/decode.h>
25 #include <linux/ceph/libceph.h>
26 #include <linux/ceph/messenger.h>
27
28 #include "crypto.h"  /* for CEPH_KEY_LEN and CEPH_MAX_CON_SECRET_LEN */
29
/* Frame tags: the tag is the first byte that encode_preamble() emits. */
#define FRAME_TAG_HELLO			1
#define FRAME_TAG_AUTH_REQUEST		2
#define FRAME_TAG_AUTH_BAD_METHOD	3
#define FRAME_TAG_AUTH_REPLY_MORE	4
#define FRAME_TAG_AUTH_REQUEST_MORE	5
#define FRAME_TAG_AUTH_DONE		6
#define FRAME_TAG_AUTH_SIGNATURE	7
#define FRAME_TAG_CLIENT_IDENT		8
#define FRAME_TAG_SERVER_IDENT		9
#define FRAME_TAG_IDENT_MISSING_FEATURES 10
#define FRAME_TAG_SESSION_RECONNECT	11
#define FRAME_TAG_SESSION_RESET		12
#define FRAME_TAG_SESSION_RETRY		13
#define FRAME_TAG_SESSION_RETRY_GLOBAL	14
#define FRAME_TAG_SESSION_RECONNECT_OK	15
#define FRAME_TAG_WAIT			16
#define FRAME_TAG_MESSAGE		17
#define FRAME_TAG_KEEPALIVE2		18
#define FRAME_TAG_KEEPALIVE2_ACK	19
#define FRAME_TAG_ACK			20

/*
 * Epilogue late_status values.  On receive only the bits selected by
 * FRAME_LATE_STATUS_ABORTED_MASK are compared (see decode_epilogue()).
 */
#define FRAME_LATE_STATUS_ABORTED	0x1
#define FRAME_LATE_STATUS_COMPLETE	0xe
#define FRAME_LATE_STATUS_ABORTED_MASK	0xf
54
/*
 * Input-side state identifiers.
 * NOTE(review): presumably states of the receive state machine; the
 * code that uses them is outside this excerpt -- confirm.
 */
#define IN_S_HANDLE_PREAMBLE		1
#define IN_S_HANDLE_CONTROL		2
#define IN_S_HANDLE_CONTROL_REMAINDER	3
#define IN_S_PREPARE_READ_DATA		4
#define IN_S_PREPARE_READ_DATA_CONT	5
#define IN_S_PREPARE_READ_ENC_PAGE	6
#define IN_S_HANDLE_EPILOGUE		7
#define IN_S_FINISH_SKIP		8

/*
 * Output-side state identifiers.
 * NOTE(review): presumably states of the send state machine -- confirm.
 */
#define OUT_S_QUEUE_DATA		1
#define OUT_S_QUEUE_DATA_CONT		2
#define OUT_S_QUEUE_ENC_PAGE		3
#define OUT_S_QUEUE_ZEROS		4
#define OUT_S_FINISH_MESSAGE		5
#define OUT_S_GET_NEXT			6
70
/* Address just past a CEPH_PREAMBLE_LEN-sized preamble that starts at @p. */
#define CTRL_BODY(p)	((void *)(p) + CEPH_PREAMBLE_LEN)

/*
 * Three consecutive CEPH_GCM_BLOCK_LEN-sized pad areas that begin
 * CEPH_EPILOGUE_SECURE_LEN bytes past @p: front, then middle, then data.
 */
#define FRONT_PAD(p)	((void *)(p) + CEPH_EPILOGUE_SECURE_LEN)
#define MIDDLE_PAD(p)	(FRONT_PAD(p) + CEPH_GCM_BLOCK_LEN)
#define DATA_PAD(p)	(MIDDLE_PAD(p) + CEPH_GCM_BLOCK_LEN)

/* msghdr flags used for every socket send and receive in this file. */
#define CEPH_MSG_FLAGS (MSG_DONTWAIT | MSG_NOSIGNAL)
77
78 static int do_recvmsg(struct socket *sock, struct iov_iter *it)
79 {
80         struct msghdr msg = { .msg_flags = CEPH_MSG_FLAGS };
81         int ret;
82
83         msg.msg_iter = *it;
84         while (iov_iter_count(it)) {
85                 ret = sock_recvmsg(sock, &msg, msg.msg_flags);
86                 if (ret <= 0) {
87                         if (ret == -EAGAIN)
88                                 ret = 0;
89                         return ret;
90                 }
91
92                 iov_iter_advance(it, ret);
93         }
94
95         WARN_ON(msg_data_left(&msg));
96         return 1;
97 }
98
99 /*
100  * Read as much as possible.
101  *
102  * Return:
103  *   1 - done, nothing (else) to read
104  *   0 - socket is empty, need to wait
105  *  <0 - error
106  */
107 static int ceph_tcp_recv(struct ceph_connection *con)
108 {
109         int ret;
110
111         dout("%s con %p %s %zu\n", __func__, con,
112              iov_iter_is_discard(&con->v2.in_iter) ? "discard" : "need",
113              iov_iter_count(&con->v2.in_iter));
114         ret = do_recvmsg(con->sock, &con->v2.in_iter);
115         dout("%s con %p ret %d left %zu\n", __func__, con, ret,
116              iov_iter_count(&con->v2.in_iter));
117         return ret;
118 }
119
120 static int do_sendmsg(struct socket *sock, struct iov_iter *it)
121 {
122         struct msghdr msg = { .msg_flags = CEPH_MSG_FLAGS };
123         int ret;
124
125         msg.msg_iter = *it;
126         while (iov_iter_count(it)) {
127                 ret = sock_sendmsg(sock, &msg);
128                 if (ret <= 0) {
129                         if (ret == -EAGAIN)
130                                 ret = 0;
131                         return ret;
132                 }
133
134                 iov_iter_advance(it, ret);
135         }
136
137         WARN_ON(msg_data_left(&msg));
138         return 1;
139 }
140
141 static int do_try_sendpage(struct socket *sock, struct iov_iter *it)
142 {
143         struct msghdr msg = { .msg_flags = CEPH_MSG_FLAGS };
144         struct bio_vec bv;
145         int ret;
146
147         if (WARN_ON(!iov_iter_is_bvec(it)))
148                 return -EINVAL;
149
150         while (iov_iter_count(it)) {
151                 /* iov_iter_iovec() for ITER_BVEC */
152                 bvec_set_page(&bv, it->bvec->bv_page,
153                               min(iov_iter_count(it),
154                                   it->bvec->bv_len - it->iov_offset),
155                               it->bvec->bv_offset + it->iov_offset);
156
157                 /*
158                  * MSG_SPLICE_PAGES cannot properly handle pages with
159                  * page_count == 0, we need to fall back to sendmsg if
160                  * that's the case.
161                  *
162                  * Same goes for slab pages: skb_can_coalesce() allows
163                  * coalescing neighboring slab objects into a single frag
164                  * which triggers one of hardened usercopy checks.
165                  */
166                 if (sendpage_ok(bv.bv_page))
167                         msg.msg_flags |= MSG_SPLICE_PAGES;
168                 else
169                         msg.msg_flags &= ~MSG_SPLICE_PAGES;
170
171                 iov_iter_bvec(&msg.msg_iter, ITER_SOURCE, &bv, 1, bv.bv_len);
172                 ret = sock_sendmsg(sock, &msg);
173                 if (ret <= 0) {
174                         if (ret == -EAGAIN)
175                                 ret = 0;
176                         return ret;
177                 }
178
179                 iov_iter_advance(it, ret);
180         }
181
182         return 1;
183 }
184
185 /*
186  * Write as much as possible.  The socket is expected to be corked,
187  * so we don't bother with MSG_MORE here.
188  *
189  * Return:
190  *   1 - done, nothing (else) to write
191  *   0 - socket is full, need to wait
192  *  <0 - error
193  */
194 static int ceph_tcp_send(struct ceph_connection *con)
195 {
196         int ret;
197
198         dout("%s con %p have %zu try_sendpage %d\n", __func__, con,
199              iov_iter_count(&con->v2.out_iter), con->v2.out_iter_sendpage);
200         if (con->v2.out_iter_sendpage)
201                 ret = do_try_sendpage(con->sock, &con->v2.out_iter);
202         else
203                 ret = do_sendmsg(con->sock, &con->v2.out_iter);
204         dout("%s con %p ret %d left %zu\n", __func__, con, ret,
205              iov_iter_count(&con->v2.out_iter));
206         return ret;
207 }
208
209 static void add_in_kvec(struct ceph_connection *con, void *buf, int len)
210 {
211         BUG_ON(con->v2.in_kvec_cnt >= ARRAY_SIZE(con->v2.in_kvecs));
212         WARN_ON(!iov_iter_is_kvec(&con->v2.in_iter));
213
214         con->v2.in_kvecs[con->v2.in_kvec_cnt].iov_base = buf;
215         con->v2.in_kvecs[con->v2.in_kvec_cnt].iov_len = len;
216         con->v2.in_kvec_cnt++;
217
218         con->v2.in_iter.nr_segs++;
219         con->v2.in_iter.count += len;
220 }
221
222 static void reset_in_kvecs(struct ceph_connection *con)
223 {
224         WARN_ON(iov_iter_count(&con->v2.in_iter));
225
226         con->v2.in_kvec_cnt = 0;
227         iov_iter_kvec(&con->v2.in_iter, ITER_DEST, con->v2.in_kvecs, 0, 0);
228 }
229
230 static void set_in_bvec(struct ceph_connection *con, const struct bio_vec *bv)
231 {
232         WARN_ON(iov_iter_count(&con->v2.in_iter));
233
234         con->v2.in_bvec = *bv;
235         iov_iter_bvec(&con->v2.in_iter, ITER_DEST, &con->v2.in_bvec, 1, bv->bv_len);
236 }
237
238 static void set_in_skip(struct ceph_connection *con, int len)
239 {
240         WARN_ON(iov_iter_count(&con->v2.in_iter));
241
242         dout("%s con %p len %d\n", __func__, con, len);
243         iov_iter_discard(&con->v2.in_iter, ITER_DEST, len);
244 }
245
246 static void add_out_kvec(struct ceph_connection *con, void *buf, int len)
247 {
248         BUG_ON(con->v2.out_kvec_cnt >= ARRAY_SIZE(con->v2.out_kvecs));
249         WARN_ON(!iov_iter_is_kvec(&con->v2.out_iter));
250         WARN_ON(con->v2.out_zero);
251
252         con->v2.out_kvecs[con->v2.out_kvec_cnt].iov_base = buf;
253         con->v2.out_kvecs[con->v2.out_kvec_cnt].iov_len = len;
254         con->v2.out_kvec_cnt++;
255
256         con->v2.out_iter.nr_segs++;
257         con->v2.out_iter.count += len;
258 }
259
260 static void reset_out_kvecs(struct ceph_connection *con)
261 {
262         WARN_ON(iov_iter_count(&con->v2.out_iter));
263         WARN_ON(con->v2.out_zero);
264
265         con->v2.out_kvec_cnt = 0;
266
267         iov_iter_kvec(&con->v2.out_iter, ITER_SOURCE, con->v2.out_kvecs, 0, 0);
268         con->v2.out_iter_sendpage = false;
269 }
270
271 static void set_out_bvec(struct ceph_connection *con, const struct bio_vec *bv,
272                          bool zerocopy)
273 {
274         WARN_ON(iov_iter_count(&con->v2.out_iter));
275         WARN_ON(con->v2.out_zero);
276
277         con->v2.out_bvec = *bv;
278         con->v2.out_iter_sendpage = zerocopy;
279         iov_iter_bvec(&con->v2.out_iter, ITER_SOURCE, &con->v2.out_bvec, 1,
280                       con->v2.out_bvec.bv_len);
281 }
282
283 static void set_out_bvec_zero(struct ceph_connection *con)
284 {
285         WARN_ON(iov_iter_count(&con->v2.out_iter));
286         WARN_ON(!con->v2.out_zero);
287
288         bvec_set_page(&con->v2.out_bvec, ceph_zero_page,
289                       min(con->v2.out_zero, (int)PAGE_SIZE), 0);
290         con->v2.out_iter_sendpage = true;
291         iov_iter_bvec(&con->v2.out_iter, ITER_SOURCE, &con->v2.out_bvec, 1,
292                       con->v2.out_bvec.bv_len);
293 }
294
295 static void out_zero_add(struct ceph_connection *con, int len)
296 {
297         dout("%s con %p len %d\n", __func__, con, len);
298         con->v2.out_zero += len;
299 }
300
301 static void *alloc_conn_buf(struct ceph_connection *con, int len)
302 {
303         void *buf;
304
305         dout("%s con %p len %d\n", __func__, con, len);
306
307         if (WARN_ON(con->v2.conn_buf_cnt >= ARRAY_SIZE(con->v2.conn_bufs)))
308                 return NULL;
309
310         buf = kvmalloc(len, GFP_NOIO);
311         if (!buf)
312                 return NULL;
313
314         con->v2.conn_bufs[con->v2.conn_buf_cnt++] = buf;
315         return buf;
316 }
317
318 static void free_conn_bufs(struct ceph_connection *con)
319 {
320         while (con->v2.conn_buf_cnt)
321                 kvfree(con->v2.conn_bufs[--con->v2.conn_buf_cnt]);
322 }
323
324 static void add_in_sign_kvec(struct ceph_connection *con, void *buf, int len)
325 {
326         BUG_ON(con->v2.in_sign_kvec_cnt >= ARRAY_SIZE(con->v2.in_sign_kvecs));
327
328         con->v2.in_sign_kvecs[con->v2.in_sign_kvec_cnt].iov_base = buf;
329         con->v2.in_sign_kvecs[con->v2.in_sign_kvec_cnt].iov_len = len;
330         con->v2.in_sign_kvec_cnt++;
331 }
332
333 static void clear_in_sign_kvecs(struct ceph_connection *con)
334 {
335         con->v2.in_sign_kvec_cnt = 0;
336 }
337
338 static void add_out_sign_kvec(struct ceph_connection *con, void *buf, int len)
339 {
340         BUG_ON(con->v2.out_sign_kvec_cnt >= ARRAY_SIZE(con->v2.out_sign_kvecs));
341
342         con->v2.out_sign_kvecs[con->v2.out_sign_kvec_cnt].iov_base = buf;
343         con->v2.out_sign_kvecs[con->v2.out_sign_kvec_cnt].iov_len = len;
344         con->v2.out_sign_kvec_cnt++;
345 }
346
347 static void clear_out_sign_kvecs(struct ceph_connection *con)
348 {
349         con->v2.out_sign_kvec_cnt = 0;
350 }
351
352 static bool con_secure(struct ceph_connection *con)
353 {
354         return con->v2.con_mode == CEPH_CON_MODE_SECURE;
355 }
356
357 static int front_len(const struct ceph_msg *msg)
358 {
359         return le32_to_cpu(msg->hdr.front_len);
360 }
361
362 static int middle_len(const struct ceph_msg *msg)
363 {
364         return le32_to_cpu(msg->hdr.middle_len);
365 }
366
367 static int data_len(const struct ceph_msg *msg)
368 {
369         return le32_to_cpu(msg->hdr.data_len);
370 }
371
372 static bool need_padding(int len)
373 {
374         return !IS_ALIGNED(len, CEPH_GCM_BLOCK_LEN);
375 }
376
377 static int padded_len(int len)
378 {
379         return ALIGN(len, CEPH_GCM_BLOCK_LEN);
380 }
381
/* Number of pad bytes needed to bring @len up to padded_len(@len). */
static int padding_len(int len)
{
	int aligned = padded_len(len);

	return aligned - len;
}
386
387 /* preamble + control segment */
388 static int head_onwire_len(int ctrl_len, bool secure)
389 {
390         int head_len;
391         int rem_len;
392
393         if (secure) {
394                 head_len = CEPH_PREAMBLE_SECURE_LEN;
395                 if (ctrl_len > CEPH_PREAMBLE_INLINE_LEN) {
396                         rem_len = ctrl_len - CEPH_PREAMBLE_INLINE_LEN;
397                         head_len += padded_len(rem_len) + CEPH_GCM_TAG_LEN;
398                 }
399         } else {
400                 head_len = CEPH_PREAMBLE_PLAIN_LEN;
401                 if (ctrl_len)
402                         head_len += ctrl_len + CEPH_CRC_LEN;
403         }
404         return head_len;
405 }
406
407 /* front, middle and data segments + epilogue */
408 static int __tail_onwire_len(int front_len, int middle_len, int data_len,
409                              bool secure)
410 {
411         if (!front_len && !middle_len && !data_len)
412                 return 0;
413
414         if (!secure)
415                 return front_len + middle_len + data_len +
416                        CEPH_EPILOGUE_PLAIN_LEN;
417
418         return padded_len(front_len) + padded_len(middle_len) +
419                padded_len(data_len) + CEPH_EPILOGUE_SECURE_LEN;
420 }
421
422 static int tail_onwire_len(const struct ceph_msg *msg, bool secure)
423 {
424         return __tail_onwire_len(front_len(msg), middle_len(msg),
425                                  data_len(msg), secure);
426 }
427
/*
 * Constant-expression form of
 * head_onwire_len(sizeof(struct ceph_msg_header2), false):
 * plain preamble + message header + control crc.
 */
#define MESSAGE_HEAD_PLAIN_LEN	(CEPH_PREAMBLE_PLAIN_LEN +		\
				 sizeof(struct ceph_msg_header2) +	\
				 CEPH_CRC_LEN)
432
433 static const int frame_aligns[] = {
434         sizeof(void *),
435         sizeof(void *),
436         sizeof(void *),
437         PAGE_SIZE
438 };
439
/*
 * Number of segments to put on the wire for the given lengths.
 *
 * Trailing empty segments are dropped, but a frame always has at
 * least one (possibly empty) segment, so the result is never below 1.
 */
static int calc_segment_count(const int *lens, int len_cnt)
{
	int cnt = len_cnt;

	/* strip empty segments off the end */
	while (cnt > 0 && !lens[cnt - 1])
		cnt--;

	return cnt > 0 ? cnt : 1;
}
455
456 static void init_frame_desc(struct ceph_frame_desc *desc, int tag,
457                             const int *lens, int len_cnt)
458 {
459         int i;
460
461         memset(desc, 0, sizeof(*desc));
462
463         desc->fd_tag = tag;
464         desc->fd_seg_cnt = calc_segment_count(lens, len_cnt);
465         BUG_ON(desc->fd_seg_cnt > CEPH_FRAME_MAX_SEGMENT_COUNT);
466         for (i = 0; i < desc->fd_seg_cnt; i++) {
467                 desc->fd_lens[i] = lens[i];
468                 desc->fd_aligns[i] = frame_aligns[i];
469         }
470 }
471
472 /*
473  * Preamble crc covers everything up to itself (28 bytes) and
474  * is calculated and verified irrespective of the connection mode
475  * (i.e. even if the frame is encrypted).
476  */
477 static void encode_preamble(const struct ceph_frame_desc *desc, void *p)
478 {
479         void *crcp = p + CEPH_PREAMBLE_LEN - CEPH_CRC_LEN;
480         void *start = p;
481         int i;
482
483         memset(p, 0, CEPH_PREAMBLE_LEN);
484
485         ceph_encode_8(&p, desc->fd_tag);
486         ceph_encode_8(&p, desc->fd_seg_cnt);
487         for (i = 0; i < desc->fd_seg_cnt; i++) {
488                 ceph_encode_32(&p, desc->fd_lens[i]);
489                 ceph_encode_16(&p, desc->fd_aligns[i]);
490         }
491
492         put_unaligned_le32(crc32c(0, start, crcp - start), crcp);
493 }
494
495 static int decode_preamble(void *p, struct ceph_frame_desc *desc)
496 {
497         void *crcp = p + CEPH_PREAMBLE_LEN - CEPH_CRC_LEN;
498         u32 crc, expected_crc;
499         int i;
500
501         crc = crc32c(0, p, crcp - p);
502         expected_crc = get_unaligned_le32(crcp);
503         if (crc != expected_crc) {
504                 pr_err("bad preamble crc, calculated %u, expected %u\n",
505                        crc, expected_crc);
506                 return -EBADMSG;
507         }
508
509         memset(desc, 0, sizeof(*desc));
510
511         desc->fd_tag = ceph_decode_8(&p);
512         desc->fd_seg_cnt = ceph_decode_8(&p);
513         if (desc->fd_seg_cnt < 1 ||
514             desc->fd_seg_cnt > CEPH_FRAME_MAX_SEGMENT_COUNT) {
515                 pr_err("bad segment count %d\n", desc->fd_seg_cnt);
516                 return -EINVAL;
517         }
518         for (i = 0; i < desc->fd_seg_cnt; i++) {
519                 desc->fd_lens[i] = ceph_decode_32(&p);
520                 desc->fd_aligns[i] = ceph_decode_16(&p);
521         }
522
523         /*
524          * This would fire for FRAME_TAG_WAIT (it has one empty
525          * segment), but we should never get it as client.
526          */
527         if (!desc->fd_lens[desc->fd_seg_cnt - 1]) {
528                 pr_err("last segment empty\n");
529                 return -EINVAL;
530         }
531
532         if (desc->fd_lens[0] > CEPH_MSG_MAX_CONTROL_LEN) {
533                 pr_err("control segment too big %d\n", desc->fd_lens[0]);
534                 return -EINVAL;
535         }
536         if (desc->fd_lens[1] > CEPH_MSG_MAX_FRONT_LEN) {
537                 pr_err("front segment too big %d\n", desc->fd_lens[1]);
538                 return -EINVAL;
539         }
540         if (desc->fd_lens[2] > CEPH_MSG_MAX_MIDDLE_LEN) {
541                 pr_err("middle segment too big %d\n", desc->fd_lens[2]);
542                 return -EINVAL;
543         }
544         if (desc->fd_lens[3] > CEPH_MSG_MAX_DATA_LEN) {
545                 pr_err("data segment too big %d\n", desc->fd_lens[3]);
546                 return -EINVAL;
547         }
548
549         return 0;
550 }
551
552 static void encode_epilogue_plain(struct ceph_connection *con, bool aborted)
553 {
554         con->v2.out_epil.late_status = aborted ? FRAME_LATE_STATUS_ABORTED :
555                                                  FRAME_LATE_STATUS_COMPLETE;
556         cpu_to_le32s(&con->v2.out_epil.front_crc);
557         cpu_to_le32s(&con->v2.out_epil.middle_crc);
558         cpu_to_le32s(&con->v2.out_epil.data_crc);
559 }
560
561 static void encode_epilogue_secure(struct ceph_connection *con, bool aborted)
562 {
563         memset(&con->v2.out_epil, 0, sizeof(con->v2.out_epil));
564         con->v2.out_epil.late_status = aborted ? FRAME_LATE_STATUS_ABORTED :
565                                                  FRAME_LATE_STATUS_COMPLETE;
566 }
567
568 static int decode_epilogue(void *p, u32 *front_crc, u32 *middle_crc,
569                            u32 *data_crc)
570 {
571         u8 late_status;
572
573         late_status = ceph_decode_8(&p);
574         if ((late_status & FRAME_LATE_STATUS_ABORTED_MASK) !=
575                         FRAME_LATE_STATUS_COMPLETE) {
576                 /* we should never get an aborted message as client */
577                 pr_err("bad late_status 0x%x\n", late_status);
578                 return -EINVAL;
579         }
580
581         if (front_crc && middle_crc && data_crc) {
582                 *front_crc = ceph_decode_32(&p);
583                 *middle_crc = ceph_decode_32(&p);
584                 *data_crc = ceph_decode_32(&p);
585         }
586
587         return 0;
588 }
589
590 static void fill_header(struct ceph_msg_header *hdr,
591                         const struct ceph_msg_header2 *hdr2,
592                         int front_len, int middle_len, int data_len,
593                         const struct ceph_entity_name *peer_name)
594 {
595         hdr->seq = hdr2->seq;
596         hdr->tid = hdr2->tid;
597         hdr->type = hdr2->type;
598         hdr->priority = hdr2->priority;
599         hdr->version = hdr2->version;
600         hdr->front_len = cpu_to_le32(front_len);
601         hdr->middle_len = cpu_to_le32(middle_len);
602         hdr->data_len = cpu_to_le32(data_len);
603         hdr->data_off = hdr2->data_off;
604         hdr->src = *peer_name;
605         hdr->compat_version = hdr2->compat_version;
606         hdr->reserved = 0;
607         hdr->crc = 0;
608 }
609
610 static void fill_header2(struct ceph_msg_header2 *hdr2,
611                          const struct ceph_msg_header *hdr, u64 ack_seq)
612 {
613         hdr2->seq = hdr->seq;
614         hdr2->tid = hdr->tid;
615         hdr2->type = hdr->type;
616         hdr2->priority = hdr->priority;
617         hdr2->version = hdr->version;
618         hdr2->data_pre_padding_len = 0;
619         hdr2->data_off = hdr->data_off;
620         hdr2->ack_seq = cpu_to_le64(ack_seq);
621         hdr2->flags = 0;
622         hdr2->compat_version = hdr->compat_version;
623         hdr2->reserved = 0;
624 }
625
626 static int verify_control_crc(struct ceph_connection *con)
627 {
628         int ctrl_len = con->v2.in_desc.fd_lens[0];
629         u32 crc, expected_crc;
630
631         WARN_ON(con->v2.in_kvecs[0].iov_len != ctrl_len);
632         WARN_ON(con->v2.in_kvecs[1].iov_len != CEPH_CRC_LEN);
633
634         crc = crc32c(-1, con->v2.in_kvecs[0].iov_base, ctrl_len);
635         expected_crc = get_unaligned_le32(con->v2.in_kvecs[1].iov_base);
636         if (crc != expected_crc) {
637                 pr_err("bad control crc, calculated %u, expected %u\n",
638                        crc, expected_crc);
639                 return -EBADMSG;
640         }
641
642         return 0;
643 }
644
645 static int verify_epilogue_crcs(struct ceph_connection *con, u32 front_crc,
646                                 u32 middle_crc, u32 data_crc)
647 {
648         if (front_len(con->in_msg)) {
649                 con->in_front_crc = crc32c(-1, con->in_msg->front.iov_base,
650                                            front_len(con->in_msg));
651         } else {
652                 WARN_ON(!middle_len(con->in_msg) && !data_len(con->in_msg));
653                 con->in_front_crc = -1;
654         }
655
656         if (middle_len(con->in_msg))
657                 con->in_middle_crc = crc32c(-1,
658                                             con->in_msg->middle->vec.iov_base,
659                                             middle_len(con->in_msg));
660         else if (data_len(con->in_msg))
661                 con->in_middle_crc = -1;
662         else
663                 con->in_middle_crc = 0;
664
665         if (!data_len(con->in_msg))
666                 con->in_data_crc = 0;
667
668         dout("%s con %p msg %p crcs %u %u %u\n", __func__, con, con->in_msg,
669              con->in_front_crc, con->in_middle_crc, con->in_data_crc);
670
671         if (con->in_front_crc != front_crc) {
672                 pr_err("bad front crc, calculated %u, expected %u\n",
673                        con->in_front_crc, front_crc);
674                 return -EBADMSG;
675         }
676         if (con->in_middle_crc != middle_crc) {
677                 pr_err("bad middle crc, calculated %u, expected %u\n",
678                        con->in_middle_crc, middle_crc);
679                 return -EBADMSG;
680         }
681         if (con->in_data_crc != data_crc) {
682                 pr_err("bad data crc, calculated %u, expected %u\n",
683                        con->in_data_crc, data_crc);
684                 return -EBADMSG;
685         }
686
687         return 0;
688 }
689
690 static int setup_crypto(struct ceph_connection *con,
691                         const u8 *session_key, int session_key_len,
692                         const u8 *con_secret, int con_secret_len)
693 {
694         unsigned int noio_flag;
695         int ret;
696
697         dout("%s con %p con_mode %d session_key_len %d con_secret_len %d\n",
698              __func__, con, con->v2.con_mode, session_key_len, con_secret_len);
699         WARN_ON(con->v2.hmac_tfm || con->v2.gcm_tfm || con->v2.gcm_req);
700
701         if (con->v2.con_mode != CEPH_CON_MODE_CRC &&
702             con->v2.con_mode != CEPH_CON_MODE_SECURE) {
703                 pr_err("bad con_mode %d\n", con->v2.con_mode);
704                 return -EINVAL;
705         }
706
707         if (!session_key_len) {
708                 WARN_ON(con->v2.con_mode != CEPH_CON_MODE_CRC);
709                 WARN_ON(con_secret_len);
710                 return 0;  /* auth_none */
711         }
712
713         noio_flag = memalloc_noio_save();
714         con->v2.hmac_tfm = crypto_alloc_shash("hmac(sha256)", 0, 0);
715         memalloc_noio_restore(noio_flag);
716         if (IS_ERR(con->v2.hmac_tfm)) {
717                 ret = PTR_ERR(con->v2.hmac_tfm);
718                 con->v2.hmac_tfm = NULL;
719                 pr_err("failed to allocate hmac tfm context: %d\n", ret);
720                 return ret;
721         }
722
723         WARN_ON((unsigned long)session_key &
724                 crypto_shash_alignmask(con->v2.hmac_tfm));
725         ret = crypto_shash_setkey(con->v2.hmac_tfm, session_key,
726                                   session_key_len);
727         if (ret) {
728                 pr_err("failed to set hmac key: %d\n", ret);
729                 return ret;
730         }
731
732         if (con->v2.con_mode == CEPH_CON_MODE_CRC) {
733                 WARN_ON(con_secret_len);
734                 return 0;  /* auth_x, plain mode */
735         }
736
737         if (con_secret_len < CEPH_GCM_KEY_LEN + 2 * CEPH_GCM_IV_LEN) {
738                 pr_err("con_secret too small %d\n", con_secret_len);
739                 return -EINVAL;
740         }
741
742         noio_flag = memalloc_noio_save();
743         con->v2.gcm_tfm = crypto_alloc_aead("gcm(aes)", 0, 0);
744         memalloc_noio_restore(noio_flag);
745         if (IS_ERR(con->v2.gcm_tfm)) {
746                 ret = PTR_ERR(con->v2.gcm_tfm);
747                 con->v2.gcm_tfm = NULL;
748                 pr_err("failed to allocate gcm tfm context: %d\n", ret);
749                 return ret;
750         }
751
752         WARN_ON((unsigned long)con_secret &
753                 crypto_aead_alignmask(con->v2.gcm_tfm));
754         ret = crypto_aead_setkey(con->v2.gcm_tfm, con_secret, CEPH_GCM_KEY_LEN);
755         if (ret) {
756                 pr_err("failed to set gcm key: %d\n", ret);
757                 return ret;
758         }
759
760         WARN_ON(crypto_aead_ivsize(con->v2.gcm_tfm) != CEPH_GCM_IV_LEN);
761         ret = crypto_aead_setauthsize(con->v2.gcm_tfm, CEPH_GCM_TAG_LEN);
762         if (ret) {
763                 pr_err("failed to set gcm tag size: %d\n", ret);
764                 return ret;
765         }
766
767         con->v2.gcm_req = aead_request_alloc(con->v2.gcm_tfm, GFP_NOIO);
768         if (!con->v2.gcm_req) {
769                 pr_err("failed to allocate gcm request\n");
770                 return -ENOMEM;
771         }
772
773         crypto_init_wait(&con->v2.gcm_wait);
774         aead_request_set_callback(con->v2.gcm_req, CRYPTO_TFM_REQ_MAY_BACKLOG,
775                                   crypto_req_done, &con->v2.gcm_wait);
776
777         memcpy(&con->v2.in_gcm_nonce, con_secret + CEPH_GCM_KEY_LEN,
778                CEPH_GCM_IV_LEN);
779         memcpy(&con->v2.out_gcm_nonce,
780                con_secret + CEPH_GCM_KEY_LEN + CEPH_GCM_IV_LEN,
781                CEPH_GCM_IV_LEN);
782         return 0;  /* auth_x, secure mode */
783 }
784
785 static int hmac_sha256(struct ceph_connection *con, const struct kvec *kvecs,
786                        int kvec_cnt, u8 *hmac)
787 {
788         SHASH_DESC_ON_STACK(desc, con->v2.hmac_tfm);  /* tfm arg is ignored */
789         int ret;
790         int i;
791
792         dout("%s con %p hmac_tfm %p kvec_cnt %d\n", __func__, con,
793              con->v2.hmac_tfm, kvec_cnt);
794
795         if (!con->v2.hmac_tfm) {
796                 memset(hmac, 0, SHA256_DIGEST_SIZE);
797                 return 0;  /* auth_none */
798         }
799
800         desc->tfm = con->v2.hmac_tfm;
801         ret = crypto_shash_init(desc);
802         if (ret)
803                 goto out;
804
805         for (i = 0; i < kvec_cnt; i++) {
806                 WARN_ON((unsigned long)kvecs[i].iov_base &
807                         crypto_shash_alignmask(con->v2.hmac_tfm));
808                 ret = crypto_shash_update(desc, kvecs[i].iov_base,
809                                           kvecs[i].iov_len);
810                 if (ret)
811                         goto out;
812         }
813
814         ret = crypto_shash_final(desc, hmac);
815
816 out:
817         shash_desc_zero(desc);
818         return ret;  /* auth_x, both plain and secure modes */
819 }
820
821 static void gcm_inc_nonce(struct ceph_gcm_nonce *nonce)
822 {
823         u64 counter;
824
825         counter = le64_to_cpu(nonce->counter);
826         nonce->counter = cpu_to_le64(counter + 1);
827 }
828
829 static int gcm_crypt(struct ceph_connection *con, bool encrypt,
830                      struct scatterlist *src, struct scatterlist *dst,
831                      int src_len)
832 {
833         struct ceph_gcm_nonce *nonce;
834         int ret;
835
836         nonce = encrypt ? &con->v2.out_gcm_nonce : &con->v2.in_gcm_nonce;
837
838         aead_request_set_ad(con->v2.gcm_req, 0);  /* no AAD */
839         aead_request_set_crypt(con->v2.gcm_req, src, dst, src_len, (u8 *)nonce);
840         ret = crypto_wait_req(encrypt ? crypto_aead_encrypt(con->v2.gcm_req) :
841                                         crypto_aead_decrypt(con->v2.gcm_req),
842                               &con->v2.gcm_wait);
843         if (ret)
844                 return ret;
845
846         gcm_inc_nonce(nonce);
847         return 0;
848 }
849
850 static void get_bvec_at(struct ceph_msg_data_cursor *cursor,
851                         struct bio_vec *bv)
852 {
853         struct page *page;
854         size_t off, len;
855
856         WARN_ON(!cursor->total_resid);
857
858         /* skip zero-length data items */
859         while (!cursor->resid)
860                 ceph_msg_data_advance(cursor, 0);
861
862         /* get a piece of data, cursor isn't advanced */
863         page = ceph_msg_data_next(cursor, &off, &len);
864         bvec_set_page(bv, page, len, off);
865 }
866
867 static int calc_sg_cnt(void *buf, int buf_len)
868 {
869         int sg_cnt;
870
871         if (!buf_len)
872                 return 0;
873
874         sg_cnt = need_padding(buf_len) ? 1 : 0;
875         if (is_vmalloc_addr(buf)) {
876                 WARN_ON(offset_in_page(buf));
877                 sg_cnt += PAGE_ALIGN(buf_len) >> PAGE_SHIFT;
878         } else {
879                 sg_cnt++;
880         }
881
882         return sg_cnt;
883 }
884
885 static int calc_sg_cnt_cursor(struct ceph_msg_data_cursor *cursor)
886 {
887         int data_len = cursor->total_resid;
888         struct bio_vec bv;
889         int sg_cnt;
890
891         if (!data_len)
892                 return 0;
893
894         sg_cnt = need_padding(data_len) ? 1 : 0;
895         do {
896                 get_bvec_at(cursor, &bv);
897                 sg_cnt++;
898
899                 ceph_msg_data_advance(cursor, bv.bv_len);
900         } while (cursor->total_resid);
901
902         return sg_cnt;
903 }
904
905 static void init_sgs(struct scatterlist **sg, void *buf, int buf_len, u8 *pad)
906 {
907         void *end = buf + buf_len;
908         struct page *page;
909         int len;
910         void *p;
911
912         if (!buf_len)
913                 return;
914
915         if (is_vmalloc_addr(buf)) {
916                 p = buf;
917                 do {
918                         page = vmalloc_to_page(p);
919                         len = min_t(int, end - p, PAGE_SIZE);
920                         WARN_ON(!page || !len || offset_in_page(p));
921                         sg_set_page(*sg, page, len, 0);
922                         *sg = sg_next(*sg);
923                         p += len;
924                 } while (p != end);
925         } else {
926                 sg_set_buf(*sg, buf, buf_len);
927                 *sg = sg_next(*sg);
928         }
929
930         if (need_padding(buf_len)) {
931                 sg_set_buf(*sg, pad, padding_len(buf_len));
932                 *sg = sg_next(*sg);
933         }
934 }
935
936 static void init_sgs_cursor(struct scatterlist **sg,
937                             struct ceph_msg_data_cursor *cursor, u8 *pad)
938 {
939         int data_len = cursor->total_resid;
940         struct bio_vec bv;
941
942         if (!data_len)
943                 return;
944
945         do {
946                 get_bvec_at(cursor, &bv);
947                 sg_set_page(*sg, bv.bv_page, bv.bv_len, bv.bv_offset);
948                 *sg = sg_next(*sg);
949
950                 ceph_msg_data_advance(cursor, bv.bv_len);
951         } while (cursor->total_resid);
952
953         if (need_padding(data_len)) {
954                 sg_set_buf(*sg, pad, padding_len(data_len));
955                 *sg = sg_next(*sg);
956         }
957 }
958
959 static int setup_message_sgs(struct sg_table *sgt, struct ceph_msg *msg,
960                              u8 *front_pad, u8 *middle_pad, u8 *data_pad,
961                              void *epilogue, bool add_tag)
962 {
963         struct ceph_msg_data_cursor cursor;
964         struct scatterlist *cur_sg;
965         int sg_cnt;
966         int ret;
967
968         if (!front_len(msg) && !middle_len(msg) && !data_len(msg))
969                 return 0;
970
971         sg_cnt = 1;  /* epilogue + [auth tag] */
972         if (front_len(msg))
973                 sg_cnt += calc_sg_cnt(msg->front.iov_base,
974                                       front_len(msg));
975         if (middle_len(msg))
976                 sg_cnt += calc_sg_cnt(msg->middle->vec.iov_base,
977                                       middle_len(msg));
978         if (data_len(msg)) {
979                 ceph_msg_data_cursor_init(&cursor, msg, data_len(msg));
980                 sg_cnt += calc_sg_cnt_cursor(&cursor);
981         }
982
983         ret = sg_alloc_table(sgt, sg_cnt, GFP_NOIO);
984         if (ret)
985                 return ret;
986
987         cur_sg = sgt->sgl;
988         if (front_len(msg))
989                 init_sgs(&cur_sg, msg->front.iov_base, front_len(msg),
990                          front_pad);
991         if (middle_len(msg))
992                 init_sgs(&cur_sg, msg->middle->vec.iov_base, middle_len(msg),
993                          middle_pad);
994         if (data_len(msg)) {
995                 ceph_msg_data_cursor_init(&cursor, msg, data_len(msg));
996                 init_sgs_cursor(&cur_sg, &cursor, data_pad);
997         }
998
999         WARN_ON(!sg_is_last(cur_sg));
1000         sg_set_buf(cur_sg, epilogue,
1001                    CEPH_GCM_BLOCK_LEN + (add_tag ? CEPH_GCM_TAG_LEN : 0));
1002         return 0;
1003 }
1004
1005 static int decrypt_preamble(struct ceph_connection *con)
1006 {
1007         struct scatterlist sg;
1008
1009         sg_init_one(&sg, con->v2.in_buf, CEPH_PREAMBLE_SECURE_LEN);
1010         return gcm_crypt(con, false, &sg, &sg, CEPH_PREAMBLE_SECURE_LEN);
1011 }
1012
1013 static int decrypt_control_remainder(struct ceph_connection *con)
1014 {
1015         int ctrl_len = con->v2.in_desc.fd_lens[0];
1016         int rem_len = ctrl_len - CEPH_PREAMBLE_INLINE_LEN;
1017         int pt_len = padding_len(rem_len) + CEPH_GCM_TAG_LEN;
1018         struct scatterlist sgs[2];
1019
1020         WARN_ON(con->v2.in_kvecs[0].iov_len != rem_len);
1021         WARN_ON(con->v2.in_kvecs[1].iov_len != pt_len);
1022
1023         sg_init_table(sgs, 2);
1024         sg_set_buf(&sgs[0], con->v2.in_kvecs[0].iov_base, rem_len);
1025         sg_set_buf(&sgs[1], con->v2.in_buf, pt_len);
1026
1027         return gcm_crypt(con, false, sgs, sgs,
1028                          padded_len(rem_len) + CEPH_GCM_TAG_LEN);
1029 }
1030
1031 static int decrypt_tail(struct ceph_connection *con)
1032 {
1033         struct sg_table enc_sgt = {};
1034         struct sg_table sgt = {};
1035         int tail_len;
1036         int ret;
1037
1038         tail_len = tail_onwire_len(con->in_msg, true);
1039         ret = sg_alloc_table_from_pages(&enc_sgt, con->v2.in_enc_pages,
1040                                         con->v2.in_enc_page_cnt, 0, tail_len,
1041                                         GFP_NOIO);
1042         if (ret)
1043                 goto out;
1044
1045         ret = setup_message_sgs(&sgt, con->in_msg, FRONT_PAD(con->v2.in_buf),
1046                         MIDDLE_PAD(con->v2.in_buf), DATA_PAD(con->v2.in_buf),
1047                         con->v2.in_buf, true);
1048         if (ret)
1049                 goto out;
1050
1051         dout("%s con %p msg %p enc_page_cnt %d sg_cnt %d\n", __func__, con,
1052              con->in_msg, con->v2.in_enc_page_cnt, sgt.orig_nents);
1053         ret = gcm_crypt(con, false, enc_sgt.sgl, sgt.sgl, tail_len);
1054         if (ret)
1055                 goto out;
1056
1057         WARN_ON(!con->v2.in_enc_page_cnt);
1058         ceph_release_page_vector(con->v2.in_enc_pages,
1059                                  con->v2.in_enc_page_cnt);
1060         con->v2.in_enc_pages = NULL;
1061         con->v2.in_enc_page_cnt = 0;
1062
1063 out:
1064         sg_free_table(&sgt);
1065         sg_free_table(&enc_sgt);
1066         return ret;
1067 }
1068
1069 static int prepare_banner(struct ceph_connection *con)
1070 {
1071         int buf_len = CEPH_BANNER_V2_LEN + 2 + 8 + 8;
1072         void *buf, *p;
1073
1074         buf = alloc_conn_buf(con, buf_len);
1075         if (!buf)
1076                 return -ENOMEM;
1077
1078         p = buf;
1079         ceph_encode_copy(&p, CEPH_BANNER_V2, CEPH_BANNER_V2_LEN);
1080         ceph_encode_16(&p, sizeof(u64) + sizeof(u64));
1081         ceph_encode_64(&p, CEPH_MSGR2_SUPPORTED_FEATURES);
1082         ceph_encode_64(&p, CEPH_MSGR2_REQUIRED_FEATURES);
1083         WARN_ON(p != buf + buf_len);
1084
1085         add_out_kvec(con, buf, buf_len);
1086         add_out_sign_kvec(con, buf, buf_len);
1087         ceph_con_flag_set(con, CEPH_CON_F_WRITE_PENDING);
1088         return 0;
1089 }
1090
1091 /*
1092  * base:
1093  *   preamble
1094  *   control body (ctrl_len bytes)
1095  *   space for control crc
1096  *
1097  * extdata (optional):
1098  *   control body (extdata_len bytes)
1099  *
1100  * Compute control crc and gather base and extdata into:
1101  *
1102  *   preamble
1103  *   control body (ctrl_len + extdata_len bytes)
1104  *   control crc
1105  *
1106  * Preamble should already be encoded at the start of base.
1107  */
1108 static void prepare_head_plain(struct ceph_connection *con, void *base,
1109                                int ctrl_len, void *extdata, int extdata_len,
1110                                bool to_be_signed)
1111 {
1112         int base_len = CEPH_PREAMBLE_LEN + ctrl_len + CEPH_CRC_LEN;
1113         void *crcp = base + base_len - CEPH_CRC_LEN;
1114         u32 crc;
1115
1116         crc = crc32c(-1, CTRL_BODY(base), ctrl_len);
1117         if (extdata_len)
1118                 crc = crc32c(crc, extdata, extdata_len);
1119         put_unaligned_le32(crc, crcp);
1120
1121         if (!extdata_len) {
1122                 add_out_kvec(con, base, base_len);
1123                 if (to_be_signed)
1124                         add_out_sign_kvec(con, base, base_len);
1125                 return;
1126         }
1127
1128         add_out_kvec(con, base, crcp - base);
1129         add_out_kvec(con, extdata, extdata_len);
1130         add_out_kvec(con, crcp, CEPH_CRC_LEN);
1131         if (to_be_signed) {
1132                 add_out_sign_kvec(con, base, crcp - base);
1133                 add_out_sign_kvec(con, extdata, extdata_len);
1134                 add_out_sign_kvec(con, crcp, CEPH_CRC_LEN);
1135         }
1136 }
1137
1138 static int prepare_head_secure_small(struct ceph_connection *con,
1139                                      void *base, int ctrl_len)
1140 {
1141         struct scatterlist sg;
1142         int ret;
1143
1144         /* inline buffer padding? */
1145         if (ctrl_len < CEPH_PREAMBLE_INLINE_LEN)
1146                 memset(CTRL_BODY(base) + ctrl_len, 0,
1147                        CEPH_PREAMBLE_INLINE_LEN - ctrl_len);
1148
1149         sg_init_one(&sg, base, CEPH_PREAMBLE_SECURE_LEN);
1150         ret = gcm_crypt(con, true, &sg, &sg,
1151                         CEPH_PREAMBLE_SECURE_LEN - CEPH_GCM_TAG_LEN);
1152         if (ret)
1153                 return ret;
1154
1155         add_out_kvec(con, base, CEPH_PREAMBLE_SECURE_LEN);
1156         return 0;
1157 }
1158
1159 /*
1160  * base:
1161  *   preamble
1162  *   control body (ctrl_len bytes)
1163  *   space for padding, if needed
1164  *   space for control remainder auth tag
1165  *   space for preamble auth tag
1166  *
1167  * Encrypt preamble and the inline portion, then encrypt the remainder
1168  * and gather into:
1169  *
1170  *   preamble
1171  *   control body (48 bytes)
1172  *   preamble auth tag
1173  *   control body (ctrl_len - 48 bytes)
1174  *   zero padding, if needed
1175  *   control remainder auth tag
1176  *
1177  * Preamble should already be encoded at the start of base.
1178  */
1179 static int prepare_head_secure_big(struct ceph_connection *con,
1180                                    void *base, int ctrl_len)
1181 {
1182         int rem_len = ctrl_len - CEPH_PREAMBLE_INLINE_LEN;
1183         void *rem = CTRL_BODY(base) + CEPH_PREAMBLE_INLINE_LEN;
1184         void *rem_tag = rem + padded_len(rem_len);
1185         void *pmbl_tag = rem_tag + CEPH_GCM_TAG_LEN;
1186         struct scatterlist sgs[2];
1187         int ret;
1188
1189         sg_init_table(sgs, 2);
1190         sg_set_buf(&sgs[0], base, rem - base);
1191         sg_set_buf(&sgs[1], pmbl_tag, CEPH_GCM_TAG_LEN);
1192         ret = gcm_crypt(con, true, sgs, sgs, rem - base);
1193         if (ret)
1194                 return ret;
1195
1196         /* control remainder padding? */
1197         if (need_padding(rem_len))
1198                 memset(rem + rem_len, 0, padding_len(rem_len));
1199
1200         sg_init_one(&sgs[0], rem, pmbl_tag - rem);
1201         ret = gcm_crypt(con, true, sgs, sgs, rem_tag - rem);
1202         if (ret)
1203                 return ret;
1204
1205         add_out_kvec(con, base, rem - base);
1206         add_out_kvec(con, pmbl_tag, CEPH_GCM_TAG_LEN);
1207         add_out_kvec(con, rem, pmbl_tag - rem);
1208         return 0;
1209 }
1210
1211 static int __prepare_control(struct ceph_connection *con, int tag,
1212                              void *base, int ctrl_len, void *extdata,
1213                              int extdata_len, bool to_be_signed)
1214 {
1215         int total_len = ctrl_len + extdata_len;
1216         struct ceph_frame_desc desc;
1217         int ret;
1218
1219         dout("%s con %p tag %d len %d (%d+%d)\n", __func__, con, tag,
1220              total_len, ctrl_len, extdata_len);
1221
1222         /* extdata may be vmalloc'ed but not base */
1223         if (WARN_ON(is_vmalloc_addr(base) || !ctrl_len))
1224                 return -EINVAL;
1225
1226         init_frame_desc(&desc, tag, &total_len, 1);
1227         encode_preamble(&desc, base);
1228
1229         if (con_secure(con)) {
1230                 if (WARN_ON(extdata_len || to_be_signed))
1231                         return -EINVAL;
1232
1233                 if (ctrl_len <= CEPH_PREAMBLE_INLINE_LEN)
1234                         /* fully inlined, inline buffer may need padding */
1235                         ret = prepare_head_secure_small(con, base, ctrl_len);
1236                 else
1237                         /* partially inlined, inline buffer is full */
1238                         ret = prepare_head_secure_big(con, base, ctrl_len);
1239                 if (ret)
1240                         return ret;
1241         } else {
1242                 prepare_head_plain(con, base, ctrl_len, extdata, extdata_len,
1243                                    to_be_signed);
1244         }
1245
1246         ceph_con_flag_set(con, CEPH_CON_F_WRITE_PENDING);
1247         return 0;
1248 }
1249
1250 static int prepare_control(struct ceph_connection *con, int tag,
1251                            void *base, int ctrl_len)
1252 {
1253         return __prepare_control(con, tag, base, ctrl_len, NULL, 0, false);
1254 }
1255
1256 static int prepare_hello(struct ceph_connection *con)
1257 {
1258         void *buf, *p;
1259         int ctrl_len;
1260
1261         ctrl_len = 1 + ceph_entity_addr_encoding_len(&con->peer_addr);
1262         buf = alloc_conn_buf(con, head_onwire_len(ctrl_len, false));
1263         if (!buf)
1264                 return -ENOMEM;
1265
1266         p = CTRL_BODY(buf);
1267         ceph_encode_8(&p, CEPH_ENTITY_TYPE_CLIENT);
1268         ceph_encode_entity_addr(&p, &con->peer_addr);
1269         WARN_ON(p != CTRL_BODY(buf) + ctrl_len);
1270
1271         return __prepare_control(con, FRAME_TAG_HELLO, buf, ctrl_len,
1272                                  NULL, 0, true);
1273 }
1274
1275 /* so that head_onwire_len(AUTH_BUF_LEN, false) is 512 */
1276 #define AUTH_BUF_LEN    (512 - CEPH_CRC_LEN - CEPH_PREAMBLE_PLAIN_LEN)
1277
1278 static int prepare_auth_request(struct ceph_connection *con)
1279 {
1280         void *authorizer, *authorizer_copy;
1281         int ctrl_len, authorizer_len;
1282         void *buf;
1283         int ret;
1284
1285         ctrl_len = AUTH_BUF_LEN;
1286         buf = alloc_conn_buf(con, head_onwire_len(ctrl_len, false));
1287         if (!buf)
1288                 return -ENOMEM;
1289
1290         mutex_unlock(&con->mutex);
1291         ret = con->ops->get_auth_request(con, CTRL_BODY(buf), &ctrl_len,
1292                                          &authorizer, &authorizer_len);
1293         mutex_lock(&con->mutex);
1294         if (con->state != CEPH_CON_S_V2_HELLO) {
1295                 dout("%s con %p state changed to %d\n", __func__, con,
1296                      con->state);
1297                 return -EAGAIN;
1298         }
1299
1300         dout("%s con %p get_auth_request ret %d\n", __func__, con, ret);
1301         if (ret)
1302                 return ret;
1303
1304         authorizer_copy = alloc_conn_buf(con, authorizer_len);
1305         if (!authorizer_copy)
1306                 return -ENOMEM;
1307
1308         memcpy(authorizer_copy, authorizer, authorizer_len);
1309
1310         return __prepare_control(con, FRAME_TAG_AUTH_REQUEST, buf, ctrl_len,
1311                                  authorizer_copy, authorizer_len, true);
1312 }
1313
1314 static int prepare_auth_request_more(struct ceph_connection *con,
1315                                      void *reply, int reply_len)
1316 {
1317         int ctrl_len, authorizer_len;
1318         void *authorizer;
1319         void *buf;
1320         int ret;
1321
1322         ctrl_len = AUTH_BUF_LEN;
1323         buf = alloc_conn_buf(con, head_onwire_len(ctrl_len, false));
1324         if (!buf)
1325                 return -ENOMEM;
1326
1327         mutex_unlock(&con->mutex);
1328         ret = con->ops->handle_auth_reply_more(con, reply, reply_len,
1329                                                CTRL_BODY(buf), &ctrl_len,
1330                                                &authorizer, &authorizer_len);
1331         mutex_lock(&con->mutex);
1332         if (con->state != CEPH_CON_S_V2_AUTH) {
1333                 dout("%s con %p state changed to %d\n", __func__, con,
1334                      con->state);
1335                 return -EAGAIN;
1336         }
1337
1338         dout("%s con %p handle_auth_reply_more ret %d\n", __func__, con, ret);
1339         if (ret)
1340                 return ret;
1341
1342         return __prepare_control(con, FRAME_TAG_AUTH_REQUEST_MORE, buf,
1343                                  ctrl_len, authorizer, authorizer_len, true);
1344 }
1345
1346 static int prepare_auth_signature(struct ceph_connection *con)
1347 {
1348         void *buf;
1349         int ret;
1350
1351         buf = alloc_conn_buf(con, head_onwire_len(SHA256_DIGEST_SIZE,
1352                                                   con_secure(con)));
1353         if (!buf)
1354                 return -ENOMEM;
1355
1356         ret = hmac_sha256(con, con->v2.in_sign_kvecs, con->v2.in_sign_kvec_cnt,
1357                           CTRL_BODY(buf));
1358         if (ret)
1359                 return ret;
1360
1361         return prepare_control(con, FRAME_TAG_AUTH_SIGNATURE, buf,
1362                                SHA256_DIGEST_SIZE);
1363 }
1364
1365 static int prepare_client_ident(struct ceph_connection *con)
1366 {
1367         struct ceph_entity_addr *my_addr = &con->msgr->inst.addr;
1368         struct ceph_client *client = from_msgr(con->msgr);
1369         u64 global_id = ceph_client_gid(client);
1370         void *buf, *p;
1371         int ctrl_len;
1372
1373         WARN_ON(con->v2.server_cookie);
1374         WARN_ON(con->v2.connect_seq);
1375         WARN_ON(con->v2.peer_global_seq);
1376
1377         if (!con->v2.client_cookie) {
1378                 do {
1379                         get_random_bytes(&con->v2.client_cookie,
1380                                          sizeof(con->v2.client_cookie));
1381                 } while (!con->v2.client_cookie);
1382                 dout("%s con %p generated cookie 0x%llx\n", __func__, con,
1383                      con->v2.client_cookie);
1384         } else {
1385                 dout("%s con %p cookie already set 0x%llx\n", __func__, con,
1386                      con->v2.client_cookie);
1387         }
1388
1389         dout("%s con %p my_addr %s/%u peer_addr %s/%u global_id %llu global_seq %llu features 0x%llx required_features 0x%llx cookie 0x%llx\n",
1390              __func__, con, ceph_pr_addr(my_addr), le32_to_cpu(my_addr->nonce),
1391              ceph_pr_addr(&con->peer_addr), le32_to_cpu(con->peer_addr.nonce),
1392              global_id, con->v2.global_seq, client->supported_features,
1393              client->required_features, con->v2.client_cookie);
1394
1395         ctrl_len = 1 + 4 + ceph_entity_addr_encoding_len(my_addr) +
1396                    ceph_entity_addr_encoding_len(&con->peer_addr) + 6 * 8;
1397         buf = alloc_conn_buf(con, head_onwire_len(ctrl_len, con_secure(con)));
1398         if (!buf)
1399                 return -ENOMEM;
1400
1401         p = CTRL_BODY(buf);
1402         ceph_encode_8(&p, 2);  /* addrvec marker */
1403         ceph_encode_32(&p, 1);  /* addr_cnt */
1404         ceph_encode_entity_addr(&p, my_addr);
1405         ceph_encode_entity_addr(&p, &con->peer_addr);
1406         ceph_encode_64(&p, global_id);
1407         ceph_encode_64(&p, con->v2.global_seq);
1408         ceph_encode_64(&p, client->supported_features);
1409         ceph_encode_64(&p, client->required_features);
1410         ceph_encode_64(&p, 0);  /* flags */
1411         ceph_encode_64(&p, con->v2.client_cookie);
1412         WARN_ON(p != CTRL_BODY(buf) + ctrl_len);
1413
1414         return prepare_control(con, FRAME_TAG_CLIENT_IDENT, buf, ctrl_len);
1415 }
1416
1417 static int prepare_session_reconnect(struct ceph_connection *con)
1418 {
1419         struct ceph_entity_addr *my_addr = &con->msgr->inst.addr;
1420         void *buf, *p;
1421         int ctrl_len;
1422
1423         WARN_ON(!con->v2.client_cookie);
1424         WARN_ON(!con->v2.server_cookie);
1425         WARN_ON(!con->v2.connect_seq);
1426         WARN_ON(!con->v2.peer_global_seq);
1427
1428         dout("%s con %p my_addr %s/%u client_cookie 0x%llx server_cookie 0x%llx global_seq %llu connect_seq %llu in_seq %llu\n",
1429              __func__, con, ceph_pr_addr(my_addr), le32_to_cpu(my_addr->nonce),
1430              con->v2.client_cookie, con->v2.server_cookie, con->v2.global_seq,
1431              con->v2.connect_seq, con->in_seq);
1432
1433         ctrl_len = 1 + 4 + ceph_entity_addr_encoding_len(my_addr) + 5 * 8;
1434         buf = alloc_conn_buf(con, head_onwire_len(ctrl_len, con_secure(con)));
1435         if (!buf)
1436                 return -ENOMEM;
1437
1438         p = CTRL_BODY(buf);
1439         ceph_encode_8(&p, 2);  /* entity_addrvec_t marker */
1440         ceph_encode_32(&p, 1);  /* my_addrs len */
1441         ceph_encode_entity_addr(&p, my_addr);
1442         ceph_encode_64(&p, con->v2.client_cookie);
1443         ceph_encode_64(&p, con->v2.server_cookie);
1444         ceph_encode_64(&p, con->v2.global_seq);
1445         ceph_encode_64(&p, con->v2.connect_seq);
1446         ceph_encode_64(&p, con->in_seq);
1447         WARN_ON(p != CTRL_BODY(buf) + ctrl_len);
1448
1449         return prepare_control(con, FRAME_TAG_SESSION_RECONNECT, buf, ctrl_len);
1450 }
1451
1452 static int prepare_keepalive2(struct ceph_connection *con)
1453 {
1454         struct ceph_timespec *ts = CTRL_BODY(con->v2.out_buf);
1455         struct timespec64 now;
1456
1457         ktime_get_real_ts64(&now);
1458         dout("%s con %p timestamp %lld.%09ld\n", __func__, con, now.tv_sec,
1459              now.tv_nsec);
1460
1461         ceph_encode_timespec64(ts, &now);
1462
1463         reset_out_kvecs(con);
1464         return prepare_control(con, FRAME_TAG_KEEPALIVE2, con->v2.out_buf,
1465                                sizeof(struct ceph_timespec));
1466 }
1467
1468 static int prepare_ack(struct ceph_connection *con)
1469 {
1470         void *p;
1471
1472         dout("%s con %p in_seq_acked %llu -> %llu\n", __func__, con,
1473              con->in_seq_acked, con->in_seq);
1474         con->in_seq_acked = con->in_seq;
1475
1476         p = CTRL_BODY(con->v2.out_buf);
1477         ceph_encode_64(&p, con->in_seq_acked);
1478
1479         reset_out_kvecs(con);
1480         return prepare_control(con, FRAME_TAG_ACK, con->v2.out_buf, 8);
1481 }
1482
1483 static void prepare_epilogue_plain(struct ceph_connection *con, bool aborted)
1484 {
1485         dout("%s con %p msg %p aborted %d crcs %u %u %u\n", __func__, con,
1486              con->out_msg, aborted, con->v2.out_epil.front_crc,
1487              con->v2.out_epil.middle_crc, con->v2.out_epil.data_crc);
1488
1489         encode_epilogue_plain(con, aborted);
1490         add_out_kvec(con, &con->v2.out_epil, CEPH_EPILOGUE_PLAIN_LEN);
1491 }
1492
1493 /*
1494  * For "used" empty segments, crc is -1.  For unused (trailing)
1495  * segments, crc is 0.
1496  */
1497 static void prepare_message_plain(struct ceph_connection *con)
1498 {
1499         struct ceph_msg *msg = con->out_msg;
1500
1501         prepare_head_plain(con, con->v2.out_buf,
1502                            sizeof(struct ceph_msg_header2), NULL, 0, false);
1503
1504         if (!front_len(msg) && !middle_len(msg)) {
1505                 if (!data_len(msg)) {
1506                         /*
1507                          * Empty message: once the head is written,
1508                          * we are done -- there is no epilogue.
1509                          */
1510                         con->v2.out_state = OUT_S_FINISH_MESSAGE;
1511                         return;
1512                 }
1513
1514                 con->v2.out_epil.front_crc = -1;
1515                 con->v2.out_epil.middle_crc = -1;
1516                 con->v2.out_state = OUT_S_QUEUE_DATA;
1517                 return;
1518         }
1519
1520         if (front_len(msg)) {
1521                 con->v2.out_epil.front_crc = crc32c(-1, msg->front.iov_base,
1522                                                     front_len(msg));
1523                 add_out_kvec(con, msg->front.iov_base, front_len(msg));
1524         } else {
1525                 /* middle (at least) is there, checked above */
1526                 con->v2.out_epil.front_crc = -1;
1527         }
1528
1529         if (middle_len(msg)) {
1530                 con->v2.out_epil.middle_crc =
1531                         crc32c(-1, msg->middle->vec.iov_base, middle_len(msg));
1532                 add_out_kvec(con, msg->middle->vec.iov_base, middle_len(msg));
1533         } else {
1534                 con->v2.out_epil.middle_crc = data_len(msg) ? -1 : 0;
1535         }
1536
1537         if (data_len(msg)) {
1538                 con->v2.out_state = OUT_S_QUEUE_DATA;
1539         } else {
1540                 con->v2.out_epil.data_crc = 0;
1541                 prepare_epilogue_plain(con, false);
1542                 con->v2.out_state = OUT_S_FINISH_MESSAGE;
1543         }
1544 }
1545
1546 /*
1547  * Unfortunately the kernel crypto API doesn't support streaming
1548  * (piecewise) operation for AEAD algorithms, so we can't get away
1549  * with a fixed size buffer and a couple sgs.  Instead, we have to
1550  * allocate pages for the entire tail of the message (currently up
1551  * to ~32M) and two sgs arrays (up to ~256K each)...
1552  */
1553 static int prepare_message_secure(struct ceph_connection *con)
1554 {
1555         void *zerop = page_address(ceph_zero_page);
1556         struct sg_table enc_sgt = {};
1557         struct sg_table sgt = {};
1558         struct page **enc_pages;
1559         int enc_page_cnt;
1560         int tail_len;
1561         int ret;
1562
1563         ret = prepare_head_secure_small(con, con->v2.out_buf,
1564                                         sizeof(struct ceph_msg_header2));
1565         if (ret)
1566                 return ret;
1567
1568         tail_len = tail_onwire_len(con->out_msg, true);
1569         if (!tail_len) {
1570                 /*
1571                  * Empty message: once the head is written,
1572                  * we are done -- there is no epilogue.
1573                  */
1574                 con->v2.out_state = OUT_S_FINISH_MESSAGE;
1575                 return 0;
1576         }
1577
1578         encode_epilogue_secure(con, false);
1579         ret = setup_message_sgs(&sgt, con->out_msg, zerop, zerop, zerop,
1580                                 &con->v2.out_epil, false);
1581         if (ret)
1582                 goto out;
1583
1584         enc_page_cnt = calc_pages_for(0, tail_len);
1585         enc_pages = ceph_alloc_page_vector(enc_page_cnt, GFP_NOIO);
1586         if (IS_ERR(enc_pages)) {
1587                 ret = PTR_ERR(enc_pages);
1588                 goto out;
1589         }
1590
1591         WARN_ON(con->v2.out_enc_pages || con->v2.out_enc_page_cnt);
1592         con->v2.out_enc_pages = enc_pages;
1593         con->v2.out_enc_page_cnt = enc_page_cnt;
1594         con->v2.out_enc_resid = tail_len;
1595         con->v2.out_enc_i = 0;
1596
1597         ret = sg_alloc_table_from_pages(&enc_sgt, enc_pages, enc_page_cnt,
1598                                         0, tail_len, GFP_NOIO);
1599         if (ret)
1600                 goto out;
1601
1602         ret = gcm_crypt(con, true, sgt.sgl, enc_sgt.sgl,
1603                         tail_len - CEPH_GCM_TAG_LEN);
1604         if (ret)
1605                 goto out;
1606
1607         dout("%s con %p msg %p sg_cnt %d enc_page_cnt %d\n", __func__, con,
1608              con->out_msg, sgt.orig_nents, enc_page_cnt);
1609         con->v2.out_state = OUT_S_QUEUE_ENC_PAGE;
1610
1611 out:
1612         sg_free_table(&sgt);
1613         sg_free_table(&enc_sgt);
1614         return ret;
1615 }
1616
1617 static int prepare_message(struct ceph_connection *con)
1618 {
1619         int lens[] = {
1620                 sizeof(struct ceph_msg_header2),
1621                 front_len(con->out_msg),
1622                 middle_len(con->out_msg),
1623                 data_len(con->out_msg)
1624         };
1625         struct ceph_frame_desc desc;
1626         int ret;
1627
1628         dout("%s con %p msg %p logical %d+%d+%d+%d\n", __func__, con,
1629              con->out_msg, lens[0], lens[1], lens[2], lens[3]);
1630
1631         if (con->in_seq > con->in_seq_acked) {
1632                 dout("%s con %p in_seq_acked %llu -> %llu\n", __func__, con,
1633                      con->in_seq_acked, con->in_seq);
1634                 con->in_seq_acked = con->in_seq;
1635         }
1636
1637         reset_out_kvecs(con);
1638         init_frame_desc(&desc, FRAME_TAG_MESSAGE, lens, 4);
1639         encode_preamble(&desc, con->v2.out_buf);
1640         fill_header2(CTRL_BODY(con->v2.out_buf), &con->out_msg->hdr,
1641                      con->in_seq_acked);
1642
1643         if (con_secure(con)) {
1644                 ret = prepare_message_secure(con);
1645                 if (ret)
1646                         return ret;
1647         } else {
1648                 prepare_message_plain(con);
1649         }
1650
1651         ceph_con_flag_set(con, CEPH_CON_F_WRITE_PENDING);
1652         return 0;
1653 }
1654
1655 static int prepare_read_banner_prefix(struct ceph_connection *con)
1656 {
1657         void *buf;
1658
1659         buf = alloc_conn_buf(con, CEPH_BANNER_V2_PREFIX_LEN);
1660         if (!buf)
1661                 return -ENOMEM;
1662
1663         reset_in_kvecs(con);
1664         add_in_kvec(con, buf, CEPH_BANNER_V2_PREFIX_LEN);
1665         add_in_sign_kvec(con, buf, CEPH_BANNER_V2_PREFIX_LEN);
1666         con->state = CEPH_CON_S_V2_BANNER_PREFIX;
1667         return 0;
1668 }
1669
1670 static int prepare_read_banner_payload(struct ceph_connection *con,
1671                                        int payload_len)
1672 {
1673         void *buf;
1674
1675         buf = alloc_conn_buf(con, payload_len);
1676         if (!buf)
1677                 return -ENOMEM;
1678
1679         reset_in_kvecs(con);
1680         add_in_kvec(con, buf, payload_len);
1681         add_in_sign_kvec(con, buf, payload_len);
1682         con->state = CEPH_CON_S_V2_BANNER_PAYLOAD;
1683         return 0;
1684 }
1685
1686 static void prepare_read_preamble(struct ceph_connection *con)
1687 {
1688         reset_in_kvecs(con);
1689         add_in_kvec(con, con->v2.in_buf,
1690                     con_secure(con) ? CEPH_PREAMBLE_SECURE_LEN :
1691                                       CEPH_PREAMBLE_PLAIN_LEN);
1692         con->v2.in_state = IN_S_HANDLE_PREAMBLE;
1693 }
1694
1695 static int prepare_read_control(struct ceph_connection *con)
1696 {
1697         int ctrl_len = con->v2.in_desc.fd_lens[0];
1698         int head_len;
1699         void *buf;
1700
1701         reset_in_kvecs(con);
1702         if (con->state == CEPH_CON_S_V2_HELLO ||
1703             con->state == CEPH_CON_S_V2_AUTH) {
1704                 head_len = head_onwire_len(ctrl_len, false);
1705                 buf = alloc_conn_buf(con, head_len);
1706                 if (!buf)
1707                         return -ENOMEM;
1708
1709                 /* preserve preamble */
1710                 memcpy(buf, con->v2.in_buf, CEPH_PREAMBLE_LEN);
1711
1712                 add_in_kvec(con, CTRL_BODY(buf), ctrl_len);
1713                 add_in_kvec(con, CTRL_BODY(buf) + ctrl_len, CEPH_CRC_LEN);
1714                 add_in_sign_kvec(con, buf, head_len);
1715         } else {
1716                 if (ctrl_len > CEPH_PREAMBLE_INLINE_LEN) {
1717                         buf = alloc_conn_buf(con, ctrl_len);
1718                         if (!buf)
1719                                 return -ENOMEM;
1720
1721                         add_in_kvec(con, buf, ctrl_len);
1722                 } else {
1723                         add_in_kvec(con, CTRL_BODY(con->v2.in_buf), ctrl_len);
1724                 }
1725                 add_in_kvec(con, con->v2.in_buf, CEPH_CRC_LEN);
1726         }
1727         con->v2.in_state = IN_S_HANDLE_CONTROL;
1728         return 0;
1729 }
1730
1731 static int prepare_read_control_remainder(struct ceph_connection *con)
1732 {
1733         int ctrl_len = con->v2.in_desc.fd_lens[0];
1734         int rem_len = ctrl_len - CEPH_PREAMBLE_INLINE_LEN;
1735         void *buf;
1736
1737         buf = alloc_conn_buf(con, ctrl_len);
1738         if (!buf)
1739                 return -ENOMEM;
1740
1741         memcpy(buf, CTRL_BODY(con->v2.in_buf), CEPH_PREAMBLE_INLINE_LEN);
1742
1743         reset_in_kvecs(con);
1744         add_in_kvec(con, buf + CEPH_PREAMBLE_INLINE_LEN, rem_len);
1745         add_in_kvec(con, con->v2.in_buf,
1746                     padding_len(rem_len) + CEPH_GCM_TAG_LEN);
1747         con->v2.in_state = IN_S_HANDLE_CONTROL_REMAINDER;
1748         return 0;
1749 }
1750
1751 static int prepare_read_data(struct ceph_connection *con)
1752 {
1753         struct bio_vec bv;
1754
1755         con->in_data_crc = -1;
1756         ceph_msg_data_cursor_init(&con->v2.in_cursor, con->in_msg,
1757                                   data_len(con->in_msg));
1758
1759         get_bvec_at(&con->v2.in_cursor, &bv);
1760         if (ceph_test_opt(from_msgr(con->msgr), RXBOUNCE)) {
1761                 if (unlikely(!con->bounce_page)) {
1762                         con->bounce_page = alloc_page(GFP_NOIO);
1763                         if (!con->bounce_page) {
1764                                 pr_err("failed to allocate bounce page\n");
1765                                 return -ENOMEM;
1766                         }
1767                 }
1768
1769                 bv.bv_page = con->bounce_page;
1770                 bv.bv_offset = 0;
1771         }
1772         set_in_bvec(con, &bv);
1773         con->v2.in_state = IN_S_PREPARE_READ_DATA_CONT;
1774         return 0;
1775 }
1776
1777 static void prepare_read_data_cont(struct ceph_connection *con)
1778 {
1779         struct bio_vec bv;
1780
1781         if (ceph_test_opt(from_msgr(con->msgr), RXBOUNCE)) {
1782                 con->in_data_crc = crc32c(con->in_data_crc,
1783                                           page_address(con->bounce_page),
1784                                           con->v2.in_bvec.bv_len);
1785
1786                 get_bvec_at(&con->v2.in_cursor, &bv);
1787                 memcpy_to_page(bv.bv_page, bv.bv_offset,
1788                                page_address(con->bounce_page),
1789                                con->v2.in_bvec.bv_len);
1790         } else {
1791                 con->in_data_crc = ceph_crc32c_page(con->in_data_crc,
1792                                                     con->v2.in_bvec.bv_page,
1793                                                     con->v2.in_bvec.bv_offset,
1794                                                     con->v2.in_bvec.bv_len);
1795         }
1796
1797         ceph_msg_data_advance(&con->v2.in_cursor, con->v2.in_bvec.bv_len);
1798         if (con->v2.in_cursor.total_resid) {
1799                 get_bvec_at(&con->v2.in_cursor, &bv);
1800                 if (ceph_test_opt(from_msgr(con->msgr), RXBOUNCE)) {
1801                         bv.bv_page = con->bounce_page;
1802                         bv.bv_offset = 0;
1803                 }
1804                 set_in_bvec(con, &bv);
1805                 WARN_ON(con->v2.in_state != IN_S_PREPARE_READ_DATA_CONT);
1806                 return;
1807         }
1808
1809         /*
1810          * We've read all data.  Prepare to read epilogue.
1811          */
1812         reset_in_kvecs(con);
1813         add_in_kvec(con, con->v2.in_buf, CEPH_EPILOGUE_PLAIN_LEN);
1814         con->v2.in_state = IN_S_HANDLE_EPILOGUE;
1815 }
1816
1817 static int prepare_read_tail_plain(struct ceph_connection *con)
1818 {
1819         struct ceph_msg *msg = con->in_msg;
1820
1821         if (!front_len(msg) && !middle_len(msg)) {
1822                 WARN_ON(!data_len(msg));
1823                 return prepare_read_data(con);
1824         }
1825
1826         reset_in_kvecs(con);
1827         if (front_len(msg)) {
1828                 add_in_kvec(con, msg->front.iov_base, front_len(msg));
1829                 WARN_ON(msg->front.iov_len != front_len(msg));
1830         }
1831         if (middle_len(msg)) {
1832                 add_in_kvec(con, msg->middle->vec.iov_base, middle_len(msg));
1833                 WARN_ON(msg->middle->vec.iov_len != middle_len(msg));
1834         }
1835
1836         if (data_len(msg)) {
1837                 con->v2.in_state = IN_S_PREPARE_READ_DATA;
1838         } else {
1839                 add_in_kvec(con, con->v2.in_buf, CEPH_EPILOGUE_PLAIN_LEN);
1840                 con->v2.in_state = IN_S_HANDLE_EPILOGUE;
1841         }
1842         return 0;
1843 }
1844
1845 static void prepare_read_enc_page(struct ceph_connection *con)
1846 {
1847         struct bio_vec bv;
1848
1849         dout("%s con %p i %d resid %d\n", __func__, con, con->v2.in_enc_i,
1850              con->v2.in_enc_resid);
1851         WARN_ON(!con->v2.in_enc_resid);
1852
1853         bvec_set_page(&bv, con->v2.in_enc_pages[con->v2.in_enc_i],
1854                       min(con->v2.in_enc_resid, (int)PAGE_SIZE), 0);
1855
1856         set_in_bvec(con, &bv);
1857         con->v2.in_enc_i++;
1858         con->v2.in_enc_resid -= bv.bv_len;
1859
1860         if (con->v2.in_enc_resid) {
1861                 con->v2.in_state = IN_S_PREPARE_READ_ENC_PAGE;
1862                 return;
1863         }
1864
1865         /*
1866          * We are set to read the last piece of ciphertext (ending
1867          * with epilogue) + auth tag.
1868          */
1869         WARN_ON(con->v2.in_enc_i != con->v2.in_enc_page_cnt);
1870         con->v2.in_state = IN_S_HANDLE_EPILOGUE;
1871 }
1872
1873 static int prepare_read_tail_secure(struct ceph_connection *con)
1874 {
1875         struct page **enc_pages;
1876         int enc_page_cnt;
1877         int tail_len;
1878
1879         tail_len = tail_onwire_len(con->in_msg, true);
1880         WARN_ON(!tail_len);
1881
1882         enc_page_cnt = calc_pages_for(0, tail_len);
1883         enc_pages = ceph_alloc_page_vector(enc_page_cnt, GFP_NOIO);
1884         if (IS_ERR(enc_pages))
1885                 return PTR_ERR(enc_pages);
1886
1887         WARN_ON(con->v2.in_enc_pages || con->v2.in_enc_page_cnt);
1888         con->v2.in_enc_pages = enc_pages;
1889         con->v2.in_enc_page_cnt = enc_page_cnt;
1890         con->v2.in_enc_resid = tail_len;
1891         con->v2.in_enc_i = 0;
1892
1893         prepare_read_enc_page(con);
1894         return 0;
1895 }
1896
1897 static void __finish_skip(struct ceph_connection *con)
1898 {
1899         con->in_seq++;
1900         prepare_read_preamble(con);
1901 }
1902
1903 static void prepare_skip_message(struct ceph_connection *con)
1904 {
1905         struct ceph_frame_desc *desc = &con->v2.in_desc;
1906         int tail_len;
1907
1908         dout("%s con %p %d+%d+%d\n", __func__, con, desc->fd_lens[1],
1909              desc->fd_lens[2], desc->fd_lens[3]);
1910
1911         tail_len = __tail_onwire_len(desc->fd_lens[1], desc->fd_lens[2],
1912                                      desc->fd_lens[3], con_secure(con));
1913         if (!tail_len) {
1914                 __finish_skip(con);
1915         } else {
1916                 set_in_skip(con, tail_len);
1917                 con->v2.in_state = IN_S_FINISH_SKIP;
1918         }
1919 }
1920
1921 static int process_banner_prefix(struct ceph_connection *con)
1922 {
1923         int payload_len;
1924         void *p;
1925
1926         WARN_ON(con->v2.in_kvecs[0].iov_len != CEPH_BANNER_V2_PREFIX_LEN);
1927
1928         p = con->v2.in_kvecs[0].iov_base;
1929         if (memcmp(p, CEPH_BANNER_V2, CEPH_BANNER_V2_LEN)) {
1930                 if (!memcmp(p, CEPH_BANNER, CEPH_BANNER_LEN))
1931                         con->error_msg = "server is speaking msgr1 protocol";
1932                 else
1933                         con->error_msg = "protocol error, bad banner";
1934                 return -EINVAL;
1935         }
1936
1937         p += CEPH_BANNER_V2_LEN;
1938         payload_len = ceph_decode_16(&p);
1939         dout("%s con %p payload_len %d\n", __func__, con, payload_len);
1940
1941         return prepare_read_banner_payload(con, payload_len);
1942 }
1943
1944 static int process_banner_payload(struct ceph_connection *con)
1945 {
1946         void *end = con->v2.in_kvecs[0].iov_base + con->v2.in_kvecs[0].iov_len;
1947         u64 feat = CEPH_MSGR2_SUPPORTED_FEATURES;
1948         u64 req_feat = CEPH_MSGR2_REQUIRED_FEATURES;
1949         u64 server_feat, server_req_feat;
1950         void *p;
1951         int ret;
1952
1953         p = con->v2.in_kvecs[0].iov_base;
1954         ceph_decode_64_safe(&p, end, server_feat, bad);
1955         ceph_decode_64_safe(&p, end, server_req_feat, bad);
1956
1957         dout("%s con %p server_feat 0x%llx server_req_feat 0x%llx\n",
1958              __func__, con, server_feat, server_req_feat);
1959
1960         if (req_feat & ~server_feat) {
1961                 pr_err("msgr2 feature set mismatch: my required > server's supported 0x%llx, need 0x%llx\n",
1962                        server_feat, req_feat & ~server_feat);
1963                 con->error_msg = "missing required protocol features";
1964                 return -EINVAL;
1965         }
1966         if (server_req_feat & ~feat) {
1967                 pr_err("msgr2 feature set mismatch: server's required > my supported 0x%llx, missing 0x%llx\n",
1968                        feat, server_req_feat & ~feat);
1969                 con->error_msg = "missing required protocol features";
1970                 return -EINVAL;
1971         }
1972
1973         /* no reset_out_kvecs() as our banner may still be pending */
1974         ret = prepare_hello(con);
1975         if (ret) {
1976                 pr_err("prepare_hello failed: %d\n", ret);
1977                 return ret;
1978         }
1979
1980         con->state = CEPH_CON_S_V2_HELLO;
1981         prepare_read_preamble(con);
1982         return 0;
1983
1984 bad:
1985         pr_err("failed to decode banner payload\n");
1986         return -EINVAL;
1987 }
1988
1989 static int process_hello(struct ceph_connection *con, void *p, void *end)
1990 {
1991         struct ceph_entity_addr *my_addr = &con->msgr->inst.addr;
1992         struct ceph_entity_addr addr_for_me;
1993         u8 entity_type;
1994         int ret;
1995
1996         if (con->state != CEPH_CON_S_V2_HELLO) {
1997                 con->error_msg = "protocol error, unexpected hello";
1998                 return -EINVAL;
1999         }
2000
2001         ceph_decode_8_safe(&p, end, entity_type, bad);
2002         ret = ceph_decode_entity_addr(&p, end, &addr_for_me);
2003         if (ret) {
2004                 pr_err("failed to decode addr_for_me: %d\n", ret);
2005                 return ret;
2006         }
2007
2008         dout("%s con %p entity_type %d addr_for_me %s\n", __func__, con,
2009              entity_type, ceph_pr_addr(&addr_for_me));
2010
2011         if (entity_type != con->peer_name.type) {
2012                 pr_err("bad peer type, want %d, got %d\n",
2013                        con->peer_name.type, entity_type);
2014                 con->error_msg = "wrong peer at address";
2015                 return -EINVAL;
2016         }
2017
2018         /*
2019          * Set our address to the address our first peer (i.e. monitor)
2020          * sees that we are connecting from.  If we are behind some sort
2021          * of NAT and want to be identified by some private (not NATed)
2022          * address, ip option should be used.
2023          */
2024         if (ceph_addr_is_blank(my_addr)) {
2025                 memcpy(&my_addr->in_addr, &addr_for_me.in_addr,
2026                        sizeof(my_addr->in_addr));
2027                 ceph_addr_set_port(my_addr, 0);
2028                 dout("%s con %p set my addr %s, as seen by peer %s\n",
2029                      __func__, con, ceph_pr_addr(my_addr),
2030                      ceph_pr_addr(&con->peer_addr));
2031         } else {
2032                 dout("%s con %p my addr already set %s\n",
2033                      __func__, con, ceph_pr_addr(my_addr));
2034         }
2035
2036         WARN_ON(ceph_addr_is_blank(my_addr) || ceph_addr_port(my_addr));
2037         WARN_ON(my_addr->type != CEPH_ENTITY_ADDR_TYPE_ANY);
2038         WARN_ON(!my_addr->nonce);
2039
2040         /* no reset_out_kvecs() as our hello may still be pending */
2041         ret = prepare_auth_request(con);
2042         if (ret) {
2043                 if (ret != -EAGAIN)
2044                         pr_err("prepare_auth_request failed: %d\n", ret);
2045                 return ret;
2046         }
2047
2048         con->state = CEPH_CON_S_V2_AUTH;
2049         return 0;
2050
2051 bad:
2052         pr_err("failed to decode hello\n");
2053         return -EINVAL;
2054 }
2055
2056 static int process_auth_bad_method(struct ceph_connection *con,
2057                                    void *p, void *end)
2058 {
2059         int allowed_protos[8], allowed_modes[8];
2060         int allowed_proto_cnt, allowed_mode_cnt;
2061         int used_proto, result;
2062         int ret;
2063         int i;
2064
2065         if (con->state != CEPH_CON_S_V2_AUTH) {
2066                 con->error_msg = "protocol error, unexpected auth_bad_method";
2067                 return -EINVAL;
2068         }
2069
2070         ceph_decode_32_safe(&p, end, used_proto, bad);
2071         ceph_decode_32_safe(&p, end, result, bad);
2072         dout("%s con %p used_proto %d result %d\n", __func__, con, used_proto,
2073              result);
2074
2075         ceph_decode_32_safe(&p, end, allowed_proto_cnt, bad);
2076         if (allowed_proto_cnt > ARRAY_SIZE(allowed_protos)) {
2077                 pr_err("allowed_protos too big %d\n", allowed_proto_cnt);
2078                 return -EINVAL;
2079         }
2080         for (i = 0; i < allowed_proto_cnt; i++) {
2081                 ceph_decode_32_safe(&p, end, allowed_protos[i], bad);
2082                 dout("%s con %p allowed_protos[%d] %d\n", __func__, con,
2083                      i, allowed_protos[i]);
2084         }
2085
2086         ceph_decode_32_safe(&p, end, allowed_mode_cnt, bad);
2087         if (allowed_mode_cnt > ARRAY_SIZE(allowed_modes)) {
2088                 pr_err("allowed_modes too big %d\n", allowed_mode_cnt);
2089                 return -EINVAL;
2090         }
2091         for (i = 0; i < allowed_mode_cnt; i++) {
2092                 ceph_decode_32_safe(&p, end, allowed_modes[i], bad);
2093                 dout("%s con %p allowed_modes[%d] %d\n", __func__, con,
2094                      i, allowed_modes[i]);
2095         }
2096
2097         mutex_unlock(&con->mutex);
2098         ret = con->ops->handle_auth_bad_method(con, used_proto, result,
2099                                                allowed_protos,
2100                                                allowed_proto_cnt,
2101                                                allowed_modes,
2102                                                allowed_mode_cnt);
2103         mutex_lock(&con->mutex);
2104         if (con->state != CEPH_CON_S_V2_AUTH) {
2105                 dout("%s con %p state changed to %d\n", __func__, con,
2106                      con->state);
2107                 return -EAGAIN;
2108         }
2109
2110         dout("%s con %p handle_auth_bad_method ret %d\n", __func__, con, ret);
2111         return ret;
2112
2113 bad:
2114         pr_err("failed to decode auth_bad_method\n");
2115         return -EINVAL;
2116 }
2117
2118 static int process_auth_reply_more(struct ceph_connection *con,
2119                                    void *p, void *end)
2120 {
2121         int payload_len;
2122         int ret;
2123
2124         if (con->state != CEPH_CON_S_V2_AUTH) {
2125                 con->error_msg = "protocol error, unexpected auth_reply_more";
2126                 return -EINVAL;
2127         }
2128
2129         ceph_decode_32_safe(&p, end, payload_len, bad);
2130         ceph_decode_need(&p, end, payload_len, bad);
2131
2132         dout("%s con %p payload_len %d\n", __func__, con, payload_len);
2133
2134         reset_out_kvecs(con);
2135         ret = prepare_auth_request_more(con, p, payload_len);
2136         if (ret) {
2137                 if (ret != -EAGAIN)
2138                         pr_err("prepare_auth_request_more failed: %d\n", ret);
2139                 return ret;
2140         }
2141
2142         return 0;
2143
2144 bad:
2145         pr_err("failed to decode auth_reply_more\n");
2146         return -EINVAL;
2147 }
2148
2149 /*
2150  * Align session_key and con_secret to avoid GFP_ATOMIC allocation
2151  * inside crypto_shash_setkey() and crypto_aead_setkey() called from
2152  * setup_crypto().  __aligned(16) isn't guaranteed to work for stack
2153  * objects, so do it by hand.
2154  */
2155 static int process_auth_done(struct ceph_connection *con, void *p, void *end)
2156 {
2157         u8 session_key_buf[CEPH_KEY_LEN + 16];
2158         u8 con_secret_buf[CEPH_MAX_CON_SECRET_LEN + 16];
2159         u8 *session_key = PTR_ALIGN(&session_key_buf[0], 16);
2160         u8 *con_secret = PTR_ALIGN(&con_secret_buf[0], 16);
2161         int session_key_len, con_secret_len;
2162         int payload_len;
2163         u64 global_id;
2164         int ret;
2165
2166         if (con->state != CEPH_CON_S_V2_AUTH) {
2167                 con->error_msg = "protocol error, unexpected auth_done";
2168                 return -EINVAL;
2169         }
2170
2171         ceph_decode_64_safe(&p, end, global_id, bad);
2172         ceph_decode_32_safe(&p, end, con->v2.con_mode, bad);
2173         ceph_decode_32_safe(&p, end, payload_len, bad);
2174
2175         dout("%s con %p global_id %llu con_mode %d payload_len %d\n",
2176              __func__, con, global_id, con->v2.con_mode, payload_len);
2177
2178         mutex_unlock(&con->mutex);
2179         session_key_len = 0;
2180         con_secret_len = 0;
2181         ret = con->ops->handle_auth_done(con, global_id, p, payload_len,
2182                                          session_key, &session_key_len,
2183                                          con_secret, &con_secret_len);
2184         mutex_lock(&con->mutex);
2185         if (con->state != CEPH_CON_S_V2_AUTH) {
2186                 dout("%s con %p state changed to %d\n", __func__, con,
2187                      con->state);
2188                 ret = -EAGAIN;
2189                 goto out;
2190         }
2191
2192         dout("%s con %p handle_auth_done ret %d\n", __func__, con, ret);
2193         if (ret)
2194                 goto out;
2195
2196         ret = setup_crypto(con, session_key, session_key_len, con_secret,
2197                            con_secret_len);
2198         if (ret)
2199                 goto out;
2200
2201         reset_out_kvecs(con);
2202         ret = prepare_auth_signature(con);
2203         if (ret) {
2204                 pr_err("prepare_auth_signature failed: %d\n", ret);
2205                 goto out;
2206         }
2207
2208         con->state = CEPH_CON_S_V2_AUTH_SIGNATURE;
2209
2210 out:
2211         memzero_explicit(session_key_buf, sizeof(session_key_buf));
2212         memzero_explicit(con_secret_buf, sizeof(con_secret_buf));
2213         return ret;
2214
2215 bad:
2216         pr_err("failed to decode auth_done\n");
2217         return -EINVAL;
2218 }
2219
2220 static int process_auth_signature(struct ceph_connection *con,
2221                                   void *p, void *end)
2222 {
2223         u8 hmac[SHA256_DIGEST_SIZE];
2224         int ret;
2225
2226         if (con->state != CEPH_CON_S_V2_AUTH_SIGNATURE) {
2227                 con->error_msg = "protocol error, unexpected auth_signature";
2228                 return -EINVAL;
2229         }
2230
2231         ret = hmac_sha256(con, con->v2.out_sign_kvecs,
2232                           con->v2.out_sign_kvec_cnt, hmac);
2233         if (ret)
2234                 return ret;
2235
2236         ceph_decode_need(&p, end, SHA256_DIGEST_SIZE, bad);
2237         if (crypto_memneq(p, hmac, SHA256_DIGEST_SIZE)) {
2238                 con->error_msg = "integrity error, bad auth signature";
2239                 return -EBADMSG;
2240         }
2241
2242         dout("%s con %p auth signature ok\n", __func__, con);
2243
2244         /* no reset_out_kvecs() as our auth_signature may still be pending */
2245         if (!con->v2.server_cookie) {
2246                 ret = prepare_client_ident(con);
2247                 if (ret) {
2248                         pr_err("prepare_client_ident failed: %d\n", ret);
2249                         return ret;
2250                 }
2251
2252                 con->state = CEPH_CON_S_V2_SESSION_CONNECT;
2253         } else {
2254                 ret = prepare_session_reconnect(con);
2255                 if (ret) {
2256                         pr_err("prepare_session_reconnect failed: %d\n", ret);
2257                         return ret;
2258                 }
2259
2260                 con->state = CEPH_CON_S_V2_SESSION_RECONNECT;
2261         }
2262
2263         return 0;
2264
2265 bad:
2266         pr_err("failed to decode auth_signature\n");
2267         return -EINVAL;
2268 }
2269
2270 static int process_server_ident(struct ceph_connection *con,
2271                                 void *p, void *end)
2272 {
2273         struct ceph_client *client = from_msgr(con->msgr);
2274         u64 features, required_features;
2275         struct ceph_entity_addr addr;
2276         u64 global_seq;
2277         u64 global_id;
2278         u64 cookie;
2279         u64 flags;
2280         int ret;
2281
2282         if (con->state != CEPH_CON_S_V2_SESSION_CONNECT) {
2283                 con->error_msg = "protocol error, unexpected server_ident";
2284                 return -EINVAL;
2285         }
2286
2287         ret = ceph_decode_entity_addrvec(&p, end, true, &addr);
2288         if (ret) {
2289                 pr_err("failed to decode server addrs: %d\n", ret);
2290                 return ret;
2291         }
2292
2293         ceph_decode_64_safe(&p, end, global_id, bad);
2294         ceph_decode_64_safe(&p, end, global_seq, bad);
2295         ceph_decode_64_safe(&p, end, features, bad);
2296         ceph_decode_64_safe(&p, end, required_features, bad);
2297         ceph_decode_64_safe(&p, end, flags, bad);
2298         ceph_decode_64_safe(&p, end, cookie, bad);
2299
2300         dout("%s con %p addr %s/%u global_id %llu global_seq %llu features 0x%llx required_features 0x%llx flags 0x%llx cookie 0x%llx\n",
2301              __func__, con, ceph_pr_addr(&addr), le32_to_cpu(addr.nonce),
2302              global_id, global_seq, features, required_features, flags, cookie);
2303
2304         /* is this who we intended to talk to? */
2305         if (memcmp(&addr, &con->peer_addr, sizeof(con->peer_addr))) {
2306                 pr_err("bad peer addr/nonce, want %s/%u, got %s/%u\n",
2307                        ceph_pr_addr(&con->peer_addr),
2308                        le32_to_cpu(con->peer_addr.nonce),
2309                        ceph_pr_addr(&addr), le32_to_cpu(addr.nonce));
2310                 con->error_msg = "wrong peer at address";
2311                 return -EINVAL;
2312         }
2313
2314         if (client->required_features & ~features) {
2315                 pr_err("RADOS feature set mismatch: my required > server's supported 0x%llx, need 0x%llx\n",
2316                        features, client->required_features & ~features);
2317                 con->error_msg = "missing required protocol features";
2318                 return -EINVAL;
2319         }
2320
2321         /*
2322          * Both name->type and name->num are set in ceph_con_open() but
2323          * name->num may be bogus in the initial monmap.  name->type is
2324          * verified in handle_hello().
2325          */
2326         WARN_ON(!con->peer_name.type);
2327         con->peer_name.num = cpu_to_le64(global_id);
2328         con->v2.peer_global_seq = global_seq;
2329         con->peer_features = features;
2330         WARN_ON(required_features & ~client->supported_features);
2331         con->v2.server_cookie = cookie;
2332
2333         if (flags & CEPH_MSG_CONNECT_LOSSY) {
2334                 ceph_con_flag_set(con, CEPH_CON_F_LOSSYTX);
2335                 WARN_ON(con->v2.server_cookie);
2336         } else {
2337                 WARN_ON(!con->v2.server_cookie);
2338         }
2339
2340         clear_in_sign_kvecs(con);
2341         clear_out_sign_kvecs(con);
2342         free_conn_bufs(con);
2343         con->delay = 0;  /* reset backoff memory */
2344
2345         con->state = CEPH_CON_S_OPEN;
2346         con->v2.out_state = OUT_S_GET_NEXT;
2347         return 0;
2348
2349 bad:
2350         pr_err("failed to decode server_ident\n");
2351         return -EINVAL;
2352 }
2353
2354 static int process_ident_missing_features(struct ceph_connection *con,
2355                                           void *p, void *end)
2356 {
2357         struct ceph_client *client = from_msgr(con->msgr);
2358         u64 missing_features;
2359
2360         if (con->state != CEPH_CON_S_V2_SESSION_CONNECT) {
2361                 con->error_msg = "protocol error, unexpected ident_missing_features";
2362                 return -EINVAL;
2363         }
2364
2365         ceph_decode_64_safe(&p, end, missing_features, bad);
2366         pr_err("RADOS feature set mismatch: server's required > my supported 0x%llx, missing 0x%llx\n",
2367                client->supported_features, missing_features);
2368         con->error_msg = "missing required protocol features";
2369         return -EINVAL;
2370
2371 bad:
2372         pr_err("failed to decode ident_missing_features\n");
2373         return -EINVAL;
2374 }
2375
2376 static int process_session_reconnect_ok(struct ceph_connection *con,
2377                                         void *p, void *end)
2378 {
2379         u64 seq;
2380
2381         if (con->state != CEPH_CON_S_V2_SESSION_RECONNECT) {
2382                 con->error_msg = "protocol error, unexpected session_reconnect_ok";
2383                 return -EINVAL;
2384         }
2385
2386         ceph_decode_64_safe(&p, end, seq, bad);
2387
2388         dout("%s con %p seq %llu\n", __func__, con, seq);
2389         ceph_con_discard_requeued(con, seq);
2390
2391         clear_in_sign_kvecs(con);
2392         clear_out_sign_kvecs(con);
2393         free_conn_bufs(con);
2394         con->delay = 0;  /* reset backoff memory */
2395
2396         con->state = CEPH_CON_S_OPEN;
2397         con->v2.out_state = OUT_S_GET_NEXT;
2398         return 0;
2399
2400 bad:
2401         pr_err("failed to decode session_reconnect_ok\n");
2402         return -EINVAL;
2403 }
2404
2405 static int process_session_retry(struct ceph_connection *con,
2406                                  void *p, void *end)
2407 {
2408         u64 connect_seq;
2409         int ret;
2410
2411         if (con->state != CEPH_CON_S_V2_SESSION_RECONNECT) {
2412                 con->error_msg = "protocol error, unexpected session_retry";
2413                 return -EINVAL;
2414         }
2415
2416         ceph_decode_64_safe(&p, end, connect_seq, bad);
2417
2418         dout("%s con %p connect_seq %llu\n", __func__, con, connect_seq);
2419         WARN_ON(connect_seq <= con->v2.connect_seq);
2420         con->v2.connect_seq = connect_seq + 1;
2421
2422         free_conn_bufs(con);
2423
2424         reset_out_kvecs(con);
2425         ret = prepare_session_reconnect(con);
2426         if (ret) {
2427                 pr_err("prepare_session_reconnect (cseq) failed: %d\n", ret);
2428                 return ret;
2429         }
2430
2431         return 0;
2432
2433 bad:
2434         pr_err("failed to decode session_retry\n");
2435         return -EINVAL;
2436 }
2437
2438 static int process_session_retry_global(struct ceph_connection *con,
2439                                         void *p, void *end)
2440 {
2441         u64 global_seq;
2442         int ret;
2443
2444         if (con->state != CEPH_CON_S_V2_SESSION_RECONNECT) {
2445                 con->error_msg = "protocol error, unexpected session_retry_global";
2446                 return -EINVAL;
2447         }
2448
2449         ceph_decode_64_safe(&p, end, global_seq, bad);
2450
2451         dout("%s con %p global_seq %llu\n", __func__, con, global_seq);
2452         WARN_ON(global_seq <= con->v2.global_seq);
2453         con->v2.global_seq = ceph_get_global_seq(con->msgr, global_seq);
2454
2455         free_conn_bufs(con);
2456
2457         reset_out_kvecs(con);
2458         ret = prepare_session_reconnect(con);
2459         if (ret) {
2460                 pr_err("prepare_session_reconnect (gseq) failed: %d\n", ret);
2461                 return ret;
2462         }
2463
2464         return 0;
2465
2466 bad:
2467         pr_err("failed to decode session_retry_global\n");
2468         return -EINVAL;
2469 }
2470
2471 static int process_session_reset(struct ceph_connection *con,
2472                                  void *p, void *end)
2473 {
2474         bool full;
2475         int ret;
2476
2477         if (con->state != CEPH_CON_S_V2_SESSION_RECONNECT) {
2478                 con->error_msg = "protocol error, unexpected session_reset";
2479                 return -EINVAL;
2480         }
2481
2482         ceph_decode_8_safe(&p, end, full, bad);
2483         if (!full) {
2484                 con->error_msg = "protocol error, bad session_reset";
2485                 return -EINVAL;
2486         }
2487
2488         pr_info("%s%lld %s session reset\n", ENTITY_NAME(con->peer_name),
2489                 ceph_pr_addr(&con->peer_addr));
2490         ceph_con_reset_session(con);
2491
2492         mutex_unlock(&con->mutex);
2493         if (con->ops->peer_reset)
2494                 con->ops->peer_reset(con);
2495         mutex_lock(&con->mutex);
2496         if (con->state != CEPH_CON_S_V2_SESSION_RECONNECT) {
2497                 dout("%s con %p state changed to %d\n", __func__, con,
2498                      con->state);
2499                 return -EAGAIN;
2500         }
2501
2502         free_conn_bufs(con);
2503
2504         reset_out_kvecs(con);
2505         ret = prepare_client_ident(con);
2506         if (ret) {
2507                 pr_err("prepare_client_ident (rst) failed: %d\n", ret);
2508                 return ret;
2509         }
2510
2511         con->state = CEPH_CON_S_V2_SESSION_CONNECT;
2512         return 0;
2513
2514 bad:
2515         pr_err("failed to decode session_reset\n");
2516         return -EINVAL;
2517 }
2518
2519 static int process_keepalive2_ack(struct ceph_connection *con,
2520                                   void *p, void *end)
2521 {
2522         if (con->state != CEPH_CON_S_OPEN) {
2523                 con->error_msg = "protocol error, unexpected keepalive2_ack";
2524                 return -EINVAL;
2525         }
2526
2527         ceph_decode_need(&p, end, sizeof(struct ceph_timespec), bad);
2528         ceph_decode_timespec64(&con->last_keepalive_ack, p);
2529
2530         dout("%s con %p timestamp %lld.%09ld\n", __func__, con,
2531              con->last_keepalive_ack.tv_sec, con->last_keepalive_ack.tv_nsec);
2532
2533         return 0;
2534
2535 bad:
2536         pr_err("failed to decode keepalive2_ack\n");
2537         return -EINVAL;
2538 }
2539
2540 static int process_ack(struct ceph_connection *con, void *p, void *end)
2541 {
2542         u64 seq;
2543
2544         if (con->state != CEPH_CON_S_OPEN) {
2545                 con->error_msg = "protocol error, unexpected ack";
2546                 return -EINVAL;
2547         }
2548
2549         ceph_decode_64_safe(&p, end, seq, bad);
2550
2551         dout("%s con %p seq %llu\n", __func__, con, seq);
2552         ceph_con_discard_sent(con, seq);
2553         return 0;
2554
2555 bad:
2556         pr_err("failed to decode ack\n");
2557         return -EINVAL;
2558 }
2559
2560 static int process_control(struct ceph_connection *con, void *p, void *end)
2561 {
2562         int tag = con->v2.in_desc.fd_tag;
2563         int ret;
2564
2565         dout("%s con %p tag %d len %d\n", __func__, con, tag, (int)(end - p));
2566
2567         switch (tag) {
2568         case FRAME_TAG_HELLO:
2569                 ret = process_hello(con, p, end);
2570                 break;
2571         case FRAME_TAG_AUTH_BAD_METHOD:
2572                 ret = process_auth_bad_method(con, p, end);
2573                 break;
2574         case FRAME_TAG_AUTH_REPLY_MORE:
2575                 ret = process_auth_reply_more(con, p, end);
2576                 break;
2577         case FRAME_TAG_AUTH_DONE:
2578                 ret = process_auth_done(con, p, end);
2579                 break;
2580         case FRAME_TAG_AUTH_SIGNATURE:
2581                 ret = process_auth_signature(con, p, end);
2582                 break;
2583         case FRAME_TAG_SERVER_IDENT:
2584                 ret = process_server_ident(con, p, end);
2585                 break;
2586         case FRAME_TAG_IDENT_MISSING_FEATURES:
2587                 ret = process_ident_missing_features(con, p, end);
2588                 break;
2589         case FRAME_TAG_SESSION_RECONNECT_OK:
2590                 ret = process_session_reconnect_ok(con, p, end);
2591                 break;
2592         case FRAME_TAG_SESSION_RETRY:
2593                 ret = process_session_retry(con, p, end);
2594                 break;
2595         case FRAME_TAG_SESSION_RETRY_GLOBAL:
2596                 ret = process_session_retry_global(con, p, end);
2597                 break;
2598         case FRAME_TAG_SESSION_RESET:
2599                 ret = process_session_reset(con, p, end);
2600                 break;
2601         case FRAME_TAG_KEEPALIVE2_ACK:
2602                 ret = process_keepalive2_ack(con, p, end);
2603                 break;
2604         case FRAME_TAG_ACK:
2605                 ret = process_ack(con, p, end);
2606                 break;
2607         default:
2608                 pr_err("bad tag %d\n", tag);
2609                 con->error_msg = "protocol error, bad tag";
2610                 return -EINVAL;
2611         }
2612         if (ret) {
2613                 dout("%s con %p error %d\n", __func__, con, ret);
2614                 return ret;
2615         }
2616
2617         prepare_read_preamble(con);
2618         return 0;
2619 }
2620
2621 /*
2622  * Return:
2623  *   1 - con->in_msg set, read message
2624  *   0 - skip message
2625  *  <0 - error
2626  */
2627 static int process_message_header(struct ceph_connection *con,
2628                                   void *p, void *end)
2629 {
2630         struct ceph_frame_desc *desc = &con->v2.in_desc;
2631         struct ceph_msg_header2 *hdr2 = p;
2632         struct ceph_msg_header hdr;
2633         int skip;
2634         int ret;
2635         u64 seq;
2636
2637         /* verify seq# */
2638         seq = le64_to_cpu(hdr2->seq);
2639         if ((s64)seq - (s64)con->in_seq < 1) {
2640                 pr_info("%s%lld %s skipping old message: seq %llu, expected %llu\n",
2641                         ENTITY_NAME(con->peer_name),
2642                         ceph_pr_addr(&con->peer_addr),
2643                         seq, con->in_seq + 1);
2644                 return 0;
2645         }
2646         if ((s64)seq - (s64)con->in_seq > 1) {
2647                 pr_err("bad seq %llu, expected %llu\n", seq, con->in_seq + 1);
2648                 con->error_msg = "bad message sequence # for incoming message";
2649                 return -EBADE;
2650         }
2651
2652         ceph_con_discard_sent(con, le64_to_cpu(hdr2->ack_seq));
2653
2654         fill_header(&hdr, hdr2, desc->fd_lens[1], desc->fd_lens[2],
2655                     desc->fd_lens[3], &con->peer_name);
2656         ret = ceph_con_in_msg_alloc(con, &hdr, &skip);
2657         if (ret)
2658                 return ret;
2659
2660         WARN_ON(!con->in_msg ^ skip);
2661         if (skip)
2662                 return 0;
2663
2664         WARN_ON(!con->in_msg);
2665         WARN_ON(con->in_msg->con != con);
2666         return 1;
2667 }
2668
2669 static int process_message(struct ceph_connection *con)
2670 {
2671         ceph_con_process_message(con);
2672
2673         /*
2674          * We could have been closed by ceph_con_close() because
2675          * ceph_con_process_message() temporarily drops con->mutex.
2676          */
2677         if (con->state != CEPH_CON_S_OPEN) {
2678                 dout("%s con %p state changed to %d\n", __func__, con,
2679                      con->state);
2680                 return -EAGAIN;
2681         }
2682
2683         prepare_read_preamble(con);
2684         return 0;
2685 }
2686
2687 static int __handle_control(struct ceph_connection *con, void *p)
2688 {
2689         void *end = p + con->v2.in_desc.fd_lens[0];
2690         struct ceph_msg *msg;
2691         int ret;
2692
2693         if (con->v2.in_desc.fd_tag != FRAME_TAG_MESSAGE)
2694                 return process_control(con, p, end);
2695
2696         ret = process_message_header(con, p, end);
2697         if (ret < 0)
2698                 return ret;
2699         if (ret == 0) {
2700                 prepare_skip_message(con);
2701                 return 0;
2702         }
2703
2704         msg = con->in_msg;  /* set in process_message_header() */
2705         if (front_len(msg)) {
2706                 WARN_ON(front_len(msg) > msg->front_alloc_len);
2707                 msg->front.iov_len = front_len(msg);
2708         } else {
2709                 msg->front.iov_len = 0;
2710         }
2711         if (middle_len(msg)) {
2712                 WARN_ON(middle_len(msg) > msg->middle->alloc_len);
2713                 msg->middle->vec.iov_len = middle_len(msg);
2714         } else if (msg->middle) {
2715                 msg->middle->vec.iov_len = 0;
2716         }
2717
2718         if (!front_len(msg) && !middle_len(msg) && !data_len(msg))
2719                 return process_message(con);
2720
2721         if (con_secure(con))
2722                 return prepare_read_tail_secure(con);
2723
2724         return prepare_read_tail_plain(con);
2725 }
2726
2727 static int handle_preamble(struct ceph_connection *con)
2728 {
2729         struct ceph_frame_desc *desc = &con->v2.in_desc;
2730         int ret;
2731
2732         if (con_secure(con)) {
2733                 ret = decrypt_preamble(con);
2734                 if (ret) {
2735                         if (ret == -EBADMSG)
2736                                 con->error_msg = "integrity error, bad preamble auth tag";
2737                         return ret;
2738                 }
2739         }
2740
2741         ret = decode_preamble(con->v2.in_buf, desc);
2742         if (ret) {
2743                 if (ret == -EBADMSG)
2744                         con->error_msg = "integrity error, bad crc";
2745                 else
2746                         con->error_msg = "protocol error, bad preamble";
2747                 return ret;
2748         }
2749
2750         dout("%s con %p tag %d seg_cnt %d %d+%d+%d+%d\n", __func__,
2751              con, desc->fd_tag, desc->fd_seg_cnt, desc->fd_lens[0],
2752              desc->fd_lens[1], desc->fd_lens[2], desc->fd_lens[3]);
2753
2754         if (!con_secure(con))
2755                 return prepare_read_control(con);
2756
2757         if (desc->fd_lens[0] > CEPH_PREAMBLE_INLINE_LEN)
2758                 return prepare_read_control_remainder(con);
2759
2760         return __handle_control(con, CTRL_BODY(con->v2.in_buf));
2761 }
2762
2763 static int handle_control(struct ceph_connection *con)
2764 {
2765         int ctrl_len = con->v2.in_desc.fd_lens[0];
2766         void *buf;
2767         int ret;
2768
2769         WARN_ON(con_secure(con));
2770
2771         ret = verify_control_crc(con);
2772         if (ret) {
2773                 con->error_msg = "integrity error, bad crc";
2774                 return ret;
2775         }
2776
2777         if (con->state == CEPH_CON_S_V2_AUTH) {
2778                 buf = alloc_conn_buf(con, ctrl_len);
2779                 if (!buf)
2780                         return -ENOMEM;
2781
2782                 memcpy(buf, con->v2.in_kvecs[0].iov_base, ctrl_len);
2783                 return __handle_control(con, buf);
2784         }
2785
2786         return __handle_control(con, con->v2.in_kvecs[0].iov_base);
2787 }
2788
2789 static int handle_control_remainder(struct ceph_connection *con)
2790 {
2791         int ret;
2792
2793         WARN_ON(!con_secure(con));
2794
2795         ret = decrypt_control_remainder(con);
2796         if (ret) {
2797                 if (ret == -EBADMSG)
2798                         con->error_msg = "integrity error, bad control remainder auth tag";
2799                 return ret;
2800         }
2801
2802         return __handle_control(con, con->v2.in_kvecs[0].iov_base -
2803                                      CEPH_PREAMBLE_INLINE_LEN);
2804 }
2805
2806 static int handle_epilogue(struct ceph_connection *con)
2807 {
2808         u32 front_crc, middle_crc, data_crc;
2809         int ret;
2810
2811         if (con_secure(con)) {
2812                 ret = decrypt_tail(con);
2813                 if (ret) {
2814                         if (ret == -EBADMSG)
2815                                 con->error_msg = "integrity error, bad epilogue auth tag";
2816                         return ret;
2817                 }
2818
2819                 /* just late_status */
2820                 ret = decode_epilogue(con->v2.in_buf, NULL, NULL, NULL);
2821                 if (ret) {
2822                         con->error_msg = "protocol error, bad epilogue";
2823                         return ret;
2824                 }
2825         } else {
2826                 ret = decode_epilogue(con->v2.in_buf, &front_crc,
2827                                       &middle_crc, &data_crc);
2828                 if (ret) {
2829                         con->error_msg = "protocol error, bad epilogue";
2830                         return ret;
2831                 }
2832
2833                 ret = verify_epilogue_crcs(con, front_crc, middle_crc,
2834                                            data_crc);
2835                 if (ret) {
2836                         con->error_msg = "integrity error, bad crc";
2837                         return ret;
2838                 }
2839         }
2840
2841         return process_message(con);
2842 }
2843
2844 static void finish_skip(struct ceph_connection *con)
2845 {
2846         dout("%s con %p\n", __func__, con);
2847
2848         if (con_secure(con))
2849                 gcm_inc_nonce(&con->v2.in_gcm_nonce);
2850
2851         __finish_skip(con);
2852 }
2853
2854 static int populate_in_iter(struct ceph_connection *con)
2855 {
2856         int ret;
2857
2858         dout("%s con %p state %d in_state %d\n", __func__, con, con->state,
2859              con->v2.in_state);
2860         WARN_ON(iov_iter_count(&con->v2.in_iter));
2861
2862         if (con->state == CEPH_CON_S_V2_BANNER_PREFIX) {
2863                 ret = process_banner_prefix(con);
2864         } else if (con->state == CEPH_CON_S_V2_BANNER_PAYLOAD) {
2865                 ret = process_banner_payload(con);
2866         } else if ((con->state >= CEPH_CON_S_V2_HELLO &&
2867                     con->state <= CEPH_CON_S_V2_SESSION_RECONNECT) ||
2868                    con->state == CEPH_CON_S_OPEN) {
2869                 switch (con->v2.in_state) {
2870                 case IN_S_HANDLE_PREAMBLE:
2871                         ret = handle_preamble(con);
2872                         break;
2873                 case IN_S_HANDLE_CONTROL:
2874                         ret = handle_control(con);
2875                         break;
2876                 case IN_S_HANDLE_CONTROL_REMAINDER:
2877                         ret = handle_control_remainder(con);
2878                         break;
2879                 case IN_S_PREPARE_READ_DATA:
2880                         ret = prepare_read_data(con);
2881                         break;
2882                 case IN_S_PREPARE_READ_DATA_CONT:
2883                         prepare_read_data_cont(con);
2884                         ret = 0;
2885                         break;
2886                 case IN_S_PREPARE_READ_ENC_PAGE:
2887                         prepare_read_enc_page(con);
2888                         ret = 0;
2889                         break;
2890                 case IN_S_HANDLE_EPILOGUE:
2891                         ret = handle_epilogue(con);
2892                         break;
2893                 case IN_S_FINISH_SKIP:
2894                         finish_skip(con);
2895                         ret = 0;
2896                         break;
2897                 default:
2898                         WARN(1, "bad in_state %d", con->v2.in_state);
2899                         return -EINVAL;
2900                 }
2901         } else {
2902                 WARN(1, "bad state %d", con->state);
2903                 return -EINVAL;
2904         }
2905         if (ret) {
2906                 dout("%s con %p error %d\n", __func__, con, ret);
2907                 return ret;
2908         }
2909
2910         if (WARN_ON(!iov_iter_count(&con->v2.in_iter)))
2911                 return -ENODATA;
2912         dout("%s con %p populated %zu\n", __func__, con,
2913              iov_iter_count(&con->v2.in_iter));
2914         return 1;
2915 }
2916
2917 int ceph_con_v2_try_read(struct ceph_connection *con)
2918 {
2919         int ret;
2920
2921         dout("%s con %p state %d need %zu\n", __func__, con, con->state,
2922              iov_iter_count(&con->v2.in_iter));
2923
2924         if (con->state == CEPH_CON_S_PREOPEN)
2925                 return 0;
2926
2927         /*
2928          * We should always have something pending here.  If not,
2929          * avoid calling populate_in_iter() as if we read something
2930          * (ceph_tcp_recv() would immediately return 1).
2931          */
2932         if (WARN_ON(!iov_iter_count(&con->v2.in_iter)))
2933                 return -ENODATA;
2934
2935         for (;;) {
2936                 ret = ceph_tcp_recv(con);
2937                 if (ret <= 0)
2938                         return ret;
2939
2940                 ret = populate_in_iter(con);
2941                 if (ret <= 0) {
2942                         if (ret && ret != -EAGAIN && !con->error_msg)
2943                                 con->error_msg = "read processing error";
2944                         return ret;
2945                 }
2946         }
2947 }
2948
2949 static void queue_data(struct ceph_connection *con)
2950 {
2951         struct bio_vec bv;
2952
2953         con->v2.out_epil.data_crc = -1;
2954         ceph_msg_data_cursor_init(&con->v2.out_cursor, con->out_msg,
2955                                   data_len(con->out_msg));
2956
2957         get_bvec_at(&con->v2.out_cursor, &bv);
2958         set_out_bvec(con, &bv, true);
2959         con->v2.out_state = OUT_S_QUEUE_DATA_CONT;
2960 }
2961
2962 static void queue_data_cont(struct ceph_connection *con)
2963 {
2964         struct bio_vec bv;
2965
2966         con->v2.out_epil.data_crc = ceph_crc32c_page(
2967                 con->v2.out_epil.data_crc, con->v2.out_bvec.bv_page,
2968                 con->v2.out_bvec.bv_offset, con->v2.out_bvec.bv_len);
2969
2970         ceph_msg_data_advance(&con->v2.out_cursor, con->v2.out_bvec.bv_len);
2971         if (con->v2.out_cursor.total_resid) {
2972                 get_bvec_at(&con->v2.out_cursor, &bv);
2973                 set_out_bvec(con, &bv, true);
2974                 WARN_ON(con->v2.out_state != OUT_S_QUEUE_DATA_CONT);
2975                 return;
2976         }
2977
2978         /*
2979          * We've written all data.  Queue epilogue.  Once it's written,
2980          * we are done.
2981          */
2982         reset_out_kvecs(con);
2983         prepare_epilogue_plain(con, false);
2984         con->v2.out_state = OUT_S_FINISH_MESSAGE;
2985 }
2986
2987 static void queue_enc_page(struct ceph_connection *con)
2988 {
2989         struct bio_vec bv;
2990
2991         dout("%s con %p i %d resid %d\n", __func__, con, con->v2.out_enc_i,
2992              con->v2.out_enc_resid);
2993         WARN_ON(!con->v2.out_enc_resid);
2994
2995         bvec_set_page(&bv, con->v2.out_enc_pages[con->v2.out_enc_i],
2996                       min(con->v2.out_enc_resid, (int)PAGE_SIZE), 0);
2997
2998         set_out_bvec(con, &bv, false);
2999         con->v2.out_enc_i++;
3000         con->v2.out_enc_resid -= bv.bv_len;
3001
3002         if (con->v2.out_enc_resid) {
3003                 WARN_ON(con->v2.out_state != OUT_S_QUEUE_ENC_PAGE);
3004                 return;
3005         }
3006
3007         /*
3008          * We've queued the last piece of ciphertext (ending with
3009          * epilogue) + auth tag.  Once it's written, we are done.
3010          */
3011         WARN_ON(con->v2.out_enc_i != con->v2.out_enc_page_cnt);
3012         con->v2.out_state = OUT_S_FINISH_MESSAGE;
3013 }
3014
3015 static void queue_zeros(struct ceph_connection *con)
3016 {
3017         dout("%s con %p out_zero %d\n", __func__, con, con->v2.out_zero);
3018
3019         if (con->v2.out_zero) {
3020                 set_out_bvec_zero(con);
3021                 con->v2.out_zero -= con->v2.out_bvec.bv_len;
3022                 con->v2.out_state = OUT_S_QUEUE_ZEROS;
3023                 return;
3024         }
3025
3026         /*
3027          * We've zero-filled everything up to epilogue.  Queue epilogue
3028          * with late_status set to ABORTED and crcs adjusted for zeros.
3029          * Once it's written, we are done patching up for the revoke.
3030          */
3031         reset_out_kvecs(con);
3032         prepare_epilogue_plain(con, true);
3033         con->v2.out_state = OUT_S_FINISH_MESSAGE;
3034 }
3035
3036 static void finish_message(struct ceph_connection *con)
3037 {
3038         dout("%s con %p msg %p\n", __func__, con, con->out_msg);
3039
3040         /* we end up here both plain and secure modes */
3041         if (con->v2.out_enc_pages) {
3042                 WARN_ON(!con->v2.out_enc_page_cnt);
3043                 ceph_release_page_vector(con->v2.out_enc_pages,
3044                                          con->v2.out_enc_page_cnt);
3045                 con->v2.out_enc_pages = NULL;
3046                 con->v2.out_enc_page_cnt = 0;
3047         }
3048         /* message may have been revoked */
3049         if (con->out_msg) {
3050                 ceph_msg_put(con->out_msg);
3051                 con->out_msg = NULL;
3052         }
3053
3054         con->v2.out_state = OUT_S_GET_NEXT;
3055 }
3056
3057 static int populate_out_iter(struct ceph_connection *con)
3058 {
3059         int ret;
3060
3061         dout("%s con %p state %d out_state %d\n", __func__, con, con->state,
3062              con->v2.out_state);
3063         WARN_ON(iov_iter_count(&con->v2.out_iter));
3064
3065         if (con->state != CEPH_CON_S_OPEN) {
3066                 WARN_ON(con->state < CEPH_CON_S_V2_BANNER_PREFIX ||
3067                         con->state > CEPH_CON_S_V2_SESSION_RECONNECT);
3068                 goto nothing_pending;
3069         }
3070
3071         switch (con->v2.out_state) {
3072         case OUT_S_QUEUE_DATA:
3073                 WARN_ON(!con->out_msg);
3074                 queue_data(con);
3075                 goto populated;
3076         case OUT_S_QUEUE_DATA_CONT:
3077                 WARN_ON(!con->out_msg);
3078                 queue_data_cont(con);
3079                 goto populated;
3080         case OUT_S_QUEUE_ENC_PAGE:
3081                 queue_enc_page(con);
3082                 goto populated;
3083         case OUT_S_QUEUE_ZEROS:
3084                 WARN_ON(con->out_msg);  /* revoked */
3085                 queue_zeros(con);
3086                 goto populated;
3087         case OUT_S_FINISH_MESSAGE:
3088                 finish_message(con);
3089                 break;
3090         case OUT_S_GET_NEXT:
3091                 break;
3092         default:
3093                 WARN(1, "bad out_state %d", con->v2.out_state);
3094                 return -EINVAL;
3095         }
3096
3097         WARN_ON(con->v2.out_state != OUT_S_GET_NEXT);
3098         if (ceph_con_flag_test_and_clear(con, CEPH_CON_F_KEEPALIVE_PENDING)) {
3099                 ret = prepare_keepalive2(con);
3100                 if (ret) {
3101                         pr_err("prepare_keepalive2 failed: %d\n", ret);
3102                         return ret;
3103                 }
3104         } else if (!list_empty(&con->out_queue)) {
3105                 ceph_con_get_out_msg(con);
3106                 ret = prepare_message(con);
3107                 if (ret) {
3108                         pr_err("prepare_message failed: %d\n", ret);
3109                         return ret;
3110                 }
3111         } else if (con->in_seq > con->in_seq_acked) {
3112                 ret = prepare_ack(con);
3113                 if (ret) {
3114                         pr_err("prepare_ack failed: %d\n", ret);
3115                         return ret;
3116                 }
3117         } else {
3118                 goto nothing_pending;
3119         }
3120
3121 populated:
3122         if (WARN_ON(!iov_iter_count(&con->v2.out_iter)))
3123                 return -ENODATA;
3124         dout("%s con %p populated %zu\n", __func__, con,
3125              iov_iter_count(&con->v2.out_iter));
3126         return 1;
3127
3128 nothing_pending:
3129         WARN_ON(iov_iter_count(&con->v2.out_iter));
3130         dout("%s con %p nothing pending\n", __func__, con);
3131         ceph_con_flag_clear(con, CEPH_CON_F_WRITE_PENDING);
3132         return 0;
3133 }
3134
3135 int ceph_con_v2_try_write(struct ceph_connection *con)
3136 {
3137         int ret;
3138
3139         dout("%s con %p state %d have %zu\n", __func__, con, con->state,
3140              iov_iter_count(&con->v2.out_iter));
3141
3142         /* open the socket first? */
3143         if (con->state == CEPH_CON_S_PREOPEN) {
3144                 WARN_ON(con->peer_addr.type != CEPH_ENTITY_ADDR_TYPE_MSGR2);
3145
3146                 /*
3147                  * Always bump global_seq.  Bump connect_seq only if
3148                  * there is a session (i.e. we are reconnecting and will
3149                  * send session_reconnect instead of client_ident).
3150                  */
3151                 con->v2.global_seq = ceph_get_global_seq(con->msgr, 0);
3152                 if (con->v2.server_cookie)
3153                         con->v2.connect_seq++;
3154
3155                 ret = prepare_read_banner_prefix(con);
3156                 if (ret) {
3157                         pr_err("prepare_read_banner_prefix failed: %d\n", ret);
3158                         con->error_msg = "connect error";
3159                         return ret;
3160                 }
3161
3162                 reset_out_kvecs(con);
3163                 ret = prepare_banner(con);
3164                 if (ret) {
3165                         pr_err("prepare_banner failed: %d\n", ret);
3166                         con->error_msg = "connect error";
3167                         return ret;
3168                 }
3169
3170                 ret = ceph_tcp_connect(con);
3171                 if (ret) {
3172                         pr_err("ceph_tcp_connect failed: %d\n", ret);
3173                         con->error_msg = "connect error";
3174                         return ret;
3175                 }
3176         }
3177
3178         if (!iov_iter_count(&con->v2.out_iter)) {
3179                 ret = populate_out_iter(con);
3180                 if (ret <= 0) {
3181                         if (ret && ret != -EAGAIN && !con->error_msg)
3182                                 con->error_msg = "write processing error";
3183                         return ret;
3184                 }
3185         }
3186
3187         tcp_sock_set_cork(con->sock->sk, true);
3188         for (;;) {
3189                 ret = ceph_tcp_send(con);
3190                 if (ret <= 0)
3191                         break;
3192
3193                 ret = populate_out_iter(con);
3194                 if (ret <= 0) {
3195                         if (ret && ret != -EAGAIN && !con->error_msg)
3196                                 con->error_msg = "write processing error";
3197                         break;
3198                 }
3199         }
3200
3201         tcp_sock_set_cork(con->sock->sk, false);
3202         return ret;
3203 }
3204
3205 static u32 crc32c_zeros(u32 crc, int zero_len)
3206 {
3207         int len;
3208
3209         while (zero_len) {
3210                 len = min(zero_len, (int)PAGE_SIZE);
3211                 crc = crc32c(crc, page_address(ceph_zero_page), len);
3212                 zero_len -= len;
3213         }
3214
3215         return crc;
3216 }
3217
3218 static void prepare_zero_front(struct ceph_connection *con, int resid)
3219 {
3220         int sent;
3221
3222         WARN_ON(!resid || resid > front_len(con->out_msg));
3223         sent = front_len(con->out_msg) - resid;
3224         dout("%s con %p sent %d resid %d\n", __func__, con, sent, resid);
3225
3226         if (sent) {
3227                 con->v2.out_epil.front_crc =
3228                         crc32c(-1, con->out_msg->front.iov_base, sent);
3229                 con->v2.out_epil.front_crc =
3230                         crc32c_zeros(con->v2.out_epil.front_crc, resid);
3231         } else {
3232                 con->v2.out_epil.front_crc = crc32c_zeros(-1, resid);
3233         }
3234
3235         con->v2.out_iter.count -= resid;
3236         out_zero_add(con, resid);
3237 }
3238
3239 static void prepare_zero_middle(struct ceph_connection *con, int resid)
3240 {
3241         int sent;
3242
3243         WARN_ON(!resid || resid > middle_len(con->out_msg));
3244         sent = middle_len(con->out_msg) - resid;
3245         dout("%s con %p sent %d resid %d\n", __func__, con, sent, resid);
3246
3247         if (sent) {
3248                 con->v2.out_epil.middle_crc =
3249                         crc32c(-1, con->out_msg->middle->vec.iov_base, sent);
3250                 con->v2.out_epil.middle_crc =
3251                         crc32c_zeros(con->v2.out_epil.middle_crc, resid);
3252         } else {
3253                 con->v2.out_epil.middle_crc = crc32c_zeros(-1, resid);
3254         }
3255
3256         con->v2.out_iter.count -= resid;
3257         out_zero_add(con, resid);
3258 }
3259
3260 static void prepare_zero_data(struct ceph_connection *con)
3261 {
3262         dout("%s con %p\n", __func__, con);
3263         con->v2.out_epil.data_crc = crc32c_zeros(-1, data_len(con->out_msg));
3264         out_zero_add(con, data_len(con->out_msg));
3265 }
3266
3267 static void revoke_at_queue_data(struct ceph_connection *con)
3268 {
3269         int boundary;
3270         int resid;
3271
3272         WARN_ON(!data_len(con->out_msg));
3273         WARN_ON(!iov_iter_is_kvec(&con->v2.out_iter));
3274         resid = iov_iter_count(&con->v2.out_iter);
3275
3276         boundary = front_len(con->out_msg) + middle_len(con->out_msg);
3277         if (resid > boundary) {
3278                 resid -= boundary;
3279                 WARN_ON(resid > MESSAGE_HEAD_PLAIN_LEN);
3280                 dout("%s con %p was sending head\n", __func__, con);
3281                 if (front_len(con->out_msg))
3282                         prepare_zero_front(con, front_len(con->out_msg));
3283                 if (middle_len(con->out_msg))
3284                         prepare_zero_middle(con, middle_len(con->out_msg));
3285                 prepare_zero_data(con);
3286                 WARN_ON(iov_iter_count(&con->v2.out_iter) != resid);
3287                 con->v2.out_state = OUT_S_QUEUE_ZEROS;
3288                 return;
3289         }
3290
3291         boundary = middle_len(con->out_msg);
3292         if (resid > boundary) {
3293                 resid -= boundary;
3294                 dout("%s con %p was sending front\n", __func__, con);
3295                 prepare_zero_front(con, resid);
3296                 if (middle_len(con->out_msg))
3297                         prepare_zero_middle(con, middle_len(con->out_msg));
3298                 prepare_zero_data(con);
3299                 queue_zeros(con);
3300                 return;
3301         }
3302
3303         WARN_ON(!resid);
3304         dout("%s con %p was sending middle\n", __func__, con);
3305         prepare_zero_middle(con, resid);
3306         prepare_zero_data(con);
3307         queue_zeros(con);
3308 }
3309
3310 static void revoke_at_queue_data_cont(struct ceph_connection *con)
3311 {
3312         int sent, resid;  /* current piece of data */
3313
3314         WARN_ON(!data_len(con->out_msg));
3315         WARN_ON(!iov_iter_is_bvec(&con->v2.out_iter));
3316         resid = iov_iter_count(&con->v2.out_iter);
3317         WARN_ON(!resid || resid > con->v2.out_bvec.bv_len);
3318         sent = con->v2.out_bvec.bv_len - resid;
3319         dout("%s con %p sent %d resid %d\n", __func__, con, sent, resid);
3320
3321         if (sent) {
3322                 con->v2.out_epil.data_crc = ceph_crc32c_page(
3323                         con->v2.out_epil.data_crc, con->v2.out_bvec.bv_page,
3324                         con->v2.out_bvec.bv_offset, sent);
3325                 ceph_msg_data_advance(&con->v2.out_cursor, sent);
3326         }
3327         WARN_ON(resid > con->v2.out_cursor.total_resid);
3328         con->v2.out_epil.data_crc = crc32c_zeros(con->v2.out_epil.data_crc,
3329                                                 con->v2.out_cursor.total_resid);
3330
3331         con->v2.out_iter.count -= resid;
3332         out_zero_add(con, con->v2.out_cursor.total_resid);
3333         queue_zeros(con);
3334 }
3335
3336 static void revoke_at_finish_message(struct ceph_connection *con)
3337 {
3338         int boundary;
3339         int resid;
3340
3341         WARN_ON(!iov_iter_is_kvec(&con->v2.out_iter));
3342         resid = iov_iter_count(&con->v2.out_iter);
3343
3344         if (!front_len(con->out_msg) && !middle_len(con->out_msg) &&
3345             !data_len(con->out_msg)) {
3346                 WARN_ON(!resid || resid > MESSAGE_HEAD_PLAIN_LEN);
3347                 dout("%s con %p was sending head (empty message) - noop\n",
3348                      __func__, con);
3349                 return;
3350         }
3351
3352         boundary = front_len(con->out_msg) + middle_len(con->out_msg) +
3353                    CEPH_EPILOGUE_PLAIN_LEN;
3354         if (resid > boundary) {
3355                 resid -= boundary;
3356                 WARN_ON(resid > MESSAGE_HEAD_PLAIN_LEN);
3357                 dout("%s con %p was sending head\n", __func__, con);
3358                 if (front_len(con->out_msg))
3359                         prepare_zero_front(con, front_len(con->out_msg));
3360                 if (middle_len(con->out_msg))
3361                         prepare_zero_middle(con, middle_len(con->out_msg));
3362                 con->v2.out_iter.count -= CEPH_EPILOGUE_PLAIN_LEN;
3363                 WARN_ON(iov_iter_count(&con->v2.out_iter) != resid);
3364                 con->v2.out_state = OUT_S_QUEUE_ZEROS;
3365                 return;
3366         }
3367
3368         boundary = middle_len(con->out_msg) + CEPH_EPILOGUE_PLAIN_LEN;
3369         if (resid > boundary) {
3370                 resid -= boundary;
3371                 dout("%s con %p was sending front\n", __func__, con);
3372                 prepare_zero_front(con, resid);
3373                 if (middle_len(con->out_msg))
3374                         prepare_zero_middle(con, middle_len(con->out_msg));
3375                 con->v2.out_iter.count -= CEPH_EPILOGUE_PLAIN_LEN;
3376                 queue_zeros(con);
3377                 return;
3378         }
3379
3380         boundary = CEPH_EPILOGUE_PLAIN_LEN;
3381         if (resid > boundary) {
3382                 resid -= boundary;
3383                 dout("%s con %p was sending middle\n", __func__, con);
3384                 prepare_zero_middle(con, resid);
3385                 con->v2.out_iter.count -= CEPH_EPILOGUE_PLAIN_LEN;
3386                 queue_zeros(con);
3387                 return;
3388         }
3389
3390         WARN_ON(!resid);
3391         dout("%s con %p was sending epilogue - noop\n", __func__, con);
3392 }
3393
3394 void ceph_con_v2_revoke(struct ceph_connection *con)
3395 {
3396         WARN_ON(con->v2.out_zero);
3397
3398         if (con_secure(con)) {
3399                 WARN_ON(con->v2.out_state != OUT_S_QUEUE_ENC_PAGE &&
3400                         con->v2.out_state != OUT_S_FINISH_MESSAGE);
3401                 dout("%s con %p secure - noop\n", __func__, con);
3402                 return;
3403         }
3404
3405         switch (con->v2.out_state) {
3406         case OUT_S_QUEUE_DATA:
3407                 revoke_at_queue_data(con);
3408                 break;
3409         case OUT_S_QUEUE_DATA_CONT:
3410                 revoke_at_queue_data_cont(con);
3411                 break;
3412         case OUT_S_FINISH_MESSAGE:
3413                 revoke_at_finish_message(con);
3414                 break;
3415         default:
3416                 WARN(1, "bad out_state %d", con->v2.out_state);
3417                 break;
3418         }
3419 }
3420
3421 static void revoke_at_prepare_read_data(struct ceph_connection *con)
3422 {
3423         int remaining;
3424         int resid;
3425
3426         WARN_ON(con_secure(con));
3427         WARN_ON(!data_len(con->in_msg));
3428         WARN_ON(!iov_iter_is_kvec(&con->v2.in_iter));
3429         resid = iov_iter_count(&con->v2.in_iter);
3430         WARN_ON(!resid);
3431
3432         remaining = data_len(con->in_msg) + CEPH_EPILOGUE_PLAIN_LEN;
3433         dout("%s con %p resid %d remaining %d\n", __func__, con, resid,
3434              remaining);
3435         con->v2.in_iter.count -= resid;
3436         set_in_skip(con, resid + remaining);
3437         con->v2.in_state = IN_S_FINISH_SKIP;
3438 }
3439
3440 static void revoke_at_prepare_read_data_cont(struct ceph_connection *con)
3441 {
3442         int recved, resid;  /* current piece of data */
3443         int remaining;
3444
3445         WARN_ON(con_secure(con));
3446         WARN_ON(!data_len(con->in_msg));
3447         WARN_ON(!iov_iter_is_bvec(&con->v2.in_iter));
3448         resid = iov_iter_count(&con->v2.in_iter);
3449         WARN_ON(!resid || resid > con->v2.in_bvec.bv_len);
3450         recved = con->v2.in_bvec.bv_len - resid;
3451         dout("%s con %p recved %d resid %d\n", __func__, con, recved, resid);
3452
3453         if (recved)
3454                 ceph_msg_data_advance(&con->v2.in_cursor, recved);
3455         WARN_ON(resid > con->v2.in_cursor.total_resid);
3456
3457         remaining = CEPH_EPILOGUE_PLAIN_LEN;
3458         dout("%s con %p total_resid %zu remaining %d\n", __func__, con,
3459              con->v2.in_cursor.total_resid, remaining);
3460         con->v2.in_iter.count -= resid;
3461         set_in_skip(con, con->v2.in_cursor.total_resid + remaining);
3462         con->v2.in_state = IN_S_FINISH_SKIP;
3463 }
3464
3465 static void revoke_at_prepare_read_enc_page(struct ceph_connection *con)
3466 {
3467         int resid;  /* current enc page (not necessarily data) */
3468
3469         WARN_ON(!con_secure(con));
3470         WARN_ON(!iov_iter_is_bvec(&con->v2.in_iter));
3471         resid = iov_iter_count(&con->v2.in_iter);
3472         WARN_ON(!resid || resid > con->v2.in_bvec.bv_len);
3473
3474         dout("%s con %p resid %d enc_resid %d\n", __func__, con, resid,
3475              con->v2.in_enc_resid);
3476         con->v2.in_iter.count -= resid;
3477         set_in_skip(con, resid + con->v2.in_enc_resid);
3478         con->v2.in_state = IN_S_FINISH_SKIP;
3479 }
3480
3481 static void revoke_at_handle_epilogue(struct ceph_connection *con)
3482 {
3483         int resid;
3484
3485         resid = iov_iter_count(&con->v2.in_iter);
3486         WARN_ON(!resid);
3487
3488         dout("%s con %p resid %d\n", __func__, con, resid);
3489         con->v2.in_iter.count -= resid;
3490         set_in_skip(con, resid);
3491         con->v2.in_state = IN_S_FINISH_SKIP;
3492 }
3493
3494 void ceph_con_v2_revoke_incoming(struct ceph_connection *con)
3495 {
3496         switch (con->v2.in_state) {
3497         case IN_S_PREPARE_READ_DATA:
3498                 revoke_at_prepare_read_data(con);
3499                 break;
3500         case IN_S_PREPARE_READ_DATA_CONT:
3501                 revoke_at_prepare_read_data_cont(con);
3502                 break;
3503         case IN_S_PREPARE_READ_ENC_PAGE:
3504                 revoke_at_prepare_read_enc_page(con);
3505                 break;
3506         case IN_S_HANDLE_EPILOGUE:
3507                 revoke_at_handle_epilogue(con);
3508                 break;
3509         default:
3510                 WARN(1, "bad in_state %d", con->v2.in_state);
3511                 break;
3512         }
3513 }
3514
3515 bool ceph_con_v2_opened(struct ceph_connection *con)
3516 {
3517         return con->v2.peer_global_seq;
3518 }
3519
3520 void ceph_con_v2_reset_session(struct ceph_connection *con)
3521 {
3522         con->v2.client_cookie = 0;
3523         con->v2.server_cookie = 0;
3524         con->v2.global_seq = 0;
3525         con->v2.connect_seq = 0;
3526         con->v2.peer_global_seq = 0;
3527 }
3528
3529 void ceph_con_v2_reset_protocol(struct ceph_connection *con)
3530 {
3531         iov_iter_truncate(&con->v2.in_iter, 0);
3532         iov_iter_truncate(&con->v2.out_iter, 0);
3533         con->v2.out_zero = 0;
3534
3535         clear_in_sign_kvecs(con);
3536         clear_out_sign_kvecs(con);
3537         free_conn_bufs(con);
3538
3539         if (con->v2.in_enc_pages) {
3540                 WARN_ON(!con->v2.in_enc_page_cnt);
3541                 ceph_release_page_vector(con->v2.in_enc_pages,
3542                                          con->v2.in_enc_page_cnt);
3543                 con->v2.in_enc_pages = NULL;
3544                 con->v2.in_enc_page_cnt = 0;
3545         }
3546         if (con->v2.out_enc_pages) {
3547                 WARN_ON(!con->v2.out_enc_page_cnt);
3548                 ceph_release_page_vector(con->v2.out_enc_pages,
3549                                          con->v2.out_enc_page_cnt);
3550                 con->v2.out_enc_pages = NULL;
3551                 con->v2.out_enc_page_cnt = 0;
3552         }
3553
3554         con->v2.con_mode = CEPH_CON_MODE_UNKNOWN;
3555         memzero_explicit(&con->v2.in_gcm_nonce, CEPH_GCM_IV_LEN);
3556         memzero_explicit(&con->v2.out_gcm_nonce, CEPH_GCM_IV_LEN);
3557
3558         if (con->v2.hmac_tfm) {
3559                 crypto_free_shash(con->v2.hmac_tfm);
3560                 con->v2.hmac_tfm = NULL;
3561         }
3562         if (con->v2.gcm_req) {
3563                 aead_request_free(con->v2.gcm_req);
3564                 con->v2.gcm_req = NULL;
3565         }
3566         if (con->v2.gcm_tfm) {
3567                 crypto_free_aead(con->v2.gcm_tfm);
3568                 con->v2.gcm_tfm = NULL;
3569         }
3570 }