lib/iov_iter.c (platform/kernel/linux-starfive.git)
1 // SPDX-License-Identifier: GPL-2.0-only
2 #include <crypto/hash.h>
3 #include <linux/export.h>
4 #include <linux/bvec.h>
5 #include <linux/uio.h>
6 #include <linux/pagemap.h>
7 #include <linux/slab.h>
8 #include <linux/vmalloc.h>
9 #include <linux/splice.h>
10 #include <net/checksum.h>
11 #include <linux/scatterlist.h>
12 #include <linux/instrumented.h>
13
14 #define PIPE_PARANOIA /* for now */
15
16 #define iterate_iovec(i, n, __v, __p, skip, STEP) {     \
17         size_t left;                                    \
18         size_t wanted = n;                              \
19         __p = i->iov;                                   \
20         __v.iov_len = min(n, __p->iov_len - skip);      \
21         if (likely(__v.iov_len)) {                      \
22                 __v.iov_base = __p->iov_base + skip;    \
23                 left = (STEP);                          \
24                 __v.iov_len -= left;                    \
25                 skip += __v.iov_len;                    \
26                 n -= __v.iov_len;                       \
27         } else {                                        \
28                 left = 0;                               \
29         }                                               \
30         while (unlikely(!left && n)) {                  \
31                 __p++;                                  \
32                 __v.iov_len = min(n, __p->iov_len);     \
33                 if (unlikely(!__v.iov_len))             \
34                         continue;                       \
35                 __v.iov_base = __p->iov_base;           \
36                 left = (STEP);                          \
37                 __v.iov_len -= left;                    \
38                 skip = __v.iov_len;                     \
39                 n -= __v.iov_len;                       \
40         }                                               \
41         n = wanted - n;                                 \
42 }
43
44 #define iterate_kvec(i, n, __v, __p, skip, STEP) {      \
45         size_t wanted = n;                              \
46         __p = i->kvec;                                  \
47         __v.iov_len = min(n, __p->iov_len - skip);      \
48         if (likely(__v.iov_len)) {                      \
49                 __v.iov_base = __p->iov_base + skip;    \
50                 (void)(STEP);                           \
51                 skip += __v.iov_len;                    \
52                 n -= __v.iov_len;                       \
53         }                                               \
54         while (unlikely(n)) {                           \
55                 __p++;                                  \
56                 __v.iov_len = min(n, __p->iov_len);     \
57                 if (unlikely(!__v.iov_len))             \
58                         continue;                       \
59                 __v.iov_base = __p->iov_base;           \
60                 (void)(STEP);                           \
61                 skip = __v.iov_len;                     \
62                 n -= __v.iov_len;                       \
63         }                                               \
64         n = wanted;                                     \
65 }
66
67 #define iterate_bvec(i, n, __v, __bi, skip, STEP) {     \
68         struct bvec_iter __start;                       \
69         __start.bi_size = n;                            \
70         __start.bi_bvec_done = skip;                    \
71         __start.bi_idx = 0;                             \
72         for_each_bvec(__v, i->bvec, __bi, __start) {    \
73                 if (!__v.bv_len)                        \
74                         continue;                       \
75                 (void)(STEP);                           \
76         }                                               \
77 }
78
79 #define iterate_all_kinds(i, n, v, I, B, K) {                   \
80         if (likely(n)) {                                        \
81                 size_t skip = i->iov_offset;                    \
82                 if (unlikely(i->type & ITER_BVEC)) {            \
83                         struct bio_vec v;                       \
84                         struct bvec_iter __bi;                  \
85                         iterate_bvec(i, n, v, __bi, skip, (B))  \
86                 } else if (unlikely(i->type & ITER_KVEC)) {     \
87                         const struct kvec *kvec;                \
88                         struct kvec v;                          \
89                         iterate_kvec(i, n, v, kvec, skip, (K))  \
90                 } else if (unlikely(i->type & ITER_DISCARD)) {  \
91                 } else {                                        \
92                         const struct iovec *iov;                \
93                         struct iovec v;                         \
94                         iterate_iovec(i, n, v, iov, skip, (I))  \
95                 }                                               \
96         }                                                       \
97 }
98
99 #define iterate_and_advance(i, n, v, I, B, K) {                 \
100         if (unlikely(i->count < n))                             \
101                 n = i->count;                                   \
102         if (i->count) {                                         \
103                 size_t skip = i->iov_offset;                    \
104                 if (unlikely(i->type & ITER_BVEC)) {            \
105                         const struct bio_vec *bvec = i->bvec;   \
106                         struct bio_vec v;                       \
107                         struct bvec_iter __bi;                  \
108                         iterate_bvec(i, n, v, __bi, skip, (B))  \
109                         i->bvec = __bvec_iter_bvec(i->bvec, __bi);      \
110                         i->nr_segs -= i->bvec - bvec;           \
111                         skip = __bi.bi_bvec_done;               \
112                 } else if (unlikely(i->type & ITER_KVEC)) {     \
113                         const struct kvec *kvec;                \
114                         struct kvec v;                          \
115                         iterate_kvec(i, n, v, kvec, skip, (K))  \
116                         if (skip == kvec->iov_len) {            \
117                                 kvec++;                         \
118                                 skip = 0;                       \
119                         }                                       \
120                         i->nr_segs -= kvec - i->kvec;           \
121                         i->kvec = kvec;                         \
122                 } else if (unlikely(i->type & ITER_DISCARD)) {  \
123                         skip += n;                              \
124                 } else {                                        \
125                         const struct iovec *iov;                \
126                         struct iovec v;                         \
127                         iterate_iovec(i, n, v, iov, skip, (I))  \
128                         if (skip == iov->iov_len) {             \
129                                 iov++;                          \
130                                 skip = 0;                       \
131                         }                                       \
132                         i->nr_segs -= iov - i->iov;             \
133                         i->iov = iov;                           \
134                 }                                               \
135                 i->count -= n;                                  \
136                 i->iov_offset = skip;                           \
137         }                                                       \
138 }
139
140 static int copyout(void __user *to, const void *from, size_t n)
141 {
142         if (access_ok(to, n)) {
143                 instrument_copy_to_user(to, from, n);
144                 n = raw_copy_to_user(to, from, n);
145         }
146         return n;
147 }
148
149 static int copyin(void *to, const void __user *from, size_t n)
150 {
151         if (access_ok(from, n)) {
152                 instrument_copy_from_user(to, from, n);
153                 n = raw_copy_from_user(to, from, n);
154         }
155         return n;
156 }
157
158 static size_t copy_page_to_iter_iovec(struct page *page, size_t offset, size_t bytes,
159                          struct iov_iter *i)
160 {
161         size_t skip, copy, left, wanted;
162         const struct iovec *iov;
163         char __user *buf;
164         void *kaddr, *from;
165
166         if (unlikely(bytes > i->count))
167                 bytes = i->count;
168
169         if (unlikely(!bytes))
170                 return 0;
171
172         might_fault();
173         wanted = bytes;
174         iov = i->iov;
175         skip = i->iov_offset;
176         buf = iov->iov_base + skip;
177         copy = min(bytes, iov->iov_len - skip);
178
179         if (IS_ENABLED(CONFIG_HIGHMEM) && !fault_in_pages_writeable(buf, copy)) {
180                 kaddr = kmap_atomic(page);
181                 from = kaddr + offset;
182
183                 /* first chunk, usually the only one */
184                 left = copyout(buf, from, copy);
185                 copy -= left;
186                 skip += copy;
187                 from += copy;
188                 bytes -= copy;
189
190                 while (unlikely(!left && bytes)) {
191                         iov++;
192                         buf = iov->iov_base;
193                         copy = min(bytes, iov->iov_len);
194                         left = copyout(buf, from, copy);
195                         copy -= left;
196                         skip = copy;
197                         from += copy;
198                         bytes -= copy;
199                 }
200                 if (likely(!bytes)) {
201                         kunmap_atomic(kaddr);
202                         goto done;
203                 }
204                 offset = from - kaddr;
205                 buf += copy;
206                 kunmap_atomic(kaddr);
207                 copy = min(bytes, iov->iov_len - skip);
208         }
209         /* Too bad - revert to non-atomic kmap */
210
211         kaddr = kmap(page);
212         from = kaddr + offset;
213         left = copyout(buf, from, copy);
214         copy -= left;
215         skip += copy;
216         from += copy;
217         bytes -= copy;
218         while (unlikely(!left && bytes)) {
219                 iov++;
220                 buf = iov->iov_base;
221                 copy = min(bytes, iov->iov_len);
222                 left = copyout(buf, from, copy);
223                 copy -= left;
224                 skip = copy;
225                 from += copy;
226                 bytes -= copy;
227         }
228         kunmap(page);
229
230 done:
231         if (skip == iov->iov_len) {
232                 iov++;
233                 skip = 0;
234         }
235         i->count -= wanted - bytes;
236         i->nr_segs -= iov - i->iov;
237         i->iov = iov;
238         i->iov_offset = skip;
239         return wanted - bytes;
240 }
241
242 static size_t copy_page_from_iter_iovec(struct page *page, size_t offset, size_t bytes,
243                          struct iov_iter *i)
244 {
245         size_t skip, copy, left, wanted;
246         const struct iovec *iov;
247         char __user *buf;
248         void *kaddr, *to;
249
250         if (unlikely(bytes > i->count))
251                 bytes = i->count;
252
253         if (unlikely(!bytes))
254                 return 0;
255
256         might_fault();
257         wanted = bytes;
258         iov = i->iov;
259         skip = i->iov_offset;
260         buf = iov->iov_base + skip;
261         copy = min(bytes, iov->iov_len - skip);
262
263         if (IS_ENABLED(CONFIG_HIGHMEM) && !fault_in_pages_readable(buf, copy)) {
264                 kaddr = kmap_atomic(page);
265                 to = kaddr + offset;
266
267                 /* first chunk, usually the only one */
268                 left = copyin(to, buf, copy);
269                 copy -= left;
270                 skip += copy;
271                 to += copy;
272                 bytes -= copy;
273
274                 while (unlikely(!left && bytes)) {
275                         iov++;
276                         buf = iov->iov_base;
277                         copy = min(bytes, iov->iov_len);
278                         left = copyin(to, buf, copy);
279                         copy -= left;
280                         skip = copy;
281                         to += copy;
282                         bytes -= copy;
283                 }
284                 if (likely(!bytes)) {
285                         kunmap_atomic(kaddr);
286                         goto done;
287                 }
288                 offset = to - kaddr;
289                 buf += copy;
290                 kunmap_atomic(kaddr);
291                 copy = min(bytes, iov->iov_len - skip);
292         }
293         /* Too bad - revert to non-atomic kmap */
294
295         kaddr = kmap(page);
296         to = kaddr + offset;
297         left = copyin(to, buf, copy);
298         copy -= left;
299         skip += copy;
300         to += copy;
301         bytes -= copy;
302         while (unlikely(!left && bytes)) {
303                 iov++;
304                 buf = iov->iov_base;
305                 copy = min(bytes, iov->iov_len);
306                 left = copyin(to, buf, copy);
307                 copy -= left;
308                 skip = copy;
309                 to += copy;
310                 bytes -= copy;
311         }
312         kunmap(page);
313
314 done:
315         if (skip == iov->iov_len) {
316                 iov++;
317                 skip = 0;
318         }
319         i->count -= wanted - bytes;
320         i->nr_segs -= iov - i->iov;
321         i->iov = iov;
322         i->iov_offset = skip;
323         return wanted - bytes;
324 }
325
326 #ifdef PIPE_PARANOIA
327 static bool sanity(const struct iov_iter *i)
328 {
329         struct pipe_inode_info *pipe = i->pipe;
330         unsigned int p_head = pipe->head;
331         unsigned int p_tail = pipe->tail;
332         unsigned int p_mask = pipe->ring_size - 1;
333         unsigned int p_occupancy = pipe_occupancy(p_head, p_tail);
334         unsigned int i_head = i->head;
335         unsigned int idx;
336
337         if (i->iov_offset) {
338                 struct pipe_buffer *p;
339                 if (unlikely(p_occupancy == 0))
340                         goto Bad;       // pipe must be non-empty
341                 if (unlikely(i_head != p_head - 1))
342                         goto Bad;       // must be at the last buffer...
343
344                 p = &pipe->bufs[i_head & p_mask];
345                 if (unlikely(p->offset + p->len != i->iov_offset))
346                         goto Bad;       // ... at the end of segment
347         } else {
348                 if (i_head != p_head)
349                         goto Bad;       // must be right after the last buffer
350         }
351         return true;
352 Bad:
353         printk(KERN_ERR "idx = %d, offset = %zd\n", i_head, i->iov_offset);
354         printk(KERN_ERR "head = %d, tail = %d, buffers = %d\n",
355                         p_head, p_tail, pipe->ring_size);
356         for (idx = 0; idx < pipe->ring_size; idx++)
357                 printk(KERN_ERR "[%p %p %d %d]\n",
358                         pipe->bufs[idx].ops,
359                         pipe->bufs[idx].page,
360                         pipe->bufs[idx].offset,
361                         pipe->bufs[idx].len);
362         WARN_ON(1);
363         return false;
364 }
365 #else
366 #define sanity(i) true
367 #endif
368
369 static size_t copy_page_to_iter_pipe(struct page *page, size_t offset, size_t bytes,
370                          struct iov_iter *i)
371 {
372         struct pipe_inode_info *pipe = i->pipe;
373         struct pipe_buffer *buf;
374         unsigned int p_tail = pipe->tail;
375         unsigned int p_mask = pipe->ring_size - 1;
376         unsigned int i_head = i->head;
377         size_t off;
378
379         if (unlikely(bytes > i->count))
380                 bytes = i->count;
381
382         if (unlikely(!bytes))
383                 return 0;
384
385         if (!sanity(i))
386                 return 0;
387
388         off = i->iov_offset;
389         buf = &pipe->bufs[i_head & p_mask];
390         if (off) {
391                 if (offset == off && buf->page == page) {
392                         /* merge with the last one */
393                         buf->len += bytes;
394                         i->iov_offset += bytes;
395                         goto out;
396                 }
397                 i_head++;
398                 buf = &pipe->bufs[i_head & p_mask];
399         }
400         if (pipe_full(i_head, p_tail, pipe->max_usage))
401                 return 0;
402
403         buf->ops = &page_cache_pipe_buf_ops;
404         get_page(page);
405         buf->page = page;
406         buf->offset = offset;
407         buf->len = bytes;
408
409         pipe->head = i_head + 1;
410         i->iov_offset = offset + bytes;
411         i->head = i_head;
412 out:
413         i->count -= bytes;
414         return bytes;
415 }
416
417 /*
418  * Fault in one or more iovecs of the given iov_iter, to a maximum length of
419  * bytes.  For each iovec, fault in each page that constitutes the iovec.
420  *
421  * Return 0 on success, or non-zero if the memory could not be accessed (e.g.
422  * because it is an invalid address).
423  */
424 int iov_iter_fault_in_readable(struct iov_iter *i, size_t bytes)
425 {
426         size_t skip = i->iov_offset;
427         const struct iovec *iov;
428         int err;
429         struct iovec v;
430
431         if (!(i->type & (ITER_BVEC|ITER_KVEC))) {
432                 iterate_iovec(i, bytes, v, iov, skip, ({
433                         err = fault_in_pages_readable(v.iov_base, v.iov_len);
434                         if (unlikely(err))
435                                 return err;
436                 0;}))
437         }
438         return 0;
439 }
440 EXPORT_SYMBOL(iov_iter_fault_in_readable);
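
/*
 * Illustrative sketch only (not part of this file): a typical buffered
 * write path pre-faults the source with iov_iter_fault_in_readable()
 * before taking locks under which a page fault cannot be serviced, and
 * then does the real copy with iov_iter_copy_from_user_atomic(), which is
 * defined later in this file.  The function and variable names below are
 * placeholders, not code from any particular caller.
 */
#if 0
static size_t example_write_step(struct page *page, unsigned long offset,
				 size_t bytes, struct iov_iter *i)
{
	/* Fault the user pages in while we may still sleep. */
	if (unlikely(iov_iter_fault_in_readable(i, bytes)))
		return 0;

	/* Now the atomic copy is very likely to succeed in full. */
	return iov_iter_copy_from_user_atomic(page, i, offset, bytes);
}
#endif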
441
442 void iov_iter_init(struct iov_iter *i, unsigned int direction,
443                         const struct iovec *iov, unsigned long nr_segs,
444                         size_t count)
445 {
446         WARN_ON(direction & ~(READ | WRITE));
447         direction &= READ | WRITE;
448
449         /* It will get better.  Eventually... */
450         if (uaccess_kernel()) {
451                 i->type = ITER_KVEC | direction;
452                 i->kvec = (struct kvec *)iov;
453         } else {
454                 i->type = ITER_IOVEC | direction;
455                 i->iov = iov;
456         }
457         i->nr_segs = nr_segs;
458         i->iov_offset = 0;
459         i->count = count;
460 }
461 EXPORT_SYMBOL(iov_iter_init);
462
463 static void memcpy_from_page(char *to, struct page *page, size_t offset, size_t len)
464 {
465         char *from = kmap_atomic(page);
466         memcpy(to, from + offset, len);
467         kunmap_atomic(from);
468 }
469
470 static void memcpy_to_page(struct page *page, size_t offset, const char *from, size_t len)
471 {
472         char *to = kmap_atomic(page);
473         memcpy(to + offset, from, len);
474         kunmap_atomic(to);
475 }
476
477 static void memzero_page(struct page *page, size_t offset, size_t len)
478 {
479         char *addr = kmap_atomic(page);
480         memset(addr + offset, 0, len);
481         kunmap_atomic(addr);
482 }
483
484 static inline bool allocated(struct pipe_buffer *buf)
485 {
486         return buf->ops == &default_pipe_buf_ops;
487 }
488
489 static inline void data_start(const struct iov_iter *i,
490                               unsigned int *iter_headp, size_t *offp)
491 {
492         unsigned int p_mask = i->pipe->ring_size - 1;
493         unsigned int iter_head = i->head;
494         size_t off = i->iov_offset;
495
496         if (off && (!allocated(&i->pipe->bufs[iter_head & p_mask]) ||
497                     off == PAGE_SIZE)) {
498                 iter_head++;
499                 off = 0;
500         }
501         *iter_headp = iter_head;
502         *offp = off;
503 }
504
505 static size_t push_pipe(struct iov_iter *i, size_t size,
506                         int *iter_headp, size_t *offp)
507 {
508         struct pipe_inode_info *pipe = i->pipe;
509         unsigned int p_tail = pipe->tail;
510         unsigned int p_mask = pipe->ring_size - 1;
511         unsigned int iter_head;
512         size_t off;
513         ssize_t left;
514
515         if (unlikely(size > i->count))
516                 size = i->count;
517         if (unlikely(!size))
518                 return 0;
519
520         left = size;
521         data_start(i, &iter_head, &off);
522         *iter_headp = iter_head;
523         *offp = off;
524         if (off) {
525                 left -= PAGE_SIZE - off;
526                 if (left <= 0) {
527                         pipe->bufs[iter_head & p_mask].len += size;
528                         return size;
529                 }
530                 pipe->bufs[iter_head & p_mask].len = PAGE_SIZE;
531                 iter_head++;
532         }
533         while (!pipe_full(iter_head, p_tail, pipe->max_usage)) {
534                 struct pipe_buffer *buf = &pipe->bufs[iter_head & p_mask];
535                 struct page *page = alloc_page(GFP_USER);
536                 if (!page)
537                         break;
538
539                 buf->ops = &default_pipe_buf_ops;
540                 buf->page = page;
541                 buf->offset = 0;
542                 buf->len = min_t(ssize_t, left, PAGE_SIZE);
543                 left -= buf->len;
544                 iter_head++;
545                 pipe->head = iter_head;
546
547                 if (left == 0)
548                         return size;
549         }
550         return size - left;
551 }
552
553 static size_t copy_pipe_to_iter(const void *addr, size_t bytes,
554                                 struct iov_iter *i)
555 {
556         struct pipe_inode_info *pipe = i->pipe;
557         unsigned int p_mask = pipe->ring_size - 1;
558         unsigned int i_head;
559         size_t n, off;
560
561         if (!sanity(i))
562                 return 0;
563
564         bytes = n = push_pipe(i, bytes, &i_head, &off);
565         if (unlikely(!n))
566                 return 0;
567         do {
568                 size_t chunk = min_t(size_t, n, PAGE_SIZE - off);
569                 memcpy_to_page(pipe->bufs[i_head & p_mask].page, off, addr, chunk);
570                 i->head = i_head;
571                 i->iov_offset = off + chunk;
572                 n -= chunk;
573                 addr += chunk;
574                 off = 0;
575                 i_head++;
576         } while (n);
577         i->count -= bytes;
578         return bytes;
579 }
580
581 static __wsum csum_and_memcpy(void *to, const void *from, size_t len,
582                               __wsum sum, size_t off)
583 {
584         __wsum next = csum_partial_copy_nocheck(from, to, len, 0);
585         return csum_block_add(sum, next, off);
586 }
587
588 static size_t csum_and_copy_to_pipe_iter(const void *addr, size_t bytes,
589                                 __wsum *csum, struct iov_iter *i)
590 {
591         struct pipe_inode_info *pipe = i->pipe;
592         unsigned int p_mask = pipe->ring_size - 1;
593         unsigned int i_head;
594         size_t n, r;
595         size_t off = 0;
596         __wsum sum = *csum;
597
598         if (!sanity(i))
599                 return 0;
600
601         bytes = n = push_pipe(i, bytes, &i_head, &r);
602         if (unlikely(!n))
603                 return 0;
604         do {
605                 size_t chunk = min_t(size_t, n, PAGE_SIZE - r);
606                 char *p = kmap_atomic(pipe->bufs[i_head & p_mask].page);
607                 sum = csum_and_memcpy(p + r, addr, chunk, sum, off);
608                 kunmap_atomic(p);
609                 i->head = i_head;
610                 i->iov_offset = r + chunk;
611                 n -= chunk;
612                 off += chunk;
613                 addr += chunk;
614                 r = 0;
615                 i_head++;
616         } while (n);
617         i->count -= bytes;
618         *csum = sum;
619         return bytes;
620 }
621
622 size_t _copy_to_iter(const void *addr, size_t bytes, struct iov_iter *i)
623 {
624         const char *from = addr;
625         if (unlikely(iov_iter_is_pipe(i)))
626                 return copy_pipe_to_iter(addr, bytes, i);
627         if (iter_is_iovec(i))
628                 might_fault();
629         iterate_and_advance(i, bytes, v,
630                 copyout(v.iov_base, (from += v.iov_len) - v.iov_len, v.iov_len),
631                 memcpy_to_page(v.bv_page, v.bv_offset,
632                                (from += v.bv_len) - v.bv_len, v.bv_len),
633                 memcpy(v.iov_base, (from += v.iov_len) - v.iov_len, v.iov_len)
634         )
635
636         return bytes;
637 }
638 EXPORT_SYMBOL(_copy_to_iter);
639
640 #ifdef CONFIG_ARCH_HAS_UACCESS_MCSAFE
641 static int copyout_mcsafe(void __user *to, const void *from, size_t n)
642 {
643         if (access_ok(to, n)) {
644                 instrument_copy_to_user(to, from, n);
645                 n = copy_to_user_mcsafe((__force void *) to, from, n);
646         }
647         return n;
648 }
649
650 static unsigned long memcpy_mcsafe_to_page(struct page *page, size_t offset,
651                 const char *from, size_t len)
652 {
653         unsigned long ret;
654         char *to;
655
656         to = kmap_atomic(page);
657         ret = memcpy_mcsafe(to + offset, from, len);
658         kunmap_atomic(to);
659
660         return ret;
661 }
662
663 static size_t copy_pipe_to_iter_mcsafe(const void *addr, size_t bytes,
664                                 struct iov_iter *i)
665 {
666         struct pipe_inode_info *pipe = i->pipe;
667         unsigned int p_mask = pipe->ring_size - 1;
668         unsigned int i_head;
669         size_t n, off, xfer = 0;
670
671         if (!sanity(i))
672                 return 0;
673
674         bytes = n = push_pipe(i, bytes, &i_head, &off);
675         if (unlikely(!n))
676                 return 0;
677         do {
678                 size_t chunk = min_t(size_t, n, PAGE_SIZE - off);
679                 unsigned long rem;
680
681                 rem = memcpy_mcsafe_to_page(pipe->bufs[i_head & p_mask].page,
682                                             off, addr, chunk);
683                 i->head = i_head;
684                 i->iov_offset = off + chunk - rem;
685                 xfer += chunk - rem;
686                 if (rem)
687                         break;
688                 n -= chunk;
689                 addr += chunk;
690                 off = 0;
691                 i_head++;
692         } while (n);
693         i->count -= xfer;
694         return xfer;
695 }
696
697 /**
698  * _copy_to_iter_mcsafe - copy to user with source-read error exception handling
699  * @addr: source kernel address
700  * @bytes: total transfer length
701  * @iter: destination iterator
702  *
703  * The pmem driver arranges for filesystem-dax to use this facility via
704  * dax_copy_to_iter() for protecting read/write to persistent memory.
705  * Unless / until an architecture can guarantee identical performance
706  * between _copy_to_iter_mcsafe() and _copy_to_iter() it would be a
707  * performance regression to switch more users to the mcsafe version.
708  *
709  * Otherwise, the main differences between this and typical _copy_to_iter() are:
710  *
711  * * Typical tail/residue handling after a fault retries the copy
712  *   byte-by-byte until the fault happens again. Re-triggering machine
713  *   checks is potentially fatal so the implementation uses source
714  *   alignment and poison alignment assumptions to avoid re-triggering
715  *   hardware exceptions.
716  *
717  * * ITER_KVEC, ITER_PIPE, and ITER_BVEC can return short copies.
718  *   Compare to copy_to_iter() where only ITER_IOVEC attempts might return
719  *   a short copy.
720  *
721  * See MCSAFE_TEST for self-test.
722  */
723 size_t _copy_to_iter_mcsafe(const void *addr, size_t bytes, struct iov_iter *i)
724 {
725         const char *from = addr;
726         unsigned long rem, curr_addr, s_addr = (unsigned long) addr;
727
728         if (unlikely(iov_iter_is_pipe(i)))
729                 return copy_pipe_to_iter_mcsafe(addr, bytes, i);
730         if (iter_is_iovec(i))
731                 might_fault();
732         iterate_and_advance(i, bytes, v,
733                 copyout_mcsafe(v.iov_base, (from += v.iov_len) - v.iov_len, v.iov_len),
734                 ({
735                 rem = memcpy_mcsafe_to_page(v.bv_page, v.bv_offset,
736                                (from += v.bv_len) - v.bv_len, v.bv_len);
737                 if (rem) {
738                         curr_addr = (unsigned long) from;
739                         bytes = curr_addr - s_addr - rem;
740                         return bytes;
741                 }
742                 }),
743                 ({
744                 rem = memcpy_mcsafe(v.iov_base, (from += v.iov_len) - v.iov_len,
745                                 v.iov_len);
746                 if (rem) {
747                         curr_addr = (unsigned long) from;
748                         bytes = curr_addr - s_addr - rem;
749                         return bytes;
750                 }
751                 })
752         )
753
754         return bytes;
755 }
756 EXPORT_SYMBOL_GPL(_copy_to_iter_mcsafe);
757 #endif /* CONFIG_ARCH_HAS_UACCESS_MCSAFE */
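
/*
 * Sketch of a caller (assumed names, not code from this file): the dax
 * copy_to_iter hook mentioned in the comment above would simply forward to
 * _copy_to_iter_mcsafe() and hand back its return value, since a short
 * copy is a legitimate outcome for every iterator type here.
 */
#if 0
static size_t example_dax_copy_to_iter(void *kaddr, size_t bytes,
					struct iov_iter *i)
{
	/* May return less than bytes if a poisoned source line is hit. */
	return _copy_to_iter_mcsafe(kaddr, bytes, i);
}
#endif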
758
759 size_t _copy_from_iter(void *addr, size_t bytes, struct iov_iter *i)
760 {
761         char *to = addr;
762         if (unlikely(iov_iter_is_pipe(i))) {
763                 WARN_ON(1);
764                 return 0;
765         }
766         if (iter_is_iovec(i))
767                 might_fault();
768         iterate_and_advance(i, bytes, v,
769                 copyin((to += v.iov_len) - v.iov_len, v.iov_base, v.iov_len),
770                 memcpy_from_page((to += v.bv_len) - v.bv_len, v.bv_page,
771                                  v.bv_offset, v.bv_len),
772                 memcpy((to += v.iov_len) - v.iov_len, v.iov_base, v.iov_len)
773         )
774
775         return bytes;
776 }
777 EXPORT_SYMBOL(_copy_from_iter);
778
779 bool _copy_from_iter_full(void *addr, size_t bytes, struct iov_iter *i)
780 {
781         char *to = addr;
782         if (unlikely(iov_iter_is_pipe(i))) {
783                 WARN_ON(1);
784                 return false;
785         }
786         if (unlikely(i->count < bytes))
787                 return false;
788
789         if (iter_is_iovec(i))
790                 might_fault();
791         iterate_all_kinds(i, bytes, v, ({
792                 if (copyin((to += v.iov_len) - v.iov_len,
793                                       v.iov_base, v.iov_len))
794                         return false;
795                 0;}),
796                 memcpy_from_page((to += v.bv_len) - v.bv_len, v.bv_page,
797                                  v.bv_offset, v.bv_len),
798                 memcpy((to += v.iov_len) - v.iov_len, v.iov_base, v.iov_len)
799         )
800
801         iov_iter_advance(i, bytes);
802         return true;
803 }
804 EXPORT_SYMBOL(_copy_from_iter_full);
805
806 size_t _copy_from_iter_nocache(void *addr, size_t bytes, struct iov_iter *i)
807 {
808         char *to = addr;
809         if (unlikely(iov_iter_is_pipe(i))) {
810                 WARN_ON(1);
811                 return 0;
812         }
813         iterate_and_advance(i, bytes, v,
814                 __copy_from_user_inatomic_nocache((to += v.iov_len) - v.iov_len,
815                                          v.iov_base, v.iov_len),
816                 memcpy_from_page((to += v.bv_len) - v.bv_len, v.bv_page,
817                                  v.bv_offset, v.bv_len),
818                 memcpy((to += v.iov_len) - v.iov_len, v.iov_base, v.iov_len)
819         )
820
821         return bytes;
822 }
823 EXPORT_SYMBOL(_copy_from_iter_nocache);
824
825 #ifdef CONFIG_ARCH_HAS_UACCESS_FLUSHCACHE
826 /**
827  * _copy_from_iter_flushcache - write destination through cpu cache
828  * @addr: destination kernel address
829  * @bytes: total transfer length
830  * @iter: source iterator
831  *
832  * The pmem driver arranges for filesystem-dax to use this facility via
833  * dax_copy_from_iter() for ensuring that writes to persistent memory
834  * are flushed through the CPU cache. It is differentiated from
835  * _copy_from_iter_nocache() in that it guarantees all data is flushed
836  * for all iterator types. _copy_from_iter_nocache() only attempts to
837  * bypass the cache for the ITER_IOVEC case, and on some archs may use
838  * instructions that strand dirty-data in the cache.
839  */
840 size_t _copy_from_iter_flushcache(void *addr, size_t bytes, struct iov_iter *i)
841 {
842         char *to = addr;
843         if (unlikely(iov_iter_is_pipe(i))) {
844                 WARN_ON(1);
845                 return 0;
846         }
847         iterate_and_advance(i, bytes, v,
848                 __copy_from_user_flushcache((to += v.iov_len) - v.iov_len,
849                                          v.iov_base, v.iov_len),
850                 memcpy_page_flushcache((to += v.bv_len) - v.bv_len, v.bv_page,
851                                  v.bv_offset, v.bv_len),
852                 memcpy_flushcache((to += v.iov_len) - v.iov_len, v.iov_base,
853                         v.iov_len)
854         )
855
856         return bytes;
857 }
858 EXPORT_SYMBOL_GPL(_copy_from_iter_flushcache);
859 #endif
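
/*
 * Sketch of the write-side counterpart (assumed names): a dax
 * copy_from_iter hook is expected to use _copy_from_iter_flushcache() so
 * the data is pushed out of the CPU cache for every iterator type, rather
 * than _copy_from_iter_nocache(), which only tries to bypass the cache for
 * ITER_IOVEC sources.
 */
#if 0
static size_t example_dax_copy_from_iter(void *pmem_addr, size_t bytes,
					 struct iov_iter *i)
{
	return _copy_from_iter_flushcache(pmem_addr, bytes, i);
}
#endif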
860
861 bool _copy_from_iter_full_nocache(void *addr, size_t bytes, struct iov_iter *i)
862 {
863         char *to = addr;
864         if (unlikely(iov_iter_is_pipe(i))) {
865                 WARN_ON(1);
866                 return false;
867         }
868         if (unlikely(i->count < bytes))
869                 return false;
870         iterate_all_kinds(i, bytes, v, ({
871                 if (__copy_from_user_inatomic_nocache((to += v.iov_len) - v.iov_len,
872                                              v.iov_base, v.iov_len))
873                         return false;
874                 0;}),
875                 memcpy_from_page((to += v.bv_len) - v.bv_len, v.bv_page,
876                                  v.bv_offset, v.bv_len),
877                 memcpy((to += v.iov_len) - v.iov_len, v.iov_base, v.iov_len)
878         )
879
880         iov_iter_advance(i, bytes);
881         return true;
882 }
883 EXPORT_SYMBOL(_copy_from_iter_full_nocache);
884
885 static inline bool page_copy_sane(struct page *page, size_t offset, size_t n)
886 {
887         struct page *head;
888         size_t v = n + offset;
889
890         /*
891          * The general case needs to access the page order in order
892          * to compute the page size.
893          * However, we mostly deal with order-0 pages and thus can
894          * avoid a possible cache line miss for requests that fit
895          * within a single page, whatever the page order.
896          */
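        /*
         * Worked example: offset = 200 and n = 100 give v = 300, which is
         * <= PAGE_SIZE, so the fast path returns true without touching the
         * compound head.  A bogus n = SIZE_MAX with offset = 2 makes v wrap
         * around to 1, so "n <= v" fails both here and below, and we WARN
         * and return false instead of silently copying past the page.
         */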
897         if (n <= v && v <= PAGE_SIZE)
898                 return true;
899
900         head = compound_head(page);
901         v += (page - head) << PAGE_SHIFT;
902
903         if (likely(n <= v && v <= (page_size(head))))
904                 return true;
905         WARN_ON(1);
906         return false;
907 }
908
909 size_t copy_page_to_iter(struct page *page, size_t offset, size_t bytes,
910                          struct iov_iter *i)
911 {
912         if (unlikely(!page_copy_sane(page, offset, bytes)))
913                 return 0;
914         if (i->type & (ITER_BVEC|ITER_KVEC)) {
915                 void *kaddr = kmap_atomic(page);
916                 size_t wanted = copy_to_iter(kaddr + offset, bytes, i);
917                 kunmap_atomic(kaddr);
918                 return wanted;
919         } else if (unlikely(iov_iter_is_discard(i)))
920                 return bytes;
921         else if (likely(!iov_iter_is_pipe(i)))
922                 return copy_page_to_iter_iovec(page, offset, bytes, i);
923         else
924                 return copy_page_to_iter_pipe(page, offset, bytes, i);
925 }
926 EXPORT_SYMBOL(copy_page_to_iter);
927
928 size_t copy_page_from_iter(struct page *page, size_t offset, size_t bytes,
929                          struct iov_iter *i)
930 {
931         if (unlikely(!page_copy_sane(page, offset, bytes)))
932                 return 0;
933         if (unlikely(iov_iter_is_pipe(i) || iov_iter_is_discard(i))) {
934                 WARN_ON(1);
935                 return 0;
936         }
937         if (i->type & (ITER_BVEC|ITER_KVEC)) {
938                 void *kaddr = kmap_atomic(page);
939                 size_t wanted = _copy_from_iter(kaddr + offset, bytes, i);
940                 kunmap_atomic(kaddr);
941                 return wanted;
942         } else
943                 return copy_page_from_iter_iovec(page, offset, bytes, i);
944 }
945 EXPORT_SYMBOL(copy_page_from_iter);
946
947 static size_t pipe_zero(size_t bytes, struct iov_iter *i)
948 {
949         struct pipe_inode_info *pipe = i->pipe;
950         unsigned int p_mask = pipe->ring_size - 1;
951         unsigned int i_head;
952         size_t n, off;
953
954         if (!sanity(i))
955                 return 0;
956
957         bytes = n = push_pipe(i, bytes, &i_head, &off);
958         if (unlikely(!n))
959                 return 0;
960
961         do {
962                 size_t chunk = min_t(size_t, n, PAGE_SIZE - off);
963                 memzero_page(pipe->bufs[i_head & p_mask].page, off, chunk);
964                 i->head = i_head;
965                 i->iov_offset = off + chunk;
966                 n -= chunk;
967                 off = 0;
968                 i_head++;
969         } while (n);
970         i->count -= bytes;
971         return bytes;
972 }
973
974 size_t iov_iter_zero(size_t bytes, struct iov_iter *i)
975 {
976         if (unlikely(iov_iter_is_pipe(i)))
977                 return pipe_zero(bytes, i);
978         iterate_and_advance(i, bytes, v,
979                 clear_user(v.iov_base, v.iov_len),
980                 memzero_page(v.bv_page, v.bv_offset, v.bv_len),
981                 memset(v.iov_base, 0, v.iov_len)
982         )
983
984         return bytes;
985 }
986 EXPORT_SYMBOL(iov_iter_zero);
987
988 size_t iov_iter_copy_from_user_atomic(struct page *page,
989                 struct iov_iter *i, unsigned long offset, size_t bytes)
990 {
991         char *kaddr = kmap_atomic(page), *p = kaddr + offset;
992         if (unlikely(!page_copy_sane(page, offset, bytes))) {
993                 kunmap_atomic(kaddr);
994                 return 0;
995         }
996         if (unlikely(iov_iter_is_pipe(i) || iov_iter_is_discard(i))) {
997                 kunmap_atomic(kaddr);
998                 WARN_ON(1);
999                 return 0;
1000         }
1001         iterate_all_kinds(i, bytes, v,
1002                 copyin((p += v.iov_len) - v.iov_len, v.iov_base, v.iov_len),
1003                 memcpy_from_page((p += v.bv_len) - v.bv_len, v.bv_page,
1004                                  v.bv_offset, v.bv_len),
1005                 memcpy((p += v.iov_len) - v.iov_len, v.iov_base, v.iov_len)
1006         )
1007         kunmap_atomic(kaddr);
1008         return bytes;
1009 }
1010 EXPORT_SYMBOL(iov_iter_copy_from_user_atomic);
1011
1012 static inline void pipe_truncate(struct iov_iter *i)
1013 {
1014         struct pipe_inode_info *pipe = i->pipe;
1015         unsigned int p_tail = pipe->tail;
1016         unsigned int p_head = pipe->head;
1017         unsigned int p_mask = pipe->ring_size - 1;
1018
1019         if (!pipe_empty(p_head, p_tail)) {
1020                 struct pipe_buffer *buf;
1021                 unsigned int i_head = i->head;
1022                 size_t off = i->iov_offset;
1023
1024                 if (off) {
1025                         buf = &pipe->bufs[i_head & p_mask];
1026                         buf->len = off - buf->offset;
1027                         i_head++;
1028                 }
1029                 while (p_head != i_head) {
1030                         p_head--;
1031                         pipe_buf_release(pipe, &pipe->bufs[p_head & p_mask]);
1032                 }
1033
1034                 pipe->head = p_head;
1035         }
1036 }
1037
1038 static void pipe_advance(struct iov_iter *i, size_t size)
1039 {
1040         struct pipe_inode_info *pipe = i->pipe;
1041         if (unlikely(i->count < size))
1042                 size = i->count;
1043         if (size) {
1044                 struct pipe_buffer *buf;
1045                 unsigned int p_mask = pipe->ring_size - 1;
1046                 unsigned int i_head = i->head;
1047                 size_t off = i->iov_offset, left = size;
1048
1049                 if (off) /* make it relative to the beginning of buffer */
1050                         left += off - pipe->bufs[i_head & p_mask].offset;
1051                 while (1) {
1052                         buf = &pipe->bufs[i_head & p_mask];
1053                         if (left <= buf->len)
1054                                 break;
1055                         left -= buf->len;
1056                         i_head++;
1057                 }
1058                 i->head = i_head;
1059                 i->iov_offset = buf->offset + left;
1060         }
1061         i->count -= size;
1062         /* ... and discard everything past that point */
1063         pipe_truncate(i);
1064 }
1065
1066 void iov_iter_advance(struct iov_iter *i, size_t size)
1067 {
1068         if (unlikely(iov_iter_is_pipe(i))) {
1069                 pipe_advance(i, size);
1070                 return;
1071         }
1072         if (unlikely(iov_iter_is_discard(i))) {
1073                 i->count -= size;
1074                 return;
1075         }
1076         iterate_and_advance(i, size, v, 0, 0, 0)
1077 }
1078 EXPORT_SYMBOL(iov_iter_advance);
1079
1080 void iov_iter_revert(struct iov_iter *i, size_t unroll)
1081 {
1082         if (!unroll)
1083                 return;
1084         if (WARN_ON(unroll > MAX_RW_COUNT))
1085                 return;
1086         i->count += unroll;
1087         if (unlikely(iov_iter_is_pipe(i))) {
1088                 struct pipe_inode_info *pipe = i->pipe;
1089                 unsigned int p_mask = pipe->ring_size - 1;
1090                 unsigned int i_head = i->head;
1091                 size_t off = i->iov_offset;
1092                 while (1) {
1093                         struct pipe_buffer *b = &pipe->bufs[i_head & p_mask];
1094                         size_t n = off - b->offset;
1095                         if (unroll < n) {
1096                                 off -= unroll;
1097                                 break;
1098                         }
1099                         unroll -= n;
1100                         if (!unroll && i_head == i->start_head) {
1101                                 off = 0;
1102                                 break;
1103                         }
1104                         i_head--;
1105                         b = &pipe->bufs[i_head & p_mask];
1106                         off = b->offset + b->len;
1107                 }
1108                 i->iov_offset = off;
1109                 i->head = i_head;
1110                 pipe_truncate(i);
1111                 return;
1112         }
1113         if (unlikely(iov_iter_is_discard(i)))
1114                 return;
1115         if (unroll <= i->iov_offset) {
1116                 i->iov_offset -= unroll;
1117                 return;
1118         }
1119         unroll -= i->iov_offset;
1120         if (iov_iter_is_bvec(i)) {
1121                 const struct bio_vec *bvec = i->bvec;
1122                 while (1) {
1123                         size_t n = (--bvec)->bv_len;
1124                         i->nr_segs++;
1125                         if (unroll <= n) {
1126                                 i->bvec = bvec;
1127                                 i->iov_offset = n - unroll;
1128                                 return;
1129                         }
1130                         unroll -= n;
1131                 }
1132         } else { /* same logics for iovec and kvec */
1133                 const struct iovec *iov = i->iov;
1134                 while (1) {
1135                         size_t n = (--iov)->iov_len;
1136                         i->nr_segs++;
1137                         if (unroll <= n) {
1138                                 i->iov = iov;
1139                                 i->iov_offset = n - unroll;
1140                                 return;
1141                         }
1142                         unroll -= n;
1143                 }
1144         }
1145 }
1146 EXPORT_SYMBOL(iov_iter_revert);
1147
1148 /*
1149  * Return the count of just the current iov_iter segment.
1150  */
1151 size_t iov_iter_single_seg_count(const struct iov_iter *i)
1152 {
1153         if (unlikely(iov_iter_is_pipe(i)))
1154                 return i->count;        // it is a silly place, anyway
1155         if (i->nr_segs == 1)
1156                 return i->count;
1157         if (unlikely(iov_iter_is_discard(i)))
1158                 return i->count;
1159         else if (iov_iter_is_bvec(i))
1160                 return min(i->count, i->bvec->bv_len - i->iov_offset);
1161         else
1162                 return min(i->count, i->iov->iov_len - i->iov_offset);
1163 }
1164 EXPORT_SYMBOL(iov_iter_single_seg_count);
1165
1166 void iov_iter_kvec(struct iov_iter *i, unsigned int direction,
1167                         const struct kvec *kvec, unsigned long nr_segs,
1168                         size_t count)
1169 {
1170         WARN_ON(direction & ~(READ | WRITE));
1171         i->type = ITER_KVEC | (direction & (READ | WRITE));
1172         i->kvec = kvec;
1173         i->nr_segs = nr_segs;
1174         i->iov_offset = 0;
1175         i->count = count;
1176 }
1177 EXPORT_SYMBOL(iov_iter_kvec);
1178
1179 void iov_iter_bvec(struct iov_iter *i, unsigned int direction,
1180                         const struct bio_vec *bvec, unsigned long nr_segs,
1181                         size_t count)
1182 {
1183         WARN_ON(direction & ~(READ | WRITE));
1184         i->type = ITER_BVEC | (direction & (READ | WRITE));
1185         i->bvec = bvec;
1186         i->nr_segs = nr_segs;
1187         i->iov_offset = 0;
1188         i->count = count;
1189 }
1190 EXPORT_SYMBOL(iov_iter_bvec);
1191
1192 void iov_iter_pipe(struct iov_iter *i, unsigned int direction,
1193                         struct pipe_inode_info *pipe,
1194                         size_t count)
1195 {
1196         BUG_ON(direction != READ);
1197         WARN_ON(pipe_full(pipe->head, pipe->tail, pipe->ring_size));
1198         i->type = ITER_PIPE | READ;
1199         i->pipe = pipe;
1200         i->head = pipe->head;
1201         i->iov_offset = 0;
1202         i->count = count;
1203         i->start_head = i->head;
1204 }
1205 EXPORT_SYMBOL(iov_iter_pipe);
1206
1207 /**
1208  * iov_iter_discard - Initialise an I/O iterator that discards data
1209  * @i: The iterator to initialise.
1210  * @direction: The direction of the transfer.
1211  * @count: The size of the I/O buffer in bytes.
1212  *
1213  * Set up an I/O iterator that just discards everything that's written to it.
1214  * It's only available as a READ iterator.
1215  */
1216 void iov_iter_discard(struct iov_iter *i, unsigned int direction, size_t count)
1217 {
1218         BUG_ON(direction != READ);
1219         i->type = ITER_DISCARD | READ;
1220         i->count = count;
1221         i->iov_offset = 0;
1222 }
1223 EXPORT_SYMBOL(iov_iter_discard);
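
/*
 * Sketch (assumed names): to drop the next "len" bytes coming from a page
 * without providing a destination, point a READ iterator at the discard
 * backend and feed it to the normal copy path; copy_page_to_iter() then
 * just accounts the bytes, as seen in its ITER_DISCARD branch above.
 */
#if 0
static size_t example_drain_page_bytes(struct page *page, size_t offset,
					size_t len)
{
	struct iov_iter it;

	iov_iter_discard(&it, READ, len);
	return copy_page_to_iter(page, offset, len, &it);
}
#endif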
1224
1225 unsigned long iov_iter_alignment(const struct iov_iter *i)
1226 {
1227         unsigned long res = 0;
1228         size_t size = i->count;
1229
1230         if (unlikely(iov_iter_is_pipe(i))) {
1231                 unsigned int p_mask = i->pipe->ring_size - 1;
1232
1233                 if (size && i->iov_offset && allocated(&i->pipe->bufs[i->head & p_mask]))
1234                         return size | i->iov_offset;
1235                 return size;
1236         }
1237         iterate_all_kinds(i, size, v,
1238                 (res |= (unsigned long)v.iov_base | v.iov_len, 0),
1239                 res |= v.bv_offset | v.bv_len,
1240                 res |= (unsigned long)v.iov_base | v.iov_len
1241         )
1242         return res;
1243 }
1244 EXPORT_SYMBOL(iov_iter_alignment);
1245
1246 unsigned long iov_iter_gap_alignment(const struct iov_iter *i)
1247 {
1248         unsigned long res = 0;
1249         size_t size = i->count;
1250
1251         if (unlikely(iov_iter_is_pipe(i) || iov_iter_is_discard(i))) {
1252                 WARN_ON(1);
1253                 return ~0U;
1254         }
1255
1256         iterate_all_kinds(i, size, v,
1257                 (res |= (!res ? 0 : (unsigned long)v.iov_base) |
1258                         (size != v.iov_len ? size : 0), 0),
1259                 (res |= (!res ? 0 : (unsigned long)v.bv_offset) |
1260                         (size != v.bv_len ? size : 0)),
1261                 (res |= (!res ? 0 : (unsigned long)v.iov_base) |
1262                         (size != v.iov_len ? size : 0))
1263                 );
1264         return res;
1265 }
1266 EXPORT_SYMBOL(iov_iter_gap_alignment);
1267
1268 static inline ssize_t __pipe_get_pages(struct iov_iter *i,
1269                                 size_t maxsize,
1270                                 struct page **pages,
1271                                 int iter_head,
1272                                 size_t *start)
1273 {
1274         struct pipe_inode_info *pipe = i->pipe;
1275         unsigned int p_mask = pipe->ring_size - 1;
1276         ssize_t n = push_pipe(i, maxsize, &iter_head, start);
1277         if (!n)
1278                 return -EFAULT;
1279
1280         maxsize = n;
1281         n += *start;
1282         while (n > 0) {
1283                 get_page(*pages++ = pipe->bufs[iter_head & p_mask].page);
1284                 iter_head++;
1285                 n -= PAGE_SIZE;
1286         }
1287
1288         return maxsize;
1289 }
1290
1291 static ssize_t pipe_get_pages(struct iov_iter *i,
1292                    struct page **pages, size_t maxsize, unsigned maxpages,
1293                    size_t *start)
1294 {
1295         unsigned int iter_head, npages;
1296         size_t capacity;
1297
1298         if (!maxsize)
1299                 return 0;
1300
1301         if (!sanity(i))
1302                 return -EFAULT;
1303
1304         data_start(i, &iter_head, start);
1305         /* Amount of free space: some of this one + all after this one */
1306         npages = pipe_space_for_user(iter_head, i->pipe->tail, i->pipe);
1307         capacity = min(npages, maxpages) * PAGE_SIZE - *start;
1308
1309         return __pipe_get_pages(i, min(maxsize, capacity), pages, iter_head, start);
1310 }
1311
1312 ssize_t iov_iter_get_pages(struct iov_iter *i,
1313                    struct page **pages, size_t maxsize, unsigned maxpages,
1314                    size_t *start)
1315 {
1316         if (maxsize > i->count)
1317                 maxsize = i->count;
1318
1319         if (unlikely(iov_iter_is_pipe(i)))
1320                 return pipe_get_pages(i, pages, maxsize, maxpages, start);
1321         if (unlikely(iov_iter_is_discard(i)))
1322                 return -EFAULT;
1323
1324         iterate_all_kinds(i, maxsize, v, ({
1325                 unsigned long addr = (unsigned long)v.iov_base;
1326                 size_t len = v.iov_len + (*start = addr & (PAGE_SIZE - 1));
1327                 int n;
1328                 int res;
1329
1330                 if (len > maxpages * PAGE_SIZE)
1331                         len = maxpages * PAGE_SIZE;
1332                 addr &= ~(PAGE_SIZE - 1);
1333                 n = DIV_ROUND_UP(len, PAGE_SIZE);
1334                 res = get_user_pages_fast(addr, n,
1335                                 iov_iter_rw(i) != WRITE ?  FOLL_WRITE : 0,
1336                                 pages);
1337                 if (unlikely(res < 0))
1338                         return res;
1339                 return (res == n ? len : res * PAGE_SIZE) - *start;
1340         0;}),({
1341                 /* can't be more than PAGE_SIZE */
1342                 *start = v.bv_offset;
1343                 get_page(*pages = v.bv_page);
1344                 return v.bv_len;
1345         }),({
1346                 return -EFAULT;
1347         })
1348         )
1349         return 0;
1350 }
1351 EXPORT_SYMBOL(iov_iter_get_pages);
1352
1353 static struct page **get_pages_array(size_t n)
1354 {
1355         return kvmalloc_array(n, sizeof(struct page *), GFP_KERNEL);
1356 }
1357
1358 static ssize_t pipe_get_pages_alloc(struct iov_iter *i,
1359                    struct page ***pages, size_t maxsize,
1360                    size_t *start)
1361 {
1362         struct page **p;
1363         unsigned int iter_head, npages;
1364         ssize_t n;
1365
1366         if (!maxsize)
1367                 return 0;
1368
1369         if (!sanity(i))
1370                 return -EFAULT;
1371
1372         data_start(i, &iter_head, start);
1373         /* Amount of free space: some of this one + all after this one */
1374         npages = pipe_space_for_user(iter_head, i->pipe->tail, i->pipe);
1375         n = npages * PAGE_SIZE - *start;
1376         if (maxsize > n)
1377                 maxsize = n;
1378         else
1379                 npages = DIV_ROUND_UP(maxsize + *start, PAGE_SIZE);
1380         p = get_pages_array(npages);
1381         if (!p)
1382                 return -ENOMEM;
1383         n = __pipe_get_pages(i, maxsize, p, iter_head, start);
1384         if (n > 0)
1385                 *pages = p;
1386         else
1387                 kvfree(p);
1388         return n;
1389 }
1390
1391 ssize_t iov_iter_get_pages_alloc(struct iov_iter *i,
1392                    struct page ***pages, size_t maxsize,
1393                    size_t *start)
1394 {
1395         struct page **p;
1396
1397         if (maxsize > i->count)
1398                 maxsize = i->count;
1399
1400         if (unlikely(iov_iter_is_pipe(i)))
1401                 return pipe_get_pages_alloc(i, pages, maxsize, start);
1402         if (unlikely(iov_iter_is_discard(i)))
1403                 return -EFAULT;
1404
1405         iterate_all_kinds(i, maxsize, v, ({
1406                 unsigned long addr = (unsigned long)v.iov_base;
1407                 size_t len = v.iov_len + (*start = addr & (PAGE_SIZE - 1));
1408                 int n;
1409                 int res;
1410
1411                 addr &= ~(PAGE_SIZE - 1);
1412                 n = DIV_ROUND_UP(len, PAGE_SIZE);
1413                 p = get_pages_array(n);
1414                 if (!p)
1415                         return -ENOMEM;
1416                 res = get_user_pages_fast(addr, n,
1417                                 iov_iter_rw(i) != WRITE ?  FOLL_WRITE : 0, p);
1418                 if (unlikely(res < 0)) {
1419                         kvfree(p);
1420                         return res;
1421                 }
1422                 *pages = p;
1423                 return (res == n ? len : res * PAGE_SIZE) - *start;
1424         0;}),({
1425                 /* can't be more than PAGE_SIZE */
1426                 *start = v.bv_offset;
1427                 *pages = p = get_pages_array(1);
1428                 if (!p)
1429                         return -ENOMEM;
1430                 get_page(*p = v.bv_page);
1431                 return v.bv_len;
1432         }),({
1433                 return -EFAULT;
1434         })
1435         )
1436         return 0;
1437 }
1438 EXPORT_SYMBOL(iov_iter_get_pages_alloc);
1439
1440 size_t csum_and_copy_from_iter(void *addr, size_t bytes, __wsum *csum,
1441                                struct iov_iter *i)
1442 {
1443         char *to = addr;
1444         __wsum sum, next;
1445         size_t off = 0;
1446         sum = *csum;
1447         if (unlikely(iov_iter_is_pipe(i) || iov_iter_is_discard(i))) {
1448                 WARN_ON(1);
1449                 return 0;
1450         }
1451         iterate_and_advance(i, bytes, v, ({
1452                 int err = 0;
1453                 next = csum_and_copy_from_user(v.iov_base,
1454                                                (to += v.iov_len) - v.iov_len,
1455                                                v.iov_len, 0, &err);
1456                 if (!err) {
1457                         sum = csum_block_add(sum, next, off);
1458                         off += v.iov_len;
1459                 }
1460                 err ? v.iov_len : 0;
1461         }), ({
1462                 char *p = kmap_atomic(v.bv_page);
1463                 sum = csum_and_memcpy((to += v.bv_len) - v.bv_len,
1464                                       p + v.bv_offset, v.bv_len,
1465                                       sum, off);
1466                 kunmap_atomic(p);
1467                 off += v.bv_len;
1468         }),({
1469                 sum = csum_and_memcpy((to += v.iov_len) - v.iov_len,
1470                                       v.iov_base, v.iov_len,
1471                                       sum, off);
1472                 off += v.iov_len;
1473         })
1474         )
1475         *csum = sum;
1476         return bytes;
1477 }
1478 EXPORT_SYMBOL(csum_and_copy_from_iter);
1479
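/*
 * All-or-nothing variant of csum_and_copy_from_iter(): @i is advanced only
 * when all @bytes have been copied and checksummed successfully.  Returns
 * false, without advancing @i, on a short iterator or a faulting user copy.
 */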
1480 bool csum_and_copy_from_iter_full(void *addr, size_t bytes, __wsum *csum,
1481                                struct iov_iter *i)
1482 {
1483         char *to = addr;
1484         __wsum sum, next;
1485         size_t off = 0;
1486         sum = *csum;
1487         if (unlikely(iov_iter_is_pipe(i) || iov_iter_is_discard(i))) {
1488                 WARN_ON(1);
1489                 return false;
1490         }
1491         if (unlikely(i->count < bytes))
1492                 return false;
1493         iterate_all_kinds(i, bytes, v, ({
1494                 int err = 0;
1495                 next = csum_and_copy_from_user(v.iov_base,
1496                                                (to += v.iov_len) - v.iov_len,
1497                                                v.iov_len, 0, &err);
1498                 if (err)
1499                         return false;
1500                 sum = csum_block_add(sum, next, off);
1501                 off += v.iov_len;
1502                 0;
1503         }), ({
1504                 char *p = kmap_atomic(v.bv_page);
1505                 sum = csum_and_memcpy((to += v.bv_len) - v.bv_len,
1506                                       p + v.bv_offset, v.bv_len,
1507                                       sum, off);
1508                 kunmap_atomic(p);
1509                 off += v.bv_len;
1510         }),({
1511                 sum = csum_and_memcpy((to += v.iov_len) - v.iov_len,
1512                                       v.iov_base, v.iov_len,
1513                                       sum, off);
1514                 off += v.iov_len;
1515         })
1516         )
1517         *csum = sum;
1518         iov_iter_advance(i, bytes);
1519         return true;
1520 }
1521 EXPORT_SYMBOL(csum_and_copy_from_iter_full);
1522
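/*
 * Copy @bytes from the kernel buffer at @addr to @i, folding the Internet
 * checksum of the copied data into *@csump.  Pipe backed iterators are
 * handled by csum_and_copy_to_pipe_iter(); discard iterators are rejected.
 */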
1523 size_t csum_and_copy_to_iter(const void *addr, size_t bytes, void *csump,
1524                              struct iov_iter *i)
1525 {
1526         const char *from = addr;
1527         __wsum *csum = csump;
1528         __wsum sum, next;
1529         size_t off = 0;
1530
1531         if (unlikely(iov_iter_is_pipe(i)))
1532                 return csum_and_copy_to_pipe_iter(addr, bytes, csum, i);
1533
1534         sum = *csum;
1535         if (unlikely(iov_iter_is_discard(i))) {
1536                 WARN_ON(1);     /* for now */
1537                 return 0;
1538         }
1539         iterate_and_advance(i, bytes, v, ({
1540                 int err = 0;
1541                 next = csum_and_copy_to_user((from += v.iov_len) - v.iov_len,
1542                                              v.iov_base,
1543                                              v.iov_len, 0, &err);
1544                 if (!err) {
1545                         sum = csum_block_add(sum, next, off);
1546                         off += v.iov_len;
1547                 }
1548                 err ? v.iov_len : 0;
1549         }), ({
1550                 char *p = kmap_atomic(v.bv_page);
1551                 sum = csum_and_memcpy(p + v.bv_offset,
1552                                       (from += v.bv_len) - v.bv_len,
1553                                       v.bv_len, sum, off);
1554                 kunmap_atomic(p);
1555                 off += v.bv_len;
1556         }),({
1557                 sum = csum_and_memcpy(v.iov_base,
1558                                      (from += v.iov_len) - v.iov_len,
1559                                      v.iov_len, sum, off);
1560                 off += v.iov_len;
1561         })
1562         )
1563         *csum = sum;
1564         return bytes;
1565 }
1566 EXPORT_SYMBOL(csum_and_copy_to_iter);
1567
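/*
 * Copy @bytes from @addr to @i and feed the copied data into the ahash
 * request passed via @hashp, updating its running digest.  Returns the
 * number of bytes copied, or 0 when CONFIG_CRYPTO_HASH is not enabled.
 */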
1568 size_t hash_and_copy_to_iter(const void *addr, size_t bytes, void *hashp,
1569                 struct iov_iter *i)
1570 {
1571 #ifdef CONFIG_CRYPTO_HASH
1572         struct ahash_request *hash = hashp;
1573         struct scatterlist sg;
1574         size_t copied;
1575
1576         copied = copy_to_iter(addr, bytes, i);
1577         sg_init_one(&sg, addr, copied);
1578         ahash_request_set_crypt(hash, &sg, NULL, copied);
1579         crypto_ahash_update(hash);
1580         return copied;
1581 #else
1582         return 0;
1583 #endif
1584 }
1585 EXPORT_SYMBOL(hash_and_copy_to_iter);
1586
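/*
 * Return the number of pages spanned by the data remaining in @i, capped at
 * @maxpages.  For pipe backed iterators this counts the page slots that are
 * still free for writing into the pipe.
 */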
1587 int iov_iter_npages(const struct iov_iter *i, int maxpages)
1588 {
1589         size_t size = i->count;
1590         int npages = 0;
1591
1592         if (!size)
1593                 return 0;
1594         if (unlikely(iov_iter_is_discard(i)))
1595                 return 0;
1596
1597         if (unlikely(iov_iter_is_pipe(i))) {
1598                 struct pipe_inode_info *pipe = i->pipe;
1599                 unsigned int iter_head;
1600                 size_t off;
1601
1602                 if (!sanity(i))
1603                         return 0;
1604
1605                 data_start(i, &iter_head, &off);
1606                 /* some of this one + all after this one */
1607                 npages = pipe_space_for_user(iter_head, pipe->tail, pipe);
1608                 if (npages >= maxpages)
1609                         return maxpages;
1610         } else iterate_all_kinds(i, size, v, ({
1611                 unsigned long p = (unsigned long)v.iov_base;
1612                 npages += DIV_ROUND_UP(p + v.iov_len, PAGE_SIZE)
1613                         - p / PAGE_SIZE;
1614                 if (npages >= maxpages)
1615                         return maxpages;
1616         0;}),({
1617                 npages++;
1618                 if (npages >= maxpages)
1619                         return maxpages;
1620         }),({
1621                 unsigned long p = (unsigned long)v.iov_base;
1622                 npages += DIV_ROUND_UP(p + v.iov_len, PAGE_SIZE)
1623                         - p / PAGE_SIZE;
1624                 if (npages >= maxpages)
1625                         return maxpages;
1626         })
1627         )
1628         return npages;
1629 }
1630 EXPORT_SYMBOL(iov_iter_npages);
1631
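/*
 * Duplicate @old into @new, giving @new its own copy of the segment array
 * (bio_vecs, or iovecs/kvecs) allocated with @flags.  Returns the new
 * segment array; NULL is returned on allocation failure, for unsupported
 * pipe iterators, and for discard iterators (which have no segments).
 */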
1632 const void *dup_iter(struct iov_iter *new, struct iov_iter *old, gfp_t flags)
1633 {
1634         *new = *old;
1635         if (unlikely(iov_iter_is_pipe(new))) {
1636                 WARN_ON(1);
1637                 return NULL;
1638         }
1639         if (unlikely(iov_iter_is_discard(new)))
1640                 return NULL;
1641         if (iov_iter_is_bvec(new))
1642                 return new->bvec = kmemdup(new->bvec,
1643                                     new->nr_segs * sizeof(struct bio_vec),
1644                                     flags);
1645         else
1646                 /* iovec and kvec have identical layout */
1647                 return new->iov = kmemdup(new->iov,
1648                                    new->nr_segs * sizeof(struct iovec),
1649                                    flags);
1650 }
1651 EXPORT_SYMBOL(dup_iter);
1652
1653 /**
1654  * import_iovec() - Copy an array of &struct iovec from userspace
1655  *     into the kernel, check that it is valid, and initialize a new
1656  *     &struct iov_iter iterator to access it.
1657  *
1658  * @type: One of %READ or %WRITE.
1659  * @uvector: Pointer to the userspace array.
1660  * @nr_segs: Number of elements in userspace array.
1661  * @fast_segs: Number of elements preallocated in *@iov.
1662  * @iov: (input and output parameter) Pointer to pointer to (usually small
1663  *     on-stack) kernel array.
1664  * @i: Pointer to iterator that will be initialized on success.
1665  *
1666  * If the array pointed to by *@iov is large enough to hold all @nr_segs,
1667  * then this function places %NULL in *@iov on return. Otherwise, a new
1668  * array will be allocated and the result placed in *@iov. This means that
1669  * the caller may call kfree() on *@iov regardless of whether the small
1670  * on-stack array was used or not (and regardless of whether this function
1671  * returns an error or not).
1672  *
1673  * Return: Negative error code on error; the total number of bytes imported on success (a usage sketch follows the function below).
1674  */
1675 ssize_t import_iovec(int type, const struct iovec __user * uvector,
1676                  unsigned nr_segs, unsigned fast_segs,
1677                  struct iovec **iov, struct iov_iter *i)
1678 {
1679         ssize_t n;
1680         struct iovec *p;
1681         n = rw_copy_check_uvector(type, uvector, nr_segs, fast_segs,
1682                                   *iov, &p);
1683         if (n < 0) {
1684                 if (p != *iov)
1685                         kfree(p);
1686                 *iov = NULL;
1687                 return n;
1688         }
1689         iov_iter_init(i, type, p, nr_segs, n);
1690         *iov = p == *iov ? NULL : p;
1691         return n;
1692 }
1693 EXPORT_SYMBOL(import_iovec);
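
/*
 * Illustrative sketch, not part of the original file: a hypothetical
 * readv-style caller demonstrating the ownership rules documented above.
 * example_do_readv(), its parameters and the elided "consume" step are
 * assumptions made purely for illustration.
 */
static ssize_t __maybe_unused example_do_readv(const struct iovec __user *uvec,
					       unsigned int nr_segs)
{
	struct iovec iovstack[UIO_FASTIOV], *iov = iovstack;
	struct iov_iter iter;
	ssize_t ret;

	/*
	 * On success, iter covers ret bytes and iov either points to a
	 * freshly allocated array or is NULL (the on-stack array sufficed).
	 */
	ret = import_iovec(READ, uvec, nr_segs, UIO_FASTIOV, &iov, &iter);
	if (ret < 0)
		return ret;

	/* ... consume the data through &iter, e.g. with copy_to_iter() ... */

	/* Safe whether or not the on-stack array was used. */
	kfree(iov);
	return ret;
}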
1694
1695 #ifdef CONFIG_COMPAT
1696 #include <linux/compat.h>
1697
1698 ssize_t compat_import_iovec(int type,
1699                 const struct compat_iovec __user * uvector,
1700                 unsigned nr_segs, unsigned fast_segs,
1701                 struct iovec **iov, struct iov_iter *i)
1702 {
1703         ssize_t n;
1704         struct iovec *p;
1705         n = compat_rw_copy_check_uvector(type, uvector, nr_segs, fast_segs,
1706                                   *iov, &p);
1707         if (n < 0) {
1708                 if (p != *iov)
1709                         kfree(p);
1710                 *iov = NULL;
1711                 return n;
1712         }
1713         iov_iter_init(i, type, p, nr_segs, n);
1714         *iov = p == *iov ? NULL : p;
1715         return n;
1716 }
1717 EXPORT_SYMBOL(compat_import_iovec);
1718 #endif
1719
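/*
 * Describe the single user buffer @buf/@len as a one-segment iov_iter for a
 * %READ or %WRITE transfer, clamping @len to MAX_RW_COUNT.  Returns 0 on
 * success or -EFAULT if the buffer fails the access_ok() check.
 */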
1720 int import_single_range(int rw, void __user *buf, size_t len,
1721                  struct iovec *iov, struct iov_iter *i)
1722 {
1723         if (len > MAX_RW_COUNT)
1724                 len = MAX_RW_COUNT;
1725         if (unlikely(!access_ok(buf, len)))
1726                 return -EFAULT;
1727
1728         iov->iov_base = buf;
1729         iov->iov_len = len;
1730         iov_iter_init(i, rw, iov, 1, len);
1731         return 0;
1732 }
1733 EXPORT_SYMBOL(import_single_range);
1734
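/*
 * Call @f on each contiguous segment making up the first @bytes of @i,
 * kmap()ing bvec pages as needed.  Only bvec and kvec backed iterators are
 * supported; iovec backed iterators yield -EINVAL.  The iterator itself is
 * not advanced.
 */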
1735 int iov_iter_for_each_range(struct iov_iter *i, size_t bytes,
1736                             int (*f)(struct kvec *vec, void *context),
1737                             void *context)
1738 {
1739         struct kvec w;
1740         int err = -EINVAL;
1741         if (!bytes)
1742                 return 0;
1743
1744         iterate_all_kinds(i, bytes, v, -EINVAL, ({
1745                 w.iov_base = kmap(v.bv_page) + v.bv_offset;
1746                 w.iov_len = v.bv_len;
1747                 err = f(&w, context);
1748                 kunmap(v.bv_page);
1749                 err;}), ({
1750                 w = v;
1751                 err = f(&w, context);})
1752         )
1753         return err;
1754 }
1755 EXPORT_SYMBOL(iov_iter_for_each_range);