mm: introduce page_size()
lib/iov_iter.c (platform/kernel/linux-starfive.git)
1 // SPDX-License-Identifier: GPL-2.0-only
2 #include <linux/export.h>
3 #include <linux/bvec.h>
4 #include <linux/uio.h>
5 #include <linux/pagemap.h>
6 #include <linux/slab.h>
7 #include <linux/vmalloc.h>
8 #include <linux/splice.h>
9 #include <net/checksum.h>
10 #include <linux/scatterlist.h>
11
12 #define PIPE_PARANOIA /* for now */
13
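/*
 * The iterate_*() macros below walk an iov_iter segment by segment and
 * run STEP once per contiguous chunk, with __v describing the current
 * chunk.  iterate_all_kinds() dispatches on the iterator flavour (iovec,
 * bvec, kvec or discard) without consuming the iterator, while
 * iterate_and_advance() additionally updates ->count, ->iov_offset and
 * the segment pointers so the iterator is left positioned just past the
 * bytes that were processed.
 */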
14 #define iterate_iovec(i, n, __v, __p, skip, STEP) {     \
15         size_t left;                                    \
16         size_t wanted = n;                              \
17         __p = i->iov;                                   \
18         __v.iov_len = min(n, __p->iov_len - skip);      \
19         if (likely(__v.iov_len)) {                      \
20                 __v.iov_base = __p->iov_base + skip;    \
21                 left = (STEP);                          \
22                 __v.iov_len -= left;                    \
23                 skip += __v.iov_len;                    \
24                 n -= __v.iov_len;                       \
25         } else {                                        \
26                 left = 0;                               \
27         }                                               \
28         while (unlikely(!left && n)) {                  \
29                 __p++;                                  \
30                 __v.iov_len = min(n, __p->iov_len);     \
31                 if (unlikely(!__v.iov_len))             \
32                         continue;                       \
33                 __v.iov_base = __p->iov_base;           \
34                 left = (STEP);                          \
35                 __v.iov_len -= left;                    \
36                 skip = __v.iov_len;                     \
37                 n -= __v.iov_len;                       \
38         }                                               \
39         n = wanted - n;                                 \
40 }
41
42 #define iterate_kvec(i, n, __v, __p, skip, STEP) {      \
43         size_t wanted = n;                              \
44         __p = i->kvec;                                  \
45         __v.iov_len = min(n, __p->iov_len - skip);      \
46         if (likely(__v.iov_len)) {                      \
47                 __v.iov_base = __p->iov_base + skip;    \
48                 (void)(STEP);                           \
49                 skip += __v.iov_len;                    \
50                 n -= __v.iov_len;                       \
51         }                                               \
52         while (unlikely(n)) {                           \
53                 __p++;                                  \
54                 __v.iov_len = min(n, __p->iov_len);     \
55                 if (unlikely(!__v.iov_len))             \
56                         continue;                       \
57                 __v.iov_base = __p->iov_base;           \
58                 (void)(STEP);                           \
59                 skip = __v.iov_len;                     \
60                 n -= __v.iov_len;                       \
61         }                                               \
62         n = wanted;                                     \
63 }
64
65 #define iterate_bvec(i, n, __v, __bi, skip, STEP) {     \
66         struct bvec_iter __start;                       \
67         __start.bi_size = n;                            \
68         __start.bi_bvec_done = skip;                    \
69         __start.bi_idx = 0;                             \
70         for_each_bvec(__v, i->bvec, __bi, __start) {    \
71                 if (!__v.bv_len)                        \
72                         continue;                       \
73                 (void)(STEP);                           \
74         }                                               \
75 }
76
77 #define iterate_all_kinds(i, n, v, I, B, K) {                   \
78         if (likely(n)) {                                        \
79                 size_t skip = i->iov_offset;                    \
80                 if (unlikely(i->type & ITER_BVEC)) {            \
81                         struct bio_vec v;                       \
82                         struct bvec_iter __bi;                  \
83                         iterate_bvec(i, n, v, __bi, skip, (B))  \
84                 } else if (unlikely(i->type & ITER_KVEC)) {     \
85                         const struct kvec *kvec;                \
86                         struct kvec v;                          \
87                         iterate_kvec(i, n, v, kvec, skip, (K))  \
88                 } else if (unlikely(i->type & ITER_DISCARD)) {  \
89                 } else {                                        \
90                         const struct iovec *iov;                \
91                         struct iovec v;                         \
92                         iterate_iovec(i, n, v, iov, skip, (I))  \
93                 }                                               \
94         }                                                       \
95 }
96
97 #define iterate_and_advance(i, n, v, I, B, K) {                 \
98         if (unlikely(i->count < n))                             \
99                 n = i->count;                                   \
100         if (i->count) {                                         \
101                 size_t skip = i->iov_offset;                    \
102                 if (unlikely(i->type & ITER_BVEC)) {            \
103                         const struct bio_vec *bvec = i->bvec;   \
104                         struct bio_vec v;                       \
105                         struct bvec_iter __bi;                  \
106                         iterate_bvec(i, n, v, __bi, skip, (B))  \
107                         i->bvec = __bvec_iter_bvec(i->bvec, __bi);      \
108                         i->nr_segs -= i->bvec - bvec;           \
109                         skip = __bi.bi_bvec_done;               \
110                 } else if (unlikely(i->type & ITER_KVEC)) {     \
111                         const struct kvec *kvec;                \
112                         struct kvec v;                          \
113                         iterate_kvec(i, n, v, kvec, skip, (K))  \
114                         if (skip == kvec->iov_len) {            \
115                                 kvec++;                         \
116                                 skip = 0;                       \
117                         }                                       \
118                         i->nr_segs -= kvec - i->kvec;           \
119                         i->kvec = kvec;                         \
120                 } else if (unlikely(i->type & ITER_DISCARD)) {  \
121                         skip += n;                              \
122                 } else {                                        \
123                         const struct iovec *iov;                \
124                         struct iovec v;                         \
125                         iterate_iovec(i, n, v, iov, skip, (I))  \
126                         if (skip == iov->iov_len) {             \
127                                 iov++;                          \
128                                 skip = 0;                       \
129                         }                                       \
130                         i->nr_segs -= iov - i->iov;             \
131                         i->iov = iov;                           \
132                 }                                               \
133                 i->count -= n;                                  \
134                 i->iov_offset = skip;                           \
135         }                                                       \
136 }
137
138 static int copyout(void __user *to, const void *from, size_t n)
139 {
140         if (access_ok(to, n)) {
141                 kasan_check_read(from, n);
142                 n = raw_copy_to_user(to, from, n);
143         }
144         return n;
145 }
146
147 static int copyin(void *to, const void __user *from, size_t n)
148 {
149         if (access_ok(from, n)) {
150                 kasan_check_write(to, n);
151                 n = raw_copy_from_user(to, from, n);
152         }
153         return n;
154 }
155
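/*
 * Copy data from @page into the user iovecs of @i, advancing the
 * iterator by the number of bytes actually copied (also the return
 * value).  With CONFIG_HIGHMEM, if fault_in_pages_writeable() succeeds
 * on the first destination chunk the page is mapped with kmap_atomic()
 * and copied without sleeping; if that copy comes up short, the
 * remainder is retried under a plain (sleeping) kmap().  Without
 * CONFIG_HIGHMEM the kmap() path is used directly.
 */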
156 static size_t copy_page_to_iter_iovec(struct page *page, size_t offset, size_t bytes,
157                          struct iov_iter *i)
158 {
159         size_t skip, copy, left, wanted;
160         const struct iovec *iov;
161         char __user *buf;
162         void *kaddr, *from;
163
164         if (unlikely(bytes > i->count))
165                 bytes = i->count;
166
167         if (unlikely(!bytes))
168                 return 0;
169
170         might_fault();
171         wanted = bytes;
172         iov = i->iov;
173         skip = i->iov_offset;
174         buf = iov->iov_base + skip;
175         copy = min(bytes, iov->iov_len - skip);
176
177         if (IS_ENABLED(CONFIG_HIGHMEM) && !fault_in_pages_writeable(buf, copy)) {
178                 kaddr = kmap_atomic(page);
179                 from = kaddr + offset;
180
181                 /* first chunk, usually the only one */
182                 left = copyout(buf, from, copy);
183                 copy -= left;
184                 skip += copy;
185                 from += copy;
186                 bytes -= copy;
187
188                 while (unlikely(!left && bytes)) {
189                         iov++;
190                         buf = iov->iov_base;
191                         copy = min(bytes, iov->iov_len);
192                         left = copyout(buf, from, copy);
193                         copy -= left;
194                         skip = copy;
195                         from += copy;
196                         bytes -= copy;
197                 }
198                 if (likely(!bytes)) {
199                         kunmap_atomic(kaddr);
200                         goto done;
201                 }
202                 offset = from - kaddr;
203                 buf += copy;
204                 kunmap_atomic(kaddr);
205                 copy = min(bytes, iov->iov_len - skip);
206         }
207         /* Too bad - revert to non-atomic kmap */
208
209         kaddr = kmap(page);
210         from = kaddr + offset;
211         left = copyout(buf, from, copy);
212         copy -= left;
213         skip += copy;
214         from += copy;
215         bytes -= copy;
216         while (unlikely(!left && bytes)) {
217                 iov++;
218                 buf = iov->iov_base;
219                 copy = min(bytes, iov->iov_len);
220                 left = copyout(buf, from, copy);
221                 copy -= left;
222                 skip = copy;
223                 from += copy;
224                 bytes -= copy;
225         }
226         kunmap(page);
227
228 done:
229         if (skip == iov->iov_len) {
230                 iov++;
231                 skip = 0;
232         }
233         i->count -= wanted - bytes;
234         i->nr_segs -= iov - i->iov;
235         i->iov = iov;
236         i->iov_offset = skip;
237         return wanted - bytes;
238 }
239
240 static size_t copy_page_from_iter_iovec(struct page *page, size_t offset, size_t bytes,
241                          struct iov_iter *i)
242 {
243         size_t skip, copy, left, wanted;
244         const struct iovec *iov;
245         char __user *buf;
246         void *kaddr, *to;
247
248         if (unlikely(bytes > i->count))
249                 bytes = i->count;
250
251         if (unlikely(!bytes))
252                 return 0;
253
254         might_fault();
255         wanted = bytes;
256         iov = i->iov;
257         skip = i->iov_offset;
258         buf = iov->iov_base + skip;
259         copy = min(bytes, iov->iov_len - skip);
260
261         if (IS_ENABLED(CONFIG_HIGHMEM) && !fault_in_pages_readable(buf, copy)) {
262                 kaddr = kmap_atomic(page);
263                 to = kaddr + offset;
264
265                 /* first chunk, usually the only one */
266                 left = copyin(to, buf, copy);
267                 copy -= left;
268                 skip += copy;
269                 to += copy;
270                 bytes -= copy;
271
272                 while (unlikely(!left && bytes)) {
273                         iov++;
274                         buf = iov->iov_base;
275                         copy = min(bytes, iov->iov_len);
276                         left = copyin(to, buf, copy);
277                         copy -= left;
278                         skip = copy;
279                         to += copy;
280                         bytes -= copy;
281                 }
282                 if (likely(!bytes)) {
283                         kunmap_atomic(kaddr);
284                         goto done;
285                 }
286                 offset = to - kaddr;
287                 buf += copy;
288                 kunmap_atomic(kaddr);
289                 copy = min(bytes, iov->iov_len - skip);
290         }
291         /* Too bad - revert to non-atomic kmap */
292
293         kaddr = kmap(page);
294         to = kaddr + offset;
295         left = copyin(to, buf, copy);
296         copy -= left;
297         skip += copy;
298         to += copy;
299         bytes -= copy;
300         while (unlikely(!left && bytes)) {
301                 iov++;
302                 buf = iov->iov_base;
303                 copy = min(bytes, iov->iov_len);
304                 left = copyin(to, buf, copy);
305                 copy -= left;
306                 skip = copy;
307                 to += copy;
308                 bytes -= copy;
309         }
310         kunmap(page);
311
312 done:
313         if (skip == iov->iov_len) {
314                 iov++;
315                 skip = 0;
316         }
317         i->count -= wanted - bytes;
318         i->nr_segs -= iov - i->iov;
319         i->iov = iov;
320         i->iov_offset = skip;
321         return wanted - bytes;
322 }
323
324 #ifdef PIPE_PARANOIA
325 static bool sanity(const struct iov_iter *i)
326 {
327         struct pipe_inode_info *pipe = i->pipe;
328         int idx = i->idx;
329         int next = pipe->curbuf + pipe->nrbufs;
330         if (i->iov_offset) {
331                 struct pipe_buffer *p;
332                 if (unlikely(!pipe->nrbufs))
333                         goto Bad;       // pipe must be non-empty
334                 if (unlikely(idx != ((next - 1) & (pipe->buffers - 1))))
335                         goto Bad;       // must be at the last buffer...
336
337                 p = &pipe->bufs[idx];
338                 if (unlikely(p->offset + p->len != i->iov_offset))
339                         goto Bad;       // ... at the end of segment
340         } else {
341                 if (idx != (next & (pipe->buffers - 1)))
342                         goto Bad;       // must be right after the last buffer
343         }
344         return true;
345 Bad:
346         printk(KERN_ERR "idx = %d, offset = %zd\n", i->idx, i->iov_offset);
347         printk(KERN_ERR "curbuf = %d, nrbufs = %d, buffers = %d\n",
348                         pipe->curbuf, pipe->nrbufs, pipe->buffers);
349         for (idx = 0; idx < pipe->buffers; idx++)
350                 printk(KERN_ERR "[%p %p %d %d]\n",
351                         pipe->bufs[idx].ops,
352                         pipe->bufs[idx].page,
353                         pipe->bufs[idx].offset,
354                         pipe->bufs[idx].len);
355         WARN_ON(1);
356         return false;
357 }
358 #else
359 #define sanity(i) true
360 #endif
361
362 static inline int next_idx(int idx, struct pipe_inode_info *pipe)
363 {
364         return (idx + 1) & (pipe->buffers - 1);
365 }
366
367 static size_t copy_page_to_iter_pipe(struct page *page, size_t offset, size_t bytes,
368                          struct iov_iter *i)
369 {
370         struct pipe_inode_info *pipe = i->pipe;
371         struct pipe_buffer *buf;
372         size_t off;
373         int idx;
374
375         if (unlikely(bytes > i->count))
376                 bytes = i->count;
377
378         if (unlikely(!bytes))
379                 return 0;
380
381         if (!sanity(i))
382                 return 0;
383
384         off = i->iov_offset;
385         idx = i->idx;
386         buf = &pipe->bufs[idx];
387         if (off) {
388                 if (offset == off && buf->page == page) {
389                         /* merge with the last one */
390                         buf->len += bytes;
391                         i->iov_offset += bytes;
392                         goto out;
393                 }
394                 idx = next_idx(idx, pipe);
395                 buf = &pipe->bufs[idx];
396         }
397         if (idx == pipe->curbuf && pipe->nrbufs)
398                 return 0;
399         pipe->nrbufs++;
400         buf->ops = &page_cache_pipe_buf_ops;
401         get_page(buf->page = page);
402         buf->offset = offset;
403         buf->len = bytes;
404         i->iov_offset = offset + bytes;
405         i->idx = idx;
406 out:
407         i->count -= bytes;
408         return bytes;
409 }
410
411 /*
412  * Fault in one or more iovecs of the given iov_iter, to a maximum length of
413  * bytes.  For each iovec, fault in each page that constitutes the iovec.
414  *
415  * Return 0 on success, or non-zero if the memory could not be accessed (i.e.
416  * because it is an invalid address).
417  */
418 int iov_iter_fault_in_readable(struct iov_iter *i, size_t bytes)
419 {
420         size_t skip = i->iov_offset;
421         const struct iovec *iov;
422         int err;
423         struct iovec v;
424
425         if (!(i->type & (ITER_BVEC|ITER_KVEC))) {
426                 iterate_iovec(i, bytes, v, iov, skip, ({
427                         err = fault_in_pages_readable(v.iov_base, v.iov_len);
428                         if (unlikely(err))
429                                 return err;
430                 0;}))
431         }
432         return 0;
433 }
434 EXPORT_SYMBOL(iov_iter_fault_in_readable);
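
/*
 * Illustrative sketch of the usual caller pattern (roughly what buffered
 * write paths such as generic_perform_write() do; "page", "offset",
 * "bytes" and "status" are the caller's own variables): fault the user
 * pages in while it is still legal to sleep, then do the real copy with
 * page faults disabled and retry if it comes up short.
 *
 *	if (unlikely(iov_iter_fault_in_readable(i, bytes))) {
 *		status = -EFAULT;
 *		break;
 *	}
 *	...
 *	copied = iov_iter_copy_from_user_atomic(page, i, offset, bytes);
 *	...
 *	iov_iter_advance(i, copied);
 */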
435
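/**
 * iov_iter_init - initialise an iterator over an array of user-space iovecs
 * @i: the iterator to initialise
 * @direction: READ if the buffers are the destination of the transfer,
 *	WRITE if they are the source
 * @iov: array of struct iovec describing the user buffers
 * @nr_segs: number of elements in @iov
 * @count: total number of bytes covered by @iov
 *
 * Note that when the caller runs with a kernel data segment
 * (uaccess_kernel()), the iterator is set up as ITER_KVEC and @iov is
 * reinterpreted as an array of kvecs.
 */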
436 void iov_iter_init(struct iov_iter *i, unsigned int direction,
437                         const struct iovec *iov, unsigned long nr_segs,
438                         size_t count)
439 {
440         WARN_ON(direction & ~(READ | WRITE));
441         direction &= READ | WRITE;
442
443         /* It will get better.  Eventually... */
444         if (uaccess_kernel()) {
445                 i->type = ITER_KVEC | direction;
446                 i->kvec = (struct kvec *)iov;
447         } else {
448                 i->type = ITER_IOVEC | direction;
449                 i->iov = iov;
450         }
451         i->nr_segs = nr_segs;
452         i->iov_offset = 0;
453         i->count = count;
454 }
455 EXPORT_SYMBOL(iov_iter_init);
456
457 static void memcpy_from_page(char *to, struct page *page, size_t offset, size_t len)
458 {
459         char *from = kmap_atomic(page);
460         memcpy(to, from + offset, len);
461         kunmap_atomic(from);
462 }
463
464 static void memcpy_to_page(struct page *page, size_t offset, const char *from, size_t len)
465 {
466         char *to = kmap_atomic(page);
467         memcpy(to + offset, from, len);
468         kunmap_atomic(to);
469 }
470
471 static void memzero_page(struct page *page, size_t offset, size_t len)
472 {
473         char *addr = kmap_atomic(page);
474         memset(addr + offset, 0, len);
475         kunmap_atomic(addr);
476 }
477
478 static inline bool allocated(struct pipe_buffer *buf)
479 {
480         return buf->ops == &default_pipe_buf_ops;
481 }
482
483 static inline void data_start(const struct iov_iter *i, int *idxp, size_t *offp)
484 {
485         size_t off = i->iov_offset;
486         int idx = i->idx;
487         if (off && (!allocated(&i->pipe->bufs[idx]) || off == PAGE_SIZE)) {
488                 idx = next_idx(idx, i->pipe);
489                 off = 0;
490         }
491         *idxp = idx;
492         *offp = off;
493 }
494
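/*
 * Make room in the pipe for up to @size bytes past the current end of
 * data: top up the last partially filled buffer (if any), then allocate
 * fresh pages until @size bytes are covered, the pipe is full, or an
 * allocation fails.  *idxp/*offp are set to where the caller should
 * start writing; the return value is how much space was actually
 * secured, which may be less than @size.
 */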
495 static size_t push_pipe(struct iov_iter *i, size_t size,
496                         int *idxp, size_t *offp)
497 {
498         struct pipe_inode_info *pipe = i->pipe;
499         size_t off;
500         int idx;
501         ssize_t left;
502
503         if (unlikely(size > i->count))
504                 size = i->count;
505         if (unlikely(!size))
506                 return 0;
507
508         left = size;
509         data_start(i, &idx, &off);
510         *idxp = idx;
511         *offp = off;
512         if (off) {
513                 left -= PAGE_SIZE - off;
514                 if (left <= 0) {
515                         pipe->bufs[idx].len += size;
516                         return size;
517                 }
518                 pipe->bufs[idx].len = PAGE_SIZE;
519                 idx = next_idx(idx, pipe);
520         }
521         while (idx != pipe->curbuf || !pipe->nrbufs) {
522                 struct page *page = alloc_page(GFP_USER);
523                 if (!page)
524                         break;
525                 pipe->nrbufs++;
526                 pipe->bufs[idx].ops = &default_pipe_buf_ops;
527                 pipe->bufs[idx].page = page;
528                 pipe->bufs[idx].offset = 0;
529                 if (left <= PAGE_SIZE) {
530                         pipe->bufs[idx].len = left;
531                         return size;
532                 }
533                 pipe->bufs[idx].len = PAGE_SIZE;
534                 left -= PAGE_SIZE;
535                 idx = next_idx(idx, pipe);
536         }
537         return size - left;
538 }
539
540 static size_t copy_pipe_to_iter(const void *addr, size_t bytes,
541                                 struct iov_iter *i)
542 {
543         struct pipe_inode_info *pipe = i->pipe;
544         size_t n, off;
545         int idx;
546
547         if (!sanity(i))
548                 return 0;
549
550         bytes = n = push_pipe(i, bytes, &idx, &off);
551         if (unlikely(!n))
552                 return 0;
553         for ( ; n; idx = next_idx(idx, pipe), off = 0) {
554                 size_t chunk = min_t(size_t, n, PAGE_SIZE - off);
555                 memcpy_to_page(pipe->bufs[idx].page, off, addr, chunk);
556                 i->idx = idx;
557                 i->iov_offset = off + chunk;
558                 n -= chunk;
559                 addr += chunk;
560         }
561         i->count -= bytes;
562         return bytes;
563 }
564
565 static __wsum csum_and_memcpy(void *to, const void *from, size_t len,
566                               __wsum sum, size_t off)
567 {
568         __wsum next = csum_partial_copy_nocheck(from, to, len, 0);
569         return csum_block_add(sum, next, off);
570 }
571
572 static size_t csum_and_copy_to_pipe_iter(const void *addr, size_t bytes,
573                                 __wsum *csum, struct iov_iter *i)
574 {
575         struct pipe_inode_info *pipe = i->pipe;
576         size_t n, r;
577         size_t off = 0;
578         __wsum sum = *csum;
579         int idx;
580
581         if (!sanity(i))
582                 return 0;
583
584         bytes = n = push_pipe(i, bytes, &idx, &r);
585         if (unlikely(!n))
586                 return 0;
587         for ( ; n; idx = next_idx(idx, pipe), r = 0) {
588                 size_t chunk = min_t(size_t, n, PAGE_SIZE - r);
589                 char *p = kmap_atomic(pipe->bufs[idx].page);
590                 sum = csum_and_memcpy(p + r, addr, chunk, sum, off);
591                 kunmap_atomic(p);
592                 i->idx = idx;
593                 i->iov_offset = r + chunk;
594                 n -= chunk;
595                 off += chunk;
596                 addr += chunk;
597         }
598         i->count -= bytes;
599         *csum = sum;
600         return bytes;
601 }
602
603 size_t _copy_to_iter(const void *addr, size_t bytes, struct iov_iter *i)
604 {
605         const char *from = addr;
606         if (unlikely(iov_iter_is_pipe(i)))
607                 return copy_pipe_to_iter(addr, bytes, i);
608         if (iter_is_iovec(i))
609                 might_fault();
610         iterate_and_advance(i, bytes, v,
611                 copyout(v.iov_base, (from += v.iov_len) - v.iov_len, v.iov_len),
612                 memcpy_to_page(v.bv_page, v.bv_offset,
613                                (from += v.bv_len) - v.bv_len, v.bv_len),
614                 memcpy(v.iov_base, (from += v.iov_len) - v.iov_len, v.iov_len)
615         )
616
617         return bytes;
618 }
619 EXPORT_SYMBOL(_copy_to_iter);
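
/*
 * Illustrative sketch of a typical copy_to_iter() user: a ->read_iter()
 * method handing a kernel buffer to whatever flavour of iterator it was
 * given ("my_read_iter" and "my_buf" are placeholders, not symbols from
 * this file).
 *
 *	static ssize_t my_read_iter(struct kiocb *iocb, struct iov_iter *to)
 *	{
 *		size_t len = min(iov_iter_count(to), sizeof(my_buf));
 *
 *		return copy_to_iter(my_buf, len, to);
 *	}
 */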
620
621 #ifdef CONFIG_ARCH_HAS_UACCESS_MCSAFE
622 static int copyout_mcsafe(void __user *to, const void *from, size_t n)
623 {
624         if (access_ok(to, n)) {
625                 kasan_check_read(from, n);
626                 n = copy_to_user_mcsafe((__force void *) to, from, n);
627         }
628         return n;
629 }
630
631 static unsigned long memcpy_mcsafe_to_page(struct page *page, size_t offset,
632                 const char *from, size_t len)
633 {
634         unsigned long ret;
635         char *to;
636
637         to = kmap_atomic(page);
638         ret = memcpy_mcsafe(to + offset, from, len);
639         kunmap_atomic(to);
640
641         return ret;
642 }
643
644 static size_t copy_pipe_to_iter_mcsafe(const void *addr, size_t bytes,
645                                 struct iov_iter *i)
646 {
647         struct pipe_inode_info *pipe = i->pipe;
648         size_t n, off, xfer = 0;
649         int idx;
650
651         if (!sanity(i))
652                 return 0;
653
654         bytes = n = push_pipe(i, bytes, &idx, &off);
655         if (unlikely(!n))
656                 return 0;
657         for ( ; n; idx = next_idx(idx, pipe), off = 0) {
658                 size_t chunk = min_t(size_t, n, PAGE_SIZE - off);
659                 unsigned long rem;
660
661                 rem = memcpy_mcsafe_to_page(pipe->bufs[idx].page, off, addr,
662                                 chunk);
663                 i->idx = idx;
664                 i->iov_offset = off + chunk - rem;
665                 xfer += chunk - rem;
666                 if (rem)
667                         break;
668                 n -= chunk;
669                 addr += chunk;
670         }
671         i->count -= xfer;
672         return xfer;
673 }
674
675 /**
676  * _copy_to_iter_mcsafe - copy to user with source-read error exception handling
677  * @addr: source kernel address
678  * @bytes: total transfer length
679  * @i: destination iterator
680  *
681  * The pmem driver arranges for filesystem-dax to use this facility via
682  * dax_copy_to_iter() for protecting read/write to persistent memory.
683  * Unless / until an architecture can guarantee identical performance
684  * between _copy_to_iter_mcsafe() and _copy_to_iter() it would be a
685  * performance regression to switch more users to the mcsafe version.
686  *
687  * Otherwise, the main differences between this and a typical _copy_to_iter() are:
688  *
689  * * Typical tail/residue handling after a fault retries the copy
690  *   byte-by-byte until the fault happens again. Re-triggering machine
691  *   checks is potentially fatal so the implementation uses source
692  *   alignment and poison alignment assumptions to avoid re-triggering
693  *   hardware exceptions.
694  *
695  * * ITER_KVEC, ITER_PIPE, and ITER_BVEC can return short copies.
696  *   Compare to copy_to_iter() where only ITER_IOVEC attempts might return
697  *   a short copy.
698  *
699  * See MCSAFE_TEST for self-test.
700  */
701 size_t _copy_to_iter_mcsafe(const void *addr, size_t bytes, struct iov_iter *i)
702 {
703         const char *from = addr;
704         unsigned long rem, curr_addr, s_addr = (unsigned long) addr;
705
706         if (unlikely(iov_iter_is_pipe(i)))
707                 return copy_pipe_to_iter_mcsafe(addr, bytes, i);
708         if (iter_is_iovec(i))
709                 might_fault();
710         iterate_and_advance(i, bytes, v,
711                 copyout_mcsafe(v.iov_base, (from += v.iov_len) - v.iov_len, v.iov_len),
712                 ({
713                 rem = memcpy_mcsafe_to_page(v.bv_page, v.bv_offset,
714                                (from += v.bv_len) - v.bv_len, v.bv_len);
715                 if (rem) {
716                         curr_addr = (unsigned long) from;
717                         bytes = curr_addr - s_addr - rem;
718                         return bytes;
719                 }
720                 }),
721                 ({
722                 rem = memcpy_mcsafe(v.iov_base, (from += v.iov_len) - v.iov_len,
723                                 v.iov_len);
724                 if (rem) {
725                         curr_addr = (unsigned long) from;
726                         bytes = curr_addr - s_addr - rem;
727                         return bytes;
728                 }
729                 })
730         )
731
732         return bytes;
733 }
734 EXPORT_SYMBOL_GPL(_copy_to_iter_mcsafe);
735 #endif /* CONFIG_ARCH_HAS_UACCESS_MCSAFE */
736
737 size_t _copy_from_iter(void *addr, size_t bytes, struct iov_iter *i)
738 {
739         char *to = addr;
740         if (unlikely(iov_iter_is_pipe(i))) {
741                 WARN_ON(1);
742                 return 0;
743         }
744         if (iter_is_iovec(i))
745                 might_fault();
746         iterate_and_advance(i, bytes, v,
747                 copyin((to += v.iov_len) - v.iov_len, v.iov_base, v.iov_len),
748                 memcpy_from_page((to += v.bv_len) - v.bv_len, v.bv_page,
749                                  v.bv_offset, v.bv_len),
750                 memcpy((to += v.iov_len) - v.iov_len, v.iov_base, v.iov_len)
751         )
752
753         return bytes;
754 }
755 EXPORT_SYMBOL(_copy_from_iter);
756
757 bool _copy_from_iter_full(void *addr, size_t bytes, struct iov_iter *i)
758 {
759         char *to = addr;
760         if (unlikely(iov_iter_is_pipe(i))) {
761                 WARN_ON(1);
762                 return false;
763         }
764         if (unlikely(i->count < bytes))
765                 return false;
766
767         if (iter_is_iovec(i))
768                 might_fault();
769         iterate_all_kinds(i, bytes, v, ({
770                 if (copyin((to += v.iov_len) - v.iov_len,
771                                       v.iov_base, v.iov_len))
772                         return false;
773                 0;}),
774                 memcpy_from_page((to += v.bv_len) - v.bv_len, v.bv_page,
775                                  v.bv_offset, v.bv_len),
776                 memcpy((to += v.iov_len) - v.iov_len, v.iov_base, v.iov_len)
777         )
778
779         iov_iter_advance(i, bytes);
780         return true;
781 }
782 EXPORT_SYMBOL(_copy_from_iter_full);
783
784 size_t _copy_from_iter_nocache(void *addr, size_t bytes, struct iov_iter *i)
785 {
786         char *to = addr;
787         if (unlikely(iov_iter_is_pipe(i))) {
788                 WARN_ON(1);
789                 return 0;
790         }
791         iterate_and_advance(i, bytes, v,
792                 __copy_from_user_inatomic_nocache((to += v.iov_len) - v.iov_len,
793                                          v.iov_base, v.iov_len),
794                 memcpy_from_page((to += v.bv_len) - v.bv_len, v.bv_page,
795                                  v.bv_offset, v.bv_len),
796                 memcpy((to += v.iov_len) - v.iov_len, v.iov_base, v.iov_len)
797         )
798
799         return bytes;
800 }
801 EXPORT_SYMBOL(_copy_from_iter_nocache);
802
803 #ifdef CONFIG_ARCH_HAS_UACCESS_FLUSHCACHE
804 /**
805  * _copy_from_iter_flushcache - write destination through cpu cache
806  * @addr: destination kernel address
807  * @bytes: total transfer length
808  * @i: source iterator
809  *
810  * The pmem driver arranges for filesystem-dax to use this facility via
811  * dax_copy_from_iter() for ensuring that writes to persistent memory
812  * are flushed through the CPU cache. It is differentiated from
814  * _copy_from_iter_nocache() in that it guarantees all data is flushed for
814  * all iterator types. The _copy_from_iter_nocache() only attempts to
815  * bypass the cache for the ITER_IOVEC case, and on some archs may use
816  * instructions that strand dirty-data in the cache.
817  */
818 size_t _copy_from_iter_flushcache(void *addr, size_t bytes, struct iov_iter *i)
819 {
820         char *to = addr;
821         if (unlikely(iov_iter_is_pipe(i))) {
822                 WARN_ON(1);
823                 return 0;
824         }
825         iterate_and_advance(i, bytes, v,
826                 __copy_from_user_flushcache((to += v.iov_len) - v.iov_len,
827                                          v.iov_base, v.iov_len),
828                 memcpy_page_flushcache((to += v.bv_len) - v.bv_len, v.bv_page,
829                                  v.bv_offset, v.bv_len),
830                 memcpy_flushcache((to += v.iov_len) - v.iov_len, v.iov_base,
831                         v.iov_len)
832         )
833
834         return bytes;
835 }
836 EXPORT_SYMBOL_GPL(_copy_from_iter_flushcache);
837 #endif
838
839 bool _copy_from_iter_full_nocache(void *addr, size_t bytes, struct iov_iter *i)
840 {
841         char *to = addr;
842         if (unlikely(iov_iter_is_pipe(i))) {
843                 WARN_ON(1);
844                 return false;
845         }
846         if (unlikely(i->count < bytes))
847                 return false;
848         iterate_all_kinds(i, bytes, v, ({
849                 if (__copy_from_user_inatomic_nocache((to += v.iov_len) - v.iov_len,
850                                              v.iov_base, v.iov_len))
851                         return false;
852                 0;}),
853                 memcpy_from_page((to += v.bv_len) - v.bv_len, v.bv_page,
854                                  v.bv_offset, v.bv_len),
855                 memcpy((to += v.iov_len) - v.iov_len, v.iov_base, v.iov_len)
856         )
857
858         iov_iter_advance(i, bytes);
859         return true;
860 }
861 EXPORT_SYMBOL(_copy_from_iter_full_nocache);
862
863 static inline bool page_copy_sane(struct page *page, size_t offset, size_t n)
864 {
865         struct page *head;
866         size_t v = n + offset;
867
868         /*
869          * The general case needs to access the page order in order
870          * to compute the page size.
871          * However, we mostly deal with order-0 pages and thus can
872          * avoid a possible cache line miss for requests that fit all
873          * page orders.
874          */
875         if (n <= v && v <= PAGE_SIZE)
876                 return true;
877
878         head = compound_head(page);
879         v += (page - head) << PAGE_SHIFT;
880
881         if (likely(n <= v && v <= (page_size(head))))
882                 return true;
883         WARN_ON(1);
884         return false;
885 }
886
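/**
 * copy_page_to_iter - copy part of a page into an iov_iter
 * @page: source page
 * @offset: offset within @page to start copying from
 * @bytes: maximum number of bytes to copy
 * @i: destination iterator
 *
 * Dispatches on the iterator flavour: kernel-backed iterators (bvec,
 * kvec) are served through a temporary kmap_atomic() of the page, a pipe
 * iterator gets a reference to the page spliced in without copying, a
 * discard iterator simply reports the bytes as consumed, and user iovecs
 * go through copy_page_to_iter_iovec().  Returns the number of bytes
 * copied, which may be short if a user copy faults.
 */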
887 size_t copy_page_to_iter(struct page *page, size_t offset, size_t bytes,
888                          struct iov_iter *i)
889 {
890         if (unlikely(!page_copy_sane(page, offset, bytes)))
891                 return 0;
892         if (i->type & (ITER_BVEC|ITER_KVEC)) {
893                 void *kaddr = kmap_atomic(page);
894                 size_t wanted = copy_to_iter(kaddr + offset, bytes, i);
895                 kunmap_atomic(kaddr);
896                 return wanted;
897         } else if (unlikely(iov_iter_is_discard(i)))
898                 return bytes;
899         else if (likely(!iov_iter_is_pipe(i)))
900                 return copy_page_to_iter_iovec(page, offset, bytes, i);
901         else
902                 return copy_page_to_iter_pipe(page, offset, bytes, i);
903 }
904 EXPORT_SYMBOL(copy_page_to_iter);
905
906 size_t copy_page_from_iter(struct page *page, size_t offset, size_t bytes,
907                          struct iov_iter *i)
908 {
909         if (unlikely(!page_copy_sane(page, offset, bytes)))
910                 return 0;
911         if (unlikely(iov_iter_is_pipe(i) || iov_iter_is_discard(i))) {
912                 WARN_ON(1);
913                 return 0;
914         }
915         if (i->type & (ITER_BVEC|ITER_KVEC)) {
916                 void *kaddr = kmap_atomic(page);
917                 size_t wanted = _copy_from_iter(kaddr + offset, bytes, i);
918                 kunmap_atomic(kaddr);
919                 return wanted;
920         } else
921                 return copy_page_from_iter_iovec(page, offset, bytes, i);
922 }
923 EXPORT_SYMBOL(copy_page_from_iter);
924
925 static size_t pipe_zero(size_t bytes, struct iov_iter *i)
926 {
927         struct pipe_inode_info *pipe = i->pipe;
928         size_t n, off;
929         int idx;
930
931         if (!sanity(i))
932                 return 0;
933
934         bytes = n = push_pipe(i, bytes, &idx, &off);
935         if (unlikely(!n))
936                 return 0;
937
938         for ( ; n; idx = next_idx(idx, pipe), off = 0) {
939                 size_t chunk = min_t(size_t, n, PAGE_SIZE - off);
940                 memzero_page(pipe->bufs[idx].page, off, chunk);
941                 i->idx = idx;
942                 i->iov_offset = off + chunk;
943                 n -= chunk;
944         }
945         i->count -= bytes;
946         return bytes;
947 }
948
949 size_t iov_iter_zero(size_t bytes, struct iov_iter *i)
950 {
951         if (unlikely(iov_iter_is_pipe(i)))
952                 return pipe_zero(bytes, i);
953         iterate_and_advance(i, bytes, v,
954                 clear_user(v.iov_base, v.iov_len),
955                 memzero_page(v.bv_page, v.bv_offset, v.bv_len),
956                 memset(v.iov_base, 0, v.iov_len)
957         )
958
959         return bytes;
960 }
961 EXPORT_SYMBOL(iov_iter_zero);
962
963 size_t iov_iter_copy_from_user_atomic(struct page *page,
964                 struct iov_iter *i, unsigned long offset, size_t bytes)
965 {
966         char *kaddr = kmap_atomic(page), *p = kaddr + offset;
967         if (unlikely(!page_copy_sane(page, offset, bytes))) {
968                 kunmap_atomic(kaddr);
969                 return 0;
970         }
971         if (unlikely(iov_iter_is_pipe(i) || iov_iter_is_discard(i))) {
972                 kunmap_atomic(kaddr);
973                 WARN_ON(1);
974                 return 0;
975         }
976         iterate_all_kinds(i, bytes, v,
977                 copyin((p += v.iov_len) - v.iov_len, v.iov_base, v.iov_len),
978                 memcpy_from_page((p += v.bv_len) - v.bv_len, v.bv_page,
979                                  v.bv_offset, v.bv_len),
980                 memcpy((p += v.iov_len) - v.iov_len, v.iov_base, v.iov_len)
981         )
982         kunmap_atomic(kaddr);
983         return bytes;
984 }
985 EXPORT_SYMBOL(iov_iter_copy_from_user_atomic);
986
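/*
 * Release every pipe buffer past the iterator's current position: trim
 * the buffer that i->iov_offset points into (if any) and free the rest,
 * so that pipe->nrbufs again matches what the iterator has produced.
 */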
987 static inline void pipe_truncate(struct iov_iter *i)
988 {
989         struct pipe_inode_info *pipe = i->pipe;
990         if (pipe->nrbufs) {
991                 size_t off = i->iov_offset;
992                 int idx = i->idx;
993                 int nrbufs = (idx - pipe->curbuf) & (pipe->buffers - 1);
994                 if (off) {
995                         pipe->bufs[idx].len = off - pipe->bufs[idx].offset;
996                         idx = next_idx(idx, pipe);
997                         nrbufs++;
998                 }
999                 while (pipe->nrbufs > nrbufs) {
1000                         pipe_buf_release(pipe, &pipe->bufs[idx]);
1001                         idx = next_idx(idx, pipe);
1002                         pipe->nrbufs--;
1003                 }
1004         }
1005 }
1006
1007 static void pipe_advance(struct iov_iter *i, size_t size)
1008 {
1009         struct pipe_inode_info *pipe = i->pipe;
1010         if (unlikely(i->count < size))
1011                 size = i->count;
1012         if (size) {
1013                 struct pipe_buffer *buf;
1014                 size_t off = i->iov_offset, left = size;
1015                 int idx = i->idx;
1016                 if (off) /* make it relative to the beginning of buffer */
1017                         left += off - pipe->bufs[idx].offset;
1018                 while (1) {
1019                         buf = &pipe->bufs[idx];
1020                         if (left <= buf->len)
1021                                 break;
1022                         left -= buf->len;
1023                         idx = next_idx(idx, pipe);
1024                 }
1025                 i->idx = idx;
1026                 i->iov_offset = buf->offset + left;
1027         }
1028         i->count -= size;
1029         /* ... and discard everything past that point */
1030         pipe_truncate(i);
1031 }
1032
1033 void iov_iter_advance(struct iov_iter *i, size_t size)
1034 {
1035         if (unlikely(iov_iter_is_pipe(i))) {
1036                 pipe_advance(i, size);
1037                 return;
1038         }
1039         if (unlikely(iov_iter_is_discard(i))) {
1040                 i->count -= size;
1041                 return;
1042         }
1043         iterate_and_advance(i, size, v, 0, 0, 0)
1044 }
1045 EXPORT_SYMBOL(iov_iter_advance);
1046
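/*
 * Undo the effect of having consumed @unroll bytes from the iterator:
 * walk backwards, restoring any segments (or pipe buffers) that were
 * stepped over.  Callers use this to rewind after a partial or failed
 * transfer; @unroll must not exceed what was previously advanced.
 */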
1047 void iov_iter_revert(struct iov_iter *i, size_t unroll)
1048 {
1049         if (!unroll)
1050                 return;
1051         if (WARN_ON(unroll > MAX_RW_COUNT))
1052                 return;
1053         i->count += unroll;
1054         if (unlikely(iov_iter_is_pipe(i))) {
1055                 struct pipe_inode_info *pipe = i->pipe;
1056                 int idx = i->idx;
1057                 size_t off = i->iov_offset;
1058                 while (1) {
1059                         size_t n = off - pipe->bufs[idx].offset;
1060                         if (unroll < n) {
1061                                 off -= unroll;
1062                                 break;
1063                         }
1064                         unroll -= n;
1065                         if (!unroll && idx == i->start_idx) {
1066                                 off = 0;
1067                                 break;
1068                         }
1069                         if (!idx--)
1070                                 idx = pipe->buffers - 1;
1071                         off = pipe->bufs[idx].offset + pipe->bufs[idx].len;
1072                 }
1073                 i->iov_offset = off;
1074                 i->idx = idx;
1075                 pipe_truncate(i);
1076                 return;
1077         }
1078         if (unlikely(iov_iter_is_discard(i)))
1079                 return;
1080         if (unroll <= i->iov_offset) {
1081                 i->iov_offset -= unroll;
1082                 return;
1083         }
1084         unroll -= i->iov_offset;
1085         if (iov_iter_is_bvec(i)) {
1086                 const struct bio_vec *bvec = i->bvec;
1087                 while (1) {
1088                         size_t n = (--bvec)->bv_len;
1089                         i->nr_segs++;
1090                         if (unroll <= n) {
1091                                 i->bvec = bvec;
1092                                 i->iov_offset = n - unroll;
1093                                 return;
1094                         }
1095                         unroll -= n;
1096                 }
1097         } else { /* same logics for iovec and kvec */
1098                 const struct iovec *iov = i->iov;
1099                 while (1) {
1100                         size_t n = (--iov)->iov_len;
1101                         i->nr_segs++;
1102                         if (unroll <= n) {
1103                                 i->iov = iov;
1104                                 i->iov_offset = n - unroll;
1105                                 return;
1106                         }
1107                         unroll -= n;
1108                 }
1109         }
1110 }
1111 EXPORT_SYMBOL(iov_iter_revert);
1112
1113 /*
1114  * Return the count of just the current iov_iter segment.
1115  */
1116 size_t iov_iter_single_seg_count(const struct iov_iter *i)
1117 {
1118         if (unlikely(iov_iter_is_pipe(i)))
1119                 return i->count;        // it is a silly place, anyway
1120         if (i->nr_segs == 1)
1121                 return i->count;
1122         if (unlikely(iov_iter_is_discard(i)))
1123                 return i->count;
1124         else if (iov_iter_is_bvec(i))
1125                 return min(i->count, i->bvec->bv_len - i->iov_offset);
1126         else
1127                 return min(i->count, i->iov->iov_len - i->iov_offset);
1128 }
1129 EXPORT_SYMBOL(iov_iter_single_seg_count);
1130
1131 void iov_iter_kvec(struct iov_iter *i, unsigned int direction,
1132                         const struct kvec *kvec, unsigned long nr_segs,
1133                         size_t count)
1134 {
1135         WARN_ON(direction & ~(READ | WRITE));
1136         i->type = ITER_KVEC | (direction & (READ | WRITE));
1137         i->kvec = kvec;
1138         i->nr_segs = nr_segs;
1139         i->iov_offset = 0;
1140         i->count = count;
1141 }
1142 EXPORT_SYMBOL(iov_iter_kvec);
1143
1144 void iov_iter_bvec(struct iov_iter *i, unsigned int direction,
1145                         const struct bio_vec *bvec, unsigned long nr_segs,
1146                         size_t count)
1147 {
1148         WARN_ON(direction & ~(READ | WRITE));
1149         i->type = ITER_BVEC | (direction & (READ | WRITE));
1150         i->bvec = bvec;
1151         i->nr_segs = nr_segs;
1152         i->iov_offset = 0;
1153         i->count = count;
1154 }
1155 EXPORT_SYMBOL(iov_iter_bvec);
1156
1157 void iov_iter_pipe(struct iov_iter *i, unsigned int direction,
1158                         struct pipe_inode_info *pipe,
1159                         size_t count)
1160 {
1161         BUG_ON(direction != READ);
1162         WARN_ON(pipe->nrbufs == pipe->buffers);
1163         i->type = ITER_PIPE | READ;
1164         i->pipe = pipe;
1165         i->idx = (pipe->curbuf + pipe->nrbufs) & (pipe->buffers - 1);
1166         i->iov_offset = 0;
1167         i->count = count;
1168         i->start_idx = i->idx;
1169 }
1170 EXPORT_SYMBOL(iov_iter_pipe);
1171
1172 /**
1173  * iov_iter_discard - Initialise an I/O iterator that discards data
1174  * @i: The iterator to initialise.
1175  * @direction: The direction of the transfer.
1176  * @count: The size of the I/O buffer in bytes.
1177  *
1178  * Set up an I/O iterator that just discards everything that's written to it.
1179  * It's only available as a READ iterator.
1180  */
1181 void iov_iter_discard(struct iov_iter *i, unsigned int direction, size_t count)
1182 {
1183         BUG_ON(direction != READ);
1184         i->type = ITER_DISCARD | READ;
1185         i->count = count;
1186         i->iov_offset = 0;
1187 }
1188 EXPORT_SYMBOL(iov_iter_discard);
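
/*
 * Illustrative sketch: a discard iterator lets copy_to_iter()-style code
 * "read and throw away" data without supplying real buffers ("kbuf",
 * "to_skip" and "copied" are the caller's own variables).
 *
 *	struct iov_iter iter;
 *
 *	iov_iter_discard(&iter, READ, to_skip);
 *	copied = copy_to_iter(kbuf, to_skip, &iter);
 *
 * copy_to_iter() returns to_skip and the data is simply dropped.
 */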
1189
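/*
 * Return the bitwise OR of the addresses (or page offsets) and lengths
 * of all segments covered by the iterator.  A caller can AND the result
 * with an alignment mask to check whether the whole iterator satisfies
 * that alignment; direct-I/O paths commonly use this to decide whether
 * to fall back to buffered I/O.
 */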
1190 unsigned long iov_iter_alignment(const struct iov_iter *i)
1191 {
1192         unsigned long res = 0;
1193         size_t size = i->count;
1194
1195         if (unlikely(iov_iter_is_pipe(i))) {
1196                 if (size && i->iov_offset && allocated(&i->pipe->bufs[i->idx]))
1197                         return size | i->iov_offset;
1198                 return size;
1199         }
1200         iterate_all_kinds(i, size, v,
1201                 (res |= (unsigned long)v.iov_base | v.iov_len, 0),
1202                 res |= v.bv_offset | v.bv_len,
1203                 res |= (unsigned long)v.iov_base | v.iov_len
1204         )
1205         return res;
1206 }
1207 EXPORT_SYMBOL(iov_iter_alignment);
1208
1209 unsigned long iov_iter_gap_alignment(const struct iov_iter *i)
1210 {
1211         unsigned long res = 0;
1212         size_t size = i->count;
1213
1214         if (unlikely(iov_iter_is_pipe(i) || iov_iter_is_discard(i))) {
1215                 WARN_ON(1);
1216                 return ~0U;
1217         }
1218
1219         iterate_all_kinds(i, size, v,
1220                 (res |= (!res ? 0 : (unsigned long)v.iov_base) |
1221                         (size != v.iov_len ? size : 0), 0),
1222                 (res |= (!res ? 0 : (unsigned long)v.bv_offset) |
1223                         (size != v.bv_len ? size : 0)),
1224                 (res |= (!res ? 0 : (unsigned long)v.iov_base) |
1225                         (size != v.iov_len ? size : 0))
1226                 );
1227         return res;
1228 }
1229 EXPORT_SYMBOL(iov_iter_gap_alignment);
1230
1231 static inline ssize_t __pipe_get_pages(struct iov_iter *i,
1232                                 size_t maxsize,
1233                                 struct page **pages,
1234                                 int idx,
1235                                 size_t *start)
1236 {
1237         struct pipe_inode_info *pipe = i->pipe;
1238         ssize_t n = push_pipe(i, maxsize, &idx, start);
1239         if (!n)
1240                 return -EFAULT;
1241
1242         maxsize = n;
1243         n += *start;
1244         while (n > 0) {
1245                 get_page(*pages++ = pipe->bufs[idx].page);
1246                 idx = next_idx(idx, pipe);
1247                 n -= PAGE_SIZE;
1248         }
1249
1250         return maxsize;
1251 }
1252
1253 static ssize_t pipe_get_pages(struct iov_iter *i,
1254                    struct page **pages, size_t maxsize, unsigned maxpages,
1255                    size_t *start)
1256 {
1257         unsigned npages;
1258         size_t capacity;
1259         int idx;
1260
1261         if (!maxsize)
1262                 return 0;
1263
1264         if (!sanity(i))
1265                 return -EFAULT;
1266
1267         data_start(i, &idx, start);
1268         /* some of this one + all after this one */
1269         npages = ((i->pipe->curbuf - idx - 1) & (i->pipe->buffers - 1)) + 1;
1270         capacity = min(npages,maxpages) * PAGE_SIZE - *start;
1271
1272         return __pipe_get_pages(i, min(maxsize, capacity), pages, idx, start);
1273 }
1274
1275 ssize_t iov_iter_get_pages(struct iov_iter *i,
1276                    struct page **pages, size_t maxsize, unsigned maxpages,
1277                    size_t *start)
1278 {
1279         if (maxsize > i->count)
1280                 maxsize = i->count;
1281
1282         if (unlikely(iov_iter_is_pipe(i)))
1283                 return pipe_get_pages(i, pages, maxsize, maxpages, start);
1284         if (unlikely(iov_iter_is_discard(i)))
1285                 return -EFAULT;
1286
1287         iterate_all_kinds(i, maxsize, v, ({
1288                 unsigned long addr = (unsigned long)v.iov_base;
1289                 size_t len = v.iov_len + (*start = addr & (PAGE_SIZE - 1));
1290                 int n;
1291                 int res;
1292
1293                 if (len > maxpages * PAGE_SIZE)
1294                         len = maxpages * PAGE_SIZE;
1295                 addr &= ~(PAGE_SIZE - 1);
1296                 n = DIV_ROUND_UP(len, PAGE_SIZE);
1297                 res = get_user_pages_fast(addr, n,
1298                                 iov_iter_rw(i) != WRITE ?  FOLL_WRITE : 0,
1299                                 pages);
1300                 if (unlikely(res < 0))
1301                         return res;
1302                 return (res == n ? len : res * PAGE_SIZE) - *start;
1303         0;}),({
1304                 /* can't be more than PAGE_SIZE */
1305                 *start = v.bv_offset;
1306                 get_page(*pages = v.bv_page);
1307                 return v.bv_len;
1308         }),({
1309                 return -EFAULT;
1310         })
1311         )
1312         return 0;
1313 }
1314 EXPORT_SYMBOL(iov_iter_get_pages);
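
/*
 * Illustrative sketch of a caller pinning pages for zero-copy I/O
 * (placeholder names, error handling elided).  Note that
 * iov_iter_get_pages() does not advance the iterator; the caller does
 * that separately once it knows how much it consumed, and must drop the
 * page references with put_page() when the I/O completes.
 *
 *	struct page *pages[16];
 *	size_t offset;
 *	ssize_t got;
 *
 *	got = iov_iter_get_pages(iter, pages, bytes, ARRAY_SIZE(pages), &offset);
 *	if (got > 0)
 *		... got bytes are covered, starting at offset into pages[0] ...
 */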
1315
1316 static struct page **get_pages_array(size_t n)
1317 {
1318         return kvmalloc_array(n, sizeof(struct page *), GFP_KERNEL);
1319 }
1320
1321 static ssize_t pipe_get_pages_alloc(struct iov_iter *i,
1322                    struct page ***pages, size_t maxsize,
1323                    size_t *start)
1324 {
1325         struct page **p;
1326         ssize_t n;
1327         int idx;
1328         int npages;
1329
1330         if (!maxsize)
1331                 return 0;
1332
1333         if (!sanity(i))
1334                 return -EFAULT;
1335
1336         data_start(i, &idx, start);
1337         /* some of this one + all after this one */
1338         npages = ((i->pipe->curbuf - idx - 1) & (i->pipe->buffers - 1)) + 1;
1339         n = npages * PAGE_SIZE - *start;
1340         if (maxsize > n)
1341                 maxsize = n;
1342         else
1343                 npages = DIV_ROUND_UP(maxsize + *start, PAGE_SIZE);
1344         p = get_pages_array(npages);
1345         if (!p)
1346                 return -ENOMEM;
1347         n = __pipe_get_pages(i, maxsize, p, idx, start);
1348         if (n > 0)
1349                 *pages = p;
1350         else
1351                 kvfree(p);
1352         return n;
1353 }
1354
1355 ssize_t iov_iter_get_pages_alloc(struct iov_iter *i,
1356                    struct page ***pages, size_t maxsize,
1357                    size_t *start)
1358 {
1359         struct page **p;
1360
1361         if (maxsize > i->count)
1362                 maxsize = i->count;
1363
1364         if (unlikely(iov_iter_is_pipe(i)))
1365                 return pipe_get_pages_alloc(i, pages, maxsize, start);
1366         if (unlikely(iov_iter_is_discard(i)))
1367                 return -EFAULT;
1368
1369         iterate_all_kinds(i, maxsize, v, ({
1370                 unsigned long addr = (unsigned long)v.iov_base;
1371                 size_t len = v.iov_len + (*start = addr & (PAGE_SIZE - 1));
1372                 int n;
1373                 int res;
1374
1375                 addr &= ~(PAGE_SIZE - 1);
1376                 n = DIV_ROUND_UP(len, PAGE_SIZE);
1377                 p = get_pages_array(n);
1378                 if (!p)
1379                         return -ENOMEM;
1380                 res = get_user_pages_fast(addr, n,
1381                                 iov_iter_rw(i) != WRITE ?  FOLL_WRITE : 0, p);
1382                 if (unlikely(res < 0)) {
1383                         kvfree(p);
1384                         return res;
1385                 }
1386                 *pages = p;
1387                 return (res == n ? len : res * PAGE_SIZE) - *start;
1388         0;}),({
1389                 /* can't be more than PAGE_SIZE */
1390                 *start = v.bv_offset;
1391                 *pages = p = get_pages_array(1);
1392                 if (!p)
1393                         return -ENOMEM;
1394                 get_page(*p = v.bv_page);
1395                 return v.bv_len;
1396         }),({
1397                 return -EFAULT;
1398         })
1399         )
1400         return 0;
1401 }
1402 EXPORT_SYMBOL(iov_iter_get_pages_alloc);
1403
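/*
 * Checksumming copy helpers, used mainly by the networking core: data is
 * copied between @addr and the iterator while the bytes are folded into
 * the ones' complement sum at *@csum, with csum_block_add() accounting
 * for each chunk's byte offset so the result matches a single
 * csum_partial() over the whole buffer.
 */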
1404 size_t csum_and_copy_from_iter(void *addr, size_t bytes, __wsum *csum,
1405                                struct iov_iter *i)
1406 {
1407         char *to = addr;
1408         __wsum sum, next;
1409         size_t off = 0;
1410         sum = *csum;
1411         if (unlikely(iov_iter_is_pipe(i) || iov_iter_is_discard(i))) {
1412                 WARN_ON(1);
1413                 return 0;
1414         }
1415         iterate_and_advance(i, bytes, v, ({
1416                 int err = 0;
1417                 next = csum_and_copy_from_user(v.iov_base,
1418                                                (to += v.iov_len) - v.iov_len,
1419                                                v.iov_len, 0, &err);
1420                 if (!err) {
1421                         sum = csum_block_add(sum, next, off);
1422                         off += v.iov_len;
1423                 }
1424                 err ? v.iov_len : 0;
1425         }), ({
1426                 char *p = kmap_atomic(v.bv_page);
1427                 sum = csum_and_memcpy((to += v.bv_len) - v.bv_len,
1428                                       p + v.bv_offset, v.bv_len,
1429                                       sum, off);
1430                 kunmap_atomic(p);
1431                 off += v.bv_len;
1432         }),({
1433                 sum = csum_and_memcpy((to += v.iov_len) - v.iov_len,
1434                                       v.iov_base, v.iov_len,
1435                                       sum, off);
1436                 off += v.iov_len;
1437         })
1438         )
1439         *csum = sum;
1440         return bytes;
1441 }
1442 EXPORT_SYMBOL(csum_and_copy_from_iter);
1443
1444 bool csum_and_copy_from_iter_full(void *addr, size_t bytes, __wsum *csum,
1445                                struct iov_iter *i)
1446 {
1447         char *to = addr;
1448         __wsum sum, next;
1449         size_t off = 0;
1450         sum = *csum;
1451         if (unlikely(iov_iter_is_pipe(i) || iov_iter_is_discard(i))) {
1452                 WARN_ON(1);
1453                 return false;
1454         }
1455         if (unlikely(i->count < bytes))
1456                 return false;
1457         iterate_all_kinds(i, bytes, v, ({
1458                 int err = 0;
1459                 next = csum_and_copy_from_user(v.iov_base,
1460                                                (to += v.iov_len) - v.iov_len,
1461                                                v.iov_len, 0, &err);
1462                 if (err)
1463                         return false;
1464                 sum = csum_block_add(sum, next, off);
1465                 off += v.iov_len;
1466                 0;
1467         }), ({
1468                 char *p = kmap_atomic(v.bv_page);
1469                 sum = csum_and_memcpy((to += v.bv_len) - v.bv_len,
1470                                       p + v.bv_offset, v.bv_len,
1471                                       sum, off);
1472                 kunmap_atomic(p);
1473                 off += v.bv_len;
1474         }),({
1475                 sum = csum_and_memcpy((to += v.iov_len) - v.iov_len,
1476                                       v.iov_base, v.iov_len,
1477                                       sum, off);
1478                 off += v.iov_len;
1479         })
1480         )
1481         *csum = sum;
1482         iov_iter_advance(i, bytes);
1483         return true;
1484 }
1485 EXPORT_SYMBOL(csum_and_copy_from_iter_full);
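
/*
 * Illustrative sketch (editorial addition): the all-or-nothing variant is
 * what datagram senders typically use, folding the payload checksum into a
 * running skb checksum while copying it in.  Only the
 * csum_and_copy_from_iter_full() call reflects this file; the wrapper and
 * its parameters are made up.
 */
static int example_getfrag(void *to, int len, int odd, __wsum *skb_csum,
			   struct iov_iter *from)
{
	__wsum csum = 0;

	/* copies exactly len bytes or nothing; the iterator is advanced
	 * only on success */
	if (!csum_and_copy_from_iter_full(to, len, &csum, from))
		return -EFAULT;

	*skb_csum = csum_block_add(*skb_csum, csum, odd);
	return 0;
}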
1486
1487 size_t csum_and_copy_to_iter(const void *addr, size_t bytes, void *csump,
1488                              struct iov_iter *i)
1489 {
1490         const char *from = addr;
1491         __wsum *csum = csump;
1492         __wsum sum, next;
1493         size_t off = 0;
1494
1495         if (unlikely(iov_iter_is_pipe(i)))
1496                 return csum_and_copy_to_pipe_iter(addr, bytes, csum, i);
1497
1498         sum = *csum;
1499         if (unlikely(iov_iter_is_discard(i))) {
1500                 WARN_ON(1);     /* for now */
1501                 return 0;
1502         }
1503         iterate_and_advance(i, bytes, v, ({
1504                 int err = 0;
1505                 next = csum_and_copy_to_user((from += v.iov_len) - v.iov_len,
1506                                              v.iov_base,
1507                                              v.iov_len, 0, &err);
1508                 if (!err) {
1509                         sum = csum_block_add(sum, next, off);
1510                         off += v.iov_len;
1511                 }
1512                 err ? v.iov_len : 0;
1513         }), ({
1514                 char *p = kmap_atomic(v.bv_page);
1515                 sum = csum_and_memcpy(p + v.bv_offset,
1516                                       (from += v.bv_len) - v.bv_len,
1517                                       v.bv_len, sum, off);
1518                 kunmap_atomic(p);
1519                 off += v.bv_len;
1520         }),({
1521                 sum = csum_and_memcpy(v.iov_base,
1522                                      (from += v.iov_len) - v.iov_len,
1523                                      v.iov_len, sum, off);
1524                 off += v.iov_len;
1525         })
1526         )
1527         *csum = sum;
1528         return bytes;
1529 }
1530 EXPORT_SYMBOL(csum_and_copy_to_iter);
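
/*
 * Illustrative sketch (editorial addition): the mirror direction, pushing a
 * kernel buffer out to a (possibly pipe- or user-backed) iterator while
 * checksumming what was handed over, in the style of the datagram receive
 * path.  Callers compare the return value with the requested length and
 * treat a shortfall as a failed copy.
 */
static int example_put_chunk(const void *kbuf, size_t len,
			     struct iov_iter *to, __wsum *csump)
{
	if (csum_and_copy_to_iter(kbuf, len, csump, to) != len)
		return -EFAULT;
	/* *csump now also covers the len bytes just handed to the iterator */
	return 0;
}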
1531
1532 size_t hash_and_copy_to_iter(const void *addr, size_t bytes, void *hashp,
1533                 struct iov_iter *i)
1534 {
1535 #ifdef CONFIG_CRYPTO
1536         struct ahash_request *hash = hashp;
1537         struct scatterlist sg;
1538         size_t copied;
1539
1540         copied = copy_to_iter(addr, bytes, i);
1541         sg_init_one(&sg, addr, copied);
1542         ahash_request_set_crypt(hash, &sg, NULL, copied);
1543         crypto_ahash_update(hash);
1544         return copied;
1545 #else
1546         return 0;
1547 #endif
1548 }
1549 EXPORT_SYMBOL(hash_and_copy_to_iter);
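
/*
 * Illustrative sketch (editorial addition): hash_and_copy_to_iter() expects
 * an already initialised struct ahash_request.  This assumes <crypto/hash.h>
 * and a synchronous "sha256" implementation are available; the algorithm
 * choice and the helper name are examples only.
 */
static int example_copy_and_hash(const void *buf, size_t len, struct iov_iter *to)
{
	struct crypto_ahash *tfm;
	struct ahash_request *req;
	int err;

	tfm = crypto_alloc_ahash("sha256", 0, CRYPTO_ALG_ASYNC);
	if (IS_ERR(tfm))
		return PTR_ERR(tfm);

	req = ahash_request_alloc(tfm, GFP_KERNEL);
	if (!req) {
		err = -ENOMEM;
		goto out_tfm;
	}
	ahash_request_set_callback(req, 0, NULL, NULL);

	err = crypto_ahash_init(req);
	if (err)
		goto out_req;

	if (hash_and_copy_to_iter(buf, len, req, to) != len)
		err = -EFAULT;
	/* a real caller would extract the digest with crypto_ahash_final() */

out_req:
	ahash_request_free(req);
out_tfm:
	crypto_free_ahash(tfm);
	return err;
}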
1550
1551 int iov_iter_npages(const struct iov_iter *i, int maxpages)
1552 {
1553         size_t size = i->count;
1554         int npages = 0;
1555
1556         if (!size)
1557                 return 0;
1558         if (unlikely(iov_iter_is_discard(i)))
1559                 return 0;
1560
1561         if (unlikely(iov_iter_is_pipe(i))) {
1562                 struct pipe_inode_info *pipe = i->pipe;
1563                 size_t off;
1564                 int idx;
1565
1566                 if (!sanity(i))
1567                         return 0;
1568
1569                 data_start(i, &idx, &off);
1570                 /* some of this one + all after this one */
1571                 npages = ((pipe->curbuf - idx - 1) & (pipe->buffers - 1)) + 1;
1572                 if (npages >= maxpages)
1573                         return maxpages;
1574         } else iterate_all_kinds(i, size, v, ({
1575                 unsigned long p = (unsigned long)v.iov_base;
1576                 npages += DIV_ROUND_UP(p + v.iov_len, PAGE_SIZE)
1577                         - p / PAGE_SIZE;
1578                 if (npages >= maxpages)
1579                         return maxpages;
1580         0;}),({
1581                 npages++;
1582                 if (npages >= maxpages)
1583                         return maxpages;
1584         }),({
1585                 unsigned long p = (unsigned long)v.iov_base;
1586                 npages += DIV_ROUND_UP(p + v.iov_len, PAGE_SIZE)
1587                         - p / PAGE_SIZE;
1588                 if (npages >= maxpages)
1589                         return maxpages;
1590         })
1591         )
1592         return npages;
1593 }
1594 EXPORT_SYMBOL(iov_iter_npages);
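
/*
 * Illustrative sketch (editorial addition): iov_iter_npages() never returns
 * more than maxpages, so it can be used directly to bound an allocation,
 * e.g. a page-pointer array for a subsequent get-pages call.
 */
static struct page **example_alloc_page_array(const struct iov_iter *iter,
					      int cap, int *npages)
{
	*npages = iov_iter_npages(iter, cap);
	if (!*npages)
		return NULL;
	return kcalloc(*npages, sizeof(struct page *), GFP_KERNEL);
}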
1595
1596 const void *dup_iter(struct iov_iter *new, struct iov_iter *old, gfp_t flags)
1597 {
1598         *new = *old;
1599         if (unlikely(iov_iter_is_pipe(new))) {
1600                 WARN_ON(1);
1601                 return NULL;
1602         }
1603         if (unlikely(iov_iter_is_discard(new)))
1604                 return NULL;
1605         if (iov_iter_is_bvec(new))
1606                 return new->bvec = kmemdup(new->bvec,
1607                                     new->nr_segs * sizeof(struct bio_vec),
1608                                     flags);
1609         else
1610                 /* iovec and kvec have identical layout */
1611                 return new->iov = kmemdup(new->iov,
1612                                    new->nr_segs * sizeof(struct iovec),
1613                                    flags);
1614 }
1615 EXPORT_SYMBOL(dup_iter);
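
/*
 * Illustrative sketch (editorial addition): keeping a private copy of a
 * caller's iterator for deferred use.  dup_iter() duplicates the segment
 * array behind the copy, so the copy does not point into the caller's
 * (possibly stack-allocated) iovec/bvec array; the returned pointer must
 * eventually be kfree()d.  This assumes an iovec-, kvec- or bvec-backed
 * source (for ITER_DISCARD nothing is duplicated and NULL is returned).
 */
struct example_deferred_io {
	struct iov_iter iter;		/* private copy, safe to advance later */
	const void *seg_copy;		/* dup_iter() result, for kfree() */
};

static int example_defer(struct example_deferred_io *op, struct iov_iter *src)
{
	op->seg_copy = dup_iter(&op->iter, src, GFP_KERNEL);
	if (!op->seg_copy)
		return -ENOMEM;
	return 0;
}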
1616
1617 /**
1618  * import_iovec() - Copy an array of &struct iovec from userspace
1619  *     into the kernel, check that it is valid, and initialize a new
1620  *     &struct iov_iter iterator to access it.
1621  *
1622  * @type: One of %READ or %WRITE.
1623  * @uvector: Pointer to the userspace array.
1624  * @nr_segs: Number of elements in the userspace array.
1625  * @fast_segs: Number of elements in the array pointed to by @iov.
1626  * @iov: (input and output parameter) Pointer to pointer to (usually small
1627  *     on-stack) kernel array.
1628  * @i: Pointer to iterator that will be initialized on success.
1629  *
1630  * If the array pointed to by *@iov is large enough to hold all @nr_segs,
1631  * then this function places %NULL in *@iov on return. Otherwise, a new
1632  * array will be allocated and the result placed in *@iov. This means that
1633  * the caller may call kfree() on *@iov regardless of whether the small
1634  * on-stack array was used or not (and regardless of whether this function
1635  * returns an error or not).
1636  *
1637  * Return: Negative error code on error; number of bytes imported on success.
1638  */
1639 ssize_t import_iovec(int type, const struct iovec __user *uvector,
1640                  unsigned nr_segs, unsigned fast_segs,
1641                  struct iovec **iov, struct iov_iter *i)
1642 {
1643         ssize_t n;
1644         struct iovec *p;
1645         n = rw_copy_check_uvector(type, uvector, nr_segs, fast_segs,
1646                                   *iov, &p);
1647         if (n < 0) {
1648                 if (p != *iov)
1649                         kfree(p);
1650                 *iov = NULL;
1651                 return n;
1652         }
1653         iov_iter_init(i, type, p, nr_segs, n);
1654         *iov = p == *iov ? NULL : p;
1655         return n;
1656 }
1657 EXPORT_SYMBOL(import_iovec);
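
/*
 * Illustrative sketch (editorial addition): the usual calling convention
 * for import_iovec() in a readv()-style path, using a small on-stack fast
 * array.  As the comment above documents, kfree(iov) is always safe
 * afterwards: *iov is either NULL or the freshly allocated array.
 */
static ssize_t example_readv(const struct iovec __user *uvec, unsigned nr_segs)
{
	struct iovec iovstack[UIO_FASTIOV];
	struct iovec *iov = iovstack;
	struct iov_iter iter;
	ssize_t ret;

	ret = import_iovec(READ, uvec, nr_segs, ARRAY_SIZE(iovstack),
			   &iov, &iter);
	if (ret < 0)
		return ret;	/* *iov was set to NULL, nothing to free */

	/* ... ret bytes are now reachable through &iter; do the I/O ... */

	kfree(iov);		/* safe whether or not iovstack was used */
	return ret;
}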
1658
1659 #ifdef CONFIG_COMPAT
1660 #include <linux/compat.h>
1661
1662 ssize_t compat_import_iovec(int type,
1663                 const struct compat_iovec __user *uvector,
1664                 unsigned nr_segs, unsigned fast_segs,
1665                 struct iovec **iov, struct iov_iter *i)
1666 {
1667         ssize_t n;
1668         struct iovec *p;
1669         n = compat_rw_copy_check_uvector(type, uvector, nr_segs, fast_segs,
1670                                   *iov, &p);
1671         if (n < 0) {
1672                 if (p != *iov)
1673                         kfree(p);
1674                 *iov = NULL;
1675                 return n;
1676         }
1677         iov_iter_init(i, type, p, nr_segs, n);
1678         *iov = p == *iov ? NULL : p;
1679         return n;
1680 }
1681 #endif
1682
1683 int import_single_range(int rw, void __user *buf, size_t len,
1684                  struct iovec *iov, struct iov_iter *i)
1685 {
1686         if (len > MAX_RW_COUNT)
1687                 len = MAX_RW_COUNT;
1688         if (unlikely(!access_ok(buf, len)))
1689                 return -EFAULT;
1690
1691         iov->iov_base = buf;
1692         iov->iov_len = len;
1693         iov_iter_init(i, rw, iov, 1, len);
1694         return 0;
1695 }
1696 EXPORT_SYMBOL(import_single_range);
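
/*
 * Illustrative sketch (editorial addition): the single-buffer counterpart
 * used by plain read()/write()-style paths.  The iovec must outlive the
 * iterator, so both normally live together on the caller's stack.  Note
 * that the length is silently clamped to MAX_RW_COUNT.
 */
static ssize_t example_write_path(void __user *buf, size_t len)
{
	struct iovec iov;
	struct iov_iter iter;
	int ret;

	ret = import_single_range(WRITE, buf, len, &iov, &iter);
	if (ret)
		return ret;

	/* ... consume up to iov_iter_count(&iter) bytes through &iter ... */
	return iov_iter_count(&iter);
}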
1697
1698 int iov_iter_for_each_range(struct iov_iter *i, size_t bytes,
1699                             int (*f)(struct kvec *vec, void *context),
1700                             void *context)
1701 {
1702         struct kvec w;
1703         int err = -EINVAL;
1704         if (!bytes)
1705                 return 0;
1706
1707         iterate_all_kinds(i, bytes, v, -EINVAL, ({
1708                 w.iov_base = kmap(v.bv_page) + v.bv_offset;
1709                 w.iov_len = v.bv_len;
1710                 err = f(&w, context);
1711                 kunmap(v.bv_page);
1712                 err;}), ({
1713                 w = v;
1714                 err = f(&w, context);})
1715         )
1716         return err;
1717 }
1718 EXPORT_SYMBOL(iov_iter_for_each_range);
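
/*
 * Illustrative sketch (editorial addition): walking the first `bytes` of a
 * kernel-backed iterator as kernel-addressable ranges.  The callback sees
 * each contiguous chunk as a struct kvec; user-backed (iovec) iterators are
 * rejected with -EINVAL by the helper itself, and the iterator is not
 * advanced.
 */
static int example_sum_range(struct kvec *vec, void *context)
{
	u32 *sum = context;
	const u8 *p = vec->iov_base;
	size_t n;

	for (n = 0; n < vec->iov_len; n++)
		*sum += p[n];
	return 0;
}

static int example_sum_bytes(struct iov_iter *iter, size_t bytes, u32 *sum)
{
	return iov_iter_for_each_range(iter, bytes, example_sum_range, sum);
}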