Merge tag 'pull-fixes' of git://git.kernel.org/pub/scm/linux/kernel/git/viro/vfs
[platform/kernel/linux-starfive.git] / fs / fuse / readdir.c
1 /*
2   FUSE: Filesystem in Userspace
3   Copyright (C) 2001-2018  Miklos Szeredi <miklos@szeredi.hu>
4
5   This program can be distributed under the terms of the GNU GPL.
6   See the file COPYING.
7 */
8
9
10 #include "fuse_i.h"
11 #include <linux/iversion.h>
12 #include <linux/posix_acl.h>
13 #include <linux/pagemap.h>
14 #include <linux/highmem.h>
15
16 static bool fuse_use_readdirplus(struct inode *dir, struct dir_context *ctx)
17 {
18         struct fuse_conn *fc = get_fuse_conn(dir);
19         struct fuse_inode *fi = get_fuse_inode(dir);
20
21         if (!fc->do_readdirplus)
22                 return false;
23         if (!fc->readdirplus_auto)
24                 return true;
25         if (test_and_clear_bit(FUSE_I_ADVISE_RDPLUS, &fi->state))
26                 return true;
27         if (ctx->pos == 0)
28                 return true;
29         return false;
30 }
31
32 static void fuse_add_dirent_to_cache(struct file *file,
33                                      struct fuse_dirent *dirent, loff_t pos)
34 {
35         struct fuse_inode *fi = get_fuse_inode(file_inode(file));
36         size_t reclen = FUSE_DIRENT_SIZE(dirent);
37         pgoff_t index;
38         struct page *page;
39         loff_t size;
40         u64 version;
41         unsigned int offset;
42         void *addr;
43
44         spin_lock(&fi->rdc.lock);
45         /*
46          * Is cache already completed?  Or this entry does not go at the end of
47          * cache?
48          */
49         if (fi->rdc.cached || pos != fi->rdc.pos) {
50                 spin_unlock(&fi->rdc.lock);
51                 return;
52         }
53         version = fi->rdc.version;
54         size = fi->rdc.size;
55         offset = size & ~PAGE_MASK;
56         index = size >> PAGE_SHIFT;
57         /* Dirent doesn't fit in current page?  Jump to next page. */
58         if (offset + reclen > PAGE_SIZE) {
59                 index++;
60                 offset = 0;
61         }
62         spin_unlock(&fi->rdc.lock);
63
64         if (offset) {
65                 page = find_lock_page(file->f_mapping, index);
66         } else {
67                 page = find_or_create_page(file->f_mapping, index,
68                                            mapping_gfp_mask(file->f_mapping));
69         }
70         if (!page)
71                 return;
72
73         spin_lock(&fi->rdc.lock);
74         /* Raced with another readdir */
75         if (fi->rdc.version != version || fi->rdc.size != size ||
76             WARN_ON(fi->rdc.pos != pos))
77                 goto unlock;
78
79         addr = kmap_local_page(page);
80         if (!offset) {
81                 clear_page(addr);
82                 SetPageUptodate(page);
83         }
84         memcpy(addr + offset, dirent, reclen);
85         kunmap_local(addr);
86         fi->rdc.size = (index << PAGE_SHIFT) + offset + reclen;
87         fi->rdc.pos = dirent->off;
88 unlock:
89         spin_unlock(&fi->rdc.lock);
90         unlock_page(page);
91         put_page(page);
92 }
93
94 static void fuse_readdir_cache_end(struct file *file, loff_t pos)
95 {
96         struct fuse_inode *fi = get_fuse_inode(file_inode(file));
97         loff_t end;
98
99         spin_lock(&fi->rdc.lock);
100         /* does cache end position match current position? */
101         if (fi->rdc.pos != pos) {
102                 spin_unlock(&fi->rdc.lock);
103                 return;
104         }
105
106         fi->rdc.cached = true;
107         end = ALIGN(fi->rdc.size, PAGE_SIZE);
108         spin_unlock(&fi->rdc.lock);
109
110         /* truncate unused tail of cache */
111         truncate_inode_pages(file->f_mapping, end);
112 }
113
114 static bool fuse_emit(struct file *file, struct dir_context *ctx,
115                       struct fuse_dirent *dirent)
116 {
117         struct fuse_file *ff = file->private_data;
118
119         if (ff->open_flags & FOPEN_CACHE_DIR)
120                 fuse_add_dirent_to_cache(file, dirent, ctx->pos);
121
122         return dir_emit(ctx, dirent->name, dirent->namelen, dirent->ino,
123                         dirent->type);
124 }
125
126 static int parse_dirfile(char *buf, size_t nbytes, struct file *file,
127                          struct dir_context *ctx)
128 {
129         while (nbytes >= FUSE_NAME_OFFSET) {
130                 struct fuse_dirent *dirent = (struct fuse_dirent *) buf;
131                 size_t reclen = FUSE_DIRENT_SIZE(dirent);
132                 if (!dirent->namelen || dirent->namelen > FUSE_NAME_MAX)
133                         return -EIO;
134                 if (reclen > nbytes)
135                         break;
136                 if (memchr(dirent->name, '/', dirent->namelen) != NULL)
137                         return -EIO;
138
139                 if (!fuse_emit(file, ctx, dirent))
140                         break;
141
142                 buf += reclen;
143                 nbytes -= reclen;
144                 ctx->pos = dirent->off;
145         }
146
147         return 0;
148 }
149
150 static int fuse_direntplus_link(struct file *file,
151                                 struct fuse_direntplus *direntplus,
152                                 u64 attr_version)
153 {
154         struct fuse_entry_out *o = &direntplus->entry_out;
155         struct fuse_dirent *dirent = &direntplus->dirent;
156         struct dentry *parent = file->f_path.dentry;
157         struct qstr name = QSTR_INIT(dirent->name, dirent->namelen);
158         struct dentry *dentry;
159         struct dentry *alias;
160         struct inode *dir = d_inode(parent);
161         struct fuse_conn *fc;
162         struct inode *inode;
163         DECLARE_WAIT_QUEUE_HEAD_ONSTACK(wq);
164
165         if (!o->nodeid) {
166                 /*
167                  * Unlike in the case of fuse_lookup, zero nodeid does not mean
168                  * ENOENT. Instead, it only means the userspace filesystem did
169                  * not want to return attributes/handle for this entry.
170                  *
171                  * So do nothing.
172                  */
173                 return 0;
174         }
175
176         if (name.name[0] == '.') {
177                 /*
178                  * We could potentially refresh the attributes of the directory
179                  * and its parent?
180                  */
181                 if (name.len == 1)
182                         return 0;
183                 if (name.name[1] == '.' && name.len == 2)
184                         return 0;
185         }
186
187         if (invalid_nodeid(o->nodeid))
188                 return -EIO;
189         if (fuse_invalid_attr(&o->attr))
190                 return -EIO;
191
192         fc = get_fuse_conn(dir);
193
194         name.hash = full_name_hash(parent, name.name, name.len);
195         dentry = d_lookup(parent, &name);
196         if (!dentry) {
197 retry:
198                 dentry = d_alloc_parallel(parent, &name, &wq);
199                 if (IS_ERR(dentry))
200                         return PTR_ERR(dentry);
201         }
202         if (!d_in_lookup(dentry)) {
203                 struct fuse_inode *fi;
204                 inode = d_inode(dentry);
205                 if (inode && get_node_id(inode) != o->nodeid)
206                         inode = NULL;
207                 if (!inode ||
208                     fuse_stale_inode(inode, o->generation, &o->attr)) {
209                         if (inode)
210                                 fuse_make_bad(inode);
211                         d_invalidate(dentry);
212                         dput(dentry);
213                         goto retry;
214                 }
215                 if (fuse_is_bad(inode)) {
216                         dput(dentry);
217                         return -EIO;
218                 }
219
220                 fi = get_fuse_inode(inode);
221                 spin_lock(&fi->lock);
222                 fi->nlookup++;
223                 spin_unlock(&fi->lock);
224
225                 forget_all_cached_acls(inode);
226                 fuse_change_attributes(inode, &o->attr,
227                                        entry_attr_timeout(o),
228                                        attr_version);
229                 /*
230                  * The other branch comes via fuse_iget()
231                  * which bumps nlookup inside
232                  */
233         } else {
234                 inode = fuse_iget(dir->i_sb, o->nodeid, o->generation,
235                                   &o->attr, entry_attr_timeout(o),
236                                   attr_version);
237                 if (!inode)
238                         inode = ERR_PTR(-ENOMEM);
239
240                 alias = d_splice_alias(inode, dentry);
241                 d_lookup_done(dentry);
242                 if (alias) {
243                         dput(dentry);
244                         dentry = alias;
245                 }
246                 if (IS_ERR(dentry))
247                         return PTR_ERR(dentry);
248         }
249         if (fc->readdirplus_auto)
250                 set_bit(FUSE_I_INIT_RDPLUS, &get_fuse_inode(inode)->state);
251         fuse_change_entry_timeout(dentry, o);
252
253         dput(dentry);
254         return 0;
255 }
256
257 static void fuse_force_forget(struct file *file, u64 nodeid)
258 {
259         struct inode *inode = file_inode(file);
260         struct fuse_mount *fm = get_fuse_mount(inode);
261         struct fuse_forget_in inarg;
262         FUSE_ARGS(args);
263
264         memset(&inarg, 0, sizeof(inarg));
265         inarg.nlookup = 1;
266         args.opcode = FUSE_FORGET;
267         args.nodeid = nodeid;
268         args.in_numargs = 1;
269         args.in_args[0].size = sizeof(inarg);
270         args.in_args[0].value = &inarg;
271         args.force = true;
272         args.noreply = true;
273
274         fuse_simple_request(fm, &args);
275         /* ignore errors */
276 }
277
278 static int parse_dirplusfile(char *buf, size_t nbytes, struct file *file,
279                              struct dir_context *ctx, u64 attr_version)
280 {
281         struct fuse_direntplus *direntplus;
282         struct fuse_dirent *dirent;
283         size_t reclen;
284         int over = 0;
285         int ret;
286
287         while (nbytes >= FUSE_NAME_OFFSET_DIRENTPLUS) {
288                 direntplus = (struct fuse_direntplus *) buf;
289                 dirent = &direntplus->dirent;
290                 reclen = FUSE_DIRENTPLUS_SIZE(direntplus);
291
292                 if (!dirent->namelen || dirent->namelen > FUSE_NAME_MAX)
293                         return -EIO;
294                 if (reclen > nbytes)
295                         break;
296                 if (memchr(dirent->name, '/', dirent->namelen) != NULL)
297                         return -EIO;
298
299                 if (!over) {
300                         /* We fill entries into dstbuf only as much as
301                            it can hold. But we still continue iterating
302                            over remaining entries to link them. If not,
303                            we need to send a FORGET for each of those
304                            which we did not link.
305                         */
306                         over = !fuse_emit(file, ctx, dirent);
307                         if (!over)
308                                 ctx->pos = dirent->off;
309                 }
310
311                 buf += reclen;
312                 nbytes -= reclen;
313
314                 ret = fuse_direntplus_link(file, direntplus, attr_version);
315                 if (ret)
316                         fuse_force_forget(file, direntplus->entry_out.nodeid);
317         }
318
319         return 0;
320 }
321
322 static int fuse_readdir_uncached(struct file *file, struct dir_context *ctx)
323 {
324         int plus;
325         ssize_t res;
326         struct page *page;
327         struct inode *inode = file_inode(file);
328         struct fuse_mount *fm = get_fuse_mount(inode);
329         struct fuse_io_args ia = {};
330         struct fuse_args_pages *ap = &ia.ap;
331         struct fuse_page_desc desc = { .length = PAGE_SIZE };
332         u64 attr_version = 0;
333         bool locked;
334
335         page = alloc_page(GFP_KERNEL);
336         if (!page)
337                 return -ENOMEM;
338
339         plus = fuse_use_readdirplus(inode, ctx);
340         ap->args.out_pages = true;
341         ap->num_pages = 1;
342         ap->pages = &page;
343         ap->descs = &desc;
344         if (plus) {
345                 attr_version = fuse_get_attr_version(fm->fc);
346                 fuse_read_args_fill(&ia, file, ctx->pos, PAGE_SIZE,
347                                     FUSE_READDIRPLUS);
348         } else {
349                 fuse_read_args_fill(&ia, file, ctx->pos, PAGE_SIZE,
350                                     FUSE_READDIR);
351         }
352         locked = fuse_lock_inode(inode);
353         res = fuse_simple_request(fm, &ap->args);
354         fuse_unlock_inode(inode, locked);
355         if (res >= 0) {
356                 if (!res) {
357                         struct fuse_file *ff = file->private_data;
358
359                         if (ff->open_flags & FOPEN_CACHE_DIR)
360                                 fuse_readdir_cache_end(file, ctx->pos);
361                 } else if (plus) {
362                         res = parse_dirplusfile(page_address(page), res,
363                                                 file, ctx, attr_version);
364                 } else {
365                         res = parse_dirfile(page_address(page), res, file,
366                                             ctx);
367                 }
368         }
369
370         __free_page(page);
371         fuse_invalidate_atime(inode);
372         return res;
373 }
374
/* Outcome of scanning one page of the readdir cache (fuse_parse_cache). */
enum fuse_parse_result {
	/* A cached entry failed validation. */
	FOUND_ERR = -1,
	/* The position being looked for was not reached in this page. */
	FOUND_NONE = 0,
	/* Position reached; the scan ran to the end of the page's data. */
	FOUND_SOME = 1,
	/* Position reached and the destination refused a further entry. */
	FOUND_ALL = 2,
};
381
382 static enum fuse_parse_result fuse_parse_cache(struct fuse_file *ff,
383                                                void *addr, unsigned int size,
384                                                struct dir_context *ctx)
385 {
386         unsigned int offset = ff->readdir.cache_off & ~PAGE_MASK;
387         enum fuse_parse_result res = FOUND_NONE;
388
389         WARN_ON(offset >= size);
390
391         for (;;) {
392                 struct fuse_dirent *dirent = addr + offset;
393                 unsigned int nbytes = size - offset;
394                 size_t reclen;
395
396                 if (nbytes < FUSE_NAME_OFFSET || !dirent->namelen)
397                         break;
398
399                 reclen = FUSE_DIRENT_SIZE(dirent); /* derefs ->namelen */
400
401                 if (WARN_ON(dirent->namelen > FUSE_NAME_MAX))
402                         return FOUND_ERR;
403                 if (WARN_ON(reclen > nbytes))
404                         return FOUND_ERR;
405                 if (WARN_ON(memchr(dirent->name, '/', dirent->namelen) != NULL))
406                         return FOUND_ERR;
407
408                 if (ff->readdir.pos == ctx->pos) {
409                         res = FOUND_SOME;
410                         if (!dir_emit(ctx, dirent->name, dirent->namelen,
411                                       dirent->ino, dirent->type))
412                                 return FOUND_ALL;
413                         ctx->pos = dirent->off;
414                 }
415                 ff->readdir.pos = dirent->off;
416                 ff->readdir.cache_off += reclen;
417
418                 offset += reclen;
419         }
420
421         return res;
422 }
423
424 static void fuse_rdc_reset(struct inode *inode)
425 {
426         struct fuse_inode *fi = get_fuse_inode(inode);
427
428         fi->rdc.cached = false;
429         fi->rdc.version++;
430         fi->rdc.size = 0;
431         fi->rdc.pos = 0;
432 }
433
434 #define UNCACHED 1
435
436 static int fuse_readdir_cached(struct file *file, struct dir_context *ctx)
437 {
438         struct fuse_file *ff = file->private_data;
439         struct inode *inode = file_inode(file);
440         struct fuse_conn *fc = get_fuse_conn(inode);
441         struct fuse_inode *fi = get_fuse_inode(inode);
442         enum fuse_parse_result res;
443         pgoff_t index;
444         unsigned int size;
445         struct page *page;
446         void *addr;
447
448         /* Seeked?  If so, reset the cache stream */
449         if (ff->readdir.pos != ctx->pos) {
450                 ff->readdir.pos = 0;
451                 ff->readdir.cache_off = 0;
452         }
453
454         /*
455          * We're just about to start reading into the cache or reading the
456          * cache; both cases require an up-to-date mtime value.
457          */
458         if (!ctx->pos && fc->auto_inval_data) {
459                 int err = fuse_update_attributes(inode, file, STATX_MTIME);
460
461                 if (err)
462                         return err;
463         }
464
465 retry:
466         spin_lock(&fi->rdc.lock);
467 retry_locked:
468         if (!fi->rdc.cached) {
469                 /* Starting cache? Set cache mtime. */
470                 if (!ctx->pos && !fi->rdc.size) {
471                         fi->rdc.mtime = inode->i_mtime;
472                         fi->rdc.iversion = inode_query_iversion(inode);
473                 }
474                 spin_unlock(&fi->rdc.lock);
475                 return UNCACHED;
476         }
477         /*
478          * When at the beginning of the directory (i.e. just after opendir(3) or
479          * rewinddir(3)), then need to check whether directory contents have
480          * changed, and reset the cache if so.
481          */
482         if (!ctx->pos) {
483                 if (inode_peek_iversion(inode) != fi->rdc.iversion ||
484                     !timespec64_equal(&fi->rdc.mtime, &inode->i_mtime)) {
485                         fuse_rdc_reset(inode);
486                         goto retry_locked;
487                 }
488         }
489
490         /*
491          * If cache version changed since the last getdents() call, then reset
492          * the cache stream.
493          */
494         if (ff->readdir.version != fi->rdc.version) {
495                 ff->readdir.pos = 0;
496                 ff->readdir.cache_off = 0;
497         }
498         /*
499          * If at the beginning of the cache, than reset version to
500          * current.
501          */
502         if (ff->readdir.pos == 0)
503                 ff->readdir.version = fi->rdc.version;
504
505         WARN_ON(fi->rdc.size < ff->readdir.cache_off);
506
507         index = ff->readdir.cache_off >> PAGE_SHIFT;
508
509         if (index == (fi->rdc.size >> PAGE_SHIFT))
510                 size = fi->rdc.size & ~PAGE_MASK;
511         else
512                 size = PAGE_SIZE;
513         spin_unlock(&fi->rdc.lock);
514
515         /* EOF? */
516         if ((ff->readdir.cache_off & ~PAGE_MASK) == size)
517                 return 0;
518
519         page = find_get_page_flags(file->f_mapping, index,
520                                    FGP_ACCESSED | FGP_LOCK);
521         /* Page gone missing, then re-added to cache, but not initialized? */
522         if (page && !PageUptodate(page)) {
523                 unlock_page(page);
524                 put_page(page);
525                 page = NULL;
526         }
527         spin_lock(&fi->rdc.lock);
528         if (!page) {
529                 /*
530                  * Uh-oh: page gone missing, cache is useless
531                  */
532                 if (fi->rdc.version == ff->readdir.version)
533                         fuse_rdc_reset(inode);
534                 goto retry_locked;
535         }
536
537         /* Make sure it's still the same version after getting the page. */
538         if (ff->readdir.version != fi->rdc.version) {
539                 spin_unlock(&fi->rdc.lock);
540                 unlock_page(page);
541                 put_page(page);
542                 goto retry;
543         }
544         spin_unlock(&fi->rdc.lock);
545
546         /*
547          * Contents of the page are now protected against changing by holding
548          * the page lock.
549          */
550         addr = kmap(page);
551         res = fuse_parse_cache(ff, addr, size, ctx);
552         kunmap(page);
553         unlock_page(page);
554         put_page(page);
555
556         if (res == FOUND_ERR)
557                 return -EIO;
558
559         if (res == FOUND_ALL)
560                 return 0;
561
562         if (size == PAGE_SIZE) {
563                 /* We hit end of page: skip to next page. */
564                 ff->readdir.cache_off = ALIGN(ff->readdir.cache_off, PAGE_SIZE);
565                 goto retry;
566         }
567
568         /*
569          * End of cache reached.  If found position, then we are done, otherwise
570          * need to fall back to uncached, since the position we were looking for
571          * wasn't in the cache.
572          */
573         return res == FOUND_SOME ? 0 : UNCACHED;
574 }
575
576 int fuse_readdir(struct file *file, struct dir_context *ctx)
577 {
578         struct fuse_file *ff = file->private_data;
579         struct inode *inode = file_inode(file);
580         int err;
581
582         if (fuse_is_bad(inode))
583                 return -EIO;
584
585         mutex_lock(&ff->readdir.lock);
586
587         err = UNCACHED;
588         if (ff->open_flags & FOPEN_CACHE_DIR)
589                 err = fuse_readdir_cached(file, ctx);
590         if (err == UNCACHED)
591                 err = fuse_readdir_uncached(file, ctx);
592
593         mutex_unlock(&ff->readdir.lock);
594
595         return err;
596 }