linux/fs/fuse/readdir.c
<<
>>
Prefs
   1/*
   2  FUSE: Filesystem in Userspace
   3  Copyright (C) 2001-2018  Miklos Szeredi <miklos@szeredi.hu>
   4
   5  This program can be distributed under the terms of the GNU GPL.
   6  See the file COPYING.
   7*/
   8
   9
  10#include "fuse_i.h"
  11#include <linux/iversion.h>
  12#include <linux/posix_acl.h>
  13#include <linux/pagemap.h>
  14#include <linux/highmem.h>
  15
  16static bool fuse_use_readdirplus(struct inode *dir, struct dir_context *ctx)
  17{
  18        struct fuse_conn *fc = get_fuse_conn(dir);
  19        struct fuse_inode *fi = get_fuse_inode(dir);
  20
  21        if (!fc->do_readdirplus)
  22                return false;
  23        if (!fc->readdirplus_auto)
  24                return true;
  25        if (test_and_clear_bit(FUSE_I_ADVISE_RDPLUS, &fi->state))
  26                return true;
  27        if (ctx->pos == 0)
  28                return true;
  29        return false;
  30}
  31
  32static void fuse_add_dirent_to_cache(struct file *file,
  33                                     struct fuse_dirent *dirent, loff_t pos)
  34{
  35        struct fuse_inode *fi = get_fuse_inode(file_inode(file));
  36        size_t reclen = FUSE_DIRENT_SIZE(dirent);
  37        pgoff_t index;
  38        struct page *page;
  39        loff_t size;
  40        u64 version;
  41        unsigned int offset;
  42        void *addr;
  43
  44        spin_lock(&fi->rdc.lock);
  45        /*
  46         * Is cache already completed?  Or this entry does not go at the end of
  47         * cache?
  48         */
  49        if (fi->rdc.cached || pos != fi->rdc.pos) {
  50                spin_unlock(&fi->rdc.lock);
  51                return;
  52        }
  53        version = fi->rdc.version;
  54        size = fi->rdc.size;
  55        offset = size & ~PAGE_MASK;
  56        index = size >> PAGE_SHIFT;
  57        /* Dirent doesn't fit in current page?  Jump to next page. */
  58        if (offset + reclen > PAGE_SIZE) {
  59                index++;
  60                offset = 0;
  61        }
  62        spin_unlock(&fi->rdc.lock);
  63
  64        if (offset) {
  65                page = find_lock_page(file->f_mapping, index);
  66        } else {
  67                page = find_or_create_page(file->f_mapping, index,
  68                                           mapping_gfp_mask(file->f_mapping));
  69        }
  70        if (!page)
  71                return;
  72
  73        spin_lock(&fi->rdc.lock);
  74        /* Raced with another readdir */
  75        if (fi->rdc.version != version || fi->rdc.size != size ||
  76            WARN_ON(fi->rdc.pos != pos))
  77                goto unlock;
  78
  79        addr = kmap_atomic(page);
  80        if (!offset)
  81                clear_page(addr);
  82        memcpy(addr + offset, dirent, reclen);
  83        kunmap_atomic(addr);
  84        fi->rdc.size = (index << PAGE_SHIFT) + offset + reclen;
  85        fi->rdc.pos = dirent->off;
  86unlock:
  87        spin_unlock(&fi->rdc.lock);
  88        unlock_page(page);
  89        put_page(page);
  90}
  91
  92static void fuse_readdir_cache_end(struct file *file, loff_t pos)
  93{
  94        struct fuse_inode *fi = get_fuse_inode(file_inode(file));
  95        loff_t end;
  96
  97        spin_lock(&fi->rdc.lock);
  98        /* does cache end position match current position? */
  99        if (fi->rdc.pos != pos) {
 100                spin_unlock(&fi->rdc.lock);
 101                return;
 102        }
 103
 104        fi->rdc.cached = true;
 105        end = ALIGN(fi->rdc.size, PAGE_SIZE);
 106        spin_unlock(&fi->rdc.lock);
 107
 108        /* truncate unused tail of cache */
 109        truncate_inode_pages(file->f_mapping, end);
 110}
 111
 112static bool fuse_emit(struct file *file, struct dir_context *ctx,
 113                      struct fuse_dirent *dirent)
 114{
 115        struct fuse_file *ff = file->private_data;
 116
 117        if (ff->open_flags & FOPEN_CACHE_DIR)
 118                fuse_add_dirent_to_cache(file, dirent, ctx->pos);
 119
 120        return dir_emit(ctx, dirent->name, dirent->namelen, dirent->ino,
 121                        dirent->type);
 122}
 123
 124static int parse_dirfile(char *buf, size_t nbytes, struct file *file,
 125                         struct dir_context *ctx)
 126{
 127        while (nbytes >= FUSE_NAME_OFFSET) {
 128                struct fuse_dirent *dirent = (struct fuse_dirent *) buf;
 129                size_t reclen = FUSE_DIRENT_SIZE(dirent);
 130                if (!dirent->namelen || dirent->namelen > FUSE_NAME_MAX)
 131                        return -EIO;
 132                if (reclen > nbytes)
 133                        break;
 134                if (memchr(dirent->name, '/', dirent->namelen) != NULL)
 135                        return -EIO;
 136
 137                if (!fuse_emit(file, ctx, dirent))
 138                        break;
 139
 140                buf += reclen;
 141                nbytes -= reclen;
 142                ctx->pos = dirent->off;
 143        }
 144
 145        return 0;
 146}
 147
 148static int fuse_direntplus_link(struct file *file,
 149                                struct fuse_direntplus *direntplus,
 150                                u64 attr_version)
 151{
 152        struct fuse_entry_out *o = &direntplus->entry_out;
 153        struct fuse_dirent *dirent = &direntplus->dirent;
 154        struct dentry *parent = file->f_path.dentry;
 155        struct qstr name = QSTR_INIT(dirent->name, dirent->namelen);
 156        struct dentry *dentry;
 157        struct dentry *alias;
 158        struct inode *dir = d_inode(parent);
 159        struct fuse_conn *fc;
 160        struct inode *inode;
 161        DECLARE_WAIT_QUEUE_HEAD_ONSTACK(wq);
 162
 163        if (!o->nodeid) {
 164                /*
 165                 * Unlike in the case of fuse_lookup, zero nodeid does not mean
 166                 * ENOENT. Instead, it only means the userspace filesystem did
 167                 * not want to return attributes/handle for this entry.
 168                 *
 169                 * So do nothing.
 170                 */
 171                return 0;
 172        }
 173
 174        if (name.name[0] == '.') {
 175                /*
 176                 * We could potentially refresh the attributes of the directory
 177                 * and its parent?
 178                 */
 179                if (name.len == 1)
 180                        return 0;
 181                if (name.name[1] == '.' && name.len == 2)
 182                        return 0;
 183        }
 184
 185        if (invalid_nodeid(o->nodeid))
 186                return -EIO;
 187        if (fuse_invalid_attr(&o->attr))
 188                return -EIO;
 189
 190        fc = get_fuse_conn(dir);
 191
 192        name.hash = full_name_hash(parent, name.name, name.len);
 193        dentry = d_lookup(parent, &name);
 194        if (!dentry) {
 195retry:
 196                dentry = d_alloc_parallel(parent, &name, &wq);
 197                if (IS_ERR(dentry))
 198                        return PTR_ERR(dentry);
 199        }
 200        if (!d_in_lookup(dentry)) {
 201                struct fuse_inode *fi;
 202                inode = d_inode(dentry);
 203                if (inode && get_node_id(inode) != o->nodeid)
 204                        inode = NULL;
 205                if (!inode ||
 206                    fuse_stale_inode(inode, o->generation, &o->attr)) {
 207                        if (inode)
 208                                fuse_make_bad(inode);
 209                        d_invalidate(dentry);
 210                        dput(dentry);
 211                        goto retry;
 212                }
 213                if (fuse_is_bad(inode)) {
 214                        dput(dentry);
 215                        return -EIO;
 216                }
 217
 218                fi = get_fuse_inode(inode);
 219                spin_lock(&fi->lock);
 220                fi->nlookup++;
 221                spin_unlock(&fi->lock);
 222
 223                forget_all_cached_acls(inode);
 224                fuse_change_attributes(inode, &o->attr,
 225                                       entry_attr_timeout(o),
 226                                       attr_version);
 227                /*
 228                 * The other branch comes via fuse_iget()
 229                 * which bumps nlookup inside
 230                 */
 231        } else {
 232                inode = fuse_iget(dir->i_sb, o->nodeid, o->generation,
 233                                  &o->attr, entry_attr_timeout(o),
 234                                  attr_version);
 235                if (!inode)
 236                        inode = ERR_PTR(-ENOMEM);
 237
 238                alias = d_splice_alias(inode, dentry);
 239                d_lookup_done(dentry);
 240                if (alias) {
 241                        dput(dentry);
 242                        dentry = alias;
 243                }
 244                if (IS_ERR(dentry))
 245                        return PTR_ERR(dentry);
 246        }
 247        if (fc->readdirplus_auto)
 248                set_bit(FUSE_I_INIT_RDPLUS, &get_fuse_inode(inode)->state);
 249        fuse_change_entry_timeout(dentry, o);
 250
 251        dput(dentry);
 252        return 0;
 253}
 254
 255static void fuse_force_forget(struct file *file, u64 nodeid)
 256{
 257        struct inode *inode = file_inode(file);
 258        struct fuse_mount *fm = get_fuse_mount(inode);
 259        struct fuse_forget_in inarg;
 260        FUSE_ARGS(args);
 261
 262        memset(&inarg, 0, sizeof(inarg));
 263        inarg.nlookup = 1;
 264        args.opcode = FUSE_FORGET;
 265        args.nodeid = nodeid;
 266        args.in_numargs = 1;
 267        args.in_args[0].size = sizeof(inarg);
 268        args.in_args[0].value = &inarg;
 269        args.force = true;
 270        args.noreply = true;
 271
 272        fuse_simple_request(fm, &args);
 273        /* ignore errors */
 274}
 275
 276static int parse_dirplusfile(char *buf, size_t nbytes, struct file *file,
 277                             struct dir_context *ctx, u64 attr_version)
 278{
 279        struct fuse_direntplus *direntplus;
 280        struct fuse_dirent *dirent;
 281        size_t reclen;
 282        int over = 0;
 283        int ret;
 284
 285        while (nbytes >= FUSE_NAME_OFFSET_DIRENTPLUS) {
 286                direntplus = (struct fuse_direntplus *) buf;
 287                dirent = &direntplus->dirent;
 288                reclen = FUSE_DIRENTPLUS_SIZE(direntplus);
 289
 290                if (!dirent->namelen || dirent->namelen > FUSE_NAME_MAX)
 291                        return -EIO;
 292                if (reclen > nbytes)
 293                        break;
 294                if (memchr(dirent->name, '/', dirent->namelen) != NULL)
 295                        return -EIO;
 296
 297                if (!over) {
 298                        /* We fill entries into dstbuf only as much as
 299                           it can hold. But we still continue iterating
 300                           over remaining entries to link them. If not,
 301                           we need to send a FORGET for each of those
 302                           which we did not link.
 303                        */
 304                        over = !fuse_emit(file, ctx, dirent);
 305                        if (!over)
 306                                ctx->pos = dirent->off;
 307                }
 308
 309                buf += reclen;
 310                nbytes -= reclen;
 311
 312                ret = fuse_direntplus_link(file, direntplus, attr_version);
 313                if (ret)
 314                        fuse_force_forget(file, direntplus->entry_out.nodeid);
 315        }
 316
 317        return 0;
 318}
 319
 320static int fuse_readdir_uncached(struct file *file, struct dir_context *ctx)
 321{
 322        int plus;
 323        ssize_t res;
 324        struct page *page;
 325        struct inode *inode = file_inode(file);
 326        struct fuse_mount *fm = get_fuse_mount(inode);
 327        struct fuse_io_args ia = {};
 328        struct fuse_args_pages *ap = &ia.ap;
 329        struct fuse_page_desc desc = { .length = PAGE_SIZE };
 330        u64 attr_version = 0;
 331        bool locked;
 332
 333        page = alloc_page(GFP_KERNEL);
 334        if (!page)
 335                return -ENOMEM;
 336
 337        plus = fuse_use_readdirplus(inode, ctx);
 338        ap->args.out_pages = true;
 339        ap->num_pages = 1;
 340        ap->pages = &page;
 341        ap->descs = &desc;
 342        if (plus) {
 343                attr_version = fuse_get_attr_version(fm->fc);
 344                fuse_read_args_fill(&ia, file, ctx->pos, PAGE_SIZE,
 345                                    FUSE_READDIRPLUS);
 346        } else {
 347                fuse_read_args_fill(&ia, file, ctx->pos, PAGE_SIZE,
 348                                    FUSE_READDIR);
 349        }
 350        locked = fuse_lock_inode(inode);
 351        res = fuse_simple_request(fm, &ap->args);
 352        fuse_unlock_inode(inode, locked);
 353        if (res >= 0) {
 354                if (!res) {
 355                        struct fuse_file *ff = file->private_data;
 356
 357                        if (ff->open_flags & FOPEN_CACHE_DIR)
 358                                fuse_readdir_cache_end(file, ctx->pos);
 359                } else if (plus) {
 360                        res = parse_dirplusfile(page_address(page), res,
 361                                                file, ctx, attr_version);
 362                } else {
 363                        res = parse_dirfile(page_address(page), res, file,
 364                                            ctx);
 365                }
 366        }
 367
 368        __free_page(page);
 369        fuse_invalidate_atime(inode);
 370        return res;
 371}
 372
 /* Outcome of scanning one page of the readdir cache (fuse_parse_cache()). */
 enum fuse_parse_result {
         /* a cached entry failed validation */
         FOUND_ERR = -1,
         /* the requested position was not reached on this page */
         FOUND_NONE = 0,
         /* the position was reached and entries were emitted */
         FOUND_SOME = 1,
         /* dir_emit() declined an entry; stop reading */
         FOUND_ALL = 2,
 };
 379
 380static enum fuse_parse_result fuse_parse_cache(struct fuse_file *ff,
 381                                               void *addr, unsigned int size,
 382                                               struct dir_context *ctx)
 383{
 384        unsigned int offset = ff->readdir.cache_off & ~PAGE_MASK;
 385        enum fuse_parse_result res = FOUND_NONE;
 386
 387        WARN_ON(offset >= size);
 388
 389        for (;;) {
 390                struct fuse_dirent *dirent = addr + offset;
 391                unsigned int nbytes = size - offset;
 392                size_t reclen;
 393
 394                if (nbytes < FUSE_NAME_OFFSET || !dirent->namelen)
 395                        break;
 396
 397                reclen = FUSE_DIRENT_SIZE(dirent); /* derefs ->namelen */
 398
 399                if (WARN_ON(dirent->namelen > FUSE_NAME_MAX))
 400                        return FOUND_ERR;
 401                if (WARN_ON(reclen > nbytes))
 402                        return FOUND_ERR;
 403                if (WARN_ON(memchr(dirent->name, '/', dirent->namelen) != NULL))
 404                        return FOUND_ERR;
 405
 406                if (ff->readdir.pos == ctx->pos) {
 407                        res = FOUND_SOME;
 408                        if (!dir_emit(ctx, dirent->name, dirent->namelen,
 409                                      dirent->ino, dirent->type))
 410                                return FOUND_ALL;
 411                        ctx->pos = dirent->off;
 412                }
 413                ff->readdir.pos = dirent->off;
 414                ff->readdir.cache_off += reclen;
 415
 416                offset += reclen;
 417        }
 418
 419        return res;
 420}
 421
 422static void fuse_rdc_reset(struct inode *inode)
 423{
 424        struct fuse_inode *fi = get_fuse_inode(inode);
 425
 426        fi->rdc.cached = false;
 427        fi->rdc.version++;
 428        fi->rdc.size = 0;
 429        fi->rdc.pos = 0;
 430}
 431
 /*
  * Returned by fuse_readdir_cached() to tell fuse_readdir() to fall back to
  * fuse_readdir_uncached().
  */
 #define UNCACHED 1
 433
 434static int fuse_readdir_cached(struct file *file, struct dir_context *ctx)
 435{
 436        struct fuse_file *ff = file->private_data;
 437        struct inode *inode = file_inode(file);
 438        struct fuse_conn *fc = get_fuse_conn(inode);
 439        struct fuse_inode *fi = get_fuse_inode(inode);
 440        enum fuse_parse_result res;
 441        pgoff_t index;
 442        unsigned int size;
 443        struct page *page;
 444        void *addr;
 445
 446        /* Seeked?  If so, reset the cache stream */
 447        if (ff->readdir.pos != ctx->pos) {
 448                ff->readdir.pos = 0;
 449                ff->readdir.cache_off = 0;
 450        }
 451
 452        /*
 453         * We're just about to start reading into the cache or reading the
 454         * cache; both cases require an up-to-date mtime value.
 455         */
 456        if (!ctx->pos && fc->auto_inval_data) {
 457                int err = fuse_update_attributes(inode, file);
 458
 459                if (err)
 460                        return err;
 461        }
 462
 463retry:
 464        spin_lock(&fi->rdc.lock);
 465retry_locked:
 466        if (!fi->rdc.cached) {
 467                /* Starting cache? Set cache mtime. */
 468                if (!ctx->pos && !fi->rdc.size) {
 469                        fi->rdc.mtime = inode->i_mtime;
 470                        fi->rdc.iversion = inode_query_iversion(inode);
 471                }
 472                spin_unlock(&fi->rdc.lock);
 473                return UNCACHED;
 474        }
 475        /*
 476         * When at the beginning of the directory (i.e. just after opendir(3) or
 477         * rewinddir(3)), then need to check whether directory contents have
 478         * changed, and reset the cache if so.
 479         */
 480        if (!ctx->pos) {
 481                if (inode_peek_iversion(inode) != fi->rdc.iversion ||
 482                    !timespec64_equal(&fi->rdc.mtime, &inode->i_mtime)) {
 483                        fuse_rdc_reset(inode);
 484                        goto retry_locked;
 485                }
 486        }
 487
 488        /*
 489         * If cache version changed since the last getdents() call, then reset
 490         * the cache stream.
 491         */
 492        if (ff->readdir.version != fi->rdc.version) {
 493                ff->readdir.pos = 0;
 494                ff->readdir.cache_off = 0;
 495        }
 496        /*
 497         * If at the beginning of the cache, than reset version to
 498         * current.
 499         */
 500        if (ff->readdir.pos == 0)
 501                ff->readdir.version = fi->rdc.version;
 502
 503        WARN_ON(fi->rdc.size < ff->readdir.cache_off);
 504
 505        index = ff->readdir.cache_off >> PAGE_SHIFT;
 506
 507        if (index == (fi->rdc.size >> PAGE_SHIFT))
 508                size = fi->rdc.size & ~PAGE_MASK;
 509        else
 510                size = PAGE_SIZE;
 511        spin_unlock(&fi->rdc.lock);
 512
 513        /* EOF? */
 514        if ((ff->readdir.cache_off & ~PAGE_MASK) == size)
 515                return 0;
 516
 517        page = find_get_page_flags(file->f_mapping, index,
 518                                   FGP_ACCESSED | FGP_LOCK);
 519        spin_lock(&fi->rdc.lock);
 520        if (!page) {
 521                /*
 522                 * Uh-oh: page gone missing, cache is useless
 523                 */
 524                if (fi->rdc.version == ff->readdir.version)
 525                        fuse_rdc_reset(inode);
 526                goto retry_locked;
 527        }
 528
 529        /* Make sure it's still the same version after getting the page. */
 530        if (ff->readdir.version != fi->rdc.version) {
 531                spin_unlock(&fi->rdc.lock);
 532                unlock_page(page);
 533                put_page(page);
 534                goto retry;
 535        }
 536        spin_unlock(&fi->rdc.lock);
 537
 538        /*
 539         * Contents of the page are now protected against changing by holding
 540         * the page lock.
 541         */
 542        addr = kmap(page);
 543        res = fuse_parse_cache(ff, addr, size, ctx);
 544        kunmap(page);
 545        unlock_page(page);
 546        put_page(page);
 547
 548        if (res == FOUND_ERR)
 549                return -EIO;
 550
 551        if (res == FOUND_ALL)
 552                return 0;
 553
 554        if (size == PAGE_SIZE) {
 555                /* We hit end of page: skip to next page. */
 556                ff->readdir.cache_off = ALIGN(ff->readdir.cache_off, PAGE_SIZE);
 557                goto retry;
 558        }
 559
 560        /*
 561         * End of cache reached.  If found position, then we are done, otherwise
 562         * need to fall back to uncached, since the position we were looking for
 563         * wasn't in the cache.
 564         */
 565        return res == FOUND_SOME ? 0 : UNCACHED;
 566}
 567
 568int fuse_readdir(struct file *file, struct dir_context *ctx)
 569{
 570        struct fuse_file *ff = file->private_data;
 571        struct inode *inode = file_inode(file);
 572        int err;
 573
 574        if (fuse_is_bad(inode))
 575                return -EIO;
 576
 577        mutex_lock(&ff->readdir.lock);
 578
 579        err = UNCACHED;
 580        if (ff->open_flags & FOPEN_CACHE_DIR)
 581                err = fuse_readdir_cached(file, ctx);
 582        if (err == UNCACHED)
 583                err = fuse_readdir_uncached(file, ctx);
 584
 585        mutex_unlock(&ff->readdir.lock);
 586
 587        return err;
 588}
 589