linux/fs/fuse/readdir.c
<<
>>
Prefs
   1/*
   2  FUSE: Filesystem in Userspace
   3  Copyright (C) 2001-2018  Miklos Szeredi <miklos@szeredi.hu>
   4
   5  This program can be distributed under the terms of the GNU GPL.
   6  See the file COPYING.
   7*/
   8
   9
  10#include "fuse_i.h"
  11#include <linux/iversion.h>
  12#include <linux/posix_acl.h>
  13#include <linux/pagemap.h>
  14#include <linux/highmem.h>
  15
  16static bool fuse_use_readdirplus(struct inode *dir, struct dir_context *ctx)
  17{
  18        struct fuse_conn *fc = get_fuse_conn(dir);
  19        struct fuse_inode *fi = get_fuse_inode(dir);
  20
  21        if (!fc->do_readdirplus)
  22                return false;
  23        if (!fc->readdirplus_auto)
  24                return true;
  25        if (test_and_clear_bit(FUSE_I_ADVISE_RDPLUS, &fi->state))
  26                return true;
  27        if (ctx->pos == 0)
  28                return true;
  29        return false;
  30}
  31
  32static void fuse_add_dirent_to_cache(struct file *file,
  33                                     struct fuse_dirent *dirent, loff_t pos)
  34{
  35        struct fuse_inode *fi = get_fuse_inode(file_inode(file));
  36        size_t reclen = FUSE_DIRENT_SIZE(dirent);
  37        pgoff_t index;
  38        struct page *page;
  39        loff_t size;
  40        u64 version;
  41        unsigned int offset;
  42        void *addr;
  43
  44        spin_lock(&fi->rdc.lock);
  45        /*
  46         * Is cache already completed?  Or this entry does not go at the end of
  47         * cache?
  48         */
  49        if (fi->rdc.cached || pos != fi->rdc.pos) {
  50                spin_unlock(&fi->rdc.lock);
  51                return;
  52        }
  53        version = fi->rdc.version;
  54        size = fi->rdc.size;
  55        offset = size & ~PAGE_MASK;
  56        index = size >> PAGE_SHIFT;
  57        /* Dirent doesn't fit in current page?  Jump to next page. */
  58        if (offset + reclen > PAGE_SIZE) {
  59                index++;
  60                offset = 0;
  61        }
  62        spin_unlock(&fi->rdc.lock);
  63
  64        if (offset) {
  65                page = find_lock_page(file->f_mapping, index);
  66        } else {
  67                page = find_or_create_page(file->f_mapping, index,
  68                                           mapping_gfp_mask(file->f_mapping));
  69        }
  70        if (!page)
  71                return;
  72
  73        spin_lock(&fi->rdc.lock);
  74        /* Raced with another readdir */
  75        if (fi->rdc.version != version || fi->rdc.size != size ||
  76            WARN_ON(fi->rdc.pos != pos))
  77                goto unlock;
  78
  79        addr = kmap_atomic(page);
  80        if (!offset)
  81                clear_page(addr);
  82        memcpy(addr + offset, dirent, reclen);
  83        kunmap_atomic(addr);
  84        fi->rdc.size = (index << PAGE_SHIFT) + offset + reclen;
  85        fi->rdc.pos = dirent->off;
  86unlock:
  87        spin_unlock(&fi->rdc.lock);
  88        unlock_page(page);
  89        put_page(page);
  90}
  91
  92static void fuse_readdir_cache_end(struct file *file, loff_t pos)
  93{
  94        struct fuse_inode *fi = get_fuse_inode(file_inode(file));
  95        loff_t end;
  96
  97        spin_lock(&fi->rdc.lock);
  98        /* does cache end position match current position? */
  99        if (fi->rdc.pos != pos) {
 100                spin_unlock(&fi->rdc.lock);
 101                return;
 102        }
 103
 104        fi->rdc.cached = true;
 105        end = ALIGN(fi->rdc.size, PAGE_SIZE);
 106        spin_unlock(&fi->rdc.lock);
 107
 108        /* truncate unused tail of cache */
 109        truncate_inode_pages(file->f_mapping, end);
 110}
 111
 112static bool fuse_emit(struct file *file, struct dir_context *ctx,
 113                      struct fuse_dirent *dirent)
 114{
 115        struct fuse_file *ff = file->private_data;
 116
 117        if (ff->open_flags & FOPEN_CACHE_DIR)
 118                fuse_add_dirent_to_cache(file, dirent, ctx->pos);
 119
 120        return dir_emit(ctx, dirent->name, dirent->namelen, dirent->ino,
 121                        dirent->type);
 122}
 123
 124static int parse_dirfile(char *buf, size_t nbytes, struct file *file,
 125                         struct dir_context *ctx)
 126{
 127        while (nbytes >= FUSE_NAME_OFFSET) {
 128                struct fuse_dirent *dirent = (struct fuse_dirent *) buf;
 129                size_t reclen = FUSE_DIRENT_SIZE(dirent);
 130                if (!dirent->namelen || dirent->namelen > FUSE_NAME_MAX)
 131                        return -EIO;
 132                if (reclen > nbytes)
 133                        break;
 134                if (memchr(dirent->name, '/', dirent->namelen) != NULL)
 135                        return -EIO;
 136
 137                if (!fuse_emit(file, ctx, dirent))
 138                        break;
 139
 140                buf += reclen;
 141                nbytes -= reclen;
 142                ctx->pos = dirent->off;
 143        }
 144
 145        return 0;
 146}
 147
/*
 * Use the lookup information carried by one READDIRPLUS entry to set up or
 * refresh the dentry and inode for that name under the directory being read.
 *
 * @file:         open directory; its dentry is the parent of the new entry
 * @direntplus:   entry from the READDIRPLUS reply (dirent + entry_out)
 * @attr_version: attribute version sampled before the request was sent
 *
 * Returns 0 when the entry was linked or deliberately skipped (zero nodeid,
 * "." and ".."), and a negative errno otherwise.  The caller in this file
 * sends a FORGET for the nodeid whenever a non-zero value is returned.
 */
static int fuse_direntplus_link(struct file *file,
				struct fuse_direntplus *direntplus,
				u64 attr_version)
{
	struct fuse_entry_out *o = &direntplus->entry_out;
	struct fuse_dirent *dirent = &direntplus->dirent;
	struct dentry *parent = file->f_path.dentry;
	struct qstr name = QSTR_INIT(dirent->name, dirent->namelen);
	struct dentry *dentry;
	struct dentry *alias;
	struct inode *dir = d_inode(parent);
	struct fuse_conn *fc;
	struct inode *inode;
	DECLARE_WAIT_QUEUE_HEAD_ONSTACK(wq);

	if (!o->nodeid) {
		/*
		 * Unlike in the case of fuse_lookup, zero nodeid does not mean
		 * ENOENT. Instead, it only means the userspace filesystem did
		 * not want to return attributes/handle for this entry.
		 *
		 * So do nothing.
		 */
		return 0;
	}

	if (name.name[0] == '.') {
		/*
		 * We could potentially refresh the attributes of the directory
		 * and its parent?
		 */
		if (name.len == 1)
			return 0;
		if (name.name[1] == '.' && name.len == 2)
			return 0;
	}

	/* Reject replies with an unusable node ID or attributes. */
	if (invalid_nodeid(o->nodeid))
		return -EIO;
	if (fuse_invalid_attr(&o->attr))
		return -EIO;

	fc = get_fuse_conn(dir);

	/* Look for an existing dentry first; allocate one only if missing. */
	name.hash = full_name_hash(parent, name.name, name.len);
	dentry = d_lookup(parent, &name);
	if (!dentry) {
retry:
		/* Also reached after invalidating a mismatching dentry below. */
		dentry = d_alloc_parallel(parent, &name, &wq);
		if (IS_ERR(dentry))
			return PTR_ERR(dentry);
	}
	if (!d_in_lookup(dentry)) {
		/* An already looked-up dentry exists for this name. */
		struct fuse_inode *fi;
		inode = d_inode(dentry);
		/*
		 * Negative dentry, different node ID or changed file type:
		 * the existing dentry does not describe this entry, so
		 * invalidate it and start over with a fresh one.
		 */
		if (!inode ||
		    get_node_id(inode) != o->nodeid ||
		    inode_wrong_type(inode, o->attr.mode)) {
			d_invalidate(dentry);
			dput(dentry);
			goto retry;
		}
		if (fuse_is_bad(inode)) {
			dput(dentry);
			return -EIO;
		}

		/* Account for the lookup reference this reply carries. */
		fi = get_fuse_inode(inode);
		spin_lock(&fi->lock);
		fi->nlookup++;
		spin_unlock(&fi->lock);

		forget_all_cached_acls(inode);
		fuse_change_attributes(inode, &o->attr,
				       entry_attr_timeout(o),
				       attr_version);
		/*
		 * The other branch comes via fuse_iget()
		 * which bumps nlookup inside
		 */
	} else {
		/* We own the in-lookup dentry: get the inode and attach it. */
		inode = fuse_iget(dir->i_sb, o->nodeid, o->generation,
				  &o->attr, entry_attr_timeout(o),
				  attr_version);
		if (!inode)
			inode = ERR_PTR(-ENOMEM);

		alias = d_splice_alias(inode, dentry);
		d_lookup_done(dentry);
		/*
		 * A non-NULL result (alias or error pointer) replaces our
		 * dentry; the error case is caught right below.
		 */
		if (alias) {
			dput(dentry);
			dentry = alias;
		}
		if (IS_ERR(dentry))
			return PTR_ERR(dentry);
	}
	/* In auto mode, flag the inode as set up through READDIRPLUS. */
	if (fc->readdirplus_auto)
		set_bit(FUSE_I_INIT_RDPLUS, &get_fuse_inode(inode)->state);
	fuse_change_entry_timeout(dentry, o);

	dput(dentry);
	return 0;
}
 251
 252static void fuse_force_forget(struct file *file, u64 nodeid)
 253{
 254        struct inode *inode = file_inode(file);
 255        struct fuse_mount *fm = get_fuse_mount(inode);
 256        struct fuse_forget_in inarg;
 257        FUSE_ARGS(args);
 258
 259        memset(&inarg, 0, sizeof(inarg));
 260        inarg.nlookup = 1;
 261        args.opcode = FUSE_FORGET;
 262        args.nodeid = nodeid;
 263        args.in_numargs = 1;
 264        args.in_args[0].size = sizeof(inarg);
 265        args.in_args[0].value = &inarg;
 266        args.force = true;
 267        args.noreply = true;
 268
 269        fuse_simple_request(fm, &args);
 270        /* ignore errors */
 271}
 272
 273static int parse_dirplusfile(char *buf, size_t nbytes, struct file *file,
 274                             struct dir_context *ctx, u64 attr_version)
 275{
 276        struct fuse_direntplus *direntplus;
 277        struct fuse_dirent *dirent;
 278        size_t reclen;
 279        int over = 0;
 280        int ret;
 281
 282        while (nbytes >= FUSE_NAME_OFFSET_DIRENTPLUS) {
 283                direntplus = (struct fuse_direntplus *) buf;
 284                dirent = &direntplus->dirent;
 285                reclen = FUSE_DIRENTPLUS_SIZE(direntplus);
 286
 287                if (!dirent->namelen || dirent->namelen > FUSE_NAME_MAX)
 288                        return -EIO;
 289                if (reclen > nbytes)
 290                        break;
 291                if (memchr(dirent->name, '/', dirent->namelen) != NULL)
 292                        return -EIO;
 293
 294                if (!over) {
 295                        /* We fill entries into dstbuf only as much as
 296                           it can hold. But we still continue iterating
 297                           over remaining entries to link them. If not,
 298                           we need to send a FORGET for each of those
 299                           which we did not link.
 300                        */
 301                        over = !fuse_emit(file, ctx, dirent);
 302                        if (!over)
 303                                ctx->pos = dirent->off;
 304                }
 305
 306                buf += reclen;
 307                nbytes -= reclen;
 308
 309                ret = fuse_direntplus_link(file, direntplus, attr_version);
 310                if (ret)
 311                        fuse_force_forget(file, direntplus->entry_out.nodeid);
 312        }
 313
 314        return 0;
 315}
 316
 317static int fuse_readdir_uncached(struct file *file, struct dir_context *ctx)
 318{
 319        int plus;
 320        ssize_t res;
 321        struct page *page;
 322        struct inode *inode = file_inode(file);
 323        struct fuse_mount *fm = get_fuse_mount(inode);
 324        struct fuse_io_args ia = {};
 325        struct fuse_args_pages *ap = &ia.ap;
 326        struct fuse_page_desc desc = { .length = PAGE_SIZE };
 327        u64 attr_version = 0;
 328        bool locked;
 329
 330        page = alloc_page(GFP_KERNEL);
 331        if (!page)
 332                return -ENOMEM;
 333
 334        plus = fuse_use_readdirplus(inode, ctx);
 335        ap->args.out_pages = true;
 336        ap->num_pages = 1;
 337        ap->pages = &page;
 338        ap->descs = &desc;
 339        if (plus) {
 340                attr_version = fuse_get_attr_version(fm->fc);
 341                fuse_read_args_fill(&ia, file, ctx->pos, PAGE_SIZE,
 342                                    FUSE_READDIRPLUS);
 343        } else {
 344                fuse_read_args_fill(&ia, file, ctx->pos, PAGE_SIZE,
 345                                    FUSE_READDIR);
 346        }
 347        locked = fuse_lock_inode(inode);
 348        res = fuse_simple_request(fm, &ap->args);
 349        fuse_unlock_inode(inode, locked);
 350        if (res >= 0) {
 351                if (!res) {
 352                        struct fuse_file *ff = file->private_data;
 353
 354                        if (ff->open_flags & FOPEN_CACHE_DIR)
 355                                fuse_readdir_cache_end(file, ctx->pos);
 356                } else if (plus) {
 357                        res = parse_dirplusfile(page_address(page), res,
 358                                                file, ctx, attr_version);
 359                } else {
 360                        res = parse_dirfile(page_address(page), res, file,
 361                                            ctx);
 362                }
 363        }
 364
 365        __free_page(page);
 366        fuse_invalidate_atime(inode);
 367        return res;
 368}
 369
 370enum fuse_parse_result {
 371        FOUND_ERR = -1,
 372        FOUND_NONE = 0,
 373        FOUND_SOME,
 374        FOUND_ALL,
 375};
 376
 377static enum fuse_parse_result fuse_parse_cache(struct fuse_file *ff,
 378                                               void *addr, unsigned int size,
 379                                               struct dir_context *ctx)
 380{
 381        unsigned int offset = ff->readdir.cache_off & ~PAGE_MASK;
 382        enum fuse_parse_result res = FOUND_NONE;
 383
 384        WARN_ON(offset >= size);
 385
 386        for (;;) {
 387                struct fuse_dirent *dirent = addr + offset;
 388                unsigned int nbytes = size - offset;
 389                size_t reclen;
 390
 391                if (nbytes < FUSE_NAME_OFFSET || !dirent->namelen)
 392                        break;
 393
 394                reclen = FUSE_DIRENT_SIZE(dirent); /* derefs ->namelen */
 395
 396                if (WARN_ON(dirent->namelen > FUSE_NAME_MAX))
 397                        return FOUND_ERR;
 398                if (WARN_ON(reclen > nbytes))
 399                        return FOUND_ERR;
 400                if (WARN_ON(memchr(dirent->name, '/', dirent->namelen) != NULL))
 401                        return FOUND_ERR;
 402
 403                if (ff->readdir.pos == ctx->pos) {
 404                        res = FOUND_SOME;
 405                        if (!dir_emit(ctx, dirent->name, dirent->namelen,
 406                                      dirent->ino, dirent->type))
 407                                return FOUND_ALL;
 408                        ctx->pos = dirent->off;
 409                }
 410                ff->readdir.pos = dirent->off;
 411                ff->readdir.cache_off += reclen;
 412
 413                offset += reclen;
 414        }
 415
 416        return res;
 417}
 418
 419static void fuse_rdc_reset(struct inode *inode)
 420{
 421        struct fuse_inode *fi = get_fuse_inode(inode);
 422
 423        fi->rdc.cached = false;
 424        fi->rdc.version++;
 425        fi->rdc.size = 0;
 426        fi->rdc.pos = 0;
 427}
 428
/*
 * Positive sentinel returned by fuse_readdir_cached() to tell fuse_readdir()
 * that the request must be served by fuse_readdir_uncached() instead.
 */
#define UNCACHED 1
 430
/*
 * Try to satisfy a readdir request from the per-inode readdir cache.
 *
 * @file: open directory; file->private_data holds the per-open stream state
 * @ctx:  readdir context
 *
 * Returns 0 when entries were emitted from the cache or the end of the cache
 * was reached, a negative errno on failure (attribute refresh error, or -EIO
 * for a corrupt cache record), and UNCACHED when the caller has to fall back
 * to reading from the server.
 */
static int fuse_readdir_cached(struct file *file, struct dir_context *ctx)
{
	struct fuse_file *ff = file->private_data;
	struct inode *inode = file_inode(file);
	struct fuse_conn *fc = get_fuse_conn(inode);
	struct fuse_inode *fi = get_fuse_inode(inode);
	enum fuse_parse_result res;
	pgoff_t index;
	unsigned int size;
	struct page *page;
	void *addr;

	/* Seeked?  If so, reset the cache stream */
	if (ff->readdir.pos != ctx->pos) {
		ff->readdir.pos = 0;
		ff->readdir.cache_off = 0;
	}

	/*
	 * We're just about to start reading into the cache or reading the
	 * cache; both cases require an up-to-date mtime value.
	 */
	if (!ctx->pos && fc->auto_inval_data) {
		int err = fuse_update_attributes(inode, file);

		if (err)
			return err;
	}

	/* "retry" is entered without fi->rdc.lock, "retry_locked" with it. */
retry:
	spin_lock(&fi->rdc.lock);
retry_locked:
	if (!fi->rdc.cached) {
		/* Starting cache? Set cache mtime. */
		if (!ctx->pos && !fi->rdc.size) {
			fi->rdc.mtime = inode->i_mtime;
			fi->rdc.iversion = inode_query_iversion(inode);
		}
		spin_unlock(&fi->rdc.lock);
		return UNCACHED;
	}
	/*
	 * When at the beginning of the directory (i.e. just after opendir(3) or
	 * rewinddir(3)), then need to check whether directory contents have
	 * changed, and reset the cache if so.
	 */
	if (!ctx->pos) {
		if (inode_peek_iversion(inode) != fi->rdc.iversion ||
		    !timespec64_equal(&fi->rdc.mtime, &inode->i_mtime)) {
			fuse_rdc_reset(inode);
			goto retry_locked;
		}
	}

	/*
	 * If cache version changed since the last getdents() call, then reset
	 * the cache stream.
	 */
	if (ff->readdir.version != fi->rdc.version) {
		ff->readdir.pos = 0;
		ff->readdir.cache_off = 0;
	}
	/*
	 * If at the beginning of the cache, then reset version to
	 * current.
	 */
	if (ff->readdir.pos == 0)
		ff->readdir.version = fi->rdc.version;

	WARN_ON(fi->rdc.size < ff->readdir.cache_off);

	index = ff->readdir.cache_off >> PAGE_SHIFT;

	/*
	 * Only the last page of the cache is partially used; every earlier
	 * page is treated as full.
	 */
	if (index == (fi->rdc.size >> PAGE_SHIFT))
		size = fi->rdc.size & ~PAGE_MASK;
	else
		size = PAGE_SIZE;
	spin_unlock(&fi->rdc.lock);

	/* EOF? */
	if ((ff->readdir.cache_off & ~PAGE_MASK) == size)
		return 0;

	/*
	 * NOTE(review): fuse_add_dirent_to_cache() can bail out on its
	 * "raced" path after find_or_create_page() has created a page, so a
	 * page found here may never have been written.  Nothing below checks
	 * for that - confirm whether an uptodate check is needed.
	 */
	page = find_get_page_flags(file->f_mapping, index,
				   FGP_ACCESSED | FGP_LOCK);
	spin_lock(&fi->rdc.lock);
	if (!page) {
		/*
		 * Uh-oh: page gone missing, cache is useless
		 */
		if (fi->rdc.version == ff->readdir.version)
			fuse_rdc_reset(inode);
		goto retry_locked;
	}

	/* Make sure it's still the same version after getting the page. */
	if (ff->readdir.version != fi->rdc.version) {
		spin_unlock(&fi->rdc.lock);
		unlock_page(page);
		put_page(page);
		goto retry;
	}
	spin_unlock(&fi->rdc.lock);

	/*
	 * Contents of the page are now protected against changing by holding
	 * the page lock.
	 */
	addr = kmap(page);
	res = fuse_parse_cache(ff, addr, size, ctx);
	kunmap(page);
	unlock_page(page);
	put_page(page);

	if (res == FOUND_ERR)
		return -EIO;

	/* Destination is full: nothing more to do for this call. */
	if (res == FOUND_ALL)
		return 0;

	if (size == PAGE_SIZE) {
		/* We hit end of page: skip to next page. */
		ff->readdir.cache_off = ALIGN(ff->readdir.cache_off, PAGE_SIZE);
		goto retry;
	}

	/*
	 * End of cache reached.  If found position, then we are done, otherwise
	 * need to fall back to uncached, since the position we were looking for
	 * wasn't in the cache.
	 */
	return res == FOUND_SOME ? 0 : UNCACHED;
}
 564
 565int fuse_readdir(struct file *file, struct dir_context *ctx)
 566{
 567        struct fuse_file *ff = file->private_data;
 568        struct inode *inode = file_inode(file);
 569        int err;
 570
 571        if (fuse_is_bad(inode))
 572                return -EIO;
 573
 574        mutex_lock(&ff->readdir.lock);
 575
 576        err = UNCACHED;
 577        if (ff->open_flags & FOPEN_CACHE_DIR)
 578                err = fuse_readdir_cached(file, ctx);
 579        if (err == UNCACHED)
 580                err = fuse_readdir_uncached(file, ctx);
 581
 582        mutex_unlock(&ff->readdir.lock);
 583
 584        return err;
 585}
 586