linux/fs/fuse/readdir.c
<<
>>
Prefs
   1/*
   2  FUSE: Filesystem in Userspace
   3  Copyright (C) 2001-2018  Miklos Szeredi <miklos@szeredi.hu>
   4
   5  This program can be distributed under the terms of the GNU GPL.
   6  See the file COPYING.
   7*/
   8
   9
  10#include "fuse_i.h"
  11#include <linux/iversion.h>
  12#include <linux/posix_acl.h>
  13#include <linux/pagemap.h>
  14#include <linux/highmem.h>
  15
  16static bool fuse_use_readdirplus(struct inode *dir, struct dir_context *ctx)
  17{
  18        struct fuse_conn *fc = get_fuse_conn(dir);
  19        struct fuse_inode *fi = get_fuse_inode(dir);
  20
  21        if (!fc->do_readdirplus)
  22                return false;
  23        if (!fc->readdirplus_auto)
  24                return true;
  25        if (test_and_clear_bit(FUSE_I_ADVISE_RDPLUS, &fi->state))
  26                return true;
  27        if (ctx->pos == 0)
  28                return true;
  29        return false;
  30}
  31
  32static void fuse_add_dirent_to_cache(struct file *file,
  33                                     struct fuse_dirent *dirent, loff_t pos)
  34{
  35        struct fuse_inode *fi = get_fuse_inode(file_inode(file));
  36        size_t reclen = FUSE_DIRENT_SIZE(dirent);
  37        pgoff_t index;
  38        struct page *page;
  39        loff_t size;
  40        u64 version;
  41        unsigned int offset;
  42        void *addr;
  43
  44        spin_lock(&fi->rdc.lock);
  45        /*
  46         * Is cache already completed?  Or this entry does not go at the end of
  47         * cache?
  48         */
  49        if (fi->rdc.cached || pos != fi->rdc.pos) {
  50                spin_unlock(&fi->rdc.lock);
  51                return;
  52        }
  53        version = fi->rdc.version;
  54        size = fi->rdc.size;
  55        offset = size & ~PAGE_MASK;
  56        index = size >> PAGE_SHIFT;
  57        /* Dirent doesn't fit in current page?  Jump to next page. */
  58        if (offset + reclen > PAGE_SIZE) {
  59                index++;
  60                offset = 0;
  61        }
  62        spin_unlock(&fi->rdc.lock);
  63
  64        if (offset) {
  65                page = find_lock_page(file->f_mapping, index);
  66        } else {
  67                page = find_or_create_page(file->f_mapping, index,
  68                                           mapping_gfp_mask(file->f_mapping));
  69        }
  70        if (!page)
  71                return;
  72
  73        spin_lock(&fi->rdc.lock);
  74        /* Raced with another readdir */
  75        if (fi->rdc.version != version || fi->rdc.size != size ||
  76            WARN_ON(fi->rdc.pos != pos))
  77                goto unlock;
  78
  79        addr = kmap_atomic(page);
  80        if (!offset)
  81                clear_page(addr);
  82        memcpy(addr + offset, dirent, reclen);
  83        kunmap_atomic(addr);
  84        fi->rdc.size = (index << PAGE_SHIFT) + offset + reclen;
  85        fi->rdc.pos = dirent->off;
  86unlock:
  87        spin_unlock(&fi->rdc.lock);
  88        unlock_page(page);
  89        put_page(page);
  90}
  91
  92static void fuse_readdir_cache_end(struct file *file, loff_t pos)
  93{
  94        struct fuse_inode *fi = get_fuse_inode(file_inode(file));
  95        loff_t end;
  96
  97        spin_lock(&fi->rdc.lock);
  98        /* does cache end position match current position? */
  99        if (fi->rdc.pos != pos) {
 100                spin_unlock(&fi->rdc.lock);
 101                return;
 102        }
 103
 104        fi->rdc.cached = true;
 105        end = ALIGN(fi->rdc.size, PAGE_SIZE);
 106        spin_unlock(&fi->rdc.lock);
 107
 108        /* truncate unused tail of cache */
 109        truncate_inode_pages(file->f_mapping, end);
 110}
 111
 112static bool fuse_emit(struct file *file, struct dir_context *ctx,
 113                      struct fuse_dirent *dirent)
 114{
 115        struct fuse_file *ff = file->private_data;
 116
 117        if (ff->open_flags & FOPEN_CACHE_DIR)
 118                fuse_add_dirent_to_cache(file, dirent, ctx->pos);
 119
 120        return dir_emit(ctx, dirent->name, dirent->namelen, dirent->ino,
 121                        dirent->type);
 122}
 123
 124static int parse_dirfile(char *buf, size_t nbytes, struct file *file,
 125                         struct dir_context *ctx)
 126{
 127        while (nbytes >= FUSE_NAME_OFFSET) {
 128                struct fuse_dirent *dirent = (struct fuse_dirent *) buf;
 129                size_t reclen = FUSE_DIRENT_SIZE(dirent);
 130                if (!dirent->namelen || dirent->namelen > FUSE_NAME_MAX)
 131                        return -EIO;
 132                if (reclen > nbytes)
 133                        break;
 134                if (memchr(dirent->name, '/', dirent->namelen) != NULL)
 135                        return -EIO;
 136
 137                if (!fuse_emit(file, ctx, dirent))
 138                        break;
 139
 140                buf += reclen;
 141                nbytes -= reclen;
 142                ctx->pos = dirent->off;
 143        }
 144
 145        return 0;
 146}
 147
 148static int fuse_direntplus_link(struct file *file,
 149                                struct fuse_direntplus *direntplus,
 150                                u64 attr_version)
 151{
 152        struct fuse_entry_out *o = &direntplus->entry_out;
 153        struct fuse_dirent *dirent = &direntplus->dirent;
 154        struct dentry *parent = file->f_path.dentry;
 155        struct qstr name = QSTR_INIT(dirent->name, dirent->namelen);
 156        struct dentry *dentry;
 157        struct dentry *alias;
 158        struct inode *dir = d_inode(parent);
 159        struct fuse_conn *fc;
 160        struct inode *inode;
 161        DECLARE_WAIT_QUEUE_HEAD_ONSTACK(wq);
 162
 163        if (!o->nodeid) {
 164                /*
 165                 * Unlike in the case of fuse_lookup, zero nodeid does not mean
 166                 * ENOENT. Instead, it only means the userspace filesystem did
 167                 * not want to return attributes/handle for this entry.
 168                 *
 169                 * So do nothing.
 170                 */
 171                return 0;
 172        }
 173
 174        if (name.name[0] == '.') {
 175                /*
 176                 * We could potentially refresh the attributes of the directory
 177                 * and its parent?
 178                 */
 179                if (name.len == 1)
 180                        return 0;
 181                if (name.name[1] == '.' && name.len == 2)
 182                        return 0;
 183        }
 184
 185        if (invalid_nodeid(o->nodeid))
 186                return -EIO;
 187        if (!fuse_valid_type(o->attr.mode))
 188                return -EIO;
 189
 190        fc = get_fuse_conn(dir);
 191
 192        name.hash = full_name_hash(parent, name.name, name.len);
 193        dentry = d_lookup(parent, &name);
 194        if (!dentry) {
 195retry:
 196                dentry = d_alloc_parallel(parent, &name, &wq);
 197                if (IS_ERR(dentry))
 198                        return PTR_ERR(dentry);
 199        }
 200        if (!d_in_lookup(dentry)) {
 201                struct fuse_inode *fi;
 202                inode = d_inode(dentry);
 203                if (!inode ||
 204                    get_node_id(inode) != o->nodeid ||
 205                    ((o->attr.mode ^ inode->i_mode) & S_IFMT)) {
 206                        d_invalidate(dentry);
 207                        dput(dentry);
 208                        goto retry;
 209                }
 210                if (is_bad_inode(inode)) {
 211                        dput(dentry);
 212                        return -EIO;
 213                }
 214
 215                fi = get_fuse_inode(inode);
 216                spin_lock(&fi->lock);
 217                fi->nlookup++;
 218                spin_unlock(&fi->lock);
 219
 220                forget_all_cached_acls(inode);
 221                fuse_change_attributes(inode, &o->attr,
 222                                       entry_attr_timeout(o),
 223                                       attr_version);
 224                /*
 225                 * The other branch comes via fuse_iget()
 226                 * which bumps nlookup inside
 227                 */
 228        } else {
 229                inode = fuse_iget(dir->i_sb, o->nodeid, o->generation,
 230                                  &o->attr, entry_attr_timeout(o),
 231                                  attr_version);
 232                if (!inode)
 233                        inode = ERR_PTR(-ENOMEM);
 234
 235                alias = d_splice_alias(inode, dentry);
 236                d_lookup_done(dentry);
 237                if (alias) {
 238                        dput(dentry);
 239                        dentry = alias;
 240                }
 241                if (IS_ERR(dentry))
 242                        return PTR_ERR(dentry);
 243        }
 244        if (fc->readdirplus_auto)
 245                set_bit(FUSE_I_INIT_RDPLUS, &get_fuse_inode(inode)->state);
 246        fuse_change_entry_timeout(dentry, o);
 247
 248        dput(dentry);
 249        return 0;
 250}
 251
 252static int parse_dirplusfile(char *buf, size_t nbytes, struct file *file,
 253                             struct dir_context *ctx, u64 attr_version)
 254{
 255        struct fuse_direntplus *direntplus;
 256        struct fuse_dirent *dirent;
 257        size_t reclen;
 258        int over = 0;
 259        int ret;
 260
 261        while (nbytes >= FUSE_NAME_OFFSET_DIRENTPLUS) {
 262                direntplus = (struct fuse_direntplus *) buf;
 263                dirent = &direntplus->dirent;
 264                reclen = FUSE_DIRENTPLUS_SIZE(direntplus);
 265
 266                if (!dirent->namelen || dirent->namelen > FUSE_NAME_MAX)
 267                        return -EIO;
 268                if (reclen > nbytes)
 269                        break;
 270                if (memchr(dirent->name, '/', dirent->namelen) != NULL)
 271                        return -EIO;
 272
 273                if (!over) {
 274                        /* We fill entries into dstbuf only as much as
 275                           it can hold. But we still continue iterating
 276                           over remaining entries to link them. If not,
 277                           we need to send a FORGET for each of those
 278                           which we did not link.
 279                        */
 280                        over = !fuse_emit(file, ctx, dirent);
 281                        if (!over)
 282                                ctx->pos = dirent->off;
 283                }
 284
 285                buf += reclen;
 286                nbytes -= reclen;
 287
 288                ret = fuse_direntplus_link(file, direntplus, attr_version);
 289                if (ret)
 290                        fuse_force_forget(file, direntplus->entry_out.nodeid);
 291        }
 292
 293        return 0;
 294}
 295
 296static int fuse_readdir_uncached(struct file *file, struct dir_context *ctx)
 297{
 298        int plus, err;
 299        size_t nbytes;
 300        struct page *page;
 301        struct inode *inode = file_inode(file);
 302        struct fuse_conn *fc = get_fuse_conn(inode);
 303        struct fuse_req *req;
 304        u64 attr_version = 0;
 305        bool locked;
 306
 307        req = fuse_get_req(fc, 1);
 308        if (IS_ERR(req))
 309                return PTR_ERR(req);
 310
 311        page = alloc_page(GFP_KERNEL);
 312        if (!page) {
 313                fuse_put_request(fc, req);
 314                return -ENOMEM;
 315        }
 316
 317        plus = fuse_use_readdirplus(inode, ctx);
 318        req->out.argpages = 1;
 319        req->num_pages = 1;
 320        req->pages[0] = page;
 321        req->page_descs[0].length = PAGE_SIZE;
 322        if (plus) {
 323                attr_version = fuse_get_attr_version(fc);
 324                fuse_read_fill(req, file, ctx->pos, PAGE_SIZE,
 325                               FUSE_READDIRPLUS);
 326        } else {
 327                fuse_read_fill(req, file, ctx->pos, PAGE_SIZE,
 328                               FUSE_READDIR);
 329        }
 330        locked = fuse_lock_inode(inode);
 331        fuse_request_send(fc, req);
 332        fuse_unlock_inode(inode, locked);
 333        nbytes = req->out.args[0].size;
 334        err = req->out.h.error;
 335        fuse_put_request(fc, req);
 336        if (!err) {
 337                if (!nbytes) {
 338                        struct fuse_file *ff = file->private_data;
 339
 340                        if (ff->open_flags & FOPEN_CACHE_DIR)
 341                                fuse_readdir_cache_end(file, ctx->pos);
 342                } else if (plus) {
 343                        err = parse_dirplusfile(page_address(page), nbytes,
 344                                                file, ctx, attr_version);
 345                } else {
 346                        err = parse_dirfile(page_address(page), nbytes, file,
 347                                            ctx);
 348                }
 349        }
 350
 351        __free_page(page);
 352        fuse_invalidate_atime(inode);
 353        return err;
 354}
 355
 356enum fuse_parse_result {
 357        FOUND_ERR = -1,
 358        FOUND_NONE = 0,
 359        FOUND_SOME,
 360        FOUND_ALL,
 361};
 362
 363static enum fuse_parse_result fuse_parse_cache(struct fuse_file *ff,
 364                                               void *addr, unsigned int size,
 365                                               struct dir_context *ctx)
 366{
 367        unsigned int offset = ff->readdir.cache_off & ~PAGE_MASK;
 368        enum fuse_parse_result res = FOUND_NONE;
 369
 370        WARN_ON(offset >= size);
 371
 372        for (;;) {
 373                struct fuse_dirent *dirent = addr + offset;
 374                unsigned int nbytes = size - offset;
 375                size_t reclen = FUSE_DIRENT_SIZE(dirent);
 376
 377                if (nbytes < FUSE_NAME_OFFSET || !dirent->namelen)
 378                        break;
 379
 380                if (WARN_ON(dirent->namelen > FUSE_NAME_MAX))
 381                        return FOUND_ERR;
 382                if (WARN_ON(reclen > nbytes))
 383                        return FOUND_ERR;
 384                if (WARN_ON(memchr(dirent->name, '/', dirent->namelen) != NULL))
 385                        return FOUND_ERR;
 386
 387                if (ff->readdir.pos == ctx->pos) {
 388                        res = FOUND_SOME;
 389                        if (!dir_emit(ctx, dirent->name, dirent->namelen,
 390                                      dirent->ino, dirent->type))
 391                                return FOUND_ALL;
 392                        ctx->pos = dirent->off;
 393                }
 394                ff->readdir.pos = dirent->off;
 395                ff->readdir.cache_off += reclen;
 396
 397                offset += reclen;
 398        }
 399
 400        return res;
 401}
 402
 403static void fuse_rdc_reset(struct inode *inode)
 404{
 405        struct fuse_inode *fi = get_fuse_inode(inode);
 406
 407        fi->rdc.cached = false;
 408        fi->rdc.version++;
 409        fi->rdc.size = 0;
 410        fi->rdc.pos = 0;
 411}
 412
/*
 * Positive pseudo return value of fuse_readdir_cached(): the request could
 * not be served from the cache and fuse_readdir() must fall back to
 * fuse_readdir_uncached().
 */
#define UNCACHED 1
 414
/*
 * Try to serve a readdir request from the page-cache backed readdir cache.
 *
 * @file: open directory; file->private_data holds the per-open cache stream
 *        (ff->readdir.pos / cache_off / version)
 * @ctx:  readdir context to emit entries into
 *
 * Returns 0 when entries were emitted from the cache or the end of the
 * cache was reached, UNCACHED when the caller has to read from the
 * filesystem instead, -EIO when a cached entry is corrupt, or the error from
 * fuse_update_attributes().
 */
static int fuse_readdir_cached(struct file *file, struct dir_context *ctx)
{
	struct fuse_file *ff = file->private_data;
	struct inode *inode = file_inode(file);
	struct fuse_conn *fc = get_fuse_conn(inode);
	struct fuse_inode *fi = get_fuse_inode(inode);
	enum fuse_parse_result res;
	pgoff_t index;
	unsigned int size;
	struct page *page;
	void *addr;

	/* Seeked?  If so, reset the cache stream */
	if (ff->readdir.pos != ctx->pos) {
		ff->readdir.pos = 0;
		ff->readdir.cache_off = 0;
	}

	/*
	 * We're just about to start reading into the cache or reading the
	 * cache; both cases require an up-to-date mtime value.
	 */
	if (!ctx->pos && fc->auto_inval_data) {
		int err = fuse_update_attributes(inode, file);

		if (err)
			return err;
	}

	/* "retry" re-enters without the lock, "retry_locked" with it held. */
retry:
	spin_lock(&fi->rdc.lock);
retry_locked:
	if (!fi->rdc.cached) {
		/* Starting cache? Set cache mtime. */
		if (!ctx->pos && !fi->rdc.size) {
			fi->rdc.mtime = inode->i_mtime;
			fi->rdc.iversion = inode_query_iversion(inode);
		}
		spin_unlock(&fi->rdc.lock);
		return UNCACHED;
	}
	/*
	 * When at the beginning of the directory (i.e. just after opendir(3) or
	 * rewinddir(3)), then need to check whether directory contents have
	 * changed, and reset the cache if so.
	 */
	if (!ctx->pos) {
		if (inode_peek_iversion(inode) != fi->rdc.iversion ||
		    !timespec64_equal(&fi->rdc.mtime, &inode->i_mtime)) {
			fuse_rdc_reset(inode);
			goto retry_locked;
		}
	}

	/*
	 * If cache version changed since the last getdents() call, then reset
	 * the cache stream.
	 */
	if (ff->readdir.version != fi->rdc.version) {
		ff->readdir.pos = 0;
		ff->readdir.cache_off = 0;
	}
	/*
	 * If at the beginning of the cache, then reset version to
	 * current.
	 */
	if (ff->readdir.pos == 0)
		ff->readdir.version = fi->rdc.version;

	WARN_ON(fi->rdc.size < ff->readdir.cache_off);

	index = ff->readdir.cache_off >> PAGE_SHIFT;

	/* Only the last cache page is partially filled. */
	if (index == (fi->rdc.size >> PAGE_SHIFT))
		size = fi->rdc.size & ~PAGE_MASK;
	else
		size = PAGE_SIZE;
	spin_unlock(&fi->rdc.lock);

	/* EOF? */
	if ((ff->readdir.cache_off & ~PAGE_MASK) == size)
		return 0;

	/* Look up and lock the cache page without holding the spinlock. */
	page = find_get_page_flags(file->f_mapping, index,
				   FGP_ACCESSED | FGP_LOCK);
	spin_lock(&fi->rdc.lock);
	if (!page) {
		/*
		 * Uh-oh: page gone missing, cache is useless
		 */
		if (fi->rdc.version == ff->readdir.version)
			fuse_rdc_reset(inode);
		goto retry_locked;
	}

	/* Make sure it's still the same version after getting the page. */
	if (ff->readdir.version != fi->rdc.version) {
		spin_unlock(&fi->rdc.lock);
		unlock_page(page);
		put_page(page);
		goto retry;
	}
	spin_unlock(&fi->rdc.lock);

	/*
	 * Contents of the page are now protected against changing by holding
	 * the page lock.
	 */
	addr = kmap(page);
	res = fuse_parse_cache(ff, addr, size, ctx);
	kunmap(page);
	unlock_page(page);
	put_page(page);

	if (res == FOUND_ERR)
		return -EIO;

	/* The destination buffer is full: stop here. */
	if (res == FOUND_ALL)
		return 0;

	if (size == PAGE_SIZE) {
		/* We hit end of page: skip to next page. */
		ff->readdir.cache_off = ALIGN(ff->readdir.cache_off, PAGE_SIZE);
		goto retry;
	}

	/*
	 * End of cache reached.  If found position, then we are done, otherwise
	 * need to fall back to uncached, since the position we were looking for
	 * wasn't in the cache.
	 */
	return res == FOUND_SOME ? 0 : UNCACHED;
}
 548
 549int fuse_readdir(struct file *file, struct dir_context *ctx)
 550{
 551        struct fuse_file *ff = file->private_data;
 552        struct inode *inode = file_inode(file);
 553        int err;
 554
 555        if (is_bad_inode(inode))
 556                return -EIO;
 557
 558        mutex_lock(&ff->readdir.lock);
 559
 560        err = UNCACHED;
 561        if (ff->open_flags & FOPEN_CACHE_DIR)
 562                err = fuse_readdir_cached(file, ctx);
 563        if (err == UNCACHED)
 564                err = fuse_readdir_uncached(file, ctx);
 565
 566        mutex_unlock(&ff->readdir.lock);
 567
 568        return err;
 569}
 570