linux/fs/fuse/dev.c
   1/*
   2  FUSE: Filesystem in Userspace
   3  Copyright (C) 2001-2008  Miklos Szeredi <miklos@szeredi.hu>
   4
   5  This program can be distributed under the terms of the GNU GPL.
   6  See the file COPYING.
   7*/
   8
   9#include "fuse_i.h"
  10
  11#include <linux/init.h>
  12#include <linux/module.h>
  13#include <linux/poll.h>
  14#include <linux/sched/signal.h>
  15#include <linux/uio.h>
  16#include <linux/miscdevice.h>
  17#include <linux/pagemap.h>
  18#include <linux/file.h>
  19#include <linux/slab.h>
  20#include <linux/pipe_fs_i.h>
  21#include <linux/swap.h>
  22#include <linux/splice.h>
  23#include <linux/sched.h>
  24
  25MODULE_ALIAS_MISCDEV(FUSE_MINOR);
  26MODULE_ALIAS("devname:fuse");
  27
  28/* Ordinary requests have even IDs, while interrupt IDs are odd */
  29#define FUSE_INT_REQ_BIT (1ULL << 0)
  30#define FUSE_REQ_ID_STEP (1ULL << 1)
  31
  32static struct kmem_cache *fuse_req_cachep;
  33
  34static struct fuse_dev *fuse_get_dev(struct file *file)
  35{
  36        /*
  37         * Lockless access is OK, because file->private_data is set
  38         * once during mount and is valid until the file is released.
  39         */
  40        return READ_ONCE(file->private_data);
  41}
  42
  43static void fuse_request_init(struct fuse_req *req, struct page **pages,
  44                              struct fuse_page_desc *page_descs,
  45                              unsigned npages)
  46{
  47        INIT_LIST_HEAD(&req->list);
  48        INIT_LIST_HEAD(&req->intr_entry);
  49        init_waitqueue_head(&req->waitq);
  50        refcount_set(&req->count, 1);
  51        req->pages = pages;
  52        req->page_descs = page_descs;
  53        req->max_pages = npages;
  54        __set_bit(FR_PENDING, &req->flags);
  55}
  56
  57static struct page **fuse_req_pages_alloc(unsigned int npages, gfp_t flags,
  58                                          struct fuse_page_desc **desc)
  59{
  60        struct page **pages;
  61
  62        pages = kzalloc(npages * (sizeof(struct page *) +
  63                                  sizeof(struct fuse_page_desc)), flags);
  64        *desc = (void *) pages + npages * sizeof(struct page *);
  65
  66        return pages;
  67}
  68
  69static struct fuse_req *__fuse_request_alloc(unsigned npages, gfp_t flags)
  70{
  71        struct fuse_req *req = kmem_cache_zalloc(fuse_req_cachep, flags);
  72        if (req) {
  73                struct page **pages = NULL;
  74                struct fuse_page_desc *page_descs = NULL;
  75
  76                WARN_ON(npages > FUSE_MAX_MAX_PAGES);
  77                if (npages > FUSE_REQ_INLINE_PAGES) {
  78                        pages = fuse_req_pages_alloc(npages, flags,
  79                                                     &page_descs);
  80                        if (!pages) {
  81                                kmem_cache_free(fuse_req_cachep, req);
  82                                return NULL;
  83                        }
  84                } else if (npages) {
  85                        pages = req->inline_pages;
  86                        page_descs = req->inline_page_descs;
  87                }
  88
  89                fuse_request_init(req, pages, page_descs, npages);
  90        }
  91        return req;
  92}
  93
  94struct fuse_req *fuse_request_alloc(unsigned npages)
  95{
  96        return __fuse_request_alloc(npages, GFP_KERNEL);
  97}
  98EXPORT_SYMBOL_GPL(fuse_request_alloc);
  99
 100struct fuse_req *fuse_request_alloc_nofs(unsigned npages)
 101{
 102        return __fuse_request_alloc(npages, GFP_NOFS);
 103}
 104
 105static void fuse_req_pages_free(struct fuse_req *req)
 106{
 107        if (req->pages != req->inline_pages)
 108                kfree(req->pages);
 109}
 110
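    /*
     * Grow the page array of a request (bounded by fc->max_pages),
     * copying over the page pointers and descriptors already in use.
     */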
 111bool fuse_req_realloc_pages(struct fuse_conn *fc, struct fuse_req *req,
 112                            gfp_t flags)
 113{
 114        struct page **pages;
 115        struct fuse_page_desc *page_descs;
 116        unsigned int npages = min_t(unsigned int,
 117                                    max_t(unsigned int, req->max_pages * 2,
 118                                          FUSE_DEFAULT_MAX_PAGES_PER_REQ),
 119                                    fc->max_pages);
 120        WARN_ON(npages <= req->max_pages);
 121
 122        pages = fuse_req_pages_alloc(npages, flags, &page_descs);
 123        if (!pages)
 124                return false;
 125
 126        memcpy(pages, req->pages, sizeof(struct page *) * req->max_pages);
 127        memcpy(page_descs, req->page_descs,
 128               sizeof(struct fuse_page_desc) * req->max_pages);
 129        fuse_req_pages_free(req);
 130        req->pages = pages;
 131        req->page_descs = page_descs;
 132        req->max_pages = npages;
 133
 134        return true;
 135}
 136
 137void fuse_request_free(struct fuse_req *req)
 138{
 139        fuse_req_pages_free(req);
 140        kmem_cache_free(fuse_req_cachep, req);
 141}
 142
 143void __fuse_get_request(struct fuse_req *req)
 144{
 145        refcount_inc(&req->count);
 146}
 147
 148/* Must be called with > 1 refcount */
 149static void __fuse_put_request(struct fuse_req *req)
 150{
 151        refcount_dec(&req->count);
 152}
 153
 154void fuse_set_initialized(struct fuse_conn *fc)
 155{
 156        /* Make sure stores before this are seen on another CPU */
 157        smp_wmb();
 158        fc->initialized = 1;
 159}
 160
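    /*
     * Request allocation must block while the connection is not yet
     * initialized, or, for background requests, while fc->blocked
     * signals that too many background requests are already in flight.
     */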
 161static bool fuse_block_alloc(struct fuse_conn *fc, bool for_background)
 162{
 163        return !fc->initialized || (for_background && fc->blocked);
 164}
 165
 166static void fuse_drop_waiting(struct fuse_conn *fc)
 167{
 168        /*
 169         * lockless check of fc->connected is okay, because atomic_dec_and_test()
 170         * provides a memory barrier matched with the one in fuse_wait_aborted()
 171         * to ensure no wake-up is missed.
 172         */
 173        if (atomic_dec_and_test(&fc->num_waiting) &&
 174            !READ_ONCE(fc->connected)) {
 175                /* wake up aborters */
 176                wake_up_all(&fc->blocked_waitq);
 177        }
 178}
 179
 180static struct fuse_req *__fuse_get_req(struct fuse_conn *fc, unsigned npages,
 181                                       bool for_background)
 182{
 183        struct fuse_req *req;
 184        int err;
 185        atomic_inc(&fc->num_waiting);
 186
 187        if (fuse_block_alloc(fc, for_background)) {
 188                err = -EINTR;
 189                if (wait_event_killable_exclusive(fc->blocked_waitq,
 190                                !fuse_block_alloc(fc, for_background)))
 191                        goto out;
 192        }
 193        /* Matches smp_wmb() in fuse_set_initialized() */
 194        smp_rmb();
 195
 196        err = -ENOTCONN;
 197        if (!fc->connected)
 198                goto out;
 199
 200        err = -ECONNREFUSED;
 201        if (fc->conn_error)
 202                goto out;
 203
 204        req = fuse_request_alloc(npages);
 205        err = -ENOMEM;
 206        if (!req) {
 207                if (for_background)
 208                        wake_up(&fc->blocked_waitq);
 209                goto out;
 210        }
 211
 212        req->in.h.uid = from_kuid(fc->user_ns, current_fsuid());
 213        req->in.h.gid = from_kgid(fc->user_ns, current_fsgid());
 214        req->in.h.pid = pid_nr_ns(task_pid(current), fc->pid_ns);
 215
 216        __set_bit(FR_WAITING, &req->flags);
 217        if (for_background)
 218                __set_bit(FR_BACKGROUND, &req->flags);
 219
 220        if (unlikely(req->in.h.uid == ((uid_t)-1) ||
 221                     req->in.h.gid == ((gid_t)-1))) {
 222                fuse_put_request(fc, req);
 223                return ERR_PTR(-EOVERFLOW);
 224        }
 225        return req;
 226
 227 out:
 228        fuse_drop_waiting(fc);
 229        return ERR_PTR(err);
 230}
 231
 232struct fuse_req *fuse_get_req(struct fuse_conn *fc, unsigned npages)
 233{
 234        return __fuse_get_req(fc, npages, false);
 235}
 236EXPORT_SYMBOL_GPL(fuse_get_req);
 237
 238struct fuse_req *fuse_get_req_for_background(struct fuse_conn *fc,
 239                                             unsigned npages)
 240{
 241        return __fuse_get_req(fc, npages, true);
 242}
 243EXPORT_SYMBOL_GPL(fuse_get_req_for_background);
 244
 245/*
 246 * Return the request in fuse_file->reserved_req.  However that may
 247 * currently be in use.  If that is the case, wait for it to become
 248 * available.
 249 */
 250static struct fuse_req *get_reserved_req(struct fuse_conn *fc,
 251                                         struct file *file)
 252{
 253        struct fuse_req *req = NULL;
 254        struct fuse_inode *fi = get_fuse_inode(file_inode(file));
 255        struct fuse_file *ff = file->private_data;
 256
 257        do {
 258                wait_event(fc->reserved_req_waitq, ff->reserved_req);
 259                spin_lock(&fi->lock);
 260                if (ff->reserved_req) {
 261                        req = ff->reserved_req;
 262                        ff->reserved_req = NULL;
 263                        req->stolen_file = get_file(file);
 264                }
 265                spin_unlock(&fi->lock);
 266        } while (!req);
 267
 268        return req;
 269}
 270
 271/*
 272 * Put stolen request back into fuse_file->reserved_req
 273 */
 274static void put_reserved_req(struct fuse_conn *fc, struct fuse_req *req)
 275{
 276        struct file *file = req->stolen_file;
 277        struct fuse_inode *fi = get_fuse_inode(file_inode(file));
 278        struct fuse_file *ff = file->private_data;
 279
 280        WARN_ON(req->max_pages);
 281        spin_lock(&fi->lock);
 282        memset(req, 0, sizeof(*req));
 283        fuse_request_init(req, NULL, NULL, 0);
 284        BUG_ON(ff->reserved_req);
 285        ff->reserved_req = req;
 286        wake_up_all(&fc->reserved_req_waitq);
 287        spin_unlock(&fi->lock);
 288        fput(file);
 289}
 290
 291/*
 292 * Get a request for a file operation; always succeeds
 293 *
 294 * This is used for sending the FLUSH request, which must get to
 295 * userspace, due to POSIX locks which may need to be unlocked.
 296 *
 297 * If allocation fails due to OOM, use the reserved request in
 298 * fuse_file.
 299 *
 300 * This is very unlikely to deadlock accidentally, since the
 301 * filesystem should not have its own file open.  If deadlock is
 302 * intentional, it can still be broken by "aborting" the filesystem.
 303 */
 304struct fuse_req *fuse_get_req_nofail_nopages(struct fuse_conn *fc,
 305                                             struct file *file)
 306{
 307        struct fuse_req *req;
 308
 309        atomic_inc(&fc->num_waiting);
 310        wait_event(fc->blocked_waitq, fc->initialized);
 311        /* Matches smp_wmb() in fuse_set_initialized() */
 312        smp_rmb();
 313        req = fuse_request_alloc(0);
 314        if (!req)
 315                req = get_reserved_req(fc, file);
 316
 317        req->in.h.uid = from_kuid_munged(fc->user_ns, current_fsuid());
 318        req->in.h.gid = from_kgid_munged(fc->user_ns, current_fsgid());
 319        req->in.h.pid = pid_nr_ns(task_pid(current), fc->pid_ns);
 320
 321        __set_bit(FR_WAITING, &req->flags);
 322        __clear_bit(FR_BACKGROUND, &req->flags);
 323        return req;
 324}
 325
 326void fuse_put_request(struct fuse_conn *fc, struct fuse_req *req)
 327{
 328        if (refcount_dec_and_test(&req->count)) {
 329                if (test_bit(FR_BACKGROUND, &req->flags)) {
 330                        /*
 331                         * We get here in the unlikely case that a background
 332                         * request was allocated but not sent
 333                         */
 334                        spin_lock(&fc->bg_lock);
 335                        if (!fc->blocked)
 336                                wake_up(&fc->blocked_waitq);
 337                        spin_unlock(&fc->bg_lock);
 338                }
 339
 340                if (test_bit(FR_WAITING, &req->flags)) {
 341                        __clear_bit(FR_WAITING, &req->flags);
 342                        fuse_drop_waiting(fc);
 343                }
 344
 345                if (req->stolen_file)
 346                        put_reserved_req(fc, req);
 347                else
 348                        fuse_request_free(req);
 349        }
 350}
 351EXPORT_SYMBOL_GPL(fuse_put_request);
 352
 353static unsigned len_args(unsigned numargs, struct fuse_arg *args)
 354{
 355        unsigned nbytes = 0;
 356        unsigned i;
 357
 358        for (i = 0; i < numargs; i++)
 359                nbytes += args[i].size;
 360
 361        return nbytes;
 362}
 363
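    /*
     * Allocate the next unique request ID.  IDs advance in steps of
     * FUSE_REQ_ID_STEP so that the low bit stays free for marking
     * interrupt requests (FUSE_INT_REQ_BIT).  Callers hold
     * fiq->waitq.lock, which serializes the counter.
     */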
 364static u64 fuse_get_unique(struct fuse_iqueue *fiq)
 365{
 366        fiq->reqctr += FUSE_REQ_ID_STEP;
 367        return fiq->reqctr;
 368}
 369
 370static unsigned int fuse_req_hash(u64 unique)
 371{
 372        return hash_long(unique & ~FUSE_INT_REQ_BIT, FUSE_PQ_HASH_BITS);
 373}
 374
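    /*
     * Fill in the total message length, add the request to the input
     * queue and wake up a reader.  Called with fiq->waitq.lock held.
     */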
 375static void queue_request(struct fuse_iqueue *fiq, struct fuse_req *req)
 376{
 377        req->in.h.len = sizeof(struct fuse_in_header) +
 378                len_args(req->in.numargs, (struct fuse_arg *) req->in.args);
 379        list_add_tail(&req->list, &fiq->pending);
 380        wake_up_locked(&fiq->waitq);
 381        kill_fasync(&fiq->fasync, SIGIO, POLL_IN);
 382}
 383
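    /*
     * Queue a FORGET for the given node.  Forgets are kept on their own
     * singly linked list so that many of them can later be sent as a
     * single FUSE_BATCH_FORGET; if the connection is already down the
     * forget is simply dropped.
     */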
 384void fuse_queue_forget(struct fuse_conn *fc, struct fuse_forget_link *forget,
 385                       u64 nodeid, u64 nlookup)
 386{
 387        struct fuse_iqueue *fiq = &fc->iq;
 388
 389        forget->forget_one.nodeid = nodeid;
 390        forget->forget_one.nlookup = nlookup;
 391
 392        spin_lock(&fiq->waitq.lock);
 393        if (fiq->connected) {
 394                fiq->forget_list_tail->next = forget;
 395                fiq->forget_list_tail = forget;
 396                wake_up_locked(&fiq->waitq);
 397                kill_fasync(&fiq->fasync, SIGIO, POLL_IN);
 398        } else {
 399                kfree(forget);
 400        }
 401        spin_unlock(&fiq->waitq.lock);
 402}
 403
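    /*
     * Move queued background requests to the input queue until
     * max_background of them are active.  Called with fc->bg_lock held.
     */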
 404static void flush_bg_queue(struct fuse_conn *fc)
 405{
 406        struct fuse_iqueue *fiq = &fc->iq;
 407
 408        while (fc->active_background < fc->max_background &&
 409               !list_empty(&fc->bg_queue)) {
 410                struct fuse_req *req;
 411
 412                req = list_first_entry(&fc->bg_queue, struct fuse_req, list);
 413                list_del(&req->list);
 414                fc->active_background++;
 415                spin_lock(&fiq->waitq.lock);
 416                req->in.h.unique = fuse_get_unique(fiq);
 417                queue_request(fiq, req);
 418                spin_unlock(&fiq->waitq.lock);
 419        }
 420}
 421
 422/*
 423 * This function is called when a request is finished.  Either a reply
 424 * has arrived or it was aborted (and not yet sent) or some error
 425 * occurred during communication with userspace, or the device file
 426 * was closed.  The requester thread is woken up (if still waiting),
 427 * the 'end' callback is called if given, and the reference to the
 428 * request is released.
 429 */
 430static void request_end(struct fuse_conn *fc, struct fuse_req *req)
 431{
 432        struct fuse_iqueue *fiq = &fc->iq;
 433
 434        if (test_and_set_bit(FR_FINISHED, &req->flags))
 435                goto put_request;
 436        /*
 437         * test_and_set_bit() implies smp_mb() between bit
 438         * changing and below intr_entry check. Pairs with
 439         * smp_mb() from queue_interrupt().
 440         */
 441        if (!list_empty(&req->intr_entry)) {
 442                spin_lock(&fiq->waitq.lock);
 443                list_del_init(&req->intr_entry);
 444                spin_unlock(&fiq->waitq.lock);
 445        }
 446        WARN_ON(test_bit(FR_PENDING, &req->flags));
 447        WARN_ON(test_bit(FR_SENT, &req->flags));
 448        if (test_bit(FR_BACKGROUND, &req->flags)) {
 449                spin_lock(&fc->bg_lock);
 450                clear_bit(FR_BACKGROUND, &req->flags);
 451                if (fc->num_background == fc->max_background) {
 452                        fc->blocked = 0;
 453                        wake_up(&fc->blocked_waitq);
 454                } else if (!fc->blocked) {
 455                        /*
 456                         * Wake up next waiter, if any.  It's okay to use
 457                         * waitqueue_active(), as we've already synced up
 458                         * fc->blocked with waiters with the wake_up() call
 459                         * above.
 460                         */
 461                        if (waitqueue_active(&fc->blocked_waitq))
 462                                wake_up(&fc->blocked_waitq);
 463                }
 464
 465                if (fc->num_background == fc->congestion_threshold && fc->sb) {
 466                        clear_bdi_congested(fc->sb->s_bdi, BLK_RW_SYNC);
 467                        clear_bdi_congested(fc->sb->s_bdi, BLK_RW_ASYNC);
 468                }
 469                fc->num_background--;
 470                fc->active_background--;
 471                flush_bg_queue(fc);
 472                spin_unlock(&fc->bg_lock);
 473        } else {
 474                /* Wake up waiter sleeping in request_wait_answer() */
 475                wake_up(&req->waitq);
 476        }
 477
 478        if (req->end)
 479                req->end(fc, req);
 480put_request:
 481        fuse_put_request(fc, req);
 482}
 483
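    /*
     * Queue an INTERRUPT for a request that userspace is already
     * processing.  Fails with -EINVAL if the request was never marked
     * FR_INTERRUPTED, and quietly backs out if the request has finished
     * in the meantime.
     */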
 484static int queue_interrupt(struct fuse_iqueue *fiq, struct fuse_req *req)
 485{
 486        spin_lock(&fiq->waitq.lock);
 487        /* Only queue the interrupt if the request was actually interrupted */
 488        if (unlikely(!test_bit(FR_INTERRUPTED, &req->flags))) {
 489                spin_unlock(&fiq->waitq.lock);
 490                return -EINVAL;
 491        }
 492
 493        if (list_empty(&req->intr_entry)) {
 494                list_add_tail(&req->intr_entry, &fiq->interrupts);
 495                /*
 496                 * Pairs with smp_mb() implied by test_and_set_bit()
 497                 * from request_end().
 498                 */
 499                smp_mb();
 500                if (test_bit(FR_FINISHED, &req->flags)) {
 501                        list_del_init(&req->intr_entry);
 502                        spin_unlock(&fiq->waitq.lock);
 503                        return 0;
 504                }
 505                wake_up_locked(&fiq->waitq);
 506                kill_fasync(&fiq->fasync, SIGIO, POLL_IN);
 507        }
 508        spin_unlock(&fiq->waitq.lock);
 509        return 0;
 510}
 511
 512static void request_wait_answer(struct fuse_conn *fc, struct fuse_req *req)
 513{
 514        struct fuse_iqueue *fiq = &fc->iq;
 515        int err;
 516
 517        if (!fc->no_interrupt) {
 518                /* Any signal may interrupt this */
 519                err = wait_event_interruptible(req->waitq,
 520                                        test_bit(FR_FINISHED, &req->flags));
 521                if (!err)
 522                        return;
 523
 524                set_bit(FR_INTERRUPTED, &req->flags);
 525                /* matches barrier in fuse_dev_do_read() */
 526                smp_mb__after_atomic();
 527                if (test_bit(FR_SENT, &req->flags))
 528                        queue_interrupt(fiq, req);
 529        }
 530
 531        if (!test_bit(FR_FORCE, &req->flags)) {
 532                /* Only fatal signals may interrupt this */
 533                err = wait_event_killable(req->waitq,
 534                                        test_bit(FR_FINISHED, &req->flags));
 535                if (!err)
 536                        return;
 537
 538                spin_lock(&fiq->waitq.lock);
 539                /* Request is not yet in userspace, bail out */
 540                if (test_bit(FR_PENDING, &req->flags)) {
 541                        list_del(&req->list);
 542                        spin_unlock(&fiq->waitq.lock);
 543                        __fuse_put_request(req);
 544                        req->out.h.error = -EINTR;
 545                        return;
 546                }
 547                spin_unlock(&fiq->waitq.lock);
 548        }
 549
 550        /*
 551         * Either request is already in userspace, or it was forced.
 552         * Wait it out.
 553         */
 554        wait_event(req->waitq, test_bit(FR_FINISHED, &req->flags));
 555}
 556
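    /*
     * Queue a synchronous request and wait in request_wait_answer() for
     * it to complete.  If the connection is already dead, -ENOTCONN is
     * reported in req->out.h.error instead.
     */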
 557static void __fuse_request_send(struct fuse_conn *fc, struct fuse_req *req)
 558{
 559        struct fuse_iqueue *fiq = &fc->iq;
 560
 561        BUG_ON(test_bit(FR_BACKGROUND, &req->flags));
 562        spin_lock(&fiq->waitq.lock);
 563        if (!fiq->connected) {
 564                spin_unlock(&fiq->waitq.lock);
 565                req->out.h.error = -ENOTCONN;
 566        } else {
 567                req->in.h.unique = fuse_get_unique(fiq);
 568                queue_request(fiq, req);
 569                /* acquire extra reference, since request is still needed
 570                   after request_end() */
 571                __fuse_get_request(req);
 572                spin_unlock(&fiq->waitq.lock);
 573
 574                request_wait_answer(fc, req);
 575                /* Pairs with smp_wmb() in request_end() */
 576                smp_rmb();
 577        }
 578}
 579
 580void fuse_request_send(struct fuse_conn *fc, struct fuse_req *req)
 581{
 582        __set_bit(FR_ISREPLY, &req->flags);
 583        if (!test_bit(FR_WAITING, &req->flags)) {
 584                __set_bit(FR_WAITING, &req->flags);
 585                atomic_inc(&fc->num_waiting);
 586        }
 587        __fuse_request_send(fc, req);
 588}
 589EXPORT_SYMBOL_GPL(fuse_request_send);
 590
 591static void fuse_adjust_compat(struct fuse_conn *fc, struct fuse_args *args)
 592{
 593        if (fc->minor < 4 && args->in.h.opcode == FUSE_STATFS)
 594                args->out.args[0].size = FUSE_COMPAT_STATFS_SIZE;
 595
 596        if (fc->minor < 9) {
 597                switch (args->in.h.opcode) {
 598                case FUSE_LOOKUP:
 599                case FUSE_CREATE:
 600                case FUSE_MKNOD:
 601                case FUSE_MKDIR:
 602                case FUSE_SYMLINK:
 603                case FUSE_LINK:
 604                        args->out.args[0].size = FUSE_COMPAT_ENTRY_OUT_SIZE;
 605                        break;
 606                case FUSE_GETATTR:
 607                case FUSE_SETATTR:
 608                        args->out.args[0].size = FUSE_COMPAT_ATTR_OUT_SIZE;
 609                        break;
 610                }
 611        }
 612        if (fc->minor < 12) {
 613                switch (args->in.h.opcode) {
 614                case FUSE_CREATE:
 615                        args->in.args[0].size = sizeof(struct fuse_open_in);
 616                        break;
 617                case FUSE_MKNOD:
 618                        args->in.args[0].size = FUSE_COMPAT_MKNOD_IN_SIZE;
 619                        break;
 620                }
 621        }
 622}
 623
 624ssize_t fuse_simple_request(struct fuse_conn *fc, struct fuse_args *args)
 625{
 626        struct fuse_req *req;
 627        ssize_t ret;
 628
 629        req = fuse_get_req(fc, 0);
 630        if (IS_ERR(req))
 631                return PTR_ERR(req);
 632
 633        /* Needs to be done after fuse_get_req() so that fc->minor is valid */
 634        fuse_adjust_compat(fc, args);
 635
 636        req->in.h.opcode = args->in.h.opcode;
 637        req->in.h.nodeid = args->in.h.nodeid;
 638        req->in.numargs = args->in.numargs;
 639        memcpy(req->in.args, args->in.args,
 640               args->in.numargs * sizeof(struct fuse_in_arg));
 641        req->out.argvar = args->out.argvar;
 642        req->out.numargs = args->out.numargs;
 643        memcpy(req->out.args, args->out.args,
 644               args->out.numargs * sizeof(struct fuse_arg));
 645        fuse_request_send(fc, req);
 646        ret = req->out.h.error;
 647        if (!ret && args->out.argvar) {
 648                BUG_ON(args->out.numargs != 1);
 649                ret = req->out.args[0].size;
 650        }
 651        fuse_put_request(fc, req);
 652
 653        return ret;
 654}
 655
 656bool fuse_request_queue_background(struct fuse_conn *fc, struct fuse_req *req)
 657{
 658        bool queued = false;
 659
 660        WARN_ON(!test_bit(FR_BACKGROUND, &req->flags));
 661        if (!test_bit(FR_WAITING, &req->flags)) {
 662                __set_bit(FR_WAITING, &req->flags);
 663                atomic_inc(&fc->num_waiting);
 664        }
 665        __set_bit(FR_ISREPLY, &req->flags);
 666        spin_lock(&fc->bg_lock);
 667        if (likely(fc->connected)) {
 668                fc->num_background++;
 669                if (fc->num_background == fc->max_background)
 670                        fc->blocked = 1;
 671                if (fc->num_background == fc->congestion_threshold && fc->sb) {
 672                        set_bdi_congested(fc->sb->s_bdi, BLK_RW_SYNC);
 673                        set_bdi_congested(fc->sb->s_bdi, BLK_RW_ASYNC);
 674                }
 675                list_add_tail(&req->list, &fc->bg_queue);
 676                flush_bg_queue(fc);
 677                queued = true;
 678        }
 679        spin_unlock(&fc->bg_lock);
 680
 681        return queued;
 682}
 683
 684void fuse_request_send_background(struct fuse_conn *fc, struct fuse_req *req)
 685{
 686        WARN_ON(!req->end);
 687        if (!fuse_request_queue_background(fc, req)) {
 688                req->out.h.error = -ENOTCONN;
 689                req->end(fc, req);
 690                fuse_put_request(fc, req);
 691        }
 692}
 693EXPORT_SYMBOL_GPL(fuse_request_send_background);
 694
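    /*
     * Queue a reply to a kernel-to-userspace notification (e.g. the
     * retrieve mechanism), reusing the unique ID supplied by the caller.
     * No answer is expected, so FR_ISREPLY is cleared.
     */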
 695static int fuse_request_send_notify_reply(struct fuse_conn *fc,
 696                                          struct fuse_req *req, u64 unique)
 697{
 698        int err = -ENODEV;
 699        struct fuse_iqueue *fiq = &fc->iq;
 700
 701        __clear_bit(FR_ISREPLY, &req->flags);
 702        req->in.h.unique = unique;
 703        spin_lock(&fiq->waitq.lock);
 704        if (fiq->connected) {
 705                queue_request(fiq, req);
 706                err = 0;
 707        }
 708        spin_unlock(&fiq->waitq.lock);
 709
 710        return err;
 711}
 712
 713void fuse_force_forget(struct file *file, u64 nodeid)
 714{
 715        struct inode *inode = file_inode(file);
 716        struct fuse_conn *fc = get_fuse_conn(inode);
 717        struct fuse_req *req;
 718        struct fuse_forget_in inarg;
 719
 720        memset(&inarg, 0, sizeof(inarg));
 721        inarg.nlookup = 1;
 722        req = fuse_get_req_nofail_nopages(fc, file);
 723        req->in.h.opcode = FUSE_FORGET;
 724        req->in.h.nodeid = nodeid;
 725        req->in.numargs = 1;
 726        req->in.args[0].size = sizeof(inarg);
 727        req->in.args[0].value = &inarg;
 728        __clear_bit(FR_ISREPLY, &req->flags);
 729        __fuse_request_send(fc, req);
 730        /* ignore errors */
 731        fuse_put_request(fc, req);
 732}
 733
 734/*
 735 * Lock the request.  Up to the next unlock_request() there mustn't be
 736 * anything that could cause a page-fault.  If the request was already
 737 * aborted bail out.
 738 */
 739static int lock_request(struct fuse_req *req)
 740{
 741        int err = 0;
 742        if (req) {
 743                spin_lock(&req->waitq.lock);
 744                if (test_bit(FR_ABORTED, &req->flags))
 745                        err = -ENOENT;
 746                else
 747                        set_bit(FR_LOCKED, &req->flags);
 748                spin_unlock(&req->waitq.lock);
 749        }
 750        return err;
 751}
 752
 753/*
 754 * Unlock request.  If it was aborted while locked, caller is responsible
 755 * for unlocking and ending the request.
 756 */
 757static int unlock_request(struct fuse_req *req)
 758{
 759        int err = 0;
 760        if (req) {
 761                spin_lock(&req->waitq.lock);
 762                if (test_bit(FR_ABORTED, &req->flags))
 763                        err = -ENOENT;
 764                else
 765                        clear_bit(FR_LOCKED, &req->flags);
 766                spin_unlock(&req->waitq.lock);
 767        }
 768        return err;
 769}
 770
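    /*
     * State for copying request data to or from userspace.  The source
     * or destination is either an iov_iter (plain read/write on the
     * device) or a set of pipe buffers (splice).
     */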
 771struct fuse_copy_state {
 772        int write;
 773        struct fuse_req *req;
 774        struct iov_iter *iter;
 775        struct pipe_buffer *pipebufs;
 776        struct pipe_buffer *currbuf;
 777        struct pipe_inode_info *pipe;
 778        unsigned long nr_segs;
 779        struct page *pg;
 780        unsigned len;
 781        unsigned offset;
 782        unsigned move_pages:1;
 783};
 784
 785static void fuse_copy_init(struct fuse_copy_state *cs, int write,
 786                           struct iov_iter *iter)
 787{
 788        memset(cs, 0, sizeof(*cs));
 789        cs->write = write;
 790        cs->iter = iter;
 791}
 792
 793/* Unmap and put previous page of userspace buffer */
 794static void fuse_copy_finish(struct fuse_copy_state *cs)
 795{
 796        if (cs->currbuf) {
 797                struct pipe_buffer *buf = cs->currbuf;
 798
 799                if (cs->write)
 800                        buf->len = PAGE_SIZE - cs->len;
 801                cs->currbuf = NULL;
 802        } else if (cs->pg) {
 803                if (cs->write) {
 804                        flush_dcache_page(cs->pg);
 805                        set_page_dirty_lock(cs->pg);
 806                }
 807                put_page(cs->pg);
 808        }
 809        cs->pg = NULL;
 810}
 811
 812/*
 813 * Get another page's worth of the userspace buffer, map it into
 814 * kernel address space and lock the request
 815 */
 816static int fuse_copy_fill(struct fuse_copy_state *cs)
 817{
 818        struct page *page;
 819        int err;
 820
 821        err = unlock_request(cs->req);
 822        if (err)
 823                return err;
 824
 825        fuse_copy_finish(cs);
 826        if (cs->pipebufs) {
 827                struct pipe_buffer *buf = cs->pipebufs;
 828
 829                if (!cs->write) {
 830                        err = pipe_buf_confirm(cs->pipe, buf);
 831                        if (err)
 832                                return err;
 833
 834                        BUG_ON(!cs->nr_segs);
 835                        cs->currbuf = buf;
 836                        cs->pg = buf->page;
 837                        cs->offset = buf->offset;
 838                        cs->len = buf->len;
 839                        cs->pipebufs++;
 840                        cs->nr_segs--;
 841                } else {
 842                        if (cs->nr_segs == cs->pipe->buffers)
 843                                return -EIO;
 844
 845                        page = alloc_page(GFP_HIGHUSER);
 846                        if (!page)
 847                                return -ENOMEM;
 848
 849                        buf->page = page;
 850                        buf->offset = 0;
 851                        buf->len = 0;
 852
 853                        cs->currbuf = buf;
 854                        cs->pg = page;
 855                        cs->offset = 0;
 856                        cs->len = PAGE_SIZE;
 857                        cs->pipebufs++;
 858                        cs->nr_segs++;
 859                }
 860        } else {
 861                size_t off;
 862                err = iov_iter_get_pages(cs->iter, &page, PAGE_SIZE, 1, &off);
 863                if (err < 0)
 864                        return err;
 865                BUG_ON(!err);
 866                cs->len = err;
 867                cs->offset = off;
 868                cs->pg = page;
 869                iov_iter_advance(cs->iter, err);
 870        }
 871
 872        return lock_request(cs->req);
 873}
 874
 875/* Copy as much to/from the userspace buffer as we can */
 876static int fuse_copy_do(struct fuse_copy_state *cs, void **val, unsigned *size)
 877{
 878        unsigned ncpy = min(*size, cs->len);
 879        if (val) {
 880                void *pgaddr = kmap_atomic(cs->pg);
 881                void *buf = pgaddr + cs->offset;
 882
 883                if (cs->write)
 884                        memcpy(buf, *val, ncpy);
 885                else
 886                        memcpy(*val, buf, ncpy);
 887
 888                kunmap_atomic(pgaddr);
 889                *val += ncpy;
 890        }
 891        *size -= ncpy;
 892        cs->len -= ncpy;
 893        cs->offset += ncpy;
 894        return ncpy;
 895}
 896
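    /*
     * Sanity check before a stolen pipe page is installed in the page
     * cache: reject pages that are mapped, still belong to a mapping,
     * have extra references or carry unexpected flags.
     */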
 897static int fuse_check_page(struct page *page)
 898{
 899        if (page_mapcount(page) ||
 900            page->mapping != NULL ||
 901            page_count(page) != 1 ||
 902            (page->flags & PAGE_FLAGS_CHECK_AT_PREP &
 903             ~(1 << PG_locked |
 904               1 << PG_referenced |
 905               1 << PG_uptodate |
 906               1 << PG_lru |
 907               1 << PG_active |
 908               1 << PG_reclaim))) {
 909                pr_warn("trying to steal weird page\n");
 910                pr_warn("  page=%p index=%li flags=%08lx, count=%i, mapcount=%i, mapping=%p\n", page, page->index, page->flags, page_count(page), page_mapcount(page), page->mapping);
 911                return 1;
 912        }
 913        return 0;
 914}
 915
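    /*
     * Zero-copy path used when cs->move_pages is set: try to steal the
     * pipe buffer's page and install it in the page cache in place of
     * the request's page.  Returns 0 on success, a negative error, or 1
     * if the caller should fall back to copying.
     */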
 916static int fuse_try_move_page(struct fuse_copy_state *cs, struct page **pagep)
 917{
 918        int err;
 919        struct page *oldpage = *pagep;
 920        struct page *newpage;
 921        struct pipe_buffer *buf = cs->pipebufs;
 922
 923        err = unlock_request(cs->req);
 924        if (err)
 925                return err;
 926
 927        fuse_copy_finish(cs);
 928
 929        err = pipe_buf_confirm(cs->pipe, buf);
 930        if (err)
 931                return err;
 932
 933        BUG_ON(!cs->nr_segs);
 934        cs->currbuf = buf;
 935        cs->len = buf->len;
 936        cs->pipebufs++;
 937        cs->nr_segs--;
 938
 939        if (cs->len != PAGE_SIZE)
 940                goto out_fallback;
 941
 942        if (pipe_buf_steal(cs->pipe, buf) != 0)
 943                goto out_fallback;
 944
 945        newpage = buf->page;
 946
 947        if (!PageUptodate(newpage))
 948                SetPageUptodate(newpage);
 949
 950        ClearPageMappedToDisk(newpage);
 951
 952        if (fuse_check_page(newpage) != 0)
 953                goto out_fallback_unlock;
 954
 955        /*
 956         * This is a new and locked page; it shouldn't be mapped or
 957         * have any special flags on it
 958         */
 959        if (WARN_ON(page_mapped(oldpage)))
 960                goto out_fallback_unlock;
 961        if (WARN_ON(page_has_private(oldpage)))
 962                goto out_fallback_unlock;
 963        if (WARN_ON(PageDirty(oldpage) || PageWriteback(oldpage)))
 964                goto out_fallback_unlock;
 965        if (WARN_ON(PageMlocked(oldpage)))
 966                goto out_fallback_unlock;
 967
 968        err = replace_page_cache_page(oldpage, newpage, GFP_KERNEL);
 969        if (err) {
 970                unlock_page(newpage);
 971                return err;
 972        }
 973
 974        get_page(newpage);
 975
 976        if (!(buf->flags & PIPE_BUF_FLAG_LRU))
 977                lru_cache_add_file(newpage);
 978
 979        err = 0;
 980        spin_lock(&cs->req->waitq.lock);
 981        if (test_bit(FR_ABORTED, &cs->req->flags))
 982                err = -ENOENT;
 983        else
 984                *pagep = newpage;
 985        spin_unlock(&cs->req->waitq.lock);
 986
 987        if (err) {
 988                unlock_page(newpage);
 989                put_page(newpage);
 990                return err;
 991        }
 992
 993        unlock_page(oldpage);
 994        put_page(oldpage);
 995        cs->len = 0;
 996
 997        return 0;
 998
 999out_fallback_unlock:
1000        unlock_page(newpage);
1001out_fallback:
1002        cs->pg = buf->page;
1003        cs->offset = buf->offset;
1004
1005        err = lock_request(cs->req);
1006        if (err)
1007                return err;
1008
1009        return 1;
1010}
1011
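    /*
     * Splice read path: instead of copying the request's page, take a
     * reference on it and hand it to userspace as a pipe buffer.
     */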
1012static int fuse_ref_page(struct fuse_copy_state *cs, struct page *page,
1013                         unsigned offset, unsigned count)
1014{
1015        struct pipe_buffer *buf;
1016        int err;
1017
1018        if (cs->nr_segs == cs->pipe->buffers)
1019                return -EIO;
1020
1021        err = unlock_request(cs->req);
1022        if (err)
1023                return err;
1024
1025        fuse_copy_finish(cs);
1026
1027        buf = cs->pipebufs;
1028        get_page(page);
1029        buf->page = page;
1030        buf->offset = offset;
1031        buf->len = count;
1032
1033        cs->pipebufs++;
1034        cs->nr_segs++;
1035        cs->len = 0;
1036
1037        return 0;
1038}
1039
1040/*
1041 * Copy a page in the request to/from the userspace buffer.  Must be
1042 * done atomically
1043 */
1044static int fuse_copy_page(struct fuse_copy_state *cs, struct page **pagep,
1045                          unsigned offset, unsigned count, int zeroing)
1046{
1047        int err;
1048        struct page *page = *pagep;
1049
1050        if (page && zeroing && count < PAGE_SIZE)
1051                clear_highpage(page);
1052
1053        while (count) {
1054                if (cs->write && cs->pipebufs && page) {
1055                        return fuse_ref_page(cs, page, offset, count);
1056                } else if (!cs->len) {
1057                        if (cs->move_pages && page &&
1058                            offset == 0 && count == PAGE_SIZE) {
1059                                err = fuse_try_move_page(cs, pagep);
1060                                if (err <= 0)
1061                                        return err;
1062                        } else {
1063                                err = fuse_copy_fill(cs);
1064                                if (err)
1065                                        return err;
1066                        }
1067                }
1068                if (page) {
1069                        void *mapaddr = kmap_atomic(page);
1070                        void *buf = mapaddr + offset;
1071                        offset += fuse_copy_do(cs, &buf, &count);
1072                        kunmap_atomic(mapaddr);
1073                } else
1074                        offset += fuse_copy_do(cs, NULL, &count);
1075        }
1076        if (page && !cs->write)
1077                flush_dcache_page(page);
1078        return 0;
1079}
1080
1081/* Copy pages in the request to/from userspace buffer */
1082static int fuse_copy_pages(struct fuse_copy_state *cs, unsigned nbytes,
1083                           int zeroing)
1084{
1085        unsigned i;
1086        struct fuse_req *req = cs->req;
1087
1088        for (i = 0; i < req->num_pages && (nbytes || zeroing); i++) {
1089                int err;
1090                unsigned offset = req->page_descs[i].offset;
1091                unsigned count = min(nbytes, req->page_descs[i].length);
1092
1093                err = fuse_copy_page(cs, &req->pages[i], offset, count,
1094                                     zeroing);
1095                if (err)
1096                        return err;
1097
1098                nbytes -= count;
1099        }
1100        return 0;
1101}
1102
1103/* Copy a single argument in the request to/from userspace buffer */
1104static int fuse_copy_one(struct fuse_copy_state *cs, void *val, unsigned size)
1105{
1106        while (size) {
1107                if (!cs->len) {
1108                        int err = fuse_copy_fill(cs);
1109                        if (err)
1110                                return err;
1111                }
1112                fuse_copy_do(cs, &val, &size);
1113        }
1114        return 0;
1115}
1116
1117/* Copy request arguments to/from userspace buffer */
1118static int fuse_copy_args(struct fuse_copy_state *cs, unsigned numargs,
1119                          unsigned argpages, struct fuse_arg *args,
1120                          int zeroing)
1121{
1122        int err = 0;
1123        unsigned i;
1124
1125        for (i = 0; !err && i < numargs; i++)  {
1126                struct fuse_arg *arg = &args[i];
1127                if (i == numargs - 1 && argpages)
1128                        err = fuse_copy_pages(cs, arg->size, zeroing);
1129                else
1130                        err = fuse_copy_one(cs, arg->value, arg->size);
1131        }
1132        return err;
1133}
1134
1135static int forget_pending(struct fuse_iqueue *fiq)
1136{
1137        return fiq->forget_list_head.next != NULL;
1138}
1139
1140static int request_pending(struct fuse_iqueue *fiq)
1141{
1142        return !list_empty(&fiq->pending) || !list_empty(&fiq->interrupts) ||
1143                forget_pending(fiq);
1144}
1145
1146/*
1147 * Transfer an interrupt request to userspace
1148 *
1149 * Unlike other requests this is assembled on demand, without a need
1150 * to allocate a separate fuse_req structure.
1151 *
1152 * Called with fiq->waitq.lock held, releases it
1153 */
1154static int fuse_read_interrupt(struct fuse_iqueue *fiq,
1155                               struct fuse_copy_state *cs,
1156                               size_t nbytes, struct fuse_req *req)
1157__releases(fiq->waitq.lock)
1158{
1159        struct fuse_in_header ih;
1160        struct fuse_interrupt_in arg;
1161        unsigned reqsize = sizeof(ih) + sizeof(arg);
1162        int err;
1163
1164        list_del_init(&req->intr_entry);
1165        memset(&ih, 0, sizeof(ih));
1166        memset(&arg, 0, sizeof(arg));
1167        ih.len = reqsize;
1168        ih.opcode = FUSE_INTERRUPT;
1169        ih.unique = (req->in.h.unique | FUSE_INT_REQ_BIT);
1170        arg.unique = req->in.h.unique;
1171
1172        spin_unlock(&fiq->waitq.lock);
1173        if (nbytes < reqsize)
1174                return -EINVAL;
1175
1176        err = fuse_copy_one(cs, &ih, sizeof(ih));
1177        if (!err)
1178                err = fuse_copy_one(cs, &arg, sizeof(arg));
1179        fuse_copy_finish(cs);
1180
1181        return err ? err : reqsize;
1182}
1183
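    /*
     * Detach up to 'max' pending forgets from the forget list and return
     * the detached chain; the number actually dequeued is stored in
     * *countp.  Called with fiq->waitq.lock held.
     */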
1184static struct fuse_forget_link *dequeue_forget(struct fuse_iqueue *fiq,
1185                                               unsigned max,
1186                                               unsigned *countp)
1187{
1188        struct fuse_forget_link *head = fiq->forget_list_head.next;
1189        struct fuse_forget_link **newhead = &head;
1190        unsigned count;
1191
1192        for (count = 0; *newhead != NULL && count < max; count++)
1193                newhead = &(*newhead)->next;
1194
1195        fiq->forget_list_head.next = *newhead;
1196        *newhead = NULL;
1197        if (fiq->forget_list_head.next == NULL)
1198                fiq->forget_list_tail = &fiq->forget_list_head;
1199
1200        if (countp != NULL)
1201                *countp = count;
1202
1203        return head;
1204}
1205
1206static int fuse_read_single_forget(struct fuse_iqueue *fiq,
1207                                   struct fuse_copy_state *cs,
1208                                   size_t nbytes)
1209__releases(fiq->waitq.lock)
1210{
1211        int err;
1212        struct fuse_forget_link *forget = dequeue_forget(fiq, 1, NULL);
1213        struct fuse_forget_in arg = {
1214                .nlookup = forget->forget_one.nlookup,
1215        };
1216        struct fuse_in_header ih = {
1217                .opcode = FUSE_FORGET,
1218                .nodeid = forget->forget_one.nodeid,
1219                .unique = fuse_get_unique(fiq),
1220                .len = sizeof(ih) + sizeof(arg),
1221        };
1222
1223        spin_unlock(&fiq->waitq.lock);
1224        kfree(forget);
1225        if (nbytes < ih.len)
1226                return -EINVAL;
1227
1228        err = fuse_copy_one(cs, &ih, sizeof(ih));
1229        if (!err)
1230                err = fuse_copy_one(cs, &arg, sizeof(arg));
1231        fuse_copy_finish(cs);
1232
1233        if (err)
1234                return err;
1235
1236        return ih.len;
1237}
1238
1239static int fuse_read_batch_forget(struct fuse_iqueue *fiq,
1240                                   struct fuse_copy_state *cs, size_t nbytes)
1241__releases(fiq->waitq.lock)
1242{
1243        int err;
1244        unsigned max_forgets;
1245        unsigned count;
1246        struct fuse_forget_link *head;
1247        struct fuse_batch_forget_in arg = { .count = 0 };
1248        struct fuse_in_header ih = {
1249                .opcode = FUSE_BATCH_FORGET,
1250                .unique = fuse_get_unique(fiq),
1251                .len = sizeof(ih) + sizeof(arg),
1252        };
1253
1254        if (nbytes < ih.len) {
1255                spin_unlock(&fiq->waitq.lock);
1256                return -EINVAL;
1257        }
1258
1259        max_forgets = (nbytes - ih.len) / sizeof(struct fuse_forget_one);
1260        head = dequeue_forget(fiq, max_forgets, &count);
1261        spin_unlock(&fiq->waitq.lock);
1262
1263        arg.count = count;
1264        ih.len += count * sizeof(struct fuse_forget_one);
1265        err = fuse_copy_one(cs, &ih, sizeof(ih));
1266        if (!err)
1267                err = fuse_copy_one(cs, &arg, sizeof(arg));
1268
1269        while (head) {
1270                struct fuse_forget_link *forget = head;
1271
1272                if (!err) {
1273                        err = fuse_copy_one(cs, &forget->forget_one,
1274                                            sizeof(forget->forget_one));
1275                }
1276                head = forget->next;
1277                kfree(forget);
1278        }
1279
1280        fuse_copy_finish(cs);
1281
1282        if (err)
1283                return err;
1284
1285        return ih.len;
1286}
1287
1288static int fuse_read_forget(struct fuse_conn *fc, struct fuse_iqueue *fiq,
1289                            struct fuse_copy_state *cs,
1290                            size_t nbytes)
1291__releases(fiq->waitq.lock)
1292{
1293        if (fc->minor < 16 || fiq->forget_list_head.next->next == NULL)
1294                return fuse_read_single_forget(fiq, cs, nbytes);
1295        else
1296                return fuse_read_batch_forget(fiq, cs, nbytes);
1297}
1298
1299/*
1300 * Read a single request into the userspace filesystem's buffer.  This
1301 * function waits until a request is available, then removes it from
1302 * the pending list and copies request data to userspace buffer.  If
1303 * no reply is needed (FORGET), or the request has been aborted, or there
1304 * was an error during the copying, then it is finished by calling
1305 * request_end().  Otherwise add it to the processing list, and set
1306 * the 'sent' flag.
1307 */
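    /*
     * A minimal sketch of how a userspace server might consume this
     * (hypothetical loop, not the libfuse API):
     *
     *     char buf[FUSE_MIN_READ_BUFFER];
     *     ssize_t n = read(fuse_fd, buf, sizeof(buf));   // one request per read
     *     struct fuse_in_header *hdr = (struct fuse_in_header *) buf;
     *     // hdr->opcode selects the operation; hdr->unique must be echoed
     *     // back in the fuse_out_header of the reply written to the device.
     */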
1308static ssize_t fuse_dev_do_read(struct fuse_dev *fud, struct file *file,
1309                                struct fuse_copy_state *cs, size_t nbytes)
1310{
1311        ssize_t err;
1312        struct fuse_conn *fc = fud->fc;
1313        struct fuse_iqueue *fiq = &fc->iq;
1314        struct fuse_pqueue *fpq = &fud->pq;
1315        struct fuse_req *req;
1316        struct fuse_in *in;
1317        unsigned reqsize;
1318        unsigned int hash;
1319
1320 restart:
1321        spin_lock(&fiq->waitq.lock);
1322        err = -EAGAIN;
1323        if ((file->f_flags & O_NONBLOCK) && fiq->connected &&
1324            !request_pending(fiq))
1325                goto err_unlock;
1326
1327        err = wait_event_interruptible_exclusive_locked(fiq->waitq,
1328                                !fiq->connected || request_pending(fiq));
1329        if (err)
1330                goto err_unlock;
1331
1332        if (!fiq->connected) {
1333                err = fc->aborted ? -ECONNABORTED : -ENODEV;
1334                goto err_unlock;
1335        }
1336
1337        if (!list_empty(&fiq->interrupts)) {
1338                req = list_entry(fiq->interrupts.next, struct fuse_req,
1339                                 intr_entry);
1340                return fuse_read_interrupt(fiq, cs, nbytes, req);
1341        }
1342
1343        if (forget_pending(fiq)) {
1344                if (list_empty(&fiq->pending) || fiq->forget_batch-- > 0)
1345                        return fuse_read_forget(fc, fiq, cs, nbytes);
1346
1347                if (fiq->forget_batch <= -8)
1348                        fiq->forget_batch = 16;
1349        }
1350
1351        req = list_entry(fiq->pending.next, struct fuse_req, list);
1352        clear_bit(FR_PENDING, &req->flags);
1353        list_del_init(&req->list);
1354        spin_unlock(&fiq->waitq.lock);
1355
1356        in = &req->in;
1357        reqsize = in->h.len;
1358
1359        /* If request is too large, reply with an error and restart the read */
1360        if (nbytes < reqsize) {
1361                req->out.h.error = -EIO;
1362                /* SETXATTR is special, since its data may be arbitrarily large */
1363                if (in->h.opcode == FUSE_SETXATTR)
1364                        req->out.h.error = -E2BIG;
1365                request_end(fc, req);
1366                goto restart;
1367        }
1368        spin_lock(&fpq->lock);
1369        list_add(&req->list, &fpq->io);
1370        spin_unlock(&fpq->lock);
1371        cs->req = req;
1372        err = fuse_copy_one(cs, &in->h, sizeof(in->h));
1373        if (!err)
1374                err = fuse_copy_args(cs, in->numargs, in->argpages,
1375                                     (struct fuse_arg *) in->args, 0);
1376        fuse_copy_finish(cs);
1377        spin_lock(&fpq->lock);
1378        clear_bit(FR_LOCKED, &req->flags);
1379        if (!fpq->connected) {
1380                err = fc->aborted ? -ECONNABORTED : -ENODEV;
1381                goto out_end;
1382        }
1383        if (err) {
1384                req->out.h.error = -EIO;
1385                goto out_end;
1386        }
1387        if (!test_bit(FR_ISREPLY, &req->flags)) {
1388                err = reqsize;
1389                goto out_end;
1390        }
1391        hash = fuse_req_hash(req->in.h.unique);
1392        list_move_tail(&req->list, &fpq->processing[hash]);
1393        __fuse_get_request(req);
1394        set_bit(FR_SENT, &req->flags);
1395        spin_unlock(&fpq->lock);
1396        /* matches barrier in request_wait_answer() */
1397        smp_mb__after_atomic();
1398        if (test_bit(FR_INTERRUPTED, &req->flags))
1399                queue_interrupt(fiq, req);
1400        fuse_put_request(fc, req);
1401
1402        return reqsize;
1403
1404out_end:
1405        if (!test_bit(FR_PRIVATE, &req->flags))
1406                list_del_init(&req->list);
1407        spin_unlock(&fpq->lock);
1408        request_end(fc, req);
1409        return err;
1410
1411 err_unlock:
1412        spin_unlock(&fiq->waitq.lock);
1413        return err;
1414}
1415
1416static int fuse_dev_open(struct inode *inode, struct file *file)
1417{
1418        /*
1419         * The fuse device file's private_data is used to hold
1420         * the fuse_conn(ection) when it is mounted, and is used to
1421         * keep track of whether the file has been mounted already.
1422         */
1423        file->private_data = NULL;
1424        return 0;
1425}
1426
1427static ssize_t fuse_dev_read(struct kiocb *iocb, struct iov_iter *to)
1428{
1429        struct fuse_copy_state cs;
1430        struct file *file = iocb->ki_filp;
1431        struct fuse_dev *fud = fuse_get_dev(file);
1432
1433        if (!fud)
1434                return -EPERM;
1435
1436        if (!iter_is_iovec(to))
1437                return -EINVAL;
1438
1439        fuse_copy_init(&cs, 1, to);
1440
1441        return fuse_dev_do_read(fud, file, &cs, iov_iter_count(to));
1442}
1443
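    /*
     * splice(2) support for reading from the fuse device: fill a
     * temporary array of pipe buffers with one request (header and
     * arguments are copied into newly allocated pages, data pages are
     * referenced directly) and splice them into the pipe.
     */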
1444static ssize_t fuse_dev_splice_read(struct file *in, loff_t *ppos,
1445                                    struct pipe_inode_info *pipe,
1446                                    size_t len, unsigned int flags)
1447{
1448        int total, ret;
1449        int page_nr = 0;
1450        struct pipe_buffer *bufs;
1451        struct fuse_copy_state cs;
1452        struct fuse_dev *fud = fuse_get_dev(in);
1453
1454        if (!fud)
1455                return -EPERM;
1456
1457        bufs = kvmalloc_array(pipe->buffers, sizeof(struct pipe_buffer),
1458                              GFP_KERNEL);
1459        if (!bufs)
1460                return -ENOMEM;
1461
1462        fuse_copy_init(&cs, 1, NULL);
1463        cs.pipebufs = bufs;
1464        cs.pipe = pipe;
1465        ret = fuse_dev_do_read(fud, in, &cs, len);
1466        if (ret < 0)
1467                goto out;
1468
1469        if (pipe->nrbufs + cs.nr_segs > pipe->buffers) {
1470                ret = -EIO;
1471                goto out;
1472        }
1473
1474        for (ret = total = 0; page_nr < cs.nr_segs; total += ret) {
1475                /*
1476                 * Need to be careful about this.  Having buf->ops in module
1477                 * code can Oops if the buffer persists after module unload.
1478                 */
1479                bufs[page_nr].ops = &nosteal_pipe_buf_ops;
1480                bufs[page_nr].flags = 0;
1481                ret = add_to_pipe(pipe, &bufs[page_nr++]);
1482                if (unlikely(ret < 0))
1483                        break;
1484        }
1485        if (total)
1486                ret = total;
1487out:
1488        for (; page_nr < cs.nr_segs; page_nr++)
1489                put_page(bufs[page_nr].page);
1490
1491        kvfree(bufs);
1492        return ret;
1493}
1494
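    /*
     * Handlers for FUSE_NOTIFY_* messages, which the daemon writes to
     * the device unsolicited (with a zero 'unique' field).  Each handler
     * copies its arguments from the write buffer and applies the
     * notification to the mounted filesystem, if one is attached.
     */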
1495static int fuse_notify_poll(struct fuse_conn *fc, unsigned int size,
1496                            struct fuse_copy_state *cs)
1497{
1498        struct fuse_notify_poll_wakeup_out outarg;
1499        int err = -EINVAL;
1500
1501        if (size != sizeof(outarg))
1502                goto err;
1503
1504        err = fuse_copy_one(cs, &outarg, sizeof(outarg));
1505        if (err)
1506                goto err;
1507
1508        fuse_copy_finish(cs);
1509        return fuse_notify_poll_wakeup(fc, &outarg);
1510
1511err:
1512        fuse_copy_finish(cs);
1513        return err;
1514}
1515
1516static int fuse_notify_inval_inode(struct fuse_conn *fc, unsigned int size,
1517                                   struct fuse_copy_state *cs)
1518{
1519        struct fuse_notify_inval_inode_out outarg;
1520        int err = -EINVAL;
1521
1522        if (size != sizeof(outarg))
1523                goto err;
1524
1525        err = fuse_copy_one(cs, &outarg, sizeof(outarg));
1526        if (err)
1527                goto err;
1528        fuse_copy_finish(cs);
1529
1530        down_read(&fc->killsb);
1531        err = -ENOENT;
1532        if (fc->sb) {
1533                err = fuse_reverse_inval_inode(fc->sb, outarg.ino,
1534                                               outarg.off, outarg.len);
1535        }
1536        up_read(&fc->killsb);
1537        return err;
1538
1539err:
1540        fuse_copy_finish(cs);
1541        return err;
1542}
1543
1544static int fuse_notify_inval_entry(struct fuse_conn *fc, unsigned int size,
1545                                   struct fuse_copy_state *cs)
1546{
1547        struct fuse_notify_inval_entry_out outarg;
1548        int err = -ENOMEM;
1549        char *buf;
1550        struct qstr name;
1551
1552        buf = kzalloc(FUSE_NAME_MAX + 1, GFP_KERNEL);
1553        if (!buf)
1554                goto err;
1555
1556        err = -EINVAL;
1557        if (size < sizeof(outarg))
1558                goto err;
1559
1560        err = fuse_copy_one(cs, &outarg, sizeof(outarg));
1561        if (err)
1562                goto err;
1563
1564        err = -ENAMETOOLONG;
1565        if (outarg.namelen > FUSE_NAME_MAX)
1566                goto err;
1567
1568        err = -EINVAL;
1569        if (size != sizeof(outarg) + outarg.namelen + 1)
1570                goto err;
1571
1572        name.name = buf;
1573        name.len = outarg.namelen;
1574        err = fuse_copy_one(cs, buf, outarg.namelen + 1);
1575        if (err)
1576                goto err;
1577        fuse_copy_finish(cs);
1578        buf[outarg.namelen] = 0;
1579
1580        down_read(&fc->killsb);
1581        err = -ENOENT;
1582        if (fc->sb)
1583                err = fuse_reverse_inval_entry(fc->sb, outarg.parent, 0, &name);
1584        up_read(&fc->killsb);
1585        kfree(buf);
1586        return err;
1587
1588err:
1589        kfree(buf);
1590        fuse_copy_finish(cs);
1591        return err;
1592}
1593
1594static int fuse_notify_delete(struct fuse_conn *fc, unsigned int size,
1595                              struct fuse_copy_state *cs)
1596{
1597        struct fuse_notify_delete_out outarg;
1598        int err = -ENOMEM;
1599        char *buf;
1600        struct qstr name;
1601
1602        buf = kzalloc(FUSE_NAME_MAX + 1, GFP_KERNEL);
1603        if (!buf)
1604                goto err;
1605
1606        err = -EINVAL;
1607        if (size < sizeof(outarg))
1608                goto err;
1609
1610        err = fuse_copy_one(cs, &outarg, sizeof(outarg));
1611        if (err)
1612                goto err;
1613
1614        err = -ENAMETOOLONG;
1615        if (outarg.namelen > FUSE_NAME_MAX)
1616                goto err;
1617
1618        err = -EINVAL;
1619        if (size != sizeof(outarg) + outarg.namelen + 1)
1620                goto err;
1621
1622        name.name = buf;
1623        name.len = outarg.namelen;
1624        err = fuse_copy_one(cs, buf, outarg.namelen + 1);
1625        if (err)
1626                goto err;
1627        fuse_copy_finish(cs);
1628        buf[outarg.namelen] = 0;
1629
1630        down_read(&fc->killsb);
1631        err = -ENOENT;
1632        if (fc->sb)
1633                err = fuse_reverse_inval_entry(fc->sb, outarg.parent,
1634                                               outarg.child, &name);
1635        up_read(&fc->killsb);
1636        kfree(buf);
1637        return err;
1638
1639err:
1640        kfree(buf);
1641        fuse_copy_finish(cs);
1642        return err;
1643}
1644
1645static int fuse_notify_store(struct fuse_conn *fc, unsigned int size,
1646                             struct fuse_copy_state *cs)
1647{
1648        struct fuse_notify_store_out outarg;
1649        struct inode *inode;
1650        struct address_space *mapping;
1651        u64 nodeid;
1652        int err;
1653        pgoff_t index;
1654        unsigned int offset;
1655        unsigned int num;
1656        loff_t file_size;
1657        loff_t end;
1658
1659        err = -EINVAL;
1660        if (size < sizeof(outarg))
1661                goto out_finish;
1662
1663        err = fuse_copy_one(cs, &outarg, sizeof(outarg));
1664        if (err)
1665                goto out_finish;
1666
1667        err = -EINVAL;
1668        if (size - sizeof(outarg) != outarg.size)
1669                goto out_finish;
1670
1671        nodeid = outarg.nodeid;
1672
1673        down_read(&fc->killsb);
1674
1675        err = -ENOENT;
1676        if (!fc->sb)
1677                goto out_up_killsb;
1678
1679        inode = ilookup5(fc->sb, nodeid, fuse_inode_eq, &nodeid);
1680        if (!inode)
1681                goto out_up_killsb;
1682
1683        mapping = inode->i_mapping;
1684        index = outarg.offset >> PAGE_SHIFT;
1685        offset = outarg.offset & ~PAGE_MASK;
1686        file_size = i_size_read(inode);
1687        end = outarg.offset + outarg.size;
1688        if (end > file_size) {
1689                file_size = end;
1690                fuse_write_update_size(inode, file_size);
1691        }
1692
1693        num = outarg.size;
1694        while (num) {
1695                struct page *page;
1696                unsigned int this_num;
1697
1698                err = -ENOMEM;
1699                page = find_or_create_page(mapping, index,
1700                                           mapping_gfp_mask(mapping));
1701                if (!page)
1702                        goto out_iput;
1703
1704                this_num = min_t(unsigned, num, PAGE_SIZE - offset);
1705                err = fuse_copy_page(cs, &page, offset, this_num, 0);
1706                if (!err && offset == 0 &&
1707                    (this_num == PAGE_SIZE || file_size == end))
1708                        SetPageUptodate(page);
1709                unlock_page(page);
1710                put_page(page);
1711
1712                if (err)
1713                        goto out_iput;
1714
1715                num -= this_num;
1716                offset = 0;
1717                index++;
1718        }
1719
1720        err = 0;
1721
1722out_iput:
1723        iput(inode);
1724out_up_killsb:
1725        up_read(&fc->killsb);
1726out_finish:
1727        fuse_copy_finish(cs);
1728        return err;
1729}
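
/*
 * Example (userspace-side sketch, not part of this kernel file): a store
 * notification pushes data directly into the page cache of the inode named
 * by 'nodeid'.  The data bytes follow fuse_notify_store_out in the same
 * write, which is what the size - sizeof(outarg) == outarg.size check above
 * verifies.  The helper name is illustrative.
 */
#include <stdint.h>
#include <sys/uio.h>
#include <linux/fuse.h>

static int notify_store(int devfd, uint64_t nodeid, uint64_t offset,
                        const void *data, uint32_t size)
{
        struct fuse_notify_store_out arg = {
                .nodeid = nodeid,
                .offset = offset,
                .size   = size,
        };
        struct fuse_out_header oh = {
                .len    = sizeof(oh) + sizeof(arg) + size,
                .error  = FUSE_NOTIFY_STORE,
                .unique = 0,
        };
        struct iovec iov[3] = {
                { .iov_base = &oh,          .iov_len = sizeof(oh)  },
                { .iov_base = &arg,         .iov_len = sizeof(arg) },
                { .iov_base = (void *)data, .iov_len = size        },
        };

        return writev(devfd, iov, 3) == (ssize_t)oh.len ? 0 : -1;
}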
1730
1731static void fuse_retrieve_end(struct fuse_conn *fc, struct fuse_req *req)
1732{
1733        release_pages(req->pages, req->num_pages);
1734}
1735
1736static int fuse_retrieve(struct fuse_conn *fc, struct inode *inode,
1737                         struct fuse_notify_retrieve_out *outarg)
1738{
1739        int err;
1740        struct address_space *mapping = inode->i_mapping;
1741        struct fuse_req *req;
1742        pgoff_t index;
1743        loff_t file_size;
1744        unsigned int num;
1745        unsigned int offset;
1746        size_t total_len = 0;
1747        unsigned int num_pages;
1748
1749        offset = outarg->offset & ~PAGE_MASK;
1750        file_size = i_size_read(inode);
1751
1752        num = min(outarg->size, fc->max_write);
1753        if (outarg->offset > file_size)
1754                num = 0;
1755        else if (outarg->offset + num > file_size)
1756                num = file_size - outarg->offset;
1757
1758        num_pages = (num + offset + PAGE_SIZE - 1) >> PAGE_SHIFT;
1759        num_pages = min(num_pages, fc->max_pages);
1760
1761        req = fuse_get_req(fc, num_pages);
1762        if (IS_ERR(req))
1763                return PTR_ERR(req);
1764
1765        req->in.h.opcode = FUSE_NOTIFY_REPLY;
1766        req->in.h.nodeid = outarg->nodeid;
1767        req->in.numargs = 2;
1768        req->in.argpages = 1;
1769        req->end = fuse_retrieve_end;
1770
1771        index = outarg->offset >> PAGE_SHIFT;
1772
1773        while (num && req->num_pages < num_pages) {
1774                struct page *page;
1775                unsigned int this_num;
1776
1777                page = find_get_page(mapping, index);
1778                if (!page)
1779                        break;
1780
1781                this_num = min_t(unsigned, num, PAGE_SIZE - offset);
1782                req->pages[req->num_pages] = page;
1783                req->page_descs[req->num_pages].offset = offset;
1784                req->page_descs[req->num_pages].length = this_num;
1785                req->num_pages++;
1786
1787                offset = 0;
1788                num -= this_num;
1789                total_len += this_num;
1790                index++;
1791        }
1792        req->misc.retrieve_in.offset = outarg->offset;
1793        req->misc.retrieve_in.size = total_len;
1794        req->in.args[0].size = sizeof(req->misc.retrieve_in);
1795        req->in.args[0].value = &req->misc.retrieve_in;
1796        req->in.args[1].size = total_len;
1797
1798        err = fuse_request_send_notify_reply(fc, req, outarg->notify_unique);
1799        if (err) {
1800                fuse_retrieve_end(fc, req);
1801                fuse_put_request(fc, req);
1802        }
1803
1804        return err;
1805}
1806
1807static int fuse_notify_retrieve(struct fuse_conn *fc, unsigned int size,
1808                                struct fuse_copy_state *cs)
1809{
1810        struct fuse_notify_retrieve_out outarg;
1811        struct inode *inode;
1812        int err;
1813
1814        err = -EINVAL;
1815        if (size != sizeof(outarg))
1816                goto copy_finish;
1817
1818        err = fuse_copy_one(cs, &outarg, sizeof(outarg));
1819        if (err)
1820                goto copy_finish;
1821
1822        fuse_copy_finish(cs);
1823
1824        down_read(&fc->killsb);
1825        err = -ENOENT;
1826        if (fc->sb) {
1827                u64 nodeid = outarg.nodeid;
1828
1829                inode = ilookup5(fc->sb, nodeid, fuse_inode_eq, &nodeid);
1830                if (inode) {
1831                        err = fuse_retrieve(fc, inode, &outarg);
1832                        iput(inode);
1833                }
1834        }
1835        up_read(&fc->killsb);
1836
1837        return err;
1838
1839copy_finish:
1840        fuse_copy_finish(cs);
1841        return err;
1842}
1843
1844static int fuse_notify(struct fuse_conn *fc, enum fuse_notify_code code,
1845                       unsigned int size, struct fuse_copy_state *cs)
1846{
1847        /* Don't try to move pages (yet) */
1848        cs->move_pages = 0;
1849
1850        switch (code) {
1851        case FUSE_NOTIFY_POLL:
1852                return fuse_notify_poll(fc, size, cs);
1853
1854        case FUSE_NOTIFY_INVAL_INODE:
1855                return fuse_notify_inval_inode(fc, size, cs);
1856
1857        case FUSE_NOTIFY_INVAL_ENTRY:
1858                return fuse_notify_inval_entry(fc, size, cs);
1859
1860        case FUSE_NOTIFY_STORE:
1861                return fuse_notify_store(fc, size, cs);
1862
1863        case FUSE_NOTIFY_RETRIEVE:
1864                return fuse_notify_retrieve(fc, size, cs);
1865
1866        case FUSE_NOTIFY_DELETE:
1867                return fuse_notify_delete(fc, size, cs);
1868
1869        default:
1870                fuse_copy_finish(cs);
1871                return -EINVAL;
1872        }
1873}
1874
1875/* Look up request on processing list by unique ID */
1876static struct fuse_req *request_find(struct fuse_pqueue *fpq, u64 unique)
1877{
1878        unsigned int hash = fuse_req_hash(unique);
1879        struct fuse_req *req;
1880
1881        list_for_each_entry(req, &fpq->processing[hash], list) {
1882                if (req->in.h.unique == unique)
1883                        return req;
1884        }
1885        return NULL;
1886}
1887
1888static int copy_out_args(struct fuse_copy_state *cs, struct fuse_out *out,
1889                         unsigned nbytes)
1890{
1891        unsigned reqsize = sizeof(struct fuse_out_header);
1892
1893        if (out->h.error)
1894                return nbytes != reqsize ? -EINVAL : 0;
1895
1896        reqsize += len_args(out->numargs, out->args);
1897
1898        if (reqsize < nbytes || (reqsize > nbytes && !out->argvar))
1899                return -EINVAL;
1900        else if (reqsize > nbytes) {
1901                struct fuse_arg *lastarg = &out->args[out->numargs-1];
1902                unsigned diffsize = reqsize - nbytes;
1903                if (diffsize > lastarg->size)
1904                        return -EINVAL;
1905                lastarg->size -= diffsize;
1906        }
1907        return fuse_copy_args(cs, out->numargs, out->argpages, out->args,
1908                              out->page_zeroing);
1909}
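
/*
 * Example (userspace-side sketch, not part of this kernel file): for replies
 * whose last argument is variable sized (out->argvar, e.g. the data returned
 * for a read request), the daemon simply sends however many bytes it has and
 * copy_out_args() shrinks the last argument to match oh.len.  The helper name
 * is illustrative.
 */
#include <stdint.h>
#include <sys/uio.h>
#include <linux/fuse.h>

static int reply_data(int devfd, uint64_t unique, const void *data, size_t count)
{
        struct fuse_out_header oh = {
                .len    = sizeof(oh) + count,   /* may be less than requested */
                .error  = 0,
                .unique = unique,               /* from fuse_in_header.unique */
        };
        struct iovec iov[2] = {
                { .iov_base = &oh,          .iov_len = sizeof(oh) },
                { .iov_base = (void *)data, .iov_len = count      },
        };

        return writev(devfd, iov, 2) == (ssize_t)oh.len ? 0 : -1;
}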
1910
1911/*
1912 * Write a single reply to a request.  First the header is copied from
1913 * the write buffer.  The request is then looked up on the processing
1914 * list by the unique ID found in the header.  If found, it is removed
1915 * from the list and the rest of the buffer is copied into the request.
1916 * The request is then finished by calling request_end().
1917 */
1918static ssize_t fuse_dev_do_write(struct fuse_dev *fud,
1919                                 struct fuse_copy_state *cs, size_t nbytes)
1920{
1921        int err;
1922        struct fuse_conn *fc = fud->fc;
1923        struct fuse_pqueue *fpq = &fud->pq;
1924        struct fuse_req *req;
1925        struct fuse_out_header oh;
1926
1927        err = -EINVAL;
1928        if (nbytes < sizeof(struct fuse_out_header))
1929                goto out;
1930
1931        err = fuse_copy_one(cs, &oh, sizeof(oh));
1932        if (err)
1933                goto copy_finish;
1934
1935        err = -EINVAL;
1936        if (oh.len != nbytes)
1937                goto copy_finish;
1938
1939        /*
1940         * A zero oh.unique indicates an unsolicited notification message;
1941         * in that case oh.error carries the notification code.
1942         */
1943        if (!oh.unique) {
1944                err = fuse_notify(fc, oh.error, nbytes - sizeof(oh), cs);
1945                goto out;
1946        }
1947
1948        err = -EINVAL;
1949        if (oh.error <= -1000 || oh.error > 0)
1950                goto copy_finish;
1951
1952        spin_lock(&fpq->lock);
1953        req = NULL;
1954        if (fpq->connected)
1955                req = request_find(fpq, oh.unique & ~FUSE_INT_REQ_BIT);
1956
1957        err = -ENOENT;
1958        if (!req) {
1959                spin_unlock(&fpq->lock);
1960                goto copy_finish;
1961        }
1962
1963        /* Is it an interrupt reply ID? */
1964        if (oh.unique & FUSE_INT_REQ_BIT) {
1965                __fuse_get_request(req);
1966                spin_unlock(&fpq->lock);
1967
1968                err = 0;
1969                if (nbytes != sizeof(struct fuse_out_header))
1970                        err = -EINVAL;
1971                else if (oh.error == -ENOSYS)
1972                        fc->no_interrupt = 1;
1973                else if (oh.error == -EAGAIN)
1974                        err = queue_interrupt(&fc->iq, req);
1975
1976                fuse_put_request(fc, req);
1977
1978                goto copy_finish;
1979        }
1980
1981        clear_bit(FR_SENT, &req->flags);
1982        list_move(&req->list, &fpq->io);
1983        req->out.h = oh;
1984        set_bit(FR_LOCKED, &req->flags);
1985        spin_unlock(&fpq->lock);
1986        cs->req = req;
1987        if (!req->out.page_replace)
1988                cs->move_pages = 0;
1989
1990        err = copy_out_args(cs, &req->out, nbytes);
1991        fuse_copy_finish(cs);
1992
1993        spin_lock(&fpq->lock);
1994        clear_bit(FR_LOCKED, &req->flags);
1995        if (!fpq->connected)
1996                err = -ENOENT;
1997        else if (err)
1998                req->out.h.error = -EIO;
1999        if (!test_bit(FR_PRIVATE, &req->flags))
2000                list_del_init(&req->list);
2001        spin_unlock(&fpq->lock);
2002
2003        request_end(fc, req);
2004out:
2005        return err ? err : nbytes;
2006
2007copy_finish:
2008        fuse_copy_finish(cs);
2009        goto out;
2010}
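
/*
 * Example (userspace-side sketch, not part of this kernel file): besides
 * full replies (header plus output arguments in a single write, with oh.len
 * equal to the total number of bytes), the path above accepts header-only
 * error replies carrying a negated errno; anything outside (-1000, 0] is
 * rejected.  The helper name is illustrative.
 */
#include <stdint.h>
#include <unistd.h>
#include <linux/fuse.h>

static int reply_err(int devfd, uint64_t unique, int err)
{
        struct fuse_out_header oh = {
                .len    = sizeof(oh),   /* error replies carry no arguments */
                .error  = -err,         /* e.g. err == ENOSYS for an unknown op */
                .unique = unique,       /* copied from fuse_in_header.unique */
        };

        return write(devfd, &oh, sizeof(oh)) == (ssize_t)sizeof(oh) ? 0 : -1;
}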
2011
2012static ssize_t fuse_dev_write(struct kiocb *iocb, struct iov_iter *from)
2013{
2014        struct fuse_copy_state cs;
2015        struct fuse_dev *fud = fuse_get_dev(iocb->ki_filp);
2016
2017        if (!fud)
2018                return -EPERM;
2019
2020        if (!iter_is_iovec(from))
2021                return -EINVAL;
2022
2023        fuse_copy_init(&cs, 0, from);
2024
2025        return fuse_dev_do_write(fud, &cs, iov_iter_count(from));
2026}
2027
2028static ssize_t fuse_dev_splice_write(struct pipe_inode_info *pipe,
2029                                     struct file *out, loff_t *ppos,
2030                                     size_t len, unsigned int flags)
2031{
2032        unsigned nbuf;
2033        unsigned idx;
2034        struct pipe_buffer *bufs;
2035        struct fuse_copy_state cs;
2036        struct fuse_dev *fud;
2037        size_t rem;
2038        ssize_t ret;
2039
2040        fud = fuse_get_dev(out);
2041        if (!fud)
2042                return -EPERM;
2043
2044        pipe_lock(pipe);
2045
2046        bufs = kvmalloc_array(pipe->nrbufs, sizeof(struct pipe_buffer),
2047                              GFP_KERNEL);
2048        if (!bufs) {
2049                pipe_unlock(pipe);
2050                return -ENOMEM;
2051        }
2052
2053        nbuf = 0;
2054        rem = 0;
2055        for (idx = 0; idx < pipe->nrbufs && rem < len; idx++)
2056                rem += pipe->bufs[(pipe->curbuf + idx) & (pipe->buffers - 1)].len;
2057
2058        ret = -EINVAL;
2059        if (rem < len)
2060                goto out_free;
2061
2062        rem = len;
2063        while (rem) {
2064                struct pipe_buffer *ibuf;
2065                struct pipe_buffer *obuf;
2066
2067                BUG_ON(nbuf >= pipe->buffers);
2068                BUG_ON(!pipe->nrbufs);
2069                ibuf = &pipe->bufs[pipe->curbuf];
2070                obuf = &bufs[nbuf];
2071
2072                if (rem >= ibuf->len) {
2073                        *obuf = *ibuf;
2074                        ibuf->ops = NULL;
2075                        pipe->curbuf = (pipe->curbuf + 1) & (pipe->buffers - 1);
2076                        pipe->nrbufs--;
2077                } else {
2078                        if (!pipe_buf_get(pipe, ibuf))
2079                                goto out_free;
2080
2081                        *obuf = *ibuf;
2082                        obuf->flags &= ~PIPE_BUF_FLAG_GIFT;
2083                        obuf->len = rem;
2084                        ibuf->offset += obuf->len;
2085                        ibuf->len -= obuf->len;
2086                }
2087                nbuf++;
2088                rem -= obuf->len;
2089        }
2090        pipe_unlock(pipe);
2091
2092        fuse_copy_init(&cs, 0, NULL);
2093        cs.pipebufs = bufs;
2094        cs.nr_segs = nbuf;
2095        cs.pipe = pipe;
2096
2097        if (flags & SPLICE_F_MOVE)
2098                cs.move_pages = 1;
2099
2100        ret = fuse_dev_do_write(fud, &cs, len);
2101
2102        pipe_lock(pipe);
2103out_free:
2104        for (idx = 0; idx < nbuf; idx++)
2105                pipe_buf_release(pipe, &bufs[idx]);
2106        pipe_unlock(pipe);
2107
2108        kvfree(bufs);
2109        return ret;
2110}
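
/*
 * Example (userspace-side sketch, not part of this kernel file): a daemon
 * can avoid copying reply payloads by assembling the reply in a pipe (the
 * header written normally, the payload spliced in from e.g. a backing file)
 * and then splicing the pipe into the device in one go.  SPLICE_F_MOVE asks
 * the code above to steal pages rather than copy them; the kernel may still
 * fall back to copying.  The helper name is illustrative.
 */
#define _GNU_SOURCE
#include <fcntl.h>

static ssize_t reply_from_pipe(int devfd, int pipe_rd, size_t reply_len)
{
        /* reply_len must cover one complete reply: header + arguments. */
        return splice(pipe_rd, NULL, devfd, NULL, reply_len, SPLICE_F_MOVE);
}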
2111
2112static __poll_t fuse_dev_poll(struct file *file, poll_table *wait)
2113{
2114        __poll_t mask = EPOLLOUT | EPOLLWRNORM;
2115        struct fuse_iqueue *fiq;
2116        struct fuse_dev *fud = fuse_get_dev(file);
2117
2118        if (!fud)
2119                return EPOLLERR;
2120
2121        fiq = &fud->fc->iq;
2122        poll_wait(file, &fiq->waitq, wait);
2123
2124        spin_lock(&fiq->waitq.lock);
2125        if (!fiq->connected)
2126                mask = EPOLLERR;
2127        else if (request_pending(fiq))
2128                mask |= EPOLLIN | EPOLLRDNORM;
2129        spin_unlock(&fiq->waitq.lock);
2130
2131        return mask;
2132}
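
/*
 * Example (userspace-side sketch, not part of this kernel file): because the
 * device supports poll, a daemon need not block in read(2); it can multiplex
 * /dev/fuse with other descriptors.  POLLIN means a request (or forget) is
 * queued; POLLERR means the connection is no longer usable.  The helper name
 * is illustrative.
 */
#include <poll.h>

static int wait_for_request(int devfd, int timeout_ms)
{
        struct pollfd pfd = { .fd = devfd, .events = POLLIN };
        int n = poll(&pfd, 1, timeout_ms);

        if (n > 0 && (pfd.revents & POLLERR))
                return -1;      /* connection aborted or device not set up */
        return n;               /* > 0: request ready, 0: timeout, < 0: error */
}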
2133
2134/* Abort all requests on the given list (pending or processing) */
2135static void end_requests(struct fuse_conn *fc, struct list_head *head)
2136{
2137        while (!list_empty(head)) {
2138                struct fuse_req *req;
2139                req = list_entry(head->next, struct fuse_req, list);
2140                req->out.h.error = -ECONNABORTED;
2141                clear_bit(FR_SENT, &req->flags);
2142                list_del_init(&req->list);
2143                request_end(fc, req);
2144        }
2145}
2146
2147static void end_polls(struct fuse_conn *fc)
2148{
2149        struct rb_node *p;
2150
2151        p = rb_first(&fc->polled_files);
2152
2153        while (p) {
2154                struct fuse_file *ff;
2155                ff = rb_entry(p, struct fuse_file, polled_node);
2156                wake_up_interruptible_all(&ff->poll_wait);
2157
2158                p = rb_next(p);
2159        }
2160}
2161
2162/*
2163 * Abort all requests.
2164 *
2165 * Emergency exit in case of a malicious or accidental deadlock, or just a hung
2166 * filesystem.
2167 *
2168 * The same effect is usually achievable through killing the filesystem daemon
2169 * and all users of the filesystem.  The exception is the combination of an
2170 * asynchronous request and the tricky deadlock (see
2171 * Documentation/filesystems/fuse.txt).
2172 *
2173 * Aborting requests under I/O goes as follows: 1: Separate out unlocked
2174 * requests; these should be finished off immediately.  Locked requests
2175 * will be finished once they are unlocked; see unlock_request(). 2: Finish
2176 * off the unlocked requests.  Some of them may complete before we get to
2177 * them; that is fine, since such a request is removed from the list
2178 * before we touch it.
2179 */
2180void fuse_abort_conn(struct fuse_conn *fc)
2181{
2182        struct fuse_iqueue *fiq = &fc->iq;
2183
2184        spin_lock(&fc->lock);
2185        if (fc->connected) {
2186                struct fuse_dev *fud;
2187                struct fuse_req *req, *next;
2188                LIST_HEAD(to_end);
2189                unsigned int i;
2190
2191                /* Background queuing checks fc->connected under bg_lock */
2192                spin_lock(&fc->bg_lock);
2193                fc->connected = 0;
2194                spin_unlock(&fc->bg_lock);
2195
2196                fuse_set_initialized(fc);
2197                list_for_each_entry(fud, &fc->devices, entry) {
2198                        struct fuse_pqueue *fpq = &fud->pq;
2199
2200                        spin_lock(&fpq->lock);
2201                        fpq->connected = 0;
2202                        list_for_each_entry_safe(req, next, &fpq->io, list) {
2203                                req->out.h.error = -ECONNABORTED;
2204                                spin_lock(&req->waitq.lock);
2205                                set_bit(FR_ABORTED, &req->flags);
2206                                if (!test_bit(FR_LOCKED, &req->flags)) {
2207                                        set_bit(FR_PRIVATE, &req->flags);
2208                                        __fuse_get_request(req);
2209                                        list_move(&req->list, &to_end);
2210                                }
2211                                spin_unlock(&req->waitq.lock);
2212                        }
2213                        for (i = 0; i < FUSE_PQ_HASH_SIZE; i++)
2214                                list_splice_tail_init(&fpq->processing[i],
2215                                                      &to_end);
2216                        spin_unlock(&fpq->lock);
2217                }
2218                spin_lock(&fc->bg_lock);
2219                fc->blocked = 0;
2220                fc->max_background = UINT_MAX;
2221                flush_bg_queue(fc);
2222                spin_unlock(&fc->bg_lock);
2223
2224                spin_lock(&fiq->waitq.lock);
2225                fiq->connected = 0;
2226                list_for_each_entry(req, &fiq->pending, list)
2227                        clear_bit(FR_PENDING, &req->flags);
2228                list_splice_tail_init(&fiq->pending, &to_end);
2229                while (forget_pending(fiq))
2230                        kfree(dequeue_forget(fiq, 1, NULL));
2231                wake_up_all_locked(&fiq->waitq);
2232                spin_unlock(&fiq->waitq.lock);
2233                kill_fasync(&fiq->fasync, SIGIO, POLL_IN);
2234                end_polls(fc);
2235                wake_up_all(&fc->blocked_waitq);
2236                spin_unlock(&fc->lock);
2237
2238                end_requests(fc, &to_end);
2239        } else {
2240                spin_unlock(&fc->lock);
2241        }
2242}
2243EXPORT_SYMBOL_GPL(fuse_abort_conn);
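
/*
 * Example (userspace-side sketch, not part of this kernel file): besides
 * killing the daemon, an administrator can reach this path through the fuse
 * control filesystem, conventionally mounted at /sys/fs/fuse/connections,
 * where each connection has a directory named after its device number (the
 * st_dev reported for files on that mount) containing an 'abort' file.  The
 * helper name and path handling are illustrative.
 */
#include <fcntl.h>
#include <stdio.h>
#include <unistd.h>

static int abort_fuse_conn(unsigned int conn_dev)
{
        char path[64];
        int fd;
        ssize_t ret;

        snprintf(path, sizeof(path),
                 "/sys/fs/fuse/connections/%u/abort", conn_dev);
        fd = open(path, O_WRONLY);
        if (fd < 0)
                return -1;
        ret = write(fd, "1", 1);        /* any write triggers the abort */
        close(fd);
        return ret < 0 ? -1 : 0;
}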
2244
2245void fuse_wait_aborted(struct fuse_conn *fc)
2246{
2247        /* matches implicit memory barrier in fuse_drop_waiting() */
2248        smp_mb();
2249        wait_event(fc->blocked_waitq, atomic_read(&fc->num_waiting) == 0);
2250}
2251
2252int fuse_dev_release(struct inode *inode, struct file *file)
2253{
2254        struct fuse_dev *fud = fuse_get_dev(file);
2255
2256        if (fud) {
2257                struct fuse_conn *fc = fud->fc;
2258                struct fuse_pqueue *fpq = &fud->pq;
2259                LIST_HEAD(to_end);
2260                unsigned int i;
2261
2262                spin_lock(&fpq->lock);
2263                WARN_ON(!list_empty(&fpq->io));
2264                for (i = 0; i < FUSE_PQ_HASH_SIZE; i++)
2265                        list_splice_init(&fpq->processing[i], &to_end);
2266                spin_unlock(&fpq->lock);
2267
2268                end_requests(fc, &to_end);
2269
2270                /* Are we the last open device? */
2271                if (atomic_dec_and_test(&fc->dev_count)) {
2272                        WARN_ON(fc->iq.fasync != NULL);
2273                        fuse_abort_conn(fc);
2274                }
2275                fuse_dev_free(fud);
2276        }
2277        return 0;
2278}
2279EXPORT_SYMBOL_GPL(fuse_dev_release);
2280
2281static int fuse_dev_fasync(int fd, struct file *file, int on)
2282{
2283        struct fuse_dev *fud = fuse_get_dev(file);
2284
2285        if (!fud)
2286                return -EPERM;
2287
2288        /* No locking - fasync_helper does its own locking */
2289        return fasync_helper(fd, file, on, &fud->fc->iq.fasync);
2290}
2291
2292static int fuse_device_clone(struct fuse_conn *fc, struct file *new)
2293{
2294        struct fuse_dev *fud;
2295
2296        if (new->private_data)
2297                return -EINVAL;
2298
2299        fud = fuse_dev_alloc(fc);
2300        if (!fud)
2301                return -ENOMEM;
2302
2303        new->private_data = fud;
2304        atomic_inc(&fc->dev_count);
2305
2306        return 0;
2307}
2308
2309static long fuse_dev_ioctl(struct file *file, unsigned int cmd,
2310                           unsigned long arg)
2311{
2312        int err = -ENOTTY;
2313
2314        if (cmd == FUSE_DEV_IOC_CLONE) {
2315                int oldfd;
2316
2317                err = -EFAULT;
2318                if (!get_user(oldfd, (__u32 __user *) arg)) {
2319                        struct file *old = fget(oldfd);
2320
2321                        err = -EINVAL;
2322                        if (old) {
2323                                struct fuse_dev *fud = NULL;
2324
2325                                /*
2326                                 * Check against file->f_op because CUSE
2327                                 * uses the same ioctl handler.
2328                                 */
2329                                if (old->f_op == file->f_op &&
2330                                    old->f_cred->user_ns == file->f_cred->user_ns)
2331                                        fud = fuse_get_dev(old);
2332
2333                                if (fud) {
2334                                        mutex_lock(&fuse_mutex);
2335                                        err = fuse_device_clone(fud->fc, file);
2336                                        mutex_unlock(&fuse_mutex);
2337                                }
2338                                fput(old);
2339                        }
2340                }
2341        }
2342        return err;
2343}
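
/*
 * Example (userspace-side sketch, not part of this kernel file): a daemon
 * can give each worker thread its own request queue by opening /dev/fuse
 * again and cloning the existing session onto the new descriptor, which is
 * what the FUSE_DEV_IOC_CLONE handling above implements.  The helper name is
 * illustrative; FUSE_DEV_IOC_CLONE comes from <linux/fuse.h>.
 */
#include <stdint.h>
#include <fcntl.h>
#include <unistd.h>
#include <sys/ioctl.h>
#include <linux/fuse.h>

static int clone_fuse_fd(int session_fd)
{
        uint32_t oldfd = session_fd;
        int newfd = open("/dev/fuse", O_RDWR | O_CLOEXEC);

        if (newfd < 0)
                return -1;
        if (ioctl(newfd, FUSE_DEV_IOC_CLONE, &oldfd) == -1) {
                close(newfd);
                return -1;
        }
        return newfd;   /* reads requests from the same connection */
}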
2344
2345const struct file_operations fuse_dev_operations = {
2346        .owner          = THIS_MODULE,
2347        .open           = fuse_dev_open,
2348        .llseek         = no_llseek,
2349        .read_iter      = fuse_dev_read,
2350        .splice_read    = fuse_dev_splice_read,
2351        .write_iter     = fuse_dev_write,
2352        .splice_write   = fuse_dev_splice_write,
2353        .poll           = fuse_dev_poll,
2354        .release        = fuse_dev_release,
2355        .fasync         = fuse_dev_fasync,
2356        .unlocked_ioctl = fuse_dev_ioctl,
2357        .compat_ioctl   = fuse_dev_ioctl,
2358};
2359EXPORT_SYMBOL_GPL(fuse_dev_operations);
2360
2361static struct miscdevice fuse_miscdevice = {
2362        .minor = FUSE_MINOR,
2363        .name  = "fuse",
2364        .fops = &fuse_dev_operations,
2365};
2366
2367int __init fuse_dev_init(void)
2368{
2369        int err = -ENOMEM;
2370        fuse_req_cachep = kmem_cache_create("fuse_request",
2371                                            sizeof(struct fuse_req),
2372                                            0, 0, NULL);
2373        if (!fuse_req_cachep)
2374                goto out;
2375
2376        err = misc_register(&fuse_miscdevice);
2377        if (err)
2378                goto out_cache_clean;
2379
2380        return 0;
2381
2382 out_cache_clean:
2383        kmem_cache_destroy(fuse_req_cachep);
2384 out:
2385        return err;
2386}
2387
2388void fuse_dev_cleanup(void)
2389{
2390        misc_deregister(&fuse_miscdevice);
2391        kmem_cache_destroy(fuse_req_cachep);
2392}
2393