linux/fs/fuse/dev.c
   1/*
   2  FUSE: Filesystem in Userspace
   3  Copyright (C) 2001-2008  Miklos Szeredi <miklos@szeredi.hu>
   4
   5  This program can be distributed under the terms of the GNU GPL.
   6  See the file COPYING.
   7*/
   8
   9#include "fuse_i.h"
  10
  11#include <linux/init.h>
  12#include <linux/module.h>
  13#include <linux/poll.h>
  14#include <linux/sched/signal.h>
  15#include <linux/uio.h>
  16#include <linux/miscdevice.h>
  17#include <linux/pagemap.h>
  18#include <linux/file.h>
  19#include <linux/slab.h>
  20#include <linux/pipe_fs_i.h>
  21#include <linux/swap.h>
  22#include <linux/splice.h>
  23#include <linux/sched.h>
  24
  25MODULE_ALIAS_MISCDEV(FUSE_MINOR);
  26MODULE_ALIAS("devname:fuse");
  27
   28/* Ordinary requests have even IDs, while interrupt IDs are odd */
  29#define FUSE_INT_REQ_BIT (1ULL << 0)
  30#define FUSE_REQ_ID_STEP (1ULL << 1)
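     /*
      * An INTERRUPT reuses the ID of the request it interrupts, with
      * FUSE_INT_REQ_BIT set (see fuse_read_interrupt()); fuse_req_hash()
      * masks that bit off again when hashing request IDs.
      */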
  31
  32static struct kmem_cache *fuse_req_cachep;
  33
  34static struct fuse_dev *fuse_get_dev(struct file *file)
  35{
  36        /*
   37         * Lockless access is OK, because file->private_data is set
  38         * once during mount and is valid until the file is released.
  39         */
  40        return READ_ONCE(file->private_data);
  41}
  42
  43static void fuse_request_init(struct fuse_mount *fm, struct fuse_req *req)
  44{
  45        INIT_LIST_HEAD(&req->list);
  46        INIT_LIST_HEAD(&req->intr_entry);
  47        init_waitqueue_head(&req->waitq);
  48        refcount_set(&req->count, 1);
  49        __set_bit(FR_PENDING, &req->flags);
  50        req->fm = fm;
  51}
  52
  53static struct fuse_req *fuse_request_alloc(struct fuse_mount *fm, gfp_t flags)
  54{
  55        struct fuse_req *req = kmem_cache_zalloc(fuse_req_cachep, flags);
  56        if (req)
  57                fuse_request_init(fm, req);
  58
  59        return req;
  60}
  61
  62static void fuse_request_free(struct fuse_req *req)
  63{
  64        kmem_cache_free(fuse_req_cachep, req);
  65}
  66
  67static void __fuse_get_request(struct fuse_req *req)
  68{
  69        refcount_inc(&req->count);
  70}
  71
  72/* Must be called with > 1 refcount */
  73static void __fuse_put_request(struct fuse_req *req)
  74{
  75        refcount_dec(&req->count);
  76}
  77
  78void fuse_set_initialized(struct fuse_conn *fc)
  79{
  80        /* Make sure stores before this are seen on another CPU */
  81        smp_wmb();
  82        fc->initialized = 1;
  83}
  84
  85static bool fuse_block_alloc(struct fuse_conn *fc, bool for_background)
  86{
  87        return !fc->initialized || (for_background && fc->blocked);
  88}
  89
  90static void fuse_drop_waiting(struct fuse_conn *fc)
  91{
  92        /*
   93         * lockless check of fc->connected is okay, because atomic_dec_and_test()
  94         * provides a memory barrier matched with the one in fuse_wait_aborted()
  95         * to ensure no wake-up is missed.
  96         */
  97        if (atomic_dec_and_test(&fc->num_waiting) &&
  98            !READ_ONCE(fc->connected)) {
  99                /* wake up aborters */
 100                wake_up_all(&fc->blocked_waitq);
 101        }
 102}
 103
 104static void fuse_put_request(struct fuse_req *req);
 105
 106static struct fuse_req *fuse_get_req(struct fuse_mount *fm, bool for_background)
 107{
 108        struct fuse_conn *fc = fm->fc;
 109        struct fuse_req *req;
 110        int err;
 111        atomic_inc(&fc->num_waiting);
 112
 113        if (fuse_block_alloc(fc, for_background)) {
 114                err = -EINTR;
 115                if (wait_event_killable_exclusive(fc->blocked_waitq,
 116                                !fuse_block_alloc(fc, for_background)))
 117                        goto out;
 118        }
 119        /* Matches smp_wmb() in fuse_set_initialized() */
 120        smp_rmb();
 121
 122        err = -ENOTCONN;
 123        if (!fc->connected)
 124                goto out;
 125
 126        err = -ECONNREFUSED;
 127        if (fc->conn_error)
 128                goto out;
 129
 130        req = fuse_request_alloc(fm, GFP_KERNEL);
 131        err = -ENOMEM;
 132        if (!req) {
 133                if (for_background)
 134                        wake_up(&fc->blocked_waitq);
 135                goto out;
 136        }
 137
 138        req->in.h.uid = from_kuid(fc->user_ns, current_fsuid());
 139        req->in.h.gid = from_kgid(fc->user_ns, current_fsgid());
 140        req->in.h.pid = pid_nr_ns(task_pid(current), fc->pid_ns);
 141
 142        __set_bit(FR_WAITING, &req->flags);
 143        if (for_background)
 144                __set_bit(FR_BACKGROUND, &req->flags);
 145
 146        if (unlikely(req->in.h.uid == ((uid_t)-1) ||
 147                     req->in.h.gid == ((gid_t)-1))) {
 148                fuse_put_request(req);
 149                return ERR_PTR(-EOVERFLOW);
 150        }
 151        return req;
 152
 153 out:
 154        fuse_drop_waiting(fc);
 155        return ERR_PTR(err);
 156}
 157
 158static void fuse_put_request(struct fuse_req *req)
 159{
 160        struct fuse_conn *fc = req->fm->fc;
 161
 162        if (refcount_dec_and_test(&req->count)) {
 163                if (test_bit(FR_BACKGROUND, &req->flags)) {
 164                        /*
 165                         * We get here in the unlikely case that a background
 166                         * request was allocated but not sent
 167                         */
 168                        spin_lock(&fc->bg_lock);
 169                        if (!fc->blocked)
 170                                wake_up(&fc->blocked_waitq);
 171                        spin_unlock(&fc->bg_lock);
 172                }
 173
 174                if (test_bit(FR_WAITING, &req->flags)) {
 175                        __clear_bit(FR_WAITING, &req->flags);
 176                        fuse_drop_waiting(fc);
 177                }
 178
 179                fuse_request_free(req);
 180        }
 181}
 182
 183unsigned int fuse_len_args(unsigned int numargs, struct fuse_arg *args)
 184{
 185        unsigned nbytes = 0;
 186        unsigned i;
 187
 188        for (i = 0; i < numargs; i++)
 189                nbytes += args[i].size;
 190
 191        return nbytes;
 192}
 193EXPORT_SYMBOL_GPL(fuse_len_args);
 194
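     /*
      * Allocate the next request ID.  There is no locking here; callers
      * are expected to hold fiq->lock while assigning IDs.
      */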
 195u64 fuse_get_unique(struct fuse_iqueue *fiq)
 196{
 197        fiq->reqctr += FUSE_REQ_ID_STEP;
 198        return fiq->reqctr;
 199}
 200EXPORT_SYMBOL_GPL(fuse_get_unique);
 201
 202static unsigned int fuse_req_hash(u64 unique)
 203{
 204        return hash_long(unique & ~FUSE_INT_REQ_BIT, FUSE_PQ_HASH_BITS);
 205}
 206
  207/*
 208 * A new request is available, wake fiq->waitq
 209 */
 210static void fuse_dev_wake_and_unlock(struct fuse_iqueue *fiq)
 211__releases(fiq->lock)
 212{
 213        wake_up(&fiq->waitq);
 214        kill_fasync(&fiq->fasync, SIGIO, POLL_IN);
 215        spin_unlock(&fiq->lock);
 216}
 217
 218const struct fuse_iqueue_ops fuse_dev_fiq_ops = {
 219        .wake_forget_and_unlock         = fuse_dev_wake_and_unlock,
 220        .wake_interrupt_and_unlock      = fuse_dev_wake_and_unlock,
 221        .wake_pending_and_unlock        = fuse_dev_wake_and_unlock,
 222};
 223EXPORT_SYMBOL_GPL(fuse_dev_fiq_ops);
 224
 225static void queue_request_and_unlock(struct fuse_iqueue *fiq,
 226                                     struct fuse_req *req)
 227__releases(fiq->lock)
 228{
 229        req->in.h.len = sizeof(struct fuse_in_header) +
 230                fuse_len_args(req->args->in_numargs,
 231                              (struct fuse_arg *) req->args->in_args);
 232        list_add_tail(&req->list, &fiq->pending);
 233        fiq->ops->wake_pending_and_unlock(fiq);
 234}
 235
 236void fuse_queue_forget(struct fuse_conn *fc, struct fuse_forget_link *forget,
 237                       u64 nodeid, u64 nlookup)
 238{
 239        struct fuse_iqueue *fiq = &fc->iq;
 240
 241        forget->forget_one.nodeid = nodeid;
 242        forget->forget_one.nlookup = nlookup;
 243
 244        spin_lock(&fiq->lock);
 245        if (fiq->connected) {
 246                fiq->forget_list_tail->next = forget;
 247                fiq->forget_list_tail = forget;
 248                fiq->ops->wake_forget_and_unlock(fiq);
 249        } else {
 250                kfree(forget);
 251                spin_unlock(&fiq->lock);
 252        }
 253}
 254
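     /*
      * Move background requests from bg_queue to the input queue until
      * max_background of them are in flight.  Called with fc->bg_lock held.
      */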
 255static void flush_bg_queue(struct fuse_conn *fc)
 256{
 257        struct fuse_iqueue *fiq = &fc->iq;
 258
 259        while (fc->active_background < fc->max_background &&
 260               !list_empty(&fc->bg_queue)) {
 261                struct fuse_req *req;
 262
 263                req = list_first_entry(&fc->bg_queue, struct fuse_req, list);
 264                list_del(&req->list);
 265                fc->active_background++;
 266                spin_lock(&fiq->lock);
 267                req->in.h.unique = fuse_get_unique(fiq);
 268                queue_request_and_unlock(fiq, req);
 269        }
 270}
 271
 272/*
 273 * This function is called when a request is finished.  Either a reply
 274 * has arrived or it was aborted (and not yet sent) or some error
 275 * occurred during communication with userspace, or the device file
 276 * was closed.  The requester thread is woken up (if still waiting),
 277 * the 'end' callback is called if given, else the reference to the
 278 * request is released
 279 */
 280void fuse_request_end(struct fuse_req *req)
 281{
 282        struct fuse_mount *fm = req->fm;
 283        struct fuse_conn *fc = fm->fc;
 284        struct fuse_iqueue *fiq = &fc->iq;
 285
 286        if (test_and_set_bit(FR_FINISHED, &req->flags))
 287                goto put_request;
 288
 289        /*
 290         * test_and_set_bit() implies smp_mb() between bit
 291         * changing and below FR_INTERRUPTED check. Pairs with
 292         * smp_mb() from queue_interrupt().
 293         */
 294        if (test_bit(FR_INTERRUPTED, &req->flags)) {
 295                spin_lock(&fiq->lock);
 296                list_del_init(&req->intr_entry);
 297                spin_unlock(&fiq->lock);
 298        }
 299        WARN_ON(test_bit(FR_PENDING, &req->flags));
 300        WARN_ON(test_bit(FR_SENT, &req->flags));
 301        if (test_bit(FR_BACKGROUND, &req->flags)) {
 302                spin_lock(&fc->bg_lock);
 303                clear_bit(FR_BACKGROUND, &req->flags);
 304                if (fc->num_background == fc->max_background) {
 305                        fc->blocked = 0;
 306                        wake_up(&fc->blocked_waitq);
 307                } else if (!fc->blocked) {
 308                        /*
 309                         * Wake up next waiter, if any.  It's okay to use
 310                         * waitqueue_active(), as we've already synced up
 311                         * fc->blocked with waiters with the wake_up() call
 312                         * above.
 313                         */
 314                        if (waitqueue_active(&fc->blocked_waitq))
 315                                wake_up(&fc->blocked_waitq);
 316                }
 317
 318                fc->num_background--;
 319                fc->active_background--;
 320                flush_bg_queue(fc);
 321                spin_unlock(&fc->bg_lock);
 322        } else {
 323                /* Wake up waiter sleeping in request_wait_answer() */
 324                wake_up(&req->waitq);
 325        }
 326
 327        if (test_bit(FR_ASYNC, &req->flags))
 328                req->args->end(fm, req->args, req->out.h.error);
 329put_request:
 330        fuse_put_request(req);
 331}
 332EXPORT_SYMBOL_GPL(fuse_request_end);
 333
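     /*
      * Queue a FUSE_INTERRUPT for a request that has already been handed
      * to userspace.  Returns -EINVAL if the request is not marked
      * FR_INTERRUPTED, and quietly backs off if it has already finished.
      */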
 334static int queue_interrupt(struct fuse_req *req)
 335{
 336        struct fuse_iqueue *fiq = &req->fm->fc->iq;
 337
 338        spin_lock(&fiq->lock);
  339        /* Check that the request has actually been marked interrupted */
 340        if (unlikely(!test_bit(FR_INTERRUPTED, &req->flags))) {
 341                spin_unlock(&fiq->lock);
 342                return -EINVAL;
 343        }
 344
 345        if (list_empty(&req->intr_entry)) {
 346                list_add_tail(&req->intr_entry, &fiq->interrupts);
 347                /*
 348                 * Pairs with smp_mb() implied by test_and_set_bit()
 349                 * from fuse_request_end().
 350                 */
 351                smp_mb();
 352                if (test_bit(FR_FINISHED, &req->flags)) {
 353                        list_del_init(&req->intr_entry);
 354                        spin_unlock(&fiq->lock);
 355                        return 0;
 356                }
 357                fiq->ops->wake_interrupt_and_unlock(fiq);
 358        } else {
 359                spin_unlock(&fiq->lock);
 360        }
 361        return 0;
 362}
 363
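     /*
      * Wait for the request to be answered.  First wait interruptibly so
      * that any signal can trigger a FUSE_INTERRUPT (skipped if the server
      * set no_interrupt), then killably, giving up with -EINTR if a fatal
      * signal arrives while the request is still pending (skipped for
      * FR_FORCE requests), and finally uninterruptibly once the request is
      * in userspace or forced.
      */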
 364static void request_wait_answer(struct fuse_req *req)
 365{
 366        struct fuse_conn *fc = req->fm->fc;
 367        struct fuse_iqueue *fiq = &fc->iq;
 368        int err;
 369
 370        if (!fc->no_interrupt) {
 371                /* Any signal may interrupt this */
 372                err = wait_event_interruptible(req->waitq,
 373                                        test_bit(FR_FINISHED, &req->flags));
 374                if (!err)
 375                        return;
 376
 377                set_bit(FR_INTERRUPTED, &req->flags);
 378                /* matches barrier in fuse_dev_do_read() */
 379                smp_mb__after_atomic();
 380                if (test_bit(FR_SENT, &req->flags))
 381                        queue_interrupt(req);
 382        }
 383
 384        if (!test_bit(FR_FORCE, &req->flags)) {
 385                /* Only fatal signals may interrupt this */
 386                err = wait_event_killable(req->waitq,
 387                                        test_bit(FR_FINISHED, &req->flags));
 388                if (!err)
 389                        return;
 390
 391                spin_lock(&fiq->lock);
 392                /* Request is not yet in userspace, bail out */
 393                if (test_bit(FR_PENDING, &req->flags)) {
 394                        list_del(&req->list);
 395                        spin_unlock(&fiq->lock);
 396                        __fuse_put_request(req);
 397                        req->out.h.error = -EINTR;
 398                        return;
 399                }
 400                spin_unlock(&fiq->lock);
 401        }
 402
 403        /*
 404         * Either request is already in userspace, or it was forced.
 405         * Wait it out.
 406         */
 407        wait_event(req->waitq, test_bit(FR_FINISHED, &req->flags));
 408}
 409
 410static void __fuse_request_send(struct fuse_req *req)
 411{
 412        struct fuse_iqueue *fiq = &req->fm->fc->iq;
 413
 414        BUG_ON(test_bit(FR_BACKGROUND, &req->flags));
 415        spin_lock(&fiq->lock);
 416        if (!fiq->connected) {
 417                spin_unlock(&fiq->lock);
 418                req->out.h.error = -ENOTCONN;
 419        } else {
 420                req->in.h.unique = fuse_get_unique(fiq);
 421                /* acquire extra reference, since request is still needed
 422                   after fuse_request_end() */
 423                __fuse_get_request(req);
 424                queue_request_and_unlock(fiq, req);
 425
 426                request_wait_answer(req);
 427                /* Pairs with smp_wmb() in fuse_request_end() */
 428                smp_rmb();
 429        }
 430}
 431
 432static void fuse_adjust_compat(struct fuse_conn *fc, struct fuse_args *args)
 433{
 434        if (fc->minor < 4 && args->opcode == FUSE_STATFS)
 435                args->out_args[0].size = FUSE_COMPAT_STATFS_SIZE;
 436
 437        if (fc->minor < 9) {
 438                switch (args->opcode) {
 439                case FUSE_LOOKUP:
 440                case FUSE_CREATE:
 441                case FUSE_MKNOD:
 442                case FUSE_MKDIR:
 443                case FUSE_SYMLINK:
 444                case FUSE_LINK:
 445                        args->out_args[0].size = FUSE_COMPAT_ENTRY_OUT_SIZE;
 446                        break;
 447                case FUSE_GETATTR:
 448                case FUSE_SETATTR:
 449                        args->out_args[0].size = FUSE_COMPAT_ATTR_OUT_SIZE;
 450                        break;
 451                }
 452        }
 453        if (fc->minor < 12) {
 454                switch (args->opcode) {
 455                case FUSE_CREATE:
 456                        args->in_args[0].size = sizeof(struct fuse_open_in);
 457                        break;
 458                case FUSE_MKNOD:
 459                        args->in_args[0].size = FUSE_COMPAT_MKNOD_IN_SIZE;
 460                        break;
 461                }
 462        }
 463}
 464
 465static void fuse_force_creds(struct fuse_req *req)
 466{
 467        struct fuse_conn *fc = req->fm->fc;
 468
 469        req->in.h.uid = from_kuid_munged(fc->user_ns, current_fsuid());
 470        req->in.h.gid = from_kgid_munged(fc->user_ns, current_fsgid());
 471        req->in.h.pid = pid_nr_ns(task_pid(current), fc->pid_ns);
 472}
 473
 474static void fuse_args_to_req(struct fuse_req *req, struct fuse_args *args)
 475{
 476        req->in.h.opcode = args->opcode;
 477        req->in.h.nodeid = args->nodeid;
 478        req->args = args;
 479        if (args->end)
 480                __set_bit(FR_ASYNC, &req->flags);
 481}
 482
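     /*
      * Send a request and wait for the reply.  Returns the negative error
      * from the reply header (0 on success), or, when args->out_argvar is
      * set, the size of the last (variable length) out argument.
      *
      * Illustrative sketch of a caller (hypothetical, not taken from this
      * file): fill in a struct fuse_args and submit it:
      *
      *         struct fuse_args args = {};
      *
      *         args.opcode = FUSE_STATFS;
      *         args.nodeid = nodeid;
      *         args.out_numargs = 1;
      *         args.out_args[0].size = sizeof(outarg);
      *         args.out_args[0].value = &outarg;
      *         err = fuse_simple_request(fm, &args);
      */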
 483ssize_t fuse_simple_request(struct fuse_mount *fm, struct fuse_args *args)
 484{
 485        struct fuse_conn *fc = fm->fc;
 486        struct fuse_req *req;
 487        ssize_t ret;
 488
 489        if (args->force) {
 490                atomic_inc(&fc->num_waiting);
 491                req = fuse_request_alloc(fm, GFP_KERNEL | __GFP_NOFAIL);
 492
 493                if (!args->nocreds)
 494                        fuse_force_creds(req);
 495
 496                __set_bit(FR_WAITING, &req->flags);
 497                __set_bit(FR_FORCE, &req->flags);
 498        } else {
 499                WARN_ON(args->nocreds);
 500                req = fuse_get_req(fm, false);
 501                if (IS_ERR(req))
 502                        return PTR_ERR(req);
 503        }
 504
 505        /* Needs to be done after fuse_get_req() so that fc->minor is valid */
 506        fuse_adjust_compat(fc, args);
 507        fuse_args_to_req(req, args);
 508
 509        if (!args->noreply)
 510                __set_bit(FR_ISREPLY, &req->flags);
 511        __fuse_request_send(req);
 512        ret = req->out.h.error;
 513        if (!ret && args->out_argvar) {
 514                BUG_ON(args->out_numargs == 0);
 515                ret = args->out_args[args->out_numargs - 1].size;
 516        }
 517        fuse_put_request(req);
 518
 519        return ret;
 520}
 521
 522static bool fuse_request_queue_background(struct fuse_req *req)
 523{
 524        struct fuse_mount *fm = req->fm;
 525        struct fuse_conn *fc = fm->fc;
 526        bool queued = false;
 527
 528        WARN_ON(!test_bit(FR_BACKGROUND, &req->flags));
 529        if (!test_bit(FR_WAITING, &req->flags)) {
 530                __set_bit(FR_WAITING, &req->flags);
 531                atomic_inc(&fc->num_waiting);
 532        }
 533        __set_bit(FR_ISREPLY, &req->flags);
 534        spin_lock(&fc->bg_lock);
 535        if (likely(fc->connected)) {
 536                fc->num_background++;
 537                if (fc->num_background == fc->max_background)
 538                        fc->blocked = 1;
 539                list_add_tail(&req->list, &fc->bg_queue);
 540                flush_bg_queue(fc);
 541                queued = true;
 542        }
 543        spin_unlock(&fc->bg_lock);
 544
 545        return queued;
 546}
 547
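     /*
      * Queue a request for asynchronous (background) processing.  The reply
      * is delivered through args->end(), which fuse_request_end() invokes
      * because FR_ASYNC is set whenever args->end is non-NULL.  Returns 0 on
      * successful queuing and a negative error otherwise.
      */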
 548int fuse_simple_background(struct fuse_mount *fm, struct fuse_args *args,
 549                            gfp_t gfp_flags)
 550{
 551        struct fuse_req *req;
 552
 553        if (args->force) {
 554                WARN_ON(!args->nocreds);
 555                req = fuse_request_alloc(fm, gfp_flags);
 556                if (!req)
 557                        return -ENOMEM;
 558                __set_bit(FR_BACKGROUND, &req->flags);
 559        } else {
 560                WARN_ON(args->nocreds);
 561                req = fuse_get_req(fm, true);
 562                if (IS_ERR(req))
 563                        return PTR_ERR(req);
 564        }
 565
 566        fuse_args_to_req(req, args);
 567
 568        if (!fuse_request_queue_background(req)) {
 569                fuse_put_request(req);
 570                return -ENOTCONN;
 571        }
 572
 573        return 0;
 574}
 575EXPORT_SYMBOL_GPL(fuse_simple_background);
 576
 577static int fuse_simple_notify_reply(struct fuse_mount *fm,
 578                                    struct fuse_args *args, u64 unique)
 579{
 580        struct fuse_req *req;
 581        struct fuse_iqueue *fiq = &fm->fc->iq;
 582        int err = 0;
 583
 584        req = fuse_get_req(fm, false);
 585        if (IS_ERR(req))
 586                return PTR_ERR(req);
 587
 588        __clear_bit(FR_ISREPLY, &req->flags);
 589        req->in.h.unique = unique;
 590
 591        fuse_args_to_req(req, args);
 592
 593        spin_lock(&fiq->lock);
 594        if (fiq->connected) {
 595                queue_request_and_unlock(fiq, req);
 596        } else {
 597                err = -ENODEV;
 598                spin_unlock(&fiq->lock);
 599                fuse_put_request(req);
 600        }
 601
 602        return err;
 603}
 604
 605/*
 606 * Lock the request.  Up to the next unlock_request() there mustn't be
 607 * anything that could cause a page-fault.  If the request was already
  608 * aborted, bail out.
 609 */
 610static int lock_request(struct fuse_req *req)
 611{
 612        int err = 0;
 613        if (req) {
 614                spin_lock(&req->waitq.lock);
 615                if (test_bit(FR_ABORTED, &req->flags))
 616                        err = -ENOENT;
 617                else
 618                        set_bit(FR_LOCKED, &req->flags);
 619                spin_unlock(&req->waitq.lock);
 620        }
 621        return err;
 622}
 623
 624/*
 625 * Unlock request.  If it was aborted while locked, caller is responsible
 626 * for unlocking and ending the request.
 627 */
 628static int unlock_request(struct fuse_req *req)
 629{
 630        int err = 0;
 631        if (req) {
 632                spin_lock(&req->waitq.lock);
 633                if (test_bit(FR_ABORTED, &req->flags))
 634                        err = -ENOENT;
 635                else
 636                        clear_bit(FR_LOCKED, &req->flags);
 637                spin_unlock(&req->waitq.lock);
 638        }
 639        return err;
 640}
 641
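     /*
      * State kept while copying request arguments between the kernel and
      * the server: either through an iov_iter (plain read/write on the
      * device) or through pipe buffers (splice).  'write' is nonzero when
      * data flows from the kernel into the userspace buffer.
      */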
 642struct fuse_copy_state {
 643        int write;
 644        struct fuse_req *req;
 645        struct iov_iter *iter;
 646        struct pipe_buffer *pipebufs;
 647        struct pipe_buffer *currbuf;
 648        struct pipe_inode_info *pipe;
 649        unsigned long nr_segs;
 650        struct page *pg;
 651        unsigned len;
 652        unsigned offset;
 653        unsigned move_pages:1;
 654};
 655
 656static void fuse_copy_init(struct fuse_copy_state *cs, int write,
 657                           struct iov_iter *iter)
 658{
 659        memset(cs, 0, sizeof(*cs));
 660        cs->write = write;
 661        cs->iter = iter;
 662}
 663
 664/* Unmap and put previous page of userspace buffer */
 665static void fuse_copy_finish(struct fuse_copy_state *cs)
 666{
 667        if (cs->currbuf) {
 668                struct pipe_buffer *buf = cs->currbuf;
 669
 670                if (cs->write)
 671                        buf->len = PAGE_SIZE - cs->len;
 672                cs->currbuf = NULL;
 673        } else if (cs->pg) {
 674                if (cs->write) {
 675                        flush_dcache_page(cs->pg);
 676                        set_page_dirty_lock(cs->pg);
 677                }
 678                put_page(cs->pg);
 679        }
 680        cs->pg = NULL;
 681}
 682
 683/*
  684 * Get another pageful of userspace buffer, map it to kernel
  685 * address space, and lock the request
 686 */
 687static int fuse_copy_fill(struct fuse_copy_state *cs)
 688{
 689        struct page *page;
 690        int err;
 691
 692        err = unlock_request(cs->req);
 693        if (err)
 694                return err;
 695
 696        fuse_copy_finish(cs);
 697        if (cs->pipebufs) {
 698                struct pipe_buffer *buf = cs->pipebufs;
 699
 700                if (!cs->write) {
 701                        err = pipe_buf_confirm(cs->pipe, buf);
 702                        if (err)
 703                                return err;
 704
 705                        BUG_ON(!cs->nr_segs);
 706                        cs->currbuf = buf;
 707                        cs->pg = buf->page;
 708                        cs->offset = buf->offset;
 709                        cs->len = buf->len;
 710                        cs->pipebufs++;
 711                        cs->nr_segs--;
 712                } else {
 713                        if (cs->nr_segs >= cs->pipe->max_usage)
 714                                return -EIO;
 715
 716                        page = alloc_page(GFP_HIGHUSER);
 717                        if (!page)
 718                                return -ENOMEM;
 719
 720                        buf->page = page;
 721                        buf->offset = 0;
 722                        buf->len = 0;
 723
 724                        cs->currbuf = buf;
 725                        cs->pg = page;
 726                        cs->offset = 0;
 727                        cs->len = PAGE_SIZE;
 728                        cs->pipebufs++;
 729                        cs->nr_segs++;
 730                }
 731        } else {
 732                size_t off;
 733                err = iov_iter_get_pages(cs->iter, &page, PAGE_SIZE, 1, &off);
 734                if (err < 0)
 735                        return err;
 736                BUG_ON(!err);
 737                cs->len = err;
 738                cs->offset = off;
 739                cs->pg = page;
 740                iov_iter_advance(cs->iter, err);
 741        }
 742
 743        return lock_request(cs->req);
 744}
 745
 746/* Do as much copy to/from userspace buffer as we can */
 747static int fuse_copy_do(struct fuse_copy_state *cs, void **val, unsigned *size)
 748{
 749        unsigned ncpy = min(*size, cs->len);
 750        if (val) {
 751                void *pgaddr = kmap_local_page(cs->pg);
 752                void *buf = pgaddr + cs->offset;
 753
 754                if (cs->write)
 755                        memcpy(buf, *val, ncpy);
 756                else
 757                        memcpy(*val, buf, ncpy);
 758
 759                kunmap_local(pgaddr);
 760                *val += ncpy;
 761        }
 762        *size -= ncpy;
 763        cs->len -= ncpy;
 764        cs->offset += ncpy;
 765        return ncpy;
 766}
 767
 768static int fuse_check_page(struct page *page)
 769{
 770        if (page_mapcount(page) ||
 771            page->mapping != NULL ||
 772            (page->flags & PAGE_FLAGS_CHECK_AT_PREP &
 773             ~(1 << PG_locked |
 774               1 << PG_referenced |
 775               1 << PG_uptodate |
 776               1 << PG_lru |
 777               1 << PG_active |
 778               1 << PG_workingset |
 779               1 << PG_reclaim |
 780               1 << PG_waiters))) {
 781                dump_page(page, "fuse: trying to steal weird page");
 782                return 1;
 783        }
 784        return 0;
 785}
 786
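     /*
      * Try to steal the page backing a pipe buffer and splice it into the
      * page cache in place of *pagep, avoiding a copy.  Returns 0 on
      * success, a negative error, or 1 to tell the caller to fall back to
      * copying.
      */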
 787static int fuse_try_move_page(struct fuse_copy_state *cs, struct page **pagep)
 788{
 789        int err;
 790        struct page *oldpage = *pagep;
 791        struct page *newpage;
 792        struct pipe_buffer *buf = cs->pipebufs;
 793
 794        get_page(oldpage);
 795        err = unlock_request(cs->req);
 796        if (err)
 797                goto out_put_old;
 798
 799        fuse_copy_finish(cs);
 800
 801        err = pipe_buf_confirm(cs->pipe, buf);
 802        if (err)
 803                goto out_put_old;
 804
 805        BUG_ON(!cs->nr_segs);
 806        cs->currbuf = buf;
 807        cs->len = buf->len;
 808        cs->pipebufs++;
 809        cs->nr_segs--;
 810
 811        if (cs->len != PAGE_SIZE)
 812                goto out_fallback;
 813
 814        if (!pipe_buf_try_steal(cs->pipe, buf))
 815                goto out_fallback;
 816
 817        newpage = buf->page;
 818
 819        if (!PageUptodate(newpage))
 820                SetPageUptodate(newpage);
 821
 822        ClearPageMappedToDisk(newpage);
 823
 824        if (fuse_check_page(newpage) != 0)
 825                goto out_fallback_unlock;
 826
 827        /*
  828         * This is a new and locked page; it shouldn't be mapped or
 829         * have any special flags on it
 830         */
 831        if (WARN_ON(page_mapped(oldpage)))
 832                goto out_fallback_unlock;
 833        if (WARN_ON(page_has_private(oldpage)))
 834                goto out_fallback_unlock;
 835        if (WARN_ON(PageDirty(oldpage) || PageWriteback(oldpage)))
 836                goto out_fallback_unlock;
 837        if (WARN_ON(PageMlocked(oldpage)))
 838                goto out_fallback_unlock;
 839
 840        replace_page_cache_page(oldpage, newpage);
 841
 842        get_page(newpage);
 843
 844        if (!(buf->flags & PIPE_BUF_FLAG_LRU))
 845                lru_cache_add(newpage);
 846
 847        /*
 848         * Release while we have extra ref on stolen page.  Otherwise
 849         * anon_pipe_buf_release() might think the page can be reused.
 850         */
 851        pipe_buf_release(cs->pipe, buf);
 852
 853        err = 0;
 854        spin_lock(&cs->req->waitq.lock);
 855        if (test_bit(FR_ABORTED, &cs->req->flags))
 856                err = -ENOENT;
 857        else
 858                *pagep = newpage;
 859        spin_unlock(&cs->req->waitq.lock);
 860
 861        if (err) {
 862                unlock_page(newpage);
 863                put_page(newpage);
 864                goto out_put_old;
 865        }
 866
 867        unlock_page(oldpage);
 868        /* Drop ref for ap->pages[] array */
 869        put_page(oldpage);
 870        cs->len = 0;
 871
 872        err = 0;
 873out_put_old:
 874        /* Drop ref obtained in this function */
 875        put_page(oldpage);
 876        return err;
 877
 878out_fallback_unlock:
 879        unlock_page(newpage);
 880out_fallback:
 881        cs->pg = buf->page;
 882        cs->offset = buf->offset;
 883
 884        err = lock_request(cs->req);
 885        if (!err)
 886                err = 1;
 887
 888        goto out_put_old;
 889}
 890
 891static int fuse_ref_page(struct fuse_copy_state *cs, struct page *page,
 892                         unsigned offset, unsigned count)
 893{
 894        struct pipe_buffer *buf;
 895        int err;
 896
 897        if (cs->nr_segs >= cs->pipe->max_usage)
 898                return -EIO;
 899
 900        get_page(page);
 901        err = unlock_request(cs->req);
 902        if (err) {
 903                put_page(page);
 904                return err;
 905        }
 906
 907        fuse_copy_finish(cs);
 908
 909        buf = cs->pipebufs;
 910        buf->page = page;
 911        buf->offset = offset;
 912        buf->len = count;
 913
 914        cs->pipebufs++;
 915        cs->nr_segs++;
 916        cs->len = 0;
 917
 918        return 0;
 919}
 920
 921/*
 922 * Copy a page in the request to/from the userspace buffer.  Must be
 923 * done atomically
 924 */
 925static int fuse_copy_page(struct fuse_copy_state *cs, struct page **pagep,
 926                          unsigned offset, unsigned count, int zeroing)
 927{
 928        int err;
 929        struct page *page = *pagep;
 930
 931        if (page && zeroing && count < PAGE_SIZE)
 932                clear_highpage(page);
 933
 934        while (count) {
 935                if (cs->write && cs->pipebufs && page) {
 936                        /*
 937                         * Can't control lifetime of pipe buffers, so always
 938                         * copy user pages.
 939                         */
 940                        if (cs->req->args->user_pages) {
 941                                err = fuse_copy_fill(cs);
 942                                if (err)
 943                                        return err;
 944                        } else {
 945                                return fuse_ref_page(cs, page, offset, count);
 946                        }
 947                } else if (!cs->len) {
 948                        if (cs->move_pages && page &&
 949                            offset == 0 && count == PAGE_SIZE) {
 950                                err = fuse_try_move_page(cs, pagep);
 951                                if (err <= 0)
 952                                        return err;
 953                        } else {
 954                                err = fuse_copy_fill(cs);
 955                                if (err)
 956                                        return err;
 957                        }
 958                }
 959                if (page) {
 960                        void *mapaddr = kmap_local_page(page);
 961                        void *buf = mapaddr + offset;
 962                        offset += fuse_copy_do(cs, &buf, &count);
 963                        kunmap_local(mapaddr);
 964                } else
 965                        offset += fuse_copy_do(cs, NULL, &count);
 966        }
 967        if (page && !cs->write)
 968                flush_dcache_page(page);
 969        return 0;
 970}
 971
 972/* Copy pages in the request to/from userspace buffer */
 973static int fuse_copy_pages(struct fuse_copy_state *cs, unsigned nbytes,
 974                           int zeroing)
 975{
 976        unsigned i;
 977        struct fuse_req *req = cs->req;
 978        struct fuse_args_pages *ap = container_of(req->args, typeof(*ap), args);
 979
 980
 981        for (i = 0; i < ap->num_pages && (nbytes || zeroing); i++) {
 982                int err;
 983                unsigned int offset = ap->descs[i].offset;
 984                unsigned int count = min(nbytes, ap->descs[i].length);
 985
 986                err = fuse_copy_page(cs, &ap->pages[i], offset, count, zeroing);
 987                if (err)
 988                        return err;
 989
 990                nbytes -= count;
 991        }
 992        return 0;
 993}
 994
 995/* Copy a single argument in the request to/from userspace buffer */
 996static int fuse_copy_one(struct fuse_copy_state *cs, void *val, unsigned size)
 997{
 998        while (size) {
 999                if (!cs->len) {
1000                        int err = fuse_copy_fill(cs);
1001                        if (err)
1002                                return err;
1003                }
1004                fuse_copy_do(cs, &val, &size);
1005        }
1006        return 0;
1007}
1008
1009/* Copy request arguments to/from userspace buffer */
1010static int fuse_copy_args(struct fuse_copy_state *cs, unsigned numargs,
1011                          unsigned argpages, struct fuse_arg *args,
1012                          int zeroing)
1013{
1014        int err = 0;
1015        unsigned i;
1016
1017        for (i = 0; !err && i < numargs; i++)  {
1018                struct fuse_arg *arg = &args[i];
1019                if (i == numargs - 1 && argpages)
1020                        err = fuse_copy_pages(cs, arg->size, zeroing);
1021                else
1022                        err = fuse_copy_one(cs, arg->value, arg->size);
1023        }
1024        return err;
1025}
1026
1027static int forget_pending(struct fuse_iqueue *fiq)
1028{
1029        return fiq->forget_list_head.next != NULL;
1030}
1031
1032static int request_pending(struct fuse_iqueue *fiq)
1033{
1034        return !list_empty(&fiq->pending) || !list_empty(&fiq->interrupts) ||
1035                forget_pending(fiq);
1036}
1037
1038/*
1039 * Transfer an interrupt request to userspace
1040 *
1041 * Unlike other requests this is assembled on demand, without a need
1042 * to allocate a separate fuse_req structure.
1043 *
1044 * Called with fiq->lock held, releases it
1045 */
1046static int fuse_read_interrupt(struct fuse_iqueue *fiq,
1047                               struct fuse_copy_state *cs,
1048                               size_t nbytes, struct fuse_req *req)
1049__releases(fiq->lock)
1050{
1051        struct fuse_in_header ih;
1052        struct fuse_interrupt_in arg;
1053        unsigned reqsize = sizeof(ih) + sizeof(arg);
1054        int err;
1055
1056        list_del_init(&req->intr_entry);
1057        memset(&ih, 0, sizeof(ih));
1058        memset(&arg, 0, sizeof(arg));
1059        ih.len = reqsize;
1060        ih.opcode = FUSE_INTERRUPT;
1061        ih.unique = (req->in.h.unique | FUSE_INT_REQ_BIT);
1062        arg.unique = req->in.h.unique;
1063
1064        spin_unlock(&fiq->lock);
1065        if (nbytes < reqsize)
1066                return -EINVAL;
1067
1068        err = fuse_copy_one(cs, &ih, sizeof(ih));
1069        if (!err)
1070                err = fuse_copy_one(cs, &arg, sizeof(arg));
1071        fuse_copy_finish(cs);
1072
1073        return err ? err : reqsize;
1074}
1075
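     /*
      * Detach up to 'max' queued forget requests and return them as a list;
      * the number actually dequeued is stored in *countp if it is non-NULL.
      * Caller must hold fiq->lock.
      */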
1076struct fuse_forget_link *fuse_dequeue_forget(struct fuse_iqueue *fiq,
1077                                             unsigned int max,
1078                                             unsigned int *countp)
1079{
1080        struct fuse_forget_link *head = fiq->forget_list_head.next;
1081        struct fuse_forget_link **newhead = &head;
1082        unsigned count;
1083
1084        for (count = 0; *newhead != NULL && count < max; count++)
1085                newhead = &(*newhead)->next;
1086
1087        fiq->forget_list_head.next = *newhead;
1088        *newhead = NULL;
1089        if (fiq->forget_list_head.next == NULL)
1090                fiq->forget_list_tail = &fiq->forget_list_head;
1091
1092        if (countp != NULL)
1093                *countp = count;
1094
1095        return head;
1096}
1097EXPORT_SYMBOL(fuse_dequeue_forget);
1098
1099static int fuse_read_single_forget(struct fuse_iqueue *fiq,
1100                                   struct fuse_copy_state *cs,
1101                                   size_t nbytes)
1102__releases(fiq->lock)
1103{
1104        int err;
1105        struct fuse_forget_link *forget = fuse_dequeue_forget(fiq, 1, NULL);
1106        struct fuse_forget_in arg = {
1107                .nlookup = forget->forget_one.nlookup,
1108        };
1109        struct fuse_in_header ih = {
1110                .opcode = FUSE_FORGET,
1111                .nodeid = forget->forget_one.nodeid,
1112                .unique = fuse_get_unique(fiq),
1113                .len = sizeof(ih) + sizeof(arg),
1114        };
1115
1116        spin_unlock(&fiq->lock);
1117        kfree(forget);
1118        if (nbytes < ih.len)
1119                return -EINVAL;
1120
1121        err = fuse_copy_one(cs, &ih, sizeof(ih));
1122        if (!err)
1123                err = fuse_copy_one(cs, &arg, sizeof(arg));
1124        fuse_copy_finish(cs);
1125
1126        if (err)
1127                return err;
1128
1129        return ih.len;
1130}
1131
1132static int fuse_read_batch_forget(struct fuse_iqueue *fiq,
1133                                   struct fuse_copy_state *cs, size_t nbytes)
1134__releases(fiq->lock)
1135{
1136        int err;
1137        unsigned max_forgets;
1138        unsigned count;
1139        struct fuse_forget_link *head;
1140        struct fuse_batch_forget_in arg = { .count = 0 };
1141        struct fuse_in_header ih = {
1142                .opcode = FUSE_BATCH_FORGET,
1143                .unique = fuse_get_unique(fiq),
1144                .len = sizeof(ih) + sizeof(arg),
1145        };
1146
1147        if (nbytes < ih.len) {
1148                spin_unlock(&fiq->lock);
1149                return -EINVAL;
1150        }
1151
1152        max_forgets = (nbytes - ih.len) / sizeof(struct fuse_forget_one);
1153        head = fuse_dequeue_forget(fiq, max_forgets, &count);
1154        spin_unlock(&fiq->lock);
1155
1156        arg.count = count;
1157        ih.len += count * sizeof(struct fuse_forget_one);
1158        err = fuse_copy_one(cs, &ih, sizeof(ih));
1159        if (!err)
1160                err = fuse_copy_one(cs, &arg, sizeof(arg));
1161
1162        while (head) {
1163                struct fuse_forget_link *forget = head;
1164
1165                if (!err) {
1166                        err = fuse_copy_one(cs, &forget->forget_one,
1167                                            sizeof(forget->forget_one));
1168                }
1169                head = forget->next;
1170                kfree(forget);
1171        }
1172
1173        fuse_copy_finish(cs);
1174
1175        if (err)
1176                return err;
1177
1178        return ih.len;
1179}
1180
1181static int fuse_read_forget(struct fuse_conn *fc, struct fuse_iqueue *fiq,
1182                            struct fuse_copy_state *cs,
1183                            size_t nbytes)
1184__releases(fiq->lock)
1185{
1186        if (fc->minor < 16 || fiq->forget_list_head.next->next == NULL)
1187                return fuse_read_single_forget(fiq, cs, nbytes);
1188        else
1189                return fuse_read_batch_forget(fiq, cs, nbytes);
1190}
1191
1192/*
1193 * Read a single request into the userspace filesystem's buffer.  This
1194 * function waits until a request is available, then removes it from
1195 * the pending list and copies request data to userspace buffer.  If
1196 * no reply is needed (FORGET) or request has been aborted or there
1197 * was an error during the copying then it's finished by calling
1198 * fuse_request_end().  Otherwise add it to the processing list, and set
1199 * the 'sent' flag.
1200 */
1201static ssize_t fuse_dev_do_read(struct fuse_dev *fud, struct file *file,
1202                                struct fuse_copy_state *cs, size_t nbytes)
1203{
1204        ssize_t err;
1205        struct fuse_conn *fc = fud->fc;
1206        struct fuse_iqueue *fiq = &fc->iq;
1207        struct fuse_pqueue *fpq = &fud->pq;
1208        struct fuse_req *req;
1209        struct fuse_args *args;
1210        unsigned reqsize;
1211        unsigned int hash;
1212
1213        /*
1214         * Require sane minimum read buffer - that has capacity for fixed part
1215         * of any request header + negotiated max_write room for data.
1216         *
1217         * Historically libfuse reserves 4K for fixed header room, but e.g.
1218         * GlusterFS reserves only 80 bytes
1219         *
1220         *      = `sizeof(fuse_in_header) + sizeof(fuse_write_in)`
1221         *
1222         * which is the absolute minimum any sane filesystem should be using
1223         * for header room.
1224         */
1225        if (nbytes < max_t(size_t, FUSE_MIN_READ_BUFFER,
1226                           sizeof(struct fuse_in_header) +
1227                           sizeof(struct fuse_write_in) +
1228                           fc->max_write))
1229                return -EINVAL;
1230
1231 restart:
1232        for (;;) {
1233                spin_lock(&fiq->lock);
1234                if (!fiq->connected || request_pending(fiq))
1235                        break;
1236                spin_unlock(&fiq->lock);
1237
1238                if (file->f_flags & O_NONBLOCK)
1239                        return -EAGAIN;
1240                err = wait_event_interruptible_exclusive(fiq->waitq,
1241                                !fiq->connected || request_pending(fiq));
1242                if (err)
1243                        return err;
1244        }
1245
1246        if (!fiq->connected) {
1247                err = fc->aborted ? -ECONNABORTED : -ENODEV;
1248                goto err_unlock;
1249        }
1250
1251        if (!list_empty(&fiq->interrupts)) {
1252                req = list_entry(fiq->interrupts.next, struct fuse_req,
1253                                 intr_entry);
1254                return fuse_read_interrupt(fiq, cs, nbytes, req);
1255        }
1256
1257        if (forget_pending(fiq)) {
1258                if (list_empty(&fiq->pending) || fiq->forget_batch-- > 0)
1259                        return fuse_read_forget(fc, fiq, cs, nbytes);
1260
1261                if (fiq->forget_batch <= -8)
1262                        fiq->forget_batch = 16;
1263        }
1264
1265        req = list_entry(fiq->pending.next, struct fuse_req, list);
1266        clear_bit(FR_PENDING, &req->flags);
1267        list_del_init(&req->list);
1268        spin_unlock(&fiq->lock);
1269
1270        args = req->args;
1271        reqsize = req->in.h.len;
1272
1273        /* If request is too large, reply with an error and restart the read */
1274        if (nbytes < reqsize) {
1275                req->out.h.error = -EIO;
1276                /* SETXATTR is special, since it may contain too large data */
1277                if (args->opcode == FUSE_SETXATTR)
1278                        req->out.h.error = -E2BIG;
1279                fuse_request_end(req);
1280                goto restart;
1281        }
1282        spin_lock(&fpq->lock);
1283        /*
1284         *  Must not put request on fpq->io queue after having been shut down by
1285         *  fuse_abort_conn()
1286         */
1287        if (!fpq->connected) {
1288                req->out.h.error = err = -ECONNABORTED;
1289                goto out_end;
1290
1291        }
1292        list_add(&req->list, &fpq->io);
1293        spin_unlock(&fpq->lock);
1294        cs->req = req;
1295        err = fuse_copy_one(cs, &req->in.h, sizeof(req->in.h));
1296        if (!err)
1297                err = fuse_copy_args(cs, args->in_numargs, args->in_pages,
1298                                     (struct fuse_arg *) args->in_args, 0);
1299        fuse_copy_finish(cs);
1300        spin_lock(&fpq->lock);
1301        clear_bit(FR_LOCKED, &req->flags);
1302        if (!fpq->connected) {
1303                err = fc->aborted ? -ECONNABORTED : -ENODEV;
1304                goto out_end;
1305        }
1306        if (err) {
1307                req->out.h.error = -EIO;
1308                goto out_end;
1309        }
1310        if (!test_bit(FR_ISREPLY, &req->flags)) {
1311                err = reqsize;
1312                goto out_end;
1313        }
1314        hash = fuse_req_hash(req->in.h.unique);
1315        list_move_tail(&req->list, &fpq->processing[hash]);
1316        __fuse_get_request(req);
1317        set_bit(FR_SENT, &req->flags);
1318        spin_unlock(&fpq->lock);
1319        /* matches barrier in request_wait_answer() */
1320        smp_mb__after_atomic();
1321        if (test_bit(FR_INTERRUPTED, &req->flags))
1322                queue_interrupt(req);
1323        fuse_put_request(req);
1324
1325        return reqsize;
1326
1327out_end:
1328        if (!test_bit(FR_PRIVATE, &req->flags))
1329                list_del_init(&req->list);
1330        spin_unlock(&fpq->lock);
1331        fuse_request_end(req);
1332        return err;
1333
1334 err_unlock:
1335        spin_unlock(&fiq->lock);
1336        return err;
1337}
1338
1339static int fuse_dev_open(struct inode *inode, struct file *file)
1340{
1341        /*
 1342         * The fuse device file's private_data is used to hold the
 1343         * fuse_dev (and through it the connection) once it is mounted,
 1344         * and to keep track of whether the file has been mounted already.
1345         */
1346        file->private_data = NULL;
1347        return 0;
1348}
1349
1350static ssize_t fuse_dev_read(struct kiocb *iocb, struct iov_iter *to)
1351{
1352        struct fuse_copy_state cs;
1353        struct file *file = iocb->ki_filp;
1354        struct fuse_dev *fud = fuse_get_dev(file);
1355
1356        if (!fud)
1357                return -EPERM;
1358
1359        if (!iter_is_iovec(to))
1360                return -EINVAL;
1361
1362        fuse_copy_init(&cs, 1, to);
1363
1364        return fuse_dev_do_read(fud, file, &cs, iov_iter_count(to));
1365}
1366
1367static ssize_t fuse_dev_splice_read(struct file *in, loff_t *ppos,
1368                                    struct pipe_inode_info *pipe,
1369                                    size_t len, unsigned int flags)
1370{
1371        int total, ret;
1372        int page_nr = 0;
1373        struct pipe_buffer *bufs;
1374        struct fuse_copy_state cs;
1375        struct fuse_dev *fud = fuse_get_dev(in);
1376
1377        if (!fud)
1378                return -EPERM;
1379
1380        bufs = kvmalloc_array(pipe->max_usage, sizeof(struct pipe_buffer),
1381                              GFP_KERNEL);
1382        if (!bufs)
1383                return -ENOMEM;
1384
1385        fuse_copy_init(&cs, 1, NULL);
1386        cs.pipebufs = bufs;
1387        cs.pipe = pipe;
1388        ret = fuse_dev_do_read(fud, in, &cs, len);
1389        if (ret < 0)
1390                goto out;
1391
1392        if (pipe_occupancy(pipe->head, pipe->tail) + cs.nr_segs > pipe->max_usage) {
1393                ret = -EIO;
1394                goto out;
1395        }
1396
1397        for (ret = total = 0; page_nr < cs.nr_segs; total += ret) {
1398                /*
1399                 * Need to be careful about this.  Having buf->ops in module
1400                 * code can Oops if the buffer persists after module unload.
1401                 */
1402                bufs[page_nr].ops = &nosteal_pipe_buf_ops;
1403                bufs[page_nr].flags = 0;
1404                ret = add_to_pipe(pipe, &bufs[page_nr++]);
1405                if (unlikely(ret < 0))
1406                        break;
1407        }
1408        if (total)
1409                ret = total;
1410out:
1411        for (; page_nr < cs.nr_segs; page_nr++)
1412                put_page(bufs[page_nr].page);
1413
1414        kvfree(bufs);
1415        return ret;
1416}
1417
1418static int fuse_notify_poll(struct fuse_conn *fc, unsigned int size,
1419                            struct fuse_copy_state *cs)
1420{
1421        struct fuse_notify_poll_wakeup_out outarg;
1422        int err = -EINVAL;
1423
1424        if (size != sizeof(outarg))
1425                goto err;
1426
1427        err = fuse_copy_one(cs, &outarg, sizeof(outarg));
1428        if (err)
1429                goto err;
1430
1431        fuse_copy_finish(cs);
1432        return fuse_notify_poll_wakeup(fc, &outarg);
1433
1434err:
1435        fuse_copy_finish(cs);
1436        return err;
1437}
1438
1439static int fuse_notify_inval_inode(struct fuse_conn *fc, unsigned int size,
1440                                   struct fuse_copy_state *cs)
1441{
1442        struct fuse_notify_inval_inode_out outarg;
1443        int err = -EINVAL;
1444
1445        if (size != sizeof(outarg))
1446                goto err;
1447
1448        err = fuse_copy_one(cs, &outarg, sizeof(outarg));
1449        if (err)
1450                goto err;
1451        fuse_copy_finish(cs);
1452
1453        down_read(&fc->killsb);
1454        err = fuse_reverse_inval_inode(fc, outarg.ino,
1455                                       outarg.off, outarg.len);
1456        up_read(&fc->killsb);
1457        return err;
1458
1459err:
1460        fuse_copy_finish(cs);
1461        return err;
1462}
1463
1464static int fuse_notify_inval_entry(struct fuse_conn *fc, unsigned int size,
1465                                   struct fuse_copy_state *cs)
1466{
1467        struct fuse_notify_inval_entry_out outarg;
1468        int err = -ENOMEM;
1469        char *buf;
1470        struct qstr name;
1471
1472        buf = kzalloc(FUSE_NAME_MAX + 1, GFP_KERNEL);
1473        if (!buf)
1474                goto err;
1475
1476        err = -EINVAL;
1477        if (size < sizeof(outarg))
1478                goto err;
1479
1480        err = fuse_copy_one(cs, &outarg, sizeof(outarg));
1481        if (err)
1482                goto err;
1483
1484        err = -ENAMETOOLONG;
1485        if (outarg.namelen > FUSE_NAME_MAX)
1486                goto err;
1487
1488        err = -EINVAL;
1489        if (size != sizeof(outarg) + outarg.namelen + 1)
1490                goto err;
1491
1492        name.name = buf;
1493        name.len = outarg.namelen;
1494        err = fuse_copy_one(cs, buf, outarg.namelen + 1);
1495        if (err)
1496                goto err;
1497        fuse_copy_finish(cs);
1498        buf[outarg.namelen] = 0;
1499
1500        down_read(&fc->killsb);
1501        err = fuse_reverse_inval_entry(fc, outarg.parent, 0, &name);
1502        up_read(&fc->killsb);
1503        kfree(buf);
1504        return err;
1505
1506err:
1507        kfree(buf);
1508        fuse_copy_finish(cs);
1509        return err;
1510}
1511
1512static int fuse_notify_delete(struct fuse_conn *fc, unsigned int size,
1513                              struct fuse_copy_state *cs)
1514{
1515        struct fuse_notify_delete_out outarg;
1516        int err = -ENOMEM;
1517        char *buf;
1518        struct qstr name;
1519
1520        buf = kzalloc(FUSE_NAME_MAX + 1, GFP_KERNEL);
1521        if (!buf)
1522                goto err;
1523
1524        err = -EINVAL;
1525        if (size < sizeof(outarg))
1526                goto err;
1527
1528        err = fuse_copy_one(cs, &outarg, sizeof(outarg));
1529        if (err)
1530                goto err;
1531
1532        err = -ENAMETOOLONG;
1533        if (outarg.namelen > FUSE_NAME_MAX)
1534                goto err;
1535
1536        err = -EINVAL;
1537        if (size != sizeof(outarg) + outarg.namelen + 1)
1538                goto err;
1539
1540        name.name = buf;
1541        name.len = outarg.namelen;
1542        err = fuse_copy_one(cs, buf, outarg.namelen + 1);
1543        if (err)
1544                goto err;
1545        fuse_copy_finish(cs);
1546        buf[outarg.namelen] = 0;
1547
1548        down_read(&fc->killsb);
1549        err = fuse_reverse_inval_entry(fc, outarg.parent, outarg.child, &name);
1550        up_read(&fc->killsb);
1551        kfree(buf);
1552        return err;
1553
1554err:
1555        kfree(buf);
1556        fuse_copy_finish(cs);
1557        return err;
1558}
1559
1560static int fuse_notify_store(struct fuse_conn *fc, unsigned int size,
1561                             struct fuse_copy_state *cs)
1562{
1563        struct fuse_notify_store_out outarg;
1564        struct inode *inode;
1565        struct address_space *mapping;
1566        u64 nodeid;
1567        int err;
1568        pgoff_t index;
1569        unsigned int offset;
1570        unsigned int num;
1571        loff_t file_size;
1572        loff_t end;
1573
1574        err = -EINVAL;
1575        if (size < sizeof(outarg))
1576                goto out_finish;
1577
1578        err = fuse_copy_one(cs, &outarg, sizeof(outarg));
1579        if (err)
1580                goto out_finish;
1581
1582        err = -EINVAL;
1583        if (size - sizeof(outarg) != outarg.size)
1584                goto out_finish;
1585
1586        nodeid = outarg.nodeid;
1587
1588        down_read(&fc->killsb);
1589
1590        err = -ENOENT;
1591        inode = fuse_ilookup(fc, nodeid,  NULL);
1592        if (!inode)
1593                goto out_up_killsb;
1594
1595        mapping = inode->i_mapping;
1596        index = outarg.offset >> PAGE_SHIFT;
1597        offset = outarg.offset & ~PAGE_MASK;
1598        file_size = i_size_read(inode);
1599        end = outarg.offset + outarg.size;
1600        if (end > file_size) {
1601                file_size = end;
1602                fuse_write_update_attr(inode, file_size, outarg.size);
1603        }
1604
1605        num = outarg.size;
1606        while (num) {
1607                struct page *page;
1608                unsigned int this_num;
1609
1610                err = -ENOMEM;
1611                page = find_or_create_page(mapping, index,
1612                                           mapping_gfp_mask(mapping));
1613                if (!page)
1614                        goto out_iput;
1615
1616                this_num = min_t(unsigned, num, PAGE_SIZE - offset);
1617                err = fuse_copy_page(cs, &page, offset, this_num, 0);
1618                if (!err && offset == 0 &&
1619                    (this_num == PAGE_SIZE || file_size == end))
1620                        SetPageUptodate(page);
1621                unlock_page(page);
1622                put_page(page);
1623
1624                if (err)
1625                        goto out_iput;
1626
1627                num -= this_num;
1628                offset = 0;
1629                index++;
1630        }
1631
1632        err = 0;
1633
1634out_iput:
1635        iput(inode);
1636out_up_killsb:
1637        up_read(&fc->killsb);
1638out_finish:
1639        fuse_copy_finish(cs);
1640        return err;
1641}
1642
1643struct fuse_retrieve_args {
1644        struct fuse_args_pages ap;
1645        struct fuse_notify_retrieve_in inarg;
1646};
1647
1648static void fuse_retrieve_end(struct fuse_mount *fm, struct fuse_args *args,
1649                              int error)
1650{
1651        struct fuse_retrieve_args *ra =
1652                container_of(args, typeof(*ra), ap.args);
1653
1654        release_pages(ra->ap.pages, ra->ap.num_pages);
1655        kfree(ra);
1656}
1657
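/*
 * FUSE_NOTIFY_RETRIEVE: send cached data back to the server.  Collect up to
 * fc->max_pages pages (at most fc->max_write bytes) from the page cache
 * starting at outarg->offset, clamped to the current file size, and answer
 * with a FUSE_NOTIFY_REPLY request tagged with outarg->notify_unique.
 * A page that is not present in the cache ends the scan early.
 */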
1658static int fuse_retrieve(struct fuse_mount *fm, struct inode *inode,
1659                         struct fuse_notify_retrieve_out *outarg)
1660{
1661        int err;
1662        struct address_space *mapping = inode->i_mapping;
1663        pgoff_t index;
1664        loff_t file_size;
1665        unsigned int num;
1666        unsigned int offset;
1667        size_t total_len = 0;
1668        unsigned int num_pages;
1669        struct fuse_conn *fc = fm->fc;
1670        struct fuse_retrieve_args *ra;
1671        size_t args_size = sizeof(*ra);
1672        struct fuse_args_pages *ap;
1673        struct fuse_args *args;
1674
1675        offset = outarg->offset & ~PAGE_MASK;
1676        file_size = i_size_read(inode);
1677
1678        num = min(outarg->size, fc->max_write);
1679        if (outarg->offset > file_size)
1680                num = 0;
1681        else if (outarg->offset + num > file_size)
1682                num = file_size - outarg->offset;
1683
1684        num_pages = (num + offset + PAGE_SIZE - 1) >> PAGE_SHIFT;
1685        num_pages = min(num_pages, fc->max_pages);
1686
1687        args_size += num_pages * (sizeof(ap->pages[0]) + sizeof(ap->descs[0]));
1688
1689        ra = kzalloc(args_size, GFP_KERNEL);
1690        if (!ra)
1691                return -ENOMEM;
1692
1693        ap = &ra->ap;
1694        ap->pages = (void *) (ra + 1);
1695        ap->descs = (void *) (ap->pages + num_pages);
1696
1697        args = &ap->args;
1698        args->nodeid = outarg->nodeid;
1699        args->opcode = FUSE_NOTIFY_REPLY;
1700        args->in_numargs = 2;
1701        args->in_pages = true;
1702        args->end = fuse_retrieve_end;
1703
1704        index = outarg->offset >> PAGE_SHIFT;
1705
1706        while (num && ap->num_pages < num_pages) {
1707                struct page *page;
1708                unsigned int this_num;
1709
1710                page = find_get_page(mapping, index);
1711                if (!page)
1712                        break;
1713
1714                this_num = min_t(unsigned, num, PAGE_SIZE - offset);
1715                ap->pages[ap->num_pages] = page;
1716                ap->descs[ap->num_pages].offset = offset;
1717                ap->descs[ap->num_pages].length = this_num;
1718                ap->num_pages++;
1719
1720                offset = 0;
1721                num -= this_num;
1722                total_len += this_num;
1723                index++;
1724        }
1725        ra->inarg.offset = outarg->offset;
1726        ra->inarg.size = total_len;
1727        args->in_args[0].size = sizeof(ra->inarg);
1728        args->in_args[0].value = &ra->inarg;
1729        args->in_args[1].size = total_len;
1730
1731        err = fuse_simple_notify_reply(fm, args, outarg->notify_unique);
1732        if (err)
1733                fuse_retrieve_end(fm, args, err);
1734
1735        return err;
1736}
1737
1738static int fuse_notify_retrieve(struct fuse_conn *fc, unsigned int size,
1739                                struct fuse_copy_state *cs)
1740{
1741        struct fuse_notify_retrieve_out outarg;
1742        struct fuse_mount *fm;
1743        struct inode *inode;
1744        u64 nodeid;
1745        int err;
1746
1747        err = -EINVAL;
1748        if (size != sizeof(outarg))
1749                goto copy_finish;
1750
1751        err = fuse_copy_one(cs, &outarg, sizeof(outarg));
1752        if (err)
1753                goto copy_finish;
1754
1755        fuse_copy_finish(cs);
1756
1757        down_read(&fc->killsb);
1758        err = -ENOENT;
1759        nodeid = outarg.nodeid;
1760
1761        inode = fuse_ilookup(fc, nodeid, &fm);
1762        if (inode) {
1763                err = fuse_retrieve(fm, inode, &outarg);
1764                iput(inode);
1765        }
1766        up_read(&fc->killsb);
1767
1768        return err;
1769
1770copy_finish:
1771        fuse_copy_finish(cs);
1772        return err;
1773}
1774
1775static int fuse_notify(struct fuse_conn *fc, enum fuse_notify_code code,
1776                       unsigned int size, struct fuse_copy_state *cs)
1777{
1778        /* Don't try to move pages (yet) */
1779        cs->move_pages = 0;
1780
1781        switch (code) {
1782        case FUSE_NOTIFY_POLL:
1783                return fuse_notify_poll(fc, size, cs);
1784
1785        case FUSE_NOTIFY_INVAL_INODE:
1786                return fuse_notify_inval_inode(fc, size, cs);
1787
1788        case FUSE_NOTIFY_INVAL_ENTRY:
1789                return fuse_notify_inval_entry(fc, size, cs);
1790
1791        case FUSE_NOTIFY_STORE:
1792                return fuse_notify_store(fc, size, cs);
1793
1794        case FUSE_NOTIFY_RETRIEVE:
1795                return fuse_notify_retrieve(fc, size, cs);
1796
1797        case FUSE_NOTIFY_DELETE:
1798                return fuse_notify_delete(fc, size, cs);
1799
1800        default:
1801                fuse_copy_finish(cs);
1802                return -EINVAL;
1803        }
1804}
1805
1806/* Look up request on processing list by unique ID */
1807static struct fuse_req *request_find(struct fuse_pqueue *fpq, u64 unique)
1808{
1809        unsigned int hash = fuse_req_hash(unique);
1810        struct fuse_req *req;
1811
1812        list_for_each_entry(req, &fpq->processing[hash], list) {
1813                if (req->in.h.unique == unique)
1814                        return req;
1815        }
1816        return NULL;
1817}
1818
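/*
 * Check that the reply payload matches what the request expects.  The reply
 * must never be larger than expected; a shorter reply is accepted only when
 * the last output argument is variable length (out_argvar), in which case
 * that argument is truncated to the bytes actually supplied.
 */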
1819static int copy_out_args(struct fuse_copy_state *cs, struct fuse_args *args,
1820                         unsigned nbytes)
1821{
1822        unsigned reqsize = sizeof(struct fuse_out_header);
1823
1824        reqsize += fuse_len_args(args->out_numargs, args->out_args);
1825
1826        if (reqsize < nbytes || (reqsize > nbytes && !args->out_argvar))
1827                return -EINVAL;
1828        else if (reqsize > nbytes) {
1829                struct fuse_arg *lastarg = &args->out_args[args->out_numargs-1];
1830                unsigned diffsize = reqsize - nbytes;
1831
1832                if (diffsize > lastarg->size)
1833                        return -EINVAL;
1834                lastarg->size -= diffsize;
1835        }
1836        return fuse_copy_args(cs, args->out_numargs, args->out_pages,
1837                              args->out_args, args->page_zeroing);
1838}
1839
1840/*
1841 * Write a single reply to a request.  First the header is copied from
1842 * the write buffer.  The request is then searched on the processing
1843 * list by the unique ID found in the header.  If found, then remove
1844 * it from the list and copy the rest of the buffer to the request.
1845 * The request is finished by calling fuse_request_end().
1846 */
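/*
 * A rough sketch of what the daemon writes for one reply (illustrative only,
 * not part of this file; "in_header_unique" stands for the unique ID taken
 * from the corresponding request).  The buffer starts with a fuse_out_header
 * followed by the opcode-specific output arguments, written in a single
 * write() whose size equals oh.len:
 *
 *	struct fuse_out_header oh = {
 *		.len    = sizeof(oh) + sizeof(outarg),	// total reply size
 *		.error  = 0,				// negative errno on failure
 *		.unique = in_header_unique,		// copied from the request
 *	};
 *	// write(fuse_fd, [oh][outarg], oh.len);
 */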
1847static ssize_t fuse_dev_do_write(struct fuse_dev *fud,
1848                                 struct fuse_copy_state *cs, size_t nbytes)
1849{
1850        int err;
1851        struct fuse_conn *fc = fud->fc;
1852        struct fuse_pqueue *fpq = &fud->pq;
1853        struct fuse_req *req;
1854        struct fuse_out_header oh;
1855
1856        err = -EINVAL;
1857        if (nbytes < sizeof(struct fuse_out_header))
1858                goto out;
1859
1860        err = fuse_copy_one(cs, &oh, sizeof(oh));
1861        if (err)
1862                goto copy_finish;
1863
1864        err = -EINVAL;
1865        if (oh.len != nbytes)
1866                goto copy_finish;
1867
1868        /*
1869         * A zero oh.unique indicates an unsolicited notification message;
1870         * in that case oh.error carries the notification code.
1871         */
1872        if (!oh.unique) {
1873                err = fuse_notify(fc, oh.error, nbytes - sizeof(oh), cs);
1874                goto out;
1875        }
1876
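        /*
         * The error value must be zero or a negative errno; values at or
         * below -512 would collide with kernel-internal codes such as
         * ERESTARTSYS and are rejected.
         */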
1877        err = -EINVAL;
1878        if (oh.error <= -512 || oh.error > 0)
1879                goto copy_finish;
1880
1881        spin_lock(&fpq->lock);
1882        req = NULL;
1883        if (fpq->connected)
1884                req = request_find(fpq, oh.unique & ~FUSE_INT_REQ_BIT);
1885
1886        err = -ENOENT;
1887        if (!req) {
1888                spin_unlock(&fpq->lock);
1889                goto copy_finish;
1890        }
1891
1892        /* Is it an interrupt reply ID? */
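        /*
         * Interrupt requests carry FUSE_INT_REQ_BIT in their unique ID and
         * their replies have no payload: -ENOSYS means the daemon does not
         * implement FUSE_INTERRUPT at all, while -EAGAIN asks for the
         * interrupt to be queued again.
         */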
1893        if (oh.unique & FUSE_INT_REQ_BIT) {
1894                __fuse_get_request(req);
1895                spin_unlock(&fpq->lock);
1896
1897                err = 0;
1898                if (nbytes != sizeof(struct fuse_out_header))
1899                        err = -EINVAL;
1900                else if (oh.error == -ENOSYS)
1901                        fc->no_interrupt = 1;
1902                else if (oh.error == -EAGAIN)
1903                        err = queue_interrupt(req);
1904
1905                fuse_put_request(req);
1906
1907                goto copy_finish;
1908        }
1909
1910        clear_bit(FR_SENT, &req->flags);
1911        list_move(&req->list, &fpq->io);
1912        req->out.h = oh;
1913        set_bit(FR_LOCKED, &req->flags);
1914        spin_unlock(&fpq->lock);
1915        cs->req = req;
1916        if (!req->args->page_replace)
1917                cs->move_pages = 0;
1918
1919        if (oh.error)
1920                err = nbytes != sizeof(oh) ? -EINVAL : 0;
1921        else
1922                err = copy_out_args(cs, req->args, nbytes);
1923        fuse_copy_finish(cs);
1924
1925        spin_lock(&fpq->lock);
1926        clear_bit(FR_LOCKED, &req->flags);
1927        if (!fpq->connected)
1928                err = -ENOENT;
1929        else if (err)
1930                req->out.h.error = -EIO;
1931        if (!test_bit(FR_PRIVATE, &req->flags))
1932                list_del_init(&req->list);
1933        spin_unlock(&fpq->lock);
1934
1935        fuse_request_end(req);
1936out:
1937        return err ? err : nbytes;
1938
1939copy_finish:
1940        fuse_copy_finish(cs);
1941        goto out;
1942}
1943
1944static ssize_t fuse_dev_write(struct kiocb *iocb, struct iov_iter *from)
1945{
1946        struct fuse_copy_state cs;
1947        struct fuse_dev *fud = fuse_get_dev(iocb->ki_filp);
1948
1949        if (!fud)
1950                return -EPERM;
1951
1952        if (!iter_is_iovec(from))
1953                return -EINVAL;
1954
1955        fuse_copy_init(&cs, 0, from);
1956
1957        return fuse_dev_do_write(fud, &cs, iov_iter_count(from));
1958}
1959
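/*
 * splice() from a pipe to the device: steal (or take a reference on) enough
 * pipe buffers to cover len bytes and feed them to fuse_dev_do_write().
 * With SPLICE_F_MOVE the pages may be moved into the destination page cache
 * instead of being copied.
 */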
1960static ssize_t fuse_dev_splice_write(struct pipe_inode_info *pipe,
1961                                     struct file *out, loff_t *ppos,
1962                                     size_t len, unsigned int flags)
1963{
1964        unsigned int head, tail, mask, count;
1965        unsigned nbuf;
1966        unsigned idx;
1967        struct pipe_buffer *bufs;
1968        struct fuse_copy_state cs;
1969        struct fuse_dev *fud;
1970        size_t rem;
1971        ssize_t ret;
1972
1973        fud = fuse_get_dev(out);
1974        if (!fud)
1975                return -EPERM;
1976
1977        pipe_lock(pipe);
1978
1979        head = pipe->head;
1980        tail = pipe->tail;
1981        mask = pipe->ring_size - 1;
1982        count = head - tail;
1983
1984        bufs = kvmalloc_array(count, sizeof(struct pipe_buffer), GFP_KERNEL);
1985        if (!bufs) {
1986                pipe_unlock(pipe);
1987                return -ENOMEM;
1988        }
1989
1990        nbuf = 0;
1991        rem = 0;
1992        for (idx = tail; idx != head && rem < len; idx++)
1993                rem += pipe->bufs[idx & mask].len;
1994
1995        ret = -EINVAL;
1996        if (rem < len)
1997                goto out_free;
1998
1999        rem = len;
2000        while (rem) {
2001                struct pipe_buffer *ibuf;
2002                struct pipe_buffer *obuf;
2003
2004                if (WARN_ON(nbuf >= count || tail == head))
2005                        goto out_free;
2006
2007                ibuf = &pipe->bufs[tail & mask];
2008                obuf = &bufs[nbuf];
2009
2010                if (rem >= ibuf->len) {
2011                        *obuf = *ibuf;
2012                        ibuf->ops = NULL;
2013                        tail++;
2014                        pipe->tail = tail;
2015                } else {
2016                        if (!pipe_buf_get(pipe, ibuf))
2017                                goto out_free;
2018
2019                        *obuf = *ibuf;
2020                        obuf->flags &= ~PIPE_BUF_FLAG_GIFT;
2021                        obuf->len = rem;
2022                        ibuf->offset += obuf->len;
2023                        ibuf->len -= obuf->len;
2024                }
2025                nbuf++;
2026                rem -= obuf->len;
2027        }
2028        pipe_unlock(pipe);
2029
2030        fuse_copy_init(&cs, 0, NULL);
2031        cs.pipebufs = bufs;
2032        cs.nr_segs = nbuf;
2033        cs.pipe = pipe;
2034
2035        if (flags & SPLICE_F_MOVE)
2036                cs.move_pages = 1;
2037
2038        ret = fuse_dev_do_write(fud, &cs, len);
2039
2040        pipe_lock(pipe);
2041out_free:
2042        for (idx = 0; idx < nbuf; idx++) {
2043                struct pipe_buffer *buf = &bufs[idx];
2044
2045                if (buf->ops)
2046                        pipe_buf_release(pipe, buf);
2047        }
2048        pipe_unlock(pipe);
2049
2050        kvfree(bufs);
2051        return ret;
2052}
2053
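/*
 * The device is always ready for writing; it becomes readable when there is
 * input queued for the daemon to read.
 */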
2054static __poll_t fuse_dev_poll(struct file *file, poll_table *wait)
2055{
2056        __poll_t mask = EPOLLOUT | EPOLLWRNORM;
2057        struct fuse_iqueue *fiq;
2058        struct fuse_dev *fud = fuse_get_dev(file);
2059
2060        if (!fud)
2061                return EPOLLERR;
2062
2063        fiq = &fud->fc->iq;
2064        poll_wait(file, &fiq->waitq, wait);
2065
2066        spin_lock(&fiq->lock);
2067        if (!fiq->connected)
2068                mask = EPOLLERR;
2069        else if (request_pending(fiq))
2070                mask |= EPOLLIN | EPOLLRDNORM;
2071        spin_unlock(&fiq->lock);
2072
2073        return mask;
2074}
2075
2076/* Abort all requests on the given list (pending or processing) */
2077static void end_requests(struct list_head *head)
2078{
2079        while (!list_empty(head)) {
2080                struct fuse_req *req;
2081                req = list_entry(head->next, struct fuse_req, list);
2082                req->out.h.error = -ECONNABORTED;
2083                clear_bit(FR_SENT, &req->flags);
2084                list_del_init(&req->list);
2085                fuse_request_end(req);
2086        }
2087}
2088
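/*
 * Wake up everyone sleeping in poll on a fuse file, so they notice that the
 * connection is going away.
 */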
2089static void end_polls(struct fuse_conn *fc)
2090{
2091        struct rb_node *p;
2092
2093        p = rb_first(&fc->polled_files);
2094
2095        while (p) {
2096                struct fuse_file *ff;
2097                ff = rb_entry(p, struct fuse_file, polled_node);
2098                wake_up_interruptible_all(&ff->poll_wait);
2099
2100                p = rb_next(p);
2101        }
2102}
2103
2104/*
2105 * Abort all requests.
2106 *
2107 * Emergency exit in case of a malicious or accidental deadlock, or just a hung
2108 * filesystem.
2109 *
2110 * The same effect is usually achievable through killing the filesystem daemon
2111 * and all users of the filesystem.  The exception is the combination of an
2112 * asynchronous request and the tricky deadlock (see
2113 * Documentation/filesystems/fuse.rst).
2114 *
2115 * Aborting requests under I/O goes as follows: 1: Separate out unlocked
2116 * requests; they should be finished off immediately.  Locked requests will be
2117 * finished after unlock; see unlock_request(). 2: Finish off the unlocked
2118 * requests.  It is possible that some request will finish before we can;
2119 * that is OK, since such a request is removed from the list before we
2120 * touch it.
2121 */
2122void fuse_abort_conn(struct fuse_conn *fc)
2123{
2124        struct fuse_iqueue *fiq = &fc->iq;
2125
2126        spin_lock(&fc->lock);
2127        if (fc->connected) {
2128                struct fuse_dev *fud;
2129                struct fuse_req *req, *next;
2130                LIST_HEAD(to_end);
2131                unsigned int i;
2132
2133                /* Background queuing checks fc->connected under bg_lock */
2134                spin_lock(&fc->bg_lock);
2135                fc->connected = 0;
2136                spin_unlock(&fc->bg_lock);
2137
2138                fuse_set_initialized(fc);
2139                list_for_each_entry(fud, &fc->devices, entry) {
2140                        struct fuse_pqueue *fpq = &fud->pq;
2141
2142                        spin_lock(&fpq->lock);
2143                        fpq->connected = 0;
2144                        list_for_each_entry_safe(req, next, &fpq->io, list) {
2145                                req->out.h.error = -ECONNABORTED;
2146                                spin_lock(&req->waitq.lock);
2147                                set_bit(FR_ABORTED, &req->flags);
2148                                if (!test_bit(FR_LOCKED, &req->flags)) {
2149                                        set_bit(FR_PRIVATE, &req->flags);
2150                                        __fuse_get_request(req);
2151                                        list_move(&req->list, &to_end);
2152                                }
2153                                spin_unlock(&req->waitq.lock);
2154                        }
2155                        for (i = 0; i < FUSE_PQ_HASH_SIZE; i++)
2156                                list_splice_tail_init(&fpq->processing[i],
2157                                                      &to_end);
2158                        spin_unlock(&fpq->lock);
2159                }
2160                spin_lock(&fc->bg_lock);
2161                fc->blocked = 0;
2162                fc->max_background = UINT_MAX;
2163                flush_bg_queue(fc);
2164                spin_unlock(&fc->bg_lock);
2165
2166                spin_lock(&fiq->lock);
2167                fiq->connected = 0;
2168                list_for_each_entry(req, &fiq->pending, list)
2169                        clear_bit(FR_PENDING, &req->flags);
2170                list_splice_tail_init(&fiq->pending, &to_end);
2171                while (forget_pending(fiq))
2172                        kfree(fuse_dequeue_forget(fiq, 1, NULL));
2173                wake_up_all(&fiq->waitq);
2174                spin_unlock(&fiq->lock);
2175                kill_fasync(&fiq->fasync, SIGIO, POLL_IN);
2176                end_polls(fc);
2177                wake_up_all(&fc->blocked_waitq);
2178                spin_unlock(&fc->lock);
2179
2180                end_requests(&to_end);
2181        } else {
2182                spin_unlock(&fc->lock);
2183        }
2184}
2185EXPORT_SYMBOL_GPL(fuse_abort_conn);
2186
2187void fuse_wait_aborted(struct fuse_conn *fc)
2188{
2189        /* matches implicit memory barrier in fuse_drop_waiting() */
2190        smp_mb();
2191        wait_event(fc->blocked_waitq, atomic_read(&fc->num_waiting) == 0);
2192}
2193
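/*
 * Device file release: abort whatever is still sitting on this device's
 * processing queues, and if this was the last open device for the
 * connection, abort the connection itself.
 */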
2194int fuse_dev_release(struct inode *inode, struct file *file)
2195{
2196        struct fuse_dev *fud = fuse_get_dev(file);
2197
2198        if (fud) {
2199                struct fuse_conn *fc = fud->fc;
2200                struct fuse_pqueue *fpq = &fud->pq;
2201                LIST_HEAD(to_end);
2202                unsigned int i;
2203
2204                spin_lock(&fpq->lock);
2205                WARN_ON(!list_empty(&fpq->io));
2206                for (i = 0; i < FUSE_PQ_HASH_SIZE; i++)
2207                        list_splice_init(&fpq->processing[i], &to_end);
2208                spin_unlock(&fpq->lock);
2209
2210                end_requests(&to_end);
2211
2212                /* Are we the last open device? */
2213                if (atomic_dec_and_test(&fc->dev_count)) {
2214                        WARN_ON(fc->iq.fasync != NULL);
2215                        fuse_abort_conn(fc);
2216                }
2217                fuse_dev_free(fud);
2218        }
2219        return 0;
2220}
2221EXPORT_SYMBOL_GPL(fuse_dev_release);
2222
2223static int fuse_dev_fasync(int fd, struct file *file, int on)
2224{
2225        struct fuse_dev *fud = fuse_get_dev(file);
2226
2227        if (!fud)
2228                return -EPERM;
2229
2230        /* No locking - fasync_helper does its own locking */
2231        return fasync_helper(fd, file, on, &fud->fc->iq.fasync);
2232}
2233
2234static int fuse_device_clone(struct fuse_conn *fc, struct file *new)
2235{
2236        struct fuse_dev *fud;
2237
2238        if (new->private_data)
2239                return -EINVAL;
2240
2241        fud = fuse_dev_alloc_install(fc);
2242        if (!fud)
2243                return -ENOMEM;
2244
2245        new->private_data = fud;
2246        atomic_inc(&fc->dev_count);
2247
2248        return 0;
2249}
2250
2251static long fuse_dev_ioctl(struct file *file, unsigned int cmd,
2252                           unsigned long arg)
2253{
2254        int res;
2255        int oldfd;
2256        struct fuse_dev *fud = NULL;
2257
2258        switch (cmd) {
2259        case FUSE_DEV_IOC_CLONE:
2260                res = -EFAULT;
2261                if (!get_user(oldfd, (__u32 __user *)arg)) {
2262                        struct file *old = fget(oldfd);
2263
2264                        res = -EINVAL;
2265                        if (old) {
2266                                /*
2267                                 * Check against file->f_op because CUSE
2268                                 * uses the same ioctl handler.
2269                                 */
2270                                if (old->f_op == file->f_op &&
2271                                    old->f_cred->user_ns == file->f_cred->user_ns)
2272                                        fud = fuse_get_dev(old);
2273
2274                                if (fud) {
2275                                        mutex_lock(&fuse_mutex);
2276                                        res = fuse_device_clone(fud->fc, file);
2277                                        mutex_unlock(&fuse_mutex);
2278                                }
2279                                fput(old);
2280                        }
2281                }
2282                break;
2283        default:
2284                res = -ENOTTY;
2285                break;
2286        }
2287        return res;
2288}
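
/*
 * A minimal sketch of how a daemon clones a device fd with
 * FUSE_DEV_IOC_CLONE (illustrative only; "session_fd" stands for the
 * already mounted /dev/fuse fd, error handling is omitted, and both fds
 * must have been opened in the same user namespace):
 *
 *	int clone_fd = open("/dev/fuse", O_RDWR | O_CLOEXEC);
 *	uint32_t oldfd = session_fd;
 *	ioctl(clone_fd, FUSE_DEV_IOC_CLONE, &oldfd);
 *	// clone_fd can now read requests and write replies for the same
 *	// connection from another thread or process
 */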
2289
2290const struct file_operations fuse_dev_operations = {
2291        .owner          = THIS_MODULE,
2292        .open           = fuse_dev_open,
2293        .llseek         = no_llseek,
2294        .read_iter      = fuse_dev_read,
2295        .splice_read    = fuse_dev_splice_read,
2296        .write_iter     = fuse_dev_write,
2297        .splice_write   = fuse_dev_splice_write,
2298        .poll           = fuse_dev_poll,
2299        .release        = fuse_dev_release,
2300        .fasync         = fuse_dev_fasync,
2301        .unlocked_ioctl = fuse_dev_ioctl,
2302        .compat_ioctl   = compat_ptr_ioctl,
2303};
2304EXPORT_SYMBOL_GPL(fuse_dev_operations);
2305
2306static struct miscdevice fuse_miscdevice = {
2307        .minor = FUSE_MINOR,
2308        .name  = "fuse",
2309        .fops = &fuse_dev_operations,
2310};
2311
2312int __init fuse_dev_init(void)
2313{
2314        int err = -ENOMEM;
2315        fuse_req_cachep = kmem_cache_create("fuse_request",
2316                                            sizeof(struct fuse_req),
2317                                            0, 0, NULL);
2318        if (!fuse_req_cachep)
2319                goto out;
2320
2321        err = misc_register(&fuse_miscdevice);
2322        if (err)
2323                goto out_cache_clean;
2324
2325        return 0;
2326
2327 out_cache_clean:
2328        kmem_cache_destroy(fuse_req_cachep);
2329 out:
2330        return err;
2331}
2332
2333void fuse_dev_cleanup(void)
2334{
2335        misc_deregister(&fuse_miscdevice);
2336        kmem_cache_destroy(fuse_req_cachep);
2337}
2338