linux/fs/fuse/dev.c
   1/*
   2  FUSE: Filesystem in Userspace
   3  Copyright (C) 2001-2008  Miklos Szeredi <miklos@szeredi.hu>
   4
   5  This program can be distributed under the terms of the GNU GPL.
   6  See the file COPYING.
   7*/
   8
   9#include "fuse_i.h"
  10
  11#include <linux/init.h>
  12#include <linux/module.h>
  13#include <linux/poll.h>
  14#include <linux/uio.h>
  15#include <linux/miscdevice.h>
  16#include <linux/pagemap.h>
  17#include <linux/file.h>
  18#include <linux/slab.h>
  19#include <linux/pipe_fs_i.h>
  20#include <linux/swap.h>
  21#include <linux/splice.h>
  22#include <linux/aio.h>
  23
  24MODULE_ALIAS_MISCDEV(FUSE_MINOR);
  25MODULE_ALIAS("devname:fuse");
  26
  27static struct kmem_cache *fuse_req_cachep;
  28
  29static struct fuse_conn *fuse_get_conn(struct file *file)
  30{
  31        /*
   32         * Lockless access is OK, because file->private_data is set
  33         * once during mount and is valid until the file is released.
  34         */
  35        return file->private_data;
  36}
  37
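/*
 * Reset a request to a clean state: zero the request and its page
 * descriptor arrays, reinitialize the list heads and wait queue, and
 * take the initial reference.
 */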
  38static void fuse_request_init(struct fuse_req *req, struct page **pages,
  39                              struct fuse_page_desc *page_descs,
  40                              unsigned npages)
  41{
  42        memset(req, 0, sizeof(*req));
  43        memset(pages, 0, sizeof(*pages) * npages);
  44        memset(page_descs, 0, sizeof(*page_descs) * npages);
  45        INIT_LIST_HEAD(&req->list);
  46        INIT_LIST_HEAD(&req->intr_entry);
  47        init_waitqueue_head(&req->waitq);
  48        atomic_set(&req->count, 1);
  49        req->pages = pages;
  50        req->page_descs = page_descs;
  51        req->max_pages = npages;
  52}
  53
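/*
 * Allocate a request from the slab cache.  Small requests use the page
 * arrays embedded in struct fuse_req; larger ones get separately
 * kmalloc'ed arrays.  Returns NULL if any allocation fails.
 */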
  54static struct fuse_req *__fuse_request_alloc(unsigned npages, gfp_t flags)
  55{
  56        struct fuse_req *req = kmem_cache_alloc(fuse_req_cachep, flags);
  57        if (req) {
  58                struct page **pages;
  59                struct fuse_page_desc *page_descs;
  60
  61                if (npages <= FUSE_REQ_INLINE_PAGES) {
  62                        pages = req->inline_pages;
  63                        page_descs = req->inline_page_descs;
  64                } else {
  65                        pages = kmalloc(sizeof(struct page *) * npages, flags);
  66                        page_descs = kmalloc(sizeof(struct fuse_page_desc) *
  67                                             npages, flags);
  68                }
  69
  70                if (!pages || !page_descs) {
  71                        kfree(pages);
  72                        kfree(page_descs);
  73                        kmem_cache_free(fuse_req_cachep, req);
  74                        return NULL;
  75                }
  76
  77                fuse_request_init(req, pages, page_descs, npages);
  78        }
  79        return req;
  80}
  81
  82struct fuse_req *fuse_request_alloc(unsigned npages)
  83{
  84        return __fuse_request_alloc(npages, GFP_KERNEL);
  85}
  86EXPORT_SYMBOL_GPL(fuse_request_alloc);
  87
  88struct fuse_req *fuse_request_alloc_nofs(unsigned npages)
  89{
  90        return __fuse_request_alloc(npages, GFP_NOFS);
  91}
  92
  93void fuse_request_free(struct fuse_req *req)
  94{
  95        if (req->pages != req->inline_pages) {
  96                kfree(req->pages);
  97                kfree(req->page_descs);
  98        }
  99        kmem_cache_free(fuse_req_cachep, req);
 100}
 101
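/*
 * Block every signal except SIGKILL, saving the old mask so that
 * restore_sigs() can put it back after the wait.
 */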
 102static void block_sigs(sigset_t *oldset)
 103{
 104        sigset_t mask;
 105
 106        siginitsetinv(&mask, sigmask(SIGKILL));
 107        sigprocmask(SIG_BLOCK, &mask, oldset);
 108}
 109
 110static void restore_sigs(sigset_t *oldset)
 111{
 112        sigprocmask(SIG_SETMASK, oldset, NULL);
 113}
 114
 115void __fuse_get_request(struct fuse_req *req)
 116{
 117        atomic_inc(&req->count);
 118}
 119
 120/* Must be called with > 1 refcount */
 121static void __fuse_put_request(struct fuse_req *req)
 122{
 123        BUG_ON(atomic_read(&req->count) < 2);
 124        atomic_dec(&req->count);
 125}
 126
 127static void fuse_req_init_context(struct fuse_req *req)
 128{
 129        req->in.h.uid = from_kuid_munged(&init_user_ns, current_fsuid());
 130        req->in.h.gid = from_kgid_munged(&init_user_ns, current_fsgid());
 131        req->in.h.pid = current->pid;
 132}
 133
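/*
 * Request allocation must wait until fc->initialized is set, and
 * background requests must additionally wait while fc->blocked is set.
 */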
 134static bool fuse_block_alloc(struct fuse_conn *fc, bool for_background)
 135{
 136        return !fc->initialized || (for_background && fc->blocked);
 137}
 138
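/*
 * Common helper for fuse_get_req() and fuse_get_req_for_background():
 * wait while allocation is blocked (only SIGKILL interrupts the wait),
 * allocate a request, fill in the caller's credentials and mark it as
 * waiting.  Returns ERR_PTR(-EINTR/-ENOTCONN/-ENOMEM) on failure.
 */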
 139static struct fuse_req *__fuse_get_req(struct fuse_conn *fc, unsigned npages,
 140                                       bool for_background)
 141{
 142        struct fuse_req *req;
 143        int err;
 144        atomic_inc(&fc->num_waiting);
 145
 146        if (fuse_block_alloc(fc, for_background)) {
 147                sigset_t oldset;
 148                int intr;
 149
 150                block_sigs(&oldset);
 151                intr = wait_event_interruptible_exclusive(fc->blocked_waitq,
 152                                !fuse_block_alloc(fc, for_background));
 153                restore_sigs(&oldset);
 154                err = -EINTR;
 155                if (intr)
 156                        goto out;
 157        }
 158
 159        err = -ENOTCONN;
 160        if (!fc->connected)
 161                goto out;
 162
 163        req = fuse_request_alloc(npages);
 164        err = -ENOMEM;
 165        if (!req) {
 166                if (for_background)
 167                        wake_up(&fc->blocked_waitq);
 168                goto out;
 169        }
 170
 171        fuse_req_init_context(req);
 172        req->waiting = 1;
 173        req->background = for_background;
 174        return req;
 175
 176 out:
 177        atomic_dec(&fc->num_waiting);
 178        return ERR_PTR(err);
 179}
 180
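/*
 * Typical caller pattern (a sketch; see the users in the rest of
 * fs/fuse): allocate with fuse_get_req(), fill in req->in, send with
 * fuse_request_send(), read the result from req->out.h.error, then
 * drop the reference with fuse_put_request().
 */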
 181struct fuse_req *fuse_get_req(struct fuse_conn *fc, unsigned npages)
 182{
 183        return __fuse_get_req(fc, npages, false);
 184}
 185EXPORT_SYMBOL_GPL(fuse_get_req);
 186
 187struct fuse_req *fuse_get_req_for_background(struct fuse_conn *fc,
 188                                             unsigned npages)
 189{
 190        return __fuse_get_req(fc, npages, true);
 191}
 192EXPORT_SYMBOL_GPL(fuse_get_req_for_background);
 193
 194/*
 195 * Return request in fuse_file->reserved_req.  However that may
 196 * currently be in use.  If that is the case, wait for it to become
 197 * available.
 198 */
 199static struct fuse_req *get_reserved_req(struct fuse_conn *fc,
 200                                         struct file *file)
 201{
 202        struct fuse_req *req = NULL;
 203        struct fuse_file *ff = file->private_data;
 204
 205        do {
 206                wait_event(fc->reserved_req_waitq, ff->reserved_req);
 207                spin_lock(&fc->lock);
 208                if (ff->reserved_req) {
 209                        req = ff->reserved_req;
 210                        ff->reserved_req = NULL;
 211                        req->stolen_file = get_file(file);
 212                }
 213                spin_unlock(&fc->lock);
 214        } while (!req);
 215
 216        return req;
 217}
 218
 219/*
 220 * Put stolen request back into fuse_file->reserved_req
 221 */
 222static void put_reserved_req(struct fuse_conn *fc, struct fuse_req *req)
 223{
 224        struct file *file = req->stolen_file;
 225        struct fuse_file *ff = file->private_data;
 226
 227        spin_lock(&fc->lock);
 228        fuse_request_init(req, req->pages, req->page_descs, req->max_pages);
 229        BUG_ON(ff->reserved_req);
 230        ff->reserved_req = req;
 231        wake_up_all(&fc->reserved_req_waitq);
 232        spin_unlock(&fc->lock);
 233        fput(file);
 234}
 235
 236/*
  237 * Gets a request for a file operation, always succeeds
 238 *
 239 * This is used for sending the FLUSH request, which must get to
 240 * userspace, due to POSIX locks which may need to be unlocked.
 241 *
 242 * If allocation fails due to OOM, use the reserved request in
 243 * fuse_file.
 244 *
 245 * This is very unlikely to deadlock accidentally, since the
  246 * filesystem should not have its own file open.  If deadlock is
 247 * intentional, it can still be broken by "aborting" the filesystem.
 248 */
 249struct fuse_req *fuse_get_req_nofail_nopages(struct fuse_conn *fc,
 250                                             struct file *file)
 251{
 252        struct fuse_req *req;
 253
 254        atomic_inc(&fc->num_waiting);
 255        wait_event(fc->blocked_waitq, fc->initialized);
 256        req = fuse_request_alloc(0);
 257        if (!req)
 258                req = get_reserved_req(fc, file);
 259
 260        fuse_req_init_context(req);
 261        req->waiting = 1;
 262        req->background = 0;
 263        return req;
 264}
 265
 266void fuse_put_request(struct fuse_conn *fc, struct fuse_req *req)
 267{
 268        if (atomic_dec_and_test(&req->count)) {
 269                if (unlikely(req->background)) {
 270                        /*
 271                         * We get here in the unlikely case that a background
 272                         * request was allocated but not sent
 273                         */
 274                        spin_lock(&fc->lock);
 275                        if (!fc->blocked)
 276                                wake_up(&fc->blocked_waitq);
 277                        spin_unlock(&fc->lock);
 278                }
 279
 280                if (req->waiting)
 281                        atomic_dec(&fc->num_waiting);
 282
 283                if (req->stolen_file)
 284                        put_reserved_req(fc, req);
 285                else
 286                        fuse_request_free(req);
 287        }
 288}
 289EXPORT_SYMBOL_GPL(fuse_put_request);
 290
 291static unsigned len_args(unsigned numargs, struct fuse_arg *args)
 292{
 293        unsigned nbytes = 0;
 294        unsigned i;
 295
 296        for (i = 0; i < numargs; i++)
 297                nbytes += args[i].size;
 298
 299        return nbytes;
 300}
 301
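/*
 * Produce the next unique request ID.  Zero is reserved, so the
 * counter skips it on wraparound.  Called with fc->lock held.
 */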
 302static u64 fuse_get_unique(struct fuse_conn *fc)
 303{
 304        fc->reqctr++;
 305        /* zero is special */
 306        if (fc->reqctr == 0)
 307                fc->reqctr = 1;
 308
 309        return fc->reqctr;
 310}
 311
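/*
 * Fill in the total length of the input header and arguments, append
 * the request to the pending list and wake up any device readers.
 * Called with fc->lock held.
 */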
 312static void queue_request(struct fuse_conn *fc, struct fuse_req *req)
 313{
 314        req->in.h.len = sizeof(struct fuse_in_header) +
 315                len_args(req->in.numargs, (struct fuse_arg *) req->in.args);
 316        list_add_tail(&req->list, &fc->pending);
 317        req->state = FUSE_REQ_PENDING;
 318        if (!req->waiting) {
 319                req->waiting = 1;
 320                atomic_inc(&fc->num_waiting);
 321        }
 322        wake_up(&fc->waitq);
 323        kill_fasync(&fc->fasync, SIGIO, POLL_IN);
 324}
 325
 326void fuse_queue_forget(struct fuse_conn *fc, struct fuse_forget_link *forget,
 327                       u64 nodeid, u64 nlookup)
 328{
 329        forget->forget_one.nodeid = nodeid;
 330        forget->forget_one.nlookup = nlookup;
 331
 332        spin_lock(&fc->lock);
 333        if (fc->connected) {
 334                fc->forget_list_tail->next = forget;
 335                fc->forget_list_tail = forget;
 336                wake_up(&fc->waitq);
 337                kill_fasync(&fc->fasync, SIGIO, POLL_IN);
 338        } else {
 339                kfree(forget);
 340        }
 341        spin_unlock(&fc->lock);
 342}
 343
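/*
 * Move queued background requests to the pending list, as long as
 * fewer than max_background requests are active.  Called with
 * fc->lock held.
 */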
 344static void flush_bg_queue(struct fuse_conn *fc)
 345{
 346        while (fc->active_background < fc->max_background &&
 347               !list_empty(&fc->bg_queue)) {
 348                struct fuse_req *req;
 349
 350                req = list_entry(fc->bg_queue.next, struct fuse_req, list);
 351                list_del(&req->list);
 352                fc->active_background++;
 353                req->in.h.unique = fuse_get_unique(fc);
 354                queue_request(fc, req);
 355        }
 356}
 357
 358/*
 359 * This function is called when a request is finished.  Either a reply
 360 * has arrived or it was aborted (and not yet sent) or some error
 361 * occurred during communication with userspace, or the device file
 362 * was closed.  The requester thread is woken up (if still waiting),
 363 * the 'end' callback is called if given, else the reference to the
  364 * request is released.
  365 *
  366 * Called with fc->lock held, unlocks it
 367 */
 368static void request_end(struct fuse_conn *fc, struct fuse_req *req)
 369__releases(fc->lock)
 370{
 371        void (*end) (struct fuse_conn *, struct fuse_req *) = req->end;
 372        req->end = NULL;
 373        list_del(&req->list);
 374        list_del(&req->intr_entry);
 375        req->state = FUSE_REQ_FINISHED;
 376        if (req->background) {
 377                req->background = 0;
 378
 379                if (fc->num_background == fc->max_background)
 380                        fc->blocked = 0;
 381
 382                /* Wake up next waiter, if any */
 383                if (!fc->blocked && waitqueue_active(&fc->blocked_waitq))
 384                        wake_up(&fc->blocked_waitq);
 385
 386                if (fc->num_background == fc->congestion_threshold &&
 387                    fc->connected && fc->bdi_initialized) {
 388                        clear_bdi_congested(&fc->bdi, BLK_RW_SYNC);
 389                        clear_bdi_congested(&fc->bdi, BLK_RW_ASYNC);
 390                }
 391                fc->num_background--;
 392                fc->active_background--;
 393                flush_bg_queue(fc);
 394        }
 395        spin_unlock(&fc->lock);
 396        wake_up(&req->waitq);
 397        if (end)
 398                end(fc, req);
 399        fuse_put_request(fc, req);
 400}
 401
 402static void wait_answer_interruptible(struct fuse_conn *fc,
 403                                      struct fuse_req *req)
 404__releases(fc->lock)
 405__acquires(fc->lock)
 406{
 407        if (signal_pending(current))
 408                return;
 409
 410        spin_unlock(&fc->lock);
 411        wait_event_interruptible(req->waitq, req->state == FUSE_REQ_FINISHED);
 412        spin_lock(&fc->lock);
 413}
 414
 415static void queue_interrupt(struct fuse_conn *fc, struct fuse_req *req)
 416{
 417        list_add_tail(&req->intr_entry, &fc->interrupts);
 418        wake_up(&fc->waitq);
 419        kill_fasync(&fc->fasync, SIGIO, POLL_IN);
 420}
 421
 422static void request_wait_answer(struct fuse_conn *fc, struct fuse_req *req)
 423__releases(fc->lock)
 424__acquires(fc->lock)
 425{
 426        if (!fc->no_interrupt) {
 427                /* Any signal may interrupt this */
 428                wait_answer_interruptible(fc, req);
 429
 430                if (req->aborted)
 431                        goto aborted;
 432                if (req->state == FUSE_REQ_FINISHED)
 433                        return;
 434
 435                req->interrupted = 1;
 436                if (req->state == FUSE_REQ_SENT)
 437                        queue_interrupt(fc, req);
 438        }
 439
 440        if (!req->force) {
 441                sigset_t oldset;
 442
 443                /* Only fatal signals may interrupt this */
 444                block_sigs(&oldset);
 445                wait_answer_interruptible(fc, req);
 446                restore_sigs(&oldset);
 447
 448                if (req->aborted)
 449                        goto aborted;
 450                if (req->state == FUSE_REQ_FINISHED)
 451                        return;
 452
 453                /* Request is not yet in userspace, bail out */
 454                if (req->state == FUSE_REQ_PENDING) {
 455                        list_del(&req->list);
 456                        __fuse_put_request(req);
 457                        req->out.h.error = -EINTR;
 458                        return;
 459                }
 460        }
 461
 462        /*
 463         * Either request is already in userspace, or it was forced.
 464         * Wait it out.
 465         */
 466        spin_unlock(&fc->lock);
 467        wait_event(req->waitq, req->state == FUSE_REQ_FINISHED);
 468        spin_lock(&fc->lock);
 469
 470        if (!req->aborted)
 471                return;
 472
 473 aborted:
 474        BUG_ON(req->state != FUSE_REQ_FINISHED);
 475        if (req->locked) {
 476                /* This is uninterruptible sleep, because data is
 477                   being copied to/from the buffers of req.  During
 478                   locked state, there mustn't be any filesystem
 479                   operation (e.g. page fault), since that could lead
 480                   to deadlock */
 481                spin_unlock(&fc->lock);
 482                wait_event(req->waitq, !req->locked);
 483                spin_lock(&fc->lock);
 484        }
 485}
 486
 487static void __fuse_request_send(struct fuse_conn *fc, struct fuse_req *req)
 488{
 489        BUG_ON(req->background);
 490        spin_lock(&fc->lock);
 491        if (!fc->connected)
 492                req->out.h.error = -ENOTCONN;
 493        else if (fc->conn_error)
 494                req->out.h.error = -ECONNREFUSED;
 495        else {
 496                req->in.h.unique = fuse_get_unique(fc);
 497                queue_request(fc, req);
 498                /* acquire extra reference, since request is still needed
 499                   after request_end() */
 500                __fuse_get_request(req);
 501
 502                request_wait_answer(fc, req);
 503        }
 504        spin_unlock(&fc->lock);
 505}
 506
 507void fuse_request_send(struct fuse_conn *fc, struct fuse_req *req)
 508{
 509        req->isreply = 1;
 510        __fuse_request_send(fc, req);
 511}
 512EXPORT_SYMBOL_GPL(fuse_request_send);
 513
 514static void fuse_request_send_nowait_locked(struct fuse_conn *fc,
 515                                            struct fuse_req *req)
 516{
 517        BUG_ON(!req->background);
 518        fc->num_background++;
 519        if (fc->num_background == fc->max_background)
 520                fc->blocked = 1;
 521        if (fc->num_background == fc->congestion_threshold &&
 522            fc->bdi_initialized) {
 523                set_bdi_congested(&fc->bdi, BLK_RW_SYNC);
 524                set_bdi_congested(&fc->bdi, BLK_RW_ASYNC);
 525        }
 526        list_add_tail(&req->list, &fc->bg_queue);
 527        flush_bg_queue(fc);
 528}
 529
 530static void fuse_request_send_nowait(struct fuse_conn *fc, struct fuse_req *req)
 531{
 532        spin_lock(&fc->lock);
 533        if (fc->connected) {
 534                fuse_request_send_nowait_locked(fc, req);
 535                spin_unlock(&fc->lock);
 536        } else {
 537                req->out.h.error = -ENOTCONN;
 538                request_end(fc, req);
 539        }
 540}
 541
 542void fuse_request_send_background(struct fuse_conn *fc, struct fuse_req *req)
 543{
 544        req->isreply = 1;
 545        fuse_request_send_nowait(fc, req);
 546}
 547EXPORT_SYMBOL_GPL(fuse_request_send_background);
 548
 549static int fuse_request_send_notify_reply(struct fuse_conn *fc,
 550                                          struct fuse_req *req, u64 unique)
 551{
 552        int err = -ENODEV;
 553
 554        req->isreply = 0;
 555        req->in.h.unique = unique;
 556        spin_lock(&fc->lock);
 557        if (fc->connected) {
 558                queue_request(fc, req);
 559                err = 0;
 560        }
 561        spin_unlock(&fc->lock);
 562
 563        return err;
 564}
 565
 566/*
 567 * Called under fc->lock
 568 *
 569 * fc->connected must have been checked previously
 570 */
 571void fuse_request_send_background_locked(struct fuse_conn *fc,
 572                                         struct fuse_req *req)
 573{
 574        req->isreply = 1;
 575        fuse_request_send_nowait_locked(fc, req);
 576}
 577
 578void fuse_force_forget(struct file *file, u64 nodeid)
 579{
 580        struct inode *inode = file_inode(file);
 581        struct fuse_conn *fc = get_fuse_conn(inode);
 582        struct fuse_req *req;
 583        struct fuse_forget_in inarg;
 584
 585        memset(&inarg, 0, sizeof(inarg));
 586        inarg.nlookup = 1;
 587        req = fuse_get_req_nofail_nopages(fc, file);
 588        req->in.h.opcode = FUSE_FORGET;
 589        req->in.h.nodeid = nodeid;
 590        req->in.numargs = 1;
 591        req->in.args[0].size = sizeof(inarg);
 592        req->in.args[0].value = &inarg;
 593        req->isreply = 0;
 594        __fuse_request_send(fc, req);
 595        /* ignore errors */
 596        fuse_put_request(fc, req);
 597}
 598
 599/*
 600 * Lock the request.  Up to the next unlock_request() there mustn't be
 601 * anything that could cause a page-fault.  If the request was already
  602 * aborted, bail out.
 603 */
 604static int lock_request(struct fuse_conn *fc, struct fuse_req *req)
 605{
 606        int err = 0;
 607        if (req) {
 608                spin_lock(&fc->lock);
 609                if (req->aborted)
 610                        err = -ENOENT;
 611                else
 612                        req->locked = 1;
 613                spin_unlock(&fc->lock);
 614        }
 615        return err;
 616}
 617
 618/*
  619 * Unlock request.  If it was aborted while locked, the
 620 * requester thread is currently waiting for it to be unlocked, so
 621 * wake it up.
 622 */
 623static void unlock_request(struct fuse_conn *fc, struct fuse_req *req)
 624{
 625        if (req) {
 626                spin_lock(&fc->lock);
 627                req->locked = 0;
 628                if (req->aborted)
 629                        wake_up(&req->waitq);
 630                spin_unlock(&fc->lock);
 631        }
 632}
 633
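/*
 * State of an in-progress copy between a request and a userspace
 * buffer (iovec) or a pipe (for splice reads/writes).
 */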
 634struct fuse_copy_state {
 635        struct fuse_conn *fc;
 636        int write;
 637        struct fuse_req *req;
 638        const struct iovec *iov;
 639        struct pipe_buffer *pipebufs;
 640        struct pipe_buffer *currbuf;
 641        struct pipe_inode_info *pipe;
 642        unsigned long nr_segs;
 643        unsigned long seglen;
 644        unsigned long addr;
 645        struct page *pg;
 646        unsigned len;
 647        unsigned offset;
 648        unsigned move_pages:1;
 649};
 650
 651static void fuse_copy_init(struct fuse_copy_state *cs, struct fuse_conn *fc,
 652                           int write,
 653                           const struct iovec *iov, unsigned long nr_segs)
 654{
 655        memset(cs, 0, sizeof(*cs));
 656        cs->fc = fc;
 657        cs->write = write;
 658        cs->iov = iov;
 659        cs->nr_segs = nr_segs;
 660}
 661
 662/* Unmap and put previous page of userspace buffer */
 663static void fuse_copy_finish(struct fuse_copy_state *cs)
 664{
 665        if (cs->currbuf) {
 666                struct pipe_buffer *buf = cs->currbuf;
 667
 668                if (cs->write)
 669                        buf->len = PAGE_SIZE - cs->len;
 670                cs->currbuf = NULL;
 671        } else if (cs->pg) {
 672                if (cs->write) {
 673                        flush_dcache_page(cs->pg);
 674                        set_page_dirty_lock(cs->pg);
 675                }
 676                put_page(cs->pg);
 677        }
 678        cs->pg = NULL;
 679}
 680
 681/*
  682 * Get another pageful of userspace buffer, map it into kernel
  683 * address space, and lock the request
 684 */
 685static int fuse_copy_fill(struct fuse_copy_state *cs)
 686{
 687        struct page *page;
 688        int err;
 689
 690        unlock_request(cs->fc, cs->req);
 691        fuse_copy_finish(cs);
 692        if (cs->pipebufs) {
 693                struct pipe_buffer *buf = cs->pipebufs;
 694
 695                if (!cs->write) {
 696                        err = buf->ops->confirm(cs->pipe, buf);
 697                        if (err)
 698                                return err;
 699
 700                        BUG_ON(!cs->nr_segs);
 701                        cs->currbuf = buf;
 702                        cs->pg = buf->page;
 703                        cs->offset = buf->offset;
 704                        cs->len = buf->len;
 705                        cs->pipebufs++;
 706                        cs->nr_segs--;
 707                } else {
 708                        if (cs->nr_segs == cs->pipe->buffers)
 709                                return -EIO;
 710
 711                        page = alloc_page(GFP_HIGHUSER);
 712                        if (!page)
 713                                return -ENOMEM;
 714
 715                        buf->page = page;
 716                        buf->offset = 0;
 717                        buf->len = 0;
 718
 719                        cs->currbuf = buf;
 720                        cs->pg = page;
 721                        cs->offset = 0;
 722                        cs->len = PAGE_SIZE;
 723                        cs->pipebufs++;
 724                        cs->nr_segs++;
 725                }
 726        } else {
 727                if (!cs->seglen) {
 728                        BUG_ON(!cs->nr_segs);
 729                        cs->seglen = cs->iov[0].iov_len;
 730                        cs->addr = (unsigned long) cs->iov[0].iov_base;
 731                        cs->iov++;
 732                        cs->nr_segs--;
 733                }
 734                err = get_user_pages_fast(cs->addr, 1, cs->write, &page);
 735                if (err < 0)
 736                        return err;
 737                BUG_ON(err != 1);
 738                cs->pg = page;
 739                cs->offset = cs->addr % PAGE_SIZE;
 740                cs->len = min(PAGE_SIZE - cs->offset, cs->seglen);
 741                cs->seglen -= cs->len;
 742                cs->addr += cs->len;
 743        }
 744
 745        return lock_request(cs->fc, cs->req);
 746}
 747
 748/* Do as much copy to/from userspace buffer as we can */
 749static int fuse_copy_do(struct fuse_copy_state *cs, void **val, unsigned *size)
 750{
 751        unsigned ncpy = min(*size, cs->len);
 752        if (val) {
 753                void *pgaddr = kmap_atomic(cs->pg);
 754                void *buf = pgaddr + cs->offset;
 755
 756                if (cs->write)
 757                        memcpy(buf, *val, ncpy);
 758                else
 759                        memcpy(*val, buf, ncpy);
 760
 761                kunmap_atomic(pgaddr);
 762                *val += ncpy;
 763        }
 764        *size -= ncpy;
 765        cs->len -= ncpy;
 766        cs->offset += ncpy;
 767        return ncpy;
 768}
 769
 770static int fuse_check_page(struct page *page)
 771{
 772        if (page_mapcount(page) ||
 773            page->mapping != NULL ||
 774            page_count(page) != 1 ||
 775            (page->flags & PAGE_FLAGS_CHECK_AT_PREP &
 776             ~(1 << PG_locked |
 777               1 << PG_referenced |
 778               1 << PG_uptodate |
 779               1 << PG_lru |
 780               1 << PG_active |
 781               1 << PG_reclaim))) {
 782                printk(KERN_WARNING "fuse: trying to steal weird page\n");
 783                printk(KERN_WARNING "  page=%p index=%li flags=%08lx, count=%i, mapcount=%i, mapping=%p\n", page, page->index, page->flags, page_count(page), page_mapcount(page), page->mapping);
 784                return 1;
 785        }
 786        return 0;
 787}
 788
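/*
 * Try to steal the page backing the current pipe buffer and splice it
 * into the page cache in place of *pagep, avoiding a data copy.
 * Returns 0 on success, a negative error, or 1 if the caller should
 * fall back to an ordinary copy.
 */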
 789static int fuse_try_move_page(struct fuse_copy_state *cs, struct page **pagep)
 790{
 791        int err;
 792        struct page *oldpage = *pagep;
 793        struct page *newpage;
 794        struct pipe_buffer *buf = cs->pipebufs;
 795
 796        unlock_request(cs->fc, cs->req);
 797        fuse_copy_finish(cs);
 798
 799        err = buf->ops->confirm(cs->pipe, buf);
 800        if (err)
 801                return err;
 802
 803        BUG_ON(!cs->nr_segs);
 804        cs->currbuf = buf;
 805        cs->len = buf->len;
 806        cs->pipebufs++;
 807        cs->nr_segs--;
 808
 809        if (cs->len != PAGE_SIZE)
 810                goto out_fallback;
 811
 812        if (buf->ops->steal(cs->pipe, buf) != 0)
 813                goto out_fallback;
 814
 815        newpage = buf->page;
 816
 817        if (WARN_ON(!PageUptodate(newpage)))
 818                return -EIO;
 819
 820        ClearPageMappedToDisk(newpage);
 821
 822        if (fuse_check_page(newpage) != 0)
 823                goto out_fallback_unlock;
 824
 825        /*
 826         * This is a new and locked page, it shouldn't be mapped or
 827         * have any special flags on it
 828         */
 829        if (WARN_ON(page_mapped(oldpage)))
 830                goto out_fallback_unlock;
 831        if (WARN_ON(page_has_private(oldpage)))
 832                goto out_fallback_unlock;
 833        if (WARN_ON(PageDirty(oldpage) || PageWriteback(oldpage)))
 834                goto out_fallback_unlock;
 835        if (WARN_ON(PageMlocked(oldpage)))
 836                goto out_fallback_unlock;
 837
 838        err = replace_page_cache_page(oldpage, newpage, GFP_KERNEL);
 839        if (err) {
 840                unlock_page(newpage);
 841                return err;
 842        }
 843
 844        page_cache_get(newpage);
 845
 846        if (!(buf->flags & PIPE_BUF_FLAG_LRU))
 847                lru_cache_add_file(newpage);
 848
 849        err = 0;
 850        spin_lock(&cs->fc->lock);
 851        if (cs->req->aborted)
 852                err = -ENOENT;
 853        else
 854                *pagep = newpage;
 855        spin_unlock(&cs->fc->lock);
 856
 857        if (err) {
 858                unlock_page(newpage);
 859                page_cache_release(newpage);
 860                return err;
 861        }
 862
 863        unlock_page(oldpage);
 864        page_cache_release(oldpage);
 865        cs->len = 0;
 866
 867        return 0;
 868
 869out_fallback_unlock:
 870        unlock_page(newpage);
 871out_fallback:
 872        cs->pg = buf->page;
 873        cs->offset = buf->offset;
 874
 875        err = lock_request(cs->fc, cs->req);
 876        if (err)
 877                return err;
 878
 879        return 1;
 880}
 881
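/*
 * For splice reads, reference the request's page directly as a pipe
 * buffer instead of copying its contents.
 */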
 882static int fuse_ref_page(struct fuse_copy_state *cs, struct page *page,
 883                         unsigned offset, unsigned count)
 884{
 885        struct pipe_buffer *buf;
 886
 887        if (cs->nr_segs == cs->pipe->buffers)
 888                return -EIO;
 889
 890        unlock_request(cs->fc, cs->req);
 891        fuse_copy_finish(cs);
 892
 893        buf = cs->pipebufs;
 894        page_cache_get(page);
 895        buf->page = page;
 896        buf->offset = offset;
 897        buf->len = count;
 898
 899        cs->pipebufs++;
 900        cs->nr_segs++;
 901        cs->len = 0;
 902
 903        return 0;
 904}
 905
 906/*
 907 * Copy a page in the request to/from the userspace buffer.  Must be
 908 * done atomically
 909 */
 910static int fuse_copy_page(struct fuse_copy_state *cs, struct page **pagep,
 911                          unsigned offset, unsigned count, int zeroing)
 912{
 913        int err;
 914        struct page *page = *pagep;
 915
 916        if (page && zeroing && count < PAGE_SIZE)
 917                clear_highpage(page);
 918
 919        while (count) {
 920                if (cs->write && cs->pipebufs && page) {
 921                        return fuse_ref_page(cs, page, offset, count);
 922                } else if (!cs->len) {
 923                        if (cs->move_pages && page &&
 924                            offset == 0 && count == PAGE_SIZE) {
 925                                err = fuse_try_move_page(cs, pagep);
 926                                if (err <= 0)
 927                                        return err;
 928                        } else {
 929                                err = fuse_copy_fill(cs);
 930                                if (err)
 931                                        return err;
 932                        }
 933                }
 934                if (page) {
 935                        void *mapaddr = kmap_atomic(page);
 936                        void *buf = mapaddr + offset;
 937                        offset += fuse_copy_do(cs, &buf, &count);
 938                        kunmap_atomic(mapaddr);
 939                } else
 940                        offset += fuse_copy_do(cs, NULL, &count);
 941        }
 942        if (page && !cs->write)
 943                flush_dcache_page(page);
 944        return 0;
 945}
 946
 947/* Copy pages in the request to/from userspace buffer */
 948static int fuse_copy_pages(struct fuse_copy_state *cs, unsigned nbytes,
 949                           int zeroing)
 950{
 951        unsigned i;
 952        struct fuse_req *req = cs->req;
 953
 954        for (i = 0; i < req->num_pages && (nbytes || zeroing); i++) {
 955                int err;
 956                unsigned offset = req->page_descs[i].offset;
 957                unsigned count = min(nbytes, req->page_descs[i].length);
 958
 959                err = fuse_copy_page(cs, &req->pages[i], offset, count,
 960                                     zeroing);
 961                if (err)
 962                        return err;
 963
 964                nbytes -= count;
 965        }
 966        return 0;
 967}
 968
 969/* Copy a single argument in the request to/from userspace buffer */
 970static int fuse_copy_one(struct fuse_copy_state *cs, void *val, unsigned size)
 971{
 972        while (size) {
 973                if (!cs->len) {
 974                        int err = fuse_copy_fill(cs);
 975                        if (err)
 976                                return err;
 977                }
 978                fuse_copy_do(cs, &val, &size);
 979        }
 980        return 0;
 981}
 982
 983/* Copy request arguments to/from userspace buffer */
 984static int fuse_copy_args(struct fuse_copy_state *cs, unsigned numargs,
 985                          unsigned argpages, struct fuse_arg *args,
 986                          int zeroing)
 987{
 988        int err = 0;
 989        unsigned i;
 990
 991        for (i = 0; !err && i < numargs; i++)  {
 992                struct fuse_arg *arg = &args[i];
 993                if (i == numargs - 1 && argpages)
 994                        err = fuse_copy_pages(cs, arg->size, zeroing);
 995                else
 996                        err = fuse_copy_one(cs, arg->value, arg->size);
 997        }
 998        return err;
 999}
1000
1001static int forget_pending(struct fuse_conn *fc)
1002{
1003        return fc->forget_list_head.next != NULL;
1004}
1005
1006static int request_pending(struct fuse_conn *fc)
1007{
1008        return !list_empty(&fc->pending) || !list_empty(&fc->interrupts) ||
1009                forget_pending(fc);
1010}
1011
1012/* Wait until a request is available on the pending list */
1013static void request_wait(struct fuse_conn *fc)
1014__releases(fc->lock)
1015__acquires(fc->lock)
1016{
1017        DECLARE_WAITQUEUE(wait, current);
1018
1019        add_wait_queue_exclusive(&fc->waitq, &wait);
1020        while (fc->connected && !request_pending(fc)) {
1021                set_current_state(TASK_INTERRUPTIBLE);
1022                if (signal_pending(current))
1023                        break;
1024
1025                spin_unlock(&fc->lock);
1026                schedule();
1027                spin_lock(&fc->lock);
1028        }
1029        set_current_state(TASK_RUNNING);
1030        remove_wait_queue(&fc->waitq, &wait);
1031}
1032
1033/*
1034 * Transfer an interrupt request to userspace
1035 *
1036 * Unlike other requests this is assembled on demand, without a need
1037 * to allocate a separate fuse_req structure.
1038 *
1039 * Called with fc->lock held, releases it
1040 */
1041static int fuse_read_interrupt(struct fuse_conn *fc, struct fuse_copy_state *cs,
1042                               size_t nbytes, struct fuse_req *req)
1043__releases(fc->lock)
1044{
1045        struct fuse_in_header ih;
1046        struct fuse_interrupt_in arg;
1047        unsigned reqsize = sizeof(ih) + sizeof(arg);
1048        int err;
1049
1050        list_del_init(&req->intr_entry);
1051        req->intr_unique = fuse_get_unique(fc);
1052        memset(&ih, 0, sizeof(ih));
1053        memset(&arg, 0, sizeof(arg));
1054        ih.len = reqsize;
1055        ih.opcode = FUSE_INTERRUPT;
1056        ih.unique = req->intr_unique;
1057        arg.unique = req->in.h.unique;
1058
1059        spin_unlock(&fc->lock);
1060        if (nbytes < reqsize)
1061                return -EINVAL;
1062
1063        err = fuse_copy_one(cs, &ih, sizeof(ih));
1064        if (!err)
1065                err = fuse_copy_one(cs, &arg, sizeof(arg));
1066        fuse_copy_finish(cs);
1067
1068        return err ? err : reqsize;
1069}
1070
1071static struct fuse_forget_link *dequeue_forget(struct fuse_conn *fc,
1072                                               unsigned max,
1073                                               unsigned *countp)
1074{
1075        struct fuse_forget_link *head = fc->forget_list_head.next;
1076        struct fuse_forget_link **newhead = &head;
1077        unsigned count;
1078
1079        for (count = 0; *newhead != NULL && count < max; count++)
1080                newhead = &(*newhead)->next;
1081
1082        fc->forget_list_head.next = *newhead;
1083        *newhead = NULL;
1084        if (fc->forget_list_head.next == NULL)
1085                fc->forget_list_tail = &fc->forget_list_head;
1086
1087        if (countp != NULL)
1088                *countp = count;
1089
1090        return head;
1091}
1092
1093static int fuse_read_single_forget(struct fuse_conn *fc,
1094                                   struct fuse_copy_state *cs,
1095                                   size_t nbytes)
1096__releases(fc->lock)
1097{
1098        int err;
1099        struct fuse_forget_link *forget = dequeue_forget(fc, 1, NULL);
1100        struct fuse_forget_in arg = {
1101                .nlookup = forget->forget_one.nlookup,
1102        };
1103        struct fuse_in_header ih = {
1104                .opcode = FUSE_FORGET,
1105                .nodeid = forget->forget_one.nodeid,
1106                .unique = fuse_get_unique(fc),
1107                .len = sizeof(ih) + sizeof(arg),
1108        };
1109
1110        spin_unlock(&fc->lock);
1111        kfree(forget);
1112        if (nbytes < ih.len)
1113                return -EINVAL;
1114
1115        err = fuse_copy_one(cs, &ih, sizeof(ih));
1116        if (!err)
1117                err = fuse_copy_one(cs, &arg, sizeof(arg));
1118        fuse_copy_finish(cs);
1119
1120        if (err)
1121                return err;
1122
1123        return ih.len;
1124}
1125
1126static int fuse_read_batch_forget(struct fuse_conn *fc,
1127                                   struct fuse_copy_state *cs, size_t nbytes)
1128__releases(fc->lock)
1129{
1130        int err;
1131        unsigned max_forgets;
1132        unsigned count;
1133        struct fuse_forget_link *head;
1134        struct fuse_batch_forget_in arg = { .count = 0 };
1135        struct fuse_in_header ih = {
1136                .opcode = FUSE_BATCH_FORGET,
1137                .unique = fuse_get_unique(fc),
1138                .len = sizeof(ih) + sizeof(arg),
1139        };
1140
1141        if (nbytes < ih.len) {
1142                spin_unlock(&fc->lock);
1143                return -EINVAL;
1144        }
1145
1146        max_forgets = (nbytes - ih.len) / sizeof(struct fuse_forget_one);
1147        head = dequeue_forget(fc, max_forgets, &count);
1148        spin_unlock(&fc->lock);
1149
1150        arg.count = count;
1151        ih.len += count * sizeof(struct fuse_forget_one);
1152        err = fuse_copy_one(cs, &ih, sizeof(ih));
1153        if (!err)
1154                err = fuse_copy_one(cs, &arg, sizeof(arg));
1155
1156        while (head) {
1157                struct fuse_forget_link *forget = head;
1158
1159                if (!err) {
1160                        err = fuse_copy_one(cs, &forget->forget_one,
1161                                            sizeof(forget->forget_one));
1162                }
1163                head = forget->next;
1164                kfree(forget);
1165        }
1166
1167        fuse_copy_finish(cs);
1168
1169        if (err)
1170                return err;
1171
1172        return ih.len;
1173}
1174
1175static int fuse_read_forget(struct fuse_conn *fc, struct fuse_copy_state *cs,
1176                            size_t nbytes)
1177__releases(fc->lock)
1178{
1179        if (fc->minor < 16 || fc->forget_list_head.next->next == NULL)
1180                return fuse_read_single_forget(fc, cs, nbytes);
1181        else
1182                return fuse_read_batch_forget(fc, cs, nbytes);
1183}
1184
1185/*
1186 * Read a single request into the userspace filesystem's buffer.  This
1187 * function waits until a request is available, then removes it from
 1188 * the pending list and copies request data to the userspace buffer.  If
 1189 * no reply is needed (FORGET), or the request has been aborted, or there
 1190 * was an error during the copying, then it is finished by calling
1191 * request_end().  Otherwise add it to the processing list, and set
1192 * the 'sent' flag.
1193 */
1194static ssize_t fuse_dev_do_read(struct fuse_conn *fc, struct file *file,
1195                                struct fuse_copy_state *cs, size_t nbytes)
1196{
1197        int err;
1198        struct fuse_req *req;
1199        struct fuse_in *in;
1200        unsigned reqsize;
1201
1202 restart:
1203        spin_lock(&fc->lock);
1204        err = -EAGAIN;
1205        if ((file->f_flags & O_NONBLOCK) && fc->connected &&
1206            !request_pending(fc))
1207                goto err_unlock;
1208
1209        request_wait(fc);
1210        err = -ENODEV;
1211        if (!fc->connected)
1212                goto err_unlock;
1213        err = -ERESTARTSYS;
1214        if (!request_pending(fc))
1215                goto err_unlock;
1216
1217        if (!list_empty(&fc->interrupts)) {
1218                req = list_entry(fc->interrupts.next, struct fuse_req,
1219                                 intr_entry);
1220                return fuse_read_interrupt(fc, cs, nbytes, req);
1221        }
1222
1223        if (forget_pending(fc)) {
1224                if (list_empty(&fc->pending) || fc->forget_batch-- > 0)
1225                        return fuse_read_forget(fc, cs, nbytes);
1226
1227                if (fc->forget_batch <= -8)
1228                        fc->forget_batch = 16;
1229        }
1230
1231        req = list_entry(fc->pending.next, struct fuse_req, list);
1232        req->state = FUSE_REQ_READING;
1233        list_move(&req->list, &fc->io);
1234
1235        in = &req->in;
1236        reqsize = in->h.len;
1237        /* If request is too large, reply with an error and restart the read */
1238        if (nbytes < reqsize) {
1239                req->out.h.error = -EIO;
 1240                /* SETXATTR is special, since its data may be too large */
1241                if (in->h.opcode == FUSE_SETXATTR)
1242                        req->out.h.error = -E2BIG;
1243                request_end(fc, req);
1244                goto restart;
1245        }
1246        spin_unlock(&fc->lock);
1247        cs->req = req;
1248        err = fuse_copy_one(cs, &in->h, sizeof(in->h));
1249        if (!err)
1250                err = fuse_copy_args(cs, in->numargs, in->argpages,
1251                                     (struct fuse_arg *) in->args, 0);
1252        fuse_copy_finish(cs);
1253        spin_lock(&fc->lock);
1254        req->locked = 0;
1255        if (req->aborted) {
1256                request_end(fc, req);
1257                return -ENODEV;
1258        }
1259        if (err) {
1260                req->out.h.error = -EIO;
1261                request_end(fc, req);
1262                return err;
1263        }
1264        if (!req->isreply)
1265                request_end(fc, req);
1266        else {
1267                req->state = FUSE_REQ_SENT;
1268                list_move_tail(&req->list, &fc->processing);
1269                if (req->interrupted)
1270                        queue_interrupt(fc, req);
1271                spin_unlock(&fc->lock);
1272        }
1273        return reqsize;
1274
1275 err_unlock:
1276        spin_unlock(&fc->lock);
1277        return err;
1278}
1279
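/*
 * The device file's read path: set up an iovec-based copy state and
 * hand off to fuse_dev_do_read().
 */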
1280static ssize_t fuse_dev_read(struct kiocb *iocb, const struct iovec *iov,
1281                              unsigned long nr_segs, loff_t pos)
1282{
1283        struct fuse_copy_state cs;
1284        struct file *file = iocb->ki_filp;
1285        struct fuse_conn *fc = fuse_get_conn(file);
1286        if (!fc)
1287                return -EPERM;
1288
1289        fuse_copy_init(&cs, fc, 1, iov, nr_segs);
1290
1291        return fuse_dev_do_read(fc, file, &cs, iov_length(iov, nr_segs));
1292}
1293
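/*
 * splice(2) from the device into a pipe: read the request into a
 * temporary array of pipe buffers, then transplant those buffers into
 * the destination pipe.
 */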
1294static ssize_t fuse_dev_splice_read(struct file *in, loff_t *ppos,
1295                                    struct pipe_inode_info *pipe,
1296                                    size_t len, unsigned int flags)
1297{
1298        int ret;
1299        int page_nr = 0;
1300        int do_wakeup = 0;
1301        struct pipe_buffer *bufs;
1302        struct fuse_copy_state cs;
1303        struct fuse_conn *fc = fuse_get_conn(in);
1304        if (!fc)
1305                return -EPERM;
1306
1307        bufs = kmalloc(pipe->buffers * sizeof(struct pipe_buffer), GFP_KERNEL);
1308        if (!bufs)
1309                return -ENOMEM;
1310
1311        fuse_copy_init(&cs, fc, 1, NULL, 0);
1312        cs.pipebufs = bufs;
1313        cs.pipe = pipe;
1314        ret = fuse_dev_do_read(fc, in, &cs, len);
1315        if (ret < 0)
1316                goto out;
1317
1318        ret = 0;
1319        pipe_lock(pipe);
1320
1321        if (!pipe->readers) {
1322                send_sig(SIGPIPE, current, 0);
1323                if (!ret)
1324                        ret = -EPIPE;
1325                goto out_unlock;
1326        }
1327
1328        if (pipe->nrbufs + cs.nr_segs > pipe->buffers) {
1329                ret = -EIO;
1330                goto out_unlock;
1331        }
1332
1333        while (page_nr < cs.nr_segs) {
1334                int newbuf = (pipe->curbuf + pipe->nrbufs) & (pipe->buffers - 1);
1335                struct pipe_buffer *buf = pipe->bufs + newbuf;
1336
1337                buf->page = bufs[page_nr].page;
1338                buf->offset = bufs[page_nr].offset;
1339                buf->len = bufs[page_nr].len;
1340                /*
1341                 * Need to be careful about this.  Having buf->ops in module
1342                 * code can Oops if the buffer persists after module unload.
1343                 */
1344                buf->ops = &nosteal_pipe_buf_ops;
1345
1346                pipe->nrbufs++;
1347                page_nr++;
1348                ret += buf->len;
1349
1350                if (pipe->files)
1351                        do_wakeup = 1;
1352        }
1353
1354out_unlock:
1355        pipe_unlock(pipe);
1356
1357        if (do_wakeup) {
1358                smp_mb();
1359                if (waitqueue_active(&pipe->wait))
1360                        wake_up_interruptible(&pipe->wait);
1361                kill_fasync(&pipe->fasync_readers, SIGIO, POLL_IN);
1362        }
1363
1364out:
1365        for (; page_nr < cs.nr_segs; page_nr++)
1366                page_cache_release(bufs[page_nr].page);
1367
1368        kfree(bufs);
1369        return ret;
1370}
1371
1372static int fuse_notify_poll(struct fuse_conn *fc, unsigned int size,
1373                            struct fuse_copy_state *cs)
1374{
1375        struct fuse_notify_poll_wakeup_out outarg;
1376        int err = -EINVAL;
1377
1378        if (size != sizeof(outarg))
1379                goto err;
1380
1381        err = fuse_copy_one(cs, &outarg, sizeof(outarg));
1382        if (err)
1383                goto err;
1384
1385        fuse_copy_finish(cs);
1386        return fuse_notify_poll_wakeup(fc, &outarg);
1387
1388err:
1389        fuse_copy_finish(cs);
1390        return err;
1391}
1392
1393static int fuse_notify_inval_inode(struct fuse_conn *fc, unsigned int size,
1394                                   struct fuse_copy_state *cs)
1395{
1396        struct fuse_notify_inval_inode_out outarg;
1397        int err = -EINVAL;
1398
1399        if (size != sizeof(outarg))
1400                goto err;
1401
1402        err = fuse_copy_one(cs, &outarg, sizeof(outarg));
1403        if (err)
1404                goto err;
1405        fuse_copy_finish(cs);
1406
1407        down_read(&fc->killsb);
1408        err = -ENOENT;
1409        if (fc->sb) {
1410                err = fuse_reverse_inval_inode(fc->sb, outarg.ino,
1411                                               outarg.off, outarg.len);
1412        }
1413        up_read(&fc->killsb);
1414        return err;
1415
1416err:
1417        fuse_copy_finish(cs);
1418        return err;
1419}
1420
1421static int fuse_notify_inval_entry(struct fuse_conn *fc, unsigned int size,
1422                                   struct fuse_copy_state *cs)
1423{
1424        struct fuse_notify_inval_entry_out outarg;
1425        int err = -ENOMEM;
1426        char *buf;
1427        struct qstr name;
1428
1429        buf = kzalloc(FUSE_NAME_MAX + 1, GFP_KERNEL);
1430        if (!buf)
1431                goto err;
1432
1433        err = -EINVAL;
1434        if (size < sizeof(outarg))
1435                goto err;
1436
1437        err = fuse_copy_one(cs, &outarg, sizeof(outarg));
1438        if (err)
1439                goto err;
1440
1441        err = -ENAMETOOLONG;
1442        if (outarg.namelen > FUSE_NAME_MAX)
1443                goto err;
1444
1445        err = -EINVAL;
1446        if (size != sizeof(outarg) + outarg.namelen + 1)
1447                goto err;
1448
1449        name.name = buf;
1450        name.len = outarg.namelen;
1451        err = fuse_copy_one(cs, buf, outarg.namelen + 1);
1452        if (err)
1453                goto err;
1454        fuse_copy_finish(cs);
1455        buf[outarg.namelen] = 0;
1456        name.hash = full_name_hash(name.name, name.len);
1457
1458        down_read(&fc->killsb);
1459        err = -ENOENT;
1460        if (fc->sb)
1461                err = fuse_reverse_inval_entry(fc->sb, outarg.parent, 0, &name);
1462        up_read(&fc->killsb);
1463        kfree(buf);
1464        return err;
1465
1466err:
1467        kfree(buf);
1468        fuse_copy_finish(cs);
1469        return err;
1470}
1471
1472static int fuse_notify_delete(struct fuse_conn *fc, unsigned int size,
1473                              struct fuse_copy_state *cs)
1474{
1475        struct fuse_notify_delete_out outarg;
1476        int err = -ENOMEM;
1477        char *buf;
1478        struct qstr name;
1479
1480        buf = kzalloc(FUSE_NAME_MAX + 1, GFP_KERNEL);
1481        if (!buf)
1482                goto err;
1483
1484        err = -EINVAL;
1485        if (size < sizeof(outarg))
1486                goto err;
1487
1488        err = fuse_copy_one(cs, &outarg, sizeof(outarg));
1489        if (err)
1490                goto err;
1491
1492        err = -ENAMETOOLONG;
1493        if (outarg.namelen > FUSE_NAME_MAX)
1494                goto err;
1495
1496        err = -EINVAL;
1497        if (size != sizeof(outarg) + outarg.namelen + 1)
1498                goto err;
1499
1500        name.name = buf;
1501        name.len = outarg.namelen;
1502        err = fuse_copy_one(cs, buf, outarg.namelen + 1);
1503        if (err)
1504                goto err;
1505        fuse_copy_finish(cs);
1506        buf[outarg.namelen] = 0;
1507        name.hash = full_name_hash(name.name, name.len);
1508
1509        down_read(&fc->killsb);
1510        err = -ENOENT;
1511        if (fc->sb)
1512                err = fuse_reverse_inval_entry(fc->sb, outarg.parent,
1513                                               outarg.child, &name);
1514        up_read(&fc->killsb);
1515        kfree(buf);
1516        return err;
1517
1518err:
1519        kfree(buf);
1520        fuse_copy_finish(cs);
1521        return err;
1522}
1523
1524static int fuse_notify_store(struct fuse_conn *fc, unsigned int size,
1525                             struct fuse_copy_state *cs)
1526{
1527        struct fuse_notify_store_out outarg;
1528        struct inode *inode;
1529        struct address_space *mapping;
1530        u64 nodeid;
1531        int err;
1532        pgoff_t index;
1533        unsigned int offset;
1534        unsigned int num;
1535        loff_t file_size;
1536        loff_t end;
1537
1538        err = -EINVAL;
1539        if (size < sizeof(outarg))
1540                goto out_finish;
1541
1542        err = fuse_copy_one(cs, &outarg, sizeof(outarg));
1543        if (err)
1544                goto out_finish;
1545
1546        err = -EINVAL;
1547        if (size - sizeof(outarg) != outarg.size)
1548                goto out_finish;
1549
1550        nodeid = outarg.nodeid;
1551
1552        down_read(&fc->killsb);
1553
1554        err = -ENOENT;
1555        if (!fc->sb)
1556                goto out_up_killsb;
1557
1558        inode = ilookup5(fc->sb, nodeid, fuse_inode_eq, &nodeid);
1559        if (!inode)
1560                goto out_up_killsb;
1561
1562        mapping = inode->i_mapping;
1563        index = outarg.offset >> PAGE_CACHE_SHIFT;
1564        offset = outarg.offset & ~PAGE_CACHE_MASK;
1565        file_size = i_size_read(inode);
1566        end = outarg.offset + outarg.size;
1567        if (end > file_size) {
1568                file_size = end;
1569                fuse_write_update_size(inode, file_size);
1570        }
1571
1572        num = outarg.size;
1573        while (num) {
1574                struct page *page;
1575                unsigned int this_num;
1576
1577                err = -ENOMEM;
1578                page = find_or_create_page(mapping, index,
1579                                           mapping_gfp_mask(mapping));
1580                if (!page)
1581                        goto out_iput;
1582
1583                this_num = min_t(unsigned, num, PAGE_CACHE_SIZE - offset);
1584                err = fuse_copy_page(cs, &page, offset, this_num, 0);
1585                if (!err && offset == 0 &&
1586                    (this_num == PAGE_CACHE_SIZE || file_size == end))
1587                        SetPageUptodate(page);
1588                unlock_page(page);
1589                page_cache_release(page);
1590
1591                if (err)
1592                        goto out_iput;
1593
1594                num -= this_num;
1595                offset = 0;
1596                index++;
1597        }
1598
1599        err = 0;
1600
1601out_iput:
1602        iput(inode);
1603out_up_killsb:
1604        up_read(&fc->killsb);
1605out_finish:
1606        fuse_copy_finish(cs);
1607        return err;
1608}
1609
1610static void fuse_retrieve_end(struct fuse_conn *fc, struct fuse_req *req)
1611{
1612        release_pages(req->pages, req->num_pages, false);
1613}
1614
1615static int fuse_retrieve(struct fuse_conn *fc, struct inode *inode,
1616                         struct fuse_notify_retrieve_out *outarg)
1617{
1618        int err;
1619        struct address_space *mapping = inode->i_mapping;
1620        struct fuse_req *req;
1621        pgoff_t index;
1622        loff_t file_size;
1623        unsigned int num;
1624        unsigned int offset;
1625        size_t total_len = 0;
1626        int num_pages;
1627
1628        offset = outarg->offset & ~PAGE_CACHE_MASK;
1629        file_size = i_size_read(inode);
1630
1631        num = outarg->size;
1632        if (outarg->offset > file_size)
1633                num = 0;
1634        else if (outarg->offset + num > file_size)
1635                num = file_size - outarg->offset;
1636
1637        num_pages = (num + offset + PAGE_SIZE - 1) >> PAGE_SHIFT;
1638        num_pages = min(num_pages, FUSE_MAX_PAGES_PER_REQ);
1639
1640        req = fuse_get_req(fc, num_pages);
1641        if (IS_ERR(req))
1642                return PTR_ERR(req);
1643
1644        req->in.h.opcode = FUSE_NOTIFY_REPLY;
1645        req->in.h.nodeid = outarg->nodeid;
1646        req->in.numargs = 2;
1647        req->in.argpages = 1;
1648        req->page_descs[0].offset = offset;
1649        req->end = fuse_retrieve_end;
1650
1651        index = outarg->offset >> PAGE_CACHE_SHIFT;
1652
1653        while (num && req->num_pages < num_pages) {
1654                struct page *page;
1655                unsigned int this_num;
1656
1657                page = find_get_page(mapping, index);
1658                if (!page)
1659                        break;
1660
1661                this_num = min_t(unsigned, num, PAGE_CACHE_SIZE - offset);
1662                req->pages[req->num_pages] = page;
1663                req->page_descs[req->num_pages].length = this_num;
1664                req->num_pages++;
1665
1666                offset = 0;
1667                num -= this_num;
1668                total_len += this_num;
1669                index++;
1670        }
1671        req->misc.retrieve_in.offset = outarg->offset;
1672        req->misc.retrieve_in.size = total_len;
1673        req->in.args[0].size = sizeof(req->misc.retrieve_in);
1674        req->in.args[0].value = &req->misc.retrieve_in;
1675        req->in.args[1].size = total_len;
1676
1677        err = fuse_request_send_notify_reply(fc, req, outarg->notify_unique);
1678        if (err)
1679                fuse_retrieve_end(fc, req);
1680
1681        return err;
1682}
1683
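    /*
     * Handle a FUSE_NOTIFY_RETRIEVE notification: look up the inode by
     * node ID and, if it is still around, send its cached data back to
     * userspace via fuse_retrieve().
     */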
1684static int fuse_notify_retrieve(struct fuse_conn *fc, unsigned int size,
1685                                struct fuse_copy_state *cs)
1686{
1687        struct fuse_notify_retrieve_out outarg;
1688        struct inode *inode;
1689        int err;
1690
1691        err = -EINVAL;
1692        if (size != sizeof(outarg))
1693                goto copy_finish;
1694
1695        err = fuse_copy_one(cs, &outarg, sizeof(outarg));
1696        if (err)
1697                goto copy_finish;
1698
1699        fuse_copy_finish(cs);
1700
1701        down_read(&fc->killsb);
1702        err = -ENOENT;
1703        if (fc->sb) {
1704                u64 nodeid = outarg.nodeid;
1705
1706                inode = ilookup5(fc->sb, nodeid, fuse_inode_eq, &nodeid);
1707                if (inode) {
1708                        err = fuse_retrieve(fc, inode, &outarg);
1709                        iput(inode);
1710                }
1711        }
1712        up_read(&fc->killsb);
1713
1714        return err;
1715
1716copy_finish:
1717        fuse_copy_finish(cs);
1718        return err;
1719}
1720
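    /* Dispatch an unsolicited notification message to its handler */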
1721static int fuse_notify(struct fuse_conn *fc, enum fuse_notify_code code,
1722                       unsigned int size, struct fuse_copy_state *cs)
1723{
1724        switch (code) {
1725        case FUSE_NOTIFY_POLL:
1726                return fuse_notify_poll(fc, size, cs);
1727
1728        case FUSE_NOTIFY_INVAL_INODE:
1729                return fuse_notify_inval_inode(fc, size, cs);
1730
1731        case FUSE_NOTIFY_INVAL_ENTRY:
1732                return fuse_notify_inval_entry(fc, size, cs);
1733
1734        case FUSE_NOTIFY_STORE:
1735                return fuse_notify_store(fc, size, cs);
1736
1737        case FUSE_NOTIFY_RETRIEVE:
1738                return fuse_notify_retrieve(fc, size, cs);
1739
1740        case FUSE_NOTIFY_DELETE:
1741                return fuse_notify_delete(fc, size, cs);
1742
1743        default:
1744                fuse_copy_finish(cs);
1745                return -EINVAL;
1746        }
1747}
1748
1749/* Look up request on processing list by unique ID */
1750static struct fuse_req *request_find(struct fuse_conn *fc, u64 unique)
1751{
1752        struct fuse_req *req;
1753
1754        list_for_each_entry(req, &fc->processing, list) {
1755                if (req->in.h.unique == unique || req->intr_unique == unique)
1756                        return req;
1757        }
1758        return NULL;
1759}
1760
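    /*
     * Copy the reply arguments from the write buffer into the request.
     * A reply shorter than expected is only accepted if the last
     * argument is variable length (out->argvar); a longer one is
     * always rejected.
     */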
1761static int copy_out_args(struct fuse_copy_state *cs, struct fuse_out *out,
1762                         unsigned nbytes)
1763{
1764        unsigned reqsize = sizeof(struct fuse_out_header);
1765
1766        if (out->h.error)
1767                return nbytes != reqsize ? -EINVAL : 0;
1768
1769        reqsize += len_args(out->numargs, out->args);
1770
1771        if (reqsize < nbytes || (reqsize > nbytes && !out->argvar))
1772                return -EINVAL;
1773        else if (reqsize > nbytes) {
1774                struct fuse_arg *lastarg = &out->args[out->numargs-1];
1775                unsigned diffsize = reqsize - nbytes;
1776                if (diffsize > lastarg->size)
1777                        return -EINVAL;
1778                lastarg->size -= diffsize;
1779        }
1780        return fuse_copy_args(cs, out->numargs, out->argpages, out->args,
1781                              out->page_zeroing);
1782}
1783
1784/*
1785 * Write a single reply to a request.  First the header is copied from
1786 * the write buffer.  The request is then searched on the processing
1787 * list by the unique ID found in the header.  If found, then remove
1788 * it from the list and copy the rest of the buffer to the request.
1789 * The request is finished by calling request_end()
1790 */
1791static ssize_t fuse_dev_do_write(struct fuse_conn *fc,
1792                                 struct fuse_copy_state *cs, size_t nbytes)
1793{
1794        int err;
1795        struct fuse_req *req;
1796        struct fuse_out_header oh;
1797
1798        if (nbytes < sizeof(struct fuse_out_header))
1799                return -EINVAL;
1800
1801        err = fuse_copy_one(cs, &oh, sizeof(oh));
1802        if (err)
1803                goto err_finish;
1804
1805        err = -EINVAL;
1806        if (oh.len != nbytes)
1807                goto err_finish;
1808
1809        /*
1810         * Zero oh.unique indicates unsolicited notification message
1811         * and error contains notification code.
1812         */
1813        if (!oh.unique) {
1814                err = fuse_notify(fc, oh.error, nbytes - sizeof(oh), cs);
1815                return err ? err : nbytes;
1816        }
1817
1818        err = -EINVAL;
1819        if (oh.error <= -1000 || oh.error > 0)
1820                goto err_finish;
1821
1822        spin_lock(&fc->lock);
1823        err = -ENOENT;
1824        if (!fc->connected)
1825                goto err_unlock;
1826
1827        req = request_find(fc, oh.unique);
1828        if (!req)
1829                goto err_unlock;
1830
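            /* The request was aborted meanwhile: discard the reply and finish it */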
1831        if (req->aborted) {
1832                spin_unlock(&fc->lock);
1833                fuse_copy_finish(cs);
1834                spin_lock(&fc->lock);
1835                request_end(fc, req);
1836                return -ENOENT;
1837        }
1838        /* Is it an interrupt reply? */
1839        if (req->intr_unique == oh.unique) {
1840                err = -EINVAL;
1841                if (nbytes != sizeof(struct fuse_out_header))
1842                        goto err_unlock;
1843
1844                if (oh.error == -ENOSYS)
1845                        fc->no_interrupt = 1;
1846                else if (oh.error == -EAGAIN)
1847                        queue_interrupt(fc, req);
1848
1849                spin_unlock(&fc->lock);
1850                fuse_copy_finish(cs);
1851                return nbytes;
1852        }
1853
1854        req->state = FUSE_REQ_WRITING;
1855        list_move(&req->list, &fc->io);
1856        req->out.h = oh;
1857        req->locked = 1;
1858        cs->req = req;
1859        if (!req->out.page_replace)
1860                cs->move_pages = 0;
1861        spin_unlock(&fc->lock);
1862
1863        err = copy_out_args(cs, &req->out, nbytes);
1864        fuse_copy_finish(cs);
1865
1866        spin_lock(&fc->lock);
1867        req->locked = 0;
1868        if (!err) {
1869                if (req->aborted)
1870                        err = -ENOENT;
1871        } else if (!req->aborted)
1872                req->out.h.error = -EIO;
1873        request_end(fc, req);
1874
1875        return err ? err : nbytes;
1876
1877 err_unlock:
1878        spin_unlock(&fc->lock);
1879 err_finish:
1880        fuse_copy_finish(cs);
1881        return err;
1882}
1883
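    /*
     * aio_write handler for the device: wrap the iovec in a copy state
     * and process the reply with fuse_dev_do_write().
     */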
1884static ssize_t fuse_dev_write(struct kiocb *iocb, const struct iovec *iov,
1885                              unsigned long nr_segs, loff_t pos)
1886{
1887        struct fuse_copy_state cs;
1888        struct fuse_conn *fc = fuse_get_conn(iocb->ki_filp);
1889        if (!fc)
1890                return -EPERM;
1891
1892        fuse_copy_init(&cs, fc, 0, iov, nr_segs);
1893
1894        return fuse_dev_do_write(fc, &cs, iov_length(iov, nr_segs));
1895}
1896
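    /*
     * splice_write handler: detach the spliced buffers from the pipe and
     * feed them to fuse_dev_do_write().  With SPLICE_F_MOVE the pages may
     * be moved into the page cache instead of being copied.
     */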
1897static ssize_t fuse_dev_splice_write(struct pipe_inode_info *pipe,
1898                                     struct file *out, loff_t *ppos,
1899                                     size_t len, unsigned int flags)
1900{
1901        unsigned nbuf;
1902        unsigned idx;
1903        struct pipe_buffer *bufs;
1904        struct fuse_copy_state cs;
1905        struct fuse_conn *fc;
1906        size_t rem;
1907        ssize_t ret;
1908
1909        fc = fuse_get_conn(out);
1910        if (!fc)
1911                return -EPERM;
1912
1913        bufs = kmalloc(pipe->buffers * sizeof(struct pipe_buffer), GFP_KERNEL);
1914        if (!bufs)
1915                return -ENOMEM;
1916
1917        pipe_lock(pipe);
1918        nbuf = 0;
1919        rem = 0;
1920        for (idx = 0; idx < pipe->nrbufs && rem < len; idx++)
1921                rem += pipe->bufs[(pipe->curbuf + idx) & (pipe->buffers - 1)].len;
1922
1923        ret = -EINVAL;
1924        if (rem < len) {
1925                pipe_unlock(pipe);
1926                goto out;
1927        }
1928
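            /*
             * Detach up to len bytes worth of buffers from the pipe into
             * the local array, taking an extra reference and splitting
             * the last buffer if only part of it is needed.
             */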
1929        rem = len;
1930        while (rem) {
1931                struct pipe_buffer *ibuf;
1932                struct pipe_buffer *obuf;
1933
1934                BUG_ON(nbuf >= pipe->buffers);
1935                BUG_ON(!pipe->nrbufs);
1936                ibuf = &pipe->bufs[pipe->curbuf];
1937                obuf = &bufs[nbuf];
1938
1939                if (rem >= ibuf->len) {
1940                        *obuf = *ibuf;
1941                        ibuf->ops = NULL;
1942                        pipe->curbuf = (pipe->curbuf + 1) & (pipe->buffers - 1);
1943                        pipe->nrbufs--;
1944                } else {
1945                        ibuf->ops->get(pipe, ibuf);
1946                        *obuf = *ibuf;
1947                        obuf->flags &= ~PIPE_BUF_FLAG_GIFT;
1948                        obuf->len = rem;
1949                        ibuf->offset += obuf->len;
1950                        ibuf->len -= obuf->len;
1951                }
1952                nbuf++;
1953                rem -= obuf->len;
1954        }
1955        pipe_unlock(pipe);
1956
1957        fuse_copy_init(&cs, fc, 0, NULL, nbuf);
1958        cs.pipebufs = bufs;
1959        cs.pipe = pipe;
1960
1961        if (flags & SPLICE_F_MOVE)
1962                cs.move_pages = 1;
1963
1964        ret = fuse_dev_do_write(fc, &cs, len);
1965
1966        for (idx = 0; idx < nbuf; idx++) {
1967                struct pipe_buffer *buf = &bufs[idx];
1968                buf->ops->release(pipe, buf);
1969        }
1970out:
1971        kfree(bufs);
1972        return ret;
1973}
1974
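    /*
     * The device is always writable; it becomes readable when there is
     * queued work to hand to userspace, and POLLERR once the connection
     * is no longer alive.
     */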
1975static unsigned fuse_dev_poll(struct file *file, poll_table *wait)
1976{
1977        unsigned mask = POLLOUT | POLLWRNORM;
1978        struct fuse_conn *fc = fuse_get_conn(file);
1979        if (!fc)
1980                return POLLERR;
1981
1982        poll_wait(file, &fc->waitq, wait);
1983
1984        spin_lock(&fc->lock);
1985        if (!fc->connected)
1986                mask = POLLERR;
1987        else if (request_pending(fc))
1988                mask |= POLLIN | POLLRDNORM;
1989        spin_unlock(&fc->lock);
1990
1991        return mask;
1992}
1993
1994/*
1995 * Abort all requests on the given list (pending or processing)
1996 *
1997 * This function releases and reacquires fc->lock
1998 */
1999static void end_requests(struct fuse_conn *fc, struct list_head *head)
2000__releases(fc->lock)
2001__acquires(fc->lock)
2002{
2003        while (!list_empty(head)) {
2004                struct fuse_req *req;
2005                req = list_entry(head->next, struct fuse_req, list);
2006                req->out.h.error = -ECONNABORTED;
2007                request_end(fc, req);
2008                spin_lock(&fc->lock);
2009        }
2010}
2011
2012/*
2013 * Abort requests under I/O
2014 *
2015 * The requests are set to aborted and finished, and the request
2016 * waiter is woken up.  This will make request_wait_answer() wait
2017 * until the request is unlocked and then return.
2018 *
2019 * If the request is asynchronous, then the end function needs to be
2020 * called after waiting for the request to be unlocked (if it was
2021 * locked).
2022 */
2023static void end_io_requests(struct fuse_conn *fc)
2024__releases(fc->lock)
2025__acquires(fc->lock)
2026{
2027        while (!list_empty(&fc->io)) {
2028                struct fuse_req *req =
2029                        list_entry(fc->io.next, struct fuse_req, list);
2030                void (*end) (struct fuse_conn *, struct fuse_req *) = req->end;
2031
2032                req->aborted = 1;
2033                req->out.h.error = -ECONNABORTED;
2034                req->state = FUSE_REQ_FINISHED;
2035                list_del_init(&req->list);
2036                wake_up(&req->waitq);
2037                if (end) {
2038                        req->end = NULL;
2039                        __fuse_get_request(req);
2040                        spin_unlock(&fc->lock);
2041                        wait_event(req->waitq, !req->locked);
2042                        end(fc, req);
2043                        fuse_put_request(fc, req);
2044                        spin_lock(&fc->lock);
2045                }
2046        }
2047}
2048
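    /*
     * Abort everything still waiting to be sent or answered: flush the
     * background queue (max_background is lifted so all of it moves to
     * pending), end the pending and processing lists and free any queued
     * forgets.
     */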
2049static void end_queued_requests(struct fuse_conn *fc)
2050__releases(fc->lock)
2051__acquires(fc->lock)
2052{
2053        fc->max_background = UINT_MAX;
2054        flush_bg_queue(fc);
2055        end_requests(fc, &fc->pending);
2056        end_requests(fc, &fc->processing);
2057        while (forget_pending(fc))
2058                kfree(dequeue_forget(fc, 1, NULL));
2059}
2060
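    /* Wake up everyone sleeping in poll() on a polled fuse file */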
2061static void end_polls(struct fuse_conn *fc)
2062{
2063        struct rb_node *p;
2064
2065        p = rb_first(&fc->polled_files);
2066
2067        while (p) {
2068                struct fuse_file *ff;
2069                ff = rb_entry(p, struct fuse_file, polled_node);
2070                wake_up_interruptible_all(&ff->poll_wait);
2071
2072                p = rb_next(p);
2073        }
2074}
2075
2076/*
2077 * Abort all requests.
2078 *
2079 * Emergency exit in case of a malicious or accidental deadlock, or
2080 * just a hung filesystem.
2081 *
2082 * The same effect is usually achievable through killing the
2083 * filesystem daemon and all users of the filesystem.  The exception
2084 * is the combination of an asynchronous request and the tricky
2085 * deadlock (see Documentation/filesystems/fuse.txt).
2086 *
2087 * During the aborting, progression of requests from the pending and
2088 * processing lists onto the io list, and progression of new requests
2090 * onto the pending list is prevented by fc->connected being false.
2090 *
2091 * Progression of requests under I/O to the processing list is
2092 * prevented by the req->aborted flag being true for these requests.
2093 * For this reason requests on the io list must be aborted first.
2094 */
2095void fuse_abort_conn(struct fuse_conn *fc)
2096{
2097        spin_lock(&fc->lock);
2098        if (fc->connected) {
2099                fc->connected = 0;
2100                fc->blocked = 0;
2101                fc->initialized = 1;
2102                end_io_requests(fc);
2103                end_queued_requests(fc);
2104                end_polls(fc);
2105                wake_up_all(&fc->waitq);
2106                wake_up_all(&fc->blocked_waitq);
2107                kill_fasync(&fc->fasync, SIGIO, POLL_IN);
2108        }
2109        spin_unlock(&fc->lock);
2110}
2111EXPORT_SYMBOL_GPL(fuse_abort_conn);
2112
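    /*
     * Called on the last close of the device file: mark the connection
     * dead, finish all queued requests and wake up anyone still waiting
     * for the connection to become unblocked.
     */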
2113int fuse_dev_release(struct inode *inode, struct file *file)
2114{
2115        struct fuse_conn *fc = fuse_get_conn(file);
2116        if (fc) {
2117                spin_lock(&fc->lock);
2118                fc->connected = 0;
2119                fc->blocked = 0;
2120                fc->initialized = 1;
2121                end_queued_requests(fc);
2122                end_polls(fc);
2123                wake_up_all(&fc->blocked_waitq);
2124                spin_unlock(&fc->lock);
2125                fuse_conn_put(fc);
2126        }
2127
2128        return 0;
2129}
2130EXPORT_SYMBOL_GPL(fuse_dev_release);
2131
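    /* Enable or disable asynchronous (SIGIO) notification on the device */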
2132static int fuse_dev_fasync(int fd, struct file *file, int on)
2133{
2134        struct fuse_conn *fc = fuse_get_conn(file);
2135        if (!fc)
2136                return -EPERM;
2137
2138        /* No locking - fasync_helper does its own locking */
2139        return fasync_helper(fd, file, on, &fc->fasync);
2140}
2141
2142const struct file_operations fuse_dev_operations = {
2143        .owner          = THIS_MODULE,
2144        .llseek         = no_llseek,
2145        .read           = do_sync_read,
2146        .aio_read       = fuse_dev_read,
2147        .splice_read    = fuse_dev_splice_read,
2148        .write          = do_sync_write,
2149        .aio_write      = fuse_dev_write,
2150        .splice_write   = fuse_dev_splice_write,
2151        .poll           = fuse_dev_poll,
2152        .release        = fuse_dev_release,
2153        .fasync         = fuse_dev_fasync,
2154};
2155EXPORT_SYMBOL_GPL(fuse_dev_operations);
2156
2157static struct miscdevice fuse_miscdevice = {
2158        .minor = FUSE_MINOR,
2159        .name  = "fuse",
2160        .fops = &fuse_dev_operations,
2161};
2162
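    /* Create the request slab cache and register the fuse misc device */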
2163int __init fuse_dev_init(void)
2164{
2165        int err = -ENOMEM;
2166        fuse_req_cachep = kmem_cache_create("fuse_request",
2167                                            sizeof(struct fuse_req),
2168                                            0, 0, NULL);
2169        if (!fuse_req_cachep)
2170                goto out;
2171
2172        err = misc_register(&fuse_miscdevice);
2173        if (err)
2174                goto out_cache_clean;
2175
2176        return 0;
2177
2178 out_cache_clean:
2179        kmem_cache_destroy(fuse_req_cachep);
2180 out:
2181        return err;
2182}
2183
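    /* Undo fuse_dev_init(): unregister the device and destroy the cache */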
2184void fuse_dev_cleanup(void)
2185{
2186        misc_deregister(&fuse_miscdevice);
2187        kmem_cache_destroy(fuse_req_cachep);
2188}
2189