linux/fs/fuse/dev.c
   1/*
   2  FUSE: Filesystem in Userspace
   3  Copyright (C) 2001-2008  Miklos Szeredi <miklos@szeredi.hu>
   4
   5  This program can be distributed under the terms of the GNU GPL.
   6  See the file COPYING.
   7*/
   8
   9#include "fuse_i.h"
  10
  11#include <linux/init.h>
  12#include <linux/module.h>
  13#include <linux/poll.h>
  14#include <linux/uio.h>
  15#include <linux/miscdevice.h>
  16#include <linux/pagemap.h>
  17#include <linux/file.h>
  18#include <linux/slab.h>
  19#include <linux/pipe_fs_i.h>
  20#include <linux/swap.h>
  21#include <linux/splice.h>
  22
  23MODULE_ALIAS_MISCDEV(FUSE_MINOR);
  24MODULE_ALIAS("devname:fuse");
  25
  26static struct kmem_cache *fuse_req_cachep;
  27
  28static struct fuse_conn *fuse_get_conn(struct file *file)
  29{
  30        /*
  31         * Lockless access is OK, because file->private_data is set
  32         * once during mount and is valid until the file is released.
  33         */
  34        return file->private_data;
  35}
  36
  37static void fuse_request_init(struct fuse_req *req)
  38{
  39        memset(req, 0, sizeof(*req));
  40        INIT_LIST_HEAD(&req->list);
  41        INIT_LIST_HEAD(&req->intr_entry);
  42        init_waitqueue_head(&req->waitq);
  43        atomic_set(&req->count, 1);
  44}
  45
  46struct fuse_req *fuse_request_alloc(void)
  47{
  48        struct fuse_req *req = kmem_cache_alloc(fuse_req_cachep, GFP_KERNEL);
  49        if (req)
  50                fuse_request_init(req);
  51        return req;
  52}
  53EXPORT_SYMBOL_GPL(fuse_request_alloc);
  54
  55struct fuse_req *fuse_request_alloc_nofs(void)
  56{
  57        struct fuse_req *req = kmem_cache_alloc(fuse_req_cachep, GFP_NOFS);
  58        if (req)
  59                fuse_request_init(req);
  60        return req;
  61}
  62
  63void fuse_request_free(struct fuse_req *req)
  64{
  65        kmem_cache_free(fuse_req_cachep, req);
  66}
  67
  68static void block_sigs(sigset_t *oldset)
  69{
  70        sigset_t mask;
  71
  72        siginitsetinv(&mask, sigmask(SIGKILL));
  73        sigprocmask(SIG_BLOCK, &mask, oldset);
  74}
  75
  76static void restore_sigs(sigset_t *oldset)
  77{
  78        sigprocmask(SIG_SETMASK, oldset, NULL);
  79}
  80
  81static void __fuse_get_request(struct fuse_req *req)
  82{
  83        atomic_inc(&req->count);
  84}
  85
  86/* Must be called with > 1 refcount */
  87static void __fuse_put_request(struct fuse_req *req)
  88{
  89        BUG_ON(atomic_read(&req->count) < 2);
  90        atomic_dec(&req->count);
  91}
  92
  93static void fuse_req_init_context(struct fuse_req *req)
  94{
  95        req->in.h.uid = current_fsuid();
  96        req->in.h.gid = current_fsgid();
  97        req->in.h.pid = current->pid;
  98}
  99
 100struct fuse_req *fuse_get_req(struct fuse_conn *fc)
 101{
 102        struct fuse_req *req;
 103        sigset_t oldset;
 104        int intr;
 105        int err;
 106
 107        atomic_inc(&fc->num_waiting);
 108        block_sigs(&oldset);
 109        intr = wait_event_interruptible(fc->blocked_waitq, !fc->blocked);
 110        restore_sigs(&oldset);
 111        err = -EINTR;
 112        if (intr)
 113                goto out;
 114
 115        err = -ENOTCONN;
 116        if (!fc->connected)
 117                goto out;
 118
 119        req = fuse_request_alloc();
 120        err = -ENOMEM;
 121        if (!req)
 122                goto out;
 123
 124        fuse_req_init_context(req);
 125        req->waiting = 1;
 126        return req;
 127
 128 out:
 129        atomic_dec(&fc->num_waiting);
 130        return ERR_PTR(err);
 131}
 132EXPORT_SYMBOL_GPL(fuse_get_req);
 133
 134/*
 135 * Return request in fuse_file->reserved_req.  However that may
 136 * currently be in use.  If that is the case, wait for it to become
 137 * available.
 138 */
 139static struct fuse_req *get_reserved_req(struct fuse_conn *fc,
 140                                         struct file *file)
 141{
 142        struct fuse_req *req = NULL;
 143        struct fuse_file *ff = file->private_data;
 144
 145        do {
 146                wait_event(fc->reserved_req_waitq, ff->reserved_req);
 147                spin_lock(&fc->lock);
 148                if (ff->reserved_req) {
 149                        req = ff->reserved_req;
 150                        ff->reserved_req = NULL;
 151                        get_file(file);
 152                        req->stolen_file = file;
 153                }
 154                spin_unlock(&fc->lock);
 155        } while (!req);
 156
 157        return req;
 158}
 159
 160/*
 161 * Put stolen request back into fuse_file->reserved_req
 162 */
 163static void put_reserved_req(struct fuse_conn *fc, struct fuse_req *req)
 164{
 165        struct file *file = req->stolen_file;
 166        struct fuse_file *ff = file->private_data;
 167
 168        spin_lock(&fc->lock);
 169        fuse_request_init(req);
 170        BUG_ON(ff->reserved_req);
 171        ff->reserved_req = req;
 172        wake_up_all(&fc->reserved_req_waitq);
 173        spin_unlock(&fc->lock);
 174        fput(file);
 175}
 176
 177/*
 178 * Gets a request for a file operation, always succeeds
 179 *
 180 * This is used for sending the FLUSH request, which must get to
 181 * userspace, due to POSIX locks which may need to be unlocked.
 182 *
 183 * If allocation fails due to OOM, use the reserved request in
 184 * fuse_file.
 185 *
 186 * This is very unlikely to deadlock accidentally, since the
 187 * filesystem should not have its own file open.  If deadlock is
 188 * intentional, it can still be broken by "aborting" the filesystem.
 189 */
 190struct fuse_req *fuse_get_req_nofail(struct fuse_conn *fc, struct file *file)
 191{
 192        struct fuse_req *req;
 193
 194        atomic_inc(&fc->num_waiting);
 195        wait_event(fc->blocked_waitq, !fc->blocked);
 196        req = fuse_request_alloc();
 197        if (!req)
 198                req = get_reserved_req(fc, file);
 199
 200        fuse_req_init_context(req);
 201        req->waiting = 1;
 202        return req;
 203}
 204
 205void fuse_put_request(struct fuse_conn *fc, struct fuse_req *req)
 206{
 207        if (atomic_dec_and_test(&req->count)) {
 208                if (req->waiting)
 209                        atomic_dec(&fc->num_waiting);
 210
 211                if (req->stolen_file)
 212                        put_reserved_req(fc, req);
 213                else
 214                        fuse_request_free(req);
 215        }
 216}
 217EXPORT_SYMBOL_GPL(fuse_put_request);
 218
 219static unsigned len_args(unsigned numargs, struct fuse_arg *args)
 220{
 221        unsigned nbytes = 0;
 222        unsigned i;
 223
 224        for (i = 0; i < numargs; i++)
 225                nbytes += args[i].size;
 226
 227        return nbytes;
 228}
 229
 230static u64 fuse_get_unique(struct fuse_conn *fc)
 231{
 232        fc->reqctr++;
 233        /* zero is special */
 234        if (fc->reqctr == 0)
 235                fc->reqctr = 1;
 236
 237        return fc->reqctr;
 238}
 239
 240static void queue_request(struct fuse_conn *fc, struct fuse_req *req)
 241{
 242        req->in.h.len = sizeof(struct fuse_in_header) +
 243                len_args(req->in.numargs, (struct fuse_arg *) req->in.args);
 244        list_add_tail(&req->list, &fc->pending);
 245        req->state = FUSE_REQ_PENDING;
 246        if (!req->waiting) {
 247                req->waiting = 1;
 248                atomic_inc(&fc->num_waiting);
 249        }
 250        wake_up(&fc->waitq);
 251        kill_fasync(&fc->fasync, SIGIO, POLL_IN);
 252}
 253
 254void fuse_queue_forget(struct fuse_conn *fc, struct fuse_forget_link *forget,
 255                       u64 nodeid, u64 nlookup)
 256{
 257        forget->forget_one.nodeid = nodeid;
 258        forget->forget_one.nlookup = nlookup;
 259
 260        spin_lock(&fc->lock);
 261        fc->forget_list_tail->next = forget;
 262        fc->forget_list_tail = forget;
 263        wake_up(&fc->waitq);
 264        kill_fasync(&fc->fasync, SIGIO, POLL_IN);
 265        spin_unlock(&fc->lock);
 266}
 267
 268static void flush_bg_queue(struct fuse_conn *fc)
 269{
 270        while (fc->active_background < fc->max_background &&
 271               !list_empty(&fc->bg_queue)) {
 272                struct fuse_req *req;
 273
 274                req = list_entry(fc->bg_queue.next, struct fuse_req, list);
 275                list_del(&req->list);
 276                fc->active_background++;
 277                req->in.h.unique = fuse_get_unique(fc);
 278                queue_request(fc, req);
 279        }
 280}
 281
 282/*
 283 * This function is called when a request is finished.  Either a reply
 284 * has arrived or it was aborted (and not yet sent) or some error
 285 * occurred during communication with userspace, or the device file
 286 * was closed.  The requester thread is woken up (if still waiting),
 287 * the 'end' callback is called if given, else the reference to the
 288 * request is released.
 289 *
 290 * Called with fc->lock held, unlocks it
 291 */
 292static void request_end(struct fuse_conn *fc, struct fuse_req *req)
 293__releases(fc->lock)
 294{
 295        void (*end) (struct fuse_conn *, struct fuse_req *) = req->end;
 296        req->end = NULL;
 297        list_del(&req->list);
 298        list_del(&req->intr_entry);
 299        req->state = FUSE_REQ_FINISHED;
 300        if (req->background) {
 301                if (fc->num_background == fc->max_background) {
 302                        fc->blocked = 0;
 303                        wake_up_all(&fc->blocked_waitq);
 304                }
 305                if (fc->num_background == fc->congestion_threshold &&
 306                    fc->connected && fc->bdi_initialized) {
 307                        clear_bdi_congested(&fc->bdi, BLK_RW_SYNC);
 308                        clear_bdi_congested(&fc->bdi, BLK_RW_ASYNC);
 309                }
 310                fc->num_background--;
 311                fc->active_background--;
 312                flush_bg_queue(fc);
 313        }
 314        spin_unlock(&fc->lock);
 315        wake_up(&req->waitq);
 316        if (end)
 317                end(fc, req);
 318        fuse_put_request(fc, req);
 319}
 320
 321static void wait_answer_interruptible(struct fuse_conn *fc,
 322                                      struct fuse_req *req)
 323__releases(fc->lock)
 324__acquires(fc->lock)
 325{
 326        if (signal_pending(current))
 327                return;
 328
 329        spin_unlock(&fc->lock);
 330        wait_event_interruptible(req->waitq, req->state == FUSE_REQ_FINISHED);
 331        spin_lock(&fc->lock);
 332}
 333
 334static void queue_interrupt(struct fuse_conn *fc, struct fuse_req *req)
 335{
 336        list_add_tail(&req->intr_entry, &fc->interrupts);
 337        wake_up(&fc->waitq);
 338        kill_fasync(&fc->fasync, SIGIO, POLL_IN);
 339}
 340
 341static void request_wait_answer(struct fuse_conn *fc, struct fuse_req *req)
 342__releases(fc->lock)
 343__acquires(fc->lock)
 344{
 345        if (!fc->no_interrupt) {
 346                /* Any signal may interrupt this */
 347                wait_answer_interruptible(fc, req);
 348
 349                if (req->aborted)
 350                        goto aborted;
 351                if (req->state == FUSE_REQ_FINISHED)
 352                        return;
 353
 354                req->interrupted = 1;
 355                if (req->state == FUSE_REQ_SENT)
 356                        queue_interrupt(fc, req);
 357        }
 358
 359        if (!req->force) {
 360                sigset_t oldset;
 361
 362                /* Only fatal signals may interrupt this */
 363                block_sigs(&oldset);
 364                wait_answer_interruptible(fc, req);
 365                restore_sigs(&oldset);
 366
 367                if (req->aborted)
 368                        goto aborted;
 369                if (req->state == FUSE_REQ_FINISHED)
 370                        return;
 371
 372                /* Request is not yet in userspace, bail out */
 373                if (req->state == FUSE_REQ_PENDING) {
 374                        list_del(&req->list);
 375                        __fuse_put_request(req);
 376                        req->out.h.error = -EINTR;
 377                        return;
 378                }
 379        }
 380
 381        /*
 382         * Either request is already in userspace, or it was forced.
 383         * Wait it out.
 384         */
 385        spin_unlock(&fc->lock);
 386        wait_event(req->waitq, req->state == FUSE_REQ_FINISHED);
 387        spin_lock(&fc->lock);
 388
 389        if (!req->aborted)
 390                return;
 391
 392 aborted:
 393        BUG_ON(req->state != FUSE_REQ_FINISHED);
 394        if (req->locked) {
 395                /* This is uninterruptible sleep, because data is
 396                   being copied to/from the buffers of req.  During
 397                   locked state, there mustn't be any filesystem
 398                   operation (e.g. page fault), since that could lead
 399                   to deadlock */
 400                spin_unlock(&fc->lock);
 401                wait_event(req->waitq, !req->locked);
 402                spin_lock(&fc->lock);
 403        }
 404}
 405
 406void fuse_request_send(struct fuse_conn *fc, struct fuse_req *req)
 407{
 408        req->isreply = 1;
 409        spin_lock(&fc->lock);
 410        if (!fc->connected)
 411                req->out.h.error = -ENOTCONN;
 412        else if (fc->conn_error)
 413                req->out.h.error = -ECONNREFUSED;
 414        else {
 415                req->in.h.unique = fuse_get_unique(fc);
 416                queue_request(fc, req);
 417                /* acquire extra reference, since request is still needed
 418                   after request_end() */
 419                __fuse_get_request(req);
 420
 421                request_wait_answer(fc, req);
 422        }
 423        spin_unlock(&fc->lock);
 424}
 425EXPORT_SYMBOL_GPL(fuse_request_send);
 426
 427static void fuse_request_send_nowait_locked(struct fuse_conn *fc,
 428                                            struct fuse_req *req)
 429{
 430        req->background = 1;
 431        fc->num_background++;
 432        if (fc->num_background == fc->max_background)
 433                fc->blocked = 1;
 434        if (fc->num_background == fc->congestion_threshold &&
 435            fc->bdi_initialized) {
 436                set_bdi_congested(&fc->bdi, BLK_RW_SYNC);
 437                set_bdi_congested(&fc->bdi, BLK_RW_ASYNC);
 438        }
 439        list_add_tail(&req->list, &fc->bg_queue);
 440        flush_bg_queue(fc);
 441}
 442
 443static void fuse_request_send_nowait(struct fuse_conn *fc, struct fuse_req *req)
 444{
 445        spin_lock(&fc->lock);
 446        if (fc->connected) {
 447                fuse_request_send_nowait_locked(fc, req);
 448                spin_unlock(&fc->lock);
 449        } else {
 450                req->out.h.error = -ENOTCONN;
 451                request_end(fc, req);
 452        }
 453}
 454
 455void fuse_request_send_background(struct fuse_conn *fc, struct fuse_req *req)
 456{
 457        req->isreply = 1;
 458        fuse_request_send_nowait(fc, req);
 459}
 460EXPORT_SYMBOL_GPL(fuse_request_send_background);
 461
 462static int fuse_request_send_notify_reply(struct fuse_conn *fc,
 463                                          struct fuse_req *req, u64 unique)
 464{
 465        int err = -ENODEV;
 466
 467        req->isreply = 0;
 468        req->in.h.unique = unique;
 469        spin_lock(&fc->lock);
 470        if (fc->connected) {
 471                queue_request(fc, req);
 472                err = 0;
 473        }
 474        spin_unlock(&fc->lock);
 475
 476        return err;
 477}
 478
 479/*
 480 * Called under fc->lock
 481 *
 482 * fc->connected must have been checked previously
 483 */
 484void fuse_request_send_background_locked(struct fuse_conn *fc,
 485                                         struct fuse_req *req)
 486{
 487        req->isreply = 1;
 488        fuse_request_send_nowait_locked(fc, req);
 489}
 490
 491/*
 492 * Lock the request.  Up to the next unlock_request() there mustn't be
 493 * anything that could cause a page-fault.  If the request was already
 494 * aborted, bail out.
 495 */
 496static int lock_request(struct fuse_conn *fc, struct fuse_req *req)
 497{
 498        int err = 0;
 499        if (req) {
 500                spin_lock(&fc->lock);
 501                if (req->aborted)
 502                        err = -ENOENT;
 503                else
 504                        req->locked = 1;
 505                spin_unlock(&fc->lock);
 506        }
 507        return err;
 508}
 509
 510/*
 511 * Unlock request.  If it was aborted while locked, the
 512 * requester thread is currently waiting for it to be unlocked, so
 513 * wake it up.
 514 */
 515static void unlock_request(struct fuse_conn *fc, struct fuse_req *req)
 516{
 517        if (req) {
 518                spin_lock(&fc->lock);
 519                req->locked = 0;
 520                if (req->aborted)
 521                        wake_up(&req->waitq);
 522                spin_unlock(&fc->lock);
 523        }
 524}
 525
 526struct fuse_copy_state {
 527        struct fuse_conn *fc;
 528        int write;
 529        struct fuse_req *req;
 530        const struct iovec *iov;
 531        struct pipe_buffer *pipebufs;
 532        struct pipe_buffer *currbuf;
 533        struct pipe_inode_info *pipe;
 534        unsigned long nr_segs;
 535        unsigned long seglen;
 536        unsigned long addr;
 537        struct page *pg;
 538        void *mapaddr;
 539        void *buf;
 540        unsigned len;
 541        unsigned move_pages:1;
 542};
 543
 544static void fuse_copy_init(struct fuse_copy_state *cs, struct fuse_conn *fc,
 545                           int write,
 546                           const struct iovec *iov, unsigned long nr_segs)
 547{
 548        memset(cs, 0, sizeof(*cs));
 549        cs->fc = fc;
 550        cs->write = write;
 551        cs->iov = iov;
 552        cs->nr_segs = nr_segs;
 553}
 554
 555/* Unmap and put previous page of userspace buffer */
 556static void fuse_copy_finish(struct fuse_copy_state *cs)
 557{
 558        if (cs->currbuf) {
 559                struct pipe_buffer *buf = cs->currbuf;
 560
 561                if (!cs->write) {
 562                        buf->ops->unmap(cs->pipe, buf, cs->mapaddr);
 563                } else {
 564                        kunmap(buf->page);
 565                        buf->len = PAGE_SIZE - cs->len;
 566                }
 567                cs->currbuf = NULL;
 568                cs->mapaddr = NULL;
 569        } else if (cs->mapaddr) {
 570                kunmap(cs->pg);
 571                if (cs->write) {
 572                        flush_dcache_page(cs->pg);
 573                        set_page_dirty_lock(cs->pg);
 574                }
 575                put_page(cs->pg);
 576                cs->mapaddr = NULL;
 577        }
 578}
 579
 580/*
 581 * Get another pageful of userspace buffer, map it to kernel
 582 * address space, and lock the request
 583 */
 584static int fuse_copy_fill(struct fuse_copy_state *cs)
 585{
 586        unsigned long offset;
 587        int err;
 588
 589        unlock_request(cs->fc, cs->req);
 590        fuse_copy_finish(cs);
 591        if (cs->pipebufs) {
 592                struct pipe_buffer *buf = cs->pipebufs;
 593
 594                if (!cs->write) {
 595                        err = buf->ops->confirm(cs->pipe, buf);
 596                        if (err)
 597                                return err;
 598
 599                        BUG_ON(!cs->nr_segs);
 600                        cs->currbuf = buf;
 601                        cs->mapaddr = buf->ops->map(cs->pipe, buf, 0);
 602                        cs->len = buf->len;
 603                        cs->buf = cs->mapaddr + buf->offset;
 604                        cs->pipebufs++;
 605                        cs->nr_segs--;
 606                } else {
 607                        struct page *page;
 608
 609                        if (cs->nr_segs == cs->pipe->buffers)
 610                                return -EIO;
 611
 612                        page = alloc_page(GFP_HIGHUSER);
 613                        if (!page)
 614                                return -ENOMEM;
 615
 616                        buf->page = page;
 617                        buf->offset = 0;
 618                        buf->len = 0;
 619
 620                        cs->currbuf = buf;
 621                        cs->mapaddr = kmap(page);
 622                        cs->buf = cs->mapaddr;
 623                        cs->len = PAGE_SIZE;
 624                        cs->pipebufs++;
 625                        cs->nr_segs++;
 626                }
 627        } else {
 628                if (!cs->seglen) {
 629                        BUG_ON(!cs->nr_segs);
 630                        cs->seglen = cs->iov[0].iov_len;
 631                        cs->addr = (unsigned long) cs->iov[0].iov_base;
 632                        cs->iov++;
 633                        cs->nr_segs--;
 634                }
 635                err = get_user_pages_fast(cs->addr, 1, cs->write, &cs->pg);
 636                if (err < 0)
 637                        return err;
 638                BUG_ON(err != 1);
 639                offset = cs->addr % PAGE_SIZE;
 640                cs->mapaddr = kmap(cs->pg);
 641                cs->buf = cs->mapaddr + offset;
 642                cs->len = min(PAGE_SIZE - offset, cs->seglen);
 643                cs->seglen -= cs->len;
 644                cs->addr += cs->len;
 645        }
 646
 647        return lock_request(cs->fc, cs->req);
 648}
 649
 650/* Copy as much to/from the userspace buffer as we can */
 651static int fuse_copy_do(struct fuse_copy_state *cs, void **val, unsigned *size)
 652{
 653        unsigned ncpy = min(*size, cs->len);
 654        if (val) {
 655                if (cs->write)
 656                        memcpy(cs->buf, *val, ncpy);
 657                else
 658                        memcpy(*val, cs->buf, ncpy);
 659                *val += ncpy;
 660        }
 661        *size -= ncpy;
 662        cs->len -= ncpy;
 663        cs->buf += ncpy;
 664        return ncpy;
 665}
 666
 667static int fuse_check_page(struct page *page)
 668{
 669        if (page_mapcount(page) ||
 670            page->mapping != NULL ||
 671            page_count(page) != 1 ||
 672            (page->flags & PAGE_FLAGS_CHECK_AT_PREP &
 673             ~(1 << PG_locked |
 674               1 << PG_referenced |
 675               1 << PG_uptodate |
 676               1 << PG_lru |
 677               1 << PG_active |
 678               1 << PG_reclaim))) {
 679                printk(KERN_WARNING "fuse: trying to steal weird page\n");
 680                printk(KERN_WARNING "  page=%p index=%li flags=%08lx, count=%i, mapcount=%i, mapping=%p\n", page, page->index, page->flags, page_count(page), page_mapcount(page), page->mapping);
 681                return 1;
 682        }
 683        return 0;
 684}
 685
 686static int fuse_try_move_page(struct fuse_copy_state *cs, struct page **pagep)
 687{
 688        int err;
 689        struct page *oldpage = *pagep;
 690        struct page *newpage;
 691        struct pipe_buffer *buf = cs->pipebufs;
 692        struct address_space *mapping;
 693        pgoff_t index;
 694
 695        unlock_request(cs->fc, cs->req);
 696        fuse_copy_finish(cs);
 697
 698        err = buf->ops->confirm(cs->pipe, buf);
 699        if (err)
 700                return err;
 701
 702        BUG_ON(!cs->nr_segs);
 703        cs->currbuf = buf;
 704        cs->len = buf->len;
 705        cs->pipebufs++;
 706        cs->nr_segs--;
 707
 708        if (cs->len != PAGE_SIZE)
 709                goto out_fallback;
 710
 711        if (buf->ops->steal(cs->pipe, buf) != 0)
 712                goto out_fallback;
 713
 714        newpage = buf->page;
 715
 716        if (WARN_ON(!PageUptodate(newpage)))
 717                return -EIO;
 718
 719        ClearPageMappedToDisk(newpage);
 720
 721        if (fuse_check_page(newpage) != 0)
 722                goto out_fallback_unlock;
 723
 724        mapping = oldpage->mapping;
 725        index = oldpage->index;
 726
 727        /*
 728         * This is a new and locked page; it shouldn't be mapped or
 729         * have any special flags on it
 730         */
 731        if (WARN_ON(page_mapped(oldpage)))
 732                goto out_fallback_unlock;
 733        if (WARN_ON(page_has_private(oldpage)))
 734                goto out_fallback_unlock;
 735        if (WARN_ON(PageDirty(oldpage) || PageWriteback(oldpage)))
 736                goto out_fallback_unlock;
 737        if (WARN_ON(PageMlocked(oldpage)))
 738                goto out_fallback_unlock;
 739
 740        remove_from_page_cache(oldpage);
 741        page_cache_release(oldpage);
 742
 743        err = add_to_page_cache_locked(newpage, mapping, index, GFP_KERNEL);
 744        if (err) {
 745                printk(KERN_WARNING "fuse_try_move_page: failed to add page");
 746                goto out_fallback_unlock;
 747        }
 748        page_cache_get(newpage);
 749
 750        if (!(buf->flags & PIPE_BUF_FLAG_LRU))
 751                lru_cache_add_file(newpage);
 752
 753        err = 0;
 754        spin_lock(&cs->fc->lock);
 755        if (cs->req->aborted)
 756                err = -ENOENT;
 757        else
 758                *pagep = newpage;
 759        spin_unlock(&cs->fc->lock);
 760
 761        if (err) {
 762                unlock_page(newpage);
 763                page_cache_release(newpage);
 764                return err;
 765        }
 766
 767        unlock_page(oldpage);
 768        page_cache_release(oldpage);
 769        cs->len = 0;
 770
 771        return 0;
 772
 773out_fallback_unlock:
 774        unlock_page(newpage);
 775out_fallback:
 776        cs->mapaddr = buf->ops->map(cs->pipe, buf, 1);
 777        cs->buf = cs->mapaddr + buf->offset;
 778
 779        err = lock_request(cs->fc, cs->req);
 780        if (err)
 781                return err;
 782
 783        return 1;
 784}
 785
 786static int fuse_ref_page(struct fuse_copy_state *cs, struct page *page,
 787                         unsigned offset, unsigned count)
 788{
 789        struct pipe_buffer *buf;
 790
 791        if (cs->nr_segs == cs->pipe->buffers)
 792                return -EIO;
 793
 794        unlock_request(cs->fc, cs->req);
 795        fuse_copy_finish(cs);
 796
 797        buf = cs->pipebufs;
 798        page_cache_get(page);
 799        buf->page = page;
 800        buf->offset = offset;
 801        buf->len = count;
 802
 803        cs->pipebufs++;
 804        cs->nr_segs++;
 805        cs->len = 0;
 806
 807        return 0;
 808}
 809
 810/*
 811 * Copy a page in the request to/from the userspace buffer.  Must be
 812 * done atomically
 813 */
 814static int fuse_copy_page(struct fuse_copy_state *cs, struct page **pagep,
 815                          unsigned offset, unsigned count, int zeroing)
 816{
 817        int err;
 818        struct page *page = *pagep;
 819
 820        if (page && zeroing && count < PAGE_SIZE)
 821                clear_highpage(page);
 822
 823        while (count) {
 824                if (cs->write && cs->pipebufs && page) {
 825                        return fuse_ref_page(cs, page, offset, count);
 826                } else if (!cs->len) {
 827                        if (cs->move_pages && page &&
 828                            offset == 0 && count == PAGE_SIZE) {
 829                                err = fuse_try_move_page(cs, pagep);
 830                                if (err <= 0)
 831                                        return err;
 832                        } else {
 833                                err = fuse_copy_fill(cs);
 834                                if (err)
 835                                        return err;
 836                        }
 837                }
 838                if (page) {
 839                        void *mapaddr = kmap_atomic(page, KM_USER0);
 840                        void *buf = mapaddr + offset;
 841                        offset += fuse_copy_do(cs, &buf, &count);
 842                        kunmap_atomic(mapaddr, KM_USER0);
 843                } else
 844                        offset += fuse_copy_do(cs, NULL, &count);
 845        }
 846        if (page && !cs->write)
 847                flush_dcache_page(page);
 848        return 0;
 849}
 850
 851/* Copy pages in the request to/from userspace buffer */
 852static int fuse_copy_pages(struct fuse_copy_state *cs, unsigned nbytes,
 853                           int zeroing)
 854{
 855        unsigned i;
 856        struct fuse_req *req = cs->req;
 857        unsigned offset = req->page_offset;
 858        unsigned count = min(nbytes, (unsigned) PAGE_SIZE - offset);
 859
 860        for (i = 0; i < req->num_pages && (nbytes || zeroing); i++) {
 861                int err;
 862
 863                err = fuse_copy_page(cs, &req->pages[i], offset, count,
 864                                     zeroing);
 865                if (err)
 866                        return err;
 867
 868                nbytes -= count;
 869                count = min(nbytes, (unsigned) PAGE_SIZE);
 870                offset = 0;
 871        }
 872        return 0;
 873}
 874
 875/* Copy a single argument in the request to/from userspace buffer */
 876static int fuse_copy_one(struct fuse_copy_state *cs, void *val, unsigned size)
 877{
 878        while (size) {
 879                if (!cs->len) {
 880                        int err = fuse_copy_fill(cs);
 881                        if (err)
 882                                return err;
 883                }
 884                fuse_copy_do(cs, &val, &size);
 885        }
 886        return 0;
 887}
 888
 889/* Copy request arguments to/from userspace buffer */
 890static int fuse_copy_args(struct fuse_copy_state *cs, unsigned numargs,
 891                          unsigned argpages, struct fuse_arg *args,
 892                          int zeroing)
 893{
 894        int err = 0;
 895        unsigned i;
 896
 897        for (i = 0; !err && i < numargs; i++)  {
 898                struct fuse_arg *arg = &args[i];
 899                if (i == numargs - 1 && argpages)
 900                        err = fuse_copy_pages(cs, arg->size, zeroing);
 901                else
 902                        err = fuse_copy_one(cs, arg->value, arg->size);
 903        }
 904        return err;
 905}
 906
 907static int forget_pending(struct fuse_conn *fc)
 908{
 909        return fc->forget_list_head.next != NULL;
 910}
 911
 912static int request_pending(struct fuse_conn *fc)
 913{
 914        return !list_empty(&fc->pending) || !list_empty(&fc->interrupts) ||
 915                forget_pending(fc);
 916}
 917
 918/* Wait until a request is available on the pending list */
 919static void request_wait(struct fuse_conn *fc)
 920__releases(fc->lock)
 921__acquires(fc->lock)
 922{
 923        DECLARE_WAITQUEUE(wait, current);
 924
 925        add_wait_queue_exclusive(&fc->waitq, &wait);
 926        while (fc->connected && !request_pending(fc)) {
 927                set_current_state(TASK_INTERRUPTIBLE);
 928                if (signal_pending(current))
 929                        break;
 930
 931                spin_unlock(&fc->lock);
 932                schedule();
 933                spin_lock(&fc->lock);
 934        }
 935        set_current_state(TASK_RUNNING);
 936        remove_wait_queue(&fc->waitq, &wait);
 937}
 938
 939/*
 940 * Transfer an interrupt request to userspace
 941 *
 942 * Unlike other requests this is assembled on demand, without a need
 943 * to allocate a separate fuse_req structure.
 944 *
 945 * Called with fc->lock held, releases it
 946 */
 947static int fuse_read_interrupt(struct fuse_conn *fc, struct fuse_copy_state *cs,
 948                               size_t nbytes, struct fuse_req *req)
 949__releases(fc->lock)
 950{
 951        struct fuse_in_header ih;
 952        struct fuse_interrupt_in arg;
 953        unsigned reqsize = sizeof(ih) + sizeof(arg);
 954        int err;
 955
 956        list_del_init(&req->intr_entry);
 957        req->intr_unique = fuse_get_unique(fc);
 958        memset(&ih, 0, sizeof(ih));
 959        memset(&arg, 0, sizeof(arg));
 960        ih.len = reqsize;
 961        ih.opcode = FUSE_INTERRUPT;
 962        ih.unique = req->intr_unique;
 963        arg.unique = req->in.h.unique;
 964
 965        spin_unlock(&fc->lock);
 966        if (nbytes < reqsize)
 967                return -EINVAL;
 968
 969        err = fuse_copy_one(cs, &ih, sizeof(ih));
 970        if (!err)
 971                err = fuse_copy_one(cs, &arg, sizeof(arg));
 972        fuse_copy_finish(cs);
 973
 974        return err ? err : reqsize;
 975}
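
/*
 * Illustrative sketch, not part of this file: the daemon side of the
 * interrupt request assembled above.  fuse_interrupt_in.unique names
 * the request being interrupted; the daemon normally answers that
 * request with -EINTR.  It may also reply to the INTERRUPT request
 * itself with a bare header (see fuse_dev_do_write()): -EAGAIN asks
 * for the interrupt to be re-queued, -ENOSYS turns interrupts off for
 * the whole connection.  The helper below and its name are assumptions
 * made for the example.
 *
 *        #include <linux/fuse.h>
 *        #include <errno.h>
 *        #include <stdint.h>
 *        #include <unistd.h>
 *
 *        static void ack_interrupt(int fuse_fd, uint64_t intr_unique, int error)
 *        {
 *                struct fuse_out_header out = {
 *                        .len    = sizeof(out),
 *                        .error  = error,        // 0, -EAGAIN or -ENOSYS
 *                        .unique = intr_unique,
 *                };
 *
 *                (void) write(fuse_fd, &out, sizeof(out));
 *        }
 */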
 976
 977static struct fuse_forget_link *dequeue_forget(struct fuse_conn *fc,
 978                                               unsigned max,
 979                                               unsigned *countp)
 980{
 981        struct fuse_forget_link *head = fc->forget_list_head.next;
 982        struct fuse_forget_link **newhead = &head;
 983        unsigned count;
 984
 985        for (count = 0; *newhead != NULL && count < max; count++)
 986                newhead = &(*newhead)->next;
 987
 988        fc->forget_list_head.next = *newhead;
 989        *newhead = NULL;
 990        if (fc->forget_list_head.next == NULL)
 991                fc->forget_list_tail = &fc->forget_list_head;
 992
 993        if (countp != NULL)
 994                *countp = count;
 995
 996        return head;
 997}
 998
 999static int fuse_read_single_forget(struct fuse_conn *fc,
1000                                   struct fuse_copy_state *cs,
1001                                   size_t nbytes)
1002__releases(fc->lock)
1003{
1004        int err;
1005        struct fuse_forget_link *forget = dequeue_forget(fc, 1, NULL);
1006        struct fuse_forget_in arg = {
1007                .nlookup = forget->forget_one.nlookup,
1008        };
1009        struct fuse_in_header ih = {
1010                .opcode = FUSE_FORGET,
1011                .nodeid = forget->forget_one.nodeid,
1012                .unique = fuse_get_unique(fc),
1013                .len = sizeof(ih) + sizeof(arg),
1014        };
1015
1016        spin_unlock(&fc->lock);
1017        kfree(forget);
1018        if (nbytes < ih.len)
1019                return -EINVAL;
1020
1021        err = fuse_copy_one(cs, &ih, sizeof(ih));
1022        if (!err)
1023                err = fuse_copy_one(cs, &arg, sizeof(arg));
1024        fuse_copy_finish(cs);
1025
1026        if (err)
1027                return err;
1028
1029        return ih.len;
1030}
1031
1032static int fuse_read_batch_forget(struct fuse_conn *fc,
1033                                   struct fuse_copy_state *cs, size_t nbytes)
1034__releases(fc->lock)
1035{
1036        int err;
1037        unsigned max_forgets;
1038        unsigned count;
1039        struct fuse_forget_link *head;
1040        struct fuse_batch_forget_in arg = { .count = 0 };
1041        struct fuse_in_header ih = {
1042                .opcode = FUSE_BATCH_FORGET,
1043                .unique = fuse_get_unique(fc),
1044                .len = sizeof(ih) + sizeof(arg),
1045        };
1046
1047        if (nbytes < ih.len) {
1048                spin_unlock(&fc->lock);
1049                return -EINVAL;
1050        }
1051
1052        max_forgets = (nbytes - ih.len) / sizeof(struct fuse_forget_one);
1053        head = dequeue_forget(fc, max_forgets, &count);
1054        spin_unlock(&fc->lock);
1055
1056        arg.count = count;
1057        ih.len += count * sizeof(struct fuse_forget_one);
1058        err = fuse_copy_one(cs, &ih, sizeof(ih));
1059        if (!err)
1060                err = fuse_copy_one(cs, &arg, sizeof(arg));
1061
1062        while (head) {
1063                struct fuse_forget_link *forget = head;
1064
1065                if (!err) {
1066                        err = fuse_copy_one(cs, &forget->forget_one,
1067                                            sizeof(forget->forget_one));
1068                }
1069                head = forget->next;
1070                kfree(forget);
1071        }
1072
1073        fuse_copy_finish(cs);
1074
1075        if (err)
1076                return err;
1077
1078        return ih.len;
1079}
1080
1081static int fuse_read_forget(struct fuse_conn *fc, struct fuse_copy_state *cs,
1082                            size_t nbytes)
1083__releases(fc->lock)
1084{
1085        if (fc->minor < 16 || fc->forget_list_head.next->next == NULL)
1086                return fuse_read_single_forget(fc, cs, nbytes);
1087        else
1088                return fuse_read_batch_forget(fc, cs, nbytes);
1089}
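
/*
 * Illustrative sketch, not part of this file: the wire layout a daemon
 * sees for the two forget variants produced above.  With protocol
 * minor >= 16 several queued forgets are coalesced into a single
 * FUSE_BATCH_FORGET message (a fuse_batch_forget_in followed by
 * `count' fuse_forget_one entries); older daemons get one FUSE_FORGET
 * per read.  Neither variant may be replied to.  drop_nlookup() is an
 * assumed helper, not part of any real API.
 *
 *        #include <linux/fuse.h>
 *        #include <stdint.h>
 *
 *        // arg points just past the fuse_in_header
 *        static void parse_batch_forget(const void *arg)
 *        {
 *                const struct fuse_batch_forget_in *in = arg;
 *                const struct fuse_forget_one *one = (const void *) (in + 1);
 *                uint32_t i;
 *
 *                for (i = 0; i < in->count; i++)
 *                        drop_nlookup(one[i].nodeid, one[i].nlookup);
 *        }
 */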
1090
1091/*
1092 * Read a single request into the userspace filesystem's buffer.  This
1093 * function waits until a request is available, then removes it from
1094 * the pending list and copies request data to userspace buffer.  If
1095 * no reply is needed (FORGET) or request has been aborted or there
1096 * was an error during the copying then it's finished by calling
1097 * request_end().  Otherwise add it to the processing list, and set
1098 * the 'sent' flag.
1099 */
1100static ssize_t fuse_dev_do_read(struct fuse_conn *fc, struct file *file,
1101                                struct fuse_copy_state *cs, size_t nbytes)
1102{
1103        int err;
1104        struct fuse_req *req;
1105        struct fuse_in *in;
1106        unsigned reqsize;
1107
1108 restart:
1109        spin_lock(&fc->lock);
1110        err = -EAGAIN;
1111        if ((file->f_flags & O_NONBLOCK) && fc->connected &&
1112            !request_pending(fc))
1113                goto err_unlock;
1114
1115        request_wait(fc);
1116        err = -ENODEV;
1117        if (!fc->connected)
1118                goto err_unlock;
1119        err = -ERESTARTSYS;
1120        if (!request_pending(fc))
1121                goto err_unlock;
1122
1123        if (!list_empty(&fc->interrupts)) {
1124                req = list_entry(fc->interrupts.next, struct fuse_req,
1125                                 intr_entry);
1126                return fuse_read_interrupt(fc, cs, nbytes, req);
1127        }
1128
1129        if (forget_pending(fc)) {
1130                if (list_empty(&fc->pending) || fc->forget_batch-- > 0)
1131                        return fuse_read_forget(fc, cs, nbytes);
1132
1133                if (fc->forget_batch <= -8)
1134                        fc->forget_batch = 16;
1135        }
1136
1137        req = list_entry(fc->pending.next, struct fuse_req, list);
1138        req->state = FUSE_REQ_READING;
1139        list_move(&req->list, &fc->io);
1140
1141        in = &req->in;
1142        reqsize = in->h.len;
1143        /* If request is too large, reply with an error and restart the read */
1144        if (nbytes < reqsize) {
1145                req->out.h.error = -EIO;
1146                /* SETXATTR is special, since it may contain too much data */
1147                if (in->h.opcode == FUSE_SETXATTR)
1148                        req->out.h.error = -E2BIG;
1149                request_end(fc, req);
1150                goto restart;
1151        }
1152        spin_unlock(&fc->lock);
1153        cs->req = req;
1154        err = fuse_copy_one(cs, &in->h, sizeof(in->h));
1155        if (!err)
1156                err = fuse_copy_args(cs, in->numargs, in->argpages,
1157                                     (struct fuse_arg *) in->args, 0);
1158        fuse_copy_finish(cs);
1159        spin_lock(&fc->lock);
1160        req->locked = 0;
1161        if (req->aborted) {
1162                request_end(fc, req);
1163                return -ENODEV;
1164        }
1165        if (err) {
1166                req->out.h.error = -EIO;
1167                request_end(fc, req);
1168                return err;
1169        }
1170        if (!req->isreply)
1171                request_end(fc, req);
1172        else {
1173                req->state = FUSE_REQ_SENT;
1174                list_move_tail(&req->list, &fc->processing);
1175                if (req->interrupted)
1176                        queue_interrupt(fc, req);
1177                spin_unlock(&fc->lock);
1178        }
1179        return reqsize;
1180
1181 err_unlock:
1182        spin_unlock(&fc->lock);
1183        return err;
1184}
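
/*
 * Illustrative sketch, not part of this file: the userspace side of
 * fuse_dev_do_read().  A daemon reads one request per read() on the
 * /dev/fuse descriptor obtained when the filesystem was mounted; the
 * buffer starts with a fuse_in_header followed by the opcode-specific
 * arguments.  The buffer sizing and the dispatch_request() helper are
 * assumptions made for the example (real daemons size the buffer from
 * max_write plus headers).
 *
 *        #include <linux/fuse.h>
 *        #include <stdint.h>
 *        #include <unistd.h>
 *
 *        static char buf[FUSE_MIN_READ_BUFFER];
 *
 *        static void daemon_loop(int fuse_fd)
 *        {
 *                for (;;) {
 *                        ssize_t n = read(fuse_fd, buf, sizeof(buf));
 *
 *                        if (n < 0)
 *                                break;        // e.g. ENODEV after an abort
 *
 *                        struct fuse_in_header *in = (void *) buf;
 *                        // in->len == n; in->opcode selects the handler;
 *                        // in->unique must be echoed in the reply header
 *                        dispatch_request(in, buf + sizeof(*in),
 *                                         n - sizeof(*in));
 *                }
 *        }
 */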
1185
1186static ssize_t fuse_dev_read(struct kiocb *iocb, const struct iovec *iov,
1187                              unsigned long nr_segs, loff_t pos)
1188{
1189        struct fuse_copy_state cs;
1190        struct file *file = iocb->ki_filp;
1191        struct fuse_conn *fc = fuse_get_conn(file);
1192        if (!fc)
1193                return -EPERM;
1194
1195        fuse_copy_init(&cs, fc, 1, iov, nr_segs);
1196
1197        return fuse_dev_do_read(fc, file, &cs, iov_length(iov, nr_segs));
1198}
1199
1200static int fuse_dev_pipe_buf_steal(struct pipe_inode_info *pipe,
1201                                   struct pipe_buffer *buf)
1202{
1203        return 1;
1204}
1205
1206static const struct pipe_buf_operations fuse_dev_pipe_buf_ops = {
1207        .can_merge = 0,
1208        .map = generic_pipe_buf_map,
1209        .unmap = generic_pipe_buf_unmap,
1210        .confirm = generic_pipe_buf_confirm,
1211        .release = generic_pipe_buf_release,
1212        .steal = fuse_dev_pipe_buf_steal,
1213        .get = generic_pipe_buf_get,
1214};
1215
1216static ssize_t fuse_dev_splice_read(struct file *in, loff_t *ppos,
1217                                    struct pipe_inode_info *pipe,
1218                                    size_t len, unsigned int flags)
1219{
1220        int ret;
1221        int page_nr = 0;
1222        int do_wakeup = 0;
1223        struct pipe_buffer *bufs;
1224        struct fuse_copy_state cs;
1225        struct fuse_conn *fc = fuse_get_conn(in);
1226        if (!fc)
1227                return -EPERM;
1228
1229        bufs = kmalloc(pipe->buffers * sizeof(struct pipe_buffer), GFP_KERNEL);
1230        if (!bufs)
1231                return -ENOMEM;
1232
1233        fuse_copy_init(&cs, fc, 1, NULL, 0);
1234        cs.pipebufs = bufs;
1235        cs.pipe = pipe;
1236        ret = fuse_dev_do_read(fc, in, &cs, len);
1237        if (ret < 0)
1238                goto out;
1239
1240        ret = 0;
1241        pipe_lock(pipe);
1242
1243        if (!pipe->readers) {
1244                send_sig(SIGPIPE, current, 0);
1245                if (!ret)
1246                        ret = -EPIPE;
1247                goto out_unlock;
1248        }
1249
1250        if (pipe->nrbufs + cs.nr_segs > pipe->buffers) {
1251                ret = -EIO;
1252                goto out_unlock;
1253        }
1254
1255        while (page_nr < cs.nr_segs) {
1256                int newbuf = (pipe->curbuf + pipe->nrbufs) & (pipe->buffers - 1);
1257                struct pipe_buffer *buf = pipe->bufs + newbuf;
1258
1259                buf->page = bufs[page_nr].page;
1260                buf->offset = bufs[page_nr].offset;
1261                buf->len = bufs[page_nr].len;
1262                buf->ops = &fuse_dev_pipe_buf_ops;
1263
1264                pipe->nrbufs++;
1265                page_nr++;
1266                ret += buf->len;
1267
1268                if (pipe->inode)
1269                        do_wakeup = 1;
1270        }
1271
1272out_unlock:
1273        pipe_unlock(pipe);
1274
1275        if (do_wakeup) {
1276                smp_mb();
1277                if (waitqueue_active(&pipe->wait))
1278                        wake_up_interruptible(&pipe->wait);
1279                kill_fasync(&pipe->fasync_readers, SIGIO, POLL_IN);
1280        }
1281
1282out:
1283        for (; page_nr < cs.nr_segs; page_nr++)
1284                page_cache_release(bufs[page_nr].page);
1285
1286        kfree(bufs);
1287        return ret;
1288}
1289
1290static int fuse_notify_poll(struct fuse_conn *fc, unsigned int size,
1291                            struct fuse_copy_state *cs)
1292{
1293        struct fuse_notify_poll_wakeup_out outarg;
1294        int err = -EINVAL;
1295
1296        if (size != sizeof(outarg))
1297                goto err;
1298
1299        err = fuse_copy_one(cs, &outarg, sizeof(outarg));
1300        if (err)
1301                goto err;
1302
1303        fuse_copy_finish(cs);
1304        return fuse_notify_poll_wakeup(fc, &outarg);
1305
1306err:
1307        fuse_copy_finish(cs);
1308        return err;
1309}
1310
1311static int fuse_notify_inval_inode(struct fuse_conn *fc, unsigned int size,
1312                                   struct fuse_copy_state *cs)
1313{
1314        struct fuse_notify_inval_inode_out outarg;
1315        int err = -EINVAL;
1316
1317        if (size != sizeof(outarg))
1318                goto err;
1319
1320        err = fuse_copy_one(cs, &outarg, sizeof(outarg));
1321        if (err)
1322                goto err;
1323        fuse_copy_finish(cs);
1324
1325        down_read(&fc->killsb);
1326        err = -ENOENT;
1327        if (fc->sb) {
1328                err = fuse_reverse_inval_inode(fc->sb, outarg.ino,
1329                                               outarg.off, outarg.len);
1330        }
1331        up_read(&fc->killsb);
1332        return err;
1333
1334err:
1335        fuse_copy_finish(cs);
1336        return err;
1337}
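
/*
 * Illustrative sketch, not part of this file: how a daemon injects the
 * notification handled above.  A notification is a write whose
 * fuse_out_header has unique == 0 and the notify code in the error
 * field; the code-specific payload follows.  Shown for
 * FUSE_NOTIFY_INVAL_INODE; the helper and its name are assumptions
 * made for the example.
 *
 *        #include <linux/fuse.h>
 *        #include <stdint.h>
 *        #include <sys/uio.h>
 *
 *        static void notify_inval_inode(int fuse_fd, uint64_t nodeid)
 *        {
 *                struct fuse_notify_inval_inode_out arg = {
 *                        .ino = nodeid,
 *                        .off = 0,
 *                        .len = -1,        // len <= 0: invalidate to end of file
 *                };
 *                struct fuse_out_header out = {
 *                        .len    = sizeof(out) + sizeof(arg),
 *                        .error  = FUSE_NOTIFY_INVAL_INODE,
 *                        .unique = 0,
 *                };
 *                struct iovec iov[2] = { { &out, sizeof(out) },
 *                                        { &arg, sizeof(arg) } };
 *
 *                (void) writev(fuse_fd, iov, 2);
 *        }
 */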
1338
1339static int fuse_notify_inval_entry(struct fuse_conn *fc, unsigned int size,
1340                                   struct fuse_copy_state *cs)
1341{
1342        struct fuse_notify_inval_entry_out outarg;
1343        int err = -ENOMEM;
1344        char *buf;
1345        struct qstr name;
1346
1347        buf = kzalloc(FUSE_NAME_MAX + 1, GFP_KERNEL);
1348        if (!buf)
1349                goto err;
1350
1351        err = -EINVAL;
1352        if (size < sizeof(outarg))
1353                goto err;
1354
1355        err = fuse_copy_one(cs, &outarg, sizeof(outarg));
1356        if (err)
1357                goto err;
1358
1359        err = -ENAMETOOLONG;
1360        if (outarg.namelen > FUSE_NAME_MAX)
1361                goto err;
1362
1363        name.name = buf;
1364        name.len = outarg.namelen;
1365        err = fuse_copy_one(cs, buf, outarg.namelen + 1);
1366        if (err)
1367                goto err;
1368        fuse_copy_finish(cs);
1369        buf[outarg.namelen] = 0;
1370        name.hash = full_name_hash(name.name, name.len);
1371
1372        down_read(&fc->killsb);
1373        err = -ENOENT;
1374        if (fc->sb)
1375                err = fuse_reverse_inval_entry(fc->sb, outarg.parent, &name);
1376        up_read(&fc->killsb);
1377        kfree(buf);
1378        return err;
1379
1380err:
1381        kfree(buf);
1382        fuse_copy_finish(cs);
1383        return err;
1384}
1385
1386static int fuse_notify_store(struct fuse_conn *fc, unsigned int size,
1387                             struct fuse_copy_state *cs)
1388{
1389        struct fuse_notify_store_out outarg;
1390        struct inode *inode;
1391        struct address_space *mapping;
1392        u64 nodeid;
1393        int err;
1394        pgoff_t index;
1395        unsigned int offset;
1396        unsigned int num;
1397        loff_t file_size;
1398        loff_t end;
1399
1400        err = -EINVAL;
1401        if (size < sizeof(outarg))
1402                goto out_finish;
1403
1404        err = fuse_copy_one(cs, &outarg, sizeof(outarg));
1405        if (err)
1406                goto out_finish;
1407
1408        err = -EINVAL;
1409        if (size - sizeof(outarg) != outarg.size)
1410                goto out_finish;
1411
1412        nodeid = outarg.nodeid;
1413
1414        down_read(&fc->killsb);
1415
1416        err = -ENOENT;
1417        if (!fc->sb)
1418                goto out_up_killsb;
1419
1420        inode = ilookup5(fc->sb, nodeid, fuse_inode_eq, &nodeid);
1421        if (!inode)
1422                goto out_up_killsb;
1423
1424        mapping = inode->i_mapping;
1425        index = outarg.offset >> PAGE_CACHE_SHIFT;
1426        offset = outarg.offset & ~PAGE_CACHE_MASK;
1427        file_size = i_size_read(inode);
1428        end = outarg.offset + outarg.size;
1429        if (end > file_size) {
1430                file_size = end;
1431                fuse_write_update_size(inode, file_size);
1432        }
1433
1434        num = outarg.size;
1435        while (num) {
1436                struct page *page;
1437                unsigned int this_num;
1438
1439                err = -ENOMEM;
1440                page = find_or_create_page(mapping, index,
1441                                           mapping_gfp_mask(mapping));
1442                if (!page)
1443                        goto out_iput;
1444
1445                this_num = min_t(unsigned, num, PAGE_CACHE_SIZE - offset);
1446                err = fuse_copy_page(cs, &page, offset, this_num, 0);
1447                if (!err && offset == 0 && (num != 0 || file_size == end))
1448                        SetPageUptodate(page);
1449                unlock_page(page);
1450                page_cache_release(page);
1451
1452                if (err)
1453                        goto out_iput;
1454
1455                num -= this_num;
1456                offset = 0;
1457                index++;
1458        }
1459
1460        err = 0;
1461
1462out_iput:
1463        iput(inode);
1464out_up_killsb:
1465        up_read(&fc->killsb);
1466out_finish:
1467        fuse_copy_finish(cs);
1468        return err;
1469}
1470
1471static void fuse_retrieve_end(struct fuse_conn *fc, struct fuse_req *req)
1472{
1473        release_pages(req->pages, req->num_pages, 0);
1474}
1475
1476static int fuse_retrieve(struct fuse_conn *fc, struct inode *inode,
1477                         struct fuse_notify_retrieve_out *outarg)
1478{
1479        int err;
1480        struct address_space *mapping = inode->i_mapping;
1481        struct fuse_req *req;
1482        pgoff_t index;
1483        loff_t file_size;
1484        unsigned int num;
1485        unsigned int offset;
1486        size_t total_len = 0;
1487
1488        req = fuse_get_req(fc);
1489        if (IS_ERR(req))
1490                return PTR_ERR(req);
1491
1492        offset = outarg->offset & ~PAGE_CACHE_MASK;
1493
1494        req->in.h.opcode = FUSE_NOTIFY_REPLY;
1495        req->in.h.nodeid = outarg->nodeid;
1496        req->in.numargs = 2;
1497        req->in.argpages = 1;
1498        req->page_offset = offset;
1499        req->end = fuse_retrieve_end;
1500
1501        index = outarg->offset >> PAGE_CACHE_SHIFT;
1502        file_size = i_size_read(inode);
1503        num = outarg->size;
1504        if (outarg->offset > file_size)
1505                num = 0;
1506        else if (outarg->offset + num > file_size)
1507                num = file_size - outarg->offset;
1508
1509        while (num) {
1510                struct page *page;
1511                unsigned int this_num;
1512
1513                page = find_get_page(mapping, index);
1514                if (!page)
1515                        break;
1516
1517                this_num = min_t(unsigned, num, PAGE_CACHE_SIZE - offset);
1518                req->pages[req->num_pages] = page;
1519                req->num_pages++;
1520
1521                num -= this_num;
1522                total_len += this_num;
1523        }
1524        req->misc.retrieve_in.offset = outarg->offset;
1525        req->misc.retrieve_in.size = total_len;
1526        req->in.args[0].size = sizeof(req->misc.retrieve_in);
1527        req->in.args[0].value = &req->misc.retrieve_in;
1528        req->in.args[1].size = total_len;
1529
1530        err = fuse_request_send_notify_reply(fc, req, outarg->notify_unique);
1531        if (err)
1532                fuse_retrieve_end(fc, req);
1533
1534        return err;
1535}
1536
1537static int fuse_notify_retrieve(struct fuse_conn *fc, unsigned int size,
1538                                struct fuse_copy_state *cs)
1539{
1540        struct fuse_notify_retrieve_out outarg;
1541        struct inode *inode;
1542        int err;
1543
1544        err = -EINVAL;
1545        if (size != sizeof(outarg))
1546                goto copy_finish;
1547
1548        err = fuse_copy_one(cs, &outarg, sizeof(outarg));
1549        if (err)
1550                goto copy_finish;
1551
1552        fuse_copy_finish(cs);
1553
1554        down_read(&fc->killsb);
1555        err = -ENOENT;
1556        if (fc->sb) {
1557                u64 nodeid = outarg.nodeid;
1558
1559                inode = ilookup5(fc->sb, nodeid, fuse_inode_eq, &nodeid);
1560                if (inode) {
1561                        err = fuse_retrieve(fc, inode, &outarg);
1562                        iput(inode);
1563                }
1564        }
1565        up_read(&fc->killsb);
1566
1567        return err;
1568
1569copy_finish:
1570        fuse_copy_finish(cs);
1571        return err;
1572}
1573
1574static int fuse_notify(struct fuse_conn *fc, enum fuse_notify_code code,
1575                       unsigned int size, struct fuse_copy_state *cs)
1576{
1577        switch (code) {
1578        case FUSE_NOTIFY_POLL:
1579                return fuse_notify_poll(fc, size, cs);
1580
1581        case FUSE_NOTIFY_INVAL_INODE:
1582                return fuse_notify_inval_inode(fc, size, cs);
1583
1584        case FUSE_NOTIFY_INVAL_ENTRY:
1585                return fuse_notify_inval_entry(fc, size, cs);
1586
1587        case FUSE_NOTIFY_STORE:
1588                return fuse_notify_store(fc, size, cs);
1589
1590        case FUSE_NOTIFY_RETRIEVE:
1591                return fuse_notify_retrieve(fc, size, cs);
1592
1593        default:
1594                fuse_copy_finish(cs);
1595                return -EINVAL;
1596        }
1597}
1598
1599/* Look up request on processing list by unique ID */
1600static struct fuse_req *request_find(struct fuse_conn *fc, u64 unique)
1601{
1602        struct list_head *entry;
1603
1604        list_for_each(entry, &fc->processing) {
1605                struct fuse_req *req;
1606                req = list_entry(entry, struct fuse_req, list);
1607                if (req->in.h.unique == unique || req->intr_unique == unique)
1608                        return req;
1609        }
1610        return NULL;
1611}
1612
1613static int copy_out_args(struct fuse_copy_state *cs, struct fuse_out *out,
1614                         unsigned nbytes)
1615{
1616        unsigned reqsize = sizeof(struct fuse_out_header);
1617
1618        if (out->h.error)
1619                return nbytes != reqsize ? -EINVAL : 0;
1620
1621        reqsize += len_args(out->numargs, out->args);
1622
1623        if (reqsize < nbytes || (reqsize > nbytes && !out->argvar))
1624                return -EINVAL;
1625        else if (reqsize > nbytes) {
1626                struct fuse_arg *lastarg = &out->args[out->numargs-1];
1627                unsigned diffsize = reqsize - nbytes;
1628                if (diffsize > lastarg->size)
1629                        return -EINVAL;
1630                lastarg->size -= diffsize;
1631        }
1632        return fuse_copy_args(cs, out->numargs, out->argpages, out->args,
1633                              out->page_zeroing);
1634}
1635
1636/*
1637 * Write a single reply to a request.  First the header is copied from
1638 * the write buffer.  The request is then searched on the processing
1639 * list by the unique ID found in the header.  If found, remove
1640 * it from the list and copy the rest of the buffer to the request.
1641 * The request is finished by calling request_end().
1642 */
1643static ssize_t fuse_dev_do_write(struct fuse_conn *fc,
1644                                 struct fuse_copy_state *cs, size_t nbytes)
1645{
1646        int err;
1647        struct fuse_req *req;
1648        struct fuse_out_header oh;
1649
1650        if (nbytes < sizeof(struct fuse_out_header))
1651                return -EINVAL;
1652
1653        err = fuse_copy_one(cs, &oh, sizeof(oh));
1654        if (err)
1655                goto err_finish;
1656
1657        err = -EINVAL;
1658        if (oh.len != nbytes)
1659                goto err_finish;
1660
1661        /*
1662         * Zero oh.unique indicates an unsolicited notification message
1663         * and the error field contains the notification code.
1664         */
1665        if (!oh.unique) {
1666                err = fuse_notify(fc, oh.error, nbytes - sizeof(oh), cs);
1667                return err ? err : nbytes;
1668        }
1669
1670        err = -EINVAL;
1671        if (oh.error <= -1000 || oh.error > 0)
1672                goto err_finish;
1673
1674        spin_lock(&fc->lock);
1675        err = -ENOENT;
1676        if (!fc->connected)
1677                goto err_unlock;
1678
1679        req = request_find(fc, oh.unique);
1680        if (!req)
1681                goto err_unlock;
1682
1683        if (req->aborted) {
1684                spin_unlock(&fc->lock);
1685                fuse_copy_finish(cs);
1686                spin_lock(&fc->lock);
1687                request_end(fc, req);
1688                return -ENOENT;
1689        }
1690        /* Is it an interrupt reply? */
1691        if (req->intr_unique == oh.unique) {
1692                err = -EINVAL;
1693                if (nbytes != sizeof(struct fuse_out_header))
1694                        goto err_unlock;
1695
1696                if (oh.error == -ENOSYS)
1697                        fc->no_interrupt = 1;
1698                else if (oh.error == -EAGAIN)
1699                        queue_interrupt(fc, req);
1700
1701                spin_unlock(&fc->lock);
1702                fuse_copy_finish(cs);
1703                return nbytes;
1704        }
1705
1706        req->state = FUSE_REQ_WRITING;
1707        list_move(&req->list, &fc->io);
1708        req->out.h = oh;
1709        req->locked = 1;
1710        cs->req = req;
1711        if (!req->out.page_replace)
1712                cs->move_pages = 0;
1713        spin_unlock(&fc->lock);
1714
1715        err = copy_out_args(cs, &req->out, nbytes);
1716        fuse_copy_finish(cs);
1717
1718        spin_lock(&fc->lock);
1719        req->locked = 0;
1720        if (!err) {
1721                if (req->aborted)
1722                        err = -ENOENT;
1723        } else if (!req->aborted)
1724                req->out.h.error = -EIO;
1725        request_end(fc, req);
1726
1727        return err ? err : nbytes;
1728
1729 err_unlock:
1730        spin_unlock(&fc->lock);
1731 err_finish:
1732        fuse_copy_finish(cs);
1733        return err;
1734}
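/*
 * Illustration only, not part of this file: a minimal userspace sketch
 * of the reply format consumed above.  The out header echoes the
 * request's unique ID, holds 0 or a negative errno in its error field,
 * and its len must equal the total number of bytes written (the
 * oh.len != nbytes check).  Error replies carry no payload, as
 * copy_out_args() enforces.  'send_reply' and 'devfd' are assumed
 * names, not an existing API.
 */
#if 0	/* example sketch (userspace) */
#include <linux/fuse.h>
#include <stdint.h>
#include <sys/uio.h>

static int send_reply(int devfd, uint64_t unique, int error,
                      const void *arg, size_t argsize)
{
        struct fuse_out_header oh = {
                .len    = sizeof(oh) + (error ? 0 : argsize),
                .error  = error,        /* 0 or -errno */
                .unique = unique,       /* copied from the request */
        };
        struct iovec iov[2] = {
                { .iov_base = &oh,              .iov_len = sizeof(oh) },
                { .iov_base = (void *)arg,      .iov_len = argsize },
        };

        return writev(devfd, iov, error ? 1 : 2) == (ssize_t)oh.len ? 0 : -1;
}
#endif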
1735
1736static ssize_t fuse_dev_write(struct kiocb *iocb, const struct iovec *iov,
1737                              unsigned long nr_segs, loff_t pos)
1738{
1739        struct fuse_copy_state cs;
1740        struct fuse_conn *fc = fuse_get_conn(iocb->ki_filp);
1741        if (!fc)
1742                return -EPERM;
1743
1744        fuse_copy_init(&cs, fc, 0, iov, nr_segs);
1745
1746        return fuse_dev_do_write(fc, &cs, iov_length(iov, nr_segs));
1747}
1748
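/*
 * Collect the pipe buffers covering 'len' bytes of reply data, either
 * taking them off the pipe or grabbing an extra reference for a
 * partially used buffer, then feed them to fuse_dev_do_write().  With
 * SPLICE_F_MOVE the copy helpers may move whole pages into the page
 * cache instead of copying them (cs.move_pages).
 */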
1749static ssize_t fuse_dev_splice_write(struct pipe_inode_info *pipe,
1750                                     struct file *out, loff_t *ppos,
1751                                     size_t len, unsigned int flags)
1752{
1753        unsigned nbuf;
1754        unsigned idx;
1755        struct pipe_buffer *bufs;
1756        struct fuse_copy_state cs;
1757        struct fuse_conn *fc;
1758        size_t rem;
1759        ssize_t ret;
1760
1761        fc = fuse_get_conn(out);
1762        if (!fc)
1763                return -EPERM;
1764
1765        bufs = kmalloc(pipe->buffers * sizeof(struct pipe_buffer), GFP_KERNEL);
1766        if (!bufs)
1767                return -ENOMEM;
1768
1769        pipe_lock(pipe);
1770        nbuf = 0;
1771        rem = 0;
1772        for (idx = 0; idx < pipe->nrbufs && rem < len; idx++)
1773                rem += pipe->bufs[(pipe->curbuf + idx) & (pipe->buffers - 1)].len;
1774
1775        ret = -EINVAL;
1776        if (rem < len) {
1777                pipe_unlock(pipe);
1778                goto out;
1779        }
1780
1781        rem = len;
1782        while (rem) {
1783                struct pipe_buffer *ibuf;
1784                struct pipe_buffer *obuf;
1785
1786                BUG_ON(nbuf >= pipe->buffers);
1787                BUG_ON(!pipe->nrbufs);
1788                ibuf = &pipe->bufs[pipe->curbuf];
1789                obuf = &bufs[nbuf];
1790
1791                if (rem >= ibuf->len) {
1792                        *obuf = *ibuf;
1793                        ibuf->ops = NULL;
1794                        pipe->curbuf = (pipe->curbuf + 1) & (pipe->buffers - 1);
1795                        pipe->nrbufs--;
1796                } else {
1797                        ibuf->ops->get(pipe, ibuf);
1798                        *obuf = *ibuf;
1799                        obuf->flags &= ~PIPE_BUF_FLAG_GIFT;
1800                        obuf->len = rem;
1801                        ibuf->offset += obuf->len;
1802                        ibuf->len -= obuf->len;
1803                }
1804                nbuf++;
1805                rem -= obuf->len;
1806        }
1807        pipe_unlock(pipe);
1808
1809        fuse_copy_init(&cs, fc, 0, NULL, nbuf);
1810        cs.pipebufs = bufs;
1811        cs.pipe = pipe;
1812
1813        if (flags & SPLICE_F_MOVE)
1814                cs.move_pages = 1;
1815
1816        ret = fuse_dev_do_write(fc, &cs, len);
1817
1818        for (idx = 0; idx < nbuf; idx++) {
1819                struct pipe_buffer *buf = &bufs[idx];
1820                buf->ops->release(pipe, buf);
1821        }
1822out:
1823        kfree(bufs);
1824        return ret;
1825}
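/*
 * Illustration only, not part of this file: a sketch of driving the
 * splice path above from userspace.  The reply is first placed into a
 * pipe (here with vmsplice()) and then spliced into the device;
 * SPLICE_F_MOVE requests the page-moving behaviour selected via
 * cs.move_pages above.  'splice_reply' and the descriptors are assumed
 * names, and a real daemon would handle short vmsplice() writes.
 */
#if 0	/* example sketch (userspace) */
#define _GNU_SOURCE
#include <fcntl.h>
#include <sys/uio.h>

static ssize_t splice_reply(int devfd, int pipefd[2],
                            const struct iovec *iov, int iovcnt,
                            size_t total)
{
        /* gather header + payload into the pipe without copying */
        if (vmsplice(pipefd[1], iov, iovcnt, 0) != (ssize_t)total)
                return -1;

        /* hand the pipe contents to the fuse device in one go */
        return splice(pipefd[0], NULL, devfd, NULL, total, SPLICE_F_MOVE);
}
#endif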
1826
1827static unsigned fuse_dev_poll(struct file *file, poll_table *wait)
1828{
1829        unsigned mask = POLLOUT | POLLWRNORM;
1830        struct fuse_conn *fc = fuse_get_conn(file);
1831        if (!fc)
1832                return POLLERR;
1833
1834        poll_wait(file, &fc->waitq, wait);
1835
1836        spin_lock(&fc->lock);
1837        if (!fc->connected)
1838                mask = POLLERR;
1839        else if (request_pending(fc))
1840                mask |= POLLIN | POLLRDNORM;
1841        spin_unlock(&fc->lock);
1842
1843        return mask;
1844}
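/*
 * Illustration only, not part of this file: the daemon side of the
 * poll support above.  POLLIN means at least one request is queued and
 * a read of the device will not block; POLLERR means the connection is
 * no longer usable.  'wait_for_request' is an assumed name.
 */
#if 0	/* example sketch (userspace) */
#include <poll.h>

static int wait_for_request(int devfd)
{
        struct pollfd pfd = { .fd = devfd, .events = POLLIN };

        if (poll(&pfd, 1, -1) < 0)
                return -1;
        if (pfd.revents & POLLERR)      /* aborted or not connected */
                return -1;
        return (pfd.revents & POLLIN) != 0;
}
#endif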
1845
1846/*
1847 * Abort all requests on the given list (pending or processing)
1848 *
1849 * This function releases and reacquires fc->lock
1850 */
1851static void end_requests(struct fuse_conn *fc, struct list_head *head)
1852__releases(fc->lock)
1853__acquires(fc->lock)
1854{
1855        while (!list_empty(head)) {
1856                struct fuse_req *req;
1857                req = list_entry(head->next, struct fuse_req, list);
1858                req->out.h.error = -ECONNABORTED;
1859                request_end(fc, req);
1860                spin_lock(&fc->lock);
1861        }
1862}
1863
1864/*
1865 * Abort requests under I/O
1866 *
1867 * The requests are set to aborted and finished, and the request
1868 * waiter is woken up.  This will make request_wait_answer() wait
1869 * until the request is unlocked and then return.
1870 *
1871 * If the request is asynchronous, then the end function needs to be
1872 * called after waiting for the request to be unlocked (if it was
1873 * locked).
1874 */
1875static void end_io_requests(struct fuse_conn *fc)
1876__releases(fc->lock)
1877__acquires(fc->lock)
1878{
1879        while (!list_empty(&fc->io)) {
1880                struct fuse_req *req =
1881                        list_entry(fc->io.next, struct fuse_req, list);
1882                void (*end) (struct fuse_conn *, struct fuse_req *) = req->end;
1883
1884                req->aborted = 1;
1885                req->out.h.error = -ECONNABORTED;
1886                req->state = FUSE_REQ_FINISHED;
1887                list_del_init(&req->list);
1888                wake_up(&req->waitq);
1889                if (end) {
1890                        req->end = NULL;
1891                        __fuse_get_request(req);
1892                        spin_unlock(&fc->lock);
1893                        wait_event(req->waitq, !req->locked);
1894                        end(fc, req);
1895                        fuse_put_request(fc, req);
1896                        spin_lock(&fc->lock);
1897                }
1898        }
1899}
1900
1901static void end_queued_requests(struct fuse_conn *fc)
1902__releases(fc->lock)
1903__acquires(fc->lock)
1904{
1905        fc->max_background = UINT_MAX;
1906        flush_bg_queue(fc);
1907        end_requests(fc, &fc->pending);
1908        end_requests(fc, &fc->processing);
1909        while (forget_pending(fc))
1910                kfree(dequeue_forget(fc, 1, NULL));
1911}
1912
1913/*
1914 * Abort all requests.
1915 *
1916 * Emergency exit in case of a malicious or accidental deadlock, or
1917 * just a hung filesystem.
1918 *
1919 * The same effect is usually achievable through killing the
1920 * filesystem daemon and all users of the filesystem.  The exception
1921 * is the combination of an asynchronous request and the tricky
1922 * deadlock (see Documentation/filesystems/fuse.txt).
1923 *
1924 * During the aborting, progression of requests from the pending and
1925 * processing lists onto the io list, and progression of new requests
1926 * onto the pending list is prevented by fc->connected being false.
1927 *
1928 * Progression of requests under I/O to the processing list is
1929 * prevented by the req->aborted flag being true for these requests.
1930 * For this reason requests on the io list must be aborted first.
1931 */
1932void fuse_abort_conn(struct fuse_conn *fc)
1933{
1934        spin_lock(&fc->lock);
1935        if (fc->connected) {
1936                fc->connected = 0;
1937                fc->blocked = 0;
1938                end_io_requests(fc);
1939                end_queued_requests(fc);
1940                wake_up_all(&fc->waitq);
1941                wake_up_all(&fc->blocked_waitq);
1942                kill_fasync(&fc->fasync, SIGIO, POLL_IN);
1943        }
1944        spin_unlock(&fc->lock);
1945}
1946EXPORT_SYMBOL_GPL(fuse_abort_conn);
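/*
 * Illustration only, not part of this file: fuse_abort_conn() is
 * normally reached through the per-connection "abort" file exposed by
 * the fuse control filesystem.  A sketch of triggering it from
 * userspace, assuming the control filesystem is mounted at its usual
 * /sys/fs/fuse/connections location; any write to the file aborts the
 * connection.  'abort_fuse_conn' is an assumed name.
 */
#if 0	/* example sketch (userspace) */
#include <fcntl.h>
#include <unistd.h>

static int abort_fuse_conn(const char *abort_path)
{
        /* e.g. "/sys/fs/fuse/connections/<dev>/abort" (assumed layout) */
        int fd = open(abort_path, O_WRONLY);
        ssize_t n;

        if (fd < 0)
                return -1;
        n = write(fd, "1", 1);
        close(fd);
        return n == 1 ? 0 : -1;
}
#endif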
1947
1948int fuse_dev_release(struct inode *inode, struct file *file)
1949{
1950        struct fuse_conn *fc = fuse_get_conn(file);
1951        if (fc) {
1952                spin_lock(&fc->lock);
1953                fc->connected = 0;
1954                fc->blocked = 0;
1955                end_queued_requests(fc);
1956                wake_up_all(&fc->blocked_waitq);
1957                spin_unlock(&fc->lock);
1958                fuse_conn_put(fc);
1959        }
1960
1961        return 0;
1962}
1963EXPORT_SYMBOL_GPL(fuse_dev_release);
1964
1965static int fuse_dev_fasync(int fd, struct file *file, int on)
1966{
1967        struct fuse_conn *fc = fuse_get_conn(file);
1968        if (!fc)
1969                return -EPERM;
1970
1971        /* No locking - fasync_helper does its own locking */
1972        return fasync_helper(fd, file, on, &fc->fasync);
1973}
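/*
 * Illustration only, not part of this file: the userspace side of the
 * fasync support above.  Setting the owner and O_ASYNC on the device
 * descriptor makes the kill_fasync() calls in the queuing and abort
 * paths deliver SIGIO to the daemon.  'enable_sigio' is an assumed
 * name.
 */
#if 0	/* example sketch (userspace) */
#include <fcntl.h>
#include <unistd.h>

static int enable_sigio(int devfd)
{
        int flags;

        if (fcntl(devfd, F_SETOWN, getpid()) == -1)
                return -1;
        flags = fcntl(devfd, F_GETFL);
        if (flags == -1)
                return -1;
        return fcntl(devfd, F_SETFL, flags | O_ASYNC);
}
#endif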
1974
1975const struct file_operations fuse_dev_operations = {
1976        .owner          = THIS_MODULE,
1977        .llseek         = no_llseek,
1978        .read           = do_sync_read,
1979        .aio_read       = fuse_dev_read,
1980        .splice_read    = fuse_dev_splice_read,
1981        .write          = do_sync_write,
1982        .aio_write      = fuse_dev_write,
1983        .splice_write   = fuse_dev_splice_write,
1984        .poll           = fuse_dev_poll,
1985        .release        = fuse_dev_release,
1986        .fasync         = fuse_dev_fasync,
1987};
1988EXPORT_SYMBOL_GPL(fuse_dev_operations);
1989
1990static struct miscdevice fuse_miscdevice = {
1991        .minor = FUSE_MINOR,
1992        .name  = "fuse",
1993        .fops = &fuse_dev_operations,
1994};
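/*
 * Illustration only, not part of this file: how the misc device
 * registered above is typically consumed.  A mount helper opens
 * /dev/fuse and passes the descriptor to mount(2) in the "fd=" option
 * together with rootmode, user_id and group_id, which fuse requires;
 * the daemon then reads requests from and writes replies to that
 * descriptor.  This is only a sketch of the sequence, not the
 * fusermount/libfuse implementation; names are assumptions and the
 * caller needs CAP_SYS_ADMIN (unprivileged mounts go through
 * fusermount).
 */
#if 0	/* example sketch (userspace) */
#include <fcntl.h>
#include <stdio.h>
#include <sys/mount.h>
#include <sys/stat.h>
#include <unistd.h>

static int mount_fuse_fs(const char *source, const char *mountpoint)
{
        char opts[128];
        int devfd = open("/dev/fuse", O_RDWR);

        if (devfd < 0)
                return -1;

        snprintf(opts, sizeof(opts),
                 "fd=%d,rootmode=%o,user_id=%u,group_id=%u",
                 devfd, S_IFDIR, getuid(), getgid());

        if (mount(source, mountpoint, "fuse", 0, opts) == -1) {
                close(devfd);
                return -1;
        }
        return devfd;   /* service requests on this descriptor */
}
#endif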
1995
1996int __init fuse_dev_init(void)
1997{
1998        int err = -ENOMEM;
1999        fuse_req_cachep = kmem_cache_create("fuse_request",
2000                                            sizeof(struct fuse_req),
2001                                            0, 0, NULL);
2002        if (!fuse_req_cachep)
2003                goto out;
2004
2005        err = misc_register(&fuse_miscdevice);
2006        if (err)
2007                goto out_cache_clean;
2008
2009        return 0;
2010
2011 out_cache_clean:
2012        kmem_cache_destroy(fuse_req_cachep);
2013 out:
2014        return err;
2015}
2016
2017void fuse_dev_cleanup(void)
2018{
2019        misc_deregister(&fuse_miscdevice);
2020        kmem_cache_destroy(fuse_req_cachep);
2021}
2022