linux/fs/fuse/dev.c
   1/*
   2  FUSE: Filesystem in Userspace
   3  Copyright (C) 2001-2008  Miklos Szeredi <miklos@szeredi.hu>
   4
   5  This program can be distributed under the terms of the GNU GPL.
   6  See the file COPYING.
   7*/
   8
   9#include "fuse_i.h"
  10
  11#include <linux/init.h>
  12#include <linux/module.h>
  13#include <linux/poll.h>
  14#include <linux/uio.h>
  15#include <linux/miscdevice.h>
  16#include <linux/pagemap.h>
  17#include <linux/file.h>
  18#include <linux/slab.h>
  19#include <linux/pipe_fs_i.h>
  20#include <linux/swap.h>
  21#include <linux/splice.h>
  22
  23MODULE_ALIAS_MISCDEV(FUSE_MINOR);
  24MODULE_ALIAS("devname:fuse");
  25
  26static struct kmem_cache *fuse_req_cachep;
  27
  28static struct fuse_conn *fuse_get_conn(struct file *file)
  29{
  30        /*
   31         * Lockless access is OK, because file->private_data is set
  32         * once during mount and is valid until the file is released.
  33         */
  34        return file->private_data;
  35}
  36
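     /*
      * Reset a request to a pristine state: zero it, initialize its list
      * heads and wait queue, and give it a single reference.
      */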
  37static void fuse_request_init(struct fuse_req *req)
  38{
  39        memset(req, 0, sizeof(*req));
  40        INIT_LIST_HEAD(&req->list);
  41        INIT_LIST_HEAD(&req->intr_entry);
  42        init_waitqueue_head(&req->waitq);
  43        atomic_set(&req->count, 1);
  44}
  45
  46struct fuse_req *fuse_request_alloc(void)
  47{
  48        struct fuse_req *req = kmem_cache_alloc(fuse_req_cachep, GFP_KERNEL);
  49        if (req)
  50                fuse_request_init(req);
  51        return req;
  52}
  53EXPORT_SYMBOL_GPL(fuse_request_alloc);
  54
  55struct fuse_req *fuse_request_alloc_nofs(void)
  56{
  57        struct fuse_req *req = kmem_cache_alloc(fuse_req_cachep, GFP_NOFS);
  58        if (req)
  59                fuse_request_init(req);
  60        return req;
  61}
  62
  63void fuse_request_free(struct fuse_req *req)
  64{
  65        kmem_cache_free(fuse_req_cachep, req);
  66}
  67
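     /*
      * Block all signals except SIGKILL, saving the previous mask in
      * *oldset so that restore_sigs() can put it back.
      */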
  68static void block_sigs(sigset_t *oldset)
  69{
  70        sigset_t mask;
  71
  72        siginitsetinv(&mask, sigmask(SIGKILL));
  73        sigprocmask(SIG_BLOCK, &mask, oldset);
  74}
  75
  76static void restore_sigs(sigset_t *oldset)
  77{
  78        sigprocmask(SIG_SETMASK, oldset, NULL);
  79}
  80
  81static void __fuse_get_request(struct fuse_req *req)
  82{
  83        atomic_inc(&req->count);
  84}
  85
  86/* Must be called with > 1 refcount */
  87static void __fuse_put_request(struct fuse_req *req)
  88{
  89        BUG_ON(atomic_read(&req->count) < 2);
  90        atomic_dec(&req->count);
  91}
  92
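     /* Fill in the request header with the caller's credentials and pid */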
  93static void fuse_req_init_context(struct fuse_req *req)
  94{
  95        req->in.h.uid = from_kuid_munged(&init_user_ns, current_fsuid());
  96        req->in.h.gid = from_kgid_munged(&init_user_ns, current_fsgid());
  97        req->in.h.pid = current->pid;
  98}
  99
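     /*
      * Allocate a request for this connection.  Waits for fc->blocked to
      * clear (only SIGKILL interrupts the wait) and returns an ERR_PTR on
      * interruption, disconnection or allocation failure.
      */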
 100struct fuse_req *fuse_get_req(struct fuse_conn *fc)
 101{
 102        struct fuse_req *req;
 103        sigset_t oldset;
 104        int intr;
 105        int err;
 106
 107        atomic_inc(&fc->num_waiting);
 108        block_sigs(&oldset);
 109        intr = wait_event_interruptible(fc->blocked_waitq, !fc->blocked);
 110        restore_sigs(&oldset);
 111        err = -EINTR;
 112        if (intr)
 113                goto out;
 114
 115        err = -ENOTCONN;
 116        if (!fc->connected)
 117                goto out;
 118
 119        req = fuse_request_alloc();
 120        err = -ENOMEM;
 121        if (!req)
 122                goto out;
 123
 124        fuse_req_init_context(req);
 125        req->waiting = 1;
 126        return req;
 127
 128 out:
 129        atomic_dec(&fc->num_waiting);
 130        return ERR_PTR(err);
 131}
 132EXPORT_SYMBOL_GPL(fuse_get_req);
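     /*
      * Typical caller pattern elsewhere in fs/fuse (a sketch, not code from
      * this file): allocate a request, fill in the arguments, send it, read
      * the result from the reply header, then drop the reference:
      *
      *     req = fuse_get_req(fc);
      *     if (IS_ERR(req))
      *             return PTR_ERR(req);
      *     ... set up req->in ...
      *     fuse_request_send(fc, req);
      *     err = req->out.h.error;
      *     fuse_put_request(fc, req);
      */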
 133
 134/*
  135 * Return the request in fuse_file->reserved_req.  However, that may
 136 * currently be in use.  If that is the case, wait for it to become
 137 * available.
 138 */
 139static struct fuse_req *get_reserved_req(struct fuse_conn *fc,
 140                                         struct file *file)
 141{
 142        struct fuse_req *req = NULL;
 143        struct fuse_file *ff = file->private_data;
 144
 145        do {
 146                wait_event(fc->reserved_req_waitq, ff->reserved_req);
 147                spin_lock(&fc->lock);
 148                if (ff->reserved_req) {
 149                        req = ff->reserved_req;
 150                        ff->reserved_req = NULL;
 151                        req->stolen_file = get_file(file);
 152                }
 153                spin_unlock(&fc->lock);
 154        } while (!req);
 155
 156        return req;
 157}
 158
 159/*
 160 * Put stolen request back into fuse_file->reserved_req
 161 */
 162static void put_reserved_req(struct fuse_conn *fc, struct fuse_req *req)
 163{
 164        struct file *file = req->stolen_file;
 165        struct fuse_file *ff = file->private_data;
 166
 167        spin_lock(&fc->lock);
 168        fuse_request_init(req);
 169        BUG_ON(ff->reserved_req);
 170        ff->reserved_req = req;
 171        wake_up_all(&fc->reserved_req_waitq);
 172        spin_unlock(&fc->lock);
 173        fput(file);
 174}
 175
 176/*
 177 * Gets a requests for a file operation, always succeeds
 178 *
 179 * This is used for sending the FLUSH request, which must get to
 180 * userspace, due to POSIX locks which may need to be unlocked.
 181 *
 182 * If allocation fails due to OOM, use the reserved request in
 183 * fuse_file.
 184 *
 185 * This is very unlikely to deadlock accidentally, since the
  186 * filesystem should not have its own file open.  If deadlock is
 187 * intentional, it can still be broken by "aborting" the filesystem.
 188 */
 189struct fuse_req *fuse_get_req_nofail(struct fuse_conn *fc, struct file *file)
 190{
 191        struct fuse_req *req;
 192
 193        atomic_inc(&fc->num_waiting);
 194        wait_event(fc->blocked_waitq, !fc->blocked);
 195        req = fuse_request_alloc();
 196        if (!req)
 197                req = get_reserved_req(fc, file);
 198
 199        fuse_req_init_context(req);
 200        req->waiting = 1;
 201        return req;
 202}
 203
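     /*
      * Drop a reference to the request.  On the final put the request is
      * either returned to its file's reserved slot or freed.
      */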
 204void fuse_put_request(struct fuse_conn *fc, struct fuse_req *req)
 205{
 206        if (atomic_dec_and_test(&req->count)) {
 207                if (req->waiting)
 208                        atomic_dec(&fc->num_waiting);
 209
 210                if (req->stolen_file)
 211                        put_reserved_req(fc, req);
 212                else
 213                        fuse_request_free(req);
 214        }
 215}
 216EXPORT_SYMBOL_GPL(fuse_put_request);
 217
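     /* Total size in bytes of an argument array */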
 218static unsigned len_args(unsigned numargs, struct fuse_arg *args)
 219{
 220        unsigned nbytes = 0;
 221        unsigned i;
 222
 223        for (i = 0; i < numargs; i++)
 224                nbytes += args[i].size;
 225
 226        return nbytes;
 227}
 228
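     /* Allocate the next unique request ID, skipping the reserved value zero */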
 229static u64 fuse_get_unique(struct fuse_conn *fc)
 230{
 231        fc->reqctr++;
 232        /* zero is special */
 233        if (fc->reqctr == 0)
 234                fc->reqctr = 1;
 235
 236        return fc->reqctr;
 237}
 238
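     /*
      * Add a request to the pending list and wake up readers of the device.
      * Called with fc->lock held.
      */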
 239static void queue_request(struct fuse_conn *fc, struct fuse_req *req)
 240{
 241        req->in.h.len = sizeof(struct fuse_in_header) +
 242                len_args(req->in.numargs, (struct fuse_arg *) req->in.args);
 243        list_add_tail(&req->list, &fc->pending);
 244        req->state = FUSE_REQ_PENDING;
 245        if (!req->waiting) {
 246                req->waiting = 1;
 247                atomic_inc(&fc->num_waiting);
 248        }
 249        wake_up(&fc->waitq);
 250        kill_fasync(&fc->fasync, SIGIO, POLL_IN);
 251}
 252
 253void fuse_queue_forget(struct fuse_conn *fc, struct fuse_forget_link *forget,
 254                       u64 nodeid, u64 nlookup)
 255{
 256        forget->forget_one.nodeid = nodeid;
 257        forget->forget_one.nlookup = nlookup;
 258
 259        spin_lock(&fc->lock);
 260        if (fc->connected) {
 261                fc->forget_list_tail->next = forget;
 262                fc->forget_list_tail = forget;
 263                wake_up(&fc->waitq);
 264                kill_fasync(&fc->fasync, SIGIO, POLL_IN);
 265        } else {
 266                kfree(forget);
 267        }
 268        spin_unlock(&fc->lock);
 269}
 270
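     /*
      * Move queued background requests onto the pending list until the
      * max_background limit is reached.  Called with fc->lock held.
      */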
 271static void flush_bg_queue(struct fuse_conn *fc)
 272{
 273        while (fc->active_background < fc->max_background &&
 274               !list_empty(&fc->bg_queue)) {
 275                struct fuse_req *req;
 276
 277                req = list_entry(fc->bg_queue.next, struct fuse_req, list);
 278                list_del(&req->list);
 279                fc->active_background++;
 280                req->in.h.unique = fuse_get_unique(fc);
 281                queue_request(fc, req);
 282        }
 283}
 284
 285/*
 286 * This function is called when a request is finished.  Either a reply
 287 * has arrived or it was aborted (and not yet sent) or some error
 288 * occurred during communication with userspace, or the device file
 289 * was closed.  The requester thread is woken up (if still waiting),
 290 * the 'end' callback is called if given, else the reference to the
  291 * request is released.
  292 *
  293 * Called with fc->lock held, unlocks it
 294 */
 295static void request_end(struct fuse_conn *fc, struct fuse_req *req)
 296__releases(fc->lock)
 297{
 298        void (*end) (struct fuse_conn *, struct fuse_req *) = req->end;
 299        req->end = NULL;
 300        list_del(&req->list);
 301        list_del(&req->intr_entry);
 302        req->state = FUSE_REQ_FINISHED;
 303        if (req->background) {
 304                if (fc->num_background == fc->max_background) {
 305                        fc->blocked = 0;
 306                        wake_up_all(&fc->blocked_waitq);
 307                }
 308                if (fc->num_background == fc->congestion_threshold &&
 309                    fc->connected && fc->bdi_initialized) {
 310                        clear_bdi_congested(&fc->bdi, BLK_RW_SYNC);
 311                        clear_bdi_congested(&fc->bdi, BLK_RW_ASYNC);
 312                }
 313                fc->num_background--;
 314                fc->active_background--;
 315                flush_bg_queue(fc);
 316        }
 317        spin_unlock(&fc->lock);
 318        wake_up(&req->waitq);
 319        if (end)
 320                end(fc, req);
 321        fuse_put_request(fc, req);
 322}
 323
 324static void wait_answer_interruptible(struct fuse_conn *fc,
 325                                      struct fuse_req *req)
 326__releases(fc->lock)
 327__acquires(fc->lock)
 328{
 329        if (signal_pending(current))
 330                return;
 331
 332        spin_unlock(&fc->lock);
 333        wait_event_interruptible(req->waitq, req->state == FUSE_REQ_FINISHED);
 334        spin_lock(&fc->lock);
 335}
 336
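     /* Queue an INTERRUPT for the request and notify the device readers */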
 337static void queue_interrupt(struct fuse_conn *fc, struct fuse_req *req)
 338{
 339        list_add_tail(&req->intr_entry, &fc->interrupts);
 340        wake_up(&fc->waitq);
 341        kill_fasync(&fc->fasync, SIGIO, POLL_IN);
 342}
 343
 344static void request_wait_answer(struct fuse_conn *fc, struct fuse_req *req)
 345__releases(fc->lock)
 346__acquires(fc->lock)
 347{
 348        if (!fc->no_interrupt) {
 349                /* Any signal may interrupt this */
 350                wait_answer_interruptible(fc, req);
 351
 352                if (req->aborted)
 353                        goto aborted;
 354                if (req->state == FUSE_REQ_FINISHED)
 355                        return;
 356
 357                req->interrupted = 1;
 358                if (req->state == FUSE_REQ_SENT)
 359                        queue_interrupt(fc, req);
 360        }
 361
 362        if (!req->force) {
 363                sigset_t oldset;
 364
 365                /* Only fatal signals may interrupt this */
 366                block_sigs(&oldset);
 367                wait_answer_interruptible(fc, req);
 368                restore_sigs(&oldset);
 369
 370                if (req->aborted)
 371                        goto aborted;
 372                if (req->state == FUSE_REQ_FINISHED)
 373                        return;
 374
 375                /* Request is not yet in userspace, bail out */
 376                if (req->state == FUSE_REQ_PENDING) {
 377                        list_del(&req->list);
 378                        __fuse_put_request(req);
 379                        req->out.h.error = -EINTR;
 380                        return;
 381                }
 382        }
 383
 384        /*
 385         * Either request is already in userspace, or it was forced.
 386         * Wait it out.
 387         */
 388        spin_unlock(&fc->lock);
 389        wait_event(req->waitq, req->state == FUSE_REQ_FINISHED);
 390        spin_lock(&fc->lock);
 391
 392        if (!req->aborted)
 393                return;
 394
 395 aborted:
 396        BUG_ON(req->state != FUSE_REQ_FINISHED);
 397        if (req->locked) {
 398                /* This is uninterruptible sleep, because data is
 399                   being copied to/from the buffers of req.  During
 400                   locked state, there mustn't be any filesystem
 401                   operation (e.g. page fault), since that could lead
 402                   to deadlock */
 403                spin_unlock(&fc->lock);
 404                wait_event(req->waitq, !req->locked);
 405                spin_lock(&fc->lock);
 406        }
 407}
 408
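     /*
      * Send a request and wait for the answer.  The result is left in
      * req->out.h.error.
      */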
 409void fuse_request_send(struct fuse_conn *fc, struct fuse_req *req)
 410{
 411        req->isreply = 1;
 412        spin_lock(&fc->lock);
 413        if (!fc->connected)
 414                req->out.h.error = -ENOTCONN;
 415        else if (fc->conn_error)
 416                req->out.h.error = -ECONNREFUSED;
 417        else {
 418                req->in.h.unique = fuse_get_unique(fc);
 419                queue_request(fc, req);
 420                /* acquire extra reference, since request is still needed
 421                   after request_end() */
 422                __fuse_get_request(req);
 423
 424                request_wait_answer(fc, req);
 425        }
 426        spin_unlock(&fc->lock);
 427}
 428EXPORT_SYMBOL_GPL(fuse_request_send);
 429
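     /*
      * Queue a request for background processing.  Called with fc->lock
      * held; may set fc->blocked and mark the bdi congested when the
      * background limits are reached.
      */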
 430static void fuse_request_send_nowait_locked(struct fuse_conn *fc,
 431                                            struct fuse_req *req)
 432{
 433        req->background = 1;
 434        fc->num_background++;
 435        if (fc->num_background == fc->max_background)
 436                fc->blocked = 1;
 437        if (fc->num_background == fc->congestion_threshold &&
 438            fc->bdi_initialized) {
 439                set_bdi_congested(&fc->bdi, BLK_RW_SYNC);
 440                set_bdi_congested(&fc->bdi, BLK_RW_ASYNC);
 441        }
 442        list_add_tail(&req->list, &fc->bg_queue);
 443        flush_bg_queue(fc);
 444}
 445
 446static void fuse_request_send_nowait(struct fuse_conn *fc, struct fuse_req *req)
 447{
 448        spin_lock(&fc->lock);
 449        if (fc->connected) {
 450                fuse_request_send_nowait_locked(fc, req);
 451                spin_unlock(&fc->lock);
 452        } else {
 453                req->out.h.error = -ENOTCONN;
 454                request_end(fc, req);
 455        }
 456}
 457
 458void fuse_request_send_background(struct fuse_conn *fc, struct fuse_req *req)
 459{
 460        req->isreply = 1;
 461        fuse_request_send_nowait(fc, req);
 462}
 463EXPORT_SYMBOL_GPL(fuse_request_send_background);
 464
 465static int fuse_request_send_notify_reply(struct fuse_conn *fc,
 466                                          struct fuse_req *req, u64 unique)
 467{
 468        int err = -ENODEV;
 469
 470        req->isreply = 0;
 471        req->in.h.unique = unique;
 472        spin_lock(&fc->lock);
 473        if (fc->connected) {
 474                queue_request(fc, req);
 475                err = 0;
 476        }
 477        spin_unlock(&fc->lock);
 478
 479        return err;
 480}
 481
 482/*
 483 * Called under fc->lock
 484 *
 485 * fc->connected must have been checked previously
 486 */
 487void fuse_request_send_background_locked(struct fuse_conn *fc,
 488                                         struct fuse_req *req)
 489{
 490        req->isreply = 1;
 491        fuse_request_send_nowait_locked(fc, req);
 492}
 493
 494/*
 495 * Lock the request.  Up to the next unlock_request() there mustn't be
 496 * anything that could cause a page-fault.  If the request was already
  497 * aborted, bail out.
 498 */
 499static int lock_request(struct fuse_conn *fc, struct fuse_req *req)
 500{
 501        int err = 0;
 502        if (req) {
 503                spin_lock(&fc->lock);
 504                if (req->aborted)
 505                        err = -ENOENT;
 506                else
 507                        req->locked = 1;
 508                spin_unlock(&fc->lock);
 509        }
 510        return err;
 511}
 512
 513/*
  514 * Unlock request.  If it was aborted while locked, the
 515 * requester thread is currently waiting for it to be unlocked, so
 516 * wake it up.
 517 */
 518static void unlock_request(struct fuse_conn *fc, struct fuse_req *req)
 519{
 520        if (req) {
 521                spin_lock(&fc->lock);
 522                req->locked = 0;
 523                if (req->aborted)
 524                        wake_up(&req->waitq);
 525                spin_unlock(&fc->lock);
 526        }
 527}
 528
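     /*
      * State for copying request data between the kernel and a userspace
      * buffer or a pipe, one page at a time.
      */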
 529struct fuse_copy_state {
 530        struct fuse_conn *fc;
 531        int write;
 532        struct fuse_req *req;
 533        const struct iovec *iov;
 534        struct pipe_buffer *pipebufs;
 535        struct pipe_buffer *currbuf;
 536        struct pipe_inode_info *pipe;
 537        unsigned long nr_segs;
 538        unsigned long seglen;
 539        unsigned long addr;
 540        struct page *pg;
 541        void *mapaddr;
 542        void *buf;
 543        unsigned len;
 544        unsigned move_pages:1;
 545};
 546
 547static void fuse_copy_init(struct fuse_copy_state *cs, struct fuse_conn *fc,
 548                           int write,
 549                           const struct iovec *iov, unsigned long nr_segs)
 550{
 551        memset(cs, 0, sizeof(*cs));
 552        cs->fc = fc;
 553        cs->write = write;
 554        cs->iov = iov;
 555        cs->nr_segs = nr_segs;
 556}
 557
 558/* Unmap and put previous page of userspace buffer */
 559static void fuse_copy_finish(struct fuse_copy_state *cs)
 560{
 561        if (cs->currbuf) {
 562                struct pipe_buffer *buf = cs->currbuf;
 563
 564                if (!cs->write) {
 565                        buf->ops->unmap(cs->pipe, buf, cs->mapaddr);
 566                } else {
 567                        kunmap(buf->page);
 568                        buf->len = PAGE_SIZE - cs->len;
 569                }
 570                cs->currbuf = NULL;
 571                cs->mapaddr = NULL;
 572        } else if (cs->mapaddr) {
 573                kunmap(cs->pg);
 574                if (cs->write) {
 575                        flush_dcache_page(cs->pg);
 576                        set_page_dirty_lock(cs->pg);
 577                }
 578                put_page(cs->pg);
 579                cs->mapaddr = NULL;
 580        }
 581}
 582
 583/*
  584 * Get another page's worth of the userspace buffer, map it into kernel
  585 * address space, and lock the request
 586 */
 587static int fuse_copy_fill(struct fuse_copy_state *cs)
 588{
 589        unsigned long offset;
 590        int err;
 591
 592        unlock_request(cs->fc, cs->req);
 593        fuse_copy_finish(cs);
 594        if (cs->pipebufs) {
 595                struct pipe_buffer *buf = cs->pipebufs;
 596
 597                if (!cs->write) {
 598                        err = buf->ops->confirm(cs->pipe, buf);
 599                        if (err)
 600                                return err;
 601
 602                        BUG_ON(!cs->nr_segs);
 603                        cs->currbuf = buf;
 604                        cs->mapaddr = buf->ops->map(cs->pipe, buf, 0);
 605                        cs->len = buf->len;
 606                        cs->buf = cs->mapaddr + buf->offset;
 607                        cs->pipebufs++;
 608                        cs->nr_segs--;
 609                } else {
 610                        struct page *page;
 611
 612                        if (cs->nr_segs == cs->pipe->buffers)
 613                                return -EIO;
 614
 615                        page = alloc_page(GFP_HIGHUSER);
 616                        if (!page)
 617                                return -ENOMEM;
 618
 619                        buf->page = page;
 620                        buf->offset = 0;
 621                        buf->len = 0;
 622
 623                        cs->currbuf = buf;
 624                        cs->mapaddr = kmap(page);
 625                        cs->buf = cs->mapaddr;
 626                        cs->len = PAGE_SIZE;
 627                        cs->pipebufs++;
 628                        cs->nr_segs++;
 629                }
 630        } else {
 631                if (!cs->seglen) {
 632                        BUG_ON(!cs->nr_segs);
 633                        cs->seglen = cs->iov[0].iov_len;
 634                        cs->addr = (unsigned long) cs->iov[0].iov_base;
 635                        cs->iov++;
 636                        cs->nr_segs--;
 637                }
 638                err = get_user_pages_fast(cs->addr, 1, cs->write, &cs->pg);
 639                if (err < 0)
 640                        return err;
 641                BUG_ON(err != 1);
 642                offset = cs->addr % PAGE_SIZE;
 643                cs->mapaddr = kmap(cs->pg);
 644                cs->buf = cs->mapaddr + offset;
 645                cs->len = min(PAGE_SIZE - offset, cs->seglen);
 646                cs->seglen -= cs->len;
 647                cs->addr += cs->len;
 648        }
 649
 650        return lock_request(cs->fc, cs->req);
 651}
 652
 653/* Do as much copy to/from userspace buffer as we can */
 654static int fuse_copy_do(struct fuse_copy_state *cs, void **val, unsigned *size)
 655{
 656        unsigned ncpy = min(*size, cs->len);
 657        if (val) {
 658                if (cs->write)
 659                        memcpy(cs->buf, *val, ncpy);
 660                else
 661                        memcpy(*val, cs->buf, ncpy);
 662                *val += ncpy;
 663        }
 664        *size -= ncpy;
 665        cs->len -= ncpy;
 666        cs->buf += ncpy;
 667        return ncpy;
 668}
 669
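     /*
      * Check that a page is safe to insert into the page cache: it must be
      * unmapped, have no extra references and carry no unexpected flags.
      * Returns 1 if the page must not be stolen.
      */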
 670static int fuse_check_page(struct page *page)
 671{
 672        if (page_mapcount(page) ||
 673            page->mapping != NULL ||
 674            page_count(page) != 1 ||
 675            (page->flags & PAGE_FLAGS_CHECK_AT_PREP &
 676             ~(1 << PG_locked |
 677               1 << PG_referenced |
 678               1 << PG_uptodate |
 679               1 << PG_lru |
 680               1 << PG_active |
 681               1 << PG_reclaim))) {
 682                printk(KERN_WARNING "fuse: trying to steal weird page\n");
 683                printk(KERN_WARNING "  page=%p index=%li flags=%08lx, count=%i, mapcount=%i, mapping=%p\n", page, page->index, page->flags, page_count(page), page_mapcount(page), page->mapping);
 684                return 1;
 685        }
 686        return 0;
 687}
 688
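     /*
      * Try to steal the page backing the current pipe buffer and splice it
      * into the page cache in place of *pagep, avoiding a copy.  Returns 0
      * on success, a negative error, or 1 if the caller should fall back
      * to an ordinary copy.
      */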
 689static int fuse_try_move_page(struct fuse_copy_state *cs, struct page **pagep)
 690{
 691        int err;
 692        struct page *oldpage = *pagep;
 693        struct page *newpage;
 694        struct pipe_buffer *buf = cs->pipebufs;
 695
 696        unlock_request(cs->fc, cs->req);
 697        fuse_copy_finish(cs);
 698
 699        err = buf->ops->confirm(cs->pipe, buf);
 700        if (err)
 701                return err;
 702
 703        BUG_ON(!cs->nr_segs);
 704        cs->currbuf = buf;
 705        cs->len = buf->len;
 706        cs->pipebufs++;
 707        cs->nr_segs--;
 708
 709        if (cs->len != PAGE_SIZE)
 710                goto out_fallback;
 711
 712        if (buf->ops->steal(cs->pipe, buf) != 0)
 713                goto out_fallback;
 714
 715        newpage = buf->page;
 716
 717        if (WARN_ON(!PageUptodate(newpage)))
 718                return -EIO;
 719
 720        ClearPageMappedToDisk(newpage);
 721
 722        if (fuse_check_page(newpage) != 0)
 723                goto out_fallback_unlock;
 724
 725        /*
 726         * This is a new and locked page, it shouldn't be mapped or
 727         * have any special flags on it
 728         */
 729        if (WARN_ON(page_mapped(oldpage)))
 730                goto out_fallback_unlock;
 731        if (WARN_ON(page_has_private(oldpage)))
 732                goto out_fallback_unlock;
 733        if (WARN_ON(PageDirty(oldpage) || PageWriteback(oldpage)))
 734                goto out_fallback_unlock;
 735        if (WARN_ON(PageMlocked(oldpage)))
 736                goto out_fallback_unlock;
 737
 738        err = replace_page_cache_page(oldpage, newpage, GFP_KERNEL);
 739        if (err) {
 740                unlock_page(newpage);
 741                return err;
 742        }
 743
 744        page_cache_get(newpage);
 745
 746        if (!(buf->flags & PIPE_BUF_FLAG_LRU))
 747                lru_cache_add_file(newpage);
 748
 749        err = 0;
 750        spin_lock(&cs->fc->lock);
 751        if (cs->req->aborted)
 752                err = -ENOENT;
 753        else
 754                *pagep = newpage;
 755        spin_unlock(&cs->fc->lock);
 756
 757        if (err) {
 758                unlock_page(newpage);
 759                page_cache_release(newpage);
 760                return err;
 761        }
 762
 763        unlock_page(oldpage);
 764        page_cache_release(oldpage);
 765        cs->len = 0;
 766
 767        return 0;
 768
 769out_fallback_unlock:
 770        unlock_page(newpage);
 771out_fallback:
 772        cs->mapaddr = buf->ops->map(cs->pipe, buf, 1);
 773        cs->buf = cs->mapaddr + buf->offset;
 774
 775        err = lock_request(cs->fc, cs->req);
 776        if (err)
 777                return err;
 778
 779        return 1;
 780}
 781
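     /*
      * Splice helper: attach a request page to the next pipe buffer instead
      * of copying its contents.
      */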
 782static int fuse_ref_page(struct fuse_copy_state *cs, struct page *page,
 783                         unsigned offset, unsigned count)
 784{
 785        struct pipe_buffer *buf;
 786
 787        if (cs->nr_segs == cs->pipe->buffers)
 788                return -EIO;
 789
 790        unlock_request(cs->fc, cs->req);
 791        fuse_copy_finish(cs);
 792
 793        buf = cs->pipebufs;
 794        page_cache_get(page);
 795        buf->page = page;
 796        buf->offset = offset;
 797        buf->len = count;
 798
 799        cs->pipebufs++;
 800        cs->nr_segs++;
 801        cs->len = 0;
 802
 803        return 0;
 804}
 805
 806/*
 807 * Copy a page in the request to/from the userspace buffer.  Must be
 808 * done atomically
 809 */
 810static int fuse_copy_page(struct fuse_copy_state *cs, struct page **pagep,
 811                          unsigned offset, unsigned count, int zeroing)
 812{
 813        int err;
 814        struct page *page = *pagep;
 815
 816        if (page && zeroing && count < PAGE_SIZE)
 817                clear_highpage(page);
 818
 819        while (count) {
 820                if (cs->write && cs->pipebufs && page) {
 821                        return fuse_ref_page(cs, page, offset, count);
 822                } else if (!cs->len) {
 823                        if (cs->move_pages && page &&
 824                            offset == 0 && count == PAGE_SIZE) {
 825                                err = fuse_try_move_page(cs, pagep);
 826                                if (err <= 0)
 827                                        return err;
 828                        } else {
 829                                err = fuse_copy_fill(cs);
 830                                if (err)
 831                                        return err;
 832                        }
 833                }
 834                if (page) {
 835                        void *mapaddr = kmap_atomic(page);
 836                        void *buf = mapaddr + offset;
 837                        offset += fuse_copy_do(cs, &buf, &count);
 838                        kunmap_atomic(mapaddr);
 839                } else
 840                        offset += fuse_copy_do(cs, NULL, &count);
 841        }
 842        if (page && !cs->write)
 843                flush_dcache_page(page);
 844        return 0;
 845}
 846
 847/* Copy pages in the request to/from userspace buffer */
 848static int fuse_copy_pages(struct fuse_copy_state *cs, unsigned nbytes,
 849                           int zeroing)
 850{
 851        unsigned i;
 852        struct fuse_req *req = cs->req;
 853        unsigned offset = req->page_offset;
 854        unsigned count = min(nbytes, (unsigned) PAGE_SIZE - offset);
 855
 856        for (i = 0; i < req->num_pages && (nbytes || zeroing); i++) {
 857                int err;
 858
 859                err = fuse_copy_page(cs, &req->pages[i], offset, count,
 860                                     zeroing);
 861                if (err)
 862                        return err;
 863
 864                nbytes -= count;
 865                count = min(nbytes, (unsigned) PAGE_SIZE);
 866                offset = 0;
 867        }
 868        return 0;
 869}
 870
 871/* Copy a single argument in the request to/from userspace buffer */
 872static int fuse_copy_one(struct fuse_copy_state *cs, void *val, unsigned size)
 873{
 874        while (size) {
 875                if (!cs->len) {
 876                        int err = fuse_copy_fill(cs);
 877                        if (err)
 878                                return err;
 879                }
 880                fuse_copy_do(cs, &val, &size);
 881        }
 882        return 0;
 883}
 884
 885/* Copy request arguments to/from userspace buffer */
 886static int fuse_copy_args(struct fuse_copy_state *cs, unsigned numargs,
 887                          unsigned argpages, struct fuse_arg *args,
 888                          int zeroing)
 889{
 890        int err = 0;
 891        unsigned i;
 892
 893        for (i = 0; !err && i < numargs; i++)  {
 894                struct fuse_arg *arg = &args[i];
 895                if (i == numargs - 1 && argpages)
 896                        err = fuse_copy_pages(cs, arg->size, zeroing);
 897                else
 898                        err = fuse_copy_one(cs, arg->value, arg->size);
 899        }
 900        return err;
 901}
 902
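     /* Return true if there are queued FORGET requests */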
 903static int forget_pending(struct fuse_conn *fc)
 904{
 905        return fc->forget_list_head.next != NULL;
 906}
 907
 908static int request_pending(struct fuse_conn *fc)
 909{
 910        return !list_empty(&fc->pending) || !list_empty(&fc->interrupts) ||
 911                forget_pending(fc);
 912}
 913
 914/* Wait until a request is available on the pending list */
 915static void request_wait(struct fuse_conn *fc)
 916__releases(fc->lock)
 917__acquires(fc->lock)
 918{
 919        DECLARE_WAITQUEUE(wait, current);
 920
 921        add_wait_queue_exclusive(&fc->waitq, &wait);
 922        while (fc->connected && !request_pending(fc)) {
 923                set_current_state(TASK_INTERRUPTIBLE);
 924                if (signal_pending(current))
 925                        break;
 926
 927                spin_unlock(&fc->lock);
 928                schedule();
 929                spin_lock(&fc->lock);
 930        }
 931        set_current_state(TASK_RUNNING);
 932        remove_wait_queue(&fc->waitq, &wait);
 933}
 934
 935/*
 936 * Transfer an interrupt request to userspace
 937 *
  938 * Unlike other requests, this one is assembled on demand, without a need
 939 * to allocate a separate fuse_req structure.
 940 *
 941 * Called with fc->lock held, releases it
 942 */
 943static int fuse_read_interrupt(struct fuse_conn *fc, struct fuse_copy_state *cs,
 944                               size_t nbytes, struct fuse_req *req)
 945__releases(fc->lock)
 946{
 947        struct fuse_in_header ih;
 948        struct fuse_interrupt_in arg;
 949        unsigned reqsize = sizeof(ih) + sizeof(arg);
 950        int err;
 951
 952        list_del_init(&req->intr_entry);
 953        req->intr_unique = fuse_get_unique(fc);
 954        memset(&ih, 0, sizeof(ih));
 955        memset(&arg, 0, sizeof(arg));
 956        ih.len = reqsize;
 957        ih.opcode = FUSE_INTERRUPT;
 958        ih.unique = req->intr_unique;
 959        arg.unique = req->in.h.unique;
 960
 961        spin_unlock(&fc->lock);
 962        if (nbytes < reqsize)
 963                return -EINVAL;
 964
 965        err = fuse_copy_one(cs, &ih, sizeof(ih));
 966        if (!err)
 967                err = fuse_copy_one(cs, &arg, sizeof(arg));
 968        fuse_copy_finish(cs);
 969
 970        return err ? err : reqsize;
 971}
 972
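     /*
      * Detach up to 'max' entries from the head of the forget list.  Returns
      * the detached chain and stores the number of entries in *countp.
      */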
 973static struct fuse_forget_link *dequeue_forget(struct fuse_conn *fc,
 974                                               unsigned max,
 975                                               unsigned *countp)
 976{
 977        struct fuse_forget_link *head = fc->forget_list_head.next;
 978        struct fuse_forget_link **newhead = &head;
 979        unsigned count;
 980
 981        for (count = 0; *newhead != NULL && count < max; count++)
 982                newhead = &(*newhead)->next;
 983
 984        fc->forget_list_head.next = *newhead;
 985        *newhead = NULL;
 986        if (fc->forget_list_head.next == NULL)
 987                fc->forget_list_tail = &fc->forget_list_head;
 988
 989        if (countp != NULL)
 990                *countp = count;
 991
 992        return head;
 993}
 994
 995static int fuse_read_single_forget(struct fuse_conn *fc,
 996                                   struct fuse_copy_state *cs,
 997                                   size_t nbytes)
 998__releases(fc->lock)
 999{
1000        int err;
1001        struct fuse_forget_link *forget = dequeue_forget(fc, 1, NULL);
1002        struct fuse_forget_in arg = {
1003                .nlookup = forget->forget_one.nlookup,
1004        };
1005        struct fuse_in_header ih = {
1006                .opcode = FUSE_FORGET,
1007                .nodeid = forget->forget_one.nodeid,
1008                .unique = fuse_get_unique(fc),
1009                .len = sizeof(ih) + sizeof(arg),
1010        };
1011
1012        spin_unlock(&fc->lock);
1013        kfree(forget);
1014        if (nbytes < ih.len)
1015                return -EINVAL;
1016
1017        err = fuse_copy_one(cs, &ih, sizeof(ih));
1018        if (!err)
1019                err = fuse_copy_one(cs, &arg, sizeof(arg));
1020        fuse_copy_finish(cs);
1021
1022        if (err)
1023                return err;
1024
1025        return ih.len;
1026}
1027
1028static int fuse_read_batch_forget(struct fuse_conn *fc,
1029                                   struct fuse_copy_state *cs, size_t nbytes)
1030__releases(fc->lock)
1031{
1032        int err;
1033        unsigned max_forgets;
1034        unsigned count;
1035        struct fuse_forget_link *head;
1036        struct fuse_batch_forget_in arg = { .count = 0 };
1037        struct fuse_in_header ih = {
1038                .opcode = FUSE_BATCH_FORGET,
1039                .unique = fuse_get_unique(fc),
1040                .len = sizeof(ih) + sizeof(arg),
1041        };
1042
1043        if (nbytes < ih.len) {
1044                spin_unlock(&fc->lock);
1045                return -EINVAL;
1046        }
1047
1048        max_forgets = (nbytes - ih.len) / sizeof(struct fuse_forget_one);
1049        head = dequeue_forget(fc, max_forgets, &count);
1050        spin_unlock(&fc->lock);
1051
1052        arg.count = count;
1053        ih.len += count * sizeof(struct fuse_forget_one);
1054        err = fuse_copy_one(cs, &ih, sizeof(ih));
1055        if (!err)
1056                err = fuse_copy_one(cs, &arg, sizeof(arg));
1057
1058        while (head) {
1059                struct fuse_forget_link *forget = head;
1060
1061                if (!err) {
1062                        err = fuse_copy_one(cs, &forget->forget_one,
1063                                            sizeof(forget->forget_one));
1064                }
1065                head = forget->next;
1066                kfree(forget);
1067        }
1068
1069        fuse_copy_finish(cs);
1070
1071        if (err)
1072                return err;
1073
1074        return ih.len;
1075}
1076
1077static int fuse_read_forget(struct fuse_conn *fc, struct fuse_copy_state *cs,
1078                            size_t nbytes)
1079__releases(fc->lock)
1080{
1081        if (fc->minor < 16 || fc->forget_list_head.next->next == NULL)
1082                return fuse_read_single_forget(fc, cs, nbytes);
1083        else
1084                return fuse_read_batch_forget(fc, cs, nbytes);
1085}
1086
1087/*
1088 * Read a single request into the userspace filesystem's buffer.  This
1089 * function waits until a request is available, then removes it from
 1090 * the pending list and copies the request data to the userspace buffer.
 1091 * If no reply is needed (FORGET), the request has been aborted, or there
 1092 * was an error during the copy, then it is finished by calling
1093 * request_end().  Otherwise add it to the processing list, and set
1094 * the 'sent' flag.
1095 */
1096static ssize_t fuse_dev_do_read(struct fuse_conn *fc, struct file *file,
1097                                struct fuse_copy_state *cs, size_t nbytes)
1098{
1099        int err;
1100        struct fuse_req *req;
1101        struct fuse_in *in;
1102        unsigned reqsize;
1103
1104 restart:
1105        spin_lock(&fc->lock);
1106        err = -EAGAIN;
1107        if ((file->f_flags & O_NONBLOCK) && fc->connected &&
1108            !request_pending(fc))
1109                goto err_unlock;
1110
1111        request_wait(fc);
1112        err = -ENODEV;
1113        if (!fc->connected)
1114                goto err_unlock;
1115        err = -ERESTARTSYS;
1116        if (!request_pending(fc))
1117                goto err_unlock;
1118
1119        if (!list_empty(&fc->interrupts)) {
1120                req = list_entry(fc->interrupts.next, struct fuse_req,
1121                                 intr_entry);
1122                return fuse_read_interrupt(fc, cs, nbytes, req);
1123        }
1124
1125        if (forget_pending(fc)) {
1126                if (list_empty(&fc->pending) || fc->forget_batch-- > 0)
1127                        return fuse_read_forget(fc, cs, nbytes);
1128
1129                if (fc->forget_batch <= -8)
1130                        fc->forget_batch = 16;
1131        }
1132
1133        req = list_entry(fc->pending.next, struct fuse_req, list);
1134        req->state = FUSE_REQ_READING;
1135        list_move(&req->list, &fc->io);
1136
1137        in = &req->in;
1138        reqsize = in->h.len;
1139        /* If request is too large, reply with an error and restart the read */
1140        if (nbytes < reqsize) {
1141                req->out.h.error = -EIO;
 1142                /* SETXATTR is special, since its data may be too large to fit */
1143                if (in->h.opcode == FUSE_SETXATTR)
1144                        req->out.h.error = -E2BIG;
1145                request_end(fc, req);
1146                goto restart;
1147        }
1148        spin_unlock(&fc->lock);
1149        cs->req = req;
1150        err = fuse_copy_one(cs, &in->h, sizeof(in->h));
1151        if (!err)
1152                err = fuse_copy_args(cs, in->numargs, in->argpages,
1153                                     (struct fuse_arg *) in->args, 0);
1154        fuse_copy_finish(cs);
1155        spin_lock(&fc->lock);
1156        req->locked = 0;
1157        if (req->aborted) {
1158                request_end(fc, req);
1159                return -ENODEV;
1160        }
1161        if (err) {
1162                req->out.h.error = -EIO;
1163                request_end(fc, req);
1164                return err;
1165        }
1166        if (!req->isreply)
1167                request_end(fc, req);
1168        else {
1169                req->state = FUSE_REQ_SENT;
1170                list_move_tail(&req->list, &fc->processing);
1171                if (req->interrupted)
1172                        queue_interrupt(fc, req);
1173                spin_unlock(&fc->lock);
1174        }
1175        return reqsize;
1176
1177 err_unlock:
1178        spin_unlock(&fc->lock);
1179        return err;
1180}
1181
1182static ssize_t fuse_dev_read(struct kiocb *iocb, const struct iovec *iov,
1183                              unsigned long nr_segs, loff_t pos)
1184{
1185        struct fuse_copy_state cs;
1186        struct file *file = iocb->ki_filp;
1187        struct fuse_conn *fc = fuse_get_conn(file);
1188        if (!fc)
1189                return -EPERM;
1190
1191        fuse_copy_init(&cs, fc, 1, iov, nr_segs);
1192
1193        return fuse_dev_do_read(fc, file, &cs, iov_length(iov, nr_segs));
1194}
1195
1196static int fuse_dev_pipe_buf_steal(struct pipe_inode_info *pipe,
1197                                   struct pipe_buffer *buf)
1198{
1199        return 1;
1200}
1201
1202static const struct pipe_buf_operations fuse_dev_pipe_buf_ops = {
1203        .can_merge = 0,
1204        .map = generic_pipe_buf_map,
1205        .unmap = generic_pipe_buf_unmap,
1206        .confirm = generic_pipe_buf_confirm,
1207        .release = generic_pipe_buf_release,
1208        .steal = fuse_dev_pipe_buf_steal,
1209        .get = generic_pipe_buf_get,
1210};
1211
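     /*
      * splice(2) read path for the device: fill a temporary array of pipe
      * buffers from one request, then link them into the pipe.
      */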
1212static ssize_t fuse_dev_splice_read(struct file *in, loff_t *ppos,
1213                                    struct pipe_inode_info *pipe,
1214                                    size_t len, unsigned int flags)
1215{
1216        int ret;
1217        int page_nr = 0;
1218        int do_wakeup = 0;
1219        struct pipe_buffer *bufs;
1220        struct fuse_copy_state cs;
1221        struct fuse_conn *fc = fuse_get_conn(in);
1222        if (!fc)
1223                return -EPERM;
1224
1225        bufs = kmalloc(pipe->buffers * sizeof(struct pipe_buffer), GFP_KERNEL);
1226        if (!bufs)
1227                return -ENOMEM;
1228
1229        fuse_copy_init(&cs, fc, 1, NULL, 0);
1230        cs.pipebufs = bufs;
1231        cs.pipe = pipe;
1232        ret = fuse_dev_do_read(fc, in, &cs, len);
1233        if (ret < 0)
1234                goto out;
1235
1236        ret = 0;
1237        pipe_lock(pipe);
1238
1239        if (!pipe->readers) {
1240                send_sig(SIGPIPE, current, 0);
1241                if (!ret)
1242                        ret = -EPIPE;
1243                goto out_unlock;
1244        }
1245
1246        if (pipe->nrbufs + cs.nr_segs > pipe->buffers) {
1247                ret = -EIO;
1248                goto out_unlock;
1249        }
1250
1251        while (page_nr < cs.nr_segs) {
1252                int newbuf = (pipe->curbuf + pipe->nrbufs) & (pipe->buffers - 1);
1253                struct pipe_buffer *buf = pipe->bufs + newbuf;
1254
1255                buf->page = bufs[page_nr].page;
1256                buf->offset = bufs[page_nr].offset;
1257                buf->len = bufs[page_nr].len;
1258                buf->ops = &fuse_dev_pipe_buf_ops;
1259
1260                pipe->nrbufs++;
1261                page_nr++;
1262                ret += buf->len;
1263
1264                if (pipe->inode)
1265                        do_wakeup = 1;
1266        }
1267
1268out_unlock:
1269        pipe_unlock(pipe);
1270
1271        if (do_wakeup) {
1272                smp_mb();
1273                if (waitqueue_active(&pipe->wait))
1274                        wake_up_interruptible(&pipe->wait);
1275                kill_fasync(&pipe->fasync_readers, SIGIO, POLL_IN);
1276        }
1277
1278out:
1279        for (; page_nr < cs.nr_segs; page_nr++)
1280                page_cache_release(bufs[page_nr].page);
1281
1282        kfree(bufs);
1283        return ret;
1284}
1285
1286static int fuse_notify_poll(struct fuse_conn *fc, unsigned int size,
1287                            struct fuse_copy_state *cs)
1288{
1289        struct fuse_notify_poll_wakeup_out outarg;
1290        int err = -EINVAL;
1291
1292        if (size != sizeof(outarg))
1293                goto err;
1294
1295        err = fuse_copy_one(cs, &outarg, sizeof(outarg));
1296        if (err)
1297                goto err;
1298
1299        fuse_copy_finish(cs);
1300        return fuse_notify_poll_wakeup(fc, &outarg);
1301
1302err:
1303        fuse_copy_finish(cs);
1304        return err;
1305}
1306
1307static int fuse_notify_inval_inode(struct fuse_conn *fc, unsigned int size,
1308                                   struct fuse_copy_state *cs)
1309{
1310        struct fuse_notify_inval_inode_out outarg;
1311        int err = -EINVAL;
1312
1313        if (size != sizeof(outarg))
1314                goto err;
1315
1316        err = fuse_copy_one(cs, &outarg, sizeof(outarg));
1317        if (err)
1318                goto err;
1319        fuse_copy_finish(cs);
1320
1321        down_read(&fc->killsb);
1322        err = -ENOENT;
1323        if (fc->sb) {
1324                err = fuse_reverse_inval_inode(fc->sb, outarg.ino,
1325                                               outarg.off, outarg.len);
1326        }
1327        up_read(&fc->killsb);
1328        return err;
1329
1330err:
1331        fuse_copy_finish(cs);
1332        return err;
1333}
1334
1335static int fuse_notify_inval_entry(struct fuse_conn *fc, unsigned int size,
1336                                   struct fuse_copy_state *cs)
1337{
1338        struct fuse_notify_inval_entry_out outarg;
1339        int err = -ENOMEM;
1340        char *buf;
1341        struct qstr name;
1342
1343        buf = kzalloc(FUSE_NAME_MAX + 1, GFP_KERNEL);
1344        if (!buf)
1345                goto err;
1346
1347        err = -EINVAL;
1348        if (size < sizeof(outarg))
1349                goto err;
1350
1351        err = fuse_copy_one(cs, &outarg, sizeof(outarg));
1352        if (err)
1353                goto err;
1354
1355        err = -ENAMETOOLONG;
1356        if (outarg.namelen > FUSE_NAME_MAX)
1357                goto err;
1358
1359        err = -EINVAL;
1360        if (size != sizeof(outarg) + outarg.namelen + 1)
1361                goto err;
1362
1363        name.name = buf;
1364        name.len = outarg.namelen;
1365        err = fuse_copy_one(cs, buf, outarg.namelen + 1);
1366        if (err)
1367                goto err;
1368        fuse_copy_finish(cs);
1369        buf[outarg.namelen] = 0;
1370        name.hash = full_name_hash(name.name, name.len);
1371
1372        down_read(&fc->killsb);
1373        err = -ENOENT;
1374        if (fc->sb)
1375                err = fuse_reverse_inval_entry(fc->sb, outarg.parent, 0, &name);
1376        up_read(&fc->killsb);
1377        kfree(buf);
1378        return err;
1379
1380err:
1381        kfree(buf);
1382        fuse_copy_finish(cs);
1383        return err;
1384}
1385
1386static int fuse_notify_delete(struct fuse_conn *fc, unsigned int size,
1387                              struct fuse_copy_state *cs)
1388{
1389        struct fuse_notify_delete_out outarg;
1390        int err = -ENOMEM;
1391        char *buf;
1392        struct qstr name;
1393
1394        buf = kzalloc(FUSE_NAME_MAX + 1, GFP_KERNEL);
1395        if (!buf)
1396                goto err;
1397
1398        err = -EINVAL;
1399        if (size < sizeof(outarg))
1400                goto err;
1401
1402        err = fuse_copy_one(cs, &outarg, sizeof(outarg));
1403        if (err)
1404                goto err;
1405
1406        err = -ENAMETOOLONG;
1407        if (outarg.namelen > FUSE_NAME_MAX)
1408                goto err;
1409
1410        err = -EINVAL;
1411        if (size != sizeof(outarg) + outarg.namelen + 1)
1412                goto err;
1413
1414        name.name = buf;
1415        name.len = outarg.namelen;
1416        err = fuse_copy_one(cs, buf, outarg.namelen + 1);
1417        if (err)
1418                goto err;
1419        fuse_copy_finish(cs);
1420        buf[outarg.namelen] = 0;
1421        name.hash = full_name_hash(name.name, name.len);
1422
1423        down_read(&fc->killsb);
1424        err = -ENOENT;
1425        if (fc->sb)
1426                err = fuse_reverse_inval_entry(fc->sb, outarg.parent,
1427                                               outarg.child, &name);
1428        up_read(&fc->killsb);
1429        kfree(buf);
1430        return err;
1431
1432err:
1433        kfree(buf);
1434        fuse_copy_finish(cs);
1435        return err;
1436}
1437
1438static int fuse_notify_store(struct fuse_conn *fc, unsigned int size,
1439                             struct fuse_copy_state *cs)
1440{
1441        struct fuse_notify_store_out outarg;
1442        struct inode *inode;
1443        struct address_space *mapping;
1444        u64 nodeid;
1445        int err;
1446        pgoff_t index;
1447        unsigned int offset;
1448        unsigned int num;
1449        loff_t file_size;
1450        loff_t end;
1451
1452        err = -EINVAL;
1453        if (size < sizeof(outarg))
1454                goto out_finish;
1455
1456        err = fuse_copy_one(cs, &outarg, sizeof(outarg));
1457        if (err)
1458                goto out_finish;
1459
1460        err = -EINVAL;
1461        if (size - sizeof(outarg) != outarg.size)
1462                goto out_finish;
1463
1464        nodeid = outarg.nodeid;
1465
1466        down_read(&fc->killsb);
1467
1468        err = -ENOENT;
1469        if (!fc->sb)
1470                goto out_up_killsb;
1471
1472        inode = ilookup5(fc->sb, nodeid, fuse_inode_eq, &nodeid);
1473        if (!inode)
1474                goto out_up_killsb;
1475
1476        mapping = inode->i_mapping;
1477        index = outarg.offset >> PAGE_CACHE_SHIFT;
1478        offset = outarg.offset & ~PAGE_CACHE_MASK;
1479        file_size = i_size_read(inode);
1480        end = outarg.offset + outarg.size;
1481        if (end > file_size) {
1482                file_size = end;
1483                fuse_write_update_size(inode, file_size);
1484        }
1485
1486        num = outarg.size;
1487        while (num) {
1488                struct page *page;
1489                unsigned int this_num;
1490
1491                err = -ENOMEM;
1492                page = find_or_create_page(mapping, index,
1493                                           mapping_gfp_mask(mapping));
1494                if (!page)
1495                        goto out_iput;
1496
1497                this_num = min_t(unsigned, num, PAGE_CACHE_SIZE - offset);
1498                err = fuse_copy_page(cs, &page, offset, this_num, 0);
1499                if (!err && offset == 0 && (num != 0 || file_size == end))
1500                        SetPageUptodate(page);
1501                unlock_page(page);
1502                page_cache_release(page);
1503
1504                if (err)
1505                        goto out_iput;
1506
1507                num -= this_num;
1508                offset = 0;
1509                index++;
1510        }
1511
1512        err = 0;
1513
1514out_iput:
1515        iput(inode);
1516out_up_killsb:
1517        up_read(&fc->killsb);
1518out_finish:
1519        fuse_copy_finish(cs);
1520        return err;
1521}
1522
1523static void fuse_retrieve_end(struct fuse_conn *fc, struct fuse_req *req)
1524{
1525        release_pages(req->pages, req->num_pages, 0);
1526}
1527
1528static int fuse_retrieve(struct fuse_conn *fc, struct inode *inode,
1529                         struct fuse_notify_retrieve_out *outarg)
1530{
1531        int err;
1532        struct address_space *mapping = inode->i_mapping;
1533        struct fuse_req *req;
1534        pgoff_t index;
1535        loff_t file_size;
1536        unsigned int num;
1537        unsigned int offset;
1538        size_t total_len = 0;
1539
1540        req = fuse_get_req(fc);
1541        if (IS_ERR(req))
1542                return PTR_ERR(req);
1543
1544        offset = outarg->offset & ~PAGE_CACHE_MASK;
1545
1546        req->in.h.opcode = FUSE_NOTIFY_REPLY;
1547        req->in.h.nodeid = outarg->nodeid;
1548        req->in.numargs = 2;
1549        req->in.argpages = 1;
1550        req->page_offset = offset;
1551        req->end = fuse_retrieve_end;
1552
1553        index = outarg->offset >> PAGE_CACHE_SHIFT;
1554        file_size = i_size_read(inode);
1555        num = outarg->size;
1556        if (outarg->offset > file_size)
1557                num = 0;
1558        else if (outarg->offset + num > file_size)
1559                num = file_size - outarg->offset;
1560
1561        while (num && req->num_pages < FUSE_MAX_PAGES_PER_REQ) {
1562                struct page *page;
1563                unsigned int this_num;
1564
1565                page = find_get_page(mapping, index);
1566                if (!page)
1567                        break;
1568
1569                this_num = min_t(unsigned, num, PAGE_CACHE_SIZE - offset);
1570                req->pages[req->num_pages] = page;
1571                req->num_pages++;
1572
1573                offset = 0;
1574                num -= this_num;
1575                total_len += this_num;
1576                index++;
1577        }
1578        req->misc.retrieve_in.offset = outarg->offset;
1579        req->misc.retrieve_in.size = total_len;
1580        req->in.args[0].size = sizeof(req->misc.retrieve_in);
1581        req->in.args[0].value = &req->misc.retrieve_in;
1582        req->in.args[1].size = total_len;
1583
1584        err = fuse_request_send_notify_reply(fc, req, outarg->notify_unique);
1585        if (err)
1586                fuse_retrieve_end(fc, req);
1587
1588        return err;
1589}
1590
1591static int fuse_notify_retrieve(struct fuse_conn *fc, unsigned int size,
1592                                struct fuse_copy_state *cs)
1593{
1594        struct fuse_notify_retrieve_out outarg;
1595        struct inode *inode;
1596        int err;
1597
1598        err = -EINVAL;
1599        if (size != sizeof(outarg))
1600                goto copy_finish;
1601
1602        err = fuse_copy_one(cs, &outarg, sizeof(outarg));
1603        if (err)
1604                goto copy_finish;
1605
1606        fuse_copy_finish(cs);
1607
1608        down_read(&fc->killsb);
1609        err = -ENOENT;
1610        if (fc->sb) {
1611                u64 nodeid = outarg.nodeid;
1612
1613                inode = ilookup5(fc->sb, nodeid, fuse_inode_eq, &nodeid);
1614                if (inode) {
1615                        err = fuse_retrieve(fc, inode, &outarg);
1616                        iput(inode);
1617                }
1618        }
1619        up_read(&fc->killsb);
1620
1621        return err;
1622
1623copy_finish:
1624        fuse_copy_finish(cs);
1625        return err;
1626}
1627
1628static int fuse_notify(struct fuse_conn *fc, enum fuse_notify_code code,
1629                       unsigned int size, struct fuse_copy_state *cs)
1630{
1631        switch (code) {
1632        case FUSE_NOTIFY_POLL:
1633                return fuse_notify_poll(fc, size, cs);
1634
1635        case FUSE_NOTIFY_INVAL_INODE:
1636                return fuse_notify_inval_inode(fc, size, cs);
1637
1638        case FUSE_NOTIFY_INVAL_ENTRY:
1639                return fuse_notify_inval_entry(fc, size, cs);
1640
1641        case FUSE_NOTIFY_STORE:
1642                return fuse_notify_store(fc, size, cs);
1643
1644        case FUSE_NOTIFY_RETRIEVE:
1645                return fuse_notify_retrieve(fc, size, cs);
1646
1647        case FUSE_NOTIFY_DELETE:
1648                return fuse_notify_delete(fc, size, cs);
1649
1650        default:
1651                fuse_copy_finish(cs);
1652                return -EINVAL;
1653        }
1654}
1655
1656/* Look up request on processing list by unique ID */
1657static struct fuse_req *request_find(struct fuse_conn *fc, u64 unique)
1658{
1659        struct list_head *entry;
1660
1661        list_for_each(entry, &fc->processing) {
1662                struct fuse_req *req;
1663                req = list_entry(entry, struct fuse_req, list);
1664                if (req->in.h.unique == unique || req->intr_unique == unique)
1665                        return req;
1666        }
1667        return NULL;
1668}
1669
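     /*
      * Copy reply arguments from the userspace buffer into the request,
      * allowing a short last argument when out->argvar is set.
      */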
1670static int copy_out_args(struct fuse_copy_state *cs, struct fuse_out *out,
1671                         unsigned nbytes)
1672{
1673        unsigned reqsize = sizeof(struct fuse_out_header);
1674
1675        if (out->h.error)
1676                return nbytes != reqsize ? -EINVAL : 0;
1677
1678        reqsize += len_args(out->numargs, out->args);
1679
1680        if (reqsize < nbytes || (reqsize > nbytes && !out->argvar))
1681                return -EINVAL;
1682        else if (reqsize > nbytes) {
1683                struct fuse_arg *lastarg = &out->args[out->numargs-1];
1684                unsigned diffsize = reqsize - nbytes;
1685                if (diffsize > lastarg->size)
1686                        return -EINVAL;
1687                lastarg->size -= diffsize;
1688        }
1689        return fuse_copy_args(cs, out->numargs, out->argpages, out->args,
1690                              out->page_zeroing);
1691}
1692
1693/*
1694 * Write a single reply to a request.  First the header is copied from
1695 * the write buffer.  The request is then looked up on the processing
1696 * list by the unique ID found in the header.  If found, the request
1697 * is removed from the list and the rest of the buffer is copied into
1698 * it.  The request is finished by calling request_end().
1699 */
1700static ssize_t fuse_dev_do_write(struct fuse_conn *fc,
1701                                 struct fuse_copy_state *cs, size_t nbytes)
1702{
1703        int err;
1704        struct fuse_req *req;
1705        struct fuse_out_header oh;
1706
1707        if (nbytes < sizeof(struct fuse_out_header))
1708                return -EINVAL;
1709
1710        err = fuse_copy_one(cs, &oh, sizeof(oh));
1711        if (err)
1712                goto err_finish;
1713
1714        err = -EINVAL;
1715        if (oh.len != nbytes)
1716                goto err_finish;
1717
1718        /*
1719         * A zero oh.unique indicates an unsolicited notification
1720         * message; in that case oh.error carries the notification code.
1721         */
1722        if (!oh.unique) {
1723                err = fuse_notify(fc, oh.error, nbytes - sizeof(oh), cs);
1724                return err ? err : nbytes;
1725        }
1726
1727        err = -EINVAL;
1728        if (oh.error <= -1000 || oh.error > 0)
1729                goto err_finish;
1730
1731        spin_lock(&fc->lock);
1732        err = -ENOENT;
1733        if (!fc->connected)
1734                goto err_unlock;
1735
1736        req = request_find(fc, oh.unique);
1737        if (!req)
1738                goto err_unlock;
1739
1740        if (req->aborted) {
1741                spin_unlock(&fc->lock);
1742                fuse_copy_finish(cs);
1743                spin_lock(&fc->lock);
1744                request_end(fc, req);
1745                return -ENOENT;
1746        }
1747        /* Is it an interrupt reply? */
1748        if (req->intr_unique == oh.unique) {
1749                err = -EINVAL;
1750                if (nbytes != sizeof(struct fuse_out_header))
1751                        goto err_unlock;
1752
1753                if (oh.error == -ENOSYS)
1754                        fc->no_interrupt = 1;
1755                else if (oh.error == -EAGAIN)
1756                        queue_interrupt(fc, req);
1757
1758                spin_unlock(&fc->lock);
1759                fuse_copy_finish(cs);
1760                return nbytes;
1761        }
1762
1763        req->state = FUSE_REQ_WRITING;
1764        list_move(&req->list, &fc->io);
1765        req->out.h = oh;
1766        req->locked = 1;
1767        cs->req = req;
1768        if (!req->out.page_replace)
1769                cs->move_pages = 0;
1770        spin_unlock(&fc->lock);
1771
1772        err = copy_out_args(cs, &req->out, nbytes);
1773        fuse_copy_finish(cs);
1774
1775        spin_lock(&fc->lock);
1776        req->locked = 0;
1777        if (!err) {
1778                if (req->aborted)
1779                        err = -ENOENT;
1780        } else if (!req->aborted)
1781                req->out.h.error = -EIO;
1782        request_end(fc, req);
1783
1784        return err ? err : nbytes;
1785
1786 err_unlock:
1787        spin_unlock(&fc->lock);
1788 err_finish:
1789        fuse_copy_finish(cs);
1790        return err;
1791}
1792
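    /* aio_write handler for /dev/fuse: plain write(2) replies end up here */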
1793static ssize_t fuse_dev_write(struct kiocb *iocb, const struct iovec *iov,
1794                              unsigned long nr_segs, loff_t pos)
1795{
1796        struct fuse_copy_state cs;
1797        struct fuse_conn *fc = fuse_get_conn(iocb->ki_filp);
1798        if (!fc)
1799                return -EPERM;
1800
1801        fuse_copy_init(&cs, fc, 0, iov, nr_segs);
1802
1803        return fuse_dev_do_write(fc, &cs, iov_length(iov, nr_segs));
1804}
1805
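    /*
     * splice_write handler for /dev/fuse: detach (or take references on)
     * the pipe buffers covering 'len' bytes and feed them to
     * fuse_dev_do_write(), so reply data can be moved into the page cache
     * without an extra copy when SPLICE_F_MOVE is set.
     */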
1806static ssize_t fuse_dev_splice_write(struct pipe_inode_info *pipe,
1807                                     struct file *out, loff_t *ppos,
1808                                     size_t len, unsigned int flags)
1809{
1810        unsigned nbuf;
1811        unsigned idx;
1812        struct pipe_buffer *bufs;
1813        struct fuse_copy_state cs;
1814        struct fuse_conn *fc;
1815        size_t rem;
1816        ssize_t ret;
1817
1818        fc = fuse_get_conn(out);
1819        if (!fc)
1820                return -EPERM;
1821
1822        bufs = kmalloc(pipe->buffers * sizeof(struct pipe_buffer), GFP_KERNEL);
1823        if (!bufs)
1824                return -ENOMEM;
1825
1826        pipe_lock(pipe);
1827        nbuf = 0;
1828        rem = 0;
1829        for (idx = 0; idx < pipe->nrbufs && rem < len; idx++)
1830                rem += pipe->bufs[(pipe->curbuf + idx) & (pipe->buffers - 1)].len;
1831
1832        ret = -EINVAL;
1833        if (rem < len) {
1834                pipe_unlock(pipe);
1835                goto out;
1836        }
1837
1838        rem = len;
1839        while (rem) {
1840                struct pipe_buffer *ibuf;
1841                struct pipe_buffer *obuf;
1842
1843                BUG_ON(nbuf >= pipe->buffers);
1844                BUG_ON(!pipe->nrbufs);
1845                ibuf = &pipe->bufs[pipe->curbuf];
1846                obuf = &bufs[nbuf];
1847
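                    /*
                     * Take the buffer out of the pipe: steal it outright
                     * if it is fully consumed, otherwise grab a reference
                     * and use only its first 'rem' bytes.
                     */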
1848                if (rem >= ibuf->len) {
1849                        *obuf = *ibuf;
1850                        ibuf->ops = NULL;
1851                        pipe->curbuf = (pipe->curbuf + 1) & (pipe->buffers - 1);
1852                        pipe->nrbufs--;
1853                } else {
1854                        ibuf->ops->get(pipe, ibuf);
1855                        *obuf = *ibuf;
1856                        obuf->flags &= ~PIPE_BUF_FLAG_GIFT;
1857                        obuf->len = rem;
1858                        ibuf->offset += obuf->len;
1859                        ibuf->len -= obuf->len;
1860                }
1861                nbuf++;
1862                rem -= obuf->len;
1863        }
1864        pipe_unlock(pipe);
1865
1866        fuse_copy_init(&cs, fc, 0, NULL, nbuf);
1867        cs.pipebufs = bufs;
1868        cs.pipe = pipe;
1869
1870        if (flags & SPLICE_F_MOVE)
1871                cs.move_pages = 1;
1872
1873        ret = fuse_dev_do_write(fc, &cs, len);
1874
1875        for (idx = 0; idx < nbuf; idx++) {
1876                struct pipe_buffer *buf = &bufs[idx];
1877                buf->ops->release(pipe, buf);
1878        }
1879out:
1880        kfree(bufs);
1881        return ret;
1882}
1883
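    /*
     * Poll for /dev/fuse: always writable; readable when a request or
     * forget is queued; POLLERR once the connection is dead.
     */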
1884static unsigned fuse_dev_poll(struct file *file, poll_table *wait)
1885{
1886        unsigned mask = POLLOUT | POLLWRNORM;
1887        struct fuse_conn *fc = fuse_get_conn(file);
1888        if (!fc)
1889                return POLLERR;
1890
1891        poll_wait(file, &fc->waitq, wait);
1892
1893        spin_lock(&fc->lock);
1894        if (!fc->connected)
1895                mask = POLLERR;
1896        else if (request_pending(fc))
1897                mask |= POLLIN | POLLRDNORM;
1898        spin_unlock(&fc->lock);
1899
1900        return mask;
1901}
1902
1903/*
1904 * Abort all requests on the given list (pending or processing)
1905 *
1906 * This function releases and reacquires fc->lock
1907 */
1908static void end_requests(struct fuse_conn *fc, struct list_head *head)
1909__releases(fc->lock)
1910__acquires(fc->lock)
1911{
1912        while (!list_empty(head)) {
1913                struct fuse_req *req;
1914                req = list_entry(head->next, struct fuse_req, list);
1915                req->out.h.error = -ECONNABORTED;
1916                request_end(fc, req);
1917                spin_lock(&fc->lock);
1918        }
1919}
1920
1921/*
1922 * Abort requests under I/O
1923 *
1924 * The requests are set to aborted and finished, and the request
1925 * waiter is woken up.  This will make request_wait_answer() wait
1926 * until the request is unlocked and then return.
1927 *
1928 * If the request is asynchronous, then the end function needs to be
1929 * called after waiting for the request to be unlocked (if it was
1930 * locked).
1931 */
1932static void end_io_requests(struct fuse_conn *fc)
1933__releases(fc->lock)
1934__acquires(fc->lock)
1935{
1936        while (!list_empty(&fc->io)) {
1937                struct fuse_req *req =
1938                        list_entry(fc->io.next, struct fuse_req, list);
1939                void (*end) (struct fuse_conn *, struct fuse_req *) = req->end;
1940
1941                req->aborted = 1;
1942                req->out.h.error = -ECONNABORTED;
1943                req->state = FUSE_REQ_FINISHED;
1944                list_del_init(&req->list);
1945                wake_up(&req->waitq);
1946                if (end) {
1947                        req->end = NULL;
1948                        __fuse_get_request(req);
1949                        spin_unlock(&fc->lock);
1950                        wait_event(req->waitq, !req->locked);
1951                        end(fc, req);
1952                        fuse_put_request(fc, req);
1953                        spin_lock(&fc->lock);
1954                }
1955        }
1956}
1957
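    /*
     * Lift the background limit so flush_bg_queue() moves everything to
     * the pending list, then fail all pending and processing requests
     * and free any queued forgets.
     */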
1958static void end_queued_requests(struct fuse_conn *fc)
1959__releases(fc->lock)
1960__acquires(fc->lock)
1961{
1962        fc->max_background = UINT_MAX;
1963        flush_bg_queue(fc);
1964        end_requests(fc, &fc->pending);
1965        end_requests(fc, &fc->processing);
1966        while (forget_pending(fc))
1967                kfree(dequeue_forget(fc, 1, NULL));
1968}
1969
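    /*
     * Wake up everyone waiting on a polled file so they notice that the
     * connection is going away.
     */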
1970static void end_polls(struct fuse_conn *fc)
1971{
1972        struct rb_node *p;
1973
1974        p = rb_first(&fc->polled_files);
1975
1976        while (p) {
1977                struct fuse_file *ff;
1978                ff = rb_entry(p, struct fuse_file, polled_node);
1979                wake_up_interruptible_all(&ff->poll_wait);
1980
1981                p = rb_next(p);
1982        }
1983}
1984
1985/*
1986 * Abort all requests.
1987 *
1988 * Emergency exit in case of a malicious or accidental deadlock, or
1989 * just a hung filesystem.
1990 *
1991 * The same effect is usually achievable through killing the
1992 * filesystem daemon and all users of the filesystem.  The exception
1993 * is the combination of an asynchronous request and the tricky
1994 * deadlock (see Documentation/filesystems/fuse.txt).
1995 *
1996 * During the aborting, progression of requests from the pending and
1997 * processing lists onto the io list, and progression of new requests
1998 * onto the pending list is prevented by fc->connected being false.
1999 *
2000 * Progression of requests under I/O to the processing list is
2001 * prevented by the req->aborted flag being true for these requests.
2002 * For this reason requests on the io list must be aborted first.
2003 */
2004void fuse_abort_conn(struct fuse_conn *fc)
2005{
2006        spin_lock(&fc->lock);
2007        if (fc->connected) {
2008                fc->connected = 0;
2009                fc->blocked = 0;
2010                end_io_requests(fc);
2011                end_queued_requests(fc);
2012                end_polls(fc);
2013                wake_up_all(&fc->waitq);
2014                wake_up_all(&fc->blocked_waitq);
2015                kill_fasync(&fc->fasync, SIGIO, POLL_IN);
2016        }
2017        spin_unlock(&fc->lock);
2018}
2019EXPORT_SYMBOL_GPL(fuse_abort_conn);
2020
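    /*
     * Called on the final close of the device file: mark the connection
     * disconnected, fail everything still queued and wake up any tasks
     * blocked on it, then drop the device's reference on the connection.
     */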
2021int fuse_dev_release(struct inode *inode, struct file *file)
2022{
2023        struct fuse_conn *fc = fuse_get_conn(file);
2024        if (fc) {
2025                spin_lock(&fc->lock);
2026                fc->connected = 0;
2027                fc->blocked = 0;
2028                end_queued_requests(fc);
2029                end_polls(fc);
2030                wake_up_all(&fc->blocked_waitq);
2031                spin_unlock(&fc->lock);
2032                fuse_conn_put(fc);
2033        }
2034
2035        return 0;
2036}
2037EXPORT_SYMBOL_GPL(fuse_dev_release);
2038
2039static int fuse_dev_fasync(int fd, struct file *file, int on)
2040{
2041        struct fuse_conn *fc = fuse_get_conn(file);
2042        if (!fc)
2043                return -EPERM;
2044
2045        /* No locking - fasync_helper does its own locking */
2046        return fasync_helper(fd, file, on, &fc->fasync);
2047}
2048
2049const struct file_operations fuse_dev_operations = {
2050        .owner          = THIS_MODULE,
2051        .llseek         = no_llseek,
2052        .read           = do_sync_read,
2053        .aio_read       = fuse_dev_read,
2054        .splice_read    = fuse_dev_splice_read,
2055        .write          = do_sync_write,
2056        .aio_write      = fuse_dev_write,
2057        .splice_write   = fuse_dev_splice_write,
2058        .poll           = fuse_dev_poll,
2059        .release        = fuse_dev_release,
2060        .fasync         = fuse_dev_fasync,
2061};
2062EXPORT_SYMBOL_GPL(fuse_dev_operations);
2063
2064static struct miscdevice fuse_miscdevice = {
2065        .minor = FUSE_MINOR,
2066        .name  = "fuse",
2067        .fops = &fuse_dev_operations,
2068};
2069
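    /* Create the request slab cache and register the /dev/fuse misc device */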
2070int __init fuse_dev_init(void)
2071{
2072        int err = -ENOMEM;
2073        fuse_req_cachep = kmem_cache_create("fuse_request",
2074                                            sizeof(struct fuse_req),
2075                                            0, 0, NULL);
2076        if (!fuse_req_cachep)
2077                goto out;
2078
2079        err = misc_register(&fuse_miscdevice);
2080        if (err)
2081                goto out_cache_clean;
2082
2083        return 0;
2084
2085 out_cache_clean:
2086        kmem_cache_destroy(fuse_req_cachep);
2087 out:
2088        return err;
2089}
2090
2091void fuse_dev_cleanup(void)
2092{
2093        misc_deregister(&fuse_miscdevice);
2094        kmem_cache_destroy(fuse_req_cachep);
2095}
2096