linux/fs/fuse/dev.c
/*
  FUSE: Filesystem in Userspace
  Copyright (C) 2001-2008  Miklos Szeredi <miklos@szeredi.hu>

  This program can be distributed under the terms of the GNU GPL.
  See the file COPYING.
*/

#include "fuse_i.h"

#include <linux/init.h>
#include <linux/module.h>
#include <linux/poll.h>
#include <linux/uio.h>
#include <linux/miscdevice.h>
#include <linux/pagemap.h>
#include <linux/file.h>
#include <linux/slab.h>
#include <linux/pipe_fs_i.h>
#include <linux/swap.h>
#include <linux/splice.h>

MODULE_ALIAS_MISCDEV(FUSE_MINOR);
MODULE_ALIAS("devname:fuse");

static struct kmem_cache *fuse_req_cachep;

static struct fuse_conn *fuse_get_conn(struct file *file)
{
        /*
         * Lockless access is OK, because file->private_data is set
         * once during mount and is valid until the file is released.
         */
        return file->private_data;
}

static void fuse_request_init(struct fuse_req *req, struct page **pages,
                              struct fuse_page_desc *page_descs,
                              unsigned npages)
{
        memset(req, 0, sizeof(*req));
        memset(pages, 0, sizeof(*pages) * npages);
        memset(page_descs, 0, sizeof(*page_descs) * npages);
        INIT_LIST_HEAD(&req->list);
        INIT_LIST_HEAD(&req->intr_entry);
        init_waitqueue_head(&req->waitq);
        atomic_set(&req->count, 1);
        req->pages = pages;
        req->page_descs = page_descs;
        req->max_pages = npages;
}

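/*
 * Allocate a request with room for @npages page pointers and
 * descriptors.  Small requests use the arrays embedded in struct
 * fuse_req; larger ones get separately allocated arrays, which are
 * freed again in fuse_request_free().
 */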
static struct fuse_req *__fuse_request_alloc(unsigned npages, gfp_t flags)
{
        struct fuse_req *req = kmem_cache_alloc(fuse_req_cachep, flags);
        if (req) {
                struct page **pages;
                struct fuse_page_desc *page_descs;

                if (npages <= FUSE_REQ_INLINE_PAGES) {
                        pages = req->inline_pages;
                        page_descs = req->inline_page_descs;
                } else {
                        pages = kmalloc(sizeof(struct page *) * npages, flags);
                        page_descs = kmalloc(sizeof(struct fuse_page_desc) *
                                             npages, flags);
                }

                if (!pages || !page_descs) {
                        kfree(pages);
                        kfree(page_descs);
                        kmem_cache_free(fuse_req_cachep, req);
                        return NULL;
                }

                fuse_request_init(req, pages, page_descs, npages);
        }
        return req;
}

struct fuse_req *fuse_request_alloc(unsigned npages)
{
        return __fuse_request_alloc(npages, GFP_KERNEL);
}
EXPORT_SYMBOL_GPL(fuse_request_alloc);

struct fuse_req *fuse_request_alloc_nofs(unsigned npages)
{
        return __fuse_request_alloc(npages, GFP_NOFS);
}

void fuse_request_free(struct fuse_req *req)
{
        if (req->pages != req->inline_pages) {
                kfree(req->pages);
                kfree(req->page_descs);
        }
        kmem_cache_free(fuse_req_cachep, req);
}

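/*
 * Block all signals except SIGKILL, so that only a fatal signal can
 * interrupt the wait that follows.
 */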
static void block_sigs(sigset_t *oldset)
{
        sigset_t mask;

        siginitsetinv(&mask, sigmask(SIGKILL));
        sigprocmask(SIG_BLOCK, &mask, oldset);
}

static void restore_sigs(sigset_t *oldset)
{
        sigprocmask(SIG_SETMASK, oldset, NULL);
}

static void __fuse_get_request(struct fuse_req *req)
{
        atomic_inc(&req->count);
}

/* Must be called with > 1 refcount */
static void __fuse_put_request(struct fuse_req *req)
{
        BUG_ON(atomic_read(&req->count) < 2);
        atomic_dec(&req->count);
}

static void fuse_req_init_context(struct fuse_req *req)
{
        req->in.h.uid = from_kuid_munged(&init_user_ns, current_fsuid());
        req->in.h.gid = from_kgid_munged(&init_user_ns, current_fsgid());
        req->in.h.pid = current->pid;
}

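/*
 * Allocate a request and fill in the caller's credentials.  Waits
 * killably while the connection is blocked; fails with -EINTR on a
 * fatal signal, -ENOTCONN if the connection is gone and -ENOMEM on
 * allocation failure.
 */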
struct fuse_req *fuse_get_req(struct fuse_conn *fc, unsigned npages)
{
        struct fuse_req *req;
        sigset_t oldset;
        int intr;
        int err;

        atomic_inc(&fc->num_waiting);
        block_sigs(&oldset);
        intr = wait_event_interruptible(fc->blocked_waitq, !fc->blocked);
        restore_sigs(&oldset);
        err = -EINTR;
        if (intr)
                goto out;

        err = -ENOTCONN;
        if (!fc->connected)
                goto out;

        req = fuse_request_alloc(npages);
        err = -ENOMEM;
        if (!req)
                goto out;

        fuse_req_init_context(req);
        req->waiting = 1;
        return req;

 out:
        atomic_dec(&fc->num_waiting);
        return ERR_PTR(err);
}
EXPORT_SYMBOL_GPL(fuse_get_req);

/*
 * Return the request stored in fuse_file->reserved_req.  That
 * request may currently be in use; if so, wait for it to become
 * available.
 */
static struct fuse_req *get_reserved_req(struct fuse_conn *fc,
                                         struct file *file)
{
        struct fuse_req *req = NULL;
        struct fuse_file *ff = file->private_data;

        do {
                wait_event(fc->reserved_req_waitq, ff->reserved_req);
                spin_lock(&fc->lock);
                if (ff->reserved_req) {
                        req = ff->reserved_req;
                        ff->reserved_req = NULL;
                        req->stolen_file = get_file(file);
                }
                spin_unlock(&fc->lock);
        } while (!req);

        return req;
}

/*
 * Put stolen request back into fuse_file->reserved_req
 */
static void put_reserved_req(struct fuse_conn *fc, struct fuse_req *req)
{
        struct file *file = req->stolen_file;
        struct fuse_file *ff = file->private_data;

        spin_lock(&fc->lock);
        fuse_request_init(req, req->pages, req->page_descs, req->max_pages);
        BUG_ON(ff->reserved_req);
        ff->reserved_req = req;
        wake_up_all(&fc->reserved_req_waitq);
        spin_unlock(&fc->lock);
        fput(file);
}

/*
 * Get a request for a file operation; always succeeds.
 *
 * This is used for sending the FLUSH request, which must get to
 * userspace, due to POSIX locks which may need to be unlocked.
 *
 * If allocation fails due to OOM, use the reserved request in
 * fuse_file.
 *
 * This is very unlikely to deadlock accidentally, since the
 * filesystem should not have its own file open.  If deadlock is
 * intentional, it can still be broken by "aborting" the filesystem.
 */
struct fuse_req *fuse_get_req_nofail_nopages(struct fuse_conn *fc,
                                             struct file *file)
{
        struct fuse_req *req;

        atomic_inc(&fc->num_waiting);
        wait_event(fc->blocked_waitq, !fc->blocked);
        req = fuse_request_alloc(0);
        if (!req)
                req = get_reserved_req(fc, file);

        fuse_req_init_context(req);
        req->waiting = 1;
        return req;
}

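/*
 * Drop a reference to the request.  When the last reference goes
 * away, return the request to its owning file's reserved slot if it
 * was stolen from there, otherwise free it.
 */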
void fuse_put_request(struct fuse_conn *fc, struct fuse_req *req)
{
        if (atomic_dec_and_test(&req->count)) {
                if (req->waiting)
                        atomic_dec(&fc->num_waiting);

                if (req->stolen_file)
                        put_reserved_req(fc, req);
                else
                        fuse_request_free(req);
        }
}
EXPORT_SYMBOL_GPL(fuse_put_request);

static unsigned len_args(unsigned numargs, struct fuse_arg *args)
{
        unsigned nbytes = 0;
        unsigned i;

        for (i = 0; i < numargs; i++)
                nbytes += args[i].size;

        return nbytes;
}

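/*
 * Allocate the next unique request ID.  Called with fc->lock held;
 * zero is reserved, so the counter skips it on wraparound.
 */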
static u64 fuse_get_unique(struct fuse_conn *fc)
{
        fc->reqctr++;
        /* zero is special */
        if (fc->reqctr == 0)
                fc->reqctr = 1;

        return fc->reqctr;
}

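/*
 * Add a request to the pending list and wake up any readers waiting
 * on the device.  Called with fc->lock held.
 */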
static void queue_request(struct fuse_conn *fc, struct fuse_req *req)
{
        req->in.h.len = sizeof(struct fuse_in_header) +
                len_args(req->in.numargs, (struct fuse_arg *) req->in.args);
        list_add_tail(&req->list, &fc->pending);
        req->state = FUSE_REQ_PENDING;
        if (!req->waiting) {
                req->waiting = 1;
                atomic_inc(&fc->num_waiting);
        }
        wake_up(&fc->waitq);
        kill_fasync(&fc->fasync, SIGIO, POLL_IN);
}

void fuse_queue_forget(struct fuse_conn *fc, struct fuse_forget_link *forget,
                       u64 nodeid, u64 nlookup)
{
        forget->forget_one.nodeid = nodeid;
        forget->forget_one.nlookup = nlookup;

        spin_lock(&fc->lock);
        if (fc->connected) {
                fc->forget_list_tail->next = forget;
                fc->forget_list_tail = forget;
                wake_up(&fc->waitq);
                kill_fasync(&fc->fasync, SIGIO, POLL_IN);
        } else {
                kfree(forget);
        }
        spin_unlock(&fc->lock);
}

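/*
 * Move queued background requests to the pending list until the
 * active_background count reaches max_background.  Called with
 * fc->lock held.
 */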
static void flush_bg_queue(struct fuse_conn *fc)
{
        while (fc->active_background < fc->max_background &&
               !list_empty(&fc->bg_queue)) {
                struct fuse_req *req;

                req = list_entry(fc->bg_queue.next, struct fuse_req, list);
                list_del(&req->list);
                fc->active_background++;
                req->in.h.unique = fuse_get_unique(fc);
                queue_request(fc, req);
        }
}

/*
 * This function is called when a request is finished.  Either a reply
 * has arrived or it was aborted (and not yet sent) or some error
 * occurred during communication with userspace, or the device file
 * was closed.  The requester thread is woken up (if still waiting),
 * the 'end' callback is called if given, else the reference to the
 * request is released.
 *
 * Called with fc->lock held, unlocks it
 */
static void request_end(struct fuse_conn *fc, struct fuse_req *req)
__releases(fc->lock)
{
        void (*end) (struct fuse_conn *, struct fuse_req *) = req->end;
        req->end = NULL;
        list_del(&req->list);
        list_del(&req->intr_entry);
        req->state = FUSE_REQ_FINISHED;
        if (req->background) {
                if (fc->num_background == fc->max_background) {
                        fc->blocked = 0;
                        wake_up_all(&fc->blocked_waitq);
                }
                if (fc->num_background == fc->congestion_threshold &&
                    fc->connected && fc->bdi_initialized) {
                        clear_bdi_congested(&fc->bdi, BLK_RW_SYNC);
                        clear_bdi_congested(&fc->bdi, BLK_RW_ASYNC);
                }
                fc->num_background--;
                fc->active_background--;
                flush_bg_queue(fc);
        }
        spin_unlock(&fc->lock);
        wake_up(&req->waitq);
        if (end)
                end(fc, req);
        fuse_put_request(fc, req);
}

static void wait_answer_interruptible(struct fuse_conn *fc,
                                      struct fuse_req *req)
__releases(fc->lock)
__acquires(fc->lock)
{
        if (signal_pending(current))
                return;

        spin_unlock(&fc->lock);
        wait_event_interruptible(req->waitq, req->state == FUSE_REQ_FINISHED);
        spin_lock(&fc->lock);
}

static void queue_interrupt(struct fuse_conn *fc, struct fuse_req *req)
{
        list_add_tail(&req->intr_entry, &fc->interrupts);
        wake_up(&fc->waitq);
        kill_fasync(&fc->fasync, SIGIO, POLL_IN);
}

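/*
 * Wait for a request to finish.  The wait gets successively less
 * interruptible: first any signal may interrupt it (skipped if the
 * server doesn't support FUSE_INTERRUPT), then only fatal signals,
 * and once the request has reached userspace or was forced, the
 * wait is uninterruptible.
 */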
static void request_wait_answer(struct fuse_conn *fc, struct fuse_req *req)
__releases(fc->lock)
__acquires(fc->lock)
{
        if (!fc->no_interrupt) {
                /* Any signal may interrupt this */
                wait_answer_interruptible(fc, req);

                if (req->aborted)
                        goto aborted;
                if (req->state == FUSE_REQ_FINISHED)
                        return;

                req->interrupted = 1;
                if (req->state == FUSE_REQ_SENT)
                        queue_interrupt(fc, req);
        }

        if (!req->force) {
                sigset_t oldset;

                /* Only fatal signals may interrupt this */
                block_sigs(&oldset);
                wait_answer_interruptible(fc, req);
                restore_sigs(&oldset);

                if (req->aborted)
                        goto aborted;
                if (req->state == FUSE_REQ_FINISHED)
                        return;

                /* Request is not yet in userspace, bail out */
                if (req->state == FUSE_REQ_PENDING) {
                        list_del(&req->list);
                        __fuse_put_request(req);
                        req->out.h.error = -EINTR;
                        return;
                }
        }

        /*
         * Either request is already in userspace, or it was forced.
         * Wait it out.
         */
        spin_unlock(&fc->lock);
        wait_event(req->waitq, req->state == FUSE_REQ_FINISHED);
        spin_lock(&fc->lock);

        if (!req->aborted)
                return;

 aborted:
        BUG_ON(req->state != FUSE_REQ_FINISHED);
        if (req->locked) {
                /* This is uninterruptible sleep, because data is
                   being copied to/from the buffers of req.  During
                   locked state, there mustn't be any filesystem
                   operation (e.g. page fault), since that could lead
                   to deadlock */
                spin_unlock(&fc->lock);
                wait_event(req->waitq, !req->locked);
                spin_lock(&fc->lock);
        }
}

static void __fuse_request_send(struct fuse_conn *fc, struct fuse_req *req)
{
        spin_lock(&fc->lock);
        if (!fc->connected)
                req->out.h.error = -ENOTCONN;
        else if (fc->conn_error)
                req->out.h.error = -ECONNREFUSED;
        else {
                req->in.h.unique = fuse_get_unique(fc);
                queue_request(fc, req);
                /* acquire extra reference, since request is still needed
                   after request_end() */
                __fuse_get_request(req);

                request_wait_answer(fc, req);
        }
        spin_unlock(&fc->lock);
}

void fuse_request_send(struct fuse_conn *fc, struct fuse_req *req)
{
        req->isreply = 1;
        __fuse_request_send(fc, req);
}
EXPORT_SYMBOL_GPL(fuse_request_send);

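/*
 * Queue a background request.  It is accounted in num_background;
 * new requests are blocked and the bdi is marked congested once the
 * respective thresholds are reached.  Called with fc->lock held.
 */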
static void fuse_request_send_nowait_locked(struct fuse_conn *fc,
                                            struct fuse_req *req)
{
        req->background = 1;
        fc->num_background++;
        if (fc->num_background == fc->max_background)
                fc->blocked = 1;
        if (fc->num_background == fc->congestion_threshold &&
            fc->bdi_initialized) {
                set_bdi_congested(&fc->bdi, BLK_RW_SYNC);
                set_bdi_congested(&fc->bdi, BLK_RW_ASYNC);
        }
        list_add_tail(&req->list, &fc->bg_queue);
        flush_bg_queue(fc);
}

static void fuse_request_send_nowait(struct fuse_conn *fc, struct fuse_req *req)
{
        spin_lock(&fc->lock);
        if (fc->connected) {
                fuse_request_send_nowait_locked(fc, req);
                spin_unlock(&fc->lock);
        } else {
                req->out.h.error = -ENOTCONN;
                request_end(fc, req);
        }
}

void fuse_request_send_background(struct fuse_conn *fc, struct fuse_req *req)
{
        req->isreply = 1;
        fuse_request_send_nowait(fc, req);
}
EXPORT_SYMBOL_GPL(fuse_request_send_background);

static int fuse_request_send_notify_reply(struct fuse_conn *fc,
                                          struct fuse_req *req, u64 unique)
{
        int err = -ENODEV;

        req->isreply = 0;
        req->in.h.unique = unique;
        spin_lock(&fc->lock);
        if (fc->connected) {
                queue_request(fc, req);
                err = 0;
        }
        spin_unlock(&fc->lock);

        return err;
}

/*
 * Called under fc->lock
 *
 * fc->connected must have been checked previously
 */
void fuse_request_send_background_locked(struct fuse_conn *fc,
                                         struct fuse_req *req)
{
        req->isreply = 1;
        fuse_request_send_nowait_locked(fc, req);
}

void fuse_force_forget(struct file *file, u64 nodeid)
{
        struct inode *inode = file_inode(file);
        struct fuse_conn *fc = get_fuse_conn(inode);
        struct fuse_req *req;
        struct fuse_forget_in inarg;

        memset(&inarg, 0, sizeof(inarg));
        inarg.nlookup = 1;
        req = fuse_get_req_nofail_nopages(fc, file);
        req->in.h.opcode = FUSE_FORGET;
        req->in.h.nodeid = nodeid;
        req->in.numargs = 1;
        req->in.args[0].size = sizeof(inarg);
        req->in.args[0].value = &inarg;
        req->isreply = 0;
        __fuse_request_send(fc, req);
        /* ignore errors */
        fuse_put_request(fc, req);
}

/*
 * Lock the request.  Up to the next unlock_request() there mustn't be
 * anything that could cause a page-fault.  If the request was already
 * aborted, bail out.
 */
static int lock_request(struct fuse_conn *fc, struct fuse_req *req)
{
        int err = 0;
        if (req) {
                spin_lock(&fc->lock);
                if (req->aborted)
                        err = -ENOENT;
                else
                        req->locked = 1;
                spin_unlock(&fc->lock);
        }
        return err;
}

/*
 * Unlock the request.  If it was aborted while locked, the requester
 * thread is currently waiting for it to be unlocked, so wake it up.
 */
static void unlock_request(struct fuse_conn *fc, struct fuse_req *req)
{
        if (req) {
                spin_lock(&fc->lock);
                req->locked = 0;
                if (req->aborted)
                        wake_up(&req->waitq);
                spin_unlock(&fc->lock);
        }
}

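/*
 * State for copying request data between the kernel and userspace.
 * The user side is either an iovec-based buffer or a set of pipe
 * buffers (for splice support); one page is mapped at a time.
 */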
struct fuse_copy_state {
        struct fuse_conn *fc;
        int write;
        struct fuse_req *req;
        const struct iovec *iov;
        struct pipe_buffer *pipebufs;
        struct pipe_buffer *currbuf;
        struct pipe_inode_info *pipe;
        unsigned long nr_segs;
        unsigned long seglen;
        unsigned long addr;
        struct page *pg;
        void *mapaddr;
        void *buf;
        unsigned len;
        unsigned move_pages:1;
};

static void fuse_copy_init(struct fuse_copy_state *cs, struct fuse_conn *fc,
                           int write,
                           const struct iovec *iov, unsigned long nr_segs)
{
        memset(cs, 0, sizeof(*cs));
        cs->fc = fc;
        cs->write = write;
        cs->iov = iov;
        cs->nr_segs = nr_segs;
}

/* Unmap and put previous page of userspace buffer */
static void fuse_copy_finish(struct fuse_copy_state *cs)
{
        if (cs->currbuf) {
                struct pipe_buffer *buf = cs->currbuf;

                if (!cs->write) {
                        buf->ops->unmap(cs->pipe, buf, cs->mapaddr);
                } else {
                        kunmap(buf->page);
                        buf->len = PAGE_SIZE - cs->len;
                }
                cs->currbuf = NULL;
                cs->mapaddr = NULL;
        } else if (cs->mapaddr) {
                kunmap(cs->pg);
                if (cs->write) {
                        flush_dcache_page(cs->pg);
                        set_page_dirty_lock(cs->pg);
                }
                put_page(cs->pg);
                cs->mapaddr = NULL;
        }
}

/*
 * Get another pageful of userspace buffer, map it into kernel
 * address space, and lock the request
 */
static int fuse_copy_fill(struct fuse_copy_state *cs)
{
        unsigned long offset;
        int err;

        unlock_request(cs->fc, cs->req);
        fuse_copy_finish(cs);
        if (cs->pipebufs) {
                struct pipe_buffer *buf = cs->pipebufs;

                if (!cs->write) {
                        err = buf->ops->confirm(cs->pipe, buf);
                        if (err)
                                return err;

                        BUG_ON(!cs->nr_segs);
                        cs->currbuf = buf;
                        cs->mapaddr = buf->ops->map(cs->pipe, buf, 0);
                        cs->len = buf->len;
                        cs->buf = cs->mapaddr + buf->offset;
                        cs->pipebufs++;
                        cs->nr_segs--;
                } else {
                        struct page *page;

                        if (cs->nr_segs == cs->pipe->buffers)
                                return -EIO;

                        page = alloc_page(GFP_HIGHUSER);
                        if (!page)
                                return -ENOMEM;

                        buf->page = page;
                        buf->offset = 0;
                        buf->len = 0;

                        cs->currbuf = buf;
                        cs->mapaddr = kmap(page);
                        cs->buf = cs->mapaddr;
                        cs->len = PAGE_SIZE;
                        cs->pipebufs++;
                        cs->nr_segs++;
                }
        } else {
                if (!cs->seglen) {
                        BUG_ON(!cs->nr_segs);
                        cs->seglen = cs->iov[0].iov_len;
                        cs->addr = (unsigned long) cs->iov[0].iov_base;
                        cs->iov++;
                        cs->nr_segs--;
                }
                err = get_user_pages_fast(cs->addr, 1, cs->write, &cs->pg);
                if (err < 0)
                        return err;
                BUG_ON(err != 1);
                offset = cs->addr % PAGE_SIZE;
                cs->mapaddr = kmap(cs->pg);
                cs->buf = cs->mapaddr + offset;
                cs->len = min(PAGE_SIZE - offset, cs->seglen);
                cs->seglen -= cs->len;
                cs->addr += cs->len;
        }

        return lock_request(cs->fc, cs->req);
}

/* Do as much copy to/from userspace buffer as we can */
static int fuse_copy_do(struct fuse_copy_state *cs, void **val, unsigned *size)
{
        unsigned ncpy = min(*size, cs->len);
        if (val) {
                if (cs->write)
                        memcpy(cs->buf, *val, ncpy);
                else
                        memcpy(*val, cs->buf, ncpy);
                *val += ncpy;
        }
        *size -= ncpy;
        cs->len -= ncpy;
        cs->buf += ncpy;
        return ncpy;
}

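/*
 * Check that a page is suitable for insertion into the page cache:
 * it must not be mapped, must not belong to a mapping, must have a
 * single reference and must carry no unexpected flags.
 */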
static int fuse_check_page(struct page *page)
{
        if (page_mapcount(page) ||
            page->mapping != NULL ||
            page_count(page) != 1 ||
            (page->flags & PAGE_FLAGS_CHECK_AT_PREP &
             ~(1 << PG_locked |
               1 << PG_referenced |
               1 << PG_uptodate |
               1 << PG_lru |
               1 << PG_active |
               1 << PG_reclaim))) {
                printk(KERN_WARNING "fuse: trying to steal weird page\n");
                printk(KERN_WARNING "  page=%p index=%li flags=%08lx, count=%i, mapcount=%i, mapping=%p\n", page, page->index, page->flags, page_count(page), page_mapcount(page), page->mapping);
                return 1;
        }
        return 0;
}

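/*
 * Try to steal the page backing a pipe buffer and install it in the
 * request's page cache slot, avoiding a copy.  Returns 0 on success,
 * a negative error, or 1 if stealing failed and the caller should
 * fall back to copying.
 */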
static int fuse_try_move_page(struct fuse_copy_state *cs, struct page **pagep)
{
        int err;
        struct page *oldpage = *pagep;
        struct page *newpage;
        struct pipe_buffer *buf = cs->pipebufs;

        unlock_request(cs->fc, cs->req);
        fuse_copy_finish(cs);

        err = buf->ops->confirm(cs->pipe, buf);
        if (err)
                return err;

        BUG_ON(!cs->nr_segs);
        cs->currbuf = buf;
        cs->len = buf->len;
        cs->pipebufs++;
        cs->nr_segs--;

        if (cs->len != PAGE_SIZE)
                goto out_fallback;

        if (buf->ops->steal(cs->pipe, buf) != 0)
                goto out_fallback;

        newpage = buf->page;

        if (WARN_ON(!PageUptodate(newpage)))
                return -EIO;

        ClearPageMappedToDisk(newpage);

        if (fuse_check_page(newpage) != 0)
                goto out_fallback_unlock;

        /*
         * This is a new and locked page, it shouldn't be mapped or
         * have any special flags on it
         */
        if (WARN_ON(page_mapped(oldpage)))
                goto out_fallback_unlock;
        if (WARN_ON(page_has_private(oldpage)))
                goto out_fallback_unlock;
        if (WARN_ON(PageDirty(oldpage) || PageWriteback(oldpage)))
                goto out_fallback_unlock;
        if (WARN_ON(PageMlocked(oldpage)))
                goto out_fallback_unlock;

        err = replace_page_cache_page(oldpage, newpage, GFP_KERNEL);
        if (err) {
                unlock_page(newpage);
                return err;
        }

        page_cache_get(newpage);

        if (!(buf->flags & PIPE_BUF_FLAG_LRU))
                lru_cache_add_file(newpage);

        err = 0;
        spin_lock(&cs->fc->lock);
        if (cs->req->aborted)
                err = -ENOENT;
        else
                *pagep = newpage;
        spin_unlock(&cs->fc->lock);

        if (err) {
                unlock_page(newpage);
                page_cache_release(newpage);
                return err;
        }

        unlock_page(oldpage);
        page_cache_release(oldpage);
        cs->len = 0;

        return 0;

out_fallback_unlock:
        unlock_page(newpage);
out_fallback:
        cs->mapaddr = buf->ops->map(cs->pipe, buf, 1);
        cs->buf = cs->mapaddr + buf->offset;

        err = lock_request(cs->fc, cs->req);
        if (err)
                return err;

        return 1;
}

static int fuse_ref_page(struct fuse_copy_state *cs, struct page *page,
                         unsigned offset, unsigned count)
{
        struct pipe_buffer *buf;

        if (cs->nr_segs == cs->pipe->buffers)
                return -EIO;

        unlock_request(cs->fc, cs->req);
        fuse_copy_finish(cs);

        buf = cs->pipebufs;
        page_cache_get(page);
        buf->page = page;
        buf->offset = offset;
        buf->len = count;

        cs->pipebufs++;
        cs->nr_segs++;
        cs->len = 0;

        return 0;
}

/*
 * Copy a page in the request to/from the userspace buffer.  Must be
 * done atomically.
 */
static int fuse_copy_page(struct fuse_copy_state *cs, struct page **pagep,
                          unsigned offset, unsigned count, int zeroing)
{
        int err;
        struct page *page = *pagep;

        if (page && zeroing && count < PAGE_SIZE)
                clear_highpage(page);

        while (count) {
                if (cs->write && cs->pipebufs && page) {
                        return fuse_ref_page(cs, page, offset, count);
                } else if (!cs->len) {
                        if (cs->move_pages && page &&
                            offset == 0 && count == PAGE_SIZE) {
                                err = fuse_try_move_page(cs, pagep);
                                if (err <= 0)
                                        return err;
                        } else {
                                err = fuse_copy_fill(cs);
                                if (err)
                                        return err;
                        }
                }
                if (page) {
                        void *mapaddr = kmap_atomic(page);
                        void *buf = mapaddr + offset;
                        offset += fuse_copy_do(cs, &buf, &count);
                        kunmap_atomic(mapaddr);
                } else
                        offset += fuse_copy_do(cs, NULL, &count);
        }
        if (page && !cs->write)
                flush_dcache_page(page);
        return 0;
}

/* Copy pages in the request to/from userspace buffer */
static int fuse_copy_pages(struct fuse_copy_state *cs, unsigned nbytes,
                           int zeroing)
{
        unsigned i;
        struct fuse_req *req = cs->req;

        for (i = 0; i < req->num_pages && (nbytes || zeroing); i++) {
                int err;
                unsigned offset = req->page_descs[i].offset;
                unsigned count = min(nbytes, req->page_descs[i].length);

                err = fuse_copy_page(cs, &req->pages[i], offset, count,
                                     zeroing);
                if (err)
                        return err;

                nbytes -= count;
        }
        return 0;
}

/* Copy a single argument in the request to/from userspace buffer */
static int fuse_copy_one(struct fuse_copy_state *cs, void *val, unsigned size)
{
        while (size) {
                if (!cs->len) {
                        int err = fuse_copy_fill(cs);
                        if (err)
                                return err;
                }
                fuse_copy_do(cs, &val, &size);
        }
        return 0;
}

/* Copy request arguments to/from userspace buffer */
static int fuse_copy_args(struct fuse_copy_state *cs, unsigned numargs,
                          unsigned argpages, struct fuse_arg *args,
                          int zeroing)
{
        int err = 0;
        unsigned i;

        for (i = 0; !err && i < numargs; i++)  {
                struct fuse_arg *arg = &args[i];
                if (i == numargs - 1 && argpages)
                        err = fuse_copy_pages(cs, arg->size, zeroing);
                else
                        err = fuse_copy_one(cs, arg->value, arg->size);
        }
        return err;
}

static int forget_pending(struct fuse_conn *fc)
{
        return fc->forget_list_head.next != NULL;
}

static int request_pending(struct fuse_conn *fc)
{
        return !list_empty(&fc->pending) || !list_empty(&fc->interrupts) ||
                forget_pending(fc);
}

/* Wait until a request is available on the pending list */
static void request_wait(struct fuse_conn *fc)
__releases(fc->lock)
__acquires(fc->lock)
{
        DECLARE_WAITQUEUE(wait, current);

        add_wait_queue_exclusive(&fc->waitq, &wait);
        while (fc->connected && !request_pending(fc)) {
                set_current_state(TASK_INTERRUPTIBLE);
                if (signal_pending(current))
                        break;

                spin_unlock(&fc->lock);
                schedule();
                spin_lock(&fc->lock);
        }
        set_current_state(TASK_RUNNING);
        remove_wait_queue(&fc->waitq, &wait);
}

/*
 * Transfer an interrupt request to userspace
 *
 * Unlike other requests, this one is assembled on demand, without a
 * need to allocate a separate fuse_req structure.
 *
 * Called with fc->lock held, releases it
 */
static int fuse_read_interrupt(struct fuse_conn *fc, struct fuse_copy_state *cs,
                               size_t nbytes, struct fuse_req *req)
__releases(fc->lock)
{
        struct fuse_in_header ih;
        struct fuse_interrupt_in arg;
        unsigned reqsize = sizeof(ih) + sizeof(arg);
        int err;

        list_del_init(&req->intr_entry);
        req->intr_unique = fuse_get_unique(fc);
        memset(&ih, 0, sizeof(ih));
        memset(&arg, 0, sizeof(arg));
        ih.len = reqsize;
        ih.opcode = FUSE_INTERRUPT;
        ih.unique = req->intr_unique;
        arg.unique = req->in.h.unique;

        spin_unlock(&fc->lock);
        if (nbytes < reqsize)
                return -EINVAL;

        err = fuse_copy_one(cs, &ih, sizeof(ih));
        if (!err)
                err = fuse_copy_one(cs, &arg, sizeof(arg));
        fuse_copy_finish(cs);

        return err ? err : reqsize;
}

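/*
 * Detach up to @max entries from the head of the singly linked
 * forget list.  Called with fc->lock held.
 */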
static struct fuse_forget_link *dequeue_forget(struct fuse_conn *fc,
                                               unsigned max,
                                               unsigned *countp)
{
        struct fuse_forget_link *head = fc->forget_list_head.next;
        struct fuse_forget_link **newhead = &head;
        unsigned count;

        for (count = 0; *newhead != NULL && count < max; count++)
                newhead = &(*newhead)->next;

        fc->forget_list_head.next = *newhead;
        *newhead = NULL;
        if (fc->forget_list_head.next == NULL)
                fc->forget_list_tail = &fc->forget_list_head;

        if (countp != NULL)
                *countp = count;

        return head;
}

static int fuse_read_single_forget(struct fuse_conn *fc,
                                   struct fuse_copy_state *cs,
                                   size_t nbytes)
__releases(fc->lock)
{
        int err;
        struct fuse_forget_link *forget = dequeue_forget(fc, 1, NULL);
        struct fuse_forget_in arg = {
                .nlookup = forget->forget_one.nlookup,
        };
        struct fuse_in_header ih = {
                .opcode = FUSE_FORGET,
                .nodeid = forget->forget_one.nodeid,
                .unique = fuse_get_unique(fc),
                .len = sizeof(ih) + sizeof(arg),
        };

        spin_unlock(&fc->lock);
        kfree(forget);
        if (nbytes < ih.len)
                return -EINVAL;

        err = fuse_copy_one(cs, &ih, sizeof(ih));
        if (!err)
                err = fuse_copy_one(cs, &arg, sizeof(arg));
        fuse_copy_finish(cs);

        if (err)
                return err;

        return ih.len;
}

static int fuse_read_batch_forget(struct fuse_conn *fc,
                                   struct fuse_copy_state *cs, size_t nbytes)
__releases(fc->lock)
{
        int err;
        unsigned max_forgets;
        unsigned count;
        struct fuse_forget_link *head;
        struct fuse_batch_forget_in arg = { .count = 0 };
        struct fuse_in_header ih = {
                .opcode = FUSE_BATCH_FORGET,
                .unique = fuse_get_unique(fc),
                .len = sizeof(ih) + sizeof(arg),
        };

        if (nbytes < ih.len) {
                spin_unlock(&fc->lock);
                return -EINVAL;
        }

        max_forgets = (nbytes - ih.len) / sizeof(struct fuse_forget_one);
        head = dequeue_forget(fc, max_forgets, &count);
        spin_unlock(&fc->lock);

        arg.count = count;
        ih.len += count * sizeof(struct fuse_forget_one);
        err = fuse_copy_one(cs, &ih, sizeof(ih));
        if (!err)
                err = fuse_copy_one(cs, &arg, sizeof(arg));

        while (head) {
                struct fuse_forget_link *forget = head;

                if (!err) {
                        err = fuse_copy_one(cs, &forget->forget_one,
                                            sizeof(forget->forget_one));
                }
                head = forget->next;
                kfree(forget);
        }

        fuse_copy_finish(cs);

        if (err)
                return err;

        return ih.len;
}

static int fuse_read_forget(struct fuse_conn *fc, struct fuse_copy_state *cs,
                            size_t nbytes)
__releases(fc->lock)
{
        if (fc->minor < 16 || fc->forget_list_head.next->next == NULL)
                return fuse_read_single_forget(fc, cs, nbytes);
        else
                return fuse_read_batch_forget(fc, cs, nbytes);
}

/*
 * Read a single request into the userspace filesystem's buffer.  This
 * function waits until a request is available, then removes it from
 * the pending list and copies the request data to the userspace
 * buffer.  If no reply is needed (FORGET) or the request has been
 * aborted or there was an error during the copying, then it's
 * finished by calling request_end().  Otherwise add it to the
 * processing list, and set the 'sent' flag.
 */
static ssize_t fuse_dev_do_read(struct fuse_conn *fc, struct file *file,
                                struct fuse_copy_state *cs, size_t nbytes)
{
        int err;
        struct fuse_req *req;
        struct fuse_in *in;
        unsigned reqsize;

 restart:
        spin_lock(&fc->lock);
        err = -EAGAIN;
        if ((file->f_flags & O_NONBLOCK) && fc->connected &&
            !request_pending(fc))
                goto err_unlock;

        request_wait(fc);
        err = -ENODEV;
        if (!fc->connected)
                goto err_unlock;
        err = -ERESTARTSYS;
        if (!request_pending(fc))
                goto err_unlock;

        if (!list_empty(&fc->interrupts)) {
                req = list_entry(fc->interrupts.next, struct fuse_req,
                                 intr_entry);
                return fuse_read_interrupt(fc, cs, nbytes, req);
        }

        if (forget_pending(fc)) {
                if (list_empty(&fc->pending) || fc->forget_batch-- > 0)
                        return fuse_read_forget(fc, cs, nbytes);

                if (fc->forget_batch <= -8)
                        fc->forget_batch = 16;
        }

        req = list_entry(fc->pending.next, struct fuse_req, list);
        req->state = FUSE_REQ_READING;
        list_move(&req->list, &fc->io);

        in = &req->in;
        reqsize = in->h.len;
        /* If request is too large, reply with an error and restart the read */
        if (nbytes < reqsize) {
                req->out.h.error = -EIO;
                /* SETXATTR is special, since it may contain too large data */
                if (in->h.opcode == FUSE_SETXATTR)
                        req->out.h.error = -E2BIG;
                request_end(fc, req);
                goto restart;
        }
        spin_unlock(&fc->lock);
        cs->req = req;
        err = fuse_copy_one(cs, &in->h, sizeof(in->h));
        if (!err)
                err = fuse_copy_args(cs, in->numargs, in->argpages,
                                     (struct fuse_arg *) in->args, 0);
        fuse_copy_finish(cs);
        spin_lock(&fc->lock);
        req->locked = 0;
        if (req->aborted) {
                request_end(fc, req);
                return -ENODEV;
        }
        if (err) {
                req->out.h.error = -EIO;
                request_end(fc, req);
                return err;
        }
        if (!req->isreply)
                request_end(fc, req);
        else {
                req->state = FUSE_REQ_SENT;
                list_move_tail(&req->list, &fc->processing);
                if (req->interrupted)
                        queue_interrupt(fc, req);
                spin_unlock(&fc->lock);
        }
        return reqsize;

 err_unlock:
        spin_unlock(&fc->lock);
        return err;
}

static ssize_t fuse_dev_read(struct kiocb *iocb, const struct iovec *iov,
                              unsigned long nr_segs, loff_t pos)
{
        struct fuse_copy_state cs;
        struct file *file = iocb->ki_filp;
        struct fuse_conn *fc = fuse_get_conn(file);
        if (!fc)
                return -EPERM;

        fuse_copy_init(&cs, fc, 1, iov, nr_segs);

        return fuse_dev_do_read(fc, file, &cs, iov_length(iov, nr_segs));
}

static int fuse_dev_pipe_buf_steal(struct pipe_inode_info *pipe,
                                   struct pipe_buffer *buf)
{
        return 1;
}

static const struct pipe_buf_operations fuse_dev_pipe_buf_ops = {
        .can_merge = 0,
        .map = generic_pipe_buf_map,
        .unmap = generic_pipe_buf_unmap,
        .confirm = generic_pipe_buf_confirm,
        .release = generic_pipe_buf_release,
        .steal = fuse_dev_pipe_buf_steal,
        .get = generic_pipe_buf_get,
};

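/*
 * Zero-copy variant of the device read: the request payload is
 * handed to userspace as pipe buffers instead of being copied into
 * an iovec.
 */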
static ssize_t fuse_dev_splice_read(struct file *in, loff_t *ppos,
                                    struct pipe_inode_info *pipe,
                                    size_t len, unsigned int flags)
{
        int ret;
        int page_nr = 0;
        int do_wakeup = 0;
        struct pipe_buffer *bufs;
        struct fuse_copy_state cs;
        struct fuse_conn *fc = fuse_get_conn(in);
        if (!fc)
                return -EPERM;

        bufs = kmalloc(pipe->buffers * sizeof(struct pipe_buffer), GFP_KERNEL);
        if (!bufs)
                return -ENOMEM;

        fuse_copy_init(&cs, fc, 1, NULL, 0);
        cs.pipebufs = bufs;
        cs.pipe = pipe;
        ret = fuse_dev_do_read(fc, in, &cs, len);
        if (ret < 0)
                goto out;

        ret = 0;
        pipe_lock(pipe);

        if (!pipe->readers) {
                send_sig(SIGPIPE, current, 0);
                if (!ret)
                        ret = -EPIPE;
                goto out_unlock;
        }

        if (pipe->nrbufs + cs.nr_segs > pipe->buffers) {
                ret = -EIO;
                goto out_unlock;
        }

        while (page_nr < cs.nr_segs) {
                int newbuf = (pipe->curbuf + pipe->nrbufs) & (pipe->buffers - 1);
                struct pipe_buffer *buf = pipe->bufs + newbuf;

                buf->page = bufs[page_nr].page;
                buf->offset = bufs[page_nr].offset;
                buf->len = bufs[page_nr].len;
                buf->ops = &fuse_dev_pipe_buf_ops;

                pipe->nrbufs++;
                page_nr++;
                ret += buf->len;

                if (pipe->inode)
                        do_wakeup = 1;
        }

out_unlock:
        pipe_unlock(pipe);

        if (do_wakeup) {
                smp_mb();
                if (waitqueue_active(&pipe->wait))
                        wake_up_interruptible(&pipe->wait);
                kill_fasync(&pipe->fasync_readers, SIGIO, POLL_IN);
        }

out:
        for (; page_nr < cs.nr_segs; page_nr++)
                page_cache_release(bufs[page_nr].page);

        kfree(bufs);
        return ret;
}

static int fuse_notify_poll(struct fuse_conn *fc, unsigned int size,
                            struct fuse_copy_state *cs)
{
        struct fuse_notify_poll_wakeup_out outarg;
        int err = -EINVAL;

        if (size != sizeof(outarg))
                goto err;

        err = fuse_copy_one(cs, &outarg, sizeof(outarg));
        if (err)
                goto err;

        fuse_copy_finish(cs);
        return fuse_notify_poll_wakeup(fc, &outarg);

err:
        fuse_copy_finish(cs);
        return err;
}

static int fuse_notify_inval_inode(struct fuse_conn *fc, unsigned int size,
                                   struct fuse_copy_state *cs)
{
        struct fuse_notify_inval_inode_out outarg;
        int err = -EINVAL;

        if (size != sizeof(outarg))
                goto err;

        err = fuse_copy_one(cs, &outarg, sizeof(outarg));
        if (err)
                goto err;
        fuse_copy_finish(cs);

        down_read(&fc->killsb);
        err = -ENOENT;
        if (fc->sb) {
                err = fuse_reverse_inval_inode(fc->sb, outarg.ino,
                                               outarg.off, outarg.len);
        }
        up_read(&fc->killsb);
        return err;

err:
        fuse_copy_finish(cs);
        return err;
}

static int fuse_notify_inval_entry(struct fuse_conn *fc, unsigned int size,
                                   struct fuse_copy_state *cs)
{
        struct fuse_notify_inval_entry_out outarg;
        int err = -ENOMEM;
        char *buf;
        struct qstr name;

        buf = kzalloc(FUSE_NAME_MAX + 1, GFP_KERNEL);
        if (!buf)
                goto err;

        err = -EINVAL;
        if (size < sizeof(outarg))
                goto err;

        err = fuse_copy_one(cs, &outarg, sizeof(outarg));
        if (err)
                goto err;

        err = -ENAMETOOLONG;
        if (outarg.namelen > FUSE_NAME_MAX)
                goto err;

        err = -EINVAL;
        if (size != sizeof(outarg) + outarg.namelen + 1)
                goto err;

        name.name = buf;
        name.len = outarg.namelen;
        err = fuse_copy_one(cs, buf, outarg.namelen + 1);
        if (err)
                goto err;
        fuse_copy_finish(cs);
        buf[outarg.namelen] = 0;
        name.hash = full_name_hash(name.name, name.len);

        down_read(&fc->killsb);
        err = -ENOENT;
        if (fc->sb)
                err = fuse_reverse_inval_entry(fc->sb, outarg.parent, 0, &name);
        up_read(&fc->killsb);
        kfree(buf);
        return err;

err:
        kfree(buf);
        fuse_copy_finish(cs);
        return err;
}

static int fuse_notify_delete(struct fuse_conn *fc, unsigned int size,
                              struct fuse_copy_state *cs)
{
        struct fuse_notify_delete_out outarg;
        int err = -ENOMEM;
        char *buf;
        struct qstr name;

        buf = kzalloc(FUSE_NAME_MAX + 1, GFP_KERNEL);
        if (!buf)
                goto err;

        err = -EINVAL;
        if (size < sizeof(outarg))
                goto err;

        err = fuse_copy_one(cs, &outarg, sizeof(outarg));
        if (err)
                goto err;

        err = -ENAMETOOLONG;
        if (outarg.namelen > FUSE_NAME_MAX)
                goto err;

        err = -EINVAL;
        if (size != sizeof(outarg) + outarg.namelen + 1)
                goto err;

        name.name = buf;
        name.len = outarg.namelen;
        err = fuse_copy_one(cs, buf, outarg.namelen + 1);
        if (err)
                goto err;
        fuse_copy_finish(cs);
        buf[outarg.namelen] = 0;
        name.hash = full_name_hash(name.name, name.len);

        down_read(&fc->killsb);
        err = -ENOENT;
        if (fc->sb)
                err = fuse_reverse_inval_entry(fc->sb, outarg.parent,
                                               outarg.child, &name);
        up_read(&fc->killsb);
        kfree(buf);
        return err;

err:
        kfree(buf);
        fuse_copy_finish(cs);
        return err;
}

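/*
 * Handle FUSE_NOTIFY_STORE: copy data pushed by the filesystem
 * directly into the inode's page cache, extending the file size if
 * necessary.
 */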
static int fuse_notify_store(struct fuse_conn *fc, unsigned int size,
                             struct fuse_copy_state *cs)
{
        struct fuse_notify_store_out outarg;
        struct inode *inode;
        struct address_space *mapping;
        u64 nodeid;
        int err;
        pgoff_t index;
        unsigned int offset;
        unsigned int num;
        loff_t file_size;
        loff_t end;

        err = -EINVAL;
        if (size < sizeof(outarg))
                goto out_finish;

        err = fuse_copy_one(cs, &outarg, sizeof(outarg));
        if (err)
                goto out_finish;

        err = -EINVAL;
        if (size - sizeof(outarg) != outarg.size)
                goto out_finish;

        nodeid = outarg.nodeid;

        down_read(&fc->killsb);

        err = -ENOENT;
        if (!fc->sb)
                goto out_up_killsb;

        inode = ilookup5(fc->sb, nodeid, fuse_inode_eq, &nodeid);
        if (!inode)
                goto out_up_killsb;

        mapping = inode->i_mapping;
        index = outarg.offset >> PAGE_CACHE_SHIFT;
        offset = outarg.offset & ~PAGE_CACHE_MASK;
        file_size = i_size_read(inode);
        end = outarg.offset + outarg.size;
        if (end > file_size) {
                file_size = end;
                fuse_write_update_size(inode, file_size);
        }

        num = outarg.size;
        while (num) {
                struct page *page;
                unsigned int this_num;

                err = -ENOMEM;
                page = find_or_create_page(mapping, index,
                                           mapping_gfp_mask(mapping));
                if (!page)
                        goto out_iput;

                this_num = min_t(unsigned, num, PAGE_CACHE_SIZE - offset);
                err = fuse_copy_page(cs, &page, offset, this_num, 0);
                if (!err && offset == 0 && (num != 0 || file_size == end))
                        SetPageUptodate(page);
                unlock_page(page);
                page_cache_release(page);

                if (err)
                        goto out_iput;

                num -= this_num;
                offset = 0;
                index++;
        }

        err = 0;

out_iput:
        iput(inode);
out_up_killsb:
        up_read(&fc->killsb);
out_finish:
        fuse_copy_finish(cs);
        return err;
}

static void fuse_retrieve_end(struct fuse_conn *fc, struct fuse_req *req)
{
        release_pages(req->pages, req->num_pages, 0);
}

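/*
 * Handle the bulk of FUSE_NOTIFY_RETRIEVE: collect the requested
 * page cache pages and send them back to the filesystem in a
 * FUSE_NOTIFY_REPLY request.
 */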
1586static int fuse_retrieve(struct fuse_conn *fc, struct inode *inode,
1587                         struct fuse_notify_retrieve_out *outarg)
1588{
1589        int err;
1590        struct address_space *mapping = inode->i_mapping;
1591        struct fuse_req *req;
1592        pgoff_t index;
1593        loff_t file_size;
1594        unsigned int num;
1595        unsigned int offset;
1596        size_t total_len = 0;
1597        int num_pages;
1598
1599        offset = outarg->offset & ~PAGE_CACHE_MASK;
1600        file_size = i_size_read(inode);
1601
1602        num = outarg->size;
1603        if (outarg->offset > file_size)
1604                num = 0;
1605        else if (outarg->offset + num > file_size)
1606                num = file_size - outarg->offset;
1607
1608        num_pages = (num + offset + PAGE_SIZE - 1) >> PAGE_SHIFT;
1609        num_pages = min(num_pages, FUSE_MAX_PAGES_PER_REQ);
1610
1611        req = fuse_get_req(fc, num_pages);
1612        if (IS_ERR(req))
1613                return PTR_ERR(req);
1614
1615        req->in.h.opcode = FUSE_NOTIFY_REPLY;
1616        req->in.h.nodeid = outarg->nodeid;
1617        req->in.numargs = 2;
1618        req->in.argpages = 1;
1619        req->page_descs[0].offset = offset;
1620        req->end = fuse_retrieve_end;
1621
1622        index = outarg->offset >> PAGE_CACHE_SHIFT;
1623
1624        while (num && req->num_pages < num_pages) {
1625                struct page *page;
1626                unsigned int this_num;
1627
1628                page = find_get_page(mapping, index);
1629                if (!page)
1630                        break;
1631
1632                this_num = min_t(unsigned, num, PAGE_CACHE_SIZE - offset);
1633                req->pages[req->num_pages] = page;
1634                req->page_descs[req->num_pages].length = this_num;
1635                req->num_pages++;
1636
1637                offset = 0;
1638                num -= this_num;
1639                total_len += this_num;
1640                index++;
1641        }
1642        req->misc.retrieve_in.offset = outarg->offset;
1643        req->misc.retrieve_in.size = total_len;
1644        req->in.args[0].size = sizeof(req->misc.retrieve_in);
1645        req->in.args[0].value = &req->misc.retrieve_in;
1646        req->in.args[1].size = total_len;
1647
1648        err = fuse_request_send_notify_reply(fc, req, outarg->notify_unique);
1649        if (err)
1650                fuse_retrieve_end(fc, req);
1651
1652        return err;
1653}
1654
1655static int fuse_notify_retrieve(struct fuse_conn *fc, unsigned int size,
1656                                struct fuse_copy_state *cs)
1657{
1658        struct fuse_notify_retrieve_out outarg;
1659        struct inode *inode;
1660        int err;
1661
1662        err = -EINVAL;
1663        if (size != sizeof(outarg))
1664                goto copy_finish;
1665
1666        err = fuse_copy_one(cs, &outarg, sizeof(outarg));
1667        if (err)
1668                goto copy_finish;
1669
1670        fuse_copy_finish(cs);
1671
1672        down_read(&fc->killsb);
1673        err = -ENOENT;
1674        if (fc->sb) {
1675                u64 nodeid = outarg.nodeid;
1676
1677                inode = ilookup5(fc->sb, nodeid, fuse_inode_eq, &nodeid);
1678                if (inode) {
1679                        err = fuse_retrieve(fc, inode, &outarg);
1680                        iput(inode);
1681                }
1682        }
1683        up_read(&fc->killsb);
1684
1685        return err;
1686
1687copy_finish:
1688        fuse_copy_finish(cs);
1689        return err;
1690}
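
/*
 * Illustrative sketch of the userspace side (assuming the libfuse 2.9
 * low-level API; not part of this file): the daemon starts the round
 * trip with fuse_lowlevel_notify_retrieve() and receives the data in
 * its retrieve_reply() callback, carried by the FUSE_NOTIFY_REPLY
 * request that fuse_retrieve() builds above.
 *
 *	// Ask for up to 64k of cached data from offset 0 of "ino";
 *	// "ch", "ino" and "cookie" come from the daemon's own state.
 *	fuse_lowlevel_notify_retrieve(ch, ino, 65536, 0, cookie);
 */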
1691
1692static int fuse_notify(struct fuse_conn *fc, enum fuse_notify_code code,
1693                       unsigned int size, struct fuse_copy_state *cs)
1694{
1695        switch (code) {
1696        case FUSE_NOTIFY_POLL:
1697                return fuse_notify_poll(fc, size, cs);
1698
1699        case FUSE_NOTIFY_INVAL_INODE:
1700                return fuse_notify_inval_inode(fc, size, cs);
1701
1702        case FUSE_NOTIFY_INVAL_ENTRY:
1703                return fuse_notify_inval_entry(fc, size, cs);
1704
1705        case FUSE_NOTIFY_STORE:
1706                return fuse_notify_store(fc, size, cs);
1707
1708        case FUSE_NOTIFY_RETRIEVE:
1709                return fuse_notify_retrieve(fc, size, cs);
1710
1711        case FUSE_NOTIFY_DELETE:
1712                return fuse_notify_delete(fc, size, cs);
1713
1714        default:
1715                fuse_copy_finish(cs);
1716                return -EINVAL;
1717        }
1718}
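
/*
 * Illustrative userspace sketch (not part of this file): unsolicited
 * notifications arrive through the same device write as replies, with
 * unique == 0 and the notification code carried in the error field,
 * exactly as decoded in fuse_dev_do_write() below.
 *
 *	#include <linux/fuse.h>
 *	#include <stdint.h>
 *	#include <sys/uio.h>
 *
 *	// "fd" is the /dev/fuse descriptor, "kh" the kernel poll handle
 *	// received in a FUSE_POLL request.
 *	static int notify_poll_wakeup(int fd, uint64_t kh)
 *	{
 *		struct fuse_notify_poll_wakeup_out arg = { .kh = kh };
 *		struct fuse_out_header oh = {
 *			.len	= sizeof(oh) + sizeof(arg),
 *			.error	= FUSE_NOTIFY_POLL,	// code, not an errno
 *			.unique	= 0,			// marks a notification
 *		};
 *		struct iovec iov[2] = {
 *			{ &oh, sizeof(oh) }, { &arg, sizeof(arg) },
 *		};
 *		return writev(fd, iov, 2) < 0 ? -1 : 0;
 *	}
 */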
1719
1720/* Look up a request on the processing list by its unique or interrupt ID */
1721static struct fuse_req *request_find(struct fuse_conn *fc, u64 unique)
1722{
1723        struct list_head *entry;
1724
1725        list_for_each(entry, &fc->processing) {
1726                struct fuse_req *req;
1727                req = list_entry(entry, struct fuse_req, list);
1728                if (req->in.h.unique == unique || req->intr_unique == unique)
1729                        return req;
1730        }
1731        return NULL;
1732}
1733
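/*
 * Copy the reply arguments from the write buffer into the request.
 * An error reply carries no arguments, so it must be exactly header
 * sized.  A reply shorter than the declared argument layout is valid
 * only if the request marked its last argument as variable length
 * (out->argvar); in that case the last argument is truncated to fit.
 */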
1734static int copy_out_args(struct fuse_copy_state *cs, struct fuse_out *out,
1735                         unsigned nbytes)
1736{
1737        unsigned reqsize = sizeof(struct fuse_out_header);
1738
1739        if (out->h.error)
1740                return nbytes != reqsize ? -EINVAL : 0;
1741
1742        reqsize += len_args(out->numargs, out->args);
1743
1744        if (reqsize < nbytes || (reqsize > nbytes && !out->argvar))
1745                return -EINVAL;
1746        else if (reqsize > nbytes) {
1747                struct fuse_arg *lastarg = &out->args[out->numargs-1];
1748                unsigned diffsize = reqsize - nbytes;
1749                if (diffsize > lastarg->size)
1750                        return -EINVAL;
1751                lastarg->size -= diffsize;
1752        }
1753        return fuse_copy_args(cs, out->numargs, out->argpages, out->args,
1754                              out->page_zeroing);
1755}
1756
1757/*
1758 * Write a single reply to a request.  First the header is copied from
1759 * the write buffer.  The request is then looked up on the processing
1760 * list by the unique ID found in the header.  If found, it is removed
1761 * from the list and the rest of the buffer is copied to the request.
1762 * The request is finished by calling request_end().
1763 */
1764static ssize_t fuse_dev_do_write(struct fuse_conn *fc,
1765                                 struct fuse_copy_state *cs, size_t nbytes)
1766{
1767        int err;
1768        struct fuse_req *req;
1769        struct fuse_out_header oh;
1770
1771        if (nbytes < sizeof(struct fuse_out_header))
1772                return -EINVAL;
1773
1774        err = fuse_copy_one(cs, &oh, sizeof(oh));
1775        if (err)
1776                goto err_finish;
1777
1778        err = -EINVAL;
1779        if (oh.len != nbytes)
1780                goto err_finish;
1781
1782        /*
1783         * A zero oh.unique indicates an unsolicited notification
1784         * message; oh.error then carries the notification code.
1785         */
1786        if (!oh.unique) {
1787                err = fuse_notify(fc, oh.error, nbytes - sizeof(oh), cs);
1788                return err ? err : nbytes;
1789        }
1790
1791        err = -EINVAL;
1792        if (oh.error <= -1000 || oh.error > 0)
1793                goto err_finish;
1794
1795        spin_lock(&fc->lock);
1796        err = -ENOENT;
1797        if (!fc->connected)
1798                goto err_unlock;
1799
1800        req = request_find(fc, oh.unique);
1801        if (!req)
1802                goto err_unlock;
1803
1804        if (req->aborted) {
1805                spin_unlock(&fc->lock);
1806                fuse_copy_finish(cs);
1807                spin_lock(&fc->lock);
1808                request_end(fc, req);
1809                return -ENOENT;
1810        }
1811        /* Is it an interrupt reply? */
1812        if (req->intr_unique == oh.unique) {
1813                err = -EINVAL;
1814                if (nbytes != sizeof(struct fuse_out_header))
1815                        goto err_unlock;
1816
1817                if (oh.error == -ENOSYS)
1818                        fc->no_interrupt = 1;
1819                else if (oh.error == -EAGAIN)
1820                        queue_interrupt(fc, req);
1821
1822                spin_unlock(&fc->lock);
1823                fuse_copy_finish(cs);
1824                return nbytes;
1825        }
1826
1827        req->state = FUSE_REQ_WRITING;
1828        list_move(&req->list, &fc->io);
1829        req->out.h = oh;
1830        req->locked = 1;
1831        cs->req = req;
1832        if (!req->out.page_replace)
1833                cs->move_pages = 0;
1834        spin_unlock(&fc->lock);
1835
1836        err = copy_out_args(cs, &req->out, nbytes);
1837        fuse_copy_finish(cs);
1838
1839        spin_lock(&fc->lock);
1840        req->locked = 0;
1841        if (!err) {
1842                if (req->aborted)
1843                        err = -ENOENT;
1844        } else if (!req->aborted)
1845                req->out.h.error = -EIO;
1846        request_end(fc, req);
1847
1848        return err ? err : nbytes;
1849
1850 err_unlock:
1851        spin_unlock(&fc->lock);
1852 err_finish:
1853        fuse_copy_finish(cs);
1854        return err;
1855}
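
/*
 * Illustrative userspace sketch (not part of this file): a reply is a
 * single gathered write starting with struct fuse_out_header, whose
 * len field must match the total number of bytes written.
 *
 *	#include <linux/fuse.h>
 *	#include <stdint.h>
 *	#include <sys/uio.h>
 *
 *	// "fd" is the /dev/fuse descriptor; "req_unique" was taken from
 *	// the fuse_in_header of the request being answered.
 *	static int reply_entry(int fd, uint64_t req_unique,
 *			       const struct fuse_entry_out *arg)
 *	{
 *		struct fuse_out_header oh = {
 *			.len	= sizeof(oh) + sizeof(*arg),
 *			.error	= 0,		// or -errno, above -1000
 *			.unique	= req_unique,
 *		};
 *		struct iovec iov[2] = {
 *			{ &oh, sizeof(oh) }, { (void *)arg, sizeof(*arg) },
 *		};
 *		return writev(fd, iov, 2) < 0 ? -1 : 0;
 *	}
 */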
1856
1857static ssize_t fuse_dev_write(struct kiocb *iocb, const struct iovec *iov,
1858                              unsigned long nr_segs, loff_t pos)
1859{
1860        struct fuse_copy_state cs;
1861        struct fuse_conn *fc = fuse_get_conn(iocb->ki_filp);
1862        if (!fc)
1863                return -EPERM;
1864
1865        fuse_copy_init(&cs, fc, 0, iov, nr_segs);
1866
1867        return fuse_dev_do_write(fc, &cs, iov_length(iov, nr_segs));
1868}
1869
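/*
 * Splice a reply (or notification) from a pipe into the device.  The
 * covered pipe buffers are detached (or, for a partially consumed
 * buffer, additionally referenced) under pipe_lock() and handed to
 * the copy machinery, which may move whole pages into the page cache
 * rather than copy them when SPLICE_F_MOVE is set.
 */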
1870static ssize_t fuse_dev_splice_write(struct pipe_inode_info *pipe,
1871                                     struct file *out, loff_t *ppos,
1872                                     size_t len, unsigned int flags)
1873{
1874        unsigned nbuf;
1875        unsigned idx;
1876        struct pipe_buffer *bufs;
1877        struct fuse_copy_state cs;
1878        struct fuse_conn *fc;
1879        size_t rem;
1880        ssize_t ret;
1881
1882        fc = fuse_get_conn(out);
1883        if (!fc)
1884                return -EPERM;
1885
1886        bufs = kmalloc(pipe->buffers * sizeof(struct pipe_buffer), GFP_KERNEL);
1887        if (!bufs)
1888                return -ENOMEM;
1889
1890        pipe_lock(pipe);
1891        nbuf = 0;
1892        rem = 0;
1893        for (idx = 0; idx < pipe->nrbufs && rem < len; idx++)
1894                rem += pipe->bufs[(pipe->curbuf + idx) & (pipe->buffers - 1)].len;
1895
1896        ret = -EINVAL;
1897        if (rem < len) {
1898                pipe_unlock(pipe);
1899                goto out;
1900        }
1901
1902        rem = len;
1903        while (rem) {
1904                struct pipe_buffer *ibuf;
1905                struct pipe_buffer *obuf;
1906
1907                BUG_ON(nbuf >= pipe->buffers);
1908                BUG_ON(!pipe->nrbufs);
1909                ibuf = &pipe->bufs[pipe->curbuf];
1910                obuf = &bufs[nbuf];
1911
1912                if (rem >= ibuf->len) {
1913                        *obuf = *ibuf;
1914                        ibuf->ops = NULL;
1915                        pipe->curbuf = (pipe->curbuf + 1) & (pipe->buffers - 1);
1916                        pipe->nrbufs--;
1917                } else {
1918                        ibuf->ops->get(pipe, ibuf);
1919                        *obuf = *ibuf;
1920                        obuf->flags &= ~PIPE_BUF_FLAG_GIFT;
1921                        obuf->len = rem;
1922                        ibuf->offset += obuf->len;
1923                        ibuf->len -= obuf->len;
1924                }
1925                nbuf++;
1926                rem -= obuf->len;
1927        }
1928        pipe_unlock(pipe);
1929
1930        fuse_copy_init(&cs, fc, 0, NULL, nbuf);
1931        cs.pipebufs = bufs;
1932        cs.pipe = pipe;
1933
1934        if (flags & SPLICE_F_MOVE)
1935                cs.move_pages = 1;
1936
1937        ret = fuse_dev_do_write(fc, &cs, len);
1938
1939        for (idx = 0; idx < nbuf; idx++) {
1940                struct pipe_buffer *buf = &bufs[idx];
1941                buf->ops->release(pipe, buf);
1942        }
1943out:
1944        kfree(bufs);
1945        return ret;
1946}
1947
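/*
 * The device is always writable; it becomes readable once a request
 * or a forget is queued for the daemon to pick up.
 */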
1948static unsigned fuse_dev_poll(struct file *file, poll_table *wait)
1949{
1950        unsigned mask = POLLOUT | POLLWRNORM;
1951        struct fuse_conn *fc = fuse_get_conn(file);
1952        if (!fc)
1953                return POLLERR;
1954
1955        poll_wait(file, &fc->waitq, wait);
1956
1957        spin_lock(&fc->lock);
1958        if (!fc->connected)
1959                mask = POLLERR;
1960        else if (request_pending(fc))
1961                mask |= POLLIN | POLLRDNORM;
1962        spin_unlock(&fc->lock);
1963
1964        return mask;
1965}
1966
1967/*
1968 * Abort all requests on the given list (pending or processing)
1969 *
1970 * This function releases and reacquires fc->lock
1971 */
1972static void end_requests(struct fuse_conn *fc, struct list_head *head)
1973__releases(fc->lock)
1974__acquires(fc->lock)
1975{
1976        while (!list_empty(head)) {
1977                struct fuse_req *req;
1978                req = list_entry(head->next, struct fuse_req, list);
1979                req->out.h.error = -ECONNABORTED;
1980                request_end(fc, req);
1981                spin_lock(&fc->lock);
1982        }
1983}
1984
1985/*
1986 * Abort requests under I/O
1987 *
1988 * The requests are marked aborted and finished, and the request
1989 * waiter is woken up.  This will make request_wait_answer() wait
1990 * until the request is unlocked and then return.
1991 *
1992 * If the request is asynchronous, then the end function needs to be
1993 * called after waiting for the request to be unlocked (if it was
1994 * locked).
1995 */
1996static void end_io_requests(struct fuse_conn *fc)
1997__releases(fc->lock)
1998__acquires(fc->lock)
1999{
2000        while (!list_empty(&fc->io)) {
2001                struct fuse_req *req =
2002                        list_entry(fc->io.next, struct fuse_req, list);
2003                void (*end) (struct fuse_conn *, struct fuse_req *) = req->end;
2004
2005                req->aborted = 1;
2006                req->out.h.error = -ECONNABORTED;
2007                req->state = FUSE_REQ_FINISHED;
2008                list_del_init(&req->list);
2009                wake_up(&req->waitq);
2010                if (end) {
2011                        req->end = NULL;
2012                        __fuse_get_request(req);
2013                        spin_unlock(&fc->lock);
2014                        wait_event(req->waitq, !req->locked);
2015                        end(fc, req);
2016                        fuse_put_request(fc, req);
2017                        spin_lock(&fc->lock);
2018                }
2019        }
2020}
2021
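/*
 * Fail everything that is queued but not under I/O: flush the
 * background queue, end all pending and processing requests with
 * -ECONNABORTED and free any queued forget requests.
 */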
2022static void end_queued_requests(struct fuse_conn *fc)
2023__releases(fc->lock)
2024__acquires(fc->lock)
2025{
2026        fc->max_background = UINT_MAX;
2027        flush_bg_queue(fc);
2028        end_requests(fc, &fc->pending);
2029        end_requests(fc, &fc->processing);
2030        while (forget_pending(fc))
2031                kfree(dequeue_forget(fc, 1, NULL));
2032}
2033
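/*
 * Wake up all waiters sleeping in poll() on files of this connection
 * so that they can notice it is dead.
 */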
2034static void end_polls(struct fuse_conn *fc)
2035{
2036        struct rb_node *p;
2037
2038        p = rb_first(&fc->polled_files);
2039
2040        while (p) {
2041                struct fuse_file *ff;
2042                ff = rb_entry(p, struct fuse_file, polled_node);
2043                wake_up_interruptible_all(&ff->poll_wait);
2044
2045                p = rb_next(p);
2046        }
2047}
2048
2049/*
2050 * Abort all requests.
2051 *
2052 * Emergency exit in case of a malicious or accidental deadlock, or
2053 * just a hung filesystem.
2054 *
2055 * The same effect is usually achievable through killing the
2056 * filesystem daemon and all users of the filesystem.  The exception
2057 * is the combination of an asynchronous request and the tricky
2058 * deadlock (see Documentation/filesystems/fuse.txt).
2059 *
2060 * During the aborting, progression of requests from the pending and
2061 * processing lists onto the io list, and progression of new requests
2062 * onto the pending list, are prevented by fc->connected being false.
2063 *
2064 * Progression of requests under I/O to the processing list is
2065 * prevented by the req->aborted flag being true for these requests.
2066 * For this reason requests on the io list must be aborted first.
2067 */
2068void fuse_abort_conn(struct fuse_conn *fc)
2069{
2070        spin_lock(&fc->lock);
2071        if (fc->connected) {
2072                fc->connected = 0;
2073                fc->blocked = 0;
2074                end_io_requests(fc);
2075                end_queued_requests(fc);
2076                end_polls(fc);
2077                wake_up_all(&fc->waitq);
2078                wake_up_all(&fc->blocked_waitq);
2079                kill_fasync(&fc->fasync, SIGIO, POLL_IN);
2080        }
2081        spin_unlock(&fc->lock);
2082}
2083EXPORT_SYMBOL_GPL(fuse_abort_conn);
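
/*
 * Userspace reaches fuse_abort_conn() through the fusectl filesystem
 * described in Documentation/filesystems/fuse.txt, e.g. (sketch; the
 * directory name is the connection's device number):
 *
 *	echo 1 > /sys/fs/fuse/connections/39/abort
 */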
2084
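/*
 * Called on the final close of the device file, typically when the
 * filesystem daemon exits.  Queued requests are ended as in an abort
 * and poll waiters are woken; the daemon's reference on the
 * connection is then dropped.
 */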
2085int fuse_dev_release(struct inode *inode, struct file *file)
2086{
2087        struct fuse_conn *fc = fuse_get_conn(file);
2088        if (fc) {
2089                spin_lock(&fc->lock);
2090                fc->connected = 0;
2091                fc->blocked = 0;
2092                end_queued_requests(fc);
2093                end_polls(fc);
2094                wake_up_all(&fc->blocked_waitq);
2095                spin_unlock(&fc->lock);
2096                fuse_conn_put(fc);
2097        }
2098
2099        return 0;
2100}
2101EXPORT_SYMBOL_GPL(fuse_dev_release);
2102
2103static int fuse_dev_fasync(int fd, struct file *file, int on)
2104{
2105        struct fuse_conn *fc = fuse_get_conn(file);
2106        if (!fc)
2107                return -EPERM;
2108
2109        /* No locking - fasync_helper does its own locking */
2110        return fasync_helper(fd, file, on, &fc->fasync);
2111}
2112
2113const struct file_operations fuse_dev_operations = {
2114        .owner          = THIS_MODULE,
2115        .llseek         = no_llseek,
2116        .read           = do_sync_read,
2117        .aio_read       = fuse_dev_read,
2118        .splice_read    = fuse_dev_splice_read,
2119        .write          = do_sync_write,
2120        .aio_write      = fuse_dev_write,
2121        .splice_write   = fuse_dev_splice_write,
2122        .poll           = fuse_dev_poll,
2123        .release        = fuse_dev_release,
2124        .fasync         = fuse_dev_fasync,
2125};
2126EXPORT_SYMBOL_GPL(fuse_dev_operations);
2127
2128static struct miscdevice fuse_miscdevice = {
2129        .minor = FUSE_MINOR,
2130        .name  = "fuse",
2131        .fops = &fuse_dev_operations,
2132};
2133
2134int __init fuse_dev_init(void)
2135{
2136        int err = -ENOMEM;
2137        fuse_req_cachep = kmem_cache_create("fuse_request",
2138                                            sizeof(struct fuse_req),
2139                                            0, 0, NULL);
2140        if (!fuse_req_cachep)
2141                goto out;
2142
2143        err = misc_register(&fuse_miscdevice);
2144        if (err)
2145                goto out_cache_clean;
2146
2147        return 0;
2148
2149 out_cache_clean:
2150        kmem_cache_destroy(fuse_req_cachep);
2151 out:
2152        return err;
2153}
2154
2155void fuse_dev_cleanup(void)
2156{
2157        misc_deregister(&fuse_miscdevice);
2158        kmem_cache_destroy(fuse_req_cachep);
2159}
2160