linux/fs/fuse/dev.c
/*
  FUSE: Filesystem in Userspace
  Copyright (C) 2001-2008  Miklos Szeredi <miklos@szeredi.hu>

  This program can be distributed under the terms of the GNU GPL.
  See the file COPYING.
*/

#include "fuse_i.h"

#include <linux/init.h>
#include <linux/module.h>
#include <linux/poll.h>
#include <linux/uio.h>
#include <linux/miscdevice.h>
#include <linux/pagemap.h>
#include <linux/file.h>
#include <linux/slab.h>

MODULE_ALIAS_MISCDEV(FUSE_MINOR);

static struct kmem_cache *fuse_req_cachep;

static struct fuse_conn *fuse_get_conn(struct file *file)
{
        /*
         * Lockless access is OK, because file->private_data is set
         * once during mount and is valid until the file is released.
         */
        return file->private_data;
}

static void fuse_request_init(struct fuse_req *req)
{
        memset(req, 0, sizeof(*req));
        INIT_LIST_HEAD(&req->list);
        INIT_LIST_HEAD(&req->intr_entry);
        init_waitqueue_head(&req->waitq);
        atomic_set(&req->count, 1);
}

struct fuse_req *fuse_request_alloc(void)
{
        struct fuse_req *req = kmem_cache_alloc(fuse_req_cachep, GFP_KERNEL);
        if (req)
                fuse_request_init(req);
        return req;
}
EXPORT_SYMBOL_GPL(fuse_request_alloc);

struct fuse_req *fuse_request_alloc_nofs(void)
{
        struct fuse_req *req = kmem_cache_alloc(fuse_req_cachep, GFP_NOFS);
        if (req)
                fuse_request_init(req);
        return req;
}

void fuse_request_free(struct fuse_req *req)
{
        kmem_cache_free(fuse_req_cachep, req);
}

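/*
 * Block all signals except SIGKILL, saving the previous signal mask
 * in *oldset so that restore_sigs() can put it back.
 */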
static void block_sigs(sigset_t *oldset)
{
        sigset_t mask;

        siginitsetinv(&mask, sigmask(SIGKILL));
        sigprocmask(SIG_BLOCK, &mask, oldset);
}

static void restore_sigs(sigset_t *oldset)
{
        sigprocmask(SIG_SETMASK, oldset, NULL);
}

static void __fuse_get_request(struct fuse_req *req)
{
        atomic_inc(&req->count);
}

/* Must be called with > 1 refcount */
static void __fuse_put_request(struct fuse_req *req)
{
        BUG_ON(atomic_read(&req->count) < 2);
        atomic_dec(&req->count);
}

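/* Stamp the request with the credentials and pid of the current task */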
static void fuse_req_init_context(struct fuse_req *req)
{
        req->in.h.uid = current_fsuid();
        req->in.h.gid = current_fsgid();
        req->in.h.pid = current->pid;
}

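/*
 * Reserve a slot in fc->num_waiting, wait until the connection is no
 * longer blocked (only SIGKILL may interrupt the wait), then allocate
 * and initialize a request.  Returns ERR_PTR(-EINTR),
 * ERR_PTR(-ENOTCONN) or ERR_PTR(-ENOMEM) on failure.
 */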
struct fuse_req *fuse_get_req(struct fuse_conn *fc)
{
        struct fuse_req *req;
        sigset_t oldset;
        int intr;
        int err;

        atomic_inc(&fc->num_waiting);
        block_sigs(&oldset);
        intr = wait_event_interruptible(fc->blocked_waitq, !fc->blocked);
        restore_sigs(&oldset);
        err = -EINTR;
        if (intr)
                goto out;

        err = -ENOTCONN;
        if (!fc->connected)
                goto out;

        req = fuse_request_alloc();
        err = -ENOMEM;
        if (!req)
                goto out;

        fuse_req_init_context(req);
        req->waiting = 1;
        return req;

 out:
        atomic_dec(&fc->num_waiting);
        return ERR_PTR(err);
}
EXPORT_SYMBOL_GPL(fuse_get_req);

/*
 * Return request in fuse_file->reserved_req.  However that may
 * currently be in use.  If that is the case, wait for it to become
 * available.
 */
static struct fuse_req *get_reserved_req(struct fuse_conn *fc,
                                         struct file *file)
{
        struct fuse_req *req = NULL;
        struct fuse_file *ff = file->private_data;

        do {
                wait_event(fc->reserved_req_waitq, ff->reserved_req);
                spin_lock(&fc->lock);
                if (ff->reserved_req) {
                        req = ff->reserved_req;
                        ff->reserved_req = NULL;
                        get_file(file);
                        req->stolen_file = file;
                }
                spin_unlock(&fc->lock);
        } while (!req);

        return req;
}

/*
 * Put stolen request back into fuse_file->reserved_req
 */
static void put_reserved_req(struct fuse_conn *fc, struct fuse_req *req)
{
        struct file *file = req->stolen_file;
        struct fuse_file *ff = file->private_data;

        spin_lock(&fc->lock);
        fuse_request_init(req);
        BUG_ON(ff->reserved_req);
        ff->reserved_req = req;
        wake_up_all(&fc->reserved_req_waitq);
        spin_unlock(&fc->lock);
        fput(file);
}

/*
 * Get a request for a file operation; always succeeds.
 *
 * This is used for sending the FLUSH request, which must get to
 * userspace, due to POSIX locks which may need to be unlocked.
 *
 * If allocation fails due to OOM, use the reserved request in
 * fuse_file.
 *
 * This is very unlikely to deadlock accidentally, since the
 * filesystem should not have its own file open.  If deadlock is
 * intentional, it can still be broken by "aborting" the filesystem.
 */
struct fuse_req *fuse_get_req_nofail(struct fuse_conn *fc, struct file *file)
{
        struct fuse_req *req;

        atomic_inc(&fc->num_waiting);
        wait_event(fc->blocked_waitq, !fc->blocked);
        req = fuse_request_alloc();
        if (!req)
                req = get_reserved_req(fc, file);

        fuse_req_init_context(req);
        req->waiting = 1;
        return req;
}

void fuse_put_request(struct fuse_conn *fc, struct fuse_req *req)
{
        if (atomic_dec_and_test(&req->count)) {
                if (req->waiting)
                        atomic_dec(&fc->num_waiting);

                if (req->stolen_file)
                        put_reserved_req(fc, req);
                else
                        fuse_request_free(req);
        }
}
EXPORT_SYMBOL_GPL(fuse_put_request);

static unsigned len_args(unsigned numargs, struct fuse_arg *args)
{
        unsigned nbytes = 0;
        unsigned i;

        for (i = 0; i < numargs; i++)
                nbytes += args[i].size;

        return nbytes;
}

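/*
 * Allocate the next unique request ID.  Zero is reserved for
 * notification messages, so skip it on wraparound.  Called with
 * fc->lock held.
 */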
static u64 fuse_get_unique(struct fuse_conn *fc)
{
        fc->reqctr++;
        /* zero is special */
        if (fc->reqctr == 0)
                fc->reqctr = 1;

        return fc->reqctr;
}

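/*
 * Add a request to the pending list and wake up any reader waiting on
 * the device.  Called with fc->lock held.
 */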
static void queue_request(struct fuse_conn *fc, struct fuse_req *req)
{
        req->in.h.unique = fuse_get_unique(fc);
        req->in.h.len = sizeof(struct fuse_in_header) +
                len_args(req->in.numargs, (struct fuse_arg *) req->in.args);
        list_add_tail(&req->list, &fc->pending);
        req->state = FUSE_REQ_PENDING;
        if (!req->waiting) {
                req->waiting = 1;
                atomic_inc(&fc->num_waiting);
        }
        wake_up(&fc->waitq);
        kill_fasync(&fc->fasync, SIGIO, POLL_IN);
}

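/*
 * Move background requests from the bg_queue to the pending list, as
 * long as fewer than fc->max_background of them are active.  Called
 * with fc->lock held.
 */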
static void flush_bg_queue(struct fuse_conn *fc)
{
        while (fc->active_background < fc->max_background &&
               !list_empty(&fc->bg_queue)) {
                struct fuse_req *req;

                req = list_entry(fc->bg_queue.next, struct fuse_req, list);
                list_del(&req->list);
                fc->active_background++;
                queue_request(fc, req);
        }
}

/*
 * This function is called when a request is finished.  Either a reply
 * has arrived or it was aborted (and not yet sent) or some error
 * occurred during communication with userspace, or the device file
 * was closed.  The requester thread is woken up (if still waiting),
 * the 'end' callback is called if given, and then the reference to
 * the request is released.
 *
 * Called with fc->lock, unlocks it
 */
static void request_end(struct fuse_conn *fc, struct fuse_req *req)
__releases(&fc->lock)
{
        void (*end) (struct fuse_conn *, struct fuse_req *) = req->end;
        req->end = NULL;
        list_del(&req->list);
        list_del(&req->intr_entry);
        req->state = FUSE_REQ_FINISHED;
        if (req->background) {
                if (fc->num_background == fc->max_background) {
                        fc->blocked = 0;
                        wake_up_all(&fc->blocked_waitq);
                }
                if (fc->num_background == fc->congestion_threshold &&
                    fc->connected && fc->bdi_initialized) {
                        clear_bdi_congested(&fc->bdi, BLK_RW_SYNC);
                        clear_bdi_congested(&fc->bdi, BLK_RW_ASYNC);
                }
                fc->num_background--;
                fc->active_background--;
                flush_bg_queue(fc);
        }
        spin_unlock(&fc->lock);
        wake_up(&req->waitq);
        if (end)
                end(fc, req);
        fuse_put_request(fc, req);
}

static void wait_answer_interruptible(struct fuse_conn *fc,
                                      struct fuse_req *req)
__releases(&fc->lock)
__acquires(&fc->lock)
{
        if (signal_pending(current))
                return;

        spin_unlock(&fc->lock);
        wait_event_interruptible(req->waitq, req->state == FUSE_REQ_FINISHED);
        spin_lock(&fc->lock);
}

static void queue_interrupt(struct fuse_conn *fc, struct fuse_req *req)
{
        list_add_tail(&req->intr_entry, &fc->interrupts);
        wake_up(&fc->waitq);
        kill_fasync(&fc->fasync, SIGIO, POLL_IN);
}

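/*
 * Wait for a request to finish, in up to three phases: first a fully
 * interruptible wait (unless userspace never implemented INTERRUPT),
 * then a wait interruptible only by fatal signals, and finally, once
 * the request has reached userspace or was forced, an uninterruptible
 * wait.
 */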
static void request_wait_answer(struct fuse_conn *fc, struct fuse_req *req)
__releases(&fc->lock)
__acquires(&fc->lock)
{
        if (!fc->no_interrupt) {
                /* Any signal may interrupt this */
                wait_answer_interruptible(fc, req);

                if (req->aborted)
                        goto aborted;
                if (req->state == FUSE_REQ_FINISHED)
                        return;

                req->interrupted = 1;
                if (req->state == FUSE_REQ_SENT)
                        queue_interrupt(fc, req);
        }

        if (!req->force) {
                sigset_t oldset;

                /* Only fatal signals may interrupt this */
                block_sigs(&oldset);
                wait_answer_interruptible(fc, req);
                restore_sigs(&oldset);

                if (req->aborted)
                        goto aborted;
                if (req->state == FUSE_REQ_FINISHED)
                        return;

                /* Request is not yet in userspace, bail out */
                if (req->state == FUSE_REQ_PENDING) {
                        list_del(&req->list);
                        __fuse_put_request(req);
                        req->out.h.error = -EINTR;
                        return;
                }
        }

        /*
         * Either request is already in userspace, or it was forced.
         * Wait it out.
         */
        spin_unlock(&fc->lock);
        wait_event(req->waitq, req->state == FUSE_REQ_FINISHED);
        spin_lock(&fc->lock);

        if (!req->aborted)
                return;

 aborted:
        BUG_ON(req->state != FUSE_REQ_FINISHED);
        if (req->locked) {
                /* This is uninterruptible sleep, because data is
                   being copied to/from the buffers of req.  During
                   locked state, there mustn't be any filesystem
                   operation (e.g. page fault), since that could lead
                   to deadlock */
                spin_unlock(&fc->lock);
                wait_event(req->waitq, !req->locked);
                spin_lock(&fc->lock);
        }
}

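/*
 * Send a request and wait for the answer.  A sketch of how callers
 * elsewhere in fs/fuse typically use this (the opcode and argument
 * setup are illustrative, not taken from this file):
 *
 *	req = fuse_get_req(fc);
 *	if (IS_ERR(req))
 *		return PTR_ERR(req);
 *	...fill in req->in.h.opcode, req->in.args[], req->out.args[]...
 *	fuse_request_send(fc, req);
 *	err = req->out.h.error;
 *	fuse_put_request(fc, req);
 */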
void fuse_request_send(struct fuse_conn *fc, struct fuse_req *req)
{
        req->isreply = 1;
        spin_lock(&fc->lock);
        if (!fc->connected)
                req->out.h.error = -ENOTCONN;
        else if (fc->conn_error)
                req->out.h.error = -ECONNREFUSED;
        else {
                queue_request(fc, req);
                /* acquire extra reference, since request is still needed
                   after request_end() */
                __fuse_get_request(req);

                request_wait_answer(fc, req);
        }
        spin_unlock(&fc->lock);
}
EXPORT_SYMBOL_GPL(fuse_request_send);

static void fuse_request_send_nowait_locked(struct fuse_conn *fc,
                                            struct fuse_req *req)
{
        req->background = 1;
        fc->num_background++;
        if (fc->num_background == fc->max_background)
                fc->blocked = 1;
        if (fc->num_background == fc->congestion_threshold &&
            fc->bdi_initialized) {
                set_bdi_congested(&fc->bdi, BLK_RW_SYNC);
                set_bdi_congested(&fc->bdi, BLK_RW_ASYNC);
        }
        list_add_tail(&req->list, &fc->bg_queue);
        flush_bg_queue(fc);
}

static void fuse_request_send_nowait(struct fuse_conn *fc, struct fuse_req *req)
{
        spin_lock(&fc->lock);
        if (fc->connected) {
                fuse_request_send_nowait_locked(fc, req);
                spin_unlock(&fc->lock);
        } else {
                req->out.h.error = -ENOTCONN;
                request_end(fc, req);
        }
}

void fuse_request_send_noreply(struct fuse_conn *fc, struct fuse_req *req)
{
        req->isreply = 0;
        fuse_request_send_nowait(fc, req);
}

void fuse_request_send_background(struct fuse_conn *fc, struct fuse_req *req)
{
        req->isreply = 1;
        fuse_request_send_nowait(fc, req);
}
EXPORT_SYMBOL_GPL(fuse_request_send_background);

/*
 * Called under fc->lock
 *
 * fc->connected must have been checked previously
 */
void fuse_request_send_background_locked(struct fuse_conn *fc,
                                         struct fuse_req *req)
{
        req->isreply = 1;
        fuse_request_send_nowait_locked(fc, req);
}

/*
 * Lock the request.  Up to the next unlock_request() there mustn't be
 * anything that could cause a page-fault.  If the request was already
 * aborted, bail out.
 */
static int lock_request(struct fuse_conn *fc, struct fuse_req *req)
{
        int err = 0;
        if (req) {
                spin_lock(&fc->lock);
                if (req->aborted)
                        err = -ENOENT;
                else
                        req->locked = 1;
                spin_unlock(&fc->lock);
        }
        return err;
}

/*
 * Unlock the request.  If it was aborted while locked, the requester
 * thread is currently waiting for it to be unlocked, so wake it up.
 */
static void unlock_request(struct fuse_conn *fc, struct fuse_req *req)
{
        if (req) {
                spin_lock(&fc->lock);
                req->locked = 0;
                if (req->aborted)
                        wake_up(&req->waitq);
                spin_unlock(&fc->lock);
        }
}

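/*
 * Tracks the progress of a copy between the request's arguments/pages
 * and the userspace iovec: the current segment, the currently mapped
 * user page, and how many bytes of it are left.
 */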
struct fuse_copy_state {
        struct fuse_conn *fc;
        int write;
        struct fuse_req *req;
        const struct iovec *iov;
        unsigned long nr_segs;
        unsigned long seglen;
        unsigned long addr;
        struct page *pg;
        void *mapaddr;
        void *buf;
        unsigned len;
};

static void fuse_copy_init(struct fuse_copy_state *cs, struct fuse_conn *fc,
                           int write, struct fuse_req *req,
                           const struct iovec *iov, unsigned long nr_segs)
{
        memset(cs, 0, sizeof(*cs));
        cs->fc = fc;
        cs->write = write;
        cs->req = req;
        cs->iov = iov;
        cs->nr_segs = nr_segs;
}

/* Unmap and put previous page of userspace buffer */
static void fuse_copy_finish(struct fuse_copy_state *cs)
{
        if (cs->mapaddr) {
                kunmap_atomic(cs->mapaddr, KM_USER0);
                if (cs->write) {
                        flush_dcache_page(cs->pg);
                        set_page_dirty_lock(cs->pg);
                }
                put_page(cs->pg);
                cs->mapaddr = NULL;
        }
}

/*
 * Get another page's worth of the userspace buffer, map it into
 * kernel address space, and lock the request
 */
static int fuse_copy_fill(struct fuse_copy_state *cs)
{
        unsigned long offset;
        int err;

        unlock_request(cs->fc, cs->req);
        fuse_copy_finish(cs);
        if (!cs->seglen) {
                BUG_ON(!cs->nr_segs);
                cs->seglen = cs->iov[0].iov_len;
                cs->addr = (unsigned long) cs->iov[0].iov_base;
                cs->iov++;
                cs->nr_segs--;
        }
        down_read(&current->mm->mmap_sem);
        err = get_user_pages(current, current->mm, cs->addr, 1, cs->write, 0,
                             &cs->pg, NULL);
        up_read(&current->mm->mmap_sem);
        if (err < 0)
                return err;
        BUG_ON(err != 1);
        offset = cs->addr % PAGE_SIZE;
        cs->mapaddr = kmap_atomic(cs->pg, KM_USER0);
        cs->buf = cs->mapaddr + offset;
        cs->len = min(PAGE_SIZE - offset, cs->seglen);
        cs->seglen -= cs->len;
        cs->addr += cs->len;

        return lock_request(cs->fc, cs->req);
}

/* Do as much copy to/from userspace buffer as we can */
static int fuse_copy_do(struct fuse_copy_state *cs, void **val, unsigned *size)
{
        unsigned ncpy = min(*size, cs->len);
        if (val) {
                if (cs->write)
                        memcpy(cs->buf, *val, ncpy);
                else
                        memcpy(*val, cs->buf, ncpy);
                *val += ncpy;
        }
        *size -= ncpy;
        cs->len -= ncpy;
        cs->buf += ncpy;
        return ncpy;
}

/*
 * Copy a page in the request to/from the userspace buffer.  Must be
 * done atomically
 */
static int fuse_copy_page(struct fuse_copy_state *cs, struct page *page,
                          unsigned offset, unsigned count, int zeroing)
{
        if (page && zeroing && count < PAGE_SIZE) {
                void *mapaddr = kmap_atomic(page, KM_USER1);
                memset(mapaddr, 0, PAGE_SIZE);
                kunmap_atomic(mapaddr, KM_USER1);
        }
        while (count) {
                if (!cs->len) {
                        int err = fuse_copy_fill(cs);
                        if (err)
                                return err;
                }
                if (page) {
                        void *mapaddr = kmap_atomic(page, KM_USER1);
                        void *buf = mapaddr + offset;
                        offset += fuse_copy_do(cs, &buf, &count);
                        kunmap_atomic(mapaddr, KM_USER1);
                } else
                        offset += fuse_copy_do(cs, NULL, &count);
        }
        if (page && !cs->write)
                flush_dcache_page(page);
        return 0;
}

/* Copy pages in the request to/from userspace buffer */
static int fuse_copy_pages(struct fuse_copy_state *cs, unsigned nbytes,
                           int zeroing)
{
        unsigned i;
        struct fuse_req *req = cs->req;
        unsigned offset = req->page_offset;
        unsigned count = min(nbytes, (unsigned) PAGE_SIZE - offset);

        for (i = 0; i < req->num_pages && (nbytes || zeroing); i++) {
                struct page *page = req->pages[i];
                int err = fuse_copy_page(cs, page, offset, count, zeroing);
                if (err)
                        return err;

                nbytes -= count;
                count = min(nbytes, (unsigned) PAGE_SIZE);
                offset = 0;
        }
        return 0;
}

/* Copy a single argument in the request to/from userspace buffer */
static int fuse_copy_one(struct fuse_copy_state *cs, void *val, unsigned size)
{
        while (size) {
                if (!cs->len) {
                        int err = fuse_copy_fill(cs);
                        if (err)
                                return err;
                }
                fuse_copy_do(cs, &val, &size);
        }
        return 0;
}

/* Copy request arguments to/from userspace buffer */
static int fuse_copy_args(struct fuse_copy_state *cs, unsigned numargs,
                          unsigned argpages, struct fuse_arg *args,
                          int zeroing)
{
        int err = 0;
        unsigned i;

        for (i = 0; !err && i < numargs; i++)  {
                struct fuse_arg *arg = &args[i];
                if (i == numargs - 1 && argpages)
                        err = fuse_copy_pages(cs, arg->size, zeroing);
                else
                        err = fuse_copy_one(cs, arg->value, arg->size);
        }
        return err;
}

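/*
 * Nonzero iff there is a request or an interrupt waiting to be read.
 * Called with fc->lock held.
 */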
static int request_pending(struct fuse_conn *fc)
{
        return !list_empty(&fc->pending) || !list_empty(&fc->interrupts);
}

/* Wait until a request is available on the pending list */
static void request_wait(struct fuse_conn *fc)
__releases(&fc->lock)
__acquires(&fc->lock)
{
        DECLARE_WAITQUEUE(wait, current);

        add_wait_queue_exclusive(&fc->waitq, &wait);
        while (fc->connected && !request_pending(fc)) {
                set_current_state(TASK_INTERRUPTIBLE);
                if (signal_pending(current))
                        break;

                spin_unlock(&fc->lock);
                schedule();
                spin_lock(&fc->lock);
        }
        set_current_state(TASK_RUNNING);
        remove_wait_queue(&fc->waitq, &wait);
}

/*
 * Transfer an interrupt request to userspace
 *
 * Unlike other requests this is assembled on demand, without a need
 * to allocate a separate fuse_req structure.
 *
 * Called with fc->lock held, releases it
 */
static int fuse_read_interrupt(struct fuse_conn *fc, struct fuse_req *req,
                               const struct iovec *iov, unsigned long nr_segs)
__releases(&fc->lock)
{
        struct fuse_copy_state cs;
        struct fuse_in_header ih;
        struct fuse_interrupt_in arg;
        unsigned reqsize = sizeof(ih) + sizeof(arg);
        int err;

        list_del_init(&req->intr_entry);
        req->intr_unique = fuse_get_unique(fc);
        memset(&ih, 0, sizeof(ih));
        memset(&arg, 0, sizeof(arg));
        ih.len = reqsize;
        ih.opcode = FUSE_INTERRUPT;
        ih.unique = req->intr_unique;
        arg.unique = req->in.h.unique;

        spin_unlock(&fc->lock);
        if (iov_length(iov, nr_segs) < reqsize)
                return -EINVAL;

        fuse_copy_init(&cs, fc, 1, NULL, iov, nr_segs);
        err = fuse_copy_one(&cs, &ih, sizeof(ih));
        if (!err)
                err = fuse_copy_one(&cs, &arg, sizeof(arg));
        fuse_copy_finish(&cs);

        return err ? err : reqsize;
}

/*
 * Read a single request into the userspace filesystem's buffer.  This
 * function waits until a request is available, then removes it from
 * the pending list and copies the request data to the userspace
 * buffer.  If no reply is needed (FORGET) or the request has been
 * aborted or there was an error during the copying then it's finished
 * by calling request_end().  Otherwise add it to the processing list,
 * and set the 'sent' flag.
 */
static ssize_t fuse_dev_read(struct kiocb *iocb, const struct iovec *iov,
                              unsigned long nr_segs, loff_t pos)
{
        int err;
        struct fuse_req *req;
        struct fuse_in *in;
        struct fuse_copy_state cs;
        unsigned reqsize;
        struct file *file = iocb->ki_filp;
        struct fuse_conn *fc = fuse_get_conn(file);
        if (!fc)
                return -EPERM;

 restart:
        spin_lock(&fc->lock);
        err = -EAGAIN;
        if ((file->f_flags & O_NONBLOCK) && fc->connected &&
            !request_pending(fc))
                goto err_unlock;

        request_wait(fc);
        err = -ENODEV;
        if (!fc->connected)
                goto err_unlock;
        err = -ERESTARTSYS;
        if (!request_pending(fc))
                goto err_unlock;

        if (!list_empty(&fc->interrupts)) {
                req = list_entry(fc->interrupts.next, struct fuse_req,
                                 intr_entry);
                return fuse_read_interrupt(fc, req, iov, nr_segs);
        }

        req = list_entry(fc->pending.next, struct fuse_req, list);
        req->state = FUSE_REQ_READING;
        list_move(&req->list, &fc->io);

        in = &req->in;
        reqsize = in->h.len;
        /* If request is too large, reply with an error and restart the read */
        if (iov_length(iov, nr_segs) < reqsize) {
                req->out.h.error = -EIO;
                /* SETXATTR is special, since its data may legitimately
                   be too large for any read buffer */
                if (in->h.opcode == FUSE_SETXATTR)
                        req->out.h.error = -E2BIG;
                request_end(fc, req);
                goto restart;
        }
        spin_unlock(&fc->lock);
        fuse_copy_init(&cs, fc, 1, req, iov, nr_segs);
        err = fuse_copy_one(&cs, &in->h, sizeof(in->h));
        if (!err)
                err = fuse_copy_args(&cs, in->numargs, in->argpages,
                                     (struct fuse_arg *) in->args, 0);
        fuse_copy_finish(&cs);
        spin_lock(&fc->lock);
        req->locked = 0;
        if (req->aborted) {
                request_end(fc, req);
                return -ENODEV;
        }
        if (err) {
                req->out.h.error = -EIO;
                request_end(fc, req);
                return err;
        }
        if (!req->isreply)
                request_end(fc, req);
        else {
                req->state = FUSE_REQ_SENT;
                list_move_tail(&req->list, &fc->processing);
                if (req->interrupted)
                        queue_interrupt(fc, req);
                spin_unlock(&fc->lock);
        }
        return reqsize;

 err_unlock:
        spin_unlock(&fc->lock);
        return err;
}

static int fuse_notify_poll(struct fuse_conn *fc, unsigned int size,
                            struct fuse_copy_state *cs)
{
        struct fuse_notify_poll_wakeup_out outarg;
        int err = -EINVAL;

        if (size != sizeof(outarg))
                goto err;

        err = fuse_copy_one(cs, &outarg, sizeof(outarg));
        if (err)
                goto err;

        fuse_copy_finish(cs);
        return fuse_notify_poll_wakeup(fc, &outarg);

err:
        fuse_copy_finish(cs);
        return err;
}

static int fuse_notify_inval_inode(struct fuse_conn *fc, unsigned int size,
                                   struct fuse_copy_state *cs)
{
        struct fuse_notify_inval_inode_out outarg;
        int err = -EINVAL;

        if (size != sizeof(outarg))
                goto err;

        err = fuse_copy_one(cs, &outarg, sizeof(outarg));
        if (err)
                goto err;
        fuse_copy_finish(cs);

        down_read(&fc->killsb);
        err = -ENOENT;
        if (!fc->sb)
                goto err_unlock;

        err = fuse_reverse_inval_inode(fc->sb, outarg.ino,
                                       outarg.off, outarg.len);

err_unlock:
        up_read(&fc->killsb);
        return err;

err:
        fuse_copy_finish(cs);
        return err;
}

static int fuse_notify_inval_entry(struct fuse_conn *fc, unsigned int size,
                                   struct fuse_copy_state *cs)
{
        struct fuse_notify_inval_entry_out outarg;
        int err = -EINVAL;
        char buf[FUSE_NAME_MAX+1];
        struct qstr name;

        if (size < sizeof(outarg))
                goto err;

        err = fuse_copy_one(cs, &outarg, sizeof(outarg));
        if (err)
                goto err;

        err = -ENAMETOOLONG;
        if (outarg.namelen > FUSE_NAME_MAX)
                goto err;

        name.name = buf;
        name.len = outarg.namelen;
        err = fuse_copy_one(cs, buf, outarg.namelen + 1);
        if (err)
                goto err;
        fuse_copy_finish(cs);
        buf[outarg.namelen] = 0;
        name.hash = full_name_hash(name.name, name.len);

        down_read(&fc->killsb);
        err = -ENOENT;
        if (!fc->sb)
                goto err_unlock;

        err = fuse_reverse_inval_entry(fc->sb, outarg.parent, &name);

err_unlock:
        up_read(&fc->killsb);
        return err;

err:
        fuse_copy_finish(cs);
        return err;
}

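/*
 * Dispatch an unsolicited notification (a write with a zero unique
 * ID) to its handler.  Each handler consumes the remaining payload
 * from the copy state and must call fuse_copy_finish() on all paths.
 */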
static int fuse_notify(struct fuse_conn *fc, enum fuse_notify_code code,
                       unsigned int size, struct fuse_copy_state *cs)
{
        switch (code) {
        case FUSE_NOTIFY_POLL:
                return fuse_notify_poll(fc, size, cs);

        case FUSE_NOTIFY_INVAL_INODE:
                return fuse_notify_inval_inode(fc, size, cs);

        case FUSE_NOTIFY_INVAL_ENTRY:
                return fuse_notify_inval_entry(fc, size, cs);

        default:
                fuse_copy_finish(cs);
                return -EINVAL;
        }
}

/* Look up request on processing list by unique ID */
static struct fuse_req *request_find(struct fuse_conn *fc, u64 unique)
{
        struct list_head *entry;

        list_for_each(entry, &fc->processing) {
                struct fuse_req *req;
                req = list_entry(entry, struct fuse_req, list);
                if (req->in.h.unique == unique || req->intr_unique == unique)
                        return req;
        }
        return NULL;
}

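/*
 * Copy the reply arguments from the userspace buffer into the
 * request.  The reply size must match the expected size exactly,
 * except that the last argument may be shorter if out->argvar is set.
 */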
static int copy_out_args(struct fuse_copy_state *cs, struct fuse_out *out,
                         unsigned nbytes)
{
        unsigned reqsize = sizeof(struct fuse_out_header);

        if (out->h.error)
                return nbytes != reqsize ? -EINVAL : 0;

        reqsize += len_args(out->numargs, out->args);

        if (reqsize < nbytes || (reqsize > nbytes && !out->argvar))
                return -EINVAL;
        else if (reqsize > nbytes) {
                struct fuse_arg *lastarg = &out->args[out->numargs-1];
                unsigned diffsize = reqsize - nbytes;
                if (diffsize > lastarg->size)
                        return -EINVAL;
                lastarg->size -= diffsize;
        }
        return fuse_copy_args(cs, out->numargs, out->argpages, out->args,
                              out->page_zeroing);
}

/*
 * Write a single reply to a request.  First the header is copied from
 * the write buffer.  The request is then looked up on the processing
 * list by the unique ID found in the header.  If found, it is removed
 * from the list and the rest of the buffer is copied to the request.
 * The request is finished by calling request_end().
 */
static ssize_t fuse_dev_write(struct kiocb *iocb, const struct iovec *iov,
                               unsigned long nr_segs, loff_t pos)
{
        int err;
        size_t nbytes = iov_length(iov, nr_segs);
        struct fuse_req *req;
        struct fuse_out_header oh;
        struct fuse_copy_state cs;
        struct fuse_conn *fc = fuse_get_conn(iocb->ki_filp);
        if (!fc)
                return -EPERM;

        fuse_copy_init(&cs, fc, 0, NULL, iov, nr_segs);
        if (nbytes < sizeof(struct fuse_out_header))
                return -EINVAL;

        err = fuse_copy_one(&cs, &oh, sizeof(oh));
        if (err)
                goto err_finish;

        err = -EINVAL;
        if (oh.len != nbytes)
                goto err_finish;

        /*
         * A zero oh.unique indicates an unsolicited notification
         * message; oh.error then contains the notification code.
         */
        if (!oh.unique) {
                err = fuse_notify(fc, oh.error, nbytes - sizeof(oh), &cs);
                return err ? err : nbytes;
        }

        err = -EINVAL;
        if (oh.error <= -1000 || oh.error > 0)
                goto err_finish;

        spin_lock(&fc->lock);
        err = -ENOENT;
        if (!fc->connected)
                goto err_unlock;

        req = request_find(fc, oh.unique);
        if (!req)
                goto err_unlock;

        if (req->aborted) {
                spin_unlock(&fc->lock);
                fuse_copy_finish(&cs);
                spin_lock(&fc->lock);
                request_end(fc, req);
                return -ENOENT;
        }
        /* Is it an interrupt reply? */
        if (req->intr_unique == oh.unique) {
                err = -EINVAL;
                if (nbytes != sizeof(struct fuse_out_header))
                        goto err_unlock;

                if (oh.error == -ENOSYS)
                        fc->no_interrupt = 1;
                else if (oh.error == -EAGAIN)
                        queue_interrupt(fc, req);

                spin_unlock(&fc->lock);
                fuse_copy_finish(&cs);
                return nbytes;
        }

        req->state = FUSE_REQ_WRITING;
        list_move(&req->list, &fc->io);
        req->out.h = oh;
        req->locked = 1;
        cs.req = req;
        spin_unlock(&fc->lock);

        err = copy_out_args(&cs, &req->out, nbytes);
        fuse_copy_finish(&cs);

        spin_lock(&fc->lock);
        req->locked = 0;
        if (!err) {
                if (req->aborted)
                        err = -ENOENT;
        } else if (!req->aborted)
                req->out.h.error = -EIO;
        request_end(fc, req);

        return err ? err : nbytes;

 err_unlock:
        spin_unlock(&fc->lock);
 err_finish:
        fuse_copy_finish(&cs);
        return err;
}

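/*
 * The device is always writable; it is readable only when a request
 * or an interrupt is waiting to be picked up.
 */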
static unsigned fuse_dev_poll(struct file *file, poll_table *wait)
{
        unsigned mask = POLLOUT | POLLWRNORM;
        struct fuse_conn *fc = fuse_get_conn(file);
        if (!fc)
                return POLLERR;

        poll_wait(file, &fc->waitq, wait);

        spin_lock(&fc->lock);
        if (!fc->connected)
                mask = POLLERR;
        else if (request_pending(fc))
                mask |= POLLIN | POLLRDNORM;
        spin_unlock(&fc->lock);

        return mask;
}

/*
 * Abort all requests on the given list (pending or processing)
 *
 * This function releases and reacquires fc->lock
 */
static void end_requests(struct fuse_conn *fc, struct list_head *head)
__releases(&fc->lock)
__acquires(&fc->lock)
{
        while (!list_empty(head)) {
                struct fuse_req *req;
                req = list_entry(head->next, struct fuse_req, list);
                req->out.h.error = -ECONNABORTED;
                request_end(fc, req);
                spin_lock(&fc->lock);
        }
}

/*
 * Abort requests under I/O
 *
 * The requests are set to aborted and finished, and the request
 * waiter is woken up.  This will make request_wait_answer() wait
 * until the request is unlocked and then return.
 *
 * If the request is asynchronous, then the end function needs to be
 * called after waiting for the request to be unlocked (if it was
 * locked).
 */
static void end_io_requests(struct fuse_conn *fc)
__releases(&fc->lock)
__acquires(&fc->lock)
{
        while (!list_empty(&fc->io)) {
                struct fuse_req *req =
                        list_entry(fc->io.next, struct fuse_req, list);
                void (*end) (struct fuse_conn *, struct fuse_req *) = req->end;

                req->aborted = 1;
                req->out.h.error = -ECONNABORTED;
                req->state = FUSE_REQ_FINISHED;
                list_del_init(&req->list);
                wake_up(&req->waitq);
                if (end) {
                        req->end = NULL;
                        __fuse_get_request(req);
                        spin_unlock(&fc->lock);
                        wait_event(req->waitq, !req->locked);
                        end(fc, req);
                        fuse_put_request(fc, req);
                        spin_lock(&fc->lock);
                }
        }
}

/*
 * Abort all requests.
 *
 * Emergency exit in case of a malicious or accidental deadlock, or
 * just a hung filesystem.
 *
 * The same effect is usually achievable through killing the
 * filesystem daemon and all users of the filesystem.  The exception
 * is the combination of an asynchronous request and the tricky
 * deadlock (see Documentation/filesystems/fuse.txt).
 *
 * During the aborting, progression of requests from the pending and
 * processing lists onto the io list, and progression of new requests
 * onto the pending list is prevented by fc->connected being false.
 *
 * Progression of requests under I/O to the processing list is
 * prevented by the req->aborted flag being true for these requests.
 * For this reason requests on the io list must be aborted first.
 */
void fuse_abort_conn(struct fuse_conn *fc)
{
        spin_lock(&fc->lock);
        if (fc->connected) {
                fc->connected = 0;
                fc->blocked = 0;
                end_io_requests(fc);
                end_requests(fc, &fc->pending);
                end_requests(fc, &fc->processing);
                wake_up_all(&fc->waitq);
                wake_up_all(&fc->blocked_waitq);
                kill_fasync(&fc->fasync, SIGIO, POLL_IN);
        }
        spin_unlock(&fc->lock);
}
EXPORT_SYMBOL_GPL(fuse_abort_conn);

int fuse_dev_release(struct inode *inode, struct file *file)
{
        struct fuse_conn *fc = fuse_get_conn(file);
        if (fc) {
                spin_lock(&fc->lock);
                fc->connected = 0;
                end_requests(fc, &fc->pending);
                end_requests(fc, &fc->processing);
                spin_unlock(&fc->lock);
                fuse_conn_put(fc);
        }

        return 0;
}
EXPORT_SYMBOL_GPL(fuse_dev_release);

static int fuse_dev_fasync(int fd, struct file *file, int on)
{
        struct fuse_conn *fc = fuse_get_conn(file);
        if (!fc)
                return -EPERM;

        /* No locking - fasync_helper does its own locking */
        return fasync_helper(fd, file, on, &fc->fasync);
}

const struct file_operations fuse_dev_operations = {
        .owner          = THIS_MODULE,
        .llseek         = no_llseek,
        .read           = do_sync_read,
        .aio_read       = fuse_dev_read,
        .write          = do_sync_write,
        .aio_write      = fuse_dev_write,
        .poll           = fuse_dev_poll,
        .release        = fuse_dev_release,
        .fasync         = fuse_dev_fasync,
};
EXPORT_SYMBOL_GPL(fuse_dev_operations);

static struct miscdevice fuse_miscdevice = {
        .minor = FUSE_MINOR,
        .name  = "fuse",
        .fops = &fuse_dev_operations,
};

int __init fuse_dev_init(void)
{
        int err = -ENOMEM;
        fuse_req_cachep = kmem_cache_create("fuse_request",
                                            sizeof(struct fuse_req),
                                            0, 0, NULL);
        if (!fuse_req_cachep)
                goto out;

        err = misc_register(&fuse_miscdevice);
        if (err)
                goto out_cache_clean;

        return 0;

 out_cache_clean:
        kmem_cache_destroy(fuse_req_cachep);
 out:
        return err;
}

void fuse_dev_cleanup(void)
{
        misc_deregister(&fuse_miscdevice);
        kmem_cache_destroy(fuse_req_cachep);
}