linux/fs/fuse/virtio_fs.c
   1// SPDX-License-Identifier: GPL-2.0
   2/*
   3 * virtio-fs: Virtio Filesystem
   4 * Copyright (C) 2018 Red Hat, Inc.
   5 */
   6
   7#include <linux/fs.h>
   8#include <linux/dax.h>
   9#include <linux/pci.h>
  10#include <linux/pfn_t.h>
  11#include <linux/module.h>
  12#include <linux/virtio.h>
  13#include <linux/virtio_fs.h>
  14#include <linux/delay.h>
  15#include <linux/fs_context.h>
  16#include <linux/fs_parser.h>
  17#include <linux/highmem.h>
  18#include <linux/uio.h>
  19#include "fuse_i.h"
  20
  21/* Used to help calculate the FUSE connection's max_pages limit for a request's
   22 * size. Parts of the struct fuse_req are sliced into scatter-gather lists in
  23 * addition to the pages used, so this can help account for that overhead.
  24 */
  25#define FUSE_HEADER_OVERHEAD    4
  26
  27/* List of virtio-fs device instances and a lock for the list. Also provides
   28 * mutual exclusion between the device removal and mounting paths.
  29 */
  30static DEFINE_MUTEX(virtio_fs_mutex);
  31static LIST_HEAD(virtio_fs_instances);
  32
  33enum {
  34        VQ_HIPRIO,
  35        VQ_REQUEST
  36};
  37
  38#define VQ_NAME_LEN     24
  39
  40/* Per-virtqueue state */
  41struct virtio_fs_vq {
  42        spinlock_t lock;
  43        struct virtqueue *vq;     /* protected by ->lock */
  44        struct work_struct done_work;
  45        struct list_head queued_reqs;
  46        struct list_head end_reqs;      /* End these requests */
  47        struct delayed_work dispatch_work;
  48        struct fuse_dev *fud;
  49        bool connected;
  50        long in_flight;
  51        struct completion in_flight_zero; /* No inflight requests */
  52        char name[VQ_NAME_LEN];
  53} ____cacheline_aligned_in_smp;
  54
  55/* A virtio-fs device instance */
  56struct virtio_fs {
  57        struct kref refcount;
  58        struct list_head list;    /* on virtio_fs_instances */
  59        char *tag;
  60        struct virtio_fs_vq *vqs;
  61        unsigned int nvqs;               /* number of virtqueues */
  62        unsigned int num_request_queues; /* number of request queues */
  63        struct dax_device *dax_dev;
  64
  65        /* DAX memory window where file contents are mapped */
  66        void *window_kaddr;
  67        phys_addr_t window_phys_addr;
  68        size_t window_len;
  69};
  70
  71struct virtio_fs_forget_req {
  72        struct fuse_in_header ih;
  73        struct fuse_forget_in arg;
  74};
  75
  76struct virtio_fs_forget {
   77        /* This request can be temporarily queued on the virtqueue */
  78        struct list_head list;
  79        struct virtio_fs_forget_req req;
  80};
  81
  82struct virtio_fs_req_work {
  83        struct fuse_req *req;
  84        struct virtio_fs_vq *fsvq;
  85        struct work_struct done_work;
  86};
  87
  88static int virtio_fs_enqueue_req(struct virtio_fs_vq *fsvq,
  89                                 struct fuse_req *req, bool in_flight);
  90
  91enum {
  92        OPT_DAX,
  93};
  94
  95static const struct fs_parameter_spec virtio_fs_parameters[] = {
  96        fsparam_flag("dax", OPT_DAX),
  97        {}
  98};
  99
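/*
 * Mount-option handling. A typical invocation (an assumed example, not taken
 * from this file) would be:
 *
 *   mount -t virtiofs -o dax myfs /mnt
 *
 * where "myfs" is the device tag advertised in the virtio config space and
 * "dax" is the only option recognized by virtio_fs_parse_param() below.
 */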
 100static int virtio_fs_parse_param(struct fs_context *fsc,
 101                                 struct fs_parameter *param)
 102{
 103        struct fs_parse_result result;
 104        struct fuse_fs_context *ctx = fsc->fs_private;
 105        int opt;
 106
 107        opt = fs_parse(fsc, virtio_fs_parameters, param, &result);
 108        if (opt < 0)
 109                return opt;
 110
 111        switch (opt) {
 112        case OPT_DAX:
 113                ctx->dax = 1;
 114                break;
 115        default:
 116                return -EINVAL;
 117        }
 118
 119        return 0;
 120}
 121
 122static void virtio_fs_free_fsc(struct fs_context *fsc)
 123{
 124        struct fuse_fs_context *ctx = fsc->fs_private;
 125
 126        kfree(ctx);
 127}
 128
 129static inline struct virtio_fs_vq *vq_to_fsvq(struct virtqueue *vq)
 130{
 131        struct virtio_fs *fs = vq->vdev->priv;
 132
 133        return &fs->vqs[vq->index];
 134}
 135
 136/* Should be called with fsvq->lock held. */
 137static inline void inc_in_flight_req(struct virtio_fs_vq *fsvq)
 138{
 139        fsvq->in_flight++;
 140}
 141
 142/* Should be called with fsvq->lock held. */
 143static inline void dec_in_flight_req(struct virtio_fs_vq *fsvq)
 144{
 145        WARN_ON(fsvq->in_flight <= 0);
 146        fsvq->in_flight--;
 147        if (!fsvq->in_flight)
 148                complete(&fsvq->in_flight_zero);
 149}
 150
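/* kref release callback: frees the virtqueue array and the virtio_fs object
 * itself once the last reference (device or mount) has been dropped.
 */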
 151static void release_virtio_fs_obj(struct kref *ref)
 152{
 153        struct virtio_fs *vfs = container_of(ref, struct virtio_fs, refcount);
 154
 155        kfree(vfs->vqs);
 156        kfree(vfs);
 157}
 158
  159/* Make sure virtio_fs_mutex is held */
 160static void virtio_fs_put(struct virtio_fs *fs)
 161{
 162        kref_put(&fs->refcount, release_virtio_fs_obj);
 163}
 164
 165static void virtio_fs_fiq_release(struct fuse_iqueue *fiq)
 166{
 167        struct virtio_fs *vfs = fiq->priv;
 168
 169        mutex_lock(&virtio_fs_mutex);
 170        virtio_fs_put(vfs);
 171        mutex_unlock(&virtio_fs_mutex);
 172}
 173
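/* Wait until this queue has no requests in flight, then flush its completion
 * and dispatch work. The caller serializes on virtio_fs_mutex, so nobody else
 * can be re-initializing or waiting on in_flight_zero at the same time.
 */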
 174static void virtio_fs_drain_queue(struct virtio_fs_vq *fsvq)
 175{
 176        WARN_ON(fsvq->in_flight < 0);
 177
  178        /* Wait for in-flight requests to finish. */
 179        spin_lock(&fsvq->lock);
 180        if (fsvq->in_flight) {
 181                /* We are holding virtio_fs_mutex. There should not be any
 182                 * waiters waiting for completion.
 183                 */
 184                reinit_completion(&fsvq->in_flight_zero);
 185                spin_unlock(&fsvq->lock);
 186                wait_for_completion(&fsvq->in_flight_zero);
 187        } else {
 188                spin_unlock(&fsvq->lock);
 189        }
 190
 191        flush_work(&fsvq->done_work);
 192        flush_delayed_work(&fsvq->dispatch_work);
 193}
 194
 195static void virtio_fs_drain_all_queues_locked(struct virtio_fs *fs)
 196{
 197        struct virtio_fs_vq *fsvq;
 198        int i;
 199
 200        for (i = 0; i < fs->nvqs; i++) {
 201                fsvq = &fs->vqs[i];
 202                virtio_fs_drain_queue(fsvq);
 203        }
 204}
 205
 206static void virtio_fs_drain_all_queues(struct virtio_fs *fs)
 207{
 208        /* Provides mutual exclusion between ->remove and ->kill_sb
  209         * paths. We don't want both of these draining queues at the
 210         * same time. Current completion logic reinits completion
 211         * and that means there should not be any other thread
 212         * doing reinit or waiting for completion already.
 213         */
 214        mutex_lock(&virtio_fs_mutex);
 215        virtio_fs_drain_all_queues_locked(fs);
 216        mutex_unlock(&virtio_fs_mutex);
 217}
 218
 219static void virtio_fs_start_all_queues(struct virtio_fs *fs)
 220{
 221        struct virtio_fs_vq *fsvq;
 222        int i;
 223
 224        for (i = 0; i < fs->nvqs; i++) {
 225                fsvq = &fs->vqs[i];
 226                spin_lock(&fsvq->lock);
 227                fsvq->connected = true;
 228                spin_unlock(&fsvq->lock);
 229        }
 230}
 231
  232/* Add a new instance to the list or return -EEXIST if tag name exists */
 233static int virtio_fs_add_instance(struct virtio_fs *fs)
 234{
 235        struct virtio_fs *fs2;
 236        bool duplicate = false;
 237
 238        mutex_lock(&virtio_fs_mutex);
 239
 240        list_for_each_entry(fs2, &virtio_fs_instances, list) {
 241                if (strcmp(fs->tag, fs2->tag) == 0)
 242                        duplicate = true;
 243        }
 244
 245        if (!duplicate)
 246                list_add_tail(&fs->list, &virtio_fs_instances);
 247
 248        mutex_unlock(&virtio_fs_mutex);
 249
 250        if (duplicate)
 251                return -EEXIST;
 252        return 0;
 253}
 254
 255/* Return the virtio_fs with a given tag, or NULL */
 256static struct virtio_fs *virtio_fs_find_instance(const char *tag)
 257{
 258        struct virtio_fs *fs;
 259
 260        mutex_lock(&virtio_fs_mutex);
 261
 262        list_for_each_entry(fs, &virtio_fs_instances, list) {
 263                if (strcmp(fs->tag, tag) == 0) {
 264                        kref_get(&fs->refcount);
 265                        goto found;
 266                }
 267        }
 268
 269        fs = NULL; /* not found */
 270
 271found:
 272        mutex_unlock(&virtio_fs_mutex);
 273
 274        return fs;
 275}
 276
 277static void virtio_fs_free_devs(struct virtio_fs *fs)
 278{
 279        unsigned int i;
 280
 281        for (i = 0; i < fs->nvqs; i++) {
 282                struct virtio_fs_vq *fsvq = &fs->vqs[i];
 283
 284                if (!fsvq->fud)
 285                        continue;
 286
 287                fuse_dev_free(fsvq->fud);
 288                fsvq->fud = NULL;
 289        }
 290}
 291
  292/* Read filesystem name from virtio config into fs->tag (devm-allocated) */
 293static int virtio_fs_read_tag(struct virtio_device *vdev, struct virtio_fs *fs)
 294{
 295        char tag_buf[sizeof_field(struct virtio_fs_config, tag)];
 296        char *end;
 297        size_t len;
 298
 299        virtio_cread_bytes(vdev, offsetof(struct virtio_fs_config, tag),
 300                           &tag_buf, sizeof(tag_buf));
 301        end = memchr(tag_buf, '\0', sizeof(tag_buf));
 302        if (end == tag_buf)
 303                return -EINVAL; /* empty tag */
 304        if (!end)
 305                end = &tag_buf[sizeof(tag_buf)];
 306
 307        len = end - tag_buf;
 308        fs->tag = devm_kmalloc(&vdev->dev, len + 1, GFP_KERNEL);
 309        if (!fs->tag)
 310                return -ENOMEM;
 311        memcpy(fs->tag, tag_buf, len);
 312        fs->tag[len] = '\0';
 313        return 0;
 314}
 315
 316/* Work function for hiprio completion */
 317static void virtio_fs_hiprio_done_work(struct work_struct *work)
 318{
 319        struct virtio_fs_vq *fsvq = container_of(work, struct virtio_fs_vq,
 320                                                 done_work);
 321        struct virtqueue *vq = fsvq->vq;
 322
 323        /* Free completed FUSE_FORGET requests */
 324        spin_lock(&fsvq->lock);
 325        do {
 326                unsigned int len;
 327                void *req;
 328
 329                virtqueue_disable_cb(vq);
 330
 331                while ((req = virtqueue_get_buf(vq, &len)) != NULL) {
 332                        kfree(req);
 333                        dec_in_flight_req(fsvq);
 334                }
 335        } while (!virtqueue_enable_cb(vq) && likely(!virtqueue_is_broken(vq)));
 336        spin_unlock(&fsvq->lock);
 337}
 338
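/* Dispatch work for request queues: first complete requests parked on
 * ->end_reqs (they failed submission), then retry submitting requests queued
 * on ->queued_reqs, rescheduling itself if the virtqueue is still full
 * (-ENOMEM/-ENOSPC).
 */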
 339static void virtio_fs_request_dispatch_work(struct work_struct *work)
 340{
 341        struct fuse_req *req;
 342        struct virtio_fs_vq *fsvq = container_of(work, struct virtio_fs_vq,
 343                                                 dispatch_work.work);
 344        int ret;
 345
 346        pr_debug("virtio-fs: worker %s called.\n", __func__);
 347        while (1) {
 348                spin_lock(&fsvq->lock);
 349                req = list_first_entry_or_null(&fsvq->end_reqs, struct fuse_req,
 350                                               list);
 351                if (!req) {
 352                        spin_unlock(&fsvq->lock);
 353                        break;
 354                }
 355
 356                list_del_init(&req->list);
 357                spin_unlock(&fsvq->lock);
 358                fuse_request_end(req);
 359        }
 360
 361        /* Dispatch pending requests */
 362        while (1) {
 363                spin_lock(&fsvq->lock);
 364                req = list_first_entry_or_null(&fsvq->queued_reqs,
 365                                               struct fuse_req, list);
 366                if (!req) {
 367                        spin_unlock(&fsvq->lock);
 368                        return;
 369                }
 370                list_del_init(&req->list);
 371                spin_unlock(&fsvq->lock);
 372
 373                ret = virtio_fs_enqueue_req(fsvq, req, true);
 374                if (ret < 0) {
 375                        if (ret == -ENOMEM || ret == -ENOSPC) {
 376                                spin_lock(&fsvq->lock);
 377                                list_add_tail(&req->list, &fsvq->queued_reqs);
 378                                schedule_delayed_work(&fsvq->dispatch_work,
 379                                                      msecs_to_jiffies(1));
 380                                spin_unlock(&fsvq->lock);
 381                                return;
 382                        }
 383                        req->out.h.error = ret;
 384                        spin_lock(&fsvq->lock);
 385                        dec_in_flight_req(fsvq);
 386                        spin_unlock(&fsvq->lock);
 387                        pr_err("virtio-fs: virtio_fs_enqueue_req() failed %d\n",
 388                               ret);
 389                        fuse_request_end(req);
 390                }
 391        }
 392}
 393
 394/*
 395 * Returns 1 if queue is full and sender should wait a bit before sending
 396 * next request, 0 otherwise.
 397 */
 398static int send_forget_request(struct virtio_fs_vq *fsvq,
 399                               struct virtio_fs_forget *forget,
 400                               bool in_flight)
 401{
 402        struct scatterlist sg;
 403        struct virtqueue *vq;
 404        int ret = 0;
 405        bool notify;
 406        struct virtio_fs_forget_req *req = &forget->req;
 407
 408        spin_lock(&fsvq->lock);
 409        if (!fsvq->connected) {
 410                if (in_flight)
 411                        dec_in_flight_req(fsvq);
 412                kfree(forget);
 413                goto out;
 414        }
 415
 416        sg_init_one(&sg, req, sizeof(*req));
 417        vq = fsvq->vq;
 418        dev_dbg(&vq->vdev->dev, "%s\n", __func__);
 419
 420        ret = virtqueue_add_outbuf(vq, &sg, 1, forget, GFP_ATOMIC);
 421        if (ret < 0) {
 422                if (ret == -ENOMEM || ret == -ENOSPC) {
 423                        pr_debug("virtio-fs: Could not queue FORGET: err=%d. Will try later\n",
 424                                 ret);
 425                        list_add_tail(&forget->list, &fsvq->queued_reqs);
 426                        schedule_delayed_work(&fsvq->dispatch_work,
 427                                              msecs_to_jiffies(1));
 428                        if (!in_flight)
 429                                inc_in_flight_req(fsvq);
 430                        /* Queue is full */
 431                        ret = 1;
 432                } else {
 433                        pr_debug("virtio-fs: Could not queue FORGET: err=%d. Dropping it.\n",
 434                                 ret);
 435                        kfree(forget);
 436                        if (in_flight)
 437                                dec_in_flight_req(fsvq);
 438                }
 439                goto out;
 440        }
 441
 442        if (!in_flight)
 443                inc_in_flight_req(fsvq);
 444        notify = virtqueue_kick_prepare(vq);
 445        spin_unlock(&fsvq->lock);
 446
 447        if (notify)
 448                virtqueue_notify(vq);
 449        return ret;
 450out:
 451        spin_unlock(&fsvq->lock);
 452        return ret;
 453}
 454
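/* Dispatch work for the hiprio queue: keep resending queued FORGET requests
 * until the list is empty or the virtqueue fills up again.
 */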
 455static void virtio_fs_hiprio_dispatch_work(struct work_struct *work)
 456{
 457        struct virtio_fs_forget *forget;
 458        struct virtio_fs_vq *fsvq = container_of(work, struct virtio_fs_vq,
 459                                                 dispatch_work.work);
 460        pr_debug("virtio-fs: worker %s called.\n", __func__);
 461        while (1) {
 462                spin_lock(&fsvq->lock);
 463                forget = list_first_entry_or_null(&fsvq->queued_reqs,
 464                                        struct virtio_fs_forget, list);
 465                if (!forget) {
 466                        spin_unlock(&fsvq->lock);
 467                        return;
 468                }
 469
 470                list_del(&forget->list);
 471                spin_unlock(&fsvq->lock);
 472                if (send_forget_request(fsvq, forget, true))
 473                        return;
 474        }
 475}
 476
 477/* Allocate and copy args into req->argbuf */
 478static int copy_args_to_argbuf(struct fuse_req *req)
 479{
 480        struct fuse_args *args = req->args;
 481        unsigned int offset = 0;
 482        unsigned int num_in;
 483        unsigned int num_out;
 484        unsigned int len;
 485        unsigned int i;
 486
 487        num_in = args->in_numargs - args->in_pages;
 488        num_out = args->out_numargs - args->out_pages;
 489        len = fuse_len_args(num_in, (struct fuse_arg *) args->in_args) +
 490              fuse_len_args(num_out, args->out_args);
 491
 492        req->argbuf = kmalloc(len, GFP_ATOMIC);
 493        if (!req->argbuf)
 494                return -ENOMEM;
 495
 496        for (i = 0; i < num_in; i++) {
 497                memcpy(req->argbuf + offset,
 498                       args->in_args[i].value,
 499                       args->in_args[i].size);
 500                offset += args->in_args[i].size;
 501        }
 502
 503        return 0;
 504}
 505
 506/* Copy args out of and free req->argbuf */
 507static void copy_args_from_argbuf(struct fuse_args *args, struct fuse_req *req)
 508{
 509        unsigned int remaining;
 510        unsigned int offset;
 511        unsigned int num_in;
 512        unsigned int num_out;
 513        unsigned int i;
 514
 515        remaining = req->out.h.len - sizeof(req->out.h);
 516        num_in = args->in_numargs - args->in_pages;
 517        num_out = args->out_numargs - args->out_pages;
 518        offset = fuse_len_args(num_in, (struct fuse_arg *)args->in_args);
 519
 520        for (i = 0; i < num_out; i++) {
 521                unsigned int argsize = args->out_args[i].size;
 522
 523                if (args->out_argvar &&
 524                    i == args->out_numargs - 1 &&
 525                    argsize > remaining) {
 526                        argsize = remaining;
 527                }
 528
 529                memcpy(args->out_args[i].value, req->argbuf + offset, argsize);
 530                offset += argsize;
 531
 532                if (i != args->out_numargs - 1)
 533                        remaining -= argsize;
 534        }
 535
 536        /* Store the actual size of the variable-length arg */
 537        if (args->out_argvar)
 538                args->out_args[args->out_numargs - 1].size = remaining;
 539
 540        kfree(req->argbuf);
 541        req->argbuf = NULL;
 542}
 543
 544/* Work function for request completion */
 545static void virtio_fs_request_complete(struct fuse_req *req,
 546                                       struct virtio_fs_vq *fsvq)
 547{
 548        struct fuse_pqueue *fpq = &fsvq->fud->pq;
 549        struct fuse_args *args;
 550        struct fuse_args_pages *ap;
 551        unsigned int len, i, thislen;
 552        struct page *page;
 553
 554        /*
 555         * TODO verify that server properly follows FUSE protocol
 556         * (oh.uniq, oh.len)
 557         */
 558        args = req->args;
 559        copy_args_from_argbuf(args, req);
 560
 561        if (args->out_pages && args->page_zeroing) {
 562                len = args->out_args[args->out_numargs - 1].size;
 563                ap = container_of(args, typeof(*ap), args);
 564                for (i = 0; i < ap->num_pages; i++) {
 565                        thislen = ap->descs[i].length;
 566                        if (len < thislen) {
 567                                WARN_ON(ap->descs[i].offset);
 568                                page = ap->pages[i];
 569                                zero_user_segment(page, len, thislen);
 570                                len = 0;
 571                        } else {
 572                                len -= thislen;
 573                        }
 574                }
 575        }
 576
 577        spin_lock(&fpq->lock);
 578        clear_bit(FR_SENT, &req->flags);
 579        spin_unlock(&fpq->lock);
 580
 581        fuse_request_end(req);
 582        spin_lock(&fsvq->lock);
 583        dec_in_flight_req(fsvq);
 584        spin_unlock(&fsvq->lock);
 585}
 586
 587static void virtio_fs_complete_req_work(struct work_struct *work)
 588{
 589        struct virtio_fs_req_work *w =
 590                container_of(work, typeof(*w), done_work);
 591
 592        virtio_fs_request_complete(w->req, w->fsvq);
 593        kfree(w);
 594}
 595
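/* Completion work for request queues: collect finished requests off the
 * virtqueue, then end them. Requests whose completion may block
 * (args->may_block) are handed to a separate work item so they do not hold
 * up the rest.
 */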
 596static void virtio_fs_requests_done_work(struct work_struct *work)
 597{
 598        struct virtio_fs_vq *fsvq = container_of(work, struct virtio_fs_vq,
 599                                                 done_work);
 600        struct fuse_pqueue *fpq = &fsvq->fud->pq;
 601        struct virtqueue *vq = fsvq->vq;
 602        struct fuse_req *req;
 603        struct fuse_req *next;
 604        unsigned int len;
 605        LIST_HEAD(reqs);
 606
 607        /* Collect completed requests off the virtqueue */
 608        spin_lock(&fsvq->lock);
 609        do {
 610                virtqueue_disable_cb(vq);
 611
 612                while ((req = virtqueue_get_buf(vq, &len)) != NULL) {
 613                        spin_lock(&fpq->lock);
 614                        list_move_tail(&req->list, &reqs);
 615                        spin_unlock(&fpq->lock);
 616                }
 617        } while (!virtqueue_enable_cb(vq) && likely(!virtqueue_is_broken(vq)));
 618        spin_unlock(&fsvq->lock);
 619
 620        /* End requests */
 621        list_for_each_entry_safe(req, next, &reqs, list) {
 622                list_del_init(&req->list);
 623
 624                /* blocking async request completes in a worker context */
 625                if (req->args->may_block) {
 626                        struct virtio_fs_req_work *w;
 627
 628                        w = kzalloc(sizeof(*w), GFP_NOFS | __GFP_NOFAIL);
 629                        INIT_WORK(&w->done_work, virtio_fs_complete_req_work);
 630                        w->fsvq = fsvq;
 631                        w->req = req;
 632                        schedule_work(&w->done_work);
 633                } else {
 634                        virtio_fs_request_complete(req, fsvq);
 635                }
 636        }
 637}
 638
 639/* Virtqueue interrupt handler */
 640static void virtio_fs_vq_done(struct virtqueue *vq)
 641{
 642        struct virtio_fs_vq *fsvq = vq_to_fsvq(vq);
 643
 644        dev_dbg(&vq->vdev->dev, "%s %s\n", __func__, fsvq->name);
 645
 646        schedule_work(&fsvq->done_work);
 647}
 648
 649static void virtio_fs_init_vq(struct virtio_fs_vq *fsvq, char *name,
 650                              int vq_type)
 651{
 652        strncpy(fsvq->name, name, VQ_NAME_LEN);
 653        spin_lock_init(&fsvq->lock);
 654        INIT_LIST_HEAD(&fsvq->queued_reqs);
 655        INIT_LIST_HEAD(&fsvq->end_reqs);
 656        init_completion(&fsvq->in_flight_zero);
 657
 658        if (vq_type == VQ_REQUEST) {
 659                INIT_WORK(&fsvq->done_work, virtio_fs_requests_done_work);
 660                INIT_DELAYED_WORK(&fsvq->dispatch_work,
 661                                  virtio_fs_request_dispatch_work);
 662        } else {
 663                INIT_WORK(&fsvq->done_work, virtio_fs_hiprio_done_work);
 664                INIT_DELAYED_WORK(&fsvq->dispatch_work,
 665                                  virtio_fs_hiprio_dispatch_work);
 666        }
 667}
 668
 669/* Initialize virtqueues */
 670static int virtio_fs_setup_vqs(struct virtio_device *vdev,
 671                               struct virtio_fs *fs)
 672{
 673        struct virtqueue **vqs;
 674        vq_callback_t **callbacks;
 675        const char **names;
 676        unsigned int i;
 677        int ret = 0;
 678
 679        virtio_cread_le(vdev, struct virtio_fs_config, num_request_queues,
 680                        &fs->num_request_queues);
 681        if (fs->num_request_queues == 0)
 682                return -EINVAL;
 683
 684        fs->nvqs = VQ_REQUEST + fs->num_request_queues;
 685        fs->vqs = kcalloc(fs->nvqs, sizeof(fs->vqs[VQ_HIPRIO]), GFP_KERNEL);
 686        if (!fs->vqs)
 687                return -ENOMEM;
 688
 689        vqs = kmalloc_array(fs->nvqs, sizeof(vqs[VQ_HIPRIO]), GFP_KERNEL);
 690        callbacks = kmalloc_array(fs->nvqs, sizeof(callbacks[VQ_HIPRIO]),
 691                                        GFP_KERNEL);
 692        names = kmalloc_array(fs->nvqs, sizeof(names[VQ_HIPRIO]), GFP_KERNEL);
 693        if (!vqs || !callbacks || !names) {
 694                ret = -ENOMEM;
 695                goto out;
 696        }
 697
 698        /* Initialize the hiprio/forget request virtqueue */
 699        callbacks[VQ_HIPRIO] = virtio_fs_vq_done;
 700        virtio_fs_init_vq(&fs->vqs[VQ_HIPRIO], "hiprio", VQ_HIPRIO);
 701        names[VQ_HIPRIO] = fs->vqs[VQ_HIPRIO].name;
 702
 703        /* Initialize the requests virtqueues */
 704        for (i = VQ_REQUEST; i < fs->nvqs; i++) {
 705                char vq_name[VQ_NAME_LEN];
 706
 707                snprintf(vq_name, VQ_NAME_LEN, "requests.%u", i - VQ_REQUEST);
 708                virtio_fs_init_vq(&fs->vqs[i], vq_name, VQ_REQUEST);
 709                callbacks[i] = virtio_fs_vq_done;
 710                names[i] = fs->vqs[i].name;
 711        }
 712
 713        ret = virtio_find_vqs(vdev, fs->nvqs, vqs, callbacks, names, NULL);
 714        if (ret < 0)
 715                goto out;
 716
 717        for (i = 0; i < fs->nvqs; i++)
 718                fs->vqs[i].vq = vqs[i];
 719
 720        virtio_fs_start_all_queues(fs);
 721out:
 722        kfree(names);
 723        kfree(callbacks);
 724        kfree(vqs);
 725        if (ret)
 726                kfree(fs->vqs);
 727        return ret;
 728}
 729
 730/* Free virtqueues (device must already be reset) */
 731static void virtio_fs_cleanup_vqs(struct virtio_device *vdev,
 732                                  struct virtio_fs *fs)
 733{
 734        vdev->config->del_vqs(vdev);
 735}
 736
 737/* Map a window offset to a page frame number.  The window offset will have
 738 * been produced by .iomap_begin(), which maps a file offset to a window
 739 * offset.
 740 */
 741static long virtio_fs_direct_access(struct dax_device *dax_dev, pgoff_t pgoff,
 742                                    long nr_pages, void **kaddr, pfn_t *pfn)
 743{
 744        struct virtio_fs *fs = dax_get_private(dax_dev);
 745        phys_addr_t offset = PFN_PHYS(pgoff);
 746        size_t max_nr_pages = fs->window_len/PAGE_SIZE - pgoff;
 747
 748        if (kaddr)
 749                *kaddr = fs->window_kaddr + offset;
 750        if (pfn)
 751                *pfn = phys_to_pfn_t(fs->window_phys_addr + offset,
 752                                        PFN_DEV | PFN_MAP);
 753        return nr_pages > max_nr_pages ? max_nr_pages : nr_pages;
 754}
 755
 756static size_t virtio_fs_copy_from_iter(struct dax_device *dax_dev,
 757                                       pgoff_t pgoff, void *addr,
 758                                       size_t bytes, struct iov_iter *i)
 759{
 760        return copy_from_iter(addr, bytes, i);
 761}
 762
 763static size_t virtio_fs_copy_to_iter(struct dax_device *dax_dev,
 764                                       pgoff_t pgoff, void *addr,
 765                                       size_t bytes, struct iov_iter *i)
 766{
 767        return copy_to_iter(addr, bytes, i);
 768}
 769
 770static int virtio_fs_zero_page_range(struct dax_device *dax_dev,
 771                                     pgoff_t pgoff, size_t nr_pages)
 772{
 773        long rc;
 774        void *kaddr;
 775
 776        rc = dax_direct_access(dax_dev, pgoff, nr_pages, &kaddr, NULL);
 777        if (rc < 0)
 778                return rc;
 779        memset(kaddr, 0, nr_pages << PAGE_SHIFT);
 780        dax_flush(dax_dev, kaddr, nr_pages << PAGE_SHIFT);
 781        return 0;
 782}
 783
 784static const struct dax_operations virtio_fs_dax_ops = {
 785        .direct_access = virtio_fs_direct_access,
 786        .copy_from_iter = virtio_fs_copy_from_iter,
 787        .copy_to_iter = virtio_fs_copy_to_iter,
 788        .zero_page_range = virtio_fs_zero_page_range,
 789};
 790
 791static void virtio_fs_cleanup_dax(void *data)
 792{
 793        struct dax_device *dax_dev = data;
 794
 795        kill_dax(dax_dev);
 796        put_dax(dax_dev);
 797}
 798
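/* Set up the DAX window, if the device exposes a cache shared memory region:
 * remap it as MEMORY_DEVICE_FS_DAX pages and allocate a dax_device whose
 * operations resolve the window offsets produced by .iomap_begin().
 */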
 799static int virtio_fs_setup_dax(struct virtio_device *vdev, struct virtio_fs *fs)
 800{
 801        struct virtio_shm_region cache_reg;
 802        struct dev_pagemap *pgmap;
 803        bool have_cache;
 804
 805        if (!IS_ENABLED(CONFIG_FUSE_DAX))
 806                return 0;
 807
 808        /* Get cache region */
 809        have_cache = virtio_get_shm_region(vdev, &cache_reg,
 810                                           (u8)VIRTIO_FS_SHMCAP_ID_CACHE);
 811        if (!have_cache) {
 812                dev_notice(&vdev->dev, "%s: No cache capability\n", __func__);
 813                return 0;
 814        }
 815
 816        if (!devm_request_mem_region(&vdev->dev, cache_reg.addr, cache_reg.len,
 817                                     dev_name(&vdev->dev))) {
 818                dev_warn(&vdev->dev, "could not reserve region addr=0x%llx len=0x%llx\n",
 819                         cache_reg.addr, cache_reg.len);
 820                return -EBUSY;
 821        }
 822
 823        dev_notice(&vdev->dev, "Cache len: 0x%llx @ 0x%llx\n", cache_reg.len,
 824                   cache_reg.addr);
 825
 826        pgmap = devm_kzalloc(&vdev->dev, sizeof(*pgmap), GFP_KERNEL);
 827        if (!pgmap)
 828                return -ENOMEM;
 829
 830        pgmap->type = MEMORY_DEVICE_FS_DAX;
 831
 832        /* Ideally we would directly use the PCI BAR resource but
 833         * devm_memremap_pages() wants its own copy in pgmap.  So
  834         * initialize a struct range from scratch (only the start
 835         * and end fields will be used).
 836         */
 837        pgmap->range = (struct range) {
 838                .start = (phys_addr_t) cache_reg.addr,
 839                .end = (phys_addr_t) cache_reg.addr + cache_reg.len - 1,
 840        };
 841        pgmap->nr_range = 1;
 842
 843        fs->window_kaddr = devm_memremap_pages(&vdev->dev, pgmap);
 844        if (IS_ERR(fs->window_kaddr))
 845                return PTR_ERR(fs->window_kaddr);
 846
 847        fs->window_phys_addr = (phys_addr_t) cache_reg.addr;
 848        fs->window_len = (phys_addr_t) cache_reg.len;
 849
 850        dev_dbg(&vdev->dev, "%s: window kaddr 0x%px phys_addr 0x%llx len 0x%llx\n",
 851                __func__, fs->window_kaddr, cache_reg.addr, cache_reg.len);
 852
 853        fs->dax_dev = alloc_dax(fs, NULL, &virtio_fs_dax_ops, 0);
 854        if (IS_ERR(fs->dax_dev))
 855                return PTR_ERR(fs->dax_dev);
 856
 857        return devm_add_action_or_reset(&vdev->dev, virtio_fs_cleanup_dax,
 858                                        fs->dax_dev);
 859}
 860
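/* Driver ->probe: read the tag, set up the virtqueues and (optionally) the
 * DAX window, bring the device online and register the instance so it can be
 * found by tag at mount time.
 */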
 861static int virtio_fs_probe(struct virtio_device *vdev)
 862{
 863        struct virtio_fs *fs;
 864        int ret;
 865
 866        fs = kzalloc(sizeof(*fs), GFP_KERNEL);
 867        if (!fs)
 868                return -ENOMEM;
 869        kref_init(&fs->refcount);
 870        vdev->priv = fs;
 871
 872        ret = virtio_fs_read_tag(vdev, fs);
 873        if (ret < 0)
 874                goto out;
 875
 876        ret = virtio_fs_setup_vqs(vdev, fs);
 877        if (ret < 0)
 878                goto out;
 879
 880        /* TODO vq affinity */
 881
 882        ret = virtio_fs_setup_dax(vdev, fs);
 883        if (ret < 0)
 884                goto out_vqs;
 885
 886        /* Bring the device online in case the filesystem is mounted and
 887         * requests need to be sent before we return.
 888         */
 889        virtio_device_ready(vdev);
 890
 891        ret = virtio_fs_add_instance(fs);
 892        if (ret < 0)
 893                goto out_vqs;
 894
 895        return 0;
 896
 897out_vqs:
 898        vdev->config->reset(vdev);
 899        virtio_fs_cleanup_vqs(vdev, fs);
 900        kfree(fs->vqs);
 901
 902out:
 903        vdev->priv = NULL;
 904        kfree(fs);
 905        return ret;
 906}
 907
 908static void virtio_fs_stop_all_queues(struct virtio_fs *fs)
 909{
 910        struct virtio_fs_vq *fsvq;
 911        int i;
 912
 913        for (i = 0; i < fs->nvqs; i++) {
 914                fsvq = &fs->vqs[i];
 915                spin_lock(&fsvq->lock);
 916                fsvq->connected = false;
 917                spin_unlock(&fsvq->lock);
 918        }
 919}
 920
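/* Driver ->remove: unlist the instance, mark every queue disconnected, drain
 * them and drop the device's reference. A mounted filesystem may still hold
 * its own reference, so the object is freed only when that is released.
 */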
 921static void virtio_fs_remove(struct virtio_device *vdev)
 922{
 923        struct virtio_fs *fs = vdev->priv;
 924
 925        mutex_lock(&virtio_fs_mutex);
  926        /* This device is going away. No one should get a new reference */
 927        list_del_init(&fs->list);
 928        virtio_fs_stop_all_queues(fs);
 929        virtio_fs_drain_all_queues_locked(fs);
 930        vdev->config->reset(vdev);
 931        virtio_fs_cleanup_vqs(vdev, fs);
 932
 933        vdev->priv = NULL;
 934        /* Put device reference on virtio_fs object */
 935        virtio_fs_put(fs);
 936        mutex_unlock(&virtio_fs_mutex);
 937}
 938
 939#ifdef CONFIG_PM_SLEEP
 940static int virtio_fs_freeze(struct virtio_device *vdev)
 941{
 942        /* TODO need to save state here */
 943        pr_warn("virtio-fs: suspend/resume not yet supported\n");
 944        return -EOPNOTSUPP;
 945}
 946
 947static int virtio_fs_restore(struct virtio_device *vdev)
 948{
 949         /* TODO need to restore state here */
 950        return 0;
 951}
 952#endif /* CONFIG_PM_SLEEP */
 953
 954static const struct virtio_device_id id_table[] = {
 955        { VIRTIO_ID_FS, VIRTIO_DEV_ANY_ID },
 956        {},
 957};
 958
 959static const unsigned int feature_table[] = {};
 960
 961static struct virtio_driver virtio_fs_driver = {
 962        .driver.name            = KBUILD_MODNAME,
 963        .driver.owner           = THIS_MODULE,
 964        .id_table               = id_table,
 965        .feature_table          = feature_table,
 966        .feature_table_size     = ARRAY_SIZE(feature_table),
 967        .probe                  = virtio_fs_probe,
 968        .remove                 = virtio_fs_remove,
 969#ifdef CONFIG_PM_SLEEP
 970        .freeze                 = virtio_fs_freeze,
 971        .restore                = virtio_fs_restore,
 972#endif
 973};
 974
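/* fuse_iqueue_ops ->wake_forget_and_unlock: build a FUSE_FORGET request and
 * send it on the hiprio queue. The allocation uses __GFP_NOFAIL because a
 * forget must never be dropped.
 */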
 975static void virtio_fs_wake_forget_and_unlock(struct fuse_iqueue *fiq)
 976__releases(fiq->lock)
 977{
 978        struct fuse_forget_link *link;
 979        struct virtio_fs_forget *forget;
 980        struct virtio_fs_forget_req *req;
 981        struct virtio_fs *fs;
 982        struct virtio_fs_vq *fsvq;
 983        u64 unique;
 984
 985        link = fuse_dequeue_forget(fiq, 1, NULL);
 986        unique = fuse_get_unique(fiq);
 987
 988        fs = fiq->priv;
 989        fsvq = &fs->vqs[VQ_HIPRIO];
 990        spin_unlock(&fiq->lock);
 991
 992        /* Allocate a buffer for the request */
 993        forget = kmalloc(sizeof(*forget), GFP_NOFS | __GFP_NOFAIL);
 994        req = &forget->req;
 995
 996        req->ih = (struct fuse_in_header){
 997                .opcode = FUSE_FORGET,
 998                .nodeid = link->forget_one.nodeid,
 999                .unique = unique,
1000                .len = sizeof(*req),
1001        };
1002        req->arg = (struct fuse_forget_in){
1003                .nlookup = link->forget_one.nlookup,
1004        };
1005
1006        send_forget_request(fsvq, forget, false);
1007        kfree(link);
1008}
1009
1010static void virtio_fs_wake_interrupt_and_unlock(struct fuse_iqueue *fiq)
1011__releases(fiq->lock)
1012{
1013        /*
1014         * TODO interrupts.
1015         *
 1016         * Normal fs operations on a local filesystem aren't interruptible.
1017         * Exceptions are blocking lock operations; for example fcntl(F_SETLKW)
1018         * with shared lock between host and guest.
1019         */
1020        spin_unlock(&fiq->lock);
1021}
1022
1023/* Count number of scatter-gather elements required */
1024static unsigned int sg_count_fuse_pages(struct fuse_page_desc *page_descs,
1025                                       unsigned int num_pages,
1026                                       unsigned int total_len)
1027{
1028        unsigned int i;
1029        unsigned int this_len;
1030
1031        for (i = 0; i < num_pages && total_len; i++) {
1032                this_len =  min(page_descs[i].length, total_len);
1033                total_len -= this_len;
1034        }
1035
1036        return i;
1037}
1038
1039/* Return the number of scatter-gather list elements required */
1040static unsigned int sg_count_fuse_req(struct fuse_req *req)
1041{
1042        struct fuse_args *args = req->args;
1043        struct fuse_args_pages *ap = container_of(args, typeof(*ap), args);
1044        unsigned int size, total_sgs = 1 /* fuse_in_header */;
1045
1046        if (args->in_numargs - args->in_pages)
1047                total_sgs += 1;
1048
1049        if (args->in_pages) {
1050                size = args->in_args[args->in_numargs - 1].size;
1051                total_sgs += sg_count_fuse_pages(ap->descs, ap->num_pages,
1052                                                 size);
1053        }
1054
1055        if (!test_bit(FR_ISREPLY, &req->flags))
1056                return total_sgs;
1057
1058        total_sgs += 1 /* fuse_out_header */;
1059
1060        if (args->out_numargs - args->out_pages)
1061                total_sgs += 1;
1062
1063        if (args->out_pages) {
1064                size = args->out_args[args->out_numargs - 1].size;
1065                total_sgs += sg_count_fuse_pages(ap->descs, ap->num_pages,
1066                                                 size);
1067        }
1068
1069        return total_sgs;
1070}
1071
1072/* Add pages to scatter-gather list and return number of elements used */
1073static unsigned int sg_init_fuse_pages(struct scatterlist *sg,
1074                                       struct page **pages,
1075                                       struct fuse_page_desc *page_descs,
1076                                       unsigned int num_pages,
1077                                       unsigned int total_len)
1078{
1079        unsigned int i;
1080        unsigned int this_len;
1081
1082        for (i = 0; i < num_pages && total_len; i++) {
1083                sg_init_table(&sg[i], 1);
1084                this_len =  min(page_descs[i].length, total_len);
1085                sg_set_page(&sg[i], pages[i], this_len, page_descs[i].offset);
1086                total_len -= this_len;
1087        }
1088
1089        return i;
1090}
1091
1092/* Add args to scatter-gather list and return number of elements used */
1093static unsigned int sg_init_fuse_args(struct scatterlist *sg,
1094                                      struct fuse_req *req,
1095                                      struct fuse_arg *args,
1096                                      unsigned int numargs,
1097                                      bool argpages,
1098                                      void *argbuf,
1099                                      unsigned int *len_used)
1100{
1101        struct fuse_args_pages *ap = container_of(req->args, typeof(*ap), args);
1102        unsigned int total_sgs = 0;
1103        unsigned int len;
1104
1105        len = fuse_len_args(numargs - argpages, args);
1106        if (len)
1107                sg_init_one(&sg[total_sgs++], argbuf, len);
1108
1109        if (argpages)
1110                total_sgs += sg_init_fuse_pages(&sg[total_sgs],
1111                                                ap->pages, ap->descs,
1112                                                ap->num_pages,
1113                                                args[numargs - 1].size);
1114
1115        if (len_used)
1116                *len_used = len;
1117
1118        return total_sgs;
1119}
1120
1121/* Add a request to a virtqueue and kick the device */
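/*
 * Buffer layout for one request (a sketch derived from the code below):
 *
 *   device-readable (out): fuse_in_header, in-args bounce buffer, in pages
 *   device-writable (in):  fuse_out_header, out-args bounce buffer, out pages
 *
 * FUSE_HEADER_OVERHEAD (4) corresponds to the four non-page elements above.
 */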
1122static int virtio_fs_enqueue_req(struct virtio_fs_vq *fsvq,
1123                                 struct fuse_req *req, bool in_flight)
1124{
1125        /* requests need at least 4 elements */
1126        struct scatterlist *stack_sgs[6];
1127        struct scatterlist stack_sg[ARRAY_SIZE(stack_sgs)];
1128        struct scatterlist **sgs = stack_sgs;
1129        struct scatterlist *sg = stack_sg;
1130        struct virtqueue *vq;
1131        struct fuse_args *args = req->args;
1132        unsigned int argbuf_used = 0;
1133        unsigned int out_sgs = 0;
1134        unsigned int in_sgs = 0;
1135        unsigned int total_sgs;
1136        unsigned int i;
1137        int ret;
1138        bool notify;
1139        struct fuse_pqueue *fpq;
1140
1141        /* Does the sglist fit on the stack? */
1142        total_sgs = sg_count_fuse_req(req);
1143        if (total_sgs > ARRAY_SIZE(stack_sgs)) {
1144                sgs = kmalloc_array(total_sgs, sizeof(sgs[0]), GFP_ATOMIC);
1145                sg = kmalloc_array(total_sgs, sizeof(sg[0]), GFP_ATOMIC);
1146                if (!sgs || !sg) {
1147                        ret = -ENOMEM;
1148                        goto out;
1149                }
1150        }
1151
1152        /* Use a bounce buffer since stack args cannot be mapped */
1153        ret = copy_args_to_argbuf(req);
1154        if (ret < 0)
1155                goto out;
1156
1157        /* Request elements */
1158        sg_init_one(&sg[out_sgs++], &req->in.h, sizeof(req->in.h));
1159        out_sgs += sg_init_fuse_args(&sg[out_sgs], req,
1160                                     (struct fuse_arg *)args->in_args,
1161                                     args->in_numargs, args->in_pages,
1162                                     req->argbuf, &argbuf_used);
1163
1164        /* Reply elements */
1165        if (test_bit(FR_ISREPLY, &req->flags)) {
1166                sg_init_one(&sg[out_sgs + in_sgs++],
1167                            &req->out.h, sizeof(req->out.h));
1168                in_sgs += sg_init_fuse_args(&sg[out_sgs + in_sgs], req,
1169                                            args->out_args, args->out_numargs,
1170                                            args->out_pages,
1171                                            req->argbuf + argbuf_used, NULL);
1172        }
1173
1174        WARN_ON(out_sgs + in_sgs != total_sgs);
1175
1176        for (i = 0; i < total_sgs; i++)
1177                sgs[i] = &sg[i];
1178
1179        spin_lock(&fsvq->lock);
1180
1181        if (!fsvq->connected) {
1182                spin_unlock(&fsvq->lock);
1183                ret = -ENOTCONN;
1184                goto out;
1185        }
1186
1187        vq = fsvq->vq;
1188        ret = virtqueue_add_sgs(vq, sgs, out_sgs, in_sgs, req, GFP_ATOMIC);
1189        if (ret < 0) {
1190                spin_unlock(&fsvq->lock);
1191                goto out;
1192        }
1193
1194        /* Request successfully sent. */
1195        fpq = &fsvq->fud->pq;
1196        spin_lock(&fpq->lock);
1197        list_add_tail(&req->list, fpq->processing);
1198        spin_unlock(&fpq->lock);
1199        set_bit(FR_SENT, &req->flags);
1200        /* matches barrier in request_wait_answer() */
1201        smp_mb__after_atomic();
1202
1203        if (!in_flight)
1204                inc_in_flight_req(fsvq);
1205        notify = virtqueue_kick_prepare(vq);
1206
1207        spin_unlock(&fsvq->lock);
1208
1209        if (notify)
1210                virtqueue_notify(vq);
1211
1212out:
1213        if (ret < 0 && req->argbuf) {
1214                kfree(req->argbuf);
1215                req->argbuf = NULL;
1216        }
1217        if (sgs != stack_sgs) {
1218                kfree(sgs);
1219                kfree(sg);
1220        }
1221
1222        return ret;
1223}
1224
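/* fuse_iqueue_ops ->wake_pending_and_unlock: take the single pending request
 * off fiq->pending and submit it on the request queue. On -ENOMEM/-ENOSPC the
 * request is parked on ->queued_reqs and retried from the dispatch worker;
 * other errors are also ended from the worker, since requests cannot be ended
 * in submission context.
 */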
1225static void virtio_fs_wake_pending_and_unlock(struct fuse_iqueue *fiq)
1226__releases(fiq->lock)
1227{
1228        unsigned int queue_id = VQ_REQUEST; /* TODO multiqueue */
1229        struct virtio_fs *fs;
1230        struct fuse_req *req;
1231        struct virtio_fs_vq *fsvq;
1232        int ret;
1233
1234        WARN_ON(list_empty(&fiq->pending));
1235        req = list_last_entry(&fiq->pending, struct fuse_req, list);
1236        clear_bit(FR_PENDING, &req->flags);
1237        list_del_init(&req->list);
1238        WARN_ON(!list_empty(&fiq->pending));
1239        spin_unlock(&fiq->lock);
1240
1241        fs = fiq->priv;
1242
1243        pr_debug("%s: opcode %u unique %#llx nodeid %#llx in.len %u out.len %u\n",
1244                  __func__, req->in.h.opcode, req->in.h.unique,
1245                 req->in.h.nodeid, req->in.h.len,
1246                 fuse_len_args(req->args->out_numargs, req->args->out_args));
1247
1248        fsvq = &fs->vqs[queue_id];
1249        ret = virtio_fs_enqueue_req(fsvq, req, false);
1250        if (ret < 0) {
1251                if (ret == -ENOMEM || ret == -ENOSPC) {
1252                        /*
1253                         * Virtqueue full. Retry submission from worker
1254                         * context as we might be holding fc->bg_lock.
1255                         */
1256                        spin_lock(&fsvq->lock);
1257                        list_add_tail(&req->list, &fsvq->queued_reqs);
1258                        inc_in_flight_req(fsvq);
1259                        schedule_delayed_work(&fsvq->dispatch_work,
1260                                                msecs_to_jiffies(1));
1261                        spin_unlock(&fsvq->lock);
1262                        return;
1263                }
1264                req->out.h.error = ret;
1265                pr_err("virtio-fs: virtio_fs_enqueue_req() failed %d\n", ret);
1266
1267                /* Can't end request in submission context. Use a worker */
1268                spin_lock(&fsvq->lock);
1269                list_add_tail(&req->list, &fsvq->end_reqs);
1270                schedule_delayed_work(&fsvq->dispatch_work, 0);
1271                spin_unlock(&fsvq->lock);
1272                return;
1273        }
1274}
1275
1276static const struct fuse_iqueue_ops virtio_fs_fiq_ops = {
1277        .wake_forget_and_unlock         = virtio_fs_wake_forget_and_unlock,
1278        .wake_interrupt_and_unlock      = virtio_fs_wake_interrupt_and_unlock,
1279        .wake_pending_and_unlock        = virtio_fs_wake_pending_and_unlock,
1280        .release                        = virtio_fs_fiq_release,
1281};
1282
1283static inline void virtio_fs_ctx_set_defaults(struct fuse_fs_context *ctx)
1284{
1285        ctx->rootmode = S_IFDIR;
1286        ctx->default_permissions = 1;
1287        ctx->allow_other = 1;
1288        ctx->max_read = UINT_MAX;
1289        ctx->blksize = 512;
1290        ctx->destroy = true;
1291        ctx->no_control = true;
1292        ctx->no_force_umount = true;
1293}
1294
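/* Fill the superblock for a virtiofs mount: allocate one fuse_dev per
 * virtqueue, wire up DAX if requested, call fuse_fill_super_common() and send
 * FUSE_INIT. Runs under virtio_fs_mutex so it cannot race with ->remove.
 */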
1295static int virtio_fs_fill_super(struct super_block *sb, struct fs_context *fsc)
1296{
1297        struct fuse_mount *fm = get_fuse_mount_super(sb);
1298        struct fuse_conn *fc = fm->fc;
1299        struct virtio_fs *fs = fc->iq.priv;
1300        struct fuse_fs_context *ctx = fsc->fs_private;
1301        unsigned int i;
1302        int err;
1303
1304        virtio_fs_ctx_set_defaults(ctx);
1305        mutex_lock(&virtio_fs_mutex);
1306
1307        /* After holding mutex, make sure virtiofs device is still there.
 1308         * Though we are holding a reference to it, driver ->remove might
1309         * still have cleaned up virtual queues. In that case bail out.
1310         */
1311        err = -EINVAL;
1312        if (list_empty(&fs->list)) {
1313                pr_info("virtio-fs: tag <%s> not found\n", fs->tag);
1314                goto err;
1315        }
1316
1317        err = -ENOMEM;
 1318        /* Allocate a fuse_dev for the hiprio and each request queue */
1319        for (i = 0; i < fs->nvqs; i++) {
1320                struct virtio_fs_vq *fsvq = &fs->vqs[i];
1321
1322                fsvq->fud = fuse_dev_alloc();
1323                if (!fsvq->fud)
1324                        goto err_free_fuse_devs;
1325        }
1326
1327        /* virtiofs allocates and installs its own fuse devices */
1328        ctx->fudptr = NULL;
1329        if (ctx->dax) {
1330                if (!fs->dax_dev) {
1331                        err = -EINVAL;
1332                        pr_err("virtio-fs: dax can't be enabled as filesystem"
1333                               " device does not support it.\n");
1334                        goto err_free_fuse_devs;
1335                }
1336                ctx->dax_dev = fs->dax_dev;
1337        }
1338        err = fuse_fill_super_common(sb, ctx);
1339        if (err < 0)
1340                goto err_free_fuse_devs;
1341
1342        for (i = 0; i < fs->nvqs; i++) {
1343                struct virtio_fs_vq *fsvq = &fs->vqs[i];
1344
1345                fuse_dev_install(fsvq->fud, fc);
1346        }
1347
 1348        /* A previous unmount will have stopped all queues. Start them again */
1349        virtio_fs_start_all_queues(fs);
1350        fuse_send_init(fm);
1351        mutex_unlock(&virtio_fs_mutex);
1352        return 0;
1353
1354err_free_fuse_devs:
1355        virtio_fs_free_devs(fs);
1356err:
1357        mutex_unlock(&virtio_fs_mutex);
1358        return err;
1359}
1360
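/* Tear down the connection on the last unmount: stop the forget queue, drain
 * all queues, let fuse_conn_destroy() send FUSE_DESTROY, then stop and drain
 * once more and free the per-queue fuse devices.
 */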
1361static void virtio_fs_conn_destroy(struct fuse_mount *fm)
1362{
1363        struct fuse_conn *fc = fm->fc;
1364        struct virtio_fs *vfs = fc->iq.priv;
1365        struct virtio_fs_vq *fsvq = &vfs->vqs[VQ_HIPRIO];
1366
1367        /* Stop dax worker. Soon evict_inodes() will be called which
1368         * will free all memory ranges belonging to all inodes.
1369         */
1370        if (IS_ENABLED(CONFIG_FUSE_DAX))
1371                fuse_dax_cancel_work(fc);
1372
1373        /* Stop forget queue. Soon destroy will be sent */
1374        spin_lock(&fsvq->lock);
1375        fsvq->connected = false;
1376        spin_unlock(&fsvq->lock);
1377        virtio_fs_drain_all_queues(vfs);
1378
1379        fuse_conn_destroy(fm);
1380
1381        /* fuse_conn_destroy() must have sent destroy. Stop all queues
1382         * and drain one more time and free fuse devices. Freeing fuse
1383         * devices will drop their reference on fuse_conn and that in
1384         * turn will drop its reference on virtio_fs object.
1385         */
1386        virtio_fs_stop_all_queues(vfs);
1387        virtio_fs_drain_all_queues(vfs);
1388        virtio_fs_free_devs(vfs);
1389}
1390
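/* ->kill_sb: remove this mount and, if it was the last one on the connection,
 * tear the connection down before destroying the superblock.
 */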
1391static void virtio_kill_sb(struct super_block *sb)
1392{
1393        struct fuse_mount *fm = get_fuse_mount_super(sb);
1394        bool last;
1395
1396        /* If mount failed, we can still be called without any fc */
1397        if (sb->s_root) {
1398                last = fuse_mount_remove(fm);
1399                if (last)
1400                        virtio_fs_conn_destroy(fm);
1401        }
1402        kill_anon_super(sb);
1403        fuse_mount_destroy(fm);
1404}
1405
1406static int virtio_fs_test_super(struct super_block *sb,
1407                                struct fs_context *fsc)
1408{
1409        struct fuse_mount *fsc_fm = fsc->s_fs_info;
1410        struct fuse_mount *sb_fm = get_fuse_mount_super(sb);
1411
1412        return fsc_fm->fc->iq.priv == sb_fm->fc->iq.priv;
1413}
1414
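/* ->get_tree: look up the virtio_fs instance by tag (fsc->source), create the
 * fuse_conn/fuse_mount, cap max_pages so a request fits in one virtqueue, and
 * create or reuse a superblock for this device.
 */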
1415static int virtio_fs_get_tree(struct fs_context *fsc)
1416{
1417        struct virtio_fs *fs;
1418        struct super_block *sb;
1419        struct fuse_conn *fc = NULL;
1420        struct fuse_mount *fm;
1421        unsigned int virtqueue_size;
1422        int err = -EIO;
1423
1424        /* This gets a reference on virtio_fs object. This ptr gets installed
 1425         * in fc->iq.priv. Once fuse_conn is going away, it calls ->put()
1426         * to drop the reference to this object.
1427         */
1428        fs = virtio_fs_find_instance(fsc->source);
1429        if (!fs) {
1430                pr_info("virtio-fs: tag <%s> not found\n", fsc->source);
1431                return -EINVAL;
1432        }
1433
1434        virtqueue_size = virtqueue_get_vring_size(fs->vqs[VQ_REQUEST].vq);
1435        if (WARN_ON(virtqueue_size <= FUSE_HEADER_OVERHEAD))
1436                goto out_err;
1437
1438        err = -ENOMEM;
1439        fc = kzalloc(sizeof(struct fuse_conn), GFP_KERNEL);
1440        if (!fc)
1441                goto out_err;
1442
1443        fm = kzalloc(sizeof(struct fuse_mount), GFP_KERNEL);
1444        if (!fm)
1445                goto out_err;
1446
1447        fuse_conn_init(fc, fm, fsc->user_ns, &virtio_fs_fiq_ops, fs);
1448        fc->release = fuse_free_conn;
1449        fc->delete_stale = true;
1450        fc->auto_submounts = true;
1451        fc->sync_fs = true;
1452
1453        /* Tell FUSE to split requests that exceed the virtqueue's size */
1454        fc->max_pages_limit = min_t(unsigned int, fc->max_pages_limit,
1455                                    virtqueue_size - FUSE_HEADER_OVERHEAD);
1456
1457        fsc->s_fs_info = fm;
1458        sb = sget_fc(fsc, virtio_fs_test_super, set_anon_super_fc);
1459        if (fsc->s_fs_info)
1460                fuse_mount_destroy(fm);
1461        if (IS_ERR(sb))
1462                return PTR_ERR(sb);
1463
1464        if (!sb->s_root) {
1465                err = virtio_fs_fill_super(sb, fsc);
1466                if (err) {
1467                        deactivate_locked_super(sb);
1468                        return err;
1469                }
1470
1471                sb->s_flags |= SB_ACTIVE;
1472        }
1473
1474        WARN_ON(fsc->root);
1475        fsc->root = dget(sb->s_root);
1476        return 0;
1477
1478out_err:
1479        kfree(fc);
1480        mutex_lock(&virtio_fs_mutex);
1481        virtio_fs_put(fs);
1482        mutex_unlock(&virtio_fs_mutex);
1483        return err;
1484}
1485
1486static const struct fs_context_operations virtio_fs_context_ops = {
1487        .free           = virtio_fs_free_fsc,
1488        .parse_param    = virtio_fs_parse_param,
1489        .get_tree       = virtio_fs_get_tree,
1490};
1491
1492static int virtio_fs_init_fs_context(struct fs_context *fsc)
1493{
1494        struct fuse_fs_context *ctx;
1495
1496        if (fsc->purpose == FS_CONTEXT_FOR_SUBMOUNT)
1497                return fuse_init_fs_context_submount(fsc);
1498
1499        ctx = kzalloc(sizeof(struct fuse_fs_context), GFP_KERNEL);
1500        if (!ctx)
1501                return -ENOMEM;
1502        fsc->fs_private = ctx;
1503        fsc->ops = &virtio_fs_context_ops;
1504        return 0;
1505}
1506
1507static struct file_system_type virtio_fs_type = {
1508        .owner          = THIS_MODULE,
1509        .name           = "virtiofs",
1510        .init_fs_context = virtio_fs_init_fs_context,
1511        .kill_sb        = virtio_kill_sb,
1512};
1513
1514static int __init virtio_fs_init(void)
1515{
1516        int ret;
1517
1518        ret = register_virtio_driver(&virtio_fs_driver);
1519        if (ret < 0)
1520                return ret;
1521
1522        ret = register_filesystem(&virtio_fs_type);
1523        if (ret < 0) {
1524                unregister_virtio_driver(&virtio_fs_driver);
1525                return ret;
1526        }
1527
1528        return 0;
1529}
1530module_init(virtio_fs_init);
1531
1532static void __exit virtio_fs_exit(void)
1533{
1534        unregister_filesystem(&virtio_fs_type);
1535        unregister_virtio_driver(&virtio_fs_driver);
1536}
1537module_exit(virtio_fs_exit);
1538
1539MODULE_AUTHOR("Stefan Hajnoczi <stefanha@redhat.com>");
1540MODULE_DESCRIPTION("Virtio Filesystem");
1541MODULE_LICENSE("GPL");
1542MODULE_ALIAS_FS(KBUILD_MODNAME);
1543MODULE_DEVICE_TABLE(virtio, id_table);
1544