linux/drivers/xen/xenbus/xenbus_dev_frontend.c
<<
>>
Prefs
   1/*
   2 * Driver giving user-space access to the kernel's xenbus connection
   3 * to xenstore.
   4 *
   5 * Copyright (c) 2005, Christian Limpach
   6 * Copyright (c) 2005, Rusty Russell, IBM Corporation
   7 *
   8 * This program is free software; you can redistribute it and/or
   9 * modify it under the terms of the GNU General Public License version 2
  10 * as published by the Free Software Foundation; or, when distributed
  11 * separately from the Linux kernel or incorporated into other
  12 * software packages, subject to the following license:
  13 *
  14 * Permission is hereby granted, free of charge, to any person obtaining a copy
  15 * of this source file (the "Software"), to deal in the Software without
  16 * restriction, including without limitation the rights to use, copy, modify,
  17 * merge, publish, distribute, sublicense, and/or sell copies of the Software,
  18 * and to permit persons to whom the Software is furnished to do so, subject to
  19 * the following conditions:
  20 *
  21 * The above copyright notice and this permission notice shall be included in
  22 * all copies or substantial portions of the Software.
  23 *
  24 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
  25 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
  26 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
  27 * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
  28 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
  29 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
  30 * IN THE SOFTWARE.
  31 *
  32 * Changes:
  33 * 2008-10-07  Alex Zeffertt    Replaced /proc/xen/xenbus with xenfs filesystem
  34 *                              and /proc/xen compatibility mount point.
  35 *                              Turned xenfs into a loadable module.
  36 */
  37
  38#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
  39
  40#include <linux/kernel.h>
  41#include <linux/errno.h>
  42#include <linux/uio.h>
  43#include <linux/notifier.h>
  44#include <linux/wait.h>
  45#include <linux/fs.h>
  46#include <linux/poll.h>
  47#include <linux/mutex.h>
  48#include <linux/sched.h>
  49#include <linux/spinlock.h>
  50#include <linux/mount.h>
  51#include <linux/pagemap.h>
  52#include <linux/uaccess.h>
  53#include <linux/init.h>
  54#include <linux/namei.h>
  55#include <linux/string.h>
  56#include <linux/slab.h>
  57#include <linux/miscdevice.h>
  58
  59#include <xen/xenbus.h>
  60#include <xen/xen.h>
  61#include <asm/xen/hypervisor.h>
  62
  63#include "xenbus.h"
  64
/*
 * Generation counter compared against the value snapshotted into each
 * transaction holder when the transaction was started.  This file only
 * reads it (see xenbus_write_transaction()); a mismatch at
 * XS_TRANSACTION_END causes the request to be answered locally instead
 * of being forwarded.
 */
unsigned int xb_dev_generation_id;
  66
/*
 * An element of a list of outstanding transactions, for which we're
 * still waiting a reply.
 */
struct xenbus_transaction_holder {
        /* Link in xenbus_file_priv.transactions. */
        struct list_head list;
        /*
         * Transaction handle.  handle.id is zero from allocation until
         * xenbus_dev_queue_reply() parses the id out of the
         * XS_TRANSACTION_START reply.
         */
        struct xenbus_transaction handle;
        /* Value of xb_dev_generation_id when the holder was created. */
        unsigned int generation_id;
};
  76
/*
 * A buffer of data on the queue.
 */
struct read_buffer {
        /* Link in a reply queue (xenbus_file_priv.read_buffers or a staging list). */
        struct list_head list;
        /* Number of bytes of msg[] already copied out to user space. */
        unsigned int cons;
        /* Total number of bytes stored in msg[]. */
        unsigned int len;
        /* Payload; allocated together with the structure (see queue_reply()). */
        char msg[];
};
  86
/*
 * Per-open-file state, allocated in xenbus_file_open() and stored in
 * filp->private_data.
 */
struct xenbus_file_priv {
        /*
         * msgbuffer_mutex is held while partial requests are built up
         * and complete requests are acted on.  It therefore protects
         * the "transactions" and "watches" lists, and the partial
         * request length and buffer.
         *
         * reply_mutex protects the reply being built up to return to
         * usermode.  It nests inside msgbuffer_mutex but may be held
         * alone during a watch callback.
         */
        struct mutex msgbuffer_mutex;

        /* In-progress transactions */
        struct list_head transactions;

        /* Active watches. */
        struct list_head watches;

        /* Partial request. */
        /* Number of bytes of u.buffer filled so far by xenbus_file_write(). */
        unsigned int len;
        /*
         * Request being assembled: u.msg overlays the message header at
         * the start of the raw byte buffer.
         */
        union {
                struct xsd_sockmsg msg;
                char buffer[XENSTORE_PAYLOAD_MAX];
        } u;

        /* Response queue. */
        struct mutex reply_mutex;
        /* Queue of struct read_buffer waiting to be read by user space. */
        struct list_head read_buffers;
        /* Readers and pollers wait here for read_buffers to become non-empty. */
        wait_queue_head_t read_waitq;

        /*
         * Reference count.  Initialised in xenbus_file_open(); one extra
         * reference is taken for every complete message handled in
         * xenbus_file_write().  The final put runs xenbus_file_free().
         */
        struct kref kref;
};
 120
 121/* Read out any raw xenbus messages queued up. */
 122static ssize_t xenbus_file_read(struct file *filp,
 123                               char __user *ubuf,
 124                               size_t len, loff_t *ppos)
 125{
 126        struct xenbus_file_priv *u = filp->private_data;
 127        struct read_buffer *rb;
 128        unsigned i;
 129        int ret;
 130
 131        mutex_lock(&u->reply_mutex);
 132again:
 133        while (list_empty(&u->read_buffers)) {
 134                mutex_unlock(&u->reply_mutex);
 135                if (filp->f_flags & O_NONBLOCK)
 136                        return -EAGAIN;
 137
 138                ret = wait_event_interruptible(u->read_waitq,
 139                                               !list_empty(&u->read_buffers));
 140                if (ret)
 141                        return ret;
 142                mutex_lock(&u->reply_mutex);
 143        }
 144
 145        rb = list_entry(u->read_buffers.next, struct read_buffer, list);
 146        i = 0;
 147        while (i < len) {
 148                unsigned sz = min((unsigned)len - i, rb->len - rb->cons);
 149
 150                ret = copy_to_user(ubuf + i, &rb->msg[rb->cons], sz);
 151
 152                i += sz - ret;
 153                rb->cons += sz - ret;
 154
 155                if (ret != 0) {
 156                        if (i == 0)
 157                                i = -EFAULT;
 158                        goto out;
 159                }
 160
 161                /* Clear out buffer if it has been consumed */
 162                if (rb->cons == rb->len) {
 163                        list_del(&rb->list);
 164                        kfree(rb);
 165                        if (list_empty(&u->read_buffers))
 166                                break;
 167                        rb = list_entry(u->read_buffers.next,
 168                                        struct read_buffer, list);
 169                }
 170        }
 171        if (i == 0)
 172                goto again;
 173
 174out:
 175        mutex_unlock(&u->reply_mutex);
 176        return i;
 177}
 178
 179/*
 180 * Add a buffer to the queue.  Caller must hold the appropriate lock
 181 * if the queue is not local.  (Commonly the caller will build up
 182 * multiple queued buffers on a temporary local list, and then add it
 183 * to the appropriate list under lock once all the buffers have een
 184 * successfully allocated.)
 185 */
 186static int queue_reply(struct list_head *queue, const void *data, size_t len)
 187{
 188        struct read_buffer *rb;
 189
 190        if (len == 0)
 191                return 0;
 192        if (len > XENSTORE_PAYLOAD_MAX)
 193                return -EINVAL;
 194
 195        rb = kmalloc(sizeof(*rb) + len, GFP_KERNEL);
 196        if (rb == NULL)
 197                return -ENOMEM;
 198
 199        rb->cons = 0;
 200        rb->len = len;
 201
 202        memcpy(rb->msg, data, len);
 203
 204        list_add_tail(&rb->list, queue);
 205        return 0;
 206}
 207
 208/*
 209 * Free all the read_buffer s on a list.
 210 * Caller must have sole reference to list.
 211 */
 212static void queue_cleanup(struct list_head *list)
 213{
 214        struct read_buffer *rb;
 215
 216        while (!list_empty(list)) {
 217                rb = list_entry(list->next, struct read_buffer, list);
 218                list_del(list->next);
 219                kfree(rb);
 220        }
 221}
 222
/*
 * Ties a kernel xenbus watch to the open file that requested it, so the
 * watch callback can find the file's reply queue.
 */
struct watch_adapter {
        /* Link in xenbus_file_priv.watches. */
        struct list_head list;
        /* The registered watch; watch.node is a kstrdup()ed copy of the path. */
        struct xenbus_watch watch;
        /* Owning file state; events are queued on its read_buffers list. */
        struct xenbus_file_priv *dev_data;
        /* kstrdup()ed copy of the token user space supplied with the watch. */
        char *token;
};
 229
 230static void free_watch_adapter(struct watch_adapter *watch)
 231{
 232        kfree(watch->watch.node);
 233        kfree(watch->token);
 234        kfree(watch);
 235}
 236
 237static struct watch_adapter *alloc_watch_adapter(const char *path,
 238                                                 const char *token)
 239{
 240        struct watch_adapter *watch;
 241
 242        watch = kzalloc(sizeof(*watch), GFP_KERNEL);
 243        if (watch == NULL)
 244                goto out_fail;
 245
 246        watch->watch.node = kstrdup(path, GFP_KERNEL);
 247        if (watch->watch.node == NULL)
 248                goto out_free;
 249
 250        watch->token = kstrdup(token, GFP_KERNEL);
 251        if (watch->token == NULL)
 252                goto out_free;
 253
 254        return watch;
 255
 256out_free:
 257        free_watch_adapter(watch);
 258
 259out_fail:
 260        return NULL;
 261}
 262
 263static void watch_fired(struct xenbus_watch *watch,
 264                        const char *path,
 265                        const char *token)
 266{
 267        struct watch_adapter *adap;
 268        struct xsd_sockmsg hdr;
 269        const char *token_caller;
 270        int path_len, tok_len, body_len;
 271        int ret;
 272        LIST_HEAD(staging_q);
 273
 274        adap = container_of(watch, struct watch_adapter, watch);
 275
 276        token_caller = adap->token;
 277
 278        path_len = strlen(path) + 1;
 279        tok_len = strlen(token_caller) + 1;
 280        body_len = path_len + tok_len;
 281
 282        hdr.type = XS_WATCH_EVENT;
 283        hdr.len = body_len;
 284
 285        mutex_lock(&adap->dev_data->reply_mutex);
 286
 287        ret = queue_reply(&staging_q, &hdr, sizeof(hdr));
 288        if (!ret)
 289                ret = queue_reply(&staging_q, path, path_len);
 290        if (!ret)
 291                ret = queue_reply(&staging_q, token_caller, tok_len);
 292
 293        if (!ret) {
 294                /* success: pass reply list onto watcher */
 295                list_splice_tail(&staging_q, &adap->dev_data->read_buffers);
 296                wake_up(&adap->dev_data->read_waitq);
 297        } else
 298                queue_cleanup(&staging_q);
 299
 300        mutex_unlock(&adap->dev_data->reply_mutex);
 301}
 302
 303static void xenbus_file_free(struct kref *kref)
 304{
 305        struct xenbus_file_priv *u;
 306        struct xenbus_transaction_holder *trans, *tmp;
 307        struct watch_adapter *watch, *tmp_watch;
 308        struct read_buffer *rb, *tmp_rb;
 309
 310        u = container_of(kref, struct xenbus_file_priv, kref);
 311
 312        /*
 313         * No need for locking here because there are no other users,
 314         * by definition.
 315         */
 316
 317        list_for_each_entry_safe(trans, tmp, &u->transactions, list) {
 318                xenbus_transaction_end(trans->handle, 1);
 319                list_del(&trans->list);
 320                kfree(trans);
 321        }
 322
 323        list_for_each_entry_safe(watch, tmp_watch, &u->watches, list) {
 324                unregister_xenbus_watch(&watch->watch);
 325                list_del(&watch->list);
 326                free_watch_adapter(watch);
 327        }
 328
 329        list_for_each_entry_safe(rb, tmp_rb, &u->read_buffers, list) {
 330                list_del(&rb->list);
 331                kfree(rb);
 332        }
 333        kfree(u);
 334}
 335
 336static struct xenbus_transaction_holder *xenbus_get_transaction(
 337        struct xenbus_file_priv *u, uint32_t tx_id)
 338{
 339        struct xenbus_transaction_holder *trans;
 340
 341        list_for_each_entry(trans, &u->transactions, list)
 342                if (trans->handle.id == tx_id)
 343                        return trans;
 344
 345        return NULL;
 346}
 347
/*
 * Deliver the reply carried by @req to the open file stored in req->par.
 *
 * First brings the file's transaction list up to date under
 * msgbuffer_mutex, then queues the reply header and body for user space
 * under reply_mutex, frees the request and drops one reference on the
 * file state.
 *
 * NOTE(review): the WARN_ON() failure paths jump to "out", which only
 * releases msgbuffer_mutex: req->body and req are not freed and the kref
 * is not dropped on that path.  This looks like a leak - confirm whether
 * the caller is expected to clean up.
 */
void xenbus_dev_queue_reply(struct xb_req_data *req)
{
        struct xenbus_file_priv *u = req->par;
        struct xenbus_transaction_holder *trans = NULL;
        int rc;
        LIST_HEAD(staging_q);

        xs_request_exit(req);

        mutex_lock(&u->msgbuffer_mutex);

        if (req->type == XS_TRANSACTION_START) {
                /*
                 * The holder added by xenbus_write_transaction() was
                 * zero-allocated, so it is still found under id 0.
                 */
                trans = xenbus_get_transaction(u, 0);
                if (WARN_ON(!trans))
                        goto out;
                if (req->msg.type == XS_ERROR) {
                        /* Transaction was not started: forget the holder. */
                        list_del(&trans->list);
                        kfree(trans);
                } else {
                        /* The reply body is the new transaction id in decimal. */
                        rc = kstrtou32(req->body, 10, &trans->handle.id);
                        if (WARN_ON(rc))
                                goto out;
                }
        } else if (req->type == XS_TRANSACTION_END) {
                /* The transaction is over whatever the reply says. */
                trans = xenbus_get_transaction(u, req->msg.tx_id);
                if (WARN_ON(!trans))
                        goto out;
                list_del(&trans->list);
                kfree(trans);
        }

        mutex_unlock(&u->msgbuffer_mutex);

        mutex_lock(&u->reply_mutex);
        /* Stage header and body so that either both are queued or neither. */
        rc = queue_reply(&staging_q, &req->msg, sizeof(req->msg));
        if (!rc)
                rc = queue_reply(&staging_q, req->body, req->msg.len);
        if (!rc) {
                list_splice_tail(&staging_q, &u->read_buffers);
                wake_up(&u->read_waitq);
        } else {
                /* Allocation failed: the reply is dropped. */
                queue_cleanup(&staging_q);
        }
        mutex_unlock(&u->reply_mutex);

        kfree(req->body);
        kfree(req);

        /* Pairs with the kref_get() in xenbus_file_write(). */
        kref_put(&u->kref, xenbus_file_free);

        return;

 out:
        mutex_unlock(&u->msgbuffer_mutex);
}
 403
 404static int xenbus_command_reply(struct xenbus_file_priv *u,
 405                                unsigned int msg_type, const char *reply)
 406{
 407        struct {
 408                struct xsd_sockmsg hdr;
 409                char body[16];
 410        } msg;
 411        int rc;
 412
 413        msg.hdr = u->u.msg;
 414        msg.hdr.type = msg_type;
 415        msg.hdr.len = strlen(reply) + 1;
 416        if (msg.hdr.len > sizeof(msg.body))
 417                return -E2BIG;
 418        memcpy(&msg.body, reply, msg.hdr.len);
 419
 420        mutex_lock(&u->reply_mutex);
 421        rc = queue_reply(&u->read_buffers, &msg, sizeof(msg.hdr) + msg.hdr.len);
 422        wake_up(&u->read_waitq);
 423        mutex_unlock(&u->reply_mutex);
 424
 425        if (!rc)
 426                kref_put(&u->kref, xenbus_file_free);
 427
 428        return rc;
 429}
 430
/*
 * Handle a complete request of any type other than XS_WATCH/XS_UNWATCH.
 *
 * The request sits in u->u.buffer.  Depending on the message it is
 * either answered locally through xenbus_command_reply() or passed on
 * with xenbus_dev_request_and_reply().  Called from xenbus_file_write()
 * with msgbuffer_mutex held.
 *
 * Returns 0 on success or a negative errno.
 */
static int xenbus_write_transaction(unsigned msg_type,
                                    struct xenbus_file_priv *u)
{
        int rc;
        struct xenbus_transaction_holder *trans = NULL;
        /* View of the buffered request as header + trailing body bytes. */
        struct {
                struct xsd_sockmsg hdr;
                char body[];
        } *msg = (void *)u->u.buffer;

        if (msg_type == XS_TRANSACTION_START) {
                /*
                 * Add a placeholder holder (handle.id == 0 thanks to
                 * kzalloc); xenbus_dev_queue_reply() fills in the real
                 * id when the reply arrives.
                 */
                trans = kzalloc(sizeof(*trans), GFP_KERNEL);
                if (!trans) {
                        rc = -ENOMEM;
                        goto out;
                }
                trans->generation_id = xb_dev_generation_id;
                list_add(&trans->list, &u->transactions);
        } else if (msg->hdr.tx_id != 0 &&
                   !xenbus_get_transaction(u, msg->hdr.tx_id))
                /* Request names a transaction this file does not own. */
                return xenbus_command_reply(u, XS_ERROR, "ENOENT");
        else if (msg_type == XS_TRANSACTION_END &&
                 !(msg->hdr.len == 2 &&
                   (!strcmp(msg->body, "T") || !strcmp(msg->body, "F"))))
                /* The body of XS_TRANSACTION_END must be exactly "T" or "F". */
                return xenbus_command_reply(u, XS_ERROR, "EINVAL");
        else if (msg_type == XS_TRANSACTION_END) {
                trans = xenbus_get_transaction(u, msg->hdr.tx_id);
                if (trans && trans->generation_id != xb_dev_generation_id) {
                        /*
                         * The generation changed since the transaction was
                         * started: drop it and answer locally - EAGAIN for
                         * a "T" body, plain OK otherwise.
                         */
                        list_del(&trans->list);
                        kfree(trans);
                        if (!strcmp(msg->body, "T"))
                                return xenbus_command_reply(u, XS_ERROR,
                                                            "EAGAIN");
                        else
                                return xenbus_command_reply(u,
                                                            XS_TRANSACTION_END,
                                                            "OK");
                }
        }

        rc = xenbus_dev_request_and_reply(&msg->hdr, u);
        if (rc && trans) {
                /* The request was not sent, so no reply will remove the holder. */
                list_del(&trans->list);
                kfree(trans);
        }

out:
        return rc;
}
 480
 481static int xenbus_write_watch(unsigned msg_type, struct xenbus_file_priv *u)
 482{
 483        struct watch_adapter *watch;
 484        char *path, *token;
 485        int err, rc;
 486
 487        path = u->u.buffer + sizeof(u->u.msg);
 488        token = memchr(path, 0, u->u.msg.len);
 489        if (token == NULL) {
 490                rc = xenbus_command_reply(u, XS_ERROR, "EINVAL");
 491                goto out;
 492        }
 493        token++;
 494        if (memchr(token, 0, u->u.msg.len - (token - path)) == NULL) {
 495                rc = xenbus_command_reply(u, XS_ERROR, "EINVAL");
 496                goto out;
 497        }
 498
 499        if (msg_type == XS_WATCH) {
 500                watch = alloc_watch_adapter(path, token);
 501                if (watch == NULL) {
 502                        rc = -ENOMEM;
 503                        goto out;
 504                }
 505
 506                watch->watch.callback = watch_fired;
 507                watch->dev_data = u;
 508
 509                err = register_xenbus_watch(&watch->watch);
 510                if (err) {
 511                        free_watch_adapter(watch);
 512                        rc = err;
 513                        goto out;
 514                }
 515                list_add(&watch->list, &u->watches);
 516        } else {
 517                list_for_each_entry(watch, &u->watches, list) {
 518                        if (!strcmp(watch->token, token) &&
 519                            !strcmp(watch->watch.node, path)) {
 520                                unregister_xenbus_watch(&watch->watch);
 521                                list_del(&watch->list);
 522                                free_watch_adapter(watch);
 523                                break;
 524                        }
 525                }
 526        }
 527
 528        /* Success.  Synthesize a reply to say all is OK. */
 529        rc = xenbus_command_reply(u, msg_type, "OK");
 530
 531out:
 532        return rc;
 533}
 534
/*
 * write() handler: accumulate bytes from user space into the per-file
 * request buffer and, once a complete xenbus message (header plus
 * hdr.len payload bytes) is present, dispatch it.
 *
 * Returns the number of bytes accepted, -EINVAL if the data would not
 * fit in the buffer, -EFAULT if the copy from user space failed, -E2BIG
 * if the header announces a payload larger than the buffer, or the
 * error returned by the message handler.
 */
static ssize_t xenbus_file_write(struct file *filp,
                                const char __user *ubuf,
                                size_t len, loff_t *ppos)
{
        struct xenbus_file_priv *u = filp->private_data;
        uint32_t msg_type;
        int rc = len;
        int ret;

        /*
         * We're expecting usermode to be writing properly formed
         * xenbus messages.  If they write an incomplete message we
         * buffer it up.  Once it is complete, we act on it.
         */

        /*
         * Make sure concurrent writers can't stomp all over each
         * other's messages and make a mess of our partial message
         * buffer.  We don't make any attempt to stop multiple
         * writers from making a mess of each other's incomplete
         * messages; we're just trying to guarantee our own internal
         * consistency and make sure that single writes are handled
         * atomically.
         */
        mutex_lock(&u->msgbuffer_mutex);

        /* Get this out of the way early to avoid confusion */
        if (len == 0)
                goto out;

        /* Can't write a xenbus message larger than we can buffer */
        if (len > sizeof(u->u.buffer) - u->len) {
                /* On error, dump existing buffer */
                u->len = 0;
                rc = -EINVAL;
                goto out;
        }

        ret = copy_from_user(u->u.buffer + u->len, ubuf, len);

        if (ret != 0) {
                rc = -EFAULT;
                goto out;
        }

        /* Deal with a partial copy. */
        /* (ret is always 0 here, since a non-zero ret bailed out above.) */
        len -= ret;
        rc = len;

        u->len += len;

        /* Return if we haven't got a full message yet */
        if (u->len < sizeof(u->u.msg))
                goto out;       /* not even the header yet */

        /* If we're expecting a message that's larger than we can
           possibly send, dump what we have and return an error. */
        if ((sizeof(u->u.msg) + u->u.msg.len) > sizeof(u->u.buffer)) {
                rc = -E2BIG;
                u->len = 0;
                goto out;
        }

        if (u->len < (sizeof(u->u.msg) + u->u.msg.len))
                goto out;       /* incomplete data portion */

        /*
         * OK, now we have a complete message.  Do something with it.
         */

        /*
         * Hold a reference for the duration of the request.  It is
         * dropped by xenbus_command_reply() or xenbus_dev_queue_reply()
         * once the reply has been queued, or below if the handler failed.
         */
        kref_get(&u->kref);

        msg_type = u->u.msg.type;

        switch (msg_type) {
        case XS_WATCH:
        case XS_UNWATCH:
                /* (Un)Ask for some path to be watched for changes */
                ret = xenbus_write_watch(msg_type, u);
                break;

        default:
                /* Send out a transaction */
                ret = xenbus_write_transaction(msg_type, u);
                break;
        }
        if (ret != 0) {
                rc = ret;
                kref_put(&u->kref, xenbus_file_free);
        }

        /* Buffered message consumed */
        u->len = 0;

 out:
        mutex_unlock(&u->msgbuffer_mutex);
        return rc;
}
 633
 634static int xenbus_file_open(struct inode *inode, struct file *filp)
 635{
 636        struct xenbus_file_priv *u;
 637
 638        if (xen_store_evtchn == 0)
 639                return -ENOENT;
 640
 641        stream_open(inode, filp);
 642
 643        u = kzalloc(sizeof(*u), GFP_KERNEL);
 644        if (u == NULL)
 645                return -ENOMEM;
 646
 647        kref_init(&u->kref);
 648
 649        INIT_LIST_HEAD(&u->transactions);
 650        INIT_LIST_HEAD(&u->watches);
 651        INIT_LIST_HEAD(&u->read_buffers);
 652        init_waitqueue_head(&u->read_waitq);
 653
 654        mutex_init(&u->reply_mutex);
 655        mutex_init(&u->msgbuffer_mutex);
 656
 657        filp->private_data = u;
 658
 659        return 0;
 660}
 661
 662static int xenbus_file_release(struct inode *inode, struct file *filp)
 663{
 664        struct xenbus_file_priv *u = filp->private_data;
 665
 666        kref_put(&u->kref, xenbus_file_free);
 667
 668        return 0;
 669}
 670
 671static __poll_t xenbus_file_poll(struct file *file, poll_table *wait)
 672{
 673        struct xenbus_file_priv *u = file->private_data;
 674
 675        poll_wait(file, &u->read_waitq, wait);
 676        if (!list_empty(&u->read_buffers))
 677                return EPOLLIN | EPOLLRDNORM;
 678        return 0;
 679}
 680
/*
 * File operations of the xenbus device.  Non-static and exported
 * (GPL-only) so that code outside this file can reference them.
 */
const struct file_operations xen_xenbus_fops = {
        .read = xenbus_file_read,
        .write = xenbus_file_write,
        .open = xenbus_file_open,
        .release = xenbus_file_release,
        .poll = xenbus_file_poll,
        .llseek = no_llseek,
};
EXPORT_SYMBOL_GPL(xen_xenbus_fops);
 690
/*
 * Misc device named "xen/xenbus" with a dynamically assigned minor,
 * backed by xen_xenbus_fops.  Registered in xenbus_init().
 */
static struct miscdevice xenbus_dev = {
        .minor = MISC_DYNAMIC_MINOR,
        .name = "xen/xenbus",
        .fops = &xen_xenbus_fops,
};
 696
 697static int __init xenbus_init(void)
 698{
 699        int err;
 700
 701        if (!xen_domain())
 702                return -ENODEV;
 703
 704        err = misc_register(&xenbus_dev);
 705        if (err)
 706                pr_err("Could not register xenbus frontend device\n");
 707        return err;
 708}
 709device_initcall(xenbus_init);
 710