linux/drivers/xen/xenbus/xenbus_comms.c
<<
>>
Prefs
   1/******************************************************************************
   2 * xenbus_comms.c
   3 *
   4 * Low level code to talk to Xen Store: ringbuffer and event channel.
   5 *
   6 * Copyright (C) 2005 Rusty Russell, IBM Corporation
   7 *
   8 * This program is free software; you can redistribute it and/or
   9 * modify it under the terms of the GNU General Public License version 2
  10 * as published by the Free Software Foundation; or, when distributed
  11 * separately from the Linux kernel or incorporated into other
  12 * software packages, subject to the following license:
  13 *
  14 * Permission is hereby granted, free of charge, to any person obtaining a copy
  15 * of this source file (the "Software"), to deal in the Software without
  16 * restriction, including without limitation the rights to use, copy, modify,
  17 * merge, publish, distribute, sublicense, and/or sell copies of the Software,
  18 * and to permit persons to whom the Software is furnished to do so, subject to
  19 * the following conditions:
  20 *
  21 * The above copyright notice and this permission notice shall be included in
  22 * all copies or substantial portions of the Software.
  23 *
  24 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
  25 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
  26 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
  27 * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
  28 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
  29 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
  30 * IN THE SOFTWARE.
  31 */
  32
  33#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
  34
  35#include <linux/wait.h>
  36#include <linux/interrupt.h>
  37#include <linux/kthread.h>
  38#include <linux/sched.h>
  39#include <linux/err.h>
  40#include <xen/xenbus.h>
  41#include <asm/xen/hypervisor.h>
  42#include <xen/events.h>
  43#include <xen/page.h>
  44#include "xenbus.h"
  45
  46/* A list of replies. Currently only one will ever be outstanding. */
  47LIST_HEAD(xs_reply_list);
  48
  49/* A list of write requests. */
  50LIST_HEAD(xb_write_list);
  51DECLARE_WAIT_QUEUE_HEAD(xb_waitq);
  52DEFINE_MUTEX(xb_write_mutex);
  53
  54/* Protect xenbus reader thread against save/restore. */
  55DEFINE_MUTEX(xs_response_mutex);
  56
  57static int xenbus_irq;
  58static struct task_struct *xenbus_task;
  59
  60static irqreturn_t wake_waiting(int irq, void *unused)
  61{
  62        wake_up(&xb_waitq);
  63        return IRQ_HANDLED;
  64}
  65
  66static int check_indexes(XENSTORE_RING_IDX cons, XENSTORE_RING_IDX prod)
  67{
  68        return ((prod - cons) <= XENSTORE_RING_SIZE);
  69}
  70
  71static void *get_output_chunk(XENSTORE_RING_IDX cons,
  72                              XENSTORE_RING_IDX prod,
  73                              char *buf, uint32_t *len)
  74{
  75        *len = XENSTORE_RING_SIZE - MASK_XENSTORE_IDX(prod);
  76        if ((XENSTORE_RING_SIZE - (prod - cons)) < *len)
  77                *len = XENSTORE_RING_SIZE - (prod - cons);
  78        return buf + MASK_XENSTORE_IDX(prod);
  79}
  80
  81static const void *get_input_chunk(XENSTORE_RING_IDX cons,
  82                                   XENSTORE_RING_IDX prod,
  83                                   const char *buf, uint32_t *len)
  84{
  85        *len = XENSTORE_RING_SIZE - MASK_XENSTORE_IDX(cons);
  86        if ((prod - cons) < *len)
  87                *len = prod - cons;
  88        return buf + MASK_XENSTORE_IDX(cons);
  89}
  90
  91static int xb_data_to_write(void)
  92{
  93        struct xenstore_domain_interface *intf = xen_store_interface;
  94
  95        return (intf->req_prod - intf->req_cons) != XENSTORE_RING_SIZE &&
  96                !list_empty(&xb_write_list);
  97}
  98
  99/**
 100 * xb_write - low level write
 101 * @data: buffer to send
 102 * @len: length of buffer
 103 *
 104 * Returns number of bytes written or -err.
 105 */
 106static int xb_write(const void *data, unsigned int len)
 107{
 108        struct xenstore_domain_interface *intf = xen_store_interface;
 109        XENSTORE_RING_IDX cons, prod;
 110        unsigned int bytes = 0;
 111
 112        while (len != 0) {
 113                void *dst;
 114                unsigned int avail;
 115
 116                /* Read indexes, then verify. */
 117                cons = intf->req_cons;
 118                prod = intf->req_prod;
 119                if (!check_indexes(cons, prod)) {
 120                        intf->req_cons = intf->req_prod = 0;
 121                        return -EIO;
 122                }
 123                if (!xb_data_to_write())
 124                        return bytes;
 125
 126                /* Must write data /after/ reading the consumer index. */
 127                virt_mb();
 128
 129                dst = get_output_chunk(cons, prod, intf->req, &avail);
 130                if (avail == 0)
 131                        continue;
 132                if (avail > len)
 133                        avail = len;
 134
 135                memcpy(dst, data, avail);
 136                data += avail;
 137                len -= avail;
 138                bytes += avail;
 139
 140                /* Other side must not see new producer until data is there. */
 141                virt_wmb();
 142                intf->req_prod += avail;
 143
 144                /* Implies mb(): other side will see the updated producer. */
 145                if (prod <= intf->req_cons)
 146                        notify_remote_via_evtchn(xen_store_evtchn);
 147        }
 148
 149        return bytes;
 150}
 151
 152static int xb_data_to_read(void)
 153{
 154        struct xenstore_domain_interface *intf = xen_store_interface;
 155        return (intf->rsp_cons != intf->rsp_prod);
 156}
 157
 158static int xb_read(void *data, unsigned int len)
 159{
 160        struct xenstore_domain_interface *intf = xen_store_interface;
 161        XENSTORE_RING_IDX cons, prod;
 162        unsigned int bytes = 0;
 163
 164        while (len != 0) {
 165                unsigned int avail;
 166                const char *src;
 167
 168                /* Read indexes, then verify. */
 169                cons = intf->rsp_cons;
 170                prod = intf->rsp_prod;
 171                if (cons == prod)
 172                        return bytes;
 173
 174                if (!check_indexes(cons, prod)) {
 175                        intf->rsp_cons = intf->rsp_prod = 0;
 176                        return -EIO;
 177                }
 178
 179                src = get_input_chunk(cons, prod, intf->rsp, &avail);
 180                if (avail == 0)
 181                        continue;
 182                if (avail > len)
 183                        avail = len;
 184
 185                /* Must read data /after/ reading the producer index. */
 186                virt_rmb();
 187
 188                memcpy(data, src, avail);
 189                data += avail;
 190                len -= avail;
 191                bytes += avail;
 192
 193                /* Other side must not see free space until we've copied out */
 194                virt_mb();
 195                intf->rsp_cons += avail;
 196
 197                /* Implies mb(): other side will see the updated consumer. */
 198                if (intf->rsp_prod - cons >= XENSTORE_RING_SIZE)
 199                        notify_remote_via_evtchn(xen_store_evtchn);
 200        }
 201
 202        return bytes;
 203}
 204
 205static int process_msg(void)
 206{
 207        static struct {
 208                struct xsd_sockmsg msg;
 209                char *body;
 210                union {
 211                        void *alloc;
 212                        struct xs_watch_event *watch;
 213                };
 214                bool in_msg;
 215                bool in_hdr;
 216                unsigned int read;
 217        } state;
 218        struct xb_req_data *req;
 219        int err;
 220        unsigned int len;
 221
 222        if (!state.in_msg) {
 223                state.in_msg = true;
 224                state.in_hdr = true;
 225                state.read = 0;
 226
 227                /*
 228                 * We must disallow save/restore while reading a message.
 229                 * A partial read across s/r leaves us out of sync with
 230                 * xenstored.
 231                 * xs_response_mutex is locked as long as we are processing one
 232                 * message. state.in_msg will be true as long as we are holding
 233                 * the lock here.
 234                 */
 235                mutex_lock(&xs_response_mutex);
 236
 237                if (!xb_data_to_read()) {
 238                        /* We raced with save/restore: pending data 'gone'. */
 239                        mutex_unlock(&xs_response_mutex);
 240                        state.in_msg = false;
 241                        return 0;
 242                }
 243        }
 244
 245        if (state.in_hdr) {
 246                if (state.read != sizeof(state.msg)) {
 247                        err = xb_read((void *)&state.msg + state.read,
 248                                      sizeof(state.msg) - state.read);
 249                        if (err < 0)
 250                                goto out;
 251                        state.read += err;
 252                        if (state.read != sizeof(state.msg))
 253                                return 0;
 254                        if (state.msg.len > XENSTORE_PAYLOAD_MAX) {
 255                                err = -EINVAL;
 256                                goto out;
 257                        }
 258                }
 259
 260                len = state.msg.len + 1;
 261                if (state.msg.type == XS_WATCH_EVENT)
 262                        len += sizeof(*state.watch);
 263
 264                state.alloc = kmalloc(len, GFP_NOIO | __GFP_HIGH);
 265                if (!state.alloc)
 266                        return -ENOMEM;
 267
 268                if (state.msg.type == XS_WATCH_EVENT)
 269                        state.body = state.watch->body;
 270                else
 271                        state.body = state.alloc;
 272                state.in_hdr = false;
 273                state.read = 0;
 274        }
 275
 276        err = xb_read(state.body + state.read, state.msg.len - state.read);
 277        if (err < 0)
 278                goto out;
 279
 280        state.read += err;
 281        if (state.read != state.msg.len)
 282                return 0;
 283
 284        state.body[state.msg.len] = '\0';
 285
 286        if (state.msg.type == XS_WATCH_EVENT) {
 287                state.watch->len = state.msg.len;
 288                err = xs_watch_msg(state.watch);
 289        } else {
 290                err = -ENOENT;
 291                mutex_lock(&xb_write_mutex);
 292                list_for_each_entry(req, &xs_reply_list, list) {
 293                        if (req->msg.req_id == state.msg.req_id) {
 294                                list_del(&req->list);
 295                                err = 0;
 296                                break;
 297                        }
 298                }
 299                mutex_unlock(&xb_write_mutex);
 300                if (err)
 301                        goto out;
 302
 303                if (req->state == xb_req_state_wait_reply) {
 304                        req->msg.req_id = req->caller_req_id;
 305                        req->msg.type = state.msg.type;
 306                        req->msg.len = state.msg.len;
 307                        req->body = state.body;
 308                        /* write body, then update state */
 309                        virt_wmb();
 310                        req->state = xb_req_state_got_reply;
 311                        req->cb(req);
 312                } else
 313                        kfree(req);
 314        }
 315
 316        mutex_unlock(&xs_response_mutex);
 317
 318        state.in_msg = false;
 319        state.alloc = NULL;
 320        return err;
 321
 322 out:
 323        mutex_unlock(&xs_response_mutex);
 324        state.in_msg = false;
 325        kfree(state.alloc);
 326        state.alloc = NULL;
 327        return err;
 328}
 329
 330static int process_writes(void)
 331{
 332        static struct {
 333                struct xb_req_data *req;
 334                int idx;
 335                unsigned int written;
 336        } state;
 337        void *base;
 338        unsigned int len;
 339        int err = 0;
 340
 341        if (!xb_data_to_write())
 342                return 0;
 343
 344        mutex_lock(&xb_write_mutex);
 345
 346        if (!state.req) {
 347                state.req = list_first_entry(&xb_write_list,
 348                                             struct xb_req_data, list);
 349                state.idx = -1;
 350                state.written = 0;
 351        }
 352
 353        if (state.req->state == xb_req_state_aborted)
 354                goto out_err;
 355
 356        while (state.idx < state.req->num_vecs) {
 357                if (state.idx < 0) {
 358                        base = &state.req->msg;
 359                        len = sizeof(state.req->msg);
 360                } else {
 361                        base = state.req->vec[state.idx].iov_base;
 362                        len = state.req->vec[state.idx].iov_len;
 363                }
 364                err = xb_write(base + state.written, len - state.written);
 365                if (err < 0)
 366                        goto out_err;
 367                state.written += err;
 368                if (state.written != len)
 369                        goto out;
 370
 371                state.idx++;
 372                state.written = 0;
 373        }
 374
 375        list_del(&state.req->list);
 376        state.req->state = xb_req_state_wait_reply;
 377        list_add_tail(&state.req->list, &xs_reply_list);
 378        state.req = NULL;
 379
 380 out:
 381        mutex_unlock(&xb_write_mutex);
 382
 383        return 0;
 384
 385 out_err:
 386        state.req->msg.type = XS_ERROR;
 387        state.req->err = err;
 388        list_del(&state.req->list);
 389        if (state.req->state == xb_req_state_aborted)
 390                kfree(state.req);
 391        else {
 392                /* write err, then update state */
 393                virt_wmb();
 394                state.req->state = xb_req_state_got_reply;
 395                wake_up(&state.req->wq);
 396        }
 397
 398        mutex_unlock(&xb_write_mutex);
 399
 400        state.req = NULL;
 401
 402        return err;
 403}
 404
 405static int xb_thread_work(void)
 406{
 407        return xb_data_to_read() || xb_data_to_write();
 408}
 409
 410static int xenbus_thread(void *unused)
 411{
 412        int err;
 413
 414        while (!kthread_should_stop()) {
 415                if (wait_event_interruptible(xb_waitq, xb_thread_work()))
 416                        continue;
 417
 418                err = process_msg();
 419                if (err == -ENOMEM)
 420                        schedule();
 421                else if (err)
 422                        pr_warn_ratelimited("error %d while reading message\n",
 423                                            err);
 424
 425                err = process_writes();
 426                if (err)
 427                        pr_warn_ratelimited("error %d while writing message\n",
 428                                            err);
 429        }
 430
 431        xenbus_task = NULL;
 432        return 0;
 433}
 434
 435/**
 436 * xb_init_comms - Set up interrupt handler off store event channel.
 437 */
 438int xb_init_comms(void)
 439{
 440        struct xenstore_domain_interface *intf = xen_store_interface;
 441
 442        if (intf->req_prod != intf->req_cons)
 443                pr_err("request ring is not quiescent (%08x:%08x)!\n",
 444                       intf->req_cons, intf->req_prod);
 445
 446        if (intf->rsp_prod != intf->rsp_cons) {
 447                pr_warn("response ring is not quiescent (%08x:%08x): fixing up\n",
 448                        intf->rsp_cons, intf->rsp_prod);
 449                /* breaks kdump */
 450                if (!reset_devices)
 451                        intf->rsp_cons = intf->rsp_prod;
 452        }
 453
 454        if (xenbus_irq) {
 455                /* Already have an irq; assume we're resuming */
 456                rebind_evtchn_irq(xen_store_evtchn, xenbus_irq);
 457        } else {
 458                int err;
 459
 460                err = bind_evtchn_to_irqhandler(xen_store_evtchn, wake_waiting,
 461                                                0, "xenbus", &xb_waitq);
 462                if (err < 0) {
 463                        pr_err("request irq failed %i\n", err);
 464                        return err;
 465                }
 466
 467                xenbus_irq = err;
 468
 469                if (!xenbus_task) {
 470                        xenbus_task = kthread_run(xenbus_thread, NULL,
 471                                                  "xenbus");
 472                        if (IS_ERR(xenbus_task))
 473                                return PTR_ERR(xenbus_task);
 474                }
 475        }
 476
 477        return 0;
 478}
 479
 480void xb_deinit_comms(void)
 481{
 482        unbind_from_irqhandler(xenbus_irq, &xb_waitq);
 483        xenbus_irq = 0;
 484}
 485