linux/drivers/xen/xenbus/xenbus_comms.c
/******************************************************************************
 * xenbus_comms.c
 *
 * Low level code to talk to Xen Store: ring buffer and event channel.
 *
 * Copyright (C) 2005 Rusty Russell, IBM Corporation
 *
 * This program is free software; you can redistribute it and/or
 * modify it under the terms of the GNU General Public License version 2
 * as published by the Free Software Foundation; or, when distributed
 * separately from the Linux kernel or incorporated into other
 * software packages, subject to the following license:
 *
 * Permission is hereby granted, free of charge, to any person obtaining a copy
 * of this source file (the "Software"), to deal in the Software without
 * restriction, including without limitation the rights to use, copy, modify,
 * merge, publish, distribute, sublicense, and/or sell copies of the Software,
 * and to permit persons to whom the Software is furnished to do so, subject to
 * the following conditions:
 *
 * The above copyright notice and this permission notice shall be included in
 * all copies or substantial portions of the Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
 * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
 * IN THE SOFTWARE.
 */

#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt

#include <linux/wait.h>
#include <linux/interrupt.h>
#include <linux/kthread.h>
#include <linux/sched.h>
#include <linux/err.h>
#include <xen/xenbus.h>
#include <asm/xen/hypervisor.h>
#include <xen/events.h>
#include <xen/page.h>
#include "xenbus.h"

/* A list of replies. Currently only one will ever be outstanding. */
LIST_HEAD(xs_reply_list);

/* A list of write requests. */
LIST_HEAD(xb_write_list);
DECLARE_WAIT_QUEUE_HEAD(xb_waitq);
DEFINE_MUTEX(xb_write_mutex);

/* Protect xenbus reader thread against save/restore. */
DEFINE_MUTEX(xs_response_mutex);

static int xenbus_irq;
static struct task_struct *xenbus_task;

static DECLARE_WORK(probe_work, xenbus_probe);

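/*
 * Interrupt handler for the xenstore event channel: on the first event,
 * mark xenstored as ready and schedule the initial bus probe, then wake
 * anyone waiting on the ring.
 */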
static irqreturn_t wake_waiting(int irq, void *unused)
{
        if (unlikely(xenstored_ready == 0)) {
                xenstored_ready = 1;
                schedule_work(&probe_work);
        }

        wake_up(&xb_waitq);
        return IRQ_HANDLED;
}

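/*
 * Sanity check the ring indexes: the producer may run ahead of the
 * consumer by at most one full ring.
 */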
static int check_indexes(XENSTORE_RING_IDX cons, XENSTORE_RING_IDX prod)
{
        return ((prod - cons) <= XENSTORE_RING_SIZE);
}

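/*
 * Return the largest contiguous free chunk of the request ring, bounded
 * both by the free space and by the wrap-around point.
 */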
static void *get_output_chunk(XENSTORE_RING_IDX cons,
                              XENSTORE_RING_IDX prod,
                              char *buf, uint32_t *len)
{
        *len = XENSTORE_RING_SIZE - MASK_XENSTORE_IDX(prod);
        if ((XENSTORE_RING_SIZE - (prod - cons)) < *len)
                *len = XENSTORE_RING_SIZE - (prod - cons);
        return buf + MASK_XENSTORE_IDX(prod);
}

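/*
 * Return the largest contiguous chunk of unconsumed data in the
 * response ring, again bounded by the wrap-around point.
 */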
static const void *get_input_chunk(XENSTORE_RING_IDX cons,
                                   XENSTORE_RING_IDX prod,
                                   const char *buf, uint32_t *len)
{
        *len = XENSTORE_RING_SIZE - MASK_XENSTORE_IDX(cons);
        if ((prod - cons) < *len)
                *len = prod - cons;
        return buf + MASK_XENSTORE_IDX(cons);
}

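/* True if a request is queued and the request ring has space left. */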
static int xb_data_to_write(void)
{
        struct xenstore_domain_interface *intf = xen_store_interface;

        return (intf->req_prod - intf->req_cons) != XENSTORE_RING_SIZE &&
                !list_empty(&xb_write_list);
}

/**
 * xb_write - low level write
 * @data: buffer to send
 * @len: length of buffer
 *
 * Returns number of bytes written or -err.
 */
static int xb_write(const void *data, unsigned int len)
{
        struct xenstore_domain_interface *intf = xen_store_interface;
        XENSTORE_RING_IDX cons, prod;
        unsigned int bytes = 0;

        while (len != 0) {
                void *dst;
                unsigned int avail;

                /* Read indexes, then verify. */
                cons = intf->req_cons;
                prod = intf->req_prod;
                if (!check_indexes(cons, prod)) {
                        intf->req_cons = intf->req_prod = 0;
                        return -EIO;
                }
                if (!xb_data_to_write())
                        return bytes;

                /* Must write data /after/ reading the consumer index. */
                virt_mb();

                dst = get_output_chunk(cons, prod, intf->req, &avail);
                if (avail == 0)
                        continue;
                if (avail > len)
                        avail = len;

                memcpy(dst, data, avail);
                data += avail;
                len -= avail;
                bytes += avail;

                /* Other side must not see new producer until data is there. */
                virt_wmb();
                intf->req_prod += avail;

                /* Implies mb(): other side will see the updated producer. */
                if (prod <= intf->req_cons)
                        notify_remote_via_evtchn(xen_store_evtchn);
        }

        return bytes;
}

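/* True if the response ring contains unconsumed data. */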
static int xb_data_to_read(void)
{
        struct xenstore_domain_interface *intf = xen_store_interface;
        return (intf->rsp_cons != intf->rsp_prod);
}

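/**
 * xb_read - low level read
 * @data: buffer to fill
 * @len: length of buffer
 *
 * Returns number of bytes read or -err.
 */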
static int xb_read(void *data, unsigned int len)
{
        struct xenstore_domain_interface *intf = xen_store_interface;
        XENSTORE_RING_IDX cons, prod;
        unsigned int bytes = 0;

        while (len != 0) {
                unsigned int avail;
                const char *src;

                /* Read indexes, then verify. */
                cons = intf->rsp_cons;
                prod = intf->rsp_prod;
                if (cons == prod)
                        return bytes;

                if (!check_indexes(cons, prod)) {
                        intf->rsp_cons = intf->rsp_prod = 0;
                        return -EIO;
                }

                src = get_input_chunk(cons, prod, intf->rsp, &avail);
                if (avail == 0)
                        continue;
                if (avail > len)
                        avail = len;

                /* Must read data /after/ reading the producer index. */
                virt_rmb();

                memcpy(data, src, avail);
                data += avail;
                len -= avail;
                bytes += avail;

                /* Other side must not see free space until we've copied out */
                virt_mb();
                intf->rsp_cons += avail;

                /* Implies mb(): other side will see the updated consumer. */
                if (intf->rsp_prod - cons >= XENSTORE_RING_SIZE)
                        notify_remote_via_evtchn(xen_store_evtchn);
        }

        return bytes;
}

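/*
 * Read and dispatch one message from the response ring.  All state is
 * kept in a static struct so that a partially read message survives
 * across calls: the function returns 0 and resumes where it left off
 * once more data arrives.  Watch events are handed to xs_watch_msg();
 * replies are matched by req_id against the requests parked on
 * xs_reply_list and their callbacks invoked.
 */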
static int process_msg(void)
{
        static struct {
                struct xsd_sockmsg msg;
                char *body;
                union {
                        void *alloc;
                        struct xs_watch_event *watch;
                };
                bool in_msg;
                bool in_hdr;
                unsigned int read;
        } state;
        struct xb_req_data *req;
        int err;
        unsigned int len;

        if (!state.in_msg) {
                state.in_msg = true;
                state.in_hdr = true;
                state.read = 0;

                /*
                 * We must disallow save/restore while reading a message.
                 * A partial read across s/r leaves us out of sync with
                 * xenstored.
                 * xs_response_mutex is locked as long as we are processing one
                 * message. state.in_msg will be true as long as we are holding
                 * the lock here.
                 */
                mutex_lock(&xs_response_mutex);

                if (!xb_data_to_read()) {
                        /* We raced with save/restore: pending data 'gone'. */
                        mutex_unlock(&xs_response_mutex);
                        state.in_msg = false;
                        return 0;
                }
        }

        if (state.in_hdr) {
                if (state.read != sizeof(state.msg)) {
                        err = xb_read((void *)&state.msg + state.read,
                                      sizeof(state.msg) - state.read);
                        if (err < 0)
                                goto out;
                        state.read += err;
                        if (state.read != sizeof(state.msg))
                                return 0;
                        if (state.msg.len > XENSTORE_PAYLOAD_MAX) {
                                err = -EINVAL;
                                goto out;
                        }
                }

                len = state.msg.len + 1;
                if (state.msg.type == XS_WATCH_EVENT)
                        len += sizeof(*state.watch);

                state.alloc = kmalloc(len, GFP_NOIO | __GFP_HIGH);
                if (!state.alloc)
                        return -ENOMEM;

                if (state.msg.type == XS_WATCH_EVENT)
                        state.body = state.watch->body;
                else
                        state.body = state.alloc;
                state.in_hdr = false;
                state.read = 0;
        }

        err = xb_read(state.body + state.read, state.msg.len - state.read);
        if (err < 0)
                goto out;

        state.read += err;
        if (state.read != state.msg.len)
                return 0;

        state.body[state.msg.len] = '\0';

        if (state.msg.type == XS_WATCH_EVENT) {
                state.watch->len = state.msg.len;
                err = xs_watch_msg(state.watch);
        } else {
                err = -ENOENT;
                mutex_lock(&xb_write_mutex);
                list_for_each_entry(req, &xs_reply_list, list) {
                        if (req->msg.req_id == state.msg.req_id) {
                                list_del(&req->list);
                                err = 0;
                                break;
                        }
                }
                mutex_unlock(&xb_write_mutex);
                if (err)
                        goto out;

                if (req->state == xb_req_state_wait_reply) {
                        req->msg.req_id = req->caller_req_id;
                        req->msg.type = state.msg.type;
                        req->msg.len = state.msg.len;
                        req->body = state.body;
                        req->state = xb_req_state_got_reply;
                        req->cb(req);
                } else
                        kfree(req);
        }

        mutex_unlock(&xs_response_mutex);

        state.in_msg = false;
        state.alloc = NULL;
        return err;

 out:
        mutex_unlock(&xs_response_mutex);
        state.in_msg = false;
        kfree(state.alloc);
        state.alloc = NULL;
        return err;
}

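/*
 * Push the request at the head of xb_write_list out to xenstored.
 * Partial-write state (current request, current iovec, bytes written)
 * is static so an incomplete write is resumed on the next call.  A
 * fully written request is moved to xs_reply_list to await its reply.
 */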
static int process_writes(void)
{
        static struct {
                struct xb_req_data *req;
                int idx;
                unsigned int written;
        } state;
        void *base;
        unsigned int len;
        int err = 0;

        if (!xb_data_to_write())
                return 0;

        mutex_lock(&xb_write_mutex);

        if (!state.req) {
                state.req = list_first_entry(&xb_write_list,
                                             struct xb_req_data, list);
                state.idx = -1;
                state.written = 0;
        }

        if (state.req->state == xb_req_state_aborted)
                goto out_err;

        while (state.idx < state.req->num_vecs) {
                if (state.idx < 0) {
                        base = &state.req->msg;
                        len = sizeof(state.req->msg);
                } else {
                        base = state.req->vec[state.idx].iov_base;
                        len = state.req->vec[state.idx].iov_len;
                }
                err = xb_write(base + state.written, len - state.written);
                if (err < 0)
                        goto out_err;
                state.written += err;
                if (state.written != len)
                        goto out;

                state.idx++;
                state.written = 0;
        }

        list_del(&state.req->list);
        state.req->state = xb_req_state_wait_reply;
        list_add_tail(&state.req->list, &xs_reply_list);
        state.req = NULL;

 out:
        mutex_unlock(&xb_write_mutex);

        return 0;

 out_err:
        state.req->msg.type = XS_ERROR;
        state.req->err = err;
        list_del(&state.req->list);
        if (state.req->state == xb_req_state_aborted)
                kfree(state.req);
        else {
                state.req->state = xb_req_state_got_reply;
                wake_up(&state.req->wq);
        }

        mutex_unlock(&xb_write_mutex);

        state.req = NULL;

        return err;
}

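/* Wake condition for the xenbus thread: anything to read or write? */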
static int xb_thread_work(void)
{
        return xb_data_to_read() || xb_data_to_write();
}

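/*
 * Body of the xenbus kthread: sleep until woken by the interrupt
 * handler, then shuttle messages in both directions.  -ENOMEM from
 * process_msg() is retried after yielding the CPU; other errors are
 * logged (rate limited) and the loop carries on.
 */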
static int xenbus_thread(void *unused)
{
        int err;

        while (!kthread_should_stop()) {
                if (wait_event_interruptible(xb_waitq, xb_thread_work()))
                        continue;

                err = process_msg();
                if (err == -ENOMEM)
                        schedule();
                else if (err)
                        pr_warn_ratelimited("error %d while reading message\n",
                                            err);

                err = process_writes();
                if (err)
                        pr_warn_ratelimited("error %d while writing message\n",
                                            err);
        }

        xenbus_task = NULL;
        return 0;
}

/**
 * xb_init_comms - Set up interrupt handler off store event channel.
 */
int xb_init_comms(void)
{
        struct xenstore_domain_interface *intf = xen_store_interface;

        if (intf->req_prod != intf->req_cons)
                pr_err("request ring is not quiescent (%08x:%08x)!\n",
                       intf->req_cons, intf->req_prod);

        if (intf->rsp_prod != intf->rsp_cons) {
                pr_warn("response ring is not quiescent (%08x:%08x): fixing up\n",
                        intf->rsp_cons, intf->rsp_prod);
                /* breaks kdump */
                if (!reset_devices)
                        intf->rsp_cons = intf->rsp_prod;
        }

        if (xenbus_irq) {
                /* Already have an irq; assume we're resuming */
                rebind_evtchn_irq(xen_store_evtchn, xenbus_irq);
        } else {
                int err;

                err = bind_evtchn_to_irqhandler(xen_store_evtchn, wake_waiting,
                                                0, "xenbus", &xb_waitq);
                if (err < 0) {
                        pr_err("request irq failed %i\n", err);
                        return err;
                }

                xenbus_irq = err;

                if (!xenbus_task) {
                        xenbus_task = kthread_run(xenbus_thread, NULL,
                                                  "xenbus");
                        if (IS_ERR(xenbus_task))
                                return PTR_ERR(xenbus_task);
                }
        }

        return 0;
}

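/**
 * xb_deinit_comms - Tear down the event channel interrupt handler.
 */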
void xb_deinit_comms(void)
{
        unbind_from_irqhandler(xenbus_irq, &xb_waitq);
        xenbus_irq = 0;
}