linux/drivers/xen/xenbus/xenbus_client.c
/******************************************************************************
 * Client-facing interface for the Xenbus driver.  In other words, the
 * interface between the Xenbus and the device-specific code, be it the
 * frontend or the backend of that driver.
 *
 * Copyright (C) 2005 XenSource Ltd
 *
 * This program is free software; you can redistribute it and/or
 * modify it under the terms of the GNU General Public License version 2
 * as published by the Free Software Foundation; or, when distributed
 * separately from the Linux kernel or incorporated into other
 * software packages, subject to the following license:
 *
 * Permission is hereby granted, free of charge, to any person obtaining a copy
 * of this source file (the "Software"), to deal in the Software without
 * restriction, including without limitation the rights to use, copy, modify,
 * merge, publish, distribute, sublicense, and/or sell copies of the Software,
 * and to permit persons to whom the Software is furnished to do so, subject to
 * the following conditions:
 *
 * The above copyright notice and this permission notice shall be included in
 * all copies or substantial portions of the Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
 * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
 * IN THE SOFTWARE.
 */

#include <linux/mm.h>
#include <linux/slab.h>
#include <linux/types.h>
#include <linux/spinlock.h>
#include <linux/vmalloc.h>
#include <linux/export.h>
#include <asm/xen/hypervisor.h>
#include <xen/page.h>
#include <xen/interface/xen.h>
#include <xen/interface/event_channel.h>
#include <xen/balloon.h>
#include <xen/events.h>
#include <xen/grant_table.h>
#include <xen/xenbus.h>
#include <xen/xen.h>
#include <xen/features.h>

#include "xenbus_probe.h"

#define XENBUS_PAGES(_grants)   (DIV_ROUND_UP(_grants, XEN_PFN_PER_PAGE))

#define XENBUS_MAX_RING_PAGES   (XENBUS_PAGES(XENBUS_MAX_RING_GRANTS))

struct xenbus_map_node {
        struct list_head next;
        union {
                struct {
                        struct vm_struct *area;
                } pv;
                struct {
                        struct page *pages[XENBUS_MAX_RING_PAGES];
                        unsigned long addrs[XENBUS_MAX_RING_GRANTS];
                        void *addr;
                } hvm;
        };
        grant_handle_t handles[XENBUS_MAX_RING_GRANTS];
        unsigned int   nr_handles;
};

static DEFINE_SPINLOCK(xenbus_valloc_lock);
static LIST_HEAD(xenbus_valloc_pages);

struct xenbus_ring_ops {
        int (*map)(struct xenbus_device *dev,
                   grant_ref_t *gnt_refs, unsigned int nr_grefs,
                   void **vaddr);
        int (*unmap)(struct xenbus_device *dev, void *vaddr);
};

static const struct xenbus_ring_ops *ring_ops __read_mostly;

const char *xenbus_strstate(enum xenbus_state state)
{
        static const char *const name[] = {
                [ XenbusStateUnknown       ] = "Unknown",
                [ XenbusStateInitialising  ] = "Initialising",
                [ XenbusStateInitWait      ] = "InitWait",
                [ XenbusStateInitialised   ] = "Initialised",
                [ XenbusStateConnected     ] = "Connected",
                [ XenbusStateClosing       ] = "Closing",
                [ XenbusStateClosed        ] = "Closed",
                [ XenbusStateReconfiguring ] = "Reconfiguring",
                [ XenbusStateReconfigured  ] = "Reconfigured",
        };
        return (state < ARRAY_SIZE(name)) ? name[state] : "INVALID";
}
EXPORT_SYMBOL_GPL(xenbus_strstate);

/**
 * xenbus_watch_path - register a watch
 * @dev: xenbus device
 * @path: path to watch
 * @watch: watch to register
 * @callback: callback to register
 *
 * Register a @watch on the given path, using the given xenbus_watch structure
 * for storage, and the given @callback function as the callback.  Return 0 on
 * success, or -errno on error.  On success, the given @path will be saved as
 * @watch->node, and remains the caller's to free.  On error, @watch->node will
 * be NULL, the device will switch to %XenbusStateClosing, and the error will
 * be saved in the store.
 */
int xenbus_watch_path(struct xenbus_device *dev, const char *path,
                      struct xenbus_watch *watch,
                      void (*callback)(struct xenbus_watch *,
                                       const char **, unsigned int))
{
        int err;

        watch->node = path;
        watch->callback = callback;

        err = register_xenbus_watch(watch);

        if (err) {
                watch->node = NULL;
                watch->callback = NULL;
                xenbus_dev_fatal(dev, err, "adding watch on %s", path);
        }

        return err;
}
EXPORT_SYMBOL_GPL(xenbus_watch_path);
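
/*
 * Usage sketch: a typical frontend registers a watch on the backend's
 * "state" node during initialisation.  The names "backend_changed",
 * "my_info" and "info" are hypothetical; real drivers embed the
 * struct xenbus_watch in their private state.  On success the path is
 * saved in info->watch.node and must be freed by the caller after
 * unregister_xenbus_watch(); on failure it must be freed right away.
 *
 *      static void backend_changed(struct xenbus_watch *watch,
 *                                  const char **vec, unsigned int len)
 *      {
 *              ...
 *      }
 *
 *      static int watch_otherend(struct xenbus_device *dev,
 *                                struct my_info *info)
 *      {
 *              int err;
 *              char *path;
 *
 *              path = kasprintf(GFP_KERNEL, "%s/state", dev->otherend);
 *              if (!path)
 *                      return -ENOMEM;
 *              err = xenbus_watch_path(dev, path, &info->watch,
 *                                      backend_changed);
 *              if (err)
 *                      kfree(path);
 *              return err;
 *      }
 */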

/**
 * xenbus_watch_pathfmt - register a watch on a sprintf-formatted path
 * @dev: xenbus device
 * @watch: watch to register
 * @callback: callback to register
 * @pathfmt: format of path to watch
 *
 * Register a watch on the path built from @pathfmt and its arguments, using
 * the given xenbus_watch structure for storage, and the given @callback
 * function as the callback.  Return 0 on success, or -errno on error.  On
 * success, the formatted path will be saved as @watch->node, and becomes the
 * caller's to kfree().  On error, @watch->node will be NULL, so the caller
 * has nothing to free, the device will switch to %XenbusStateClosing, and
 * the error will be saved in the store.
 */
int xenbus_watch_pathfmt(struct xenbus_device *dev,
                         struct xenbus_watch *watch,
                         void (*callback)(struct xenbus_watch *,
                                        const char **, unsigned int),
                         const char *pathfmt, ...)
{
        int err;
        va_list ap;
        char *path;

        va_start(ap, pathfmt);
        path = kvasprintf(GFP_NOIO | __GFP_HIGH, pathfmt, ap);
        va_end(ap);

        if (!path) {
                xenbus_dev_fatal(dev, -ENOMEM, "allocating path for watch");
                return -ENOMEM;
        }
        err = xenbus_watch_path(dev, path, watch, callback);

        if (err)
                kfree(path);
        return err;
}
EXPORT_SYMBOL_GPL(xenbus_watch_pathfmt);
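
/*
 * Usage sketch: the same watch as above, with xenbus_watch_pathfmt()
 * building the path.  On success the formatted path is saved in
 * info->watch.node and becomes the caller's to kfree() once the watch
 * is unregistered; "info" and "backend_changed" are hypothetical.
 *
 *      err = xenbus_watch_pathfmt(dev, &info->watch, backend_changed,
 *                                 "%s/state", dev->otherend);
 */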

static void xenbus_switch_fatal(struct xenbus_device *, int, int,
                                const char *, ...);

static int
__xenbus_switch_state(struct xenbus_device *dev,
                      enum xenbus_state state, int depth)
{
        /* We check whether the state is currently set to the given value, and
           if not, then the state is set.  We don't want to unconditionally
           write the given state, because we don't want to fire watches
           unnecessarily.  Furthermore, if the node has gone, we don't write
           to it, as the device will be tearing down, and we don't want to
           resurrect that directory.

           Note that, because of this cached value of our state, this
           function will not take a caller's Xenstore transaction
           (something it attempted in the past) because dev->state
           would not get reset if the transaction was aborted.
         */

        struct xenbus_transaction xbt;
        int current_state;
        int err, abort;

        if (state == dev->state)
                return 0;

again:
        abort = 1;

        err = xenbus_transaction_start(&xbt);
        if (err) {
                xenbus_switch_fatal(dev, depth, err, "starting transaction");
                return 0;
        }

        err = xenbus_scanf(xbt, dev->nodename, "state", "%d", &current_state);
        if (err != 1)
                goto abort;

        err = xenbus_printf(xbt, dev->nodename, "state", "%d", state);
        if (err) {
                xenbus_switch_fatal(dev, depth, err, "writing new state");
                goto abort;
        }

        abort = 0;
abort:
        err = xenbus_transaction_end(xbt, abort);
        if (err) {
                if (err == -EAGAIN && !abort)
                        goto again;
                xenbus_switch_fatal(dev, depth, err, "ending transaction");
        } else
                dev->state = state;

        return 0;
}

/**
 * xenbus_switch_state - advertise a state change
 * @dev: xenbus device
 * @state: new state
 *
 * Advertise in the store a change of the given driver to the given @state.
 * Return 0 on success, or -errno on error.  On error, the device will switch
 * to XenbusStateClosing, and the error will be saved in the store.
 */
int xenbus_switch_state(struct xenbus_device *dev, enum xenbus_state state)
{
        return __xenbus_switch_state(dev, state, 0);
}
EXPORT_SYMBOL_GPL(xenbus_switch_state);
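
/*
 * Usage sketch: drivers usually call xenbus_switch_state() from their
 * otherend_changed handler to walk the connection handshake.  A backend
 * answering a frontend that has reached XenbusStateInitialised might do
 * (the handler name and the ring/evtchn setup step are hypothetical):
 *
 *      static void frontend_changed(struct xenbus_device *dev,
 *                                   enum xenbus_state frontend_state)
 *      {
 *              switch (frontend_state) {
 *              case XenbusStateInitialised:
 *                      ...     map rings, bind the event channel, then:
 *                      xenbus_switch_state(dev, XenbusStateConnected);
 *                      break;
 *              case XenbusStateClosing:
 *                      xenbus_switch_state(dev, XenbusStateClosing);
 *                      break;
 *              default:
 *                      break;
 *              }
 *      }
 */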

int xenbus_frontend_closed(struct xenbus_device *dev)
{
        xenbus_switch_state(dev, XenbusStateClosed);
        complete(&dev->down);
        return 0;
}
EXPORT_SYMBOL_GPL(xenbus_frontend_closed);

/*
 * Return the path to the error node for the given device, or NULL on failure.
 * If the value returned is non-NULL, then it is the caller's to kfree.
 */
static char *error_path(struct xenbus_device *dev)
{
        return kasprintf(GFP_KERNEL, "error/%s", dev->nodename);
}


static void xenbus_va_dev_error(struct xenbus_device *dev, int err,
                                const char *fmt, va_list ap)
{
        unsigned int len;
        char *printf_buffer = NULL;
        char *path_buffer = NULL;

#define PRINTF_BUFFER_SIZE 4096
        printf_buffer = kmalloc(PRINTF_BUFFER_SIZE, GFP_KERNEL);
        if (printf_buffer == NULL)
                goto fail;

        len = sprintf(printf_buffer, "%i ", -err);
        vsnprintf(printf_buffer+len, PRINTF_BUFFER_SIZE-len, fmt, ap);

        dev_err(&dev->dev, "%s\n", printf_buffer);

        path_buffer = error_path(dev);

        if (path_buffer == NULL) {
                dev_err(&dev->dev, "failed to write error node for %s (%s)\n",
                        dev->nodename, printf_buffer);
                goto fail;
        }

        if (xenbus_write(XBT_NIL, path_buffer, "error", printf_buffer) != 0) {
                dev_err(&dev->dev, "failed to write error node for %s (%s)\n",
                        dev->nodename, printf_buffer);
                goto fail;
        }

fail:
        kfree(printf_buffer);
        kfree(path_buffer);
}


/**
 * xenbus_dev_error - place an error message into the store
 * @dev: xenbus device
 * @err: error to report
 * @fmt: error message format
 *
 * Report the given negative errno into the store, along with the given
 * formatted message.
 */
void xenbus_dev_error(struct xenbus_device *dev, int err, const char *fmt, ...)
{
        va_list ap;

        va_start(ap, fmt);
        xenbus_va_dev_error(dev, err, fmt, ap);
        va_end(ap);
}
EXPORT_SYMBOL_GPL(xenbus_dev_error);

/**
 * xenbus_dev_fatal - report an error and close down the connection
 * @dev: xenbus device
 * @err: error to report
 * @fmt: error message format
 *
 * Equivalent to xenbus_dev_error(dev, err, fmt, args), followed by
 * xenbus_switch_state(dev, XenbusStateClosing) to schedule an orderly
 * closedown of this driver and its peer.
 */
void xenbus_dev_fatal(struct xenbus_device *dev, int err, const char *fmt, ...)
{
        va_list ap;

        va_start(ap, fmt);
        xenbus_va_dev_error(dev, err, fmt, ap);
        va_end(ap);

        xenbus_switch_state(dev, XenbusStateClosing);
}
EXPORT_SYMBOL_GPL(xenbus_dev_fatal);
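
/*
 * Usage sketch: xenbus_dev_error() suits recoverable conditions, while
 * xenbus_dev_fatal() is for errors that require tearing the connection
 * down.  "setup_ring" is a hypothetical helper:
 *
 *      err = setup_ring(dev);
 *      if (err) {
 *              xenbus_dev_fatal(dev, err, "setting up ring for %s",
 *                               dev->nodename);
 *              return err;
 *      }
 */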

/*
 * Equivalent to xenbus_dev_fatal(dev, err, fmt, args), but helps
 * avoid recursion within xenbus_switch_state.
 */
static void xenbus_switch_fatal(struct xenbus_device *dev, int depth, int err,
                                const char *fmt, ...)
{
        va_list ap;

        va_start(ap, fmt);
        xenbus_va_dev_error(dev, err, fmt, ap);
        va_end(ap);

        if (!depth)
                __xenbus_switch_state(dev, XenbusStateClosing, 1);
}

/**
 * xenbus_grant_ring - grant access to a ring
 * @dev: xenbus device
 * @vaddr: starting virtual address of the ring
 * @nr_pages: number of pages to be granted
 * @grefs: grant reference array to be filled in
 *
 * Grant access to the given @vaddr to the peer of the given device.
 * Then fill in @grefs with grant references.  Return 0 on success, or
 * -errno on error.  On error, the device will switch to
 * XenbusStateClosing, and the error will be saved in the store.
 */
int xenbus_grant_ring(struct xenbus_device *dev, void *vaddr,
                      unsigned int nr_pages, grant_ref_t *grefs)
{
        int err;
        int i, j;

        for (i = 0; i < nr_pages; i++) {
                err = gnttab_grant_foreign_access(dev->otherend_id,
                                                  virt_to_gfn(vaddr), 0);
                if (err < 0) {
                        xenbus_dev_fatal(dev, err,
                                         "granting access to ring page");
                        goto fail;
                }
                grefs[i] = err;

                vaddr = vaddr + XEN_PAGE_SIZE;
        }

        return 0;

fail:
        for (j = 0; j < i; j++)
                gnttab_end_foreign_access_ref(grefs[j], 0);
        return err;
}
EXPORT_SYMBOL_GPL(xenbus_grant_ring);
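
/*
 * Usage sketch: a frontend grants its shared ring to the backend.  Here
 * "ring" is assumed to be a page-aligned buffer of
 * nr_pages * XEN_PAGE_SIZE bytes (e.g. from __get_free_pages()), and
 * "grefs" must have room for one grant_ref_t per page.  The references
 * would then be advertised to the peer, e.g. as "ring-ref%u" nodes:
 *
 *      grant_ref_t grefs[XENBUS_MAX_RING_GRANTS];
 *      int err;
 *
 *      err = xenbus_grant_ring(dev, ring, nr_pages, grefs);
 *      if (err)
 *              return err;     (the device is already switching to Closing)
 */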

/**
 * xenbus_alloc_evtchn - allocate an event channel
 * @dev: xenbus device
 * @port: filled in with the newly allocated local port
 *
 * Allocate an event channel for the given xenbus_device, assigning the newly
 * created local port to *@port.  Return 0 on success, or -errno on error.  On
 * error, the device will switch to XenbusStateClosing, and the error will be
 * saved in the store.
 */
int xenbus_alloc_evtchn(struct xenbus_device *dev, int *port)
{
        struct evtchn_alloc_unbound alloc_unbound;
        int err;

        alloc_unbound.dom = DOMID_SELF;
        alloc_unbound.remote_dom = dev->otherend_id;

        err = HYPERVISOR_event_channel_op(EVTCHNOP_alloc_unbound,
                                          &alloc_unbound);
        if (err)
                xenbus_dev_fatal(dev, err, "allocating event channel");
        else
                *port = alloc_unbound.port;

        return err;
}
EXPORT_SYMBOL_GPL(xenbus_alloc_evtchn);
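
/*
 * Usage sketch: allocate an unbound channel for the peer and bind it to
 * a local interrupt handler; "my_interrupt" is a hypothetical
 * irq_handler_t.  The port would then be advertised to the peer, e.g.
 * as an "event-channel" node:
 *
 *      int port, irq;
 *
 *      err = xenbus_alloc_evtchn(dev, &port);
 *      if (err)
 *              return err;
 *      irq = bind_evtchn_to_irqhandler(port, my_interrupt, 0,
 *                                      "mydevice", dev);
 *      if (irq < 0) {
 *              xenbus_free_evtchn(dev, port);
 *              return irq;
 *      }
 */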

/**
 * xenbus_free_evtchn - free an event channel
 * @dev: xenbus device
 * @port: local port of the channel to free
 *
 * Free an existing event channel.  Return 0 on success, or -errno on error.
 */
int xenbus_free_evtchn(struct xenbus_device *dev, int port)
{
        struct evtchn_close close;
        int err;

        close.port = port;

        err = HYPERVISOR_event_channel_op(EVTCHNOP_close, &close);
        if (err)
                xenbus_dev_error(dev, err, "freeing event channel %d", port);

        return err;
}
EXPORT_SYMBOL_GPL(xenbus_free_evtchn);


/**
 * xenbus_map_ring_valloc - allocate and map pages of memory into this domain
 * @dev: xenbus device
 * @gnt_refs: grant reference array
 * @nr_grefs: number of grant references
 * @vaddr: pointer to address to be filled out by mapping
 *
 * Map @nr_grefs pages of memory into this domain from another
 * domain's grant table.  xenbus_map_ring_valloc allocates @nr_grefs
 * pages of virtual address space, maps the pages to that address, and
 * sets *@vaddr to that address.  Return 0 on success, or a GNTST_*
 * status code (see xen/include/interface/grant_table.h) or
 * -ENOMEM / -EINVAL on error.  If an error is returned, the device
 * will switch to XenbusStateClosing and the error message will be
 * saved in XenStore.
 */
int xenbus_map_ring_valloc(struct xenbus_device *dev, grant_ref_t *gnt_refs,
                           unsigned int nr_grefs, void **vaddr)
{
        return ring_ops->map(dev, gnt_refs, nr_grefs, vaddr);
}
EXPORT_SYMBOL_GPL(xenbus_map_ring_valloc);
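
/*
 * Usage sketch: a backend maps the grant reference the frontend
 * advertised, and later releases the mapping with
 * xenbus_unmap_ring_vfree().  "ring_ref" is assumed to have been read
 * from the frontend's xenstore directory:
 *
 *      void *ring;
 *      int err;
 *
 *      err = xenbus_map_ring_valloc(dev, &ring_ref, 1, &ring);
 *      if (err)
 *              return err;
 *      ...
 *      xenbus_unmap_ring_vfree(dev, ring);
 */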

/* N.B. sizeof(phys_addr_t) doesn't always equal sizeof(unsigned
 * long), e.g. 32-on-64.  The caller is responsible for preparing the
 * right array to feed into this function. */
static int __xenbus_map_ring(struct xenbus_device *dev,
                             grant_ref_t *gnt_refs,
                             unsigned int nr_grefs,
                             grant_handle_t *handles,
                             phys_addr_t *addrs,
                             unsigned int flags,
                             bool *leaked)
{
        struct gnttab_map_grant_ref map[XENBUS_MAX_RING_GRANTS];
        struct gnttab_unmap_grant_ref unmap[XENBUS_MAX_RING_GRANTS];
        int i, j;
        int err = GNTST_okay;

        if (nr_grefs > XENBUS_MAX_RING_GRANTS)
                return -EINVAL;

        for (i = 0; i < nr_grefs; i++) {
                memset(&map[i], 0, sizeof(map[i]));
                gnttab_set_map_op(&map[i], addrs[i], flags, gnt_refs[i],
                                  dev->otherend_id);
                handles[i] = INVALID_GRANT_HANDLE;
        }

        gnttab_batch_map(map, i);

        for (i = 0; i < nr_grefs; i++) {
                if (map[i].status != GNTST_okay) {
                        err = map[i].status;
                        xenbus_dev_fatal(dev, map[i].status,
                                         "mapping in shared page %d from domain %d",
                                         gnt_refs[i], dev->otherend_id);
                        goto fail;
                } else
                        handles[i] = map[i].handle;
        }

        return GNTST_okay;

 fail:
        for (i = j = 0; i < nr_grefs; i++) {
                if (handles[i] != INVALID_GRANT_HANDLE) {
                        memset(&unmap[j], 0, sizeof(unmap[j]));
                        gnttab_set_unmap_op(&unmap[j], (phys_addr_t)addrs[i],
                                            GNTMAP_host_map, handles[i]);
                        j++;
                }
        }

        if (HYPERVISOR_grant_table_op(GNTTABOP_unmap_grant_ref, unmap, j))
                BUG();

        *leaked = false;
        for (i = 0; i < j; i++) {
                if (unmap[i].status != GNTST_okay) {
                        *leaked = true;
                        break;
                }
        }

        return err;
}

static int xenbus_map_ring_valloc_pv(struct xenbus_device *dev,
                                     grant_ref_t *gnt_refs,
                                     unsigned int nr_grefs,
                                     void **vaddr)
{
        struct xenbus_map_node *node;
        struct vm_struct *area;
        pte_t *ptes[XENBUS_MAX_RING_GRANTS];
        phys_addr_t phys_addrs[XENBUS_MAX_RING_GRANTS];
        int err = GNTST_okay;
        int i;
        bool leaked;

        *vaddr = NULL;

        if (nr_grefs > XENBUS_MAX_RING_GRANTS)
                return -EINVAL;

        node = kzalloc(sizeof(*node), GFP_KERNEL);
        if (!node)
                return -ENOMEM;

        area = alloc_vm_area(XEN_PAGE_SIZE * nr_grefs, ptes);
        if (!area) {
                kfree(node);
                return -ENOMEM;
        }

        for (i = 0; i < nr_grefs; i++)
                phys_addrs[i] = arbitrary_virt_to_machine(ptes[i]).maddr;

        err = __xenbus_map_ring(dev, gnt_refs, nr_grefs, node->handles,
                                phys_addrs,
                                GNTMAP_host_map | GNTMAP_contains_pte,
                                &leaked);
        if (err)
                goto failed;

        node->nr_handles = nr_grefs;
        node->pv.area = area;

        spin_lock(&xenbus_valloc_lock);
        list_add(&node->next, &xenbus_valloc_pages);
        spin_unlock(&xenbus_valloc_lock);

        *vaddr = area->addr;
        return 0;

failed:
        if (!leaked)
                free_vm_area(area);
        else
                pr_alert("leaking VM area %p size %u page(s)", area, nr_grefs);

        kfree(node);
        return err;
}

struct map_ring_valloc_hvm {
        unsigned int idx;

        /* Why do we need two arrays? See the comment above __xenbus_map_ring. */
        phys_addr_t phys_addrs[XENBUS_MAX_RING_GRANTS];
        unsigned long addrs[XENBUS_MAX_RING_GRANTS];
};

static void xenbus_map_ring_setup_grant_hvm(unsigned long gfn,
                                            unsigned int goffset,
                                            unsigned int len,
                                            void *data)
{
        struct map_ring_valloc_hvm *info = data;
        unsigned long vaddr = (unsigned long)gfn_to_virt(gfn);

        info->phys_addrs[info->idx] = vaddr;
        info->addrs[info->idx] = vaddr;

        info->idx++;
}

static int xenbus_map_ring_valloc_hvm(struct xenbus_device *dev,
                                      grant_ref_t *gnt_ref,
                                      unsigned int nr_grefs,
                                      void **vaddr)
{
        struct xenbus_map_node *node;
        int err;
        void *addr;
        bool leaked = false;
        struct map_ring_valloc_hvm info = {
                .idx = 0,
        };
        unsigned int nr_pages = XENBUS_PAGES(nr_grefs);

        if (nr_grefs > XENBUS_MAX_RING_GRANTS)
                return -EINVAL;

        *vaddr = NULL;

        node = kzalloc(sizeof(*node), GFP_KERNEL);
        if (!node)
                return -ENOMEM;

        err = alloc_xenballooned_pages(nr_pages, node->hvm.pages);
        if (err)
                goto out_err;

        gnttab_foreach_grant(node->hvm.pages, nr_grefs,
                             xenbus_map_ring_setup_grant_hvm,
                             &info);

        err = __xenbus_map_ring(dev, gnt_ref, nr_grefs, node->handles,
                                info.phys_addrs, GNTMAP_host_map, &leaked);
        node->nr_handles = nr_grefs;

        if (err)
                goto out_free_ballooned_pages;

        addr = vmap(node->hvm.pages, nr_pages, VM_MAP | VM_IOREMAP,
                    PAGE_KERNEL);
        if (!addr) {
                err = -ENOMEM;
                goto out_xenbus_unmap_ring;
        }

        node->hvm.addr = addr;

        spin_lock(&xenbus_valloc_lock);
        list_add(&node->next, &xenbus_valloc_pages);
        spin_unlock(&xenbus_valloc_lock);

        *vaddr = addr;
        return 0;

 out_xenbus_unmap_ring:
        if (!leaked)
                xenbus_unmap_ring(dev, node->handles, nr_grefs, info.addrs);
        else
                pr_alert("leaking %p size %u page(s)",
                         addr, nr_pages);
 out_free_ballooned_pages:
        if (!leaked)
                free_xenballooned_pages(nr_pages, node->hvm.pages);
 out_err:
        kfree(node);
        return err;
}


/**
 * xenbus_map_ring - map pages into this domain at caller-supplied addresses
 * @dev: xenbus device
 * @gnt_refs: grant reference array
 * @nr_grefs: number of grant references
 * @handles: grant handle array to be filled in
 * @vaddrs: addresses the pages are to be mapped at
 * @leaked: set to true if cleanup of a failed mapping itself failed
 *
 * Map pages of memory into this domain from another domain's grant table.
 * xenbus_map_ring does not allocate the virtual address space (you must do
 * this yourself!).  It only maps in the pages to the specified address.
 * Return 0 on success, or a GNTST_* status code (see
 * xen/include/interface/grant_table.h) or -ENOMEM / -EINVAL on error.  If
 * an error is returned, the device will switch to XenbusStateClosing and
 * the first error message will be saved in XenStore.  Furthermore, if the
 * mapping fails, the caller should check @leaked: if it is set,
 * xenbus_map_ring also failed to clean up after itself, and the caller
 * must not free the address space behind @vaddrs.
 */
int xenbus_map_ring(struct xenbus_device *dev, grant_ref_t *gnt_refs,
                    unsigned int nr_grefs, grant_handle_t *handles,
                    unsigned long *vaddrs, bool *leaked)
{
        phys_addr_t phys_addrs[XENBUS_MAX_RING_GRANTS];
        int i;

        if (nr_grefs > XENBUS_MAX_RING_GRANTS)
                return -EINVAL;

        for (i = 0; i < nr_grefs; i++)
                phys_addrs[i] = (unsigned long)vaddrs[i];

        return __xenbus_map_ring(dev, gnt_refs, nr_grefs, handles,
                                 phys_addrs, GNTMAP_host_map, leaked);
}
EXPORT_SYMBOL_GPL(xenbus_map_ring);
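
/*
 * Usage sketch: callers that manage their own virtual address space
 * must honour @leaked on failure.  "vaddrs" is assumed to hold
 * nr_grefs page addresses obtained by the caller, and
 * "my_free_address_space" is a hypothetical cleanup helper:
 *
 *      bool leaked;
 *      int err;
 *
 *      err = xenbus_map_ring(dev, gnt_refs, nr_grefs, handles,
 *                            vaddrs, &leaked);
 *      if (err) {
 *              if (!leaked)
 *                      my_free_address_space(vaddrs);
 *              return err;     (if leaked, the space must stay allocated)
 *      }
 */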

/**
 * xenbus_unmap_ring_vfree - unmap and free a ring
 * @dev: xenbus device
 * @vaddr: addr to unmap
 *
 * Based on Rusty Russell's skeleton driver's unmap_page.
 * Unmap a page of memory in this domain that was imported from another
 * domain.  Use xenbus_unmap_ring_vfree if you mapped in your memory with
 * xenbus_map_ring_valloc (it will free the virtual address space).
 * Return 0 on success, or a GNTST_* status code on error
 * (see xen/include/interface/grant_table.h).
 */
int xenbus_unmap_ring_vfree(struct xenbus_device *dev, void *vaddr)
{
        return ring_ops->unmap(dev, vaddr);
}
EXPORT_SYMBOL_GPL(xenbus_unmap_ring_vfree);

static int xenbus_unmap_ring_vfree_pv(struct xenbus_device *dev, void *vaddr)
{
        struct xenbus_map_node *node;
        struct gnttab_unmap_grant_ref unmap[XENBUS_MAX_RING_GRANTS];
        unsigned int level;
        int i;
        bool leaked = false;
        int err;

        spin_lock(&xenbus_valloc_lock);
        list_for_each_entry(node, &xenbus_valloc_pages, next) {
                if (node->pv.area->addr == vaddr) {
                        list_del(&node->next);
                        goto found;
                }
        }
        node = NULL;
 found:
        spin_unlock(&xenbus_valloc_lock);

        if (!node) {
                xenbus_dev_error(dev, -ENOENT,
                                 "can't find mapped virtual address %p", vaddr);
                return GNTST_bad_virt_addr;
        }

        for (i = 0; i < node->nr_handles; i++) {
                unsigned long addr;

                memset(&unmap[i], 0, sizeof(unmap[i]));
                addr = (unsigned long)vaddr + (XEN_PAGE_SIZE * i);
                unmap[i].host_addr = arbitrary_virt_to_machine(
                        lookup_address(addr, &level)).maddr;
                unmap[i].dev_bus_addr = 0;
                unmap[i].handle = node->handles[i];
        }

        if (HYPERVISOR_grant_table_op(GNTTABOP_unmap_grant_ref, unmap, i))
                BUG();

        err = GNTST_okay;
        leaked = false;
        for (i = 0; i < node->nr_handles; i++) {
                if (unmap[i].status != GNTST_okay) {
                        leaked = true;
                        xenbus_dev_error(dev, unmap[i].status,
                                         "unmapping page at handle %d error %d",
                                         node->handles[i], unmap[i].status);
                        err = unmap[i].status;
                        break;
                }
        }

        if (!leaked)
                free_vm_area(node->pv.area);
        else
                pr_alert("leaking VM area %p size %u page(s)",
                         node->pv.area, node->nr_handles);

        kfree(node);
        return err;
}

struct unmap_ring_vfree_hvm {
        unsigned int idx;
        unsigned long addrs[XENBUS_MAX_RING_GRANTS];
};

static void xenbus_unmap_ring_setup_grant_hvm(unsigned long gfn,
                                              unsigned int goffset,
                                              unsigned int len,
                                              void *data)
{
        struct unmap_ring_vfree_hvm *info = data;

        info->addrs[info->idx] = (unsigned long)gfn_to_virt(gfn);

        info->idx++;
}

static int xenbus_unmap_ring_vfree_hvm(struct xenbus_device *dev, void *vaddr)
{
        int rv;
        struct xenbus_map_node *node;
        void *addr;
        struct unmap_ring_vfree_hvm info = {
                .idx = 0,
        };
        unsigned int nr_pages;

        spin_lock(&xenbus_valloc_lock);
        list_for_each_entry(node, &xenbus_valloc_pages, next) {
                addr = node->hvm.addr;
                if (addr == vaddr) {
                        list_del(&node->next);
                        goto found;
                }
        }
        node = addr = NULL;
 found:
        spin_unlock(&xenbus_valloc_lock);

        if (!node) {
                xenbus_dev_error(dev, -ENOENT,
                                 "can't find mapped virtual address %p", vaddr);
                return GNTST_bad_virt_addr;
        }

        nr_pages = XENBUS_PAGES(node->nr_handles);

        gnttab_foreach_grant(node->hvm.pages, node->nr_handles,
                             xenbus_unmap_ring_setup_grant_hvm,
                             &info);

        rv = xenbus_unmap_ring(dev, node->handles, node->nr_handles,
                               info.addrs);
        if (!rv) {
                vunmap(vaddr);
                free_xenballooned_pages(nr_pages, node->hvm.pages);
        } else
                WARN(1, "Leaking %p, size %u page(s)\n", vaddr, nr_pages);

        kfree(node);
        return rv;
}

/**
 * xenbus_unmap_ring - unmap memory imported from another domain
 * @dev: xenbus device
 * @handles: grant handle array
 * @nr_handles: number of handles in the array
 * @vaddrs: addresses to unmap
 *
 * Unmap memory in this domain that was imported from another domain.
 * Return 0 on success, or a GNTST_* status code on error
 * (see xen/include/interface/grant_table.h).
 */
int xenbus_unmap_ring(struct xenbus_device *dev,
                      grant_handle_t *handles, unsigned int nr_handles,
                      unsigned long *vaddrs)
{
        struct gnttab_unmap_grant_ref unmap[XENBUS_MAX_RING_GRANTS];
        int i;
        int err;

        if (nr_handles > XENBUS_MAX_RING_GRANTS)
                return -EINVAL;

        for (i = 0; i < nr_handles; i++)
                gnttab_set_unmap_op(&unmap[i], vaddrs[i],
                                    GNTMAP_host_map, handles[i]);

        if (HYPERVISOR_grant_table_op(GNTTABOP_unmap_grant_ref, unmap, i))
                BUG();

        err = GNTST_okay;
        for (i = 0; i < nr_handles; i++) {
                if (unmap[i].status != GNTST_okay) {
                        xenbus_dev_error(dev, unmap[i].status,
                                         "unmapping page at handle %d error %d",
                                         handles[i], unmap[i].status);
                        err = unmap[i].status;
                        break;
                }
        }

        return err;
}
EXPORT_SYMBOL_GPL(xenbus_unmap_ring);


/**
 * xenbus_read_driver_state - read the state of a driver
 * @path: path for driver
 *
 * Return the state of the driver rooted at the given store path, or
 * XenbusStateUnknown if no state can be read.
 */
enum xenbus_state xenbus_read_driver_state(const char *path)
{
        enum xenbus_state result;
        int err = xenbus_gather(XBT_NIL, path, "state", "%d", &result, NULL);

        if (err)
                result = XenbusStateUnknown;

        return result;
}
EXPORT_SYMBOL_GPL(xenbus_read_driver_state);
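
/*
 * Usage sketch: combined with xenbus_strstate() this is convenient for
 * diagnostics about the peer:
 *
 *      enum xenbus_state state;
 *
 *      state = xenbus_read_driver_state(dev->otherend);
 *      dev_dbg(&dev->dev, "peer is in state %s\n", xenbus_strstate(state));
 */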

static const struct xenbus_ring_ops ring_ops_pv = {
        .map = xenbus_map_ring_valloc_pv,
        .unmap = xenbus_unmap_ring_vfree_pv,
};

static const struct xenbus_ring_ops ring_ops_hvm = {
        .map = xenbus_map_ring_valloc_hvm,
        .unmap = xenbus_unmap_ring_vfree_hvm,
};

void __init xenbus_ring_ops_init(void)
{
        if (!xen_feature(XENFEAT_auto_translated_physmap))
                ring_ops = &ring_ops_pv;
        else
                ring_ops = &ring_ops_hvm;
}