linux/drivers/xen/grant-table.c
<<
>>
Prefs
   1/******************************************************************************
   2 * grant_table.c
   3 *
   4 * Granting foreign access to our memory reservation.
   5 *
   6 * Copyright (c) 2005-2006, Christopher Clark
   7 * Copyright (c) 2004-2005, K A Fraser
   8 *
   9 * This program is free software; you can redistribute it and/or
  10 * modify it under the terms of the GNU General Public License version 2
  11 * as published by the Free Software Foundation; or, when distributed
  12 * separately from the Linux kernel or incorporated into other
  13 * software packages, subject to the following license:
  14 *
  15 * Permission is hereby granted, free of charge, to any person obtaining a copy
  16 * of this source file (the "Software"), to deal in the Software without
  17 * restriction, including without limitation the rights to use, copy, modify,
  18 * merge, publish, distribute, sublicense, and/or sell copies of the Software,
  19 * and to permit persons to whom the Software is furnished to do so, subject to
  20 * the following conditions:
  21 *
  22 * The above copyright notice and this permission notice shall be included in
  23 * all copies or substantial portions of the Software.
  24 *
  25 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
  26 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
  27 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
  28 * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
  29 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
  30 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
  31 * IN THE SOFTWARE.
  32 */
  33
  34#include <linux/module.h>
  35#include <linux/sched.h>
  36#include <linux/mm.h>
  37#include <linux/slab.h>
  38#include <linux/vmalloc.h>
  39#include <linux/uaccess.h>
  40#include <linux/io.h>
  41#include <linux/delay.h>
  42#include <linux/hardirq.h>
  43
  44#include <xen/xen.h>
  45#include <xen/interface/xen.h>
  46#include <xen/page.h>
  47#include <xen/grant_table.h>
  48#include <xen/interface/memory.h>
  49#include <xen/hvc-console.h>
  50#include <asm/xen/hypercall.h>
  51#include <asm/xen/interface.h>
  52
  53#include <asm/pgtable.h>
  54#include <asm/sync_bitops.h>
  55
/* External tools reserve first few grant table entries. */
#define NR_RESERVED_ENTRIES 8
/* Link value that terminates a chain of grant references. */
#define GNTTAB_LIST_END 0xffffffff

/*
 * Array of pages holding the per-reference "next" links; indexed as
 * gnttab_list[ref / RPP][ref % RPP] by __gnttab_entry().
 */
static grant_ref_t **gnttab_list;
/* Number of grant frames covered by gnttab_list; raised by grow_gnttab_list(). */
static unsigned int nr_grant_frames;
/* Cap applied by gnttab_max_grant_frames() to the hypervisor-reported limit. */
static unsigned int boot_max_nr_grant_frames;
/* Number of references currently on the global free list. */
static int gnttab_free_count;
/* First reference of the global free list. */
static grant_ref_t gnttab_free_head;
/* Held while the global free list, callback list or deferred list is modified. */
static DEFINE_SPINLOCK(gnttab_list_lock);
/*
 * NOTE(review): not referenced in this part of the file; presumably used on
 * the HVM resume path -- confirm at its users.
 */
unsigned long xen_hvm_resume_frames;
EXPORT_SYMBOL_GPL(xen_hvm_resume_frames);

/* The shared grant table, viewed as v1 entries, v2 entries or a raw address. */
static union {
        struct grant_entry_v1 *v1;
        union grant_entry_v2 *v2;
        void *addr;
} gnttab_shared;
  74
/* This is a structure of function pointers for grant table operations. */
struct gnttab_ops {
        /*
         * Mapping a list of frames for storing grant entries. Frames parameter
         * is used to store grant table address when grant table being setup,
         * nr_gframes is the number of frames to map grant table. Returning
         * GNTST_okay means success and negative value means failure.
         */
        int (*map_frames)(xen_pfn_t *frames, unsigned int nr_gframes);
        /*
         * Release a list of frames which are mapped in map_frames for grant
         * entry status.
         */
        void (*unmap_frames)(void);
        /*
         * Introducing a valid entry into the grant table, granting the frame of
         * this grant entry to domain for accessing or transferring. Ref
         * parameter is reference of this introduced grant entry, domid is id of
         * granted domain, frame is the page frame to be granted, and flags is
         * status of the grant entry to be updated.
         */
        void (*update_entry)(grant_ref_t ref, domid_t domid,
                             unsigned long frame, unsigned flags);
        /*
         * Stop granting a grant entry to domain for accessing. Ref parameter is
         * reference of a grant entry whose grant access will be stopped,
         * readonly is not in use in this function. If the grant entry is
         * currently mapped for reading or writing, just return failure(==0)
         * directly and don't tear down the grant access. Otherwise, stop grant
         * access for this entry and return success(==1).
         */
        int (*end_foreign_access_ref)(grant_ref_t ref, int readonly);
        /*
         * Stop granting a grant entry to domain for transfer. Ref parameter is
         * reference of a grant entry whose grant transfer will be stopped. If
         * transfer has not started, just reclaim the grant entry and return
         * failure(==0). Otherwise, wait for the transfer to complete and then
         * return the frame.
         */
        unsigned long (*end_foreign_transfer_ref)(grant_ref_t ref);
        /*
         * Query the status of a grant entry. Ref parameter is reference of
         * queried grant entry, return value is the status of queried entry.
         * Detailed status(writing/reading) can be gotten from the return value
         * by bit operations.
         */
        int (*query_foreign_access)(grant_ref_t ref);
        /*
         * Grant a domain to access a range of bytes within the page referred by
         * an available grant entry. Ref parameter is reference of a grant entry
         * which will be sub-page accessed, domid is id of grantee domain, frame
         * is frame address of subpage grant, flags is grant type and flag
         * information, page_off is offset of the range of bytes, and length is
         * length of bytes to be accessed.
         */
        void (*update_subpage_entry)(grant_ref_t ref, domid_t domid,
                                     unsigned long frame, int flags,
                                     unsigned page_off, unsigned length);
        /*
         * Redirect an available grant entry on domain A to another grant
         * reference of domain B, then allow domain C to use grant reference
         * of domain B transitively. Ref parameter is an available grant entry
         * reference on domain A, domid is id of domain C which accesses grant
         * entry transitively, flags is grant type and flag information,
         * trans_domid is id of domain B whose grant entry is finally accessed
         * transitively, trans_gref is grant entry transitive reference of
         * domain B.
         */
        void (*update_trans_entry)(grant_ref_t ref, domid_t domid, int flags,
                                   domid_t trans_domid, grant_ref_t trans_gref);
};
 146
/* Operations table that the public wrappers in this file dispatch through. */
static struct gnttab_ops *gnttab_interface;

/* This reflects status of grant entries, so act as a global value. */
static grant_status_t *grstatus;

static int grant_table_version;
/* Grant references per grant frame; grow_gnttab_list() requires it non-zero. */
static int grefs_per_grant_frame;

/* Singly linked list of callbacks waiting for free references. */
static struct gnttab_free_callback *gnttab_free_callback_list;

/* Forward declaration; called by get_free_entries() when the free list is short. */
static int gnttab_expand(unsigned int req_entries);

/* Number of grant_ref_t / grant_status_t items that fit in one page. */
#define RPP (PAGE_SIZE / sizeof(grant_ref_t))
#define SPP (PAGE_SIZE / sizeof(grant_status_t))
 161
 162static inline grant_ref_t *__gnttab_entry(grant_ref_t entry)
 163{
 164        return &gnttab_list[(entry) / RPP][(entry) % RPP];
 165}
 166/* This can be used as an l-value */
 167#define gnttab_entry(entry) (*__gnttab_entry(entry))
 168
 169static int get_free_entries(unsigned count)
 170{
 171        unsigned long flags;
 172        int ref, rc = 0;
 173        grant_ref_t head;
 174
 175        spin_lock_irqsave(&gnttab_list_lock, flags);
 176
 177        if ((gnttab_free_count < count) &&
 178            ((rc = gnttab_expand(count - gnttab_free_count)) < 0)) {
 179                spin_unlock_irqrestore(&gnttab_list_lock, flags);
 180                return rc;
 181        }
 182
 183        ref = head = gnttab_free_head;
 184        gnttab_free_count -= count;
 185        while (count-- > 1)
 186                head = gnttab_entry(head);
 187        gnttab_free_head = gnttab_entry(head);
 188        gnttab_entry(head) = GNTTAB_LIST_END;
 189
 190        spin_unlock_irqrestore(&gnttab_list_lock, flags);
 191
 192        return ref;
 193}
 194
 195static void do_free_callbacks(void)
 196{
 197        struct gnttab_free_callback *callback, *next;
 198
 199        callback = gnttab_free_callback_list;
 200        gnttab_free_callback_list = NULL;
 201
 202        while (callback != NULL) {
 203                next = callback->next;
 204                if (gnttab_free_count >= callback->count) {
 205                        callback->next = NULL;
 206                        callback->fn(callback->arg);
 207                } else {
 208                        callback->next = gnttab_free_callback_list;
 209                        gnttab_free_callback_list = callback;
 210                }
 211                callback = next;
 212        }
 213}
 214
 215static inline void check_free_callbacks(void)
 216{
 217        if (unlikely(gnttab_free_callback_list))
 218                do_free_callbacks();
 219}
 220
 221static void put_free_entry(grant_ref_t ref)
 222{
 223        unsigned long flags;
 224        spin_lock_irqsave(&gnttab_list_lock, flags);
 225        gnttab_entry(ref) = gnttab_free_head;
 226        gnttab_free_head = ref;
 227        gnttab_free_count++;
 228        check_free_callbacks();
 229        spin_unlock_irqrestore(&gnttab_list_lock, flags);
 230}
 231
 232/*
 233 * Following applies to gnttab_update_entry_v1 and gnttab_update_entry_v2.
 234 * Introducing a valid entry into the grant table:
 235 *  1. Write ent->domid.
 236 *  2. Write ent->frame:
 237 *      GTF_permit_access:   Frame to which access is permitted.
 238 *      GTF_accept_transfer: Pseudo-phys frame slot being filled by new
 239 *                           frame, or zero if none.
 240 *  3. Write memory barrier (WMB).
 241 *  4. Write ent->flags, inc. valid type.
 242 */
 243static void gnttab_update_entry_v1(grant_ref_t ref, domid_t domid,
 244                                   unsigned long frame, unsigned flags)
 245{
 246        gnttab_shared.v1[ref].domid = domid;
 247        gnttab_shared.v1[ref].frame = frame;
 248        wmb();
 249        gnttab_shared.v1[ref].flags = flags;
 250}
 251
 252static void gnttab_update_entry_v2(grant_ref_t ref, domid_t domid,
 253                                   unsigned long frame, unsigned flags)
 254{
 255        gnttab_shared.v2[ref].hdr.domid = domid;
 256        gnttab_shared.v2[ref].full_page.frame = frame;
 257        wmb();
 258        gnttab_shared.v2[ref].hdr.flags = GTF_permit_access | flags;
 259}
 260
 261/*
 262 * Public grant-issuing interface functions
 263 */
 264void gnttab_grant_foreign_access_ref(grant_ref_t ref, domid_t domid,
 265                                     unsigned long frame, int readonly)
 266{
 267        gnttab_interface->update_entry(ref, domid, frame,
 268                           GTF_permit_access | (readonly ? GTF_readonly : 0));
 269}
 270EXPORT_SYMBOL_GPL(gnttab_grant_foreign_access_ref);
 271
 272int gnttab_grant_foreign_access(domid_t domid, unsigned long frame,
 273                                int readonly)
 274{
 275        int ref;
 276
 277        ref = get_free_entries(1);
 278        if (unlikely(ref < 0))
 279                return -ENOSPC;
 280
 281        gnttab_grant_foreign_access_ref(ref, domid, frame, readonly);
 282
 283        return ref;
 284}
 285EXPORT_SYMBOL_GPL(gnttab_grant_foreign_access);
 286
 287static void gnttab_update_subpage_entry_v2(grant_ref_t ref, domid_t domid,
 288                                           unsigned long frame, int flags,
 289                                           unsigned page_off, unsigned length)
 290{
 291        gnttab_shared.v2[ref].sub_page.frame = frame;
 292        gnttab_shared.v2[ref].sub_page.page_off = page_off;
 293        gnttab_shared.v2[ref].sub_page.length = length;
 294        gnttab_shared.v2[ref].hdr.domid = domid;
 295        wmb();
 296        gnttab_shared.v2[ref].hdr.flags =
 297                                GTF_permit_access | GTF_sub_page | flags;
 298}
 299
 300int gnttab_grant_foreign_access_subpage_ref(grant_ref_t ref, domid_t domid,
 301                                            unsigned long frame, int flags,
 302                                            unsigned page_off,
 303                                            unsigned length)
 304{
 305        if (flags & (GTF_accept_transfer | GTF_reading |
 306                     GTF_writing | GTF_transitive))
 307                return -EPERM;
 308
 309        if (gnttab_interface->update_subpage_entry == NULL)
 310                return -ENOSYS;
 311
 312        gnttab_interface->update_subpage_entry(ref, domid, frame, flags,
 313                                               page_off, length);
 314
 315        return 0;
 316}
 317EXPORT_SYMBOL_GPL(gnttab_grant_foreign_access_subpage_ref);
 318
 319int gnttab_grant_foreign_access_subpage(domid_t domid, unsigned long frame,
 320                                        int flags, unsigned page_off,
 321                                        unsigned length)
 322{
 323        int ref, rc;
 324
 325        ref = get_free_entries(1);
 326        if (unlikely(ref < 0))
 327                return -ENOSPC;
 328
 329        rc = gnttab_grant_foreign_access_subpage_ref(ref, domid, frame, flags,
 330                                                     page_off, length);
 331        if (rc < 0) {
 332                put_free_entry(ref);
 333                return rc;
 334        }
 335
 336        return ref;
 337}
 338EXPORT_SYMBOL_GPL(gnttab_grant_foreign_access_subpage);
 339
 340bool gnttab_subpage_grants_available(void)
 341{
 342        return gnttab_interface->update_subpage_entry != NULL;
 343}
 344EXPORT_SYMBOL_GPL(gnttab_subpage_grants_available);
 345
 346static void gnttab_update_trans_entry_v2(grant_ref_t ref, domid_t domid,
 347                                         int flags, domid_t trans_domid,
 348                                         grant_ref_t trans_gref)
 349{
 350        gnttab_shared.v2[ref].transitive.trans_domid = trans_domid;
 351        gnttab_shared.v2[ref].transitive.gref = trans_gref;
 352        gnttab_shared.v2[ref].hdr.domid = domid;
 353        wmb();
 354        gnttab_shared.v2[ref].hdr.flags =
 355                                GTF_permit_access | GTF_transitive | flags;
 356}
 357
 358int gnttab_grant_foreign_access_trans_ref(grant_ref_t ref, domid_t domid,
 359                                          int flags, domid_t trans_domid,
 360                                          grant_ref_t trans_gref)
 361{
 362        if (flags & (GTF_accept_transfer | GTF_reading |
 363                     GTF_writing | GTF_sub_page))
 364                return -EPERM;
 365
 366        if (gnttab_interface->update_trans_entry == NULL)
 367                return -ENOSYS;
 368
 369        gnttab_interface->update_trans_entry(ref, domid, flags, trans_domid,
 370                                             trans_gref);
 371
 372        return 0;
 373}
 374EXPORT_SYMBOL_GPL(gnttab_grant_foreign_access_trans_ref);
 375
 376int gnttab_grant_foreign_access_trans(domid_t domid, int flags,
 377                                      domid_t trans_domid,
 378                                      grant_ref_t trans_gref)
 379{
 380        int ref, rc;
 381
 382        ref = get_free_entries(1);
 383        if (unlikely(ref < 0))
 384                return -ENOSPC;
 385
 386        rc = gnttab_grant_foreign_access_trans_ref(ref, domid, flags,
 387                                                   trans_domid, trans_gref);
 388        if (rc < 0) {
 389                put_free_entry(ref);
 390                return rc;
 391        }
 392
 393        return ref;
 394}
 395EXPORT_SYMBOL_GPL(gnttab_grant_foreign_access_trans);
 396
 397bool gnttab_trans_grants_available(void)
 398{
 399        return gnttab_interface->update_trans_entry != NULL;
 400}
 401EXPORT_SYMBOL_GPL(gnttab_trans_grants_available);
 402
 403static int gnttab_query_foreign_access_v1(grant_ref_t ref)
 404{
 405        return gnttab_shared.v1[ref].flags & (GTF_reading|GTF_writing);
 406}
 407
 408static int gnttab_query_foreign_access_v2(grant_ref_t ref)
 409{
 410        return grstatus[ref] & (GTF_reading|GTF_writing);
 411}
 412
 413int gnttab_query_foreign_access(grant_ref_t ref)
 414{
 415        return gnttab_interface->query_foreign_access(ref);
 416}
 417EXPORT_SYMBOL_GPL(gnttab_query_foreign_access);
 418
 419static int gnttab_end_foreign_access_ref_v1(grant_ref_t ref, int readonly)
 420{
 421        u16 flags, nflags;
 422        u16 *pflags;
 423
 424        pflags = &gnttab_shared.v1[ref].flags;
 425        nflags = *pflags;
 426        do {
 427                flags = nflags;
 428                if (flags & (GTF_reading|GTF_writing))
 429                        return 0;
 430        } while ((nflags = sync_cmpxchg(pflags, flags, 0)) != flags);
 431
 432        return 1;
 433}
 434
 435static int gnttab_end_foreign_access_ref_v2(grant_ref_t ref, int readonly)
 436{
 437        gnttab_shared.v2[ref].hdr.flags = 0;
 438        mb();
 439        if (grstatus[ref] & (GTF_reading|GTF_writing)) {
 440                return 0;
 441        } else {
 442                /* The read of grstatus needs to have acquire
 443                semantics.  On x86, reads already have
 444                that, and we just need to protect against
 445                compiler reorderings.  On other
 446                architectures we may need a full
 447                barrier. */
 448#ifdef CONFIG_X86
 449                barrier();
 450#else
 451                mb();
 452#endif
 453        }
 454
 455        return 1;
 456}
 457
 458static inline int _gnttab_end_foreign_access_ref(grant_ref_t ref, int readonly)
 459{
 460        return gnttab_interface->end_foreign_access_ref(ref, readonly);
 461}
 462
 463int gnttab_end_foreign_access_ref(grant_ref_t ref, int readonly)
 464{
 465        if (_gnttab_end_foreign_access_ref(ref, readonly))
 466                return 1;
 467        pr_warn("WARNING: g.e. %#x still in use!\n", ref);
 468        return 0;
 469}
 470EXPORT_SYMBOL_GPL(gnttab_end_foreign_access_ref);
 471
/* One grant whose access could not be ended yet; retried from a timer. */
struct deferred_entry {
        struct list_head list;  /* link in deferred_list */
        grant_ref_t ref;        /* grant reference still in use */
        bool ro;                /* readonly argument passed on each retry */
        uint16_t warn_delay;    /* retries left before "still pending" is logged */
        struct page *page;      /* page freed together with the grant, or NULL */
};
/* Pending deferred entries; modified under gnttab_list_lock. */
static LIST_HEAD(deferred_list);
static void gnttab_handle_deferred(unsigned long);
/* Timer that runs gnttab_handle_deferred(); armed for jiffies + HZ by its users. */
static DEFINE_TIMER(deferred_timer, gnttab_handle_deferred, 0, 0);
 482
 483static void gnttab_handle_deferred(unsigned long unused)
 484{
 485        unsigned int nr = 10;
 486        struct deferred_entry *first = NULL;
 487        unsigned long flags;
 488
 489        spin_lock_irqsave(&gnttab_list_lock, flags);
 490        while (nr--) {
 491                struct deferred_entry *entry
 492                        = list_first_entry(&deferred_list,
 493                                           struct deferred_entry, list);
 494
 495                if (entry == first)
 496                        break;
 497                list_del(&entry->list);
 498                spin_unlock_irqrestore(&gnttab_list_lock, flags);
 499                if (_gnttab_end_foreign_access_ref(entry->ref, entry->ro)) {
 500                        put_free_entry(entry->ref);
 501                        if (entry->page) {
 502                                pr_debug("freeing g.e. %#x (pfn %#lx)\n",
 503                                         entry->ref, page_to_pfn(entry->page));
 504                                __free_page(entry->page);
 505                        } else
 506                                pr_info("freeing g.e. %#x\n", entry->ref);
 507                        kfree(entry);
 508                        entry = NULL;
 509                } else {
 510                        if (!--entry->warn_delay)
 511                                pr_info("g.e. %#x still pending\n",
 512                                        entry->ref);
 513                        if (!first)
 514                                first = entry;
 515                }
 516                spin_lock_irqsave(&gnttab_list_lock, flags);
 517                if (entry)
 518                        list_add_tail(&entry->list, &deferred_list);
 519                else if (list_empty(&deferred_list))
 520                        break;
 521        }
 522        if (!list_empty(&deferred_list) && !timer_pending(&deferred_timer)) {
 523                deferred_timer.expires = jiffies + HZ;
 524                add_timer(&deferred_timer);
 525        }
 526        spin_unlock_irqrestore(&gnttab_list_lock, flags);
 527}
 528
 529static void gnttab_add_deferred(grant_ref_t ref, bool readonly,
 530                                struct page *page)
 531{
 532        struct deferred_entry *entry = kmalloc(sizeof(*entry), GFP_ATOMIC);
 533        const char *what = KERN_WARNING "leaking";
 534
 535        if (entry) {
 536                unsigned long flags;
 537
 538                entry->ref = ref;
 539                entry->ro = readonly;
 540                entry->page = page;
 541                entry->warn_delay = 60;
 542                spin_lock_irqsave(&gnttab_list_lock, flags);
 543                list_add_tail(&entry->list, &deferred_list);
 544                if (!timer_pending(&deferred_timer)) {
 545                        deferred_timer.expires = jiffies + HZ;
 546                        add_timer(&deferred_timer);
 547                }
 548                spin_unlock_irqrestore(&gnttab_list_lock, flags);
 549                what = KERN_DEBUG "deferring";
 550        }
 551        printk("%s g.e. %#x (pfn %#lx)\n",
 552               what, ref, page ? page_to_pfn(page) : -1);
 553}
 554
 555void gnttab_end_foreign_access(grant_ref_t ref, int readonly,
 556                               unsigned long page)
 557{
 558        if (gnttab_end_foreign_access_ref(ref, readonly)) {
 559                put_free_entry(ref);
 560                if (page != 0)
 561                        free_page(page);
 562        } else
 563                gnttab_add_deferred(ref, readonly,
 564                                    page ? virt_to_page(page) : NULL);
 565}
 566EXPORT_SYMBOL_GPL(gnttab_end_foreign_access);
 567
 568int gnttab_grant_foreign_transfer(domid_t domid, unsigned long pfn)
 569{
 570        int ref;
 571
 572        ref = get_free_entries(1);
 573        if (unlikely(ref < 0))
 574                return -ENOSPC;
 575        gnttab_grant_foreign_transfer_ref(ref, domid, pfn);
 576
 577        return ref;
 578}
 579EXPORT_SYMBOL_GPL(gnttab_grant_foreign_transfer);
 580
 581void gnttab_grant_foreign_transfer_ref(grant_ref_t ref, domid_t domid,
 582                                       unsigned long pfn)
 583{
 584        gnttab_interface->update_entry(ref, domid, pfn, GTF_accept_transfer);
 585}
 586EXPORT_SYMBOL_GPL(gnttab_grant_foreign_transfer_ref);
 587
 588static unsigned long gnttab_end_foreign_transfer_ref_v1(grant_ref_t ref)
 589{
 590        unsigned long frame;
 591        u16           flags;
 592        u16          *pflags;
 593
 594        pflags = &gnttab_shared.v1[ref].flags;
 595
 596        /*
 597         * If a transfer is not even yet started, try to reclaim the grant
 598         * reference and return failure (== 0).
 599         */
 600        while (!((flags = *pflags) & GTF_transfer_committed)) {
 601                if (sync_cmpxchg(pflags, flags, 0) == flags)
 602                        return 0;
 603                cpu_relax();
 604        }
 605
 606        /* If a transfer is in progress then wait until it is completed. */
 607        while (!(flags & GTF_transfer_completed)) {
 608                flags = *pflags;
 609                cpu_relax();
 610        }
 611
 612        rmb();  /* Read the frame number /after/ reading completion status. */
 613        frame = gnttab_shared.v1[ref].frame;
 614        BUG_ON(frame == 0);
 615
 616        return frame;
 617}
 618
 619static unsigned long gnttab_end_foreign_transfer_ref_v2(grant_ref_t ref)
 620{
 621        unsigned long frame;
 622        u16           flags;
 623        u16          *pflags;
 624
 625        pflags = &gnttab_shared.v2[ref].hdr.flags;
 626
 627        /*
 628         * If a transfer is not even yet started, try to reclaim the grant
 629         * reference and return failure (== 0).
 630         */
 631        while (!((flags = *pflags) & GTF_transfer_committed)) {
 632                if (sync_cmpxchg(pflags, flags, 0) == flags)
 633                        return 0;
 634                cpu_relax();
 635        }
 636
 637        /* If a transfer is in progress then wait until it is completed. */
 638        while (!(flags & GTF_transfer_completed)) {
 639                flags = *pflags;
 640                cpu_relax();
 641        }
 642
 643        rmb();  /* Read the frame number /after/ reading completion status. */
 644        frame = gnttab_shared.v2[ref].full_page.frame;
 645        BUG_ON(frame == 0);
 646
 647        return frame;
 648}
 649
 650unsigned long gnttab_end_foreign_transfer_ref(grant_ref_t ref)
 651{
 652        return gnttab_interface->end_foreign_transfer_ref(ref);
 653}
 654EXPORT_SYMBOL_GPL(gnttab_end_foreign_transfer_ref);
 655
 656unsigned long gnttab_end_foreign_transfer(grant_ref_t ref)
 657{
 658        unsigned long frame = gnttab_end_foreign_transfer_ref(ref);
 659        put_free_entry(ref);
 660        return frame;
 661}
 662EXPORT_SYMBOL_GPL(gnttab_end_foreign_transfer);
 663
 664void gnttab_free_grant_reference(grant_ref_t ref)
 665{
 666        put_free_entry(ref);
 667}
 668EXPORT_SYMBOL_GPL(gnttab_free_grant_reference);
 669
 670void gnttab_free_grant_references(grant_ref_t head)
 671{
 672        grant_ref_t ref;
 673        unsigned long flags;
 674        int count = 1;
 675        if (head == GNTTAB_LIST_END)
 676                return;
 677        spin_lock_irqsave(&gnttab_list_lock, flags);
 678        ref = head;
 679        while (gnttab_entry(ref) != GNTTAB_LIST_END) {
 680                ref = gnttab_entry(ref);
 681                count++;
 682        }
 683        gnttab_entry(ref) = gnttab_free_head;
 684        gnttab_free_head = head;
 685        gnttab_free_count += count;
 686        check_free_callbacks();
 687        spin_unlock_irqrestore(&gnttab_list_lock, flags);
 688}
 689EXPORT_SYMBOL_GPL(gnttab_free_grant_references);
 690
 691int gnttab_alloc_grant_references(u16 count, grant_ref_t *head)
 692{
 693        int h = get_free_entries(count);
 694
 695        if (h < 0)
 696                return -ENOSPC;
 697
 698        *head = h;
 699
 700        return 0;
 701}
 702EXPORT_SYMBOL_GPL(gnttab_alloc_grant_references);
 703
 704int gnttab_empty_grant_references(const grant_ref_t *private_head)
 705{
 706        return (*private_head == GNTTAB_LIST_END);
 707}
 708EXPORT_SYMBOL_GPL(gnttab_empty_grant_references);
 709
 710int gnttab_claim_grant_reference(grant_ref_t *private_head)
 711{
 712        grant_ref_t g = *private_head;
 713        if (unlikely(g == GNTTAB_LIST_END))
 714                return -ENOSPC;
 715        *private_head = gnttab_entry(g);
 716        return g;
 717}
 718EXPORT_SYMBOL_GPL(gnttab_claim_grant_reference);
 719
 720void gnttab_release_grant_reference(grant_ref_t *private_head,
 721                                    grant_ref_t release)
 722{
 723        gnttab_entry(release) = *private_head;
 724        *private_head = release;
 725}
 726EXPORT_SYMBOL_GPL(gnttab_release_grant_reference);
 727
 728void gnttab_request_free_callback(struct gnttab_free_callback *callback,
 729                                  void (*fn)(void *), void *arg, u16 count)
 730{
 731        unsigned long flags;
 732        spin_lock_irqsave(&gnttab_list_lock, flags);
 733        if (callback->next)
 734                goto out;
 735        callback->fn = fn;
 736        callback->arg = arg;
 737        callback->count = count;
 738        callback->next = gnttab_free_callback_list;
 739        gnttab_free_callback_list = callback;
 740        check_free_callbacks();
 741out:
 742        spin_unlock_irqrestore(&gnttab_list_lock, flags);
 743}
 744EXPORT_SYMBOL_GPL(gnttab_request_free_callback);
 745
 746void gnttab_cancel_free_callback(struct gnttab_free_callback *callback)
 747{
 748        struct gnttab_free_callback **pcb;
 749        unsigned long flags;
 750
 751        spin_lock_irqsave(&gnttab_list_lock, flags);
 752        for (pcb = &gnttab_free_callback_list; *pcb; pcb = &(*pcb)->next) {
 753                if (*pcb == callback) {
 754                        *pcb = callback->next;
 755                        break;
 756                }
 757        }
 758        spin_unlock_irqrestore(&gnttab_list_lock, flags);
 759}
 760EXPORT_SYMBOL_GPL(gnttab_cancel_free_callback);
 761
 762static int grow_gnttab_list(unsigned int more_frames)
 763{
 764        unsigned int new_nr_grant_frames, extra_entries, i;
 765        unsigned int nr_glist_frames, new_nr_glist_frames;
 766
 767        BUG_ON(grefs_per_grant_frame == 0);
 768
 769        new_nr_grant_frames = nr_grant_frames + more_frames;
 770        extra_entries       = more_frames * grefs_per_grant_frame;
 771
 772        nr_glist_frames = (nr_grant_frames * grefs_per_grant_frame + RPP - 1) / RPP;
 773        new_nr_glist_frames =
 774                (new_nr_grant_frames * grefs_per_grant_frame + RPP - 1) / RPP;
 775        for (i = nr_glist_frames; i < new_nr_glist_frames; i++) {
 776                gnttab_list[i] = (grant_ref_t *)__get_free_page(GFP_ATOMIC);
 777                if (!gnttab_list[i])
 778                        goto grow_nomem;
 779        }
 780
 781
 782        for (i = grefs_per_grant_frame * nr_grant_frames;
 783             i < grefs_per_grant_frame * new_nr_grant_frames - 1; i++)
 784                gnttab_entry(i) = i + 1;
 785
 786        gnttab_entry(i) = gnttab_free_head;
 787        gnttab_free_head = grefs_per_grant_frame * nr_grant_frames;
 788        gnttab_free_count += extra_entries;
 789
 790        nr_grant_frames = new_nr_grant_frames;
 791
 792        check_free_callbacks();
 793
 794        return 0;
 795
 796grow_nomem:
 797        for ( ; i >= nr_glist_frames; i--)
 798                free_page((unsigned long) gnttab_list[i]);
 799        return -ENOMEM;
 800}
 801
 802static unsigned int __max_nr_grant_frames(void)
 803{
 804        struct gnttab_query_size query;
 805        int rc;
 806
 807        query.dom = DOMID_SELF;
 808
 809        rc = HYPERVISOR_grant_table_op(GNTTABOP_query_size, &query, 1);
 810        if ((rc < 0) || (query.status != GNTST_okay))
 811                return 4; /* Legacy max supported number of frames */
 812
 813        return query.max_nr_frames;
 814}
 815
 816unsigned int gnttab_max_grant_frames(void)
 817{
 818        unsigned int xen_max = __max_nr_grant_frames();
 819
 820        if (xen_max > boot_max_nr_grant_frames)
 821                return boot_max_nr_grant_frames;
 822        return xen_max;
 823}
 824EXPORT_SYMBOL_GPL(gnttab_max_grant_frames);
 825
 826/* Handling of paged out grant targets (GNTST_eagain) */
 827#define MAX_DELAY 256
 828static inline void
 829gnttab_retry_eagain_gop(unsigned int cmd, void *gop, int16_t *status,
 830                                                const char *func)
 831{
 832        unsigned delay = 1;
 833
 834        do {
 835                BUG_ON(HYPERVISOR_grant_table_op(cmd, gop, 1));
 836                if (*status == GNTST_eagain)
 837                        msleep(delay++);
 838        } while ((*status == GNTST_eagain) && (delay < MAX_DELAY));
 839
 840        if (delay >= MAX_DELAY) {
 841                printk(KERN_ERR "%s: %s eagain grant\n", func, current->comm);
 842                *status = GNTST_bad_page;
 843        }
 844}
 845
 846void gnttab_batch_map(struct gnttab_map_grant_ref *batch, unsigned count)
 847{
 848        struct gnttab_map_grant_ref *op;
 849
 850        if (HYPERVISOR_grant_table_op(GNTTABOP_map_grant_ref, batch, count))
 851                BUG();
 852        for (op = batch; op < batch + count; op++)
 853                if (op->status == GNTST_eagain)
 854                        gnttab_retry_eagain_gop(GNTTABOP_map_grant_ref, op,
 855                                                &op->status, __func__);
 856}
 857EXPORT_SYMBOL_GPL(gnttab_batch_map);
 858
 859void gnttab_batch_copy(struct gnttab_copy *batch, unsigned count)
 860{
 861        struct gnttab_copy *op;
 862
 863        if (HYPERVISOR_grant_table_op(GNTTABOP_copy, batch, count))
 864                BUG();
 865        for (op = batch; op < batch + count; op++)
 866                if (op->status == GNTST_eagain)
 867                        gnttab_retry_eagain_gop(GNTTABOP_copy, op,
 868                                                &op->status, __func__);
 869}
 870EXPORT_SYMBOL_GPL(gnttab_batch_copy);
 871
 872int gnttab_map_refs(struct gnttab_map_grant_ref *map_ops,
 873                    struct gnttab_map_grant_ref *kmap_ops,
 874                    struct page **pages, unsigned int count)
 875{
 876        int i, ret;
 877        bool lazy = false;
 878        pte_t *pte;
 879        unsigned long mfn;
 880
 881        ret = HYPERVISOR_grant_table_op(GNTTABOP_map_grant_ref, map_ops, count);
 882        if (ret)
 883                return ret;
 884
 885        /* Retry eagain maps */
 886        for (i = 0; i < count; i++)
 887                if (map_ops[i].status == GNTST_eagain)
 888                        gnttab_retry_eagain_gop(GNTTABOP_map_grant_ref, map_ops + i,
 889                                                &map_ops[i].status, __func__);
 890
 891        if (xen_feature(XENFEAT_auto_translated_physmap))
 892                return ret;
 893
 894        if (!in_interrupt() && paravirt_get_lazy_mode() == PARAVIRT_LAZY_NONE) {
 895                arch_enter_lazy_mmu_mode();
 896                lazy = true;
 897        }
 898
 899        for (i = 0; i < count; i++) {
 900                /* Do not add to override if the map failed. */
 901                if (map_ops[i].status)
 902                        continue;
 903
 904                if (map_ops[i].flags & GNTMAP_contains_pte) {
 905                        pte = (pte_t *) (mfn_to_virt(PFN_DOWN(map_ops[i].host_addr)) +
 906                                (map_ops[i].host_addr & ~PAGE_MASK));
 907                        mfn = pte_mfn(*pte);
 908                } else {
 909                        mfn = PFN_DOWN(map_ops[i].dev_bus_addr);
 910                }
 911                ret = m2p_add_override(mfn, pages[i], kmap_ops ?
 912                                       &kmap_ops[i] : NULL);
 913                if (ret)
 914                        return ret;
 915        }
 916
 917        if (lazy)
 918                arch_leave_lazy_mmu_mode();
 919
 920        return ret;
 921}
 922EXPORT_SYMBOL_GPL(gnttab_map_refs);
 923
 924int gnttab_unmap_refs(struct gnttab_unmap_grant_ref *unmap_ops,
 925                      struct gnttab_map_grant_ref *kmap_ops,
 926                      struct page **pages, unsigned int count)
 927{
 928        int i, ret;
 929        bool lazy = false;
 930
 931        ret = HYPERVISOR_grant_table_op(GNTTABOP_unmap_grant_ref, unmap_ops, count);
 932        if (ret)
 933                return ret;
 934
 935        if (xen_feature(XENFEAT_auto_translated_physmap))
 936                return ret;
 937
 938        if (!in_interrupt() && paravirt_get_lazy_mode() == PARAVIRT_LAZY_NONE) {
 939                arch_enter_lazy_mmu_mode();
 940                lazy = true;
 941        }
 942
 943        for (i = 0; i < count; i++) {
 944                ret = m2p_remove_override(pages[i], kmap_ops ?
 945                                       &kmap_ops[i] : NULL);
 946                if (ret)
 947                        return ret;
 948        }
 949
 950        if (lazy)
 951                arch_leave_lazy_mmu_mode();
 952
 953        return ret;
 954}
 955EXPORT_SYMBOL_GPL(gnttab_unmap_refs);
 956
 957static unsigned nr_status_frames(unsigned nr_grant_frames)
 958{
 959        BUG_ON(grefs_per_grant_frame == 0);
 960        return (nr_grant_frames * grefs_per_grant_frame + SPP - 1) / SPP;
 961}
 962
 963static int gnttab_map_frames_v1(xen_pfn_t *frames, unsigned int nr_gframes)
 964{
 965        int rc;
 966
 967        rc = arch_gnttab_map_shared(frames, nr_gframes,
 968                                    gnttab_max_grant_frames(),
 969                                    &gnttab_shared.addr);
 970        BUG_ON(rc);
 971
 972        return 0;
 973}
 974
 975static void gnttab_unmap_frames_v1(void)
 976{
 977        arch_gnttab_unmap(gnttab_shared.addr, nr_grant_frames);
 978}
 979
 980static int gnttab_map_frames_v2(xen_pfn_t *frames, unsigned int nr_gframes)
 981{
 982        uint64_t *sframes;
 983        unsigned int nr_sframes;
 984        struct gnttab_get_status_frames getframes;
 985        int rc;
 986
 987        nr_sframes = nr_status_frames(nr_gframes);
 988
 989        /* No need for kzalloc as it is initialized in following hypercall
 990         * GNTTABOP_get_status_frames.
 991         */
 992        sframes = kmalloc(nr_sframes  * sizeof(uint64_t), GFP_ATOMIC);
 993        if (!sframes)
 994                return -ENOMEM;
 995
 996        getframes.dom        = DOMID_SELF;
 997        getframes.nr_frames  = nr_sframes;
 998        set_xen_guest_handle(getframes.frame_list, sframes);
 999
1000        rc = HYPERVISOR_grant_table_op(GNTTABOP_get_status_frames,
1001                                       &getframes, 1);
1002        if (rc == -ENOSYS) {
1003                kfree(sframes);
1004                return -ENOSYS;
1005        }
1006
1007        BUG_ON(rc || getframes.status);
1008
1009        rc = arch_gnttab_map_status(sframes, nr_sframes,
1010                                    nr_status_frames(gnttab_max_grant_frames()),
1011                                    &grstatus);
1012        BUG_ON(rc);
1013        kfree(sframes);
1014
1015        rc = arch_gnttab_map_shared(frames, nr_gframes,
1016                                    gnttab_max_grant_frames(),
1017                                    &gnttab_shared.addr);
1018        BUG_ON(rc);
1019
1020        return 0;
1021}
1022
1023static void gnttab_unmap_frames_v2(void)
1024{
1025        arch_gnttab_unmap(gnttab_shared.addr, nr_grant_frames);
1026        arch_gnttab_unmap(grstatus, nr_status_frames(nr_grant_frames));
1027}
1028
1029static int gnttab_map(unsigned int start_idx, unsigned int end_idx)
1030{
1031        struct gnttab_setup_table setup;
1032        xen_pfn_t *frames;
1033        unsigned int nr_gframes = end_idx + 1;
1034        int rc;
1035
1036        if (xen_hvm_domain()) {
1037                struct xen_add_to_physmap xatp;
1038                unsigned int i = end_idx;
1039                rc = 0;
1040                /*
1041                 * Loop backwards, so that the first hypercall has the largest
1042                 * index, ensuring that the table will grow only once.
1043                 */
1044                do {
1045                        xatp.domid = DOMID_SELF;
1046                        xatp.idx = i;
1047                        xatp.space = XENMAPSPACE_grant_table;
1048                        xatp.gpfn = (xen_hvm_resume_frames >> PAGE_SHIFT) + i;
1049                        rc = HYPERVISOR_memory_op(XENMEM_add_to_physmap, &xatp);
1050                        if (rc != 0) {
1051                                printk(KERN_WARNING
1052                                                "grant table add_to_physmap failed, err=%d\n", rc);
1053                                break;
1054                        }
1055                } while (i-- > start_idx);
1056
1057                return rc;
1058        }
1059
1060        /* No need for kzalloc as it is initialized in following hypercall
1061         * GNTTABOP_setup_table.
1062         */
1063        frames = kmalloc(nr_gframes * sizeof(unsigned long), GFP_ATOMIC);
1064        if (!frames)
1065                return -ENOMEM;
1066
1067        setup.dom        = DOMID_SELF;
1068        setup.nr_frames  = nr_gframes;
1069        set_xen_guest_handle(setup.frame_list, frames);
1070
1071        rc = HYPERVISOR_grant_table_op(GNTTABOP_setup_table, &setup, 1);
1072        if (rc == -ENOSYS) {
1073                kfree(frames);
1074                return -ENOSYS;
1075        }
1076
1077        BUG_ON(rc || setup.status);
1078
1079        rc = gnttab_interface->map_frames(frames, nr_gframes);
1080
1081        kfree(frames);
1082
1083        return rc;
1084}
1085
1086static struct gnttab_ops gnttab_v1_ops = {
1087        .map_frames                     = gnttab_map_frames_v1,
1088        .unmap_frames                   = gnttab_unmap_frames_v1,
1089        .update_entry                   = gnttab_update_entry_v1,
1090        .end_foreign_access_ref         = gnttab_end_foreign_access_ref_v1,
1091        .end_foreign_transfer_ref       = gnttab_end_foreign_transfer_ref_v1,
1092        .query_foreign_access           = gnttab_query_foreign_access_v1,
1093};
1094
1095static struct gnttab_ops gnttab_v2_ops = {
1096        .map_frames                     = gnttab_map_frames_v2,
1097        .unmap_frames                   = gnttab_unmap_frames_v2,
1098        .update_entry                   = gnttab_update_entry_v2,
1099        .end_foreign_access_ref         = gnttab_end_foreign_access_ref_v2,
1100        .end_foreign_transfer_ref       = gnttab_end_foreign_transfer_ref_v2,
1101        .query_foreign_access           = gnttab_query_foreign_access_v2,
1102        .update_subpage_entry           = gnttab_update_subpage_entry_v2,
1103        .update_trans_entry             = gnttab_update_trans_entry_v2,
1104};
1105
1106static void gnttab_request_version(void)
1107{
1108        int rc;
1109        struct gnttab_set_version gsv;
1110
1111        if (xen_hvm_domain())
1112                gsv.version = 1;
1113        else
1114                gsv.version = 2;
1115        rc = HYPERVISOR_grant_table_op(GNTTABOP_set_version, &gsv, 1);
1116        if (rc == 0 && gsv.version == 2) {
1117                grant_table_version = 2;
1118                grefs_per_grant_frame = PAGE_SIZE / sizeof(union grant_entry_v2);
1119                gnttab_interface = &gnttab_v2_ops;
1120        } else if (grant_table_version == 2) {
1121                /*
1122                 * If we've already used version 2 features,
1123                 * but then suddenly discover that they're not
1124                 * available (e.g. migrating to an older
1125                 * version of Xen), almost unbounded badness
1126                 * can happen.
1127                 */
1128                panic("we need grant tables version 2, but only version 1 is available");
1129        } else {
1130                grant_table_version = 1;
1131                grefs_per_grant_frame = PAGE_SIZE / sizeof(struct grant_entry_v1);
1132                gnttab_interface = &gnttab_v1_ops;
1133        }
1134        printk(KERN_INFO "Grant tables using version %d layout.\n",
1135                grant_table_version);
1136}
1137
1138static int gnttab_setup(void)
1139{
1140        unsigned int max_nr_gframes;
1141
1142        max_nr_gframes = gnttab_max_grant_frames();
1143        if (max_nr_gframes < nr_grant_frames)
1144                return -ENOSYS;
1145
1146        if (xen_pv_domain())
1147                return gnttab_map(0, nr_grant_frames - 1);
1148
1149        if (gnttab_shared.addr == NULL) {
1150                gnttab_shared.addr = xen_remap(xen_hvm_resume_frames,
1151                                                PAGE_SIZE * max_nr_gframes);
1152                if (gnttab_shared.addr == NULL) {
1153                        printk(KERN_WARNING
1154                                        "Failed to ioremap gnttab share frames!");
1155                        return -ENOMEM;
1156                }
1157        }
1158
1159        gnttab_map(0, nr_grant_frames - 1);
1160
1161        return 0;
1162}
1163
/*
 * Renegotiate the grant-table version and then re-establish the
 * grant-frame mappings.  Returns the result of gnttab_setup().
 */
int gnttab_resume(void)
{
        int rc;

        gnttab_request_version();
        rc = gnttab_setup();

        return rc;
}
1169
1170int gnttab_suspend(void)
1171{
1172        gnttab_interface->unmap_frames();
1173        return 0;
1174}
1175
1176static int gnttab_expand(unsigned int req_entries)
1177{
1178        int rc;
1179        unsigned int cur, extra;
1180
1181        BUG_ON(grefs_per_grant_frame == 0);
1182        cur = nr_grant_frames;
1183        extra = ((req_entries + (grefs_per_grant_frame-1)) /
1184                 grefs_per_grant_frame);
1185        if (cur + extra > gnttab_max_grant_frames())
1186                return -ENOSPC;
1187
1188        rc = gnttab_map(cur, cur + extra - 1);
1189        if (rc == 0)
1190                rc = grow_gnttab_list(extra);
1191
1192        return rc;
1193}
1194
1195int gnttab_init(void)
1196{
1197        int i;
1198        unsigned int max_nr_glist_frames, nr_glist_frames;
1199        unsigned int nr_init_grefs;
1200        int ret;
1201
1202        gnttab_request_version();
1203        nr_grant_frames = 1;
1204        boot_max_nr_grant_frames = __max_nr_grant_frames();
1205
1206        /* Determine the maximum number of frames required for the
1207         * grant reference free list on the current hypervisor.
1208         */
1209        BUG_ON(grefs_per_grant_frame == 0);
1210        max_nr_glist_frames = (boot_max_nr_grant_frames *
1211                               grefs_per_grant_frame / RPP);
1212
1213        gnttab_list = kmalloc(max_nr_glist_frames * sizeof(grant_ref_t *),
1214                              GFP_KERNEL);
1215        if (gnttab_list == NULL)
1216                return -ENOMEM;
1217
1218        nr_glist_frames = (nr_grant_frames * grefs_per_grant_frame + RPP - 1) / RPP;
1219        for (i = 0; i < nr_glist_frames; i++) {
1220                gnttab_list[i] = (grant_ref_t *)__get_free_page(GFP_KERNEL);
1221                if (gnttab_list[i] == NULL) {
1222                        ret = -ENOMEM;
1223                        goto ini_nomem;
1224                }
1225        }
1226
1227        if (gnttab_setup() < 0) {
1228                ret = -ENODEV;
1229                goto ini_nomem;
1230        }
1231
1232        nr_init_grefs = nr_grant_frames * grefs_per_grant_frame;
1233
1234        for (i = NR_RESERVED_ENTRIES; i < nr_init_grefs - 1; i++)
1235                gnttab_entry(i) = i + 1;
1236
1237        gnttab_entry(nr_init_grefs - 1) = GNTTAB_LIST_END;
1238        gnttab_free_count = nr_init_grefs - NR_RESERVED_ENTRIES;
1239        gnttab_free_head  = NR_RESERVED_ENTRIES;
1240
1241        printk("Grant table initialized\n");
1242        return 0;
1243
1244 ini_nomem:
1245        for (i--; i >= 0; i--)
1246                free_page((unsigned long)gnttab_list[i]);
1247        kfree(gnttab_list);
1248        return ret;
1249}
1250EXPORT_SYMBOL_GPL(gnttab_init);
1251
1252static int __gnttab_init(void)
1253{
1254        /* Delay grant-table initialization in the PV on HVM case */
1255        if (xen_hvm_domain())
1256                return 0;
1257
1258        if (!xen_pv_domain())
1259                return -ENODEV;
1260
1261        return gnttab_init();
1262}
1263
1264core_initcall(__gnttab_init);
1265