/******************************************************************************
 * privcmd.c
 *
 * Interface to privileged domain-0 commands.
 *
 * Copyright (c) 2002-2004, K A Fraser, B Dragovic
 */

#define pr_fmt(fmt) "xen:" KBUILD_MODNAME ": " fmt

#include <linux/kernel.h>
#include <linux/module.h>
#include <linux/sched.h>
#include <linux/slab.h>
#include <linux/string.h>
#include <linux/errno.h>
#include <linux/mm.h>
#include <linux/mman.h>
#include <linux/uaccess.h>
#include <linux/swap.h>
#include <linux/highmem.h>
#include <linux/pagemap.h>
#include <linux/seq_file.h>
#include <linux/miscdevice.h>
#include <linux/moduleparam.h>

#include <asm/pgalloc.h>
#include <asm/pgtable.h>
#include <asm/tlb.h>
#include <asm/xen/hypervisor.h>
#include <asm/xen/hypercall.h>

#include <xen/xen.h>
#include <xen/privcmd.h>
#include <xen/interface/xen.h>
#include <xen/interface/hvm/dm_op.h>
#include <xen/features.h>
#include <xen/page.h>
#include <xen/xen-ops.h>
#include <xen/balloon.h>

#include "privcmd.h"

MODULE_LICENSE("GPL");

#define PRIV_VMA_LOCKED ((void *)1)

static unsigned int privcmd_dm_op_max_num = 16;
module_param_named(dm_op_max_nr_bufs, privcmd_dm_op_max_num, uint, 0644);
MODULE_PARM_DESC(dm_op_max_nr_bufs,
		 "Maximum number of buffers per dm_op hypercall");

static unsigned int privcmd_dm_op_buf_max_size = 4096;
module_param_named(dm_op_buf_max_size, privcmd_dm_op_buf_max_size, uint,
		   0644);
MODULE_PARM_DESC(dm_op_buf_max_size,
		 "Maximum size of a dm_op hypercall buffer");

struct privcmd_data {
	domid_t domid;
};

static int privcmd_vma_range_is_mapped(
	       struct vm_area_struct *vma,
	       unsigned long addr,
	       unsigned long nr_pages);

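/*
 * IOCTL_PRIVCMD_HYPERCALL: pass an arbitrary hypercall, with up to five
 * arguments, straight through to the hypervisor on behalf of userspace.
 * Refused with -EPERM once the file handle has been restricted to a
 * specific domain via IOCTL_PRIVCMD_RESTRICT.
 */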
static long privcmd_ioctl_hypercall(struct file *file, void __user *udata)
{
	struct privcmd_data *data = file->private_data;
	struct privcmd_hypercall hypercall;
	long ret;

	/* Disallow arbitrary hypercalls if restricted */
	if (data->domid != DOMID_INVALID)
		return -EPERM;

	if (copy_from_user(&hypercall, udata, sizeof(hypercall)))
		return -EFAULT;

	xen_preemptible_hcall_begin();
	ret = privcmd_call(hypercall.op,
			   hypercall.arg[0], hypercall.arg[1],
			   hypercall.arg[2], hypercall.arg[3],
			   hypercall.arg[4]);
	xen_preemptible_hcall_end();

	return ret;
}

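/*
 * Free all pages gathered by gather_array() and reset the list head so
 * the caller can reuse or safely discard it.
 */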
static void free_page_list(struct list_head *pages)
{
	struct page *p, *n;

	list_for_each_entry_safe(p, n, pages, lru)
		__free_page(p);

	INIT_LIST_HEAD(pages);
}

/*
 * Given an array of items in userspace, return a list of pages
 * containing the data.  If copying fails, either because of memory
 * allocation failure or a problem reading user memory, return an
 * error code; it's up to the caller to dispose of any partial list.
 */
static int gather_array(struct list_head *pagelist,
			unsigned nelem, size_t size,
			const void __user *data)
{
	unsigned pageidx;
	void *pagedata;
	int ret;

	if (size > PAGE_SIZE)
		return 0;

	pageidx = PAGE_SIZE;
	pagedata = NULL;	/* quiet, gcc */
	while (nelem--) {
		if (pageidx > PAGE_SIZE-size) {
			struct page *page = alloc_page(GFP_KERNEL);

			ret = -ENOMEM;
			if (page == NULL)
				goto fail;

			pagedata = page_address(page);

			list_add_tail(&page->lru, pagelist);
			pageidx = 0;
		}

		ret = -EFAULT;
		if (copy_from_user(pagedata + pageidx, data, size))
			goto fail;

		data += size;
		pageidx += size;
	}

	ret = 0;

fail:
	return ret;
}

/*
 * Call function "fn" on each element of the array fragmented
 * over a list of pages.
 */
static int traverse_pages(unsigned nelem, size_t size,
			  struct list_head *pos,
			  int (*fn)(void *data, void *state),
			  void *state)
{
	void *pagedata;
	unsigned pageidx;
	int ret = 0;

	BUG_ON(size > PAGE_SIZE);

	pageidx = PAGE_SIZE;
	pagedata = NULL;	/* hush, gcc */

	while (nelem--) {
		if (pageidx > PAGE_SIZE-size) {
			struct page *page;
			pos = pos->next;
			page = list_entry(pos, struct page, lru);
			pagedata = page_address(page);
			pageidx = 0;
		}

		ret = (*fn)(pagedata + pageidx, state);
		if (ret)
			break;
		pageidx += size;
	}

	return ret;
}

/*
 * Similar to traverse_pages, but use each page as a "block" of
 * data to be processed as one unit.
 */
static int traverse_pages_block(unsigned nelem, size_t size,
				struct list_head *pos,
				int (*fn)(void *data, int nr, void *state),
				void *state)
{
	void *pagedata;
	int ret = 0;

	BUG_ON(size > PAGE_SIZE);

	while (nelem) {
		int nr = (PAGE_SIZE/size);
		struct page *page;
		if (nr > nelem)
			nr = nelem;
		pos = pos->next;
		page = list_entry(pos, struct page, lru);
		pagedata = page_address(page);
		ret = (*fn)(pagedata, nr, state);
		if (ret)
			break;
		nelem -= nr;
	}

	return ret;
}

struct mmap_gfn_state {
	unsigned long va;
	struct vm_area_struct *vma;
	domid_t domain;
};

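/*
 * traverse_pages() callback: map one privcmd_mmap_entry worth of foreign
 * frames into the current chunk of the vma, advancing st->va as we go.
 */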
static int mmap_gfn_range(void *data, void *state)
{
	struct privcmd_mmap_entry *msg = data;
	struct mmap_gfn_state *st = state;
	struct vm_area_struct *vma = st->vma;
	int rc;

	/* Do not allow range to wrap the address space. */
	if ((msg->npages > (LONG_MAX >> PAGE_SHIFT)) ||
	    ((unsigned long)(msg->npages << PAGE_SHIFT) >= -st->va))
		return -EINVAL;

	/* Range chunks must be contiguous in va space. */
	if ((msg->va != st->va) ||
	    ((msg->va+(msg->npages<<PAGE_SHIFT)) > vma->vm_end))
		return -EINVAL;

	rc = xen_remap_domain_gfn_range(vma,
					msg->va & PAGE_MASK,
					msg->mfn, msg->npages,
					vma->vm_page_prot,
					st->domain, NULL);
	if (rc < 0)
		return rc;

	st->va += msg->npages << PAGE_SHIFT;

	return 0;
}

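/*
 * IOCTL_PRIVCMD_MMAP: map a caller-supplied array of privcmd_mmap_entry
 * chunks of foreign frames into a single pre-existing vma.  Not available
 * on auto-translated guests; those must use IOCTL_PRIVCMD_MMAPBATCH*.
 */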
static long privcmd_ioctl_mmap(struct file *file, void __user *udata)
{
	struct privcmd_data *data = file->private_data;
	struct privcmd_mmap mmapcmd;
	struct mm_struct *mm = current->mm;
	struct vm_area_struct *vma;
	int rc;
	LIST_HEAD(pagelist);
	struct mmap_gfn_state state;

	/* We only support privcmd_ioctl_mmap_batch for auto translated. */
	if (xen_feature(XENFEAT_auto_translated_physmap))
		return -ENOSYS;

	if (copy_from_user(&mmapcmd, udata, sizeof(mmapcmd)))
		return -EFAULT;

	/* If restriction is in place, check the domid matches */
	if (data->domid != DOMID_INVALID && data->domid != mmapcmd.dom)
		return -EPERM;

	rc = gather_array(&pagelist,
			  mmapcmd.num, sizeof(struct privcmd_mmap_entry),
			  mmapcmd.entry);

	if (rc || list_empty(&pagelist))
		goto out;

	down_write(&mm->mmap_sem);

	{
		struct page *page = list_first_entry(&pagelist,
						     struct page, lru);
		struct privcmd_mmap_entry *msg = page_address(page);

		vma = find_vma(mm, msg->va);
		rc = -EINVAL;

		if (!vma || (msg->va != vma->vm_start) || vma->vm_private_data)
			goto out_up;
		vma->vm_private_data = PRIV_VMA_LOCKED;
	}

	state.va = vma->vm_start;
	state.vma = vma;
	state.domain = mmapcmd.dom;

	rc = traverse_pages(mmapcmd.num, sizeof(struct privcmd_mmap_entry),
			    &pagelist,
			    mmap_gfn_range, &state);

out_up:
	up_write(&mm->mmap_sem);

out:
	free_page_list(&pagelist);

	return rc;
}

struct mmap_batch_state {
	domid_t domain;
	unsigned long va;
	struct vm_area_struct *vma;
	int index;
	/* A tristate:
	 *      0 for no errors
	 *      1 if at least one error has happened (and no
	 *          -ENOENT errors have happened)
	 *      -ENOENT if at least 1 -ENOENT has happened.
	 */
	int global_error;
	int version;

	/* User-space gfn array to store errors in the second pass for V1. */
	xen_pfn_t __user *user_gfn;
	/* User-space int array to store errors in the second pass for V2. */
	int __user *user_err;
};

/* auto translated dom0 note: if domU being created is PV, then gfn is
 * mfn(addr on bus). If it's auto xlated, then gfn is pfn (input to HAP).
 */
static int mmap_batch_fn(void *data, int nr, void *state)
{
	xen_pfn_t *gfnp = data;
	struct mmap_batch_state *st = state;
	struct vm_area_struct *vma = st->vma;
	struct page **pages = vma->vm_private_data;
	struct page **cur_pages = NULL;
	int ret;

	if (xen_feature(XENFEAT_auto_translated_physmap))
		cur_pages = &pages[st->index];

	BUG_ON(nr < 0);
	ret = xen_remap_domain_gfn_array(st->vma, st->va & PAGE_MASK, gfnp, nr,
					 (int *)gfnp, st->vma->vm_page_prot,
					 st->domain, cur_pages);

	/* Adjust the global_error? */
	if (ret != nr) {
		if (ret == -ENOENT)
			st->global_error = -ENOENT;
		else {
			/* Record that at least one error has happened. */
			if (st->global_error == 0)
				st->global_error = 1;
		}
	}
	st->va += XEN_PAGE_SIZE * nr;
	st->index += nr / XEN_PFN_PER_PAGE;

	return 0;
}

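/*
 * Second-pass helper: report one per-frame mapping error back to userspace.
 * V1 folds the error into the top bits of the gfn in the original array;
 * V2 writes the raw errno into the separate err array.
 */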
static int mmap_return_error(int err, struct mmap_batch_state *st)
{
	int ret;

	if (st->version == 1) {
		if (err) {
			xen_pfn_t gfn;

			ret = get_user(gfn, st->user_gfn);
			if (ret < 0)
				return ret;
			/*
			 * V1 encodes the error codes in the 32bit top
			 * nibble of the gfn (with its known
			 * limitations vis-a-vis 64 bit callers).
			 */
			gfn |= (err == -ENOENT) ?
				PRIVCMD_MMAPBATCH_PAGED_ERROR :
				PRIVCMD_MMAPBATCH_MFN_ERROR;
			return __put_user(gfn, st->user_gfn++);
		} else
			st->user_gfn++;
	} else { /* st->version == 2 */
		if (err)
			return __put_user(err, st->user_err++);
		else
			st->user_err++;
	}

	return 0;
}

static int mmap_return_errors(void *data, int nr, void *state)
{
	struct mmap_batch_state *st = state;
	int *errs = data;
	int i;
	int ret;

	for (i = 0; i < nr; i++) {
		ret = mmap_return_error(errs[i], st);
		if (ret < 0)
			return ret;
	}
	return 0;
}

/* Allocate pfns that are then mapped with gfns from foreign domid. Update
 * the vma with the page info to use later.
 * Returns: 0 if success, otherwise -errno
 */
static int alloc_empty_pages(struct vm_area_struct *vma, int numpgs)
{
	int rc;
	struct page **pages;

	pages = kcalloc(numpgs, sizeof(pages[0]), GFP_KERNEL);
	if (pages == NULL)
		return -ENOMEM;

	rc = alloc_xenballooned_pages(numpgs, pages);
	if (rc != 0) {
		pr_warn("%s Could not alloc %d pfns rc:%d\n", __func__,
			numpgs, rc);
		kfree(pages);
		return -ENOMEM;
	}
	BUG_ON(vma->vm_private_data != NULL);
	vma->vm_private_data = pages;

	return 0;
}

static const struct vm_operations_struct privcmd_vm_ops;

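/*
 * IOCTL_PRIVCMD_MMAPBATCH (version 1) and IOCTL_PRIVCMD_MMAPBATCH_V2
 * (version 2): map an array of foreign gfns into a privcmd vma, either
 * covering the whole vma on the first call or filling holes (e.g. frames
 * that were previously paged out) on a retry.  Per-frame errors are
 * reported back to userspace in a second pass via mmap_return_errors().
 */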
static long privcmd_ioctl_mmap_batch(
	struct file *file, void __user *udata, int version)
{
	struct privcmd_data *data = file->private_data;
	int ret;
	struct privcmd_mmapbatch_v2 m;
	struct mm_struct *mm = current->mm;
	struct vm_area_struct *vma;
	unsigned long nr_pages;
	LIST_HEAD(pagelist);
	struct mmap_batch_state state;

	switch (version) {
	case 1:
		if (copy_from_user(&m, udata, sizeof(struct privcmd_mmapbatch)))
			return -EFAULT;
		/* Returns per-frame error in m.arr. */
		m.err = NULL;
		if (!access_ok(VERIFY_WRITE, m.arr, m.num * sizeof(*m.arr)))
			return -EFAULT;
		break;
	case 2:
		if (copy_from_user(&m, udata, sizeof(struct privcmd_mmapbatch_v2)))
			return -EFAULT;
		/* Returns per-frame error code in m.err. */
		if (!access_ok(VERIFY_WRITE, m.err, m.num * (sizeof(*m.err))))
			return -EFAULT;
		break;
	default:
		return -EINVAL;
	}

	/* If restriction is in place, check the domid matches */
	if (data->domid != DOMID_INVALID && data->domid != m.dom)
		return -EPERM;

	nr_pages = DIV_ROUND_UP(m.num, XEN_PFN_PER_PAGE);
	if ((m.num <= 0) || (nr_pages > (LONG_MAX >> PAGE_SHIFT)))
		return -EINVAL;

	ret = gather_array(&pagelist, m.num, sizeof(xen_pfn_t), m.arr);

	if (ret)
		goto out;
	if (list_empty(&pagelist)) {
		ret = -EINVAL;
		goto out;
	}

	if (version == 2) {
		/* Zero error array now to only copy back actual errors. */
		if (clear_user(m.err, sizeof(int) * m.num)) {
			ret = -EFAULT;
			goto out;
		}
	}

	down_write(&mm->mmap_sem);

	vma = find_vma(mm, m.addr);
	if (!vma ||
	    vma->vm_ops != &privcmd_vm_ops) {
		ret = -EINVAL;
		goto out_unlock;
	}

	/*
	 * Caller must either:
	 *
	 * Map the whole VMA range, which will also allocate all the
	 * pages required for the auto_translated_physmap case.
	 *
	 * Or
	 *
	 * Map unmapped holes left from a previous map attempt (e.g.,
	 * because those foreign frames were previously paged out).
	 */
	if (vma->vm_private_data == NULL) {
		if (m.addr != vma->vm_start ||
		    m.addr + (nr_pages << PAGE_SHIFT) != vma->vm_end) {
			ret = -EINVAL;
			goto out_unlock;
		}
		if (xen_feature(XENFEAT_auto_translated_physmap)) {
			ret = alloc_empty_pages(vma, nr_pages);
			if (ret < 0)
				goto out_unlock;
		} else
			vma->vm_private_data = PRIV_VMA_LOCKED;
	} else {
		if (m.addr < vma->vm_start ||
		    m.addr + (nr_pages << PAGE_SHIFT) > vma->vm_end) {
			ret = -EINVAL;
			goto out_unlock;
		}
		if (privcmd_vma_range_is_mapped(vma, m.addr, nr_pages)) {
			ret = -EINVAL;
			goto out_unlock;
		}
	}

	state.domain        = m.dom;
	state.vma           = vma;
	state.va            = m.addr;
	state.index         = 0;
	state.global_error  = 0;
	state.version       = version;

	BUILD_BUG_ON(((PAGE_SIZE / sizeof(xen_pfn_t)) % XEN_PFN_PER_PAGE) != 0);
	/* mmap_batch_fn guarantees ret == 0 */
	BUG_ON(traverse_pages_block(m.num, sizeof(xen_pfn_t),
				    &pagelist, mmap_batch_fn, &state));

	up_write(&mm->mmap_sem);

	if (state.global_error) {
		/* Write back errors in second pass. */
		state.user_gfn = (xen_pfn_t *)m.arr;
		state.user_err = m.err;
		ret = traverse_pages_block(m.num, sizeof(xen_pfn_t),
					   &pagelist, mmap_return_errors, &state);
	} else
		ret = 0;

	/* If we have not had any EFAULT-like global errors then set the global
	 * error to -ENOENT if necessary. */
	if ((ret == 0) && (state.global_error == -ENOENT))
		ret = -ENOENT;

out:
	free_page_list(&pagelist);
	return ret;

out_unlock:
	up_write(&mm->mmap_sem);
	goto out;
}

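/*
 * Pin the userspace buffers described by kbufs[] with get_user_pages_fast()
 * so they stay resident while the hypervisor accesses them during the dm_op
 * hypercall.  Fails with -ENOSPC if the buffers need more pages than the
 * caller-provided pages[] array can hold.
 */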
static int lock_pages(
	struct privcmd_dm_op_buf kbufs[], unsigned int num,
	struct page *pages[], unsigned int nr_pages)
{
	unsigned int i;

	for (i = 0; i < num; i++) {
		unsigned int requested;
		int pinned;

		requested = DIV_ROUND_UP(
			offset_in_page(kbufs[i].uptr) + kbufs[i].size,
			PAGE_SIZE);
		if (requested > nr_pages)
			return -ENOSPC;

		pinned = get_user_pages_fast(
			(unsigned long) kbufs[i].uptr,
			requested, FOLL_WRITE, pages);
		if (pinned < 0)
			return pinned;

		nr_pages -= pinned;
		pages += pinned;
	}

	return 0;
}

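/* Drop the references taken by lock_pages(); tolerates a partially
 * populated (or NULL) pages[] array left behind by a failed pinning attempt.
 */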
static void unlock_pages(struct page *pages[], unsigned int nr_pages)
{
	unsigned int i;

	if (!pages)
		return;

	for (i = 0; i < nr_pages; i++) {
		if (pages[i])
			put_page(pages[i]);
	}
}

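/*
 * IOCTL_PRIVCMD_DM_OP: forward a device model operation to the hypervisor.
 * The caller passes an array of up to dm_op_max_nr_bufs buffers, each no
 * larger than dm_op_buf_max_size bytes; the buffers are pinned in place
 * for the duration of the (preemptible) HYPERVISOR_dm_op hypercall.
 */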
static long privcmd_ioctl_dm_op(struct file *file, void __user *udata)
{
	struct privcmd_data *data = file->private_data;
	struct privcmd_dm_op kdata;
	struct privcmd_dm_op_buf *kbufs;
	unsigned int nr_pages = 0;
	struct page **pages = NULL;
	struct xen_dm_op_buf *xbufs = NULL;
	unsigned int i;
	long rc;

	if (copy_from_user(&kdata, udata, sizeof(kdata)))
		return -EFAULT;

	/* If restriction is in place, check the domid matches */
	if (data->domid != DOMID_INVALID && data->domid != kdata.dom)
		return -EPERM;

	if (kdata.num == 0)
		return 0;

	if (kdata.num > privcmd_dm_op_max_num)
		return -E2BIG;

	kbufs = kcalloc(kdata.num, sizeof(*kbufs), GFP_KERNEL);
	if (!kbufs)
		return -ENOMEM;

	if (copy_from_user(kbufs, kdata.ubufs,
			   sizeof(*kbufs) * kdata.num)) {
		rc = -EFAULT;
		goto out;
	}

	for (i = 0; i < kdata.num; i++) {
		if (kbufs[i].size > privcmd_dm_op_buf_max_size) {
			rc = -E2BIG;
			goto out;
		}

		if (!access_ok(VERIFY_WRITE, kbufs[i].uptr,
			       kbufs[i].size)) {
			rc = -EFAULT;
			goto out;
		}

		nr_pages += DIV_ROUND_UP(
			offset_in_page(kbufs[i].uptr) + kbufs[i].size,
			PAGE_SIZE);
	}

	pages = kcalloc(nr_pages, sizeof(*pages), GFP_KERNEL);
	if (!pages) {
		rc = -ENOMEM;
		goto out;
	}

	xbufs = kcalloc(kdata.num, sizeof(*xbufs), GFP_KERNEL);
	if (!xbufs) {
		rc = -ENOMEM;
		goto out;
	}

	rc = lock_pages(kbufs, kdata.num, pages, nr_pages);
	if (rc)
		goto out;

	for (i = 0; i < kdata.num; i++) {
		set_xen_guest_handle(xbufs[i].h, kbufs[i].uptr);
		xbufs[i].size = kbufs[i].size;
	}

	xen_preemptible_hcall_begin();
	rc = HYPERVISOR_dm_op(kdata.dom, kdata.num, xbufs);
	xen_preemptible_hcall_end();

out:
	unlock_pages(pages, nr_pages);
	kfree(xbufs);
	kfree(pages);
	kfree(kbufs);

	return rc;
}

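/*
 * IOCTL_PRIVCMD_RESTRICT: restrict this file handle to a single domain.
 * Subsequent ioctls on the handle may only target that domain, and raw
 * hypercalls are refused entirely.  Once set, the restriction cannot be
 * changed to a different domain.
 */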
static long privcmd_ioctl_restrict(struct file *file, void __user *udata)
{
	struct privcmd_data *data = file->private_data;
	domid_t dom;

	if (copy_from_user(&dom, udata, sizeof(dom)))
		return -EFAULT;

	/* Set restriction to the specified domain, or check it matches */
	if (data->domid == DOMID_INVALID)
		data->domid = dom;
	else if (data->domid != dom)
		return -EINVAL;

	return 0;
}

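/*
 * Top-level ioctl dispatcher for the privcmd device.  A minimal,
 * illustrative userspace sequence (not part of this file) for issuing a
 * hypercall through it might look like:
 *
 *	int fd = open("/dev/xen/privcmd", O_RDWR);
 *	struct privcmd_hypercall call = { .op = ..., .arg = { ... } };
 *	long rc = ioctl(fd, IOCTL_PRIVCMD_HYPERCALL, &call);
 *
 * Unknown commands fall through and return -ENOTTY.
 */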
static long privcmd_ioctl(struct file *file,
			  unsigned int cmd, unsigned long data)
{
	int ret = -ENOTTY;
	void __user *udata = (void __user *) data;

	switch (cmd) {
	case IOCTL_PRIVCMD_HYPERCALL:
		ret = privcmd_ioctl_hypercall(file, udata);
		break;

	case IOCTL_PRIVCMD_MMAP:
		ret = privcmd_ioctl_mmap(file, udata);
		break;

	case IOCTL_PRIVCMD_MMAPBATCH:
		ret = privcmd_ioctl_mmap_batch(file, udata, 1);
		break;

	case IOCTL_PRIVCMD_MMAPBATCH_V2:
		ret = privcmd_ioctl_mmap_batch(file, udata, 2);
		break;

	case IOCTL_PRIVCMD_DM_OP:
		ret = privcmd_ioctl_dm_op(file, udata);
		break;

	case IOCTL_PRIVCMD_RESTRICT:
		ret = privcmd_ioctl_restrict(file, udata);
		break;

	default:
		break;
	}

	return ret;
}

static int privcmd_open(struct inode *ino, struct file *file)
{
	struct privcmd_data *data = kzalloc(sizeof(*data), GFP_KERNEL);

	if (!data)
		return -ENOMEM;

	/* DOMID_INVALID implies no restriction */
	data->domid = DOMID_INVALID;

	file->private_data = data;
	return 0;
}

static int privcmd_release(struct inode *ino, struct file *file)
{
	struct privcmd_data *data = file->private_data;

	kfree(data);
	return 0;
}

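/*
 * vm_ops close handler: on auto-translated guests, tear down the foreign
 * mappings and return the ballooned pages backing the vma; otherwise there
 * is nothing to undo here.
 */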
static void privcmd_close(struct vm_area_struct *vma)
{
	struct page **pages = vma->vm_private_data;
	int numpgs = vma_pages(vma);
	int numgfns = (vma->vm_end - vma->vm_start) >> XEN_PAGE_SHIFT;
	int rc;

	if (!xen_feature(XENFEAT_auto_translated_physmap) || !numpgs || !pages)
		return;

	rc = xen_unmap_domain_gfn_range(vma, numgfns, pages);
	if (rc == 0)
		free_xenballooned_pages(numpgs, pages);
	else
		pr_crit("unable to unmap MFN range: leaking %d pages. rc=%d\n",
			numpgs, rc);
	kfree(pages);
}

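/*
 * Faults on a privcmd vma are never satisfied by demand paging: every
 * mapping must be established explicitly through the mmap ioctls, so an
 * unexpected fault just logs the details and raises SIGBUS.
 */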
static int privcmd_fault(struct vm_fault *vmf)
{
	printk(KERN_DEBUG "privcmd_fault: vma=%p %lx-%lx, pgoff=%lx, uv=%p\n",
	       vmf->vma, vmf->vma->vm_start, vmf->vma->vm_end,
	       vmf->pgoff, (void *)vmf->address);

	return VM_FAULT_SIGBUS;
}

static const struct vm_operations_struct privcmd_vm_ops = {
	.close = privcmd_close,
	.fault = privcmd_fault
};

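/*
 * mmap handler: set up an empty vma that the mmap ioctls will later fill
 * with foreign mappings.  No pages are mapped at this point.
 */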
static int privcmd_mmap(struct file *file, struct vm_area_struct *vma)
{
	/* DONTCOPY is essential for Xen because copy_page_range doesn't know
	 * how to recreate these mappings */
	vma->vm_flags |= VM_IO | VM_PFNMAP | VM_DONTCOPY |
			 VM_DONTEXPAND | VM_DONTDUMP;
	vma->vm_ops = &privcmd_vm_ops;
	vma->vm_private_data = NULL;

	return 0;
}

/*
 * For MMAPBATCH*. This allows asserting the singleshot mapping
 * on a per pfn/pte basis. Mapping calls that fail with ENOENT
 * can then be retried until success.
 */
static int is_mapped_fn(pte_t *pte, struct page *pmd_page,
			unsigned long addr, void *data)
{
	return pte_none(*pte) ? 0 : -EBUSY;
}

static int privcmd_vma_range_is_mapped(
		   struct vm_area_struct *vma,
		   unsigned long addr,
		   unsigned long nr_pages)
{
	return apply_to_page_range(vma->vm_mm, addr, nr_pages << PAGE_SHIFT,
				   is_mapped_fn, NULL) != 0;
}

const struct file_operations xen_privcmd_fops = {
	.owner = THIS_MODULE,
	.unlocked_ioctl = privcmd_ioctl,
	.open = privcmd_open,
	.release = privcmd_release,
	.mmap = privcmd_mmap,
};
EXPORT_SYMBOL_GPL(xen_privcmd_fops);

static struct miscdevice privcmd_dev = {
	.minor = MISC_DYNAMIC_MINOR,
	.name = "xen/privcmd",
	.fops = &xen_privcmd_fops,
};

static int __init privcmd_init(void)
{
	int err;

	if (!xen_domain())
		return -ENODEV;

	err = misc_register(&privcmd_dev);
	if (err != 0) {
		pr_err("Could not register Xen privcmd device\n");
		return err;
	}
	return 0;
}

static void __exit privcmd_exit(void)
{
	misc_deregister(&privcmd_dev);
}

module_init(privcmd_init);
module_exit(privcmd_exit);