linux/drivers/xen/privcmd.c
// SPDX-License-Identifier: GPL-2.0-only
/******************************************************************************
 * privcmd.c
 *
 * Interface to privileged domain-0 commands.
 *
 * Copyright (c) 2002-2004, K A Fraser, B Dragovic
 */

#define pr_fmt(fmt) "xen:" KBUILD_MODNAME ": " fmt

#include <linux/kernel.h>
#include <linux/module.h>
#include <linux/sched.h>
#include <linux/slab.h>
#include <linux/string.h>
#include <linux/errno.h>
#include <linux/mm.h>
#include <linux/mman.h>
#include <linux/uaccess.h>
#include <linux/swap.h>
#include <linux/highmem.h>
#include <linux/pagemap.h>
#include <linux/seq_file.h>
#include <linux/miscdevice.h>
#include <linux/moduleparam.h>

#include <asm/pgalloc.h>
#include <asm/pgtable.h>
#include <asm/tlb.h>
#include <asm/xen/hypervisor.h>
#include <asm/xen/hypercall.h>

#include <xen/xen.h>
#include <xen/privcmd.h>
#include <xen/interface/xen.h>
#include <xen/interface/memory.h>
#include <xen/interface/hvm/dm_op.h>
#include <xen/features.h>
#include <xen/page.h>
#include <xen/xen-ops.h>
#include <xen/balloon.h>

#include "privcmd.h"

MODULE_LICENSE("GPL");

#define PRIV_VMA_LOCKED ((void *)1)

static unsigned int privcmd_dm_op_max_num = 16;
module_param_named(dm_op_max_nr_bufs, privcmd_dm_op_max_num, uint, 0644);
MODULE_PARM_DESC(dm_op_max_nr_bufs,
                 "Maximum number of buffers per dm_op hypercall");

static unsigned int privcmd_dm_op_buf_max_size = 4096;
module_param_named(dm_op_buf_max_size, privcmd_dm_op_buf_max_size, uint,
                   0644);
MODULE_PARM_DESC(dm_op_buf_max_size,
                 "Maximum size of a dm_op hypercall buffer");

struct privcmd_data {
        domid_t domid;
};

static int privcmd_vma_range_is_mapped(
               struct vm_area_struct *vma,
               unsigned long addr,
               unsigned long nr_pages);

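/*
 * Userspace reaches this through /dev/xen/privcmd. A minimal,
 * illustrative sketch of issuing a raw hypercall from a privileged
 * process (assumes the uapi definitions from xen/privcmd.h and the
 * Xen public headers; error handling elided):
 *
 *      struct privcmd_hypercall hc = {
 *              .op  = __HYPERVISOR_xen_version,
 *              .arg = { XENVER_version, 0, 0, 0, 0 },
 *      };
 *      int fd = open("/dev/xen/privcmd", O_RDWR);
 *      long ver = ioctl(fd, IOCTL_PRIVCMD_HYPERCALL, &hc);
 *
 * The ioctl's return value is the hypercall's return value.
 */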
static long privcmd_ioctl_hypercall(struct file *file, void __user *udata)
{
        struct privcmd_data *data = file->private_data;
        struct privcmd_hypercall hypercall;
        long ret;

        /* Disallow arbitrary hypercalls if restricted */
        if (data->domid != DOMID_INVALID)
                return -EPERM;

        if (copy_from_user(&hypercall, udata, sizeof(hypercall)))
                return -EFAULT;

        xen_preemptible_hcall_begin();
        ret = privcmd_call(hypercall.op,
                           hypercall.arg[0], hypercall.arg[1],
                           hypercall.arg[2], hypercall.arg[3],
                           hypercall.arg[4]);
        xen_preemptible_hcall_end();

        return ret;
}

static void free_page_list(struct list_head *pages)
{
        struct page *p, *n;

        list_for_each_entry_safe(p, n, pages, lru)
                __free_page(p);

        INIT_LIST_HEAD(pages);
}

/*
 * Given an array of items in userspace, return a list of pages
 * containing the data.  If copying fails, either because of memory
 * allocation failure or a problem reading user memory, return an
 * error code; it's up to the caller to dispose of any partial list.
 */
static int gather_array(struct list_head *pagelist,
                        unsigned nelem, size_t size,
                        const void __user *data)
{
        unsigned pageidx;
        void *pagedata;
        int ret;

        if (size > PAGE_SIZE)
                return 0;

        pageidx = PAGE_SIZE;
        pagedata = NULL;        /* quiet, gcc */
        while (nelem--) {
                if (pageidx > PAGE_SIZE-size) {
                        struct page *page = alloc_page(GFP_KERNEL);

                        ret = -ENOMEM;
                        if (page == NULL)
                                goto fail;

                        pagedata = page_address(page);

                        list_add_tail(&page->lru, pagelist);
                        pageidx = 0;
                }

                ret = -EFAULT;
                if (copy_from_user(pagedata + pageidx, data, size))
                        goto fail;

                data += size;
                pageidx += size;
        }

        ret = 0;

fail:
        return ret;
}

/*
 * Call function "fn" on each element of the array fragmented
 * over a list of pages.
 */
static int traverse_pages(unsigned nelem, size_t size,
                          struct list_head *pos,
                          int (*fn)(void *data, void *state),
                          void *state)
{
        void *pagedata;
        unsigned pageidx;
        int ret = 0;

        BUG_ON(size > PAGE_SIZE);

        pageidx = PAGE_SIZE;
        pagedata = NULL;        /* hush, gcc */

        while (nelem--) {
                if (pageidx > PAGE_SIZE-size) {
                        struct page *page;
                        pos = pos->next;
                        page = list_entry(pos, struct page, lru);
                        pagedata = page_address(page);
                        pageidx = 0;
                }

                ret = (*fn)(pagedata + pageidx, state);
                if (ret)
                        break;
                pageidx += size;
        }

        return ret;
}

/*
 * Similar to traverse_pages, but use each page as a "block" of
 * data to be processed as one unit.
 */
static int traverse_pages_block(unsigned nelem, size_t size,
                                struct list_head *pos,
                                int (*fn)(void *data, int nr, void *state),
                                void *state)
{
        void *pagedata;
        int ret = 0;

        BUG_ON(size > PAGE_SIZE);

        while (nelem) {
                int nr = (PAGE_SIZE/size);
                struct page *page;
                if (nr > nelem)
                        nr = nelem;
                pos = pos->next;
                page = list_entry(pos, struct page, lru);
                pagedata = page_address(page);
                ret = (*fn)(pagedata, nr, state);
                if (ret)
                        break;
                nelem -= nr;
        }

        return ret;
}
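
/*
 * Worked example of the chunking above (illustrative; assumes 4 KiB
 * pages and a 64-bit xen_pfn_t): with size == sizeof(xen_pfn_t) == 8,
 * each page in the list holds PAGE_SIZE/size == 512 elements, so
 * traverse_pages_block() sees a batch of 1300 gfns as blocks of
 * nr == 512, 512 and 276.
 */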

struct mmap_gfn_state {
        unsigned long va;
        struct vm_area_struct *vma;
        domid_t domain;
};

static int mmap_gfn_range(void *data, void *state)
{
        struct privcmd_mmap_entry *msg = data;
        struct mmap_gfn_state *st = state;
        struct vm_area_struct *vma = st->vma;
        int rc;

        /* Do not allow range to wrap the address space. */
        if ((msg->npages > (LONG_MAX >> PAGE_SHIFT)) ||
            ((unsigned long)(msg->npages << PAGE_SHIFT) >= -st->va))
                return -EINVAL;

        /* Range chunks must be contiguous in va space. */
        if ((msg->va != st->va) ||
            ((msg->va+(msg->npages<<PAGE_SHIFT)) > vma->vm_end))
                return -EINVAL;

        rc = xen_remap_domain_gfn_range(vma,
                                        msg->va & PAGE_MASK,
                                        msg->mfn, msg->npages,
                                        vma->vm_page_prot,
                                        st->domain, NULL);
        if (rc < 0)
                return rc;

        st->va += msg->npages << PAGE_SHIFT;

        return 0;
}

static long privcmd_ioctl_mmap(struct file *file, void __user *udata)
{
        struct privcmd_data *data = file->private_data;
        struct privcmd_mmap mmapcmd;
        struct mm_struct *mm = current->mm;
        struct vm_area_struct *vma;
        int rc;
        LIST_HEAD(pagelist);
        struct mmap_gfn_state state;

        /* We only support privcmd_ioctl_mmap_batch for auto translated. */
        if (xen_feature(XENFEAT_auto_translated_physmap))
                return -ENOSYS;

        if (copy_from_user(&mmapcmd, udata, sizeof(mmapcmd)))
                return -EFAULT;

        /* If restriction is in place, check the domid matches */
        if (data->domid != DOMID_INVALID && data->domid != mmapcmd.dom)
                return -EPERM;

        rc = gather_array(&pagelist,
                          mmapcmd.num, sizeof(struct privcmd_mmap_entry),
                          mmapcmd.entry);

        if (rc || list_empty(&pagelist))
                goto out;

        down_write(&mm->mmap_sem);

        {
                struct page *page = list_first_entry(&pagelist,
                                                     struct page, lru);
                struct privcmd_mmap_entry *msg = page_address(page);

                vma = find_vma(mm, msg->va);
                rc = -EINVAL;

                if (!vma || (msg->va != vma->vm_start) || vma->vm_private_data)
                        goto out_up;
                vma->vm_private_data = PRIV_VMA_LOCKED;
        }

        state.va = vma->vm_start;
        state.vma = vma;
        state.domain = mmapcmd.dom;

        rc = traverse_pages(mmapcmd.num, sizeof(struct privcmd_mmap_entry),
                            &pagelist,
                            mmap_gfn_range, &state);


out_up:
        up_write(&mm->mmap_sem);

out:
        free_page_list(&pagelist);

        return rc;
}

struct mmap_batch_state {
        domid_t domain;
        unsigned long va;
        struct vm_area_struct *vma;
        int index;
        /* A tristate:
         *      0 for no errors
         *      1 if at least one error has happened (and no
         *          -ENOENT errors have happened)
         *      -ENOENT if at least 1 -ENOENT has happened.
         */
        int global_error;
        int version;

        /* User-space gfn array to store errors in the second pass for V1. */
        xen_pfn_t __user *user_gfn;
        /* User-space int array to store errors in the second pass for V2. */
        int __user *user_err;
};

/* Note for auto-translated dom0: if the domU being created is PV, the gfn
 * is an mfn (an address on the bus). If it is auto-translated, the gfn is
 * a pfn (the input to HAP).
 */
static int mmap_batch_fn(void *data, int nr, void *state)
{
        xen_pfn_t *gfnp = data;
        struct mmap_batch_state *st = state;
        struct vm_area_struct *vma = st->vma;
        struct page **pages = vma->vm_private_data;
        struct page **cur_pages = NULL;
        int ret;

        if (xen_feature(XENFEAT_auto_translated_physmap))
                cur_pages = &pages[st->index];

        BUG_ON(nr < 0);
        ret = xen_remap_domain_gfn_array(st->vma, st->va & PAGE_MASK, gfnp, nr,
                                         (int *)gfnp, st->vma->vm_page_prot,
                                         st->domain, cur_pages);

        /* Adjust the global_error? */
        if (ret != nr) {
                if (ret == -ENOENT)
                        st->global_error = -ENOENT;
                else {
                        /* Record that at least one error has happened. */
                        if (st->global_error == 0)
                                st->global_error = 1;
                }
        }
        st->va += XEN_PAGE_SIZE * nr;
        st->index += nr / XEN_PFN_PER_PAGE;

        return 0;
}

static int mmap_return_error(int err, struct mmap_batch_state *st)
{
        int ret;

        if (st->version == 1) {
                if (err) {
                        xen_pfn_t gfn;

                        ret = get_user(gfn, st->user_gfn);
                        if (ret < 0)
                                return ret;
                        /*
                         * V1 encodes the error codes in the top nibble
                         * of the 32-bit gfn (with the known limitations
                         * this has for 64-bit callers).
                         */
                        gfn |= (err == -ENOENT) ?
                                PRIVCMD_MMAPBATCH_PAGED_ERROR :
                                PRIVCMD_MMAPBATCH_MFN_ERROR;
                        return __put_user(gfn, st->user_gfn++);
                } else
                        st->user_gfn++;
        } else { /* st->version == 2 */
                if (err)
                        return __put_user(err, st->user_err++);
                else
                        st->user_err++;
        }

        return 0;
}
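
/*
 * How a V1 caller is expected to pick the errors back out of its gfn
 * array afterwards (an illustrative sketch; the constants come from
 * the privcmd uapi header, the helpers are hypothetical):
 *
 *      xen_pfn_t gfn = arr[i];
 *      if ((gfn & PRIVCMD_MMAPBATCH_MFN_ERROR) ==
 *          PRIVCMD_MMAPBATCH_PAGED_ERROR)
 *              retry_this_frame(i);    // frame was paged out
 *      else if (gfn & PRIVCMD_MMAPBATCH_MFN_ERROR)
 *              mapping_failed(i);      // mapping failed outright
 *
 * V2 callers instead read the per-frame errno values straight out of
 * their m.err array.
 */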

static int mmap_return_errors(void *data, int nr, void *state)
{
        struct mmap_batch_state *st = state;
        int *errs = data;
        int i;
        int ret;

        for (i = 0; i < nr; i++) {
                ret = mmap_return_error(errs[i], st);
                if (ret < 0)
                        return ret;
        }
        return 0;
}

/* Allocate pfns that are then mapped with gfns from a foreign domid. Update
 * the vma with the page info to use later.
 * Returns: 0 on success, otherwise -errno.
 */
static int alloc_empty_pages(struct vm_area_struct *vma, int numpgs)
{
        int rc;
        struct page **pages;

        pages = kcalloc(numpgs, sizeof(pages[0]), GFP_KERNEL);
        if (pages == NULL)
                return -ENOMEM;

        rc = alloc_xenballooned_pages(numpgs, pages);
        if (rc != 0) {
                pr_warn("%s Could not alloc %d pfns rc:%d\n", __func__,
                        numpgs, rc);
                kfree(pages);
                return -ENOMEM;
        }
        BUG_ON(vma->vm_private_data != NULL);
        vma->vm_private_data = pages;

        return 0;
}

static const struct vm_operations_struct privcmd_vm_ops;

static long privcmd_ioctl_mmap_batch(
        struct file *file, void __user *udata, int version)
{
        struct privcmd_data *data = file->private_data;
        int ret;
        struct privcmd_mmapbatch_v2 m;
        struct mm_struct *mm = current->mm;
        struct vm_area_struct *vma;
        unsigned long nr_pages;
        LIST_HEAD(pagelist);
        struct mmap_batch_state state;

        switch (version) {
        case 1:
                if (copy_from_user(&m, udata, sizeof(struct privcmd_mmapbatch)))
                        return -EFAULT;
                /* Returns per-frame error in m.arr. */
                m.err = NULL;
                if (!access_ok(m.arr, m.num * sizeof(*m.arr)))
                        return -EFAULT;
                break;
        case 2:
                if (copy_from_user(&m, udata, sizeof(struct privcmd_mmapbatch_v2)))
                        return -EFAULT;
                /* Returns per-frame error code in m.err. */
                if (!access_ok(m.err, m.num * (sizeof(*m.err))))
                        return -EFAULT;
                break;
        default:
                return -EINVAL;
        }

        /* If restriction is in place, check the domid matches */
        if (data->domid != DOMID_INVALID && data->domid != m.dom)
                return -EPERM;

        nr_pages = DIV_ROUND_UP(m.num, XEN_PFN_PER_PAGE);
        if ((m.num <= 0) || (nr_pages > (LONG_MAX >> PAGE_SHIFT)))
                return -EINVAL;

        ret = gather_array(&pagelist, m.num, sizeof(xen_pfn_t), m.arr);

        if (ret)
                goto out;
        if (list_empty(&pagelist)) {
                ret = -EINVAL;
                goto out;
        }

        if (version == 2) {
                /* Zero error array now to only copy back actual errors. */
                if (clear_user(m.err, sizeof(int) * m.num)) {
                        ret = -EFAULT;
                        goto out;
                }
        }

        down_write(&mm->mmap_sem);

        vma = find_vma(mm, m.addr);
        if (!vma ||
            vma->vm_ops != &privcmd_vm_ops) {
                ret = -EINVAL;
                goto out_unlock;
        }

        /*
         * Caller must either:
         *
         * Map the whole VMA range, which will also allocate all the
         * pages required for the auto_translated_physmap case.
         *
         * Or
         *
         * Map unmapped holes left from a previous map attempt (e.g.,
         * because those foreign frames were previously paged out).
         */
        if (vma->vm_private_data == NULL) {
                if (m.addr != vma->vm_start ||
                    m.addr + (nr_pages << PAGE_SHIFT) != vma->vm_end) {
                        ret = -EINVAL;
                        goto out_unlock;
                }
                if (xen_feature(XENFEAT_auto_translated_physmap)) {
                        ret = alloc_empty_pages(vma, nr_pages);
                        if (ret < 0)
                                goto out_unlock;
                } else
                        vma->vm_private_data = PRIV_VMA_LOCKED;
        } else {
                if (m.addr < vma->vm_start ||
                    m.addr + (nr_pages << PAGE_SHIFT) > vma->vm_end) {
                        ret = -EINVAL;
                        goto out_unlock;
                }
                if (privcmd_vma_range_is_mapped(vma, m.addr, nr_pages)) {
                        ret = -EINVAL;
                        goto out_unlock;
                }
        }

        state.domain        = m.dom;
        state.vma           = vma;
        state.va            = m.addr;
        state.index         = 0;
        state.global_error  = 0;
        state.version       = version;

        BUILD_BUG_ON(((PAGE_SIZE / sizeof(xen_pfn_t)) % XEN_PFN_PER_PAGE) != 0);
        /* mmap_batch_fn guarantees ret == 0 */
        BUG_ON(traverse_pages_block(m.num, sizeof(xen_pfn_t),
                                    &pagelist, mmap_batch_fn, &state));

        up_write(&mm->mmap_sem);

        if (state.global_error) {
                /* Write back errors in second pass. */
                state.user_gfn = (xen_pfn_t *)m.arr;
                state.user_err = m.err;
                ret = traverse_pages_block(m.num, sizeof(xen_pfn_t),
                                           &pagelist, mmap_return_errors, &state);
        } else
                ret = 0;

        /* If we have not had any EFAULT-like global errors then set the global
         * error to -ENOENT if necessary. */
        if ((ret == 0) && (state.global_error == -ENOENT))
                ret = -ENOENT;

out:
        free_page_list(&pagelist);
        return ret;

out_unlock:
        up_write(&mm->mmap_sem);
        goto out;
}
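
/*
 * End-to-end V2 flow as seen from userspace (an illustrative sketch;
 * assumes the uapi definitions, a 4 KiB page size and `num` gfns in
 * gfns[], with error handling elided):
 *
 *      int fd = open("/dev/xen/privcmd", O_RDWR);
 *      void *addr = mmap(NULL, num * 4096, PROT_READ | PROT_WRITE,
 *                        MAP_SHARED, fd, 0);
 *      struct privcmd_mmapbatch_v2 m = {
 *              .num  = num,
 *              .dom  = domid,
 *              .addr = (__u64)(unsigned long)addr,
 *              .arr  = gfns,
 *              .err  = errs,
 *      };
 *      int rc = ioctl(fd, IOCTL_PRIVCMD_MMAPBATCH_V2, &m);
 *
 * A failure with errno == ENOENT means some errs[i] == -ENOENT: those
 * frames were paged out, and the caller may retry just those holes
 * against the same VMA, which is the second case handled above.
 */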

static int lock_pages(
        struct privcmd_dm_op_buf kbufs[], unsigned int num,
        struct page *pages[], unsigned int nr_pages)
{
        unsigned int i;

        for (i = 0; i < num; i++) {
                unsigned int requested;
                int pinned;

                requested = DIV_ROUND_UP(
                        offset_in_page(kbufs[i].uptr) + kbufs[i].size,
                        PAGE_SIZE);
                if (requested > nr_pages)
                        return -ENOSPC;

                pinned = get_user_pages_fast(
                        (unsigned long) kbufs[i].uptr,
                        requested, FOLL_WRITE, pages);
                if (pinned < 0)
                        return pinned;
                /* A short pin would leave part of this buffer unpinned;
                 * fail rather than let the hypercall touch unpinned
                 * memory (the caller unwinds via unlock_pages()).
                 */
                if (pinned < requested)
                        return -ENOSPC;

                nr_pages -= pinned;
                pages += pinned;
        }

        return 0;
}

static void unlock_pages(struct page *pages[], unsigned int nr_pages)
{
        unsigned int i;

        if (!pages)
                return;

        for (i = 0; i < nr_pages; i++) {
                if (pages[i])
                        put_page(pages[i]);
        }
}

static long privcmd_ioctl_dm_op(struct file *file, void __user *udata)
{
        struct privcmd_data *data = file->private_data;
        struct privcmd_dm_op kdata;
        struct privcmd_dm_op_buf *kbufs;
        unsigned int nr_pages = 0;
        struct page **pages = NULL;
        struct xen_dm_op_buf *xbufs = NULL;
        unsigned int i;
        long rc;

        if (copy_from_user(&kdata, udata, sizeof(kdata)))
                return -EFAULT;

        /* If restriction is in place, check the domid matches */
        if (data->domid != DOMID_INVALID && data->domid != kdata.dom)
                return -EPERM;

        if (kdata.num == 0)
                return 0;

        if (kdata.num > privcmd_dm_op_max_num)
                return -E2BIG;

        kbufs = kcalloc(kdata.num, sizeof(*kbufs), GFP_KERNEL);
        if (!kbufs)
                return -ENOMEM;

        if (copy_from_user(kbufs, kdata.ubufs,
                           sizeof(*kbufs) * kdata.num)) {
                rc = -EFAULT;
                goto out;
        }

        for (i = 0; i < kdata.num; i++) {
                if (kbufs[i].size > privcmd_dm_op_buf_max_size) {
                        rc = -E2BIG;
                        goto out;
                }

                if (!access_ok(kbufs[i].uptr,
                               kbufs[i].size)) {
                        rc = -EFAULT;
                        goto out;
                }

                nr_pages += DIV_ROUND_UP(
                        offset_in_page(kbufs[i].uptr) + kbufs[i].size,
                        PAGE_SIZE);
        }

        pages = kcalloc(nr_pages, sizeof(*pages), GFP_KERNEL);
        if (!pages) {
                rc = -ENOMEM;
                goto out;
        }

        xbufs = kcalloc(kdata.num, sizeof(*xbufs), GFP_KERNEL);
        if (!xbufs) {
                rc = -ENOMEM;
                goto out;
        }

        rc = lock_pages(kbufs, kdata.num, pages, nr_pages);
        if (rc)
                goto out;

        for (i = 0; i < kdata.num; i++) {
                set_xen_guest_handle(xbufs[i].h, kbufs[i].uptr);
                xbufs[i].size = kbufs[i].size;
        }

        xen_preemptible_hcall_begin();
        rc = HYPERVISOR_dm_op(kdata.dom, kdata.num, xbufs);
        xen_preemptible_hcall_end();

out:
        unlock_pages(pages, nr_pages);
        kfree(xbufs);
        kfree(pages);
        kfree(kbufs);

        return rc;
}
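
/*
 * Illustrative device-model usage (a sketch only; the layout of
 * struct xen_dm_op is defined by the Xen public headers and elided
 * here):
 *
 *      struct xen_dm_op op = { ... };
 *      struct privcmd_dm_op_buf buf = {
 *              .uptr = &op,
 *              .size = sizeof(op),
 *      };
 *      struct privcmd_dm_op kdata = {
 *              .dom   = domid,
 *              .num   = 1,
 *              .ubufs = &buf,
 *      };
 *      int rc = ioctl(fd, IOCTL_PRIVCMD_DM_OP, &kdata);
 *
 * The buffers stay in userspace; lock_pages() above merely pins them
 * so the preemptible hypercall can access them safely.
 */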

static long privcmd_ioctl_restrict(struct file *file, void __user *udata)
{
        struct privcmd_data *data = file->private_data;
        domid_t dom;

        if (copy_from_user(&dom, udata, sizeof(dom)))
                return -EFAULT;

        /* Set restriction to the specified domain, or check it matches */
        if (data->domid == DOMID_INVALID)
                data->domid = dom;
        else if (data->domid != dom)
                return -EINVAL;

        return 0;
}
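
/*
 * A toolstack can drop privilege on a privcmd fd before handing it to
 * a less trusted process, e.g. a per-guest device model (illustrative
 * sketch):
 *
 *      domid_t domid = guest_domid;
 *      ioctl(fd, IOCTL_PRIVCMD_RESTRICT, &domid);
 *
 * From then on every ioctl on this fd that names a domain must name
 * that domid, and raw hypercalls are refused outright.
 */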

static long privcmd_ioctl_mmap_resource(struct file *file, void __user *udata)
{
        struct privcmd_data *data = file->private_data;
        struct mm_struct *mm = current->mm;
        struct vm_area_struct *vma;
        struct privcmd_mmap_resource kdata;
        xen_pfn_t *pfns = NULL;
        struct xen_mem_acquire_resource xdata;
        int rc;

        if (copy_from_user(&kdata, udata, sizeof(kdata)))
                return -EFAULT;

        /* If restriction is in place, check the domid matches */
        if (data->domid != DOMID_INVALID && data->domid != kdata.dom)
                return -EPERM;

        down_write(&mm->mmap_sem);

        vma = find_vma(mm, kdata.addr);
        if (!vma || vma->vm_ops != &privcmd_vm_ops) {
                rc = -EINVAL;
                goto out;
        }

        pfns = kcalloc(kdata.num, sizeof(*pfns), GFP_KERNEL);
        if (!pfns) {
                rc = -ENOMEM;
                goto out;
        }

        if (IS_ENABLED(CONFIG_XEN_AUTO_XLATE) &&
            xen_feature(XENFEAT_auto_translated_physmap)) {
                unsigned int nr = DIV_ROUND_UP(kdata.num, XEN_PFN_PER_PAGE);
                struct page **pages;
                unsigned int i;

                rc = alloc_empty_pages(vma, nr);
                if (rc < 0)
                        goto out;

                pages = vma->vm_private_data;
                for (i = 0; i < kdata.num; i++) {
                        xen_pfn_t pfn =
                                page_to_xen_pfn(pages[i / XEN_PFN_PER_PAGE]);

                        pfns[i] = pfn + (i % XEN_PFN_PER_PAGE);
                }
        } else
                vma->vm_private_data = PRIV_VMA_LOCKED;

        memset(&xdata, 0, sizeof(xdata));
        xdata.domid = kdata.dom;
        xdata.type = kdata.type;
        xdata.id = kdata.id;
        xdata.frame = kdata.idx;
        xdata.nr_frames = kdata.num;
        set_xen_guest_handle(xdata.frame_list, pfns);

        xen_preemptible_hcall_begin();
        rc = HYPERVISOR_memory_op(XENMEM_acquire_resource, &xdata);
        xen_preemptible_hcall_end();

        if (rc)
                goto out;

        if (IS_ENABLED(CONFIG_XEN_AUTO_XLATE) &&
            xen_feature(XENFEAT_auto_translated_physmap)) {
                rc = xen_remap_vma_range(vma, kdata.addr, kdata.num << PAGE_SHIFT);
        } else {
                unsigned int domid =
                        (xdata.flags & XENMEM_rsrc_acq_caller_owned) ?
                        DOMID_SELF : kdata.dom;
                int num;

                num = xen_remap_domain_mfn_array(vma,
                                                 kdata.addr & PAGE_MASK,
                                                 pfns, kdata.num, (int *)pfns,
                                                 vma->vm_page_prot,
                                                 domid,
                                                 vma->vm_private_data);
                if (num < 0)
                        rc = num;
                else if (num != kdata.num) {
                        unsigned int i;

                        for (i = 0; i < num; i++) {
                                rc = pfns[i];
                                if (rc < 0)
                                        break;
                        }
                } else
                        rc = 0;
        }

out:
        up_write(&mm->mmap_sem);
        kfree(pfns);

        return rc;
}
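
/*
 * Illustrative sketch of acquiring a resource mapping (assumes the
 * uapi definitions; the XENMEM_resource_* type values come from the
 * Xen public headers, and addr is from a prior mmap() of this fd):
 *
 *      struct privcmd_mmap_resource r = {
 *              .dom  = domid,
 *              .type = XENMEM_resource_ioreq_server,
 *              .id   = ioserv_id,
 *              .idx  = 0,
 *              .num  = nr_frames,
 *              .addr = (__u64)(unsigned long)addr,
 *      };
 *      int rc = ioctl(fd, IOCTL_PRIVCMD_MMAP_RESOURCE, &r);
 */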

static long privcmd_ioctl(struct file *file,
                          unsigned int cmd, unsigned long data)
{
        int ret = -ENOTTY;
        void __user *udata = (void __user *) data;

        switch (cmd) {
        case IOCTL_PRIVCMD_HYPERCALL:
                ret = privcmd_ioctl_hypercall(file, udata);
                break;

        case IOCTL_PRIVCMD_MMAP:
                ret = privcmd_ioctl_mmap(file, udata);
                break;

        case IOCTL_PRIVCMD_MMAPBATCH:
                ret = privcmd_ioctl_mmap_batch(file, udata, 1);
                break;

        case IOCTL_PRIVCMD_MMAPBATCH_V2:
                ret = privcmd_ioctl_mmap_batch(file, udata, 2);
                break;

        case IOCTL_PRIVCMD_DM_OP:
                ret = privcmd_ioctl_dm_op(file, udata);
                break;

        case IOCTL_PRIVCMD_RESTRICT:
                ret = privcmd_ioctl_restrict(file, udata);
                break;

        case IOCTL_PRIVCMD_MMAP_RESOURCE:
                ret = privcmd_ioctl_mmap_resource(file, udata);
                break;

        default:
                break;
        }

        return ret;
}

static int privcmd_open(struct inode *ino, struct file *file)
{
        struct privcmd_data *data = kzalloc(sizeof(*data), GFP_KERNEL);

        if (!data)
                return -ENOMEM;

        /* DOMID_INVALID implies no restriction */
        data->domid = DOMID_INVALID;

        file->private_data = data;
        return 0;
}

static int privcmd_release(struct inode *ino, struct file *file)
{
        struct privcmd_data *data = file->private_data;

        kfree(data);
        return 0;
}

static void privcmd_close(struct vm_area_struct *vma)
{
        struct page **pages = vma->vm_private_data;
        int numpgs = vma_pages(vma);
        int numgfns = (vma->vm_end - vma->vm_start) >> XEN_PAGE_SHIFT;
        int rc;

        if (!xen_feature(XENFEAT_auto_translated_physmap) || !numpgs || !pages)
                return;

        rc = xen_unmap_domain_gfn_range(vma, numgfns, pages);
        if (rc == 0)
                free_xenballooned_pages(numpgs, pages);
        else
                pr_crit("unable to unmap MFN range: leaking %d pages. rc=%d\n",
                        numpgs, rc);
        kfree(pages);
}

static vm_fault_t privcmd_fault(struct vm_fault *vmf)
{
        printk(KERN_DEBUG "privcmd_fault: vma=%p %lx-%lx, pgoff=%lx, uv=%p\n",
               vmf->vma, vmf->vma->vm_start, vmf->vma->vm_end,
               vmf->pgoff, (void *)vmf->address);

        return VM_FAULT_SIGBUS;
}

static const struct vm_operations_struct privcmd_vm_ops = {
        .close = privcmd_close,
        .fault = privcmd_fault
};

static int privcmd_mmap(struct file *file, struct vm_area_struct *vma)
{
        /* DONTCOPY is essential for Xen because copy_page_range doesn't know
         * how to recreate these mappings */
        vma->vm_flags |= VM_IO | VM_PFNMAP | VM_DONTCOPY |
                         VM_DONTEXPAND | VM_DONTDUMP;
        vma->vm_ops = &privcmd_vm_ops;
        vma->vm_private_data = NULL;

        return 0;
}

/*
 * For MMAPBATCH*. This allows asserting the single-shot mapping
 * on a per-pfn/pte basis. Mapping calls that fail with ENOENT
 * can then be retried until they succeed.
 */
static int is_mapped_fn(pte_t *pte, unsigned long addr, void *data)
{
        return pte_none(*pte) ? 0 : -EBUSY;
}

static int privcmd_vma_range_is_mapped(
                   struct vm_area_struct *vma,
                   unsigned long addr,
                   unsigned long nr_pages)
{
        return apply_to_page_range(vma->vm_mm, addr, nr_pages << PAGE_SHIFT,
                                   is_mapped_fn, NULL) != 0;
}

const struct file_operations xen_privcmd_fops = {
        .owner = THIS_MODULE,
        .unlocked_ioctl = privcmd_ioctl,
        .open = privcmd_open,
        .release = privcmd_release,
        .mmap = privcmd_mmap,
};
EXPORT_SYMBOL_GPL(xen_privcmd_fops);

static struct miscdevice privcmd_dev = {
        .minor = MISC_DYNAMIC_MINOR,
        .name = "xen/privcmd",
        .fops = &xen_privcmd_fops,
};

static int __init privcmd_init(void)
{
        int err;

        if (!xen_domain())
                return -ENODEV;

        err = misc_register(&privcmd_dev);
        if (err != 0) {
                pr_err("Could not register Xen privcmd device\n");
                return err;
        }

        err = misc_register(&xen_privcmdbuf_dev);
        if (err != 0) {
                pr_err("Could not register Xen hypercall-buf device\n");
                misc_deregister(&privcmd_dev);
                return err;
        }

        return 0;
}

static void __exit privcmd_exit(void)
{
        misc_deregister(&privcmd_dev);
        misc_deregister(&xen_privcmdbuf_dev);
}

module_init(privcmd_init);
module_exit(privcmd_exit);