linux/drivers/xen/xenfs/privcmd.c
/******************************************************************************
 * privcmd.c
 *
 * Interface to privileged domain-0 commands.
 *
 * Copyright (c) 2002-2004, K A Fraser, B Dragovic
 */

#include <linux/kernel.h>
#include <linux/sched.h>
#include <linux/slab.h>
#include <linux/string.h>
#include <linux/errno.h>
#include <linux/mm.h>
#include <linux/mman.h>
#include <linux/uaccess.h>
#include <linux/swap.h>
#include <linux/highmem.h>
#include <linux/pagemap.h>
#include <linux/seq_file.h>

#include <asm/pgalloc.h>
#include <asm/pgtable.h>
#include <asm/tlb.h>
#include <asm/xen/hypervisor.h>
#include <asm/xen/hypercall.h>

#include <xen/xen.h>
#include <xen/privcmd.h>
#include <xen/interface/xen.h>
#include <xen/features.h>
#include <xen/page.h>
#include <xen/xen-ops.h>

#ifndef HAVE_ARCH_PRIVCMD_MMAP
static int privcmd_enforce_singleshot_mapping(struct vm_area_struct *vma);
#endif

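/*
 * IOCTL_PRIVCMD_HYPERCALL: copy a hypercall descriptor in from userspace,
 * issue it via privcmd_call() and hand the hypervisor's return value back
 * to the caller.
 */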
static long privcmd_ioctl_hypercall(void __user *udata)
{
        struct privcmd_hypercall hypercall;
        long ret;

        if (copy_from_user(&hypercall, udata, sizeof(hypercall)))
                return -EFAULT;

        ret = privcmd_call(hypercall.op,
                           hypercall.arg[0], hypercall.arg[1],
                           hypercall.arg[2], hypercall.arg[3],
                           hypercall.arg[4]);

        return ret;
}

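/* Free every page on the list, then reinitialise the (now empty) list head. */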
static void free_page_list(struct list_head *pages)
{
        struct page *p, *n;

        list_for_each_entry_safe(p, n, pages, lru)
                __free_page(p);

        INIT_LIST_HEAD(pages);
}

/*
 * Given an array of items in userspace, return a list of pages
 * containing the data.  If copying fails, either because of memory
 * allocation failure or a problem reading user memory, return an
 * error code; it's up to the caller to dispose of any partial list.
 */
static int gather_array(struct list_head *pagelist,
                        unsigned nelem, size_t size,
                        void __user *data)
{
        unsigned pageidx;
        void *pagedata;
        int ret;

        if (size > PAGE_SIZE)
                return 0;

        pageidx = PAGE_SIZE;
        pagedata = NULL;        /* quiet, gcc */
        while (nelem--) {
                if (pageidx > PAGE_SIZE-size) {
                        struct page *page = alloc_page(GFP_KERNEL);

                        ret = -ENOMEM;
                        if (page == NULL)
                                goto fail;

                        pagedata = page_address(page);

                        list_add_tail(&page->lru, pagelist);
                        pageidx = 0;
                }

                ret = -EFAULT;
                if (copy_from_user(pagedata + pageidx, data, size))
                        goto fail;

                data += size;
                pageidx += size;
        }

        ret = 0;

fail:
        return ret;
}

/*
 * Call function "fn" on each element of the array fragmented
 * over a list of pages.
 */
static int traverse_pages(unsigned nelem, size_t size,
                          struct list_head *pos,
                          int (*fn)(void *data, void *state),
                          void *state)
{
        void *pagedata;
        unsigned pageidx;
        int ret = 0;

        BUG_ON(size > PAGE_SIZE);

        pageidx = PAGE_SIZE;
        pagedata = NULL;        /* hush, gcc */

        while (nelem--) {
                if (pageidx > PAGE_SIZE-size) {
                        struct page *page;
                        pos = pos->next;
                        page = list_entry(pos, struct page, lru);
                        pagedata = page_address(page);
                        pageidx = 0;
                }

                ret = (*fn)(pagedata + pageidx, state);
                if (ret)
                        break;
                pageidx += size;
        }

        return ret;
}

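/*
 * State carried across mmap_mfn_range() invocations while walking the
 * array of privcmd_mmap_entry chunks: the next expected virtual address,
 * the target VMA and the domain whose frames are being mapped.
 */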
struct mmap_mfn_state {
        unsigned long va;
        struct vm_area_struct *vma;
        domid_t domain;
};

static int mmap_mfn_range(void *data, void *state)
{
        struct privcmd_mmap_entry *msg = data;
        struct mmap_mfn_state *st = state;
        struct vm_area_struct *vma = st->vma;
        int rc;

        /* Do not allow range to wrap the address space. */
        if ((msg->npages > (LONG_MAX >> PAGE_SHIFT)) ||
            ((unsigned long)(msg->npages << PAGE_SHIFT) >= -st->va))
                return -EINVAL;

        /* Range chunks must be contiguous in va space. */
        if ((msg->va != st->va) ||
            ((msg->va+(msg->npages<<PAGE_SHIFT)) > vma->vm_end))
                return -EINVAL;

        rc = xen_remap_domain_mfn_range(vma,
                                        msg->va & PAGE_MASK,
                                        msg->mfn, msg->npages,
                                        vma->vm_page_prot,
                                        st->domain);
        if (rc < 0)
                return rc;

        st->va += msg->npages << PAGE_SHIFT;

        return 0;
}

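/*
 * IOCTL_PRIVCMD_MMAP: map ranges of foreign machine frames, described by
 * an array of privcmd_mmap_entry structures, into the calling process's
 * address space.
 */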
static long privcmd_ioctl_mmap(void __user *udata)
{
        struct privcmd_mmap mmapcmd;
        struct mm_struct *mm = current->mm;
        struct vm_area_struct *vma;
        int rc;
        LIST_HEAD(pagelist);
        struct mmap_mfn_state state;

        if (!xen_initial_domain())
                return -EPERM;

        if (copy_from_user(&mmapcmd, udata, sizeof(mmapcmd)))
                return -EFAULT;

        rc = gather_array(&pagelist,
                          mmapcmd.num, sizeof(struct privcmd_mmap_entry),
                          mmapcmd.entry);

        if (rc || list_empty(&pagelist))
                goto out;

        down_write(&mm->mmap_sem);

        {
                struct page *page = list_first_entry(&pagelist,
                                                     struct page, lru);
                struct privcmd_mmap_entry *msg = page_address(page);

                vma = find_vma(mm, msg->va);
                rc = -EINVAL;

                if (!vma || (msg->va != vma->vm_start) ||
                    !privcmd_enforce_singleshot_mapping(vma))
                        goto out_up;
        }

        state.va = vma->vm_start;
        state.vma = vma;
        state.domain = mmapcmd.dom;

        rc = traverse_pages(mmapcmd.num, sizeof(struct privcmd_mmap_entry),
                            &pagelist,
                            mmap_mfn_range, &state);


out_up:
        up_write(&mm->mmap_sem);

out:
        free_page_list(&pagelist);

        return rc;
}

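/*
 * State shared between mmap_batch_fn() and mmap_return_errors() while
 * mapping, and then reporting on, a batch of individual frames.
 */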
struct mmap_batch_state {
        domid_t domain;
        unsigned long va;
        struct vm_area_struct *vma;
        int err;

        xen_pfn_t __user *user;
};

static int mmap_batch_fn(void *data, void *state)
{
        xen_pfn_t *mfnp = data;
        struct mmap_batch_state *st = state;

        if (xen_remap_domain_mfn_range(st->vma, st->va & PAGE_MASK, *mfnp, 1,
                                       st->vma->vm_page_prot, st->domain) < 0) {
                *mfnp |= 0xf0000000U;
                st->err++;
        }
        st->va += PAGE_SIZE;

        return 0;
}

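/*
 * Write the (possibly error-flagged) MFN array back to userspace so the
 * caller can see which individual frames failed to map.
 */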
static int mmap_return_errors(void *data, void *state)
{
        xen_pfn_t *mfnp = data;
        struct mmap_batch_state *st = state;

        return put_user(*mfnp, st->user++);
}

static struct vm_operations_struct privcmd_vm_ops;

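/*
 * IOCTL_PRIVCMD_MMAPBATCH: map an array of individual machine frames from
 * the given domain at consecutive virtual addresses.  Frames that fail to
 * map are flagged in the user-supplied array instead of failing the whole
 * ioctl.
 */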
static long privcmd_ioctl_mmap_batch(void __user *udata)
{
        int ret;
        struct privcmd_mmapbatch m;
        struct mm_struct *mm = current->mm;
        struct vm_area_struct *vma;
        unsigned long nr_pages;
        LIST_HEAD(pagelist);
        struct mmap_batch_state state;

        if (!xen_initial_domain())
                return -EPERM;

        if (copy_from_user(&m, udata, sizeof(m)))
                return -EFAULT;

        nr_pages = m.num;
        if ((m.num <= 0) || (nr_pages > (LONG_MAX >> PAGE_SHIFT)))
                return -EINVAL;

        ret = gather_array(&pagelist, m.num, sizeof(xen_pfn_t),
                           m.arr);

        if (ret || list_empty(&pagelist))
                goto out;

        down_write(&mm->mmap_sem);

        vma = find_vma(mm, m.addr);
        ret = -EINVAL;
        if (!vma ||
            vma->vm_ops != &privcmd_vm_ops ||
            (m.addr != vma->vm_start) ||
            ((m.addr + (nr_pages << PAGE_SHIFT)) != vma->vm_end) ||
            !privcmd_enforce_singleshot_mapping(vma)) {
                up_write(&mm->mmap_sem);
                goto out;
        }

        state.domain = m.dom;
        state.vma = vma;
        state.va = m.addr;
        state.err = 0;

        ret = traverse_pages(m.num, sizeof(xen_pfn_t),
                             &pagelist, mmap_batch_fn, &state);

        up_write(&mm->mmap_sem);

        if (state.err > 0) {
                state.user = m.arr;
                ret = traverse_pages(m.num, sizeof(xen_pfn_t),
                               &pagelist,
                               mmap_return_errors, &state);
        }

out:
        free_page_list(&pagelist);

        return ret;
}

static long privcmd_ioctl(struct file *file,
                          unsigned int cmd, unsigned long data)
{
        int ret = -ENOSYS;
        void __user *udata = (void __user *) data;

        switch (cmd) {
        case IOCTL_PRIVCMD_HYPERCALL:
                ret = privcmd_ioctl_hypercall(udata);
                break;

        case IOCTL_PRIVCMD_MMAP:
                ret = privcmd_ioctl_mmap(udata);
                break;

        case IOCTL_PRIVCMD_MMAPBATCH:
                ret = privcmd_ioctl_mmap_batch(udata);
                break;

        default:
                ret = -EINVAL;
                break;
        }

        return ret;
}

#ifndef HAVE_ARCH_PRIVCMD_MMAP
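/*
 * Mappings are established up front by the ioctls above, so a fault on a
 * privcmd VMA means userspace touched an address that was never
 * successfully mapped: report SIGBUS.
 */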
static int privcmd_fault(struct vm_area_struct *vma, struct vm_fault *vmf)
{
        printk(KERN_DEBUG "privcmd_fault: vma=%p %lx-%lx, pgoff=%lx, uv=%p\n",
               vma, vma->vm_start, vma->vm_end,
               vmf->pgoff, vmf->virtual_address);

        return VM_FAULT_SIGBUS;
}

static struct vm_operations_struct privcmd_vm_ops = {
        .fault = privcmd_fault
};

static int privcmd_mmap(struct file *file, struct vm_area_struct *vma)
{
        /* Unsupported for auto-translate guests. */
        if (xen_feature(XENFEAT_auto_translated_physmap))
                return -ENOSYS;

        /* DONTCOPY is essential for Xen because copy_page_range doesn't know
         * how to recreate these mappings */
        vma->vm_flags |= VM_RESERVED | VM_IO | VM_DONTCOPY | VM_PFNMAP;
        vma->vm_ops = &privcmd_vm_ops;
        vma->vm_private_data = NULL;

        return 0;
}

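/*
 * Allow each privcmd VMA to be populated only once: atomically swap a
 * non-NULL token into vm_private_data and fail if one was already there.
 */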
static int privcmd_enforce_singleshot_mapping(struct vm_area_struct *vma)
{
        return (xchg(&vma->vm_private_data, (void *)1) == NULL);
}
#endif

const struct file_operations privcmd_file_ops = {
        .unlocked_ioctl = privcmd_ioctl,
        .mmap = privcmd_mmap,
};

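For reference, a minimal userspace sketch of how this interface is typically driven (the mmap()-then-MMAPBATCH sequence that privcmd_ioctl_mmap_batch() checks for above). The device path /proc/xen/privcmd, the availability of a Xen-provided privcmd header declaring struct privcmd_mmapbatch and IOCTL_PRIVCMD_MMAPBATCH, and the helper name map_foreign_page are assumptions for illustration, not part of this file:

/* Hypothetical example: map one page of foreign domain 'domid' at frame 'mfn'.
 * Assumes xenfs is mounted at /proc/xen and that a privcmd header declares
 * struct privcmd_mmapbatch and IOCTL_PRIVCMD_MMAPBATCH. */
#include <fcntl.h>
#include <unistd.h>
#include <sys/ioctl.h>
#include <sys/mman.h>
#include <xen/privcmd.h>

static void *map_foreign_page(domid_t domid, xen_pfn_t mfn)
{
        struct privcmd_mmapbatch batch;
        void *addr;
        int fd;

        fd = open("/proc/xen/privcmd", O_RDWR);
        if (fd < 0)
                return NULL;

        /* mmap() on the privcmd fd only creates the VMA (privcmd_mmap());
         * the actual frames are installed by the MMAPBATCH ioctl below. */
        addr = mmap(NULL, getpagesize(), PROT_READ | PROT_WRITE,
                    MAP_SHARED, fd, 0);
        if (addr == MAP_FAILED) {
                close(fd);
                return NULL;
        }

        batch.num  = 1;
        batch.dom  = domid;
        batch.addr = (unsigned long)addr;
        batch.arr  = &mfn;

        /* On per-page failure the kernel flags the MFN in 'arr' rather than
         * failing the whole ioctl, so check both. */
        if (ioctl(fd, IOCTL_PRIVCMD_MMAPBATCH, &batch) < 0 ||
            (mfn & 0xf0000000U)) {
                munmap(addr, getpagesize());
                addr = NULL;
        }

        close(fd);
        return addr;
}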