linux/drivers/xen/gntdev.c
   1/******************************************************************************
   2 * gntdev.c
   3 *
   4 * Device for accessing (in user-space) pages that have been granted by other
   5 * domains.
   6 *
   7 * Copyright (c) 2006-2007, D G Murray.
   8 *           (c) 2009 Gerd Hoffmann <kraxel@redhat.com>
   9 *           (c) 2018 Oleksandr Andrushchenko, EPAM Systems Inc.
  10 *
  11 * This program is distributed in the hope that it will be useful,
  12 * but WITHOUT ANY WARRANTY; without even the implied warranty of
  13 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
  14 * GNU General Public License for more details.
  15 *
  16 * You should have received a copy of the GNU General Public License
  17 * along with this program; if not, write to the Free Software
  18 * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
  19 */
  20
  21#undef DEBUG
  22
  23#define pr_fmt(fmt) "xen:" KBUILD_MODNAME ": " fmt
  24
  25#include <linux/dma-mapping.h>
  26#include <linux/module.h>
  27#include <linux/kernel.h>
  28#include <linux/init.h>
  29#include <linux/miscdevice.h>
  30#include <linux/fs.h>
  31#include <linux/uaccess.h>
  32#include <linux/sched.h>
  33#include <linux/sched/mm.h>
  34#include <linux/spinlock.h>
  35#include <linux/slab.h>
  36#include <linux/highmem.h>
  37#include <linux/refcount.h>
  38
  39#include <xen/xen.h>
  40#include <xen/grant_table.h>
  41#include <xen/balloon.h>
  42#include <xen/gntdev.h>
  43#include <xen/events.h>
  44#include <xen/page.h>
  45#include <asm/xen/hypervisor.h>
  46#include <asm/xen/hypercall.h>
  47
  48#include "gntdev-common.h"
  49#ifdef CONFIG_XEN_GNTDEV_DMABUF
  50#include "gntdev-dmabuf.h"
  51#endif
  52
  53MODULE_LICENSE("GPL");
  54MODULE_AUTHOR("Derek G. Murray <Derek.Murray@cl.cam.ac.uk>, "
  55              "Gerd Hoffmann <kraxel@redhat.com>");
  56MODULE_DESCRIPTION("User-space granted page access driver");
  57
  58static int limit = 1024*1024;
  59module_param(limit, int, 0644);
  60MODULE_PARM_DESC(limit, "Maximum number of grants that may be mapped by "
  61                "the gntdev device");
  62
  63static atomic_t pages_mapped = ATOMIC_INIT(0);
  64
  65static int use_ptemod;
  66#define populate_freeable_maps use_ptemod
  67
  68static int unmap_grant_pages(struct gntdev_grant_map *map,
  69                             int offset, int pages);
  70
  71static struct miscdevice gntdev_miscdev;
  72
  73/* ------------------------------------------------------------------ */
  74
  75bool gntdev_account_mapped_pages(int count)
  76{
  77        return atomic_add_return(count, &pages_mapped) > limit;
  78}
  79
  80static void gntdev_print_maps(struct gntdev_priv *priv,
  81                              char *text, int text_index)
  82{
  83#ifdef DEBUG
  84        struct gntdev_grant_map *map;
  85
  86        pr_debug("%s: maps list (priv %p)\n", __func__, priv);
  87        list_for_each_entry(map, &priv->maps, next)
  88                pr_debug("  index %2d, count %2d %s\n",
  89                       map->index, map->count,
  90                       map->index == text_index && text ? text : "");
  91#endif
  92}
  93
  94static void gntdev_free_map(struct gntdev_grant_map *map)
  95{
  96        if (map == NULL)
  97                return;
  98
  99#ifdef CONFIG_XEN_GRANT_DMA_ALLOC
 100        if (map->dma_vaddr) {
 101                struct gnttab_dma_alloc_args args;
 102
 103                args.dev = map->dma_dev;
 104                args.coherent = !!(map->dma_flags & GNTDEV_DMA_FLAG_COHERENT);
 105                args.nr_pages = map->count;
 106                args.pages = map->pages;
 107                args.frames = map->frames;
 108                args.vaddr = map->dma_vaddr;
 109                args.dev_bus_addr = map->dma_bus_addr;
 110
 111                gnttab_dma_free_pages(&args);
 112        } else
 113#endif
 114        if (map->pages)
 115                gnttab_free_pages(map->count, map->pages);
 116
 117#ifdef CONFIG_XEN_GRANT_DMA_ALLOC
 118        kfree(map->frames);
 119#endif
 120        kfree(map->pages);
 121        kfree(map->grants);
 122        kfree(map->map_ops);
 123        kfree(map->unmap_ops);
 124        kfree(map->kmap_ops);
 125        kfree(map->kunmap_ops);
 126        kfree(map);
 127}
 128
 129struct gntdev_grant_map *gntdev_alloc_map(struct gntdev_priv *priv, int count,
 130                                          int dma_flags)
 131{
 132        struct gntdev_grant_map *add;
 133        int i;
 134
 135        add = kzalloc(sizeof(*add), GFP_KERNEL);
 136        if (NULL == add)
 137                return NULL;
 138
 139        add->grants    = kcalloc(count, sizeof(add->grants[0]), GFP_KERNEL);
 140        add->map_ops   = kcalloc(count, sizeof(add->map_ops[0]), GFP_KERNEL);
 141        add->unmap_ops = kcalloc(count, sizeof(add->unmap_ops[0]), GFP_KERNEL);
 142        add->kmap_ops  = kcalloc(count, sizeof(add->kmap_ops[0]), GFP_KERNEL);
 143        add->kunmap_ops = kcalloc(count, sizeof(add->kunmap_ops[0]), GFP_KERNEL);
 144        add->pages     = kcalloc(count, sizeof(add->pages[0]), GFP_KERNEL);
 145        if (NULL == add->grants    ||
 146            NULL == add->map_ops   ||
 147            NULL == add->unmap_ops ||
 148            NULL == add->kmap_ops  ||
 149            NULL == add->kunmap_ops ||
 150            NULL == add->pages)
 151                goto err;
 152
 153#ifdef CONFIG_XEN_GRANT_DMA_ALLOC
 154        add->dma_flags = dma_flags;
 155
 156        /*
 157         * Check if this mapping is requested to be backed
 158         * by a DMA buffer.
 159         */
 160        if (dma_flags & (GNTDEV_DMA_FLAG_WC | GNTDEV_DMA_FLAG_COHERENT)) {
 161                struct gnttab_dma_alloc_args args;
 162
 163                add->frames = kcalloc(count, sizeof(add->frames[0]),
 164                                      GFP_KERNEL);
 165                if (!add->frames)
 166                        goto err;
 167
 168                /* Remember the device, so we can free DMA memory. */
 169                add->dma_dev = priv->dma_dev;
 170
 171                args.dev = priv->dma_dev;
 172                args.coherent = !!(dma_flags & GNTDEV_DMA_FLAG_COHERENT);
 173                args.nr_pages = count;
 174                args.pages = add->pages;
 175                args.frames = add->frames;
 176
 177                if (gnttab_dma_alloc_pages(&args))
 178                        goto err;
 179
 180                add->dma_vaddr = args.vaddr;
 181                add->dma_bus_addr = args.dev_bus_addr;
 182        } else
 183#endif
 184        if (gnttab_alloc_pages(count, add->pages))
 185                goto err;
 186
 187        for (i = 0; i < count; i++) {
 188                add->map_ops[i].handle = -1;
 189                add->unmap_ops[i].handle = -1;
 190                add->kmap_ops[i].handle = -1;
 191                add->kunmap_ops[i].handle = -1;
 192        }
 193
 194        add->index = 0;
 195        add->count = count;
 196        refcount_set(&add->users, 1);
 197
 198        return add;
 199
 200err:
 201        gntdev_free_map(add);
 202        return NULL;
 203}
 204
 205void gntdev_add_map(struct gntdev_priv *priv, struct gntdev_grant_map *add)
 206{
 207        struct gntdev_grant_map *map;
 208
 209        list_for_each_entry(map, &priv->maps, next) {
 210                if (add->index + add->count < map->index) {
 211                        list_add_tail(&add->next, &map->next);
 212                        goto done;
 213                }
 214                add->index = map->index + map->count;
 215        }
 216        list_add_tail(&add->next, &priv->maps);
 217
 218done:
 219        gntdev_print_maps(priv, "[new]", add->index);
 220}
 221
 222static struct gntdev_grant_map *gntdev_find_map_index(struct gntdev_priv *priv,
 223                                                      int index, int count)
 224{
 225        struct gntdev_grant_map *map;
 226
 227        list_for_each_entry(map, &priv->maps, next) {
 228                if (map->index != index)
 229                        continue;
 230                if (count && map->count != count)
 231                        continue;
 232                return map;
 233        }
 234        return NULL;
 235}
 236
 237void gntdev_put_map(struct gntdev_priv *priv, struct gntdev_grant_map *map)
 238{
 239        if (!map)
 240                return;
 241
 242        if (!refcount_dec_and_test(&map->users))
 243                return;
 244
 245        atomic_sub(map->count, &pages_mapped);
 246
 247        if (map->notify.flags & UNMAP_NOTIFY_SEND_EVENT) {
 248                notify_remote_via_evtchn(map->notify.event);
 249                evtchn_put(map->notify.event);
 250        }
 251
 252        if (populate_freeable_maps && priv) {
 253                mutex_lock(&priv->lock);
 254                list_del(&map->next);
 255                mutex_unlock(&priv->lock);
 256        }
 257
 258        if (map->pages && !use_ptemod)
 259                unmap_grant_pages(map, 0, map->count);
 260        gntdev_free_map(map);
 261}
 262
 263/* ------------------------------------------------------------------ */
 264
 265static int find_grant_ptes(pte_t *pte, unsigned long addr, void *data)
 266{
 267        struct gntdev_grant_map *map = data;
 268        unsigned int pgnr = (addr - map->vma->vm_start) >> PAGE_SHIFT;
 269        int flags = map->flags | GNTMAP_application_map | GNTMAP_contains_pte;
 270        u64 pte_maddr;
 271
 272        BUG_ON(pgnr >= map->count);
 273        pte_maddr = arbitrary_virt_to_machine(pte).maddr;
 274
 275        /*
  276         * Set the PTE as special to force get_user_pages_fast() to fall
 277         * back to the slow path.  If this is not supported as part of
 278         * the grant map, it will be done afterwards.
 279         */
 280        if (xen_feature(XENFEAT_gnttab_map_avail_bits))
 281                flags |= (1 << _GNTMAP_guest_avail0);
 282
 283        gnttab_set_map_op(&map->map_ops[pgnr], pte_maddr, flags,
 284                          map->grants[pgnr].ref,
 285                          map->grants[pgnr].domid);
 286        gnttab_set_unmap_op(&map->unmap_ops[pgnr], pte_maddr, flags,
 287                            -1 /* handle */);
 288        return 0;
 289}
 290
 291#ifdef CONFIG_X86
 292static int set_grant_ptes_as_special(pte_t *pte, unsigned long addr, void *data)
 293{
 294        set_pte_at(current->mm, addr, pte, pte_mkspecial(*pte));
 295        return 0;
 296}
 297#endif
 298
 299int gntdev_map_grant_pages(struct gntdev_grant_map *map)
 300{
 301        int i, err = 0;
 302
 303        if (!use_ptemod) {
 304                /* Note: it could already be mapped */
 305                if (map->map_ops[0].handle != -1)
 306                        return 0;
 307                for (i = 0; i < map->count; i++) {
 308                        unsigned long addr = (unsigned long)
 309                                pfn_to_kaddr(page_to_pfn(map->pages[i]));
 310                        gnttab_set_map_op(&map->map_ops[i], addr, map->flags,
 311                                map->grants[i].ref,
 312                                map->grants[i].domid);
 313                        gnttab_set_unmap_op(&map->unmap_ops[i], addr,
 314                                map->flags, -1 /* handle */);
 315                }
 316        } else {
 317                /*
  318                 * Set up the map_ops corresponding to the pte entries pointing
  319                 * to the kernel linear addresses of the struct pages.
  320                 * These ptes are completely different from the user ptes dealt
  321                 * with by find_grant_ptes.
 322                 */
 323                for (i = 0; i < map->count; i++) {
 324                        unsigned long address = (unsigned long)
 325                                pfn_to_kaddr(page_to_pfn(map->pages[i]));
 326                        BUG_ON(PageHighMem(map->pages[i]));
 327
 328                        gnttab_set_map_op(&map->kmap_ops[i], address,
 329                                map->flags | GNTMAP_host_map,
 330                                map->grants[i].ref,
 331                                map->grants[i].domid);
 332                        gnttab_set_unmap_op(&map->kunmap_ops[i], address,
 333                                map->flags | GNTMAP_host_map, -1);
 334                }
 335        }
 336
 337        pr_debug("map %d+%d\n", map->index, map->count);
 338        err = gnttab_map_refs(map->map_ops, use_ptemod ? map->kmap_ops : NULL,
 339                        map->pages, map->count);
 340        if (err)
 341                return err;
 342
 343        for (i = 0; i < map->count; i++) {
 344                if (map->map_ops[i].status) {
 345                        err = -EINVAL;
 346                        continue;
 347                }
 348
 349                map->unmap_ops[i].handle = map->map_ops[i].handle;
 350                if (use_ptemod)
 351                        map->kunmap_ops[i].handle = map->kmap_ops[i].handle;
 352#ifdef CONFIG_XEN_GRANT_DMA_ALLOC
 353                else if (map->dma_vaddr) {
 354                        unsigned long bfn;
 355
 356                        bfn = pfn_to_bfn(page_to_pfn(map->pages[i]));
 357                        map->unmap_ops[i].dev_bus_addr = __pfn_to_phys(bfn);
 358                }
 359#endif
 360        }
 361        return err;
 362}
 363
 364static int __unmap_grant_pages(struct gntdev_grant_map *map, int offset,
 365                               int pages)
 366{
 367        int i, err = 0;
 368        struct gntab_unmap_queue_data unmap_data;
 369
 370        if (map->notify.flags & UNMAP_NOTIFY_CLEAR_BYTE) {
 371                int pgno = (map->notify.addr >> PAGE_SHIFT);
 372                if (pgno >= offset && pgno < offset + pages) {
 373                        /* No need for kmap, pages are in lowmem */
 374                        uint8_t *tmp = pfn_to_kaddr(page_to_pfn(map->pages[pgno]));
 375                        tmp[map->notify.addr & (PAGE_SIZE-1)] = 0;
 376                        map->notify.flags &= ~UNMAP_NOTIFY_CLEAR_BYTE;
 377                }
 378        }
 379
 380        unmap_data.unmap_ops = map->unmap_ops + offset;
 381        unmap_data.kunmap_ops = use_ptemod ? map->kunmap_ops + offset : NULL;
 382        unmap_data.pages = map->pages + offset;
 383        unmap_data.count = pages;
 384
 385        err = gnttab_unmap_refs_sync(&unmap_data);
 386        if (err)
 387                return err;
 388
 389        for (i = 0; i < pages; i++) {
 390                if (map->unmap_ops[offset+i].status)
 391                        err = -EINVAL;
 392                pr_debug("unmap handle=%d st=%d\n",
 393                        map->unmap_ops[offset+i].handle,
 394                        map->unmap_ops[offset+i].status);
 395                map->unmap_ops[offset+i].handle = -1;
 396        }
 397        return err;
 398}
 399
 400static int unmap_grant_pages(struct gntdev_grant_map *map, int offset,
 401                             int pages)
 402{
 403        int range, err = 0;
 404
 405        pr_debug("unmap %d+%d [%d+%d]\n", map->index, map->count, offset, pages);
 406
 407        /* It is possible the requested range will have a "hole" where we
 408         * already unmapped some of the grants. Only unmap valid ranges.
 409         */
 410        while (pages && !err) {
 411                while (pages && map->unmap_ops[offset].handle == -1) {
 412                        offset++;
 413                        pages--;
 414                }
 415                range = 0;
 416                while (range < pages) {
 417                        if (map->unmap_ops[offset+range].handle == -1)
 418                                break;
 419                        range++;
 420                }
 421                err = __unmap_grant_pages(map, offset, range);
 422                offset += range;
 423                pages -= range;
 424        }
 425
 426        return err;
 427}
 428
 429/* ------------------------------------------------------------------ */
 430
 431static void gntdev_vma_open(struct vm_area_struct *vma)
 432{
 433        struct gntdev_grant_map *map = vma->vm_private_data;
 434
 435        pr_debug("gntdev_vma_open %p\n", vma);
 436        refcount_inc(&map->users);
 437}
 438
 439static void gntdev_vma_close(struct vm_area_struct *vma)
 440{
 441        struct gntdev_grant_map *map = vma->vm_private_data;
 442        struct file *file = vma->vm_file;
 443        struct gntdev_priv *priv = file->private_data;
 444
 445        pr_debug("gntdev_vma_close %p\n", vma);
 446        if (use_ptemod) {
 447                /* It is possible that an mmu notifier could be running
 448                 * concurrently, so take priv->lock to ensure that the vma won't
  449                 * vanish during the unmap_grant_pages call, since we will
 450                 * spin here until that completes. Such a concurrent call will
 451                 * not do any unmapping, since that has been done prior to
 452                 * closing the vma, but it may still iterate the unmap_ops list.
 453                 */
 454                mutex_lock(&priv->lock);
 455                map->vma = NULL;
 456                mutex_unlock(&priv->lock);
 457        }
 458        vma->vm_private_data = NULL;
 459        gntdev_put_map(priv, map);
 460}
 461
 462static struct page *gntdev_vma_find_special_page(struct vm_area_struct *vma,
 463                                                 unsigned long addr)
 464{
 465        struct gntdev_grant_map *map = vma->vm_private_data;
 466
 467        return map->pages[(addr - map->pages_vm_start) >> PAGE_SHIFT];
 468}
 469
 470static const struct vm_operations_struct gntdev_vmops = {
 471        .open = gntdev_vma_open,
 472        .close = gntdev_vma_close,
 473        .find_special_page = gntdev_vma_find_special_page,
 474};
 475
 476/* ------------------------------------------------------------------ */
 477
 478static bool in_range(struct gntdev_grant_map *map,
 479                              unsigned long start, unsigned long end)
 480{
 481        if (!map->vma)
 482                return false;
 483        if (map->vma->vm_start >= end)
 484                return false;
 485        if (map->vma->vm_end <= start)
 486                return false;
 487
 488        return true;
 489}
 490
 491static int unmap_if_in_range(struct gntdev_grant_map *map,
 492                              unsigned long start, unsigned long end,
 493                              bool blockable)
 494{
 495        unsigned long mstart, mend;
 496        int err;
 497
 498        if (!in_range(map, start, end))
 499                return 0;
 500
 501        if (!blockable)
 502                return -EAGAIN;
 503
 504        mstart = max(start, map->vma->vm_start);
 505        mend   = min(end,   map->vma->vm_end);
 506        pr_debug("map %d+%d (%lx %lx), range %lx %lx, mrange %lx %lx\n",
 507                        map->index, map->count,
 508                        map->vma->vm_start, map->vma->vm_end,
 509                        start, end, mstart, mend);
 510        err = unmap_grant_pages(map,
 511                                (mstart - map->vma->vm_start) >> PAGE_SHIFT,
 512                                (mend - mstart) >> PAGE_SHIFT);
 513        WARN_ON(err);
 514
 515        return 0;
 516}
 517
 518static int mn_invl_range_start(struct mmu_notifier *mn,
 519                               const struct mmu_notifier_range *range)
 520{
 521        struct gntdev_priv *priv = container_of(mn, struct gntdev_priv, mn);
 522        struct gntdev_grant_map *map;
 523        int ret = 0;
 524
 525        if (mmu_notifier_range_blockable(range))
 526                mutex_lock(&priv->lock);
 527        else if (!mutex_trylock(&priv->lock))
 528                return -EAGAIN;
 529
 530        list_for_each_entry(map, &priv->maps, next) {
 531                ret = unmap_if_in_range(map, range->start, range->end,
 532                                        mmu_notifier_range_blockable(range));
 533                if (ret)
 534                        goto out_unlock;
 535        }
 536        list_for_each_entry(map, &priv->freeable_maps, next) {
 537                ret = unmap_if_in_range(map, range->start, range->end,
 538                                        mmu_notifier_range_blockable(range));
 539                if (ret)
 540                        goto out_unlock;
 541        }
 542
 543out_unlock:
 544        mutex_unlock(&priv->lock);
 545
 546        return ret;
 547}
 548
 549static void mn_release(struct mmu_notifier *mn,
 550                       struct mm_struct *mm)
 551{
 552        struct gntdev_priv *priv = container_of(mn, struct gntdev_priv, mn);
 553        struct gntdev_grant_map *map;
 554        int err;
 555
 556        mutex_lock(&priv->lock);
 557        list_for_each_entry(map, &priv->maps, next) {
 558                if (!map->vma)
 559                        continue;
 560                pr_debug("map %d+%d (%lx %lx)\n",
 561                                map->index, map->count,
 562                                map->vma->vm_start, map->vma->vm_end);
 563                err = unmap_grant_pages(map, /* offset */ 0, map->count);
 564                WARN_ON(err);
 565        }
 566        list_for_each_entry(map, &priv->freeable_maps, next) {
 567                if (!map->vma)
 568                        continue;
 569                pr_debug("map %d+%d (%lx %lx)\n",
 570                                map->index, map->count,
 571                                map->vma->vm_start, map->vma->vm_end);
 572                err = unmap_grant_pages(map, /* offset */ 0, map->count);
 573                WARN_ON(err);
 574        }
 575        mutex_unlock(&priv->lock);
 576}
 577
 578static const struct mmu_notifier_ops gntdev_mmu_ops = {
 579        .release                = mn_release,
 580        .invalidate_range_start = mn_invl_range_start,
 581};
 582
 583/* ------------------------------------------------------------------ */
 584
 585static int gntdev_open(struct inode *inode, struct file *flip)
 586{
 587        struct gntdev_priv *priv;
 588        int ret = 0;
 589
 590        priv = kzalloc(sizeof(*priv), GFP_KERNEL);
 591        if (!priv)
 592                return -ENOMEM;
 593
 594        INIT_LIST_HEAD(&priv->maps);
 595        INIT_LIST_HEAD(&priv->freeable_maps);
 596        mutex_init(&priv->lock);
 597
 598#ifdef CONFIG_XEN_GNTDEV_DMABUF
 599        priv->dmabuf_priv = gntdev_dmabuf_init(flip);
 600        if (IS_ERR(priv->dmabuf_priv)) {
 601                ret = PTR_ERR(priv->dmabuf_priv);
 602                kfree(priv);
 603                return ret;
 604        }
 605#endif
 606
 607        if (use_ptemod) {
 608                priv->mm = get_task_mm(current);
 609                if (!priv->mm) {
 610                        kfree(priv);
 611                        return -ENOMEM;
 612                }
 613                priv->mn.ops = &gntdev_mmu_ops;
 614                ret = mmu_notifier_register(&priv->mn, priv->mm);
 615                mmput(priv->mm);
 616        }
 617
 618        if (ret) {
 619                kfree(priv);
 620                return ret;
 621        }
 622
 623        flip->private_data = priv;
 624#ifdef CONFIG_XEN_GRANT_DMA_ALLOC
 625        priv->dma_dev = gntdev_miscdev.this_device;
 626        dma_coerce_mask_and_coherent(priv->dma_dev, DMA_BIT_MASK(64));
 627#endif
 628        pr_debug("priv %p\n", priv);
 629
 630        return 0;
 631}
 632
 633static int gntdev_release(struct inode *inode, struct file *flip)
 634{
 635        struct gntdev_priv *priv = flip->private_data;
 636        struct gntdev_grant_map *map;
 637
 638        pr_debug("priv %p\n", priv);
 639
 640        mutex_lock(&priv->lock);
 641        while (!list_empty(&priv->maps)) {
 642                map = list_entry(priv->maps.next,
 643                                 struct gntdev_grant_map, next);
 644                list_del(&map->next);
 645                gntdev_put_map(NULL /* already removed */, map);
 646        }
 647        WARN_ON(!list_empty(&priv->freeable_maps));
 648        mutex_unlock(&priv->lock);
 649
 650#ifdef CONFIG_XEN_GNTDEV_DMABUF
 651        gntdev_dmabuf_fini(priv->dmabuf_priv);
 652#endif
 653
 654        if (use_ptemod)
 655                mmu_notifier_unregister(&priv->mn, priv->mm);
 656
 657        kfree(priv);
 658        return 0;
 659}
 660
 661static long gntdev_ioctl_map_grant_ref(struct gntdev_priv *priv,
 662                                       struct ioctl_gntdev_map_grant_ref __user *u)
 663{
 664        struct ioctl_gntdev_map_grant_ref op;
 665        struct gntdev_grant_map *map;
 666        int err;
 667
 668        if (copy_from_user(&op, u, sizeof(op)) != 0)
 669                return -EFAULT;
 670        pr_debug("priv %p, add %d\n", priv, op.count);
 671        if (unlikely(op.count <= 0))
 672                return -EINVAL;
 673
 674        err = -ENOMEM;
 675        map = gntdev_alloc_map(priv, op.count, 0 /* This is not a dma-buf. */);
 676        if (!map)
 677                return err;
 678
 679        if (unlikely(gntdev_account_mapped_pages(op.count))) {
 680                pr_debug("can't map: over limit\n");
 681                gntdev_put_map(NULL, map);
 682                return err;
 683        }
 684
 685        if (copy_from_user(map->grants, &u->refs,
 686                           sizeof(map->grants[0]) * op.count) != 0) {
 687                gntdev_put_map(NULL, map);
 688                return -EFAULT;
 689        }
 690
 691        mutex_lock(&priv->lock);
 692        gntdev_add_map(priv, map);
 693        op.index = map->index << PAGE_SHIFT;
 694        mutex_unlock(&priv->lock);
 695
 696        if (copy_to_user(u, &op, sizeof(op)) != 0)
 697                return -EFAULT;
 698
 699        return 0;
 700}
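
/*
 * A minimal user-space sketch of using IOCTL_GNTDEV_MAP_GRANT_REF (not part
 * of this driver; error handling is omitted, a 4 KiB page is assumed, the
 * uapi header is assumed to be installed as <xen/gntdev.h>, and "gref" and
 * "peer_domid" are placeholders supplied by the granting domain).  The ioctl
 * fills in op->index, which is then used as the mmap() offset:
 *
 *	#include <fcntl.h>
 *	#include <stdint.h>
 *	#include <stdlib.h>
 *	#include <sys/ioctl.h>
 *	#include <sys/mman.h>
 *	#include <xen/gntdev.h>
 *
 *	int fd = open("/dev/xen/gntdev", O_RDWR);
 *	struct ioctl_gntdev_map_grant_ref *op = malloc(sizeof(*op));
 *
 *	op->count = 1;
 *	op->refs[0].ref = gref;
 *	op->refs[0].domid = peer_domid;
 *	ioctl(fd, IOCTL_GNTDEV_MAP_GRANT_REF, op);
 *
 *	void *addr = mmap(NULL, 4096, PROT_READ | PROT_WRITE,
 *			  MAP_SHARED, fd, op->index);
 */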
 701
 702static long gntdev_ioctl_unmap_grant_ref(struct gntdev_priv *priv,
 703                                         struct ioctl_gntdev_unmap_grant_ref __user *u)
 704{
 705        struct ioctl_gntdev_unmap_grant_ref op;
 706        struct gntdev_grant_map *map;
 707        int err = -ENOENT;
 708
 709        if (copy_from_user(&op, u, sizeof(op)) != 0)
 710                return -EFAULT;
 711        pr_debug("priv %p, del %d+%d\n", priv, (int)op.index, (int)op.count);
 712
 713        mutex_lock(&priv->lock);
 714        map = gntdev_find_map_index(priv, op.index >> PAGE_SHIFT, op.count);
 715        if (map) {
 716                list_del(&map->next);
 717                if (populate_freeable_maps)
 718                        list_add_tail(&map->next, &priv->freeable_maps);
 719                err = 0;
 720        }
 721        mutex_unlock(&priv->lock);
 722        if (map)
 723                gntdev_put_map(priv, map);
 724        return err;
 725}
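
/*
 * Continuing the sketch above (again not part of the driver): the usual
 * order is to drop the user-space mapping first and then release the grant
 * mapping with the same index/count pair that was handed to
 * IOCTL_GNTDEV_MAP_GRANT_REF:
 *
 *	munmap(addr, 4096);
 *
 *	struct ioctl_gntdev_unmap_grant_ref uop = {
 *		.index = op->index,
 *		.count = 1,
 *	};
 *	ioctl(fd, IOCTL_GNTDEV_UNMAP_GRANT_REF, &uop);
 */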
 726
 727static long gntdev_ioctl_get_offset_for_vaddr(struct gntdev_priv *priv,
 728                                              struct ioctl_gntdev_get_offset_for_vaddr __user *u)
 729{
 730        struct ioctl_gntdev_get_offset_for_vaddr op;
 731        struct vm_area_struct *vma;
 732        struct gntdev_grant_map *map;
 733        int rv = -EINVAL;
 734
 735        if (copy_from_user(&op, u, sizeof(op)) != 0)
 736                return -EFAULT;
 737        pr_debug("priv %p, offset for vaddr %lx\n", priv, (unsigned long)op.vaddr);
 738
 739        down_read(&current->mm->mmap_sem);
 740        vma = find_vma(current->mm, op.vaddr);
 741        if (!vma || vma->vm_ops != &gntdev_vmops)
 742                goto out_unlock;
 743
 744        map = vma->vm_private_data;
 745        if (!map)
 746                goto out_unlock;
 747
 748        op.offset = map->index << PAGE_SHIFT;
 749        op.count = map->count;
 750        rv = 0;
 751
 752 out_unlock:
 753        up_read(&current->mm->mmap_sem);
 754
 755        if (rv == 0 && copy_to_user(u, &op, sizeof(op)) != 0)
 756                return -EFAULT;
 757        return rv;
 758}
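
/*
 * A small user-space sketch (not part of the driver): if only the mapped
 * address is still at hand, this ioctl recovers the offset/count pair that
 * IOCTL_GNTDEV_UNMAP_GRANT_REF needs; vop.offset and vop.count are filled
 * in on return:
 *
 *	struct ioctl_gntdev_get_offset_for_vaddr vop = {
 *		.vaddr = (uint64_t)(uintptr_t)addr,
 *	};
 *	ioctl(fd, IOCTL_GNTDEV_GET_OFFSET_FOR_VADDR, &vop);
 */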
 759
 760static long gntdev_ioctl_notify(struct gntdev_priv *priv, void __user *u)
 761{
 762        struct ioctl_gntdev_unmap_notify op;
 763        struct gntdev_grant_map *map;
 764        int rc;
 765        int out_flags;
 766        unsigned int out_event;
 767
 768        if (copy_from_user(&op, u, sizeof(op)))
 769                return -EFAULT;
 770
 771        if (op.action & ~(UNMAP_NOTIFY_CLEAR_BYTE|UNMAP_NOTIFY_SEND_EVENT))
 772                return -EINVAL;
 773
 774        /* We need to grab a reference to the event channel we are going to use
 775         * to send the notify before releasing the reference we may already have
 776         * (if someone has called this ioctl twice). This is required so that
 777         * it is possible to change the clear_byte part of the notification
 778         * without disturbing the event channel part, which may now be the last
 779         * reference to that event channel.
 780         */
 781        if (op.action & UNMAP_NOTIFY_SEND_EVENT) {
 782                if (evtchn_get(op.event_channel_port))
 783                        return -EINVAL;
 784        }
 785
 786        out_flags = op.action;
 787        out_event = op.event_channel_port;
 788
 789        mutex_lock(&priv->lock);
 790
 791        list_for_each_entry(map, &priv->maps, next) {
 792                uint64_t begin = map->index << PAGE_SHIFT;
 793                uint64_t end = (map->index + map->count) << PAGE_SHIFT;
 794                if (op.index >= begin && op.index < end)
 795                        goto found;
 796        }
 797        rc = -ENOENT;
 798        goto unlock_out;
 799
 800 found:
 801        if ((op.action & UNMAP_NOTIFY_CLEAR_BYTE) &&
 802                        (map->flags & GNTMAP_readonly)) {
 803                rc = -EINVAL;
 804                goto unlock_out;
 805        }
 806
 807        out_flags = map->notify.flags;
 808        out_event = map->notify.event;
 809
 810        map->notify.flags = op.action;
 811        map->notify.addr = op.index - (map->index << PAGE_SHIFT);
 812        map->notify.event = op.event_channel_port;
 813
 814        rc = 0;
 815
 816 unlock_out:
 817        mutex_unlock(&priv->lock);
 818
 819        /* Drop the reference to the event channel we did not save in the map */
 820        if (out_flags & UNMAP_NOTIFY_SEND_EVENT)
 821                evtchn_put(out_event);
 822
 823        return rc;
 824}
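
/*
 * A hedged user-space sketch of arming an unmap notification (not part of
 * the driver; "evtchn_port" is a placeholder for an event channel the caller
 * already owns, and a 4 KiB page is assumed).  The index is a byte offset
 * into the gntdev offset space; a common choice is the last byte of the
 * mapped page, which the driver clears and/or signals when the mapping goes
 * away:
 *
 *	struct ioctl_gntdev_unmap_notify nop = {
 *		.index = op->index + 4095,
 *		.action = UNMAP_NOTIFY_CLEAR_BYTE | UNMAP_NOTIFY_SEND_EVENT,
 *		.event_channel_port = evtchn_port,
 *	};
 *	ioctl(fd, IOCTL_GNTDEV_SET_UNMAP_NOTIFY, &nop);
 */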
 825
 826#define GNTDEV_COPY_BATCH 16
 827
 828struct gntdev_copy_batch {
 829        struct gnttab_copy ops[GNTDEV_COPY_BATCH];
 830        struct page *pages[GNTDEV_COPY_BATCH];
 831        s16 __user *status[GNTDEV_COPY_BATCH];
 832        unsigned int nr_ops;
 833        unsigned int nr_pages;
 834};
 835
 836static int gntdev_get_page(struct gntdev_copy_batch *batch, void __user *virt,
 837                           bool writeable, unsigned long *gfn)
 838{
 839        unsigned long addr = (unsigned long)virt;
 840        struct page *page;
 841        unsigned long xen_pfn;
 842        int ret;
 843
 844        ret = get_user_pages_fast(addr, 1, writeable ? FOLL_WRITE : 0, &page);
 845        if (ret < 0)
 846                return ret;
 847
 848        batch->pages[batch->nr_pages++] = page;
 849
 850        xen_pfn = page_to_xen_pfn(page) + XEN_PFN_DOWN(addr & ~PAGE_MASK);
 851        *gfn = pfn_to_gfn(xen_pfn);
 852
 853        return 0;
 854}
 855
 856static void gntdev_put_pages(struct gntdev_copy_batch *batch)
 857{
 858        unsigned int i;
 859
 860        for (i = 0; i < batch->nr_pages; i++)
 861                put_page(batch->pages[i]);
 862        batch->nr_pages = 0;
 863}
 864
 865static int gntdev_copy(struct gntdev_copy_batch *batch)
 866{
 867        unsigned int i;
 868
 869        gnttab_batch_copy(batch->ops, batch->nr_ops);
 870        gntdev_put_pages(batch);
 871
 872        /*
 873         * For each completed op, update the status if the op failed
 874         * and all previous ops for the segment were successful.
 875         */
 876        for (i = 0; i < batch->nr_ops; i++) {
 877                s16 status = batch->ops[i].status;
 878                s16 old_status;
 879
 880                if (status == GNTST_okay)
 881                        continue;
 882
 883                if (__get_user(old_status, batch->status[i]))
 884                        return -EFAULT;
 885
 886                if (old_status != GNTST_okay)
 887                        continue;
 888
 889                if (__put_user(status, batch->status[i]))
 890                        return -EFAULT;
 891        }
 892
 893        batch->nr_ops = 0;
 894        return 0;
 895}
 896
 897static int gntdev_grant_copy_seg(struct gntdev_copy_batch *batch,
 898                                 struct gntdev_grant_copy_segment *seg,
 899                                 s16 __user *status)
 900{
 901        uint16_t copied = 0;
 902
 903        /*
 904         * Disallow local -> local copies since there is only space in
 905         * batch->pages for one page per-op and this would be a very
 906         * expensive memcpy().
 907         */
 908        if (!(seg->flags & (GNTCOPY_source_gref | GNTCOPY_dest_gref)))
 909                return -EINVAL;
 910
 911        /* Can't cross page if source/dest is a grant ref. */
 912        if (seg->flags & GNTCOPY_source_gref) {
 913                if (seg->source.foreign.offset + seg->len > XEN_PAGE_SIZE)
 914                        return -EINVAL;
 915        }
 916        if (seg->flags & GNTCOPY_dest_gref) {
 917                if (seg->dest.foreign.offset + seg->len > XEN_PAGE_SIZE)
 918                        return -EINVAL;
 919        }
 920
 921        if (put_user(GNTST_okay, status))
 922                return -EFAULT;
 923
 924        while (copied < seg->len) {
 925                struct gnttab_copy *op;
 926                void __user *virt;
 927                size_t len, off;
 928                unsigned long gfn;
 929                int ret;
 930
 931                if (batch->nr_ops >= GNTDEV_COPY_BATCH) {
 932                        ret = gntdev_copy(batch);
 933                        if (ret < 0)
 934                                return ret;
 935                }
 936
 937                len = seg->len - copied;
 938
 939                op = &batch->ops[batch->nr_ops];
 940                op->flags = 0;
 941
 942                if (seg->flags & GNTCOPY_source_gref) {
 943                        op->source.u.ref = seg->source.foreign.ref;
 944                        op->source.domid = seg->source.foreign.domid;
 945                        op->source.offset = seg->source.foreign.offset + copied;
 946                        op->flags |= GNTCOPY_source_gref;
 947                } else {
 948                        virt = seg->source.virt + copied;
 949                        off = (unsigned long)virt & ~XEN_PAGE_MASK;
 950                        len = min(len, (size_t)XEN_PAGE_SIZE - off);
 951
 952                        ret = gntdev_get_page(batch, virt, false, &gfn);
 953                        if (ret < 0)
 954                                return ret;
 955
 956                        op->source.u.gmfn = gfn;
 957                        op->source.domid = DOMID_SELF;
 958                        op->source.offset = off;
 959                }
 960
 961                if (seg->flags & GNTCOPY_dest_gref) {
 962                        op->dest.u.ref = seg->dest.foreign.ref;
 963                        op->dest.domid = seg->dest.foreign.domid;
 964                        op->dest.offset = seg->dest.foreign.offset + copied;
 965                        op->flags |= GNTCOPY_dest_gref;
 966                } else {
 967                        virt = seg->dest.virt + copied;
 968                        off = (unsigned long)virt & ~XEN_PAGE_MASK;
 969                        len = min(len, (size_t)XEN_PAGE_SIZE - off);
 970
 971                        ret = gntdev_get_page(batch, virt, true, &gfn);
 972                        if (ret < 0)
 973                                return ret;
 974
 975                        op->dest.u.gmfn = gfn;
 976                        op->dest.domid = DOMID_SELF;
 977                        op->dest.offset = off;
 978                }
 979
 980                op->len = len;
 981                copied += len;
 982
 983                batch->status[batch->nr_ops] = status;
 984                batch->nr_ops++;
 985        }
 986
 987        return 0;
 988}
 989
 990static long gntdev_ioctl_grant_copy(struct gntdev_priv *priv, void __user *u)
 991{
 992        struct ioctl_gntdev_grant_copy copy;
 993        struct gntdev_copy_batch batch;
 994        unsigned int i;
 995        int ret = 0;
 996
 997        if (copy_from_user(&copy, u, sizeof(copy)))
 998                return -EFAULT;
 999
1000        batch.nr_ops = 0;
1001        batch.nr_pages = 0;
1002
1003        for (i = 0; i < copy.count; i++) {
1004                struct gntdev_grant_copy_segment seg;
1005
1006                if (copy_from_user(&seg, &copy.segments[i], sizeof(seg))) {
1007                        ret = -EFAULT;
1008                        goto out;
1009                }
1010
1011                ret = gntdev_grant_copy_seg(&batch, &seg, &copy.segments[i].status);
1012                if (ret < 0)
1013                        goto out;
1014
1015                cond_resched();
1016        }
1017        if (batch.nr_ops)
1018                ret = gntdev_copy(&batch);
1019        return ret;
1020
1021  out:
1022        gntdev_put_pages(&batch);
1023        return ret;
1024}
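
/*
 * A minimal user-space sketch of IOCTL_GNTDEV_GRANT_COPY (not part of the
 * driver; "gref" and "peer_domid" are placeholders and a 4 KiB Xen page is
 * assumed).  It copies one granted page into a local buffer without mapping
 * it first; the per-segment result comes back in seg.status:
 *
 *	char buf[4096];
 *	struct gntdev_grant_copy_segment seg = {
 *		.source.foreign = { .ref = gref, .domid = peer_domid },
 *		.dest.virt = buf,
 *		.len = sizeof(buf),
 *		.flags = GNTCOPY_source_gref,
 *	};
 *	struct ioctl_gntdev_grant_copy cop = {
 *		.count = 1,
 *		.segments = &seg,
 *	};
 *	ioctl(fd, IOCTL_GNTDEV_GRANT_COPY, &cop);
 */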
1025
1026static long gntdev_ioctl(struct file *flip,
1027                         unsigned int cmd, unsigned long arg)
1028{
1029        struct gntdev_priv *priv = flip->private_data;
1030        void __user *ptr = (void __user *)arg;
1031
1032        switch (cmd) {
1033        case IOCTL_GNTDEV_MAP_GRANT_REF:
1034                return gntdev_ioctl_map_grant_ref(priv, ptr);
1035
1036        case IOCTL_GNTDEV_UNMAP_GRANT_REF:
1037                return gntdev_ioctl_unmap_grant_ref(priv, ptr);
1038
1039        case IOCTL_GNTDEV_GET_OFFSET_FOR_VADDR:
1040                return gntdev_ioctl_get_offset_for_vaddr(priv, ptr);
1041
1042        case IOCTL_GNTDEV_SET_UNMAP_NOTIFY:
1043                return gntdev_ioctl_notify(priv, ptr);
1044
1045        case IOCTL_GNTDEV_GRANT_COPY:
1046                return gntdev_ioctl_grant_copy(priv, ptr);
1047
1048#ifdef CONFIG_XEN_GNTDEV_DMABUF
1049        case IOCTL_GNTDEV_DMABUF_EXP_FROM_REFS:
1050                return gntdev_ioctl_dmabuf_exp_from_refs(priv, use_ptemod, ptr);
1051
1052        case IOCTL_GNTDEV_DMABUF_EXP_WAIT_RELEASED:
1053                return gntdev_ioctl_dmabuf_exp_wait_released(priv, ptr);
1054
1055        case IOCTL_GNTDEV_DMABUF_IMP_TO_REFS:
1056                return gntdev_ioctl_dmabuf_imp_to_refs(priv, ptr);
1057
1058        case IOCTL_GNTDEV_DMABUF_IMP_RELEASE:
1059                return gntdev_ioctl_dmabuf_imp_release(priv, ptr);
1060#endif
1061
1062        default:
1063                pr_debug("priv %p, unknown cmd %x\n", priv, cmd);
1064                return -ENOIOCTLCMD;
1065        }
1066
1067        return 0;
1068}
1069
1070static int gntdev_mmap(struct file *flip, struct vm_area_struct *vma)
1071{
1072        struct gntdev_priv *priv = flip->private_data;
1073        int index = vma->vm_pgoff;
1074        int count = vma_pages(vma);
1075        struct gntdev_grant_map *map;
1076        int err = -EINVAL;
1077
1078        if ((vma->vm_flags & VM_WRITE) && !(vma->vm_flags & VM_SHARED))
1079                return -EINVAL;
1080
1081        pr_debug("map %d+%d at %lx (pgoff %lx)\n",
1082                        index, count, vma->vm_start, vma->vm_pgoff);
1083
1084        mutex_lock(&priv->lock);
1085        map = gntdev_find_map_index(priv, index, count);
1086        if (!map)
1087                goto unlock_out;
1088        if (use_ptemod && map->vma)
1089                goto unlock_out;
1090        if (use_ptemod && priv->mm != vma->vm_mm) {
1091                pr_warn("Huh? Other mm?\n");
1092                goto unlock_out;
1093        }
1094
1095        refcount_inc(&map->users);
1096
1097        vma->vm_ops = &gntdev_vmops;
1098
1099        vma->vm_flags |= VM_DONTEXPAND | VM_DONTDUMP | VM_MIXEDMAP;
1100
1101        if (use_ptemod)
1102                vma->vm_flags |= VM_DONTCOPY;
1103
1104        vma->vm_private_data = map;
1105
1106        if (use_ptemod)
1107                map->vma = vma;
1108
1109        if (map->flags) {
1110                if ((vma->vm_flags & VM_WRITE) &&
1111                                (map->flags & GNTMAP_readonly))
1112                        goto out_unlock_put;
1113        } else {
1114                map->flags = GNTMAP_host_map;
1115                if (!(vma->vm_flags & VM_WRITE))
1116                        map->flags |= GNTMAP_readonly;
1117        }
1118
1119        mutex_unlock(&priv->lock);
1120
1121        if (use_ptemod) {
1122                map->pages_vm_start = vma->vm_start;
1123                err = apply_to_page_range(vma->vm_mm, vma->vm_start,
1124                                          vma->vm_end - vma->vm_start,
1125                                          find_grant_ptes, map);
1126                if (err) {
1127                        pr_warn("find_grant_ptes() failure.\n");
1128                        goto out_put_map;
1129                }
1130        }
1131
1132        err = gntdev_map_grant_pages(map);
1133        if (err)
1134                goto out_put_map;
1135
1136        if (!use_ptemod) {
1137                err = vm_map_pages_zero(vma, map->pages, map->count);
1138                if (err)
1139                        goto out_put_map;
1140        } else {
1141#ifdef CONFIG_X86
1142                /*
1143                 * If the PTEs were not made special by the grant map
1144                 * hypercall, do so here.
1145                 *
1146                 * This is racy since the mapping is already visible
1147                 * to userspace but userspace should be well-behaved
1148                 * enough to not touch it until the mmap() call
1149                 * returns.
1150                 */
1151                if (!xen_feature(XENFEAT_gnttab_map_avail_bits)) {
1152                        apply_to_page_range(vma->vm_mm, vma->vm_start,
1153                                            vma->vm_end - vma->vm_start,
1154                                            set_grant_ptes_as_special, NULL);
1155                }
1156#endif
1157        }
1158
1159        return 0;
1160
1161unlock_out:
1162        mutex_unlock(&priv->lock);
1163        return err;
1164
1165out_unlock_put:
1166        mutex_unlock(&priv->lock);
1167out_put_map:
1168        if (use_ptemod) {
1169                map->vma = NULL;
1170                unmap_grant_pages(map, 0, map->count);
1171        }
1172        gntdev_put_map(priv, map);
1173        return err;
1174}
1175
1176static const struct file_operations gntdev_fops = {
1177        .owner = THIS_MODULE,
1178        .open = gntdev_open,
1179        .release = gntdev_release,
1180        .mmap = gntdev_mmap,
1181        .unlocked_ioctl = gntdev_ioctl
1182};
1183
1184static struct miscdevice gntdev_miscdev = {
1185        .minor        = MISC_DYNAMIC_MINOR,
1186        .name         = "xen/gntdev",
1187        .fops         = &gntdev_fops,
1188};
1189
1190/* ------------------------------------------------------------------ */
1191
1192static int __init gntdev_init(void)
1193{
1194        int err;
1195
1196        if (!xen_domain())
1197                return -ENODEV;
1198
1199        use_ptemod = !xen_feature(XENFEAT_auto_translated_physmap);
1200
1201        err = misc_register(&gntdev_miscdev);
1202        if (err != 0) {
1203                pr_err("Could not register gntdev device\n");
1204                return err;
1205        }
1206        return 0;
1207}
1208
1209static void __exit gntdev_exit(void)
1210{
1211        misc_deregister(&gntdev_miscdev);
1212}
1213
1214module_init(gntdev_init);
1215module_exit(gntdev_exit);
1216
1217/* ------------------------------------------------------------------ */
1218