linux/drivers/gpu/drm/i915/gem/i915_gem_userptr.c
/*
 * SPDX-License-Identifier: MIT
 *
 * Copyright © 2012-2014 Intel Corporation
 */

#include <linux/mmu_context.h>
#include <linux/mmu_notifier.h>
#include <linux/mempolicy.h>
#include <linux/swap.h>
#include <linux/sched/mm.h>

#include <drm/i915_drm.h>

#include "i915_gem_ioctls.h"
#include "i915_gem_object.h"
#include "i915_scatterlist.h"
#include "i915_trace.h"
#include "intel_drv.h"

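/*
 * i915_mm_struct tracks a client mm_struct that has userptr objects
 * created against it: one refcounted entry per mm, hashed by the
 * mm_struct pointer, with an i915_mmu_notifier attached on demand.
 * Final release is deferred to a worker (see __i915_mm_struct_free).
 */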
struct i915_mm_struct {
        struct mm_struct *mm;
        struct drm_i915_private *i915;
        struct i915_mmu_notifier *mn;
        struct hlist_node node;
        struct kref kref;
        struct work_struct work;
};

#if defined(CONFIG_MMU_NOTIFIER)
#include <linux/interval_tree.h>

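/*
 * i915_mmu_notifier carries the per-mm mmu_notifier together with an
 * interval tree of the userptr objects currently tracked against that
 * address space, protected by the spinlock.
 */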
struct i915_mmu_notifier {
        spinlock_t lock;
        struct hlist_node node;
        struct mmu_notifier mn;
        struct rb_root_cached objects;
        struct i915_mm_struct *mm;
};

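/* Links a single userptr object into its notifier's interval tree. */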
struct i915_mmu_object {
        struct i915_mmu_notifier *mn;
        struct drm_i915_gem_object *obj;
        struct interval_tree_node it;
};

static void add_object(struct i915_mmu_object *mo)
{
        GEM_BUG_ON(!RB_EMPTY_NODE(&mo->it.rb));
        interval_tree_insert(&mo->it, &mo->mn->objects);
}

static void del_object(struct i915_mmu_object *mo)
{
        if (RB_EMPTY_NODE(&mo->it.rb))
                return;

        interval_tree_remove(&mo->it, &mo->mn->objects);
        RB_CLEAR_NODE(&mo->it.rb);
}

static void
__i915_gem_userptr_set_active(struct drm_i915_gem_object *obj, bool value)
{
        struct i915_mmu_object *mo = obj->userptr.mmu_object;

        /*
         * During mm_invalidate_range we need to cancel any userptr that
         * overlaps the range being invalidated. Doing so requires the
         * struct_mutex, and that risks recursion. In order to cause
         * recursion, the user must alias the userptr address space with
         * a GTT mmapping (possible with a MAP_FIXED) - then when we have
         * to invalidate that mmapping, mm_invalidate_range is called with
         * the userptr address *and* the struct_mutex held. To prevent that
         * we set a flag under the i915_mmu_notifier spinlock to indicate
         * whether this object is valid.
         */
        if (!mo)
                return;

        spin_lock(&mo->mn->lock);
        if (value)
                add_object(mo);
        else
                del_object(mo);
        spin_unlock(&mo->mn->lock);
}

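/*
 * Called by the mm on invalidation of a range of the client address
 * space. Walk the interval tree for overlapping userptr objects and
 * revoke their pages (unbind and put_pages), taking struct_mutex at
 * most once and coping with recursion via mutex_trylock_recursive().
 */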
static int
userptr_mn_invalidate_range_start(struct mmu_notifier *_mn,
                                  const struct mmu_notifier_range *range)
{
        struct i915_mmu_notifier *mn =
                container_of(_mn, struct i915_mmu_notifier, mn);
        struct interval_tree_node *it;
        struct mutex *unlock = NULL;
        unsigned long end;
        int ret = 0;

        if (RB_EMPTY_ROOT(&mn->objects.rb_root))
                return 0;

        /* interval ranges are inclusive, but invalidate range is exclusive */
        end = range->end - 1;

        spin_lock(&mn->lock);
        it = interval_tree_iter_first(&mn->objects, range->start, end);
        while (it) {
                struct drm_i915_gem_object *obj;

                if (!mmu_notifier_range_blockable(range)) {
                        ret = -EAGAIN;
                        break;
                }

                /*
                 * The mmu_object is released late when destroying the
                 * GEM object so it is entirely possible to gain a
                 * reference on an object in the process of being freed
                 * since our serialisation is via the spinlock and not
                 * the struct_mutex - and consequently use it after it
                 * is freed and then double free it. To prevent that
                 * use-after-free we only acquire a reference on the
                 * object if it is not in the process of being destroyed.
                 */
                obj = container_of(it, struct i915_mmu_object, it)->obj;
                if (!kref_get_unless_zero(&obj->base.refcount)) {
                        it = interval_tree_iter_next(it, range->start, end);
                        continue;
                }
                spin_unlock(&mn->lock);

                if (!unlock) {
                        unlock = &mn->mm->i915->drm.struct_mutex;

                        switch (mutex_trylock_recursive(unlock)) {
                        default:
                        case MUTEX_TRYLOCK_FAILED:
                                if (mutex_lock_killable_nested(unlock, I915_MM_SHRINKER)) {
                                        i915_gem_object_put(obj);
                                        return -EINTR;
                                }
                                /* fall through */
                        case MUTEX_TRYLOCK_SUCCESS:
                                break;

                        case MUTEX_TRYLOCK_RECURSIVE:
                                unlock = ERR_PTR(-EEXIST);
                                break;
                        }
                }

                ret = i915_gem_object_unbind(obj);
                if (ret == 0)
                        ret = __i915_gem_object_put_pages(obj, I915_MM_SHRINKER);
                i915_gem_object_put(obj);
                if (ret)
                        goto unlock;

                spin_lock(&mn->lock);

                /*
                 * As we do not (yet) protect the mmu from concurrent insertion
                 * over this range, there is no guarantee that this search will
                 * terminate given a pathological workload.
                 */
                it = interval_tree_iter_first(&mn->objects, range->start, end);
        }
        spin_unlock(&mn->lock);

unlock:
        if (!IS_ERR_OR_NULL(unlock))
                mutex_unlock(unlock);

        return ret;
}

static const struct mmu_notifier_ops i915_gem_userptr_notifier = {
        .invalidate_range_start = userptr_mn_invalidate_range_start,
};

static struct i915_mmu_notifier *
i915_mmu_notifier_create(struct i915_mm_struct *mm)
{
        struct i915_mmu_notifier *mn;

        mn = kmalloc(sizeof(*mn), GFP_KERNEL);
        if (mn == NULL)
                return ERR_PTR(-ENOMEM);

        spin_lock_init(&mn->lock);
        mn->mn.ops = &i915_gem_userptr_notifier;
        mn->objects = RB_ROOT_CACHED;
        mn->mm = mm;

        return mn;
}

static void
i915_gem_userptr_release__mmu_notifier(struct drm_i915_gem_object *obj)
{
        struct i915_mmu_object *mo;

        mo = fetch_and_zero(&obj->userptr.mmu_object);
        if (!mo)
                return;

        spin_lock(&mo->mn->lock);
        del_object(mo);
        spin_unlock(&mo->mn->lock);
        kfree(mo);
}

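/*
 * Return the i915_mmu_notifier for this mm, creating and registering
 * one on first use. Creation races are resolved under mmap_sem and
 * mm_lock; a losing thread frees its notifier and reuses the winner's.
 */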
static struct i915_mmu_notifier *
i915_mmu_notifier_find(struct i915_mm_struct *mm)
{
        struct i915_mmu_notifier *mn;
        int err = 0;

        mn = mm->mn;
        if (mn)
                return mn;

        mn = i915_mmu_notifier_create(mm);
        if (IS_ERR(mn))
                err = PTR_ERR(mn);

        down_write(&mm->mm->mmap_sem);
        mutex_lock(&mm->i915->mm_lock);
        if (mm->mn == NULL && !err) {
                /* Protected by mmap_sem (write-lock) */
                err = __mmu_notifier_register(&mn->mn, mm->mm);
                if (!err) {
                        /* Protected by mm_lock */
                        mm->mn = fetch_and_zero(&mn);
                }
        } else if (mm->mn) {
                /*
                 * Someone else raced and successfully installed the mmu
                 * notifier, we can cancel our own errors.
                 */
                err = 0;
        }
        mutex_unlock(&mm->i915->mm_lock);
        up_write(&mm->mm->mmap_sem);

        if (mn && !IS_ERR(mn))
                kfree(mn);

        return err ? ERR_PTR(err) : mm->mn;
}

static int
i915_gem_userptr_init__mmu_notifier(struct drm_i915_gem_object *obj,
                                    unsigned flags)
{
        struct i915_mmu_notifier *mn;
        struct i915_mmu_object *mo;

        if (flags & I915_USERPTR_UNSYNCHRONIZED)
                return capable(CAP_SYS_ADMIN) ? 0 : -EPERM;

        if (WARN_ON(obj->userptr.mm == NULL))
                return -EINVAL;

        mn = i915_mmu_notifier_find(obj->userptr.mm);
        if (IS_ERR(mn))
                return PTR_ERR(mn);

        mo = kzalloc(sizeof(*mo), GFP_KERNEL);
        if (!mo)
                return -ENOMEM;

        mo->mn = mn;
        mo->obj = obj;
        mo->it.start = obj->userptr.ptr;
        mo->it.last = obj->userptr.ptr + obj->base.size - 1;
        RB_CLEAR_NODE(&mo->it.rb);

        obj->userptr.mmu_object = mo;
        return 0;
}

static void
i915_mmu_notifier_free(struct i915_mmu_notifier *mn,
                       struct mm_struct *mm)
{
        if (mn == NULL)
                return;

        mmu_notifier_unregister(&mn->mn, mm);
        kfree(mn);
}

#else

static void
__i915_gem_userptr_set_active(struct drm_i915_gem_object *obj, bool value)
{
}

static void
i915_gem_userptr_release__mmu_notifier(struct drm_i915_gem_object *obj)
{
}

static int
i915_gem_userptr_init__mmu_notifier(struct drm_i915_gem_object *obj,
                                    unsigned flags)
{
        if ((flags & I915_USERPTR_UNSYNCHRONIZED) == 0)
                return -ENODEV;

        if (!capable(CAP_SYS_ADMIN))
                return -EPERM;

        return 0;
}

static void
i915_mmu_notifier_free(struct i915_mmu_notifier *mn,
                       struct mm_struct *mm)
{
}

#endif

static struct i915_mm_struct *
__i915_mm_struct_find(struct drm_i915_private *dev_priv, struct mm_struct *real)
{
        struct i915_mm_struct *mm;

        /* Protected by dev_priv->mm_lock */
        hash_for_each_possible(dev_priv->mm_structs, mm, node, (unsigned long)real)
                if (mm->mm == real)
                        return mm;

        return NULL;
}

static int
i915_gem_userptr_init__mm_struct(struct drm_i915_gem_object *obj)
{
        struct drm_i915_private *dev_priv = to_i915(obj->base.dev);
        struct i915_mm_struct *mm;
        int ret = 0;

        /*
         * During release of the GEM object we hold the struct_mutex. This
         * precludes us from calling mmput() at that time as that may be
         * the last reference and so call exit_mmap(). exit_mmap() will
         * attempt to reap the vma, and if we were holding a GTT mmap
         * would then call drm_gem_vm_close() and attempt to reacquire
         * the struct mutex. So in order to avoid that recursion, we have
         * to defer releasing the mm reference until after we drop the
         * struct_mutex, i.e. we need to schedule a worker to do the clean
         * up.
         */
        mutex_lock(&dev_priv->mm_lock);
        mm = __i915_mm_struct_find(dev_priv, current->mm);
        if (mm == NULL) {
                mm = kmalloc(sizeof(*mm), GFP_KERNEL);
                if (mm == NULL) {
                        ret = -ENOMEM;
                        goto out;
                }

                kref_init(&mm->kref);
                mm->i915 = to_i915(obj->base.dev);

                mm->mm = current->mm;
                mmgrab(current->mm);

                mm->mn = NULL;

                /* Protected by dev_priv->mm_lock */
                hash_add(dev_priv->mm_structs,
                         &mm->node, (unsigned long)mm->mm);
        } else
                kref_get(&mm->kref);

        obj->userptr.mm = mm;
out:
        mutex_unlock(&dev_priv->mm_lock);
        return ret;
}

static void
__i915_mm_struct_free__worker(struct work_struct *work)
{
        struct i915_mm_struct *mm = container_of(work, typeof(*mm), work);

        i915_mmu_notifier_free(mm->mn, mm->mm);
        mmdrop(mm->mm);
        kfree(mm);
}

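/*
 * Final kref release, called with dev_priv->mm_lock held by
 * kref_put_mutex(). Unhash the entry here and defer the notifier
 * unregister and mmdrop() to a worker (see the comment in
 * i915_gem_userptr_init__mm_struct for why this cannot be done
 * directly).
 */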
static void
__i915_mm_struct_free(struct kref *kref)
{
        struct i915_mm_struct *mm = container_of(kref, typeof(*mm), kref);

        /* Protected by dev_priv->mm_lock */
        hash_del(&mm->node);
        mutex_unlock(&mm->i915->mm_lock);

        INIT_WORK(&mm->work, __i915_mm_struct_free__worker);
        queue_work(mm->i915->mm.userptr_wq, &mm->work);
}

static void
i915_gem_userptr_release__mm_struct(struct drm_i915_gem_object *obj)
{
        if (obj->userptr.mm == NULL)
                return;

        kref_put_mutex(&obj->userptr.mm->kref,
                       __i915_mm_struct_free,
                       &to_i915(obj->base.dev)->mm_lock);
        obj->userptr.mm = NULL;
}

struct get_pages_work {
        struct work_struct work;
        struct drm_i915_gem_object *obj;
        struct task_struct *task;
};

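/*
 * Build an sg_table from the pinned user pages and map it for the GTT.
 * If DMA mapping fails with large segments, retry with PAGE_SIZE
 * segments before giving up.
 */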
static struct sg_table *
__i915_gem_userptr_alloc_pages(struct drm_i915_gem_object *obj,
                               struct page **pvec, int num_pages)
{
        unsigned int max_segment = i915_sg_segment_size();
        struct sg_table *st;
        unsigned int sg_page_sizes;
        int ret;

        st = kmalloc(sizeof(*st), GFP_KERNEL);
        if (!st)
                return ERR_PTR(-ENOMEM);

alloc_table:
        ret = __sg_alloc_table_from_pages(st, pvec, num_pages,
                                          0, num_pages << PAGE_SHIFT,
                                          max_segment,
                                          GFP_KERNEL);
        if (ret) {
                kfree(st);
                return ERR_PTR(ret);
        }

        ret = i915_gem_gtt_prepare_pages(obj, st);
        if (ret) {
                sg_free_table(st);

                if (max_segment > PAGE_SIZE) {
                        max_segment = PAGE_SIZE;
                        goto alloc_table;
                }

                kfree(st);
                return ERR_PTR(ret);
        }

        sg_page_sizes = i915_sg_page_sizes(st->sgl);

        __i915_gem_object_set_pages(obj, st, sg_page_sizes);

        return st;
}

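/*
 * Slow path: pin the user pages with get_user_pages_remote() under the
 * target mm's mmap_sem, then install them on the object unless this
 * work has since been cancelled or superseded (obj->userptr.work no
 * longer points at us).
 */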
static void
__i915_gem_userptr_get_pages_worker(struct work_struct *_work)
{
        struct get_pages_work *work = container_of(_work, typeof(*work), work);
        struct drm_i915_gem_object *obj = work->obj;
        const int npages = obj->base.size >> PAGE_SHIFT;
        struct page **pvec;
        int pinned, ret;

        ret = -ENOMEM;
        pinned = 0;

        pvec = kvmalloc_array(npages, sizeof(struct page *), GFP_KERNEL);
        if (pvec != NULL) {
                struct mm_struct *mm = obj->userptr.mm->mm;
                unsigned int flags = 0;

                if (!i915_gem_object_is_readonly(obj))
                        flags |= FOLL_WRITE;

                ret = -EFAULT;
                if (mmget_not_zero(mm)) {
                        down_read(&mm->mmap_sem);
                        while (pinned < npages) {
                                ret = get_user_pages_remote
                                        (work->task, mm,
                                         obj->userptr.ptr + pinned * PAGE_SIZE,
                                         npages - pinned,
                                         flags,
                                         pvec + pinned, NULL, NULL);
                                if (ret < 0)
                                        break;

                                pinned += ret;
                        }
                        up_read(&mm->mmap_sem);
                        mmput(mm);
                }
        }

        mutex_lock(&obj->mm.lock);
        if (obj->userptr.work == &work->work) {
                struct sg_table *pages = ERR_PTR(ret);

                if (pinned == npages) {
                        pages = __i915_gem_userptr_alloc_pages(obj, pvec,
                                                               npages);
                        if (!IS_ERR(pages)) {
                                pinned = 0;
                                pages = NULL;
                        }
                }

                obj->userptr.work = ERR_CAST(pages);
                if (IS_ERR(pages))
                        __i915_gem_userptr_set_active(obj, false);
        }
        mutex_unlock(&obj->mm.lock);

        release_pages(pvec, pinned);
        kvfree(pvec);

        i915_gem_object_put(obj);
        put_task_struct(work->task);
        kfree(work);
}

static struct sg_table *
__i915_gem_userptr_get_pages_schedule(struct drm_i915_gem_object *obj)
{
        struct get_pages_work *work;

        /*
         * Spawn a worker so that we can acquire the
         * user pages without holding our mutex. Access
         * to the user pages requires mmap_sem, and we have
         * a strict lock ordering of mmap_sem, struct_mutex -
         * we already hold struct_mutex here and so cannot
         * call gup without encountering a lock inversion.
         *
         * Userspace will keep on repeating the operation
         * (thanks to EAGAIN) until either we hit the fast
         * path or the worker completes. If the worker is
         * cancelled or superseded, the task is still run
         * but the results ignored. (This leads to
         * complications that we may have a stray object
         * refcount that we need to be wary of when
         * checking for existing objects during creation.)
         * If the worker encounters an error, it reports
         * that error back to this function through
         * obj->userptr.work = ERR_PTR.
         */
        work = kmalloc(sizeof(*work), GFP_KERNEL);
        if (work == NULL)
                return ERR_PTR(-ENOMEM);

        obj->userptr.work = &work->work;

        work->obj = i915_gem_object_get(obj);

        work->task = current;
        get_task_struct(work->task);

        INIT_WORK(&work->work, __i915_gem_userptr_get_pages_worker);
        queue_work(to_i915(obj->base.dev)->mm.userptr_wq, &work->work);

        return ERR_PTR(-EAGAIN);
}

static int i915_gem_userptr_get_pages(struct drm_i915_gem_object *obj)
{
        const int num_pages = obj->base.size >> PAGE_SHIFT;
        struct mm_struct *mm = obj->userptr.mm->mm;
        struct page **pvec;
        struct sg_table *pages;
        bool active;
        int pinned;

        /*
         * If userspace should engineer that these pages are replaced in
         * the vma between us binding this page into the GTT and completion
         * of rendering... Their loss. If they change the mapping of their
         * pages they need to create a new bo to point to the new vma.
         *
         * However, that still leaves open the possibility of the vma
         * being copied upon fork. Which falls under the same userspace
         * synchronisation issue as a regular bo, except that this time
         * the process may not be expecting that a particular piece of
         * memory is tied to the GPU.
         *
         * Fortunately, we can hook into the mmu_notifier in order to
         * discard the page references prior to anything nasty happening
         * to the vma (discard or cloning) which should prevent the more
         * egregious cases from causing harm.
         */

        if (obj->userptr.work) {
                /* active flag should still be held for the pending work */
                if (IS_ERR(obj->userptr.work))
                        return PTR_ERR(obj->userptr.work);
                else
                        return -EAGAIN;
        }

        pvec = NULL;
        pinned = 0;

        if (mm == current->mm) {
                pvec = kvmalloc_array(num_pages, sizeof(struct page *),
                                      GFP_KERNEL |
                                      __GFP_NORETRY |
                                      __GFP_NOWARN);
                if (pvec) /* defer to worker if malloc fails */
                        pinned = __get_user_pages_fast(obj->userptr.ptr,
                                                       num_pages,
                                                       !i915_gem_object_is_readonly(obj),
                                                       pvec);
        }

        active = false;
        if (pinned < 0) {
                pages = ERR_PTR(pinned);
                pinned = 0;
        } else if (pinned < num_pages) {
                pages = __i915_gem_userptr_get_pages_schedule(obj);
                active = pages == ERR_PTR(-EAGAIN);
        } else {
                pages = __i915_gem_userptr_alloc_pages(obj, pvec, num_pages);
                active = !IS_ERR(pages);
        }
        if (active)
                __i915_gem_userptr_set_active(obj, true);

        if (IS_ERR(pages))
                release_pages(pvec, pinned);
        kvfree(pvec);

        return PTR_ERR_OR_ZERO(pages);
}

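/*
 * Release the object's pages: cancel any in-flight acquisition work,
 * unmap from the GTT, mark the pages dirty/accessed as appropriate and
 * drop the page references taken by gup.
 */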
static void
i915_gem_userptr_put_pages(struct drm_i915_gem_object *obj,
                           struct sg_table *pages)
{
        struct sgt_iter sgt_iter;
        struct page *page;

        /* Cancel any inflight work and force them to restart their gup */
        obj->userptr.work = NULL;
        __i915_gem_userptr_set_active(obj, false);
        if (!pages)
                return;

        __i915_gem_object_release_shmem(obj, pages, true);
        i915_gem_gtt_finish_pages(obj, pages);

        for_each_sgt_page(page, sgt_iter, pages) {
                if (obj->mm.dirty)
                        set_page_dirty(page);

                mark_page_accessed(page);
                put_page(page);
        }
        obj->mm.dirty = false;

        sg_free_table(pages);
        kfree(pages);
}

static void
i915_gem_userptr_release(struct drm_i915_gem_object *obj)
{
        i915_gem_userptr_release__mmu_notifier(obj);
        i915_gem_userptr_release__mm_struct(obj);
}

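/*
 * Exporting a userptr object via dma-buf requires mmu-notifier
 * tracking, so if the object is not already tracked, arm the notifier
 * here (failing the export when that is not possible).
 */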
static int
i915_gem_userptr_dmabuf_export(struct drm_i915_gem_object *obj)
{
        if (obj->userptr.mmu_object)
                return 0;

        return i915_gem_userptr_init__mmu_notifier(obj, 0);
}

static const struct drm_i915_gem_object_ops i915_gem_userptr_ops = {
        .flags = I915_GEM_OBJECT_HAS_STRUCT_PAGE |
                 I915_GEM_OBJECT_IS_SHRINKABLE |
                 I915_GEM_OBJECT_ASYNC_CANCEL,
        .get_pages = i915_gem_userptr_get_pages,
        .put_pages = i915_gem_userptr_put_pages,
        .dmabuf_export = i915_gem_userptr_dmabuf_export,
        .release = i915_gem_userptr_release,
};

/*
 * Creates a new mm object that wraps some normal memory from the process
 * context - user memory.
 *
 * We impose several restrictions upon the memory being mapped
 * into the GPU.
 * 1. It must be page aligned (both start/end addresses, i.e. ptr and size).
 * 2. It must be normal system memory, not a pointer into another map of IO
 *    space (e.g. it must not be a GTT mmapping of another object).
 * 3. We only allow a bo as large as we could in theory map into the GTT,
 *    that is we limit the size to the total size of the GTT.
 * 4. The bo is marked as being snoopable. The backing pages are left
 *    accessible directly by the CPU, but reads and writes by the GPU may
 *    incur the cost of a snoop (unless you have an LLC architecture).
 *
 * Synchronisation between multiple users and the GPU is left to userspace
 * through the normal set-domain-ioctl. The kernel will enforce that the
 * GPU relinquishes the VMA before it is returned back to the system
 * i.e. upon free(), munmap() or process termination. However, the userspace
 * malloc() library may not immediately relinquish the VMA after free() and
 * instead reuse it whilst the GPU is still reading and writing to the VMA.
 * Caveat emptor.
 *
 * Also note, that the object created here is not currently a "first class"
 * object, in that several ioctls are banned. These are the CPU access
 * ioctls: mmap(), pwrite and pread. In practice, you are expected to use
 * direct access via your pointer rather than use those ioctls. Another
 * restriction is that we do not allow userptr surfaces to be pinned to the
 * hardware and so we reject any attempt to create a framebuffer out of a
 * userptr.
 *
 * If you think this is a good interface to use to pass GPU memory between
 * drivers, please use dma-buf instead. In fact, wherever possible use
 * dma-buf instead.
 */
int
i915_gem_userptr_ioctl(struct drm_device *dev,
                       void *data,
                       struct drm_file *file)
{
        struct drm_i915_private *dev_priv = to_i915(dev);
        struct drm_i915_gem_userptr *args = data;
        struct drm_i915_gem_object *obj;
        int ret;
        u32 handle;

        if (!HAS_LLC(dev_priv) && !HAS_SNOOP(dev_priv)) {
                /*
                 * We cannot support coherent userptr objects on hw
                 * without LLC and with broken snooping.
                 */
                return -ENODEV;
        }

        if (args->flags & ~(I915_USERPTR_READ_ONLY |
                            I915_USERPTR_UNSYNCHRONIZED))
                return -EINVAL;

        if (!args->user_size)
                return -EINVAL;

        if (offset_in_page(args->user_ptr | args->user_size))
                return -EINVAL;

        if (!access_ok((char __user *)(unsigned long)args->user_ptr, args->user_size))
                return -EFAULT;

        if (args->flags & I915_USERPTR_READ_ONLY) {
                struct i915_address_space *vm;

                /*
                 * On almost all of the older hw, we cannot tell the GPU that
                 * a page is readonly.
                 */
                vm = dev_priv->kernel_context->vm;
                if (!vm || !vm->has_read_only)
                        return -ENODEV;
        }

        obj = i915_gem_object_alloc();
        if (obj == NULL)
                return -ENOMEM;

        drm_gem_private_object_init(dev, &obj->base, args->user_size);
        i915_gem_object_init(obj, &i915_gem_userptr_ops);
        obj->read_domains = I915_GEM_DOMAIN_CPU;
        obj->write_domain = I915_GEM_DOMAIN_CPU;
        i915_gem_object_set_cache_coherency(obj, I915_CACHE_LLC);

        obj->userptr.ptr = args->user_ptr;
        if (args->flags & I915_USERPTR_READ_ONLY)
                i915_gem_object_set_readonly(obj);

        /*
         * And keep a pointer to the current->mm for resolving the user pages
         * at binding. This means that we need to hook into the mmu_notifier
         * in order to detect if the mmu is destroyed.
         */
        ret = i915_gem_userptr_init__mm_struct(obj);
        if (ret == 0)
                ret = i915_gem_userptr_init__mmu_notifier(obj, args->flags);
        if (ret == 0)
                ret = drm_gem_handle_create(file, &obj->base, &handle);

        /* drop reference from allocate - handle holds it now */
        i915_gem_object_put(obj);
        if (ret)
                return ret;

        args->handle = handle;
        return 0;
}

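/*
 * One-time setup of the per-device userptr state: the mm hashtable and
 * its lock, plus the high-priority workqueue used for deferred page
 * acquisition and mm release.
 */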
int i915_gem_init_userptr(struct drm_i915_private *dev_priv)
{
        mutex_init(&dev_priv->mm_lock);
        hash_init(dev_priv->mm_structs);

        dev_priv->mm.userptr_wq =
                alloc_workqueue("i915-userptr-acquire",
                                WQ_HIGHPRI | WQ_UNBOUND,
                                0);
        if (!dev_priv->mm.userptr_wq)
                return -ENOMEM;

        return 0;
}

void i915_gem_cleanup_userptr(struct drm_i915_private *dev_priv)
{
        destroy_workqueue(dev_priv->mm.userptr_wq);
}