linux/drivers/gpu/drm/i915/gem/i915_gem_userptr.c
/*
 * SPDX-License-Identifier: MIT
 *
 * Copyright © 2012-2014 Intel Corporation
 */

#include <linux/mmu_context.h>
#include <linux/mmu_notifier.h>
#include <linux/mempolicy.h>
#include <linux/swap.h>
#include <linux/sched/mm.h>

#include "i915_drv.h"
#include "i915_gem_ioctls.h"
#include "i915_gem_object.h"
#include "i915_scatterlist.h"

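/*
 * Per-process bookkeeping shared by every userptr object created from the
 * same mm. Entries are reference counted and looked up via an RCU hash
 * keyed on the mm_struct pointer; the final release is deferred to an
 * rcu_work (see __i915_mm_struct_free()).
 */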
struct i915_mm_struct {
        struct mm_struct *mm;
        struct drm_i915_private *i915;
        struct i915_mmu_notifier *mn;
        struct hlist_node node;
        struct kref kref;
        struct rcu_work work;
};

#if defined(CONFIG_MMU_NOTIFIER)
#include <linux/interval_tree.h>

struct i915_mmu_notifier {
        spinlock_t lock;
        struct hlist_node node;
        struct mmu_notifier mn;
        struct rb_root_cached objects;
        struct i915_mm_struct *mm;
};

struct i915_mmu_object {
        struct i915_mmu_notifier *mn;
        struct drm_i915_gem_object *obj;
        struct interval_tree_node it;
};

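/*
 * add_object()/del_object() track a userptr range in the notifier's
 * interval tree so that invalidations can find overlapping objects. All
 * callers hold mn->lock; del_object() tolerates repeated calls by checking
 * RB_EMPTY_NODE() first.
 */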
static void add_object(struct i915_mmu_object *mo)
{
        GEM_BUG_ON(!RB_EMPTY_NODE(&mo->it.rb));
        interval_tree_insert(&mo->it, &mo->mn->objects);
}

static void del_object(struct i915_mmu_object *mo)
{
        if (RB_EMPTY_NODE(&mo->it.rb))
                return;

        interval_tree_remove(&mo->it, &mo->mn->objects);
        RB_CLEAR_NODE(&mo->it.rb);
}

static void
__i915_gem_userptr_set_active(struct drm_i915_gem_object *obj, bool value)
{
        struct i915_mmu_object *mo = obj->userptr.mmu_object;

        /*
         * During mm_invalidate_range we need to cancel any userptr that
         * overlaps the range being invalidated. Doing so requires the
         * struct_mutex, and that risks recursion. In order to cause
         * recursion, the user must alias the userptr address space with
         * a GTT mmapping (possible with a MAP_FIXED) - then when we have
         * to invalidate that mmapping, mm_invalidate_range is called with
         * the userptr address *and* the struct_mutex held.  To prevent that
         * we set a flag under the i915_mmu_notifier spinlock to indicate
         * whether this object is valid.
         */
        if (!mo)
                return;

        spin_lock(&mo->mn->lock);
        if (value)
                add_object(mo);
        else
                del_object(mo);
        spin_unlock(&mo->mn->lock);
}

static int
userptr_mn_invalidate_range_start(struct mmu_notifier *_mn,
                                  const struct mmu_notifier_range *range)
{
        struct i915_mmu_notifier *mn =
                container_of(_mn, struct i915_mmu_notifier, mn);
        struct interval_tree_node *it;
        unsigned long end;
        int ret = 0;

        if (RB_EMPTY_ROOT(&mn->objects.rb_root))
                return 0;

        /* interval ranges are inclusive, but invalidate range is exclusive */
        end = range->end - 1;

        spin_lock(&mn->lock);
        it = interval_tree_iter_first(&mn->objects, range->start, end);
        while (it) {
                struct drm_i915_gem_object *obj;

                if (!mmu_notifier_range_blockable(range)) {
                        ret = -EAGAIN;
                        break;
                }

                /*
                 * The mmu_object is released late when destroying the
                 * GEM object so it is entirely possible to gain a
                 * reference on an object in the process of being freed
                 * since our serialisation is via the spinlock and not
                 * the struct_mutex - and consequently use it after it
                 * is freed and then double free it. To prevent that
                 * use-after-free we only acquire a reference on the
                 * object if it is not in the process of being destroyed.
                 */
                obj = container_of(it, struct i915_mmu_object, it)->obj;
                if (!kref_get_unless_zero(&obj->base.refcount)) {
                        it = interval_tree_iter_next(it, range->start, end);
                        continue;
                }
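                /*
                 * Drop the spinlock before unbinding: both the unbind and
                 * the put_pages below may sleep, and the reference taken
                 * above keeps the object alive in the meantime.
                 */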
                spin_unlock(&mn->lock);

                ret = i915_gem_object_unbind(obj,
                                             I915_GEM_OBJECT_UNBIND_ACTIVE |
                                             I915_GEM_OBJECT_UNBIND_BARRIER);
                if (ret == 0)
                        ret = __i915_gem_object_put_pages(obj);
                i915_gem_object_put(obj);
                if (ret)
                        return ret;

                spin_lock(&mn->lock);

                /*
                 * As we do not (yet) protect the mmu from concurrent insertion
                 * over this range, there is no guarantee that this search will
                 * terminate given a pathological workload.
                 */
                it = interval_tree_iter_first(&mn->objects, range->start, end);
        }
        spin_unlock(&mn->lock);

        return ret;
}

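/*
 * Only invalidate_range_start is hooked: any invalidation overlapping a
 * tracked userptr range unbinds the object and releases its pages, so the
 * GPU mapping is torn down before the CPU-side range changes.
 */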
static const struct mmu_notifier_ops i915_gem_userptr_notifier = {
        .invalidate_range_start = userptr_mn_invalidate_range_start,
};

static struct i915_mmu_notifier *
i915_mmu_notifier_create(struct i915_mm_struct *mm)
{
        struct i915_mmu_notifier *mn;

        mn = kmalloc(sizeof(*mn), GFP_KERNEL);
        if (mn == NULL)
                return ERR_PTR(-ENOMEM);

        spin_lock_init(&mn->lock);
        mn->mn.ops = &i915_gem_userptr_notifier;
        mn->objects = RB_ROOT_CACHED;
        mn->mm = mm;

        return mn;
}

static void
i915_gem_userptr_release__mmu_notifier(struct drm_i915_gem_object *obj)
{
        struct i915_mmu_object *mo;

        mo = fetch_and_zero(&obj->userptr.mmu_object);
        if (!mo)
                return;

        spin_lock(&mo->mn->lock);
        del_object(mo);
        spin_unlock(&mo->mn->lock);
        kfree(mo);
}

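/*
 * Find-or-create the notifier for this mm. The fast path is a plain
 * READ_ONCE(); on the slow path a freshly registered notifier is published
 * with cmpxchg(), and the loser of any race unregisters and frees its copy.
 */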
static struct i915_mmu_notifier *
i915_mmu_notifier_find(struct i915_mm_struct *mm)
{
        struct i915_mmu_notifier *mn, *old;
        int err;

        mn = READ_ONCE(mm->mn);
        if (likely(mn))
                return mn;

        mn = i915_mmu_notifier_create(mm);
        if (IS_ERR(mn))
                return mn;

        err = mmu_notifier_register(&mn->mn, mm->mm);
        if (err) {
                kfree(mn);
                return ERR_PTR(err);
        }

        old = cmpxchg(&mm->mn, NULL, mn);
        if (old) {
                mmu_notifier_unregister(&mn->mn, mm->mm);
                kfree(mn);
                mn = old;
        }

        return mn;
}

static int
i915_gem_userptr_init__mmu_notifier(struct drm_i915_gem_object *obj,
                                    unsigned flags)
{
        struct i915_mmu_notifier *mn;
        struct i915_mmu_object *mo;

        if (flags & I915_USERPTR_UNSYNCHRONIZED)
                return capable(CAP_SYS_ADMIN) ? 0 : -EPERM;

        if (GEM_WARN_ON(!obj->userptr.mm))
                return -EINVAL;

        mn = i915_mmu_notifier_find(obj->userptr.mm);
        if (IS_ERR(mn))
                return PTR_ERR(mn);

        mo = kzalloc(sizeof(*mo), GFP_KERNEL);
        if (!mo)
                return -ENOMEM;

        mo->mn = mn;
        mo->obj = obj;
        mo->it.start = obj->userptr.ptr;
        mo->it.last = obj->userptr.ptr + obj->base.size - 1;
        RB_CLEAR_NODE(&mo->it.rb);

        obj->userptr.mmu_object = mo;
        return 0;
}

static void
i915_mmu_notifier_free(struct i915_mmu_notifier *mn,
                       struct mm_struct *mm)
{
        if (mn == NULL)
                return;

        mmu_notifier_unregister(&mn->mn, mm);
        kfree(mn);
}

#else

static void
__i915_gem_userptr_set_active(struct drm_i915_gem_object *obj, bool value)
{
}

static void
i915_gem_userptr_release__mmu_notifier(struct drm_i915_gem_object *obj)
{
}

static int
i915_gem_userptr_init__mmu_notifier(struct drm_i915_gem_object *obj,
                                    unsigned flags)
{
        if ((flags & I915_USERPTR_UNSYNCHRONIZED) == 0)
                return -ENODEV;

        if (!capable(CAP_SYS_ADMIN))
                return -EPERM;

        return 0;
}

static void
i915_mmu_notifier_free(struct i915_mmu_notifier *mn,
                       struct mm_struct *mm)
{
}

#endif

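/*
 * Lockless lookup of the per-process i915_mm_struct: the hash is walked
 * under rcu_read_lock() and a reference is only taken via
 * kref_get_unless_zero(), so entries midway through release are skipped.
 */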
static struct i915_mm_struct *
__i915_mm_struct_find(struct drm_i915_private *i915, struct mm_struct *real)
{
        struct i915_mm_struct *it, *mm = NULL;

        rcu_read_lock();
        hash_for_each_possible_rcu(i915->mm_structs,
                                   it, node,
                                   (unsigned long)real)
                if (it->mm == real && kref_get_unless_zero(&it->kref)) {
                        mm = it;
                        break;
                }
        rcu_read_unlock();

        return mm;
}

static int
i915_gem_userptr_init__mm_struct(struct drm_i915_gem_object *obj)
{
        struct drm_i915_private *i915 = to_i915(obj->base.dev);
        struct i915_mm_struct *mm, *new;
        int ret = 0;

        /* During release of the GEM object we hold the struct_mutex. This
         * precludes us from calling mmput() at that time as that may be
         * the last reference and so call exit_mmap(). exit_mmap() will
         * attempt to reap the vma, and if we were holding a GTT mmap
         * would then call drm_gem_vm_close() and attempt to reacquire
         * the struct mutex. So in order to avoid that recursion, we have
         * to defer releasing the mm reference until after we drop the
         * struct_mutex, i.e. we need to schedule a worker to do the clean
         * up.
         */
        mm = __i915_mm_struct_find(i915, current->mm);
        if (mm)
                goto out;

        new = kmalloc(sizeof(*mm), GFP_KERNEL);
        if (!new)
                return -ENOMEM;

        kref_init(&new->kref);
        new->i915 = to_i915(obj->base.dev);
        new->mm = current->mm;
        new->mn = NULL;

        spin_lock(&i915->mm_lock);
        mm = __i915_mm_struct_find(i915, current->mm);
        if (!mm) {
                hash_add_rcu(i915->mm_structs,
                             &new->node,
                             (unsigned long)new->mm);
                mmgrab(current->mm);
                mm = new;
        }
        spin_unlock(&i915->mm_lock);
        if (mm != new)
                kfree(new);

out:
        obj->userptr.mm = mm;
        return ret;
}

static void
__i915_mm_struct_free__worker(struct work_struct *work)
{
        struct i915_mm_struct *mm = container_of(work, typeof(*mm), work.work);

        i915_mmu_notifier_free(mm->mn, mm->mm);
        mmdrop(mm->mm);
        kfree(mm);
}

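/*
 * Release happens in two stages: the kref release callback below unhashes
 * the entry and queues an rcu_work, and the worker above then unregisters
 * the mmu_notifier and drops the mm reference once a grace period has
 * passed and RCU lookups can no longer observe the entry.
 */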
static void
__i915_mm_struct_free(struct kref *kref)
{
        struct i915_mm_struct *mm = container_of(kref, typeof(*mm), kref);

        spin_lock(&mm->i915->mm_lock);
        hash_del_rcu(&mm->node);
        spin_unlock(&mm->i915->mm_lock);

        INIT_RCU_WORK(&mm->work, __i915_mm_struct_free__worker);
        queue_rcu_work(system_wq, &mm->work);
}

static void
i915_gem_userptr_release__mm_struct(struct drm_i915_gem_object *obj)
{
        if (obj->userptr.mm == NULL)
                return;

        kref_put(&obj->userptr.mm->kref, __i915_mm_struct_free);
        obj->userptr.mm = NULL;
}

struct get_pages_work {
        struct work_struct work;
        struct drm_i915_gem_object *obj;
        struct task_struct *task;
};

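/*
 * Build an sg_table from the pinned user pages and map it for the GTT.
 * Should mapping fail with large coalesced segments, the table is rebuilt
 * once with PAGE_SIZE segments before giving up.
 */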
static struct sg_table *
__i915_gem_userptr_alloc_pages(struct drm_i915_gem_object *obj,
                               struct page **pvec, unsigned long num_pages)
{
        unsigned int max_segment = i915_sg_segment_size();
        struct sg_table *st;
        unsigned int sg_page_sizes;
        struct scatterlist *sg;
        int ret;

        st = kmalloc(sizeof(*st), GFP_KERNEL);
        if (!st)
                return ERR_PTR(-ENOMEM);

alloc_table:
        sg = __sg_alloc_table_from_pages(st, pvec, num_pages, 0,
                                         num_pages << PAGE_SHIFT, max_segment,
                                         NULL, 0, GFP_KERNEL);
        if (IS_ERR(sg)) {
                kfree(st);
                return ERR_CAST(sg);
        }

        ret = i915_gem_gtt_prepare_pages(obj, st);
        if (ret) {
                sg_free_table(st);

                if (max_segment > PAGE_SIZE) {
                        max_segment = PAGE_SIZE;
                        goto alloc_table;
                }

                kfree(st);
                return ERR_PTR(ret);
        }

        sg_page_sizes = i915_sg_page_sizes(st->sgl);

        __i915_gem_object_set_pages(obj, st, sg_page_sizes);

        return st;
}

static void
__i915_gem_userptr_get_pages_worker(struct work_struct *_work)
{
        struct get_pages_work *work = container_of(_work, typeof(*work), work);
        struct drm_i915_gem_object *obj = work->obj;
        const unsigned long npages = obj->base.size >> PAGE_SHIFT;
        unsigned long pinned;
        struct page **pvec;
        int ret;

        ret = -ENOMEM;
        pinned = 0;

        pvec = kvmalloc_array(npages, sizeof(struct page *), GFP_KERNEL);
        if (pvec != NULL) {
                struct mm_struct *mm = obj->userptr.mm->mm;
                unsigned int flags = 0;
                int locked = 0;

                if (!i915_gem_object_is_readonly(obj))
                        flags |= FOLL_WRITE;

                ret = -EFAULT;
                if (mmget_not_zero(mm)) {
                        while (pinned < npages) {
                                if (!locked) {
                                        mmap_read_lock(mm);
                                        locked = 1;
                                }
                                ret = pin_user_pages_remote
                                        (mm,
                                         obj->userptr.ptr + pinned * PAGE_SIZE,
                                         npages - pinned,
                                         flags,
                                         pvec + pinned, NULL, &locked);
                                if (ret < 0)
                                        break;

                                pinned += ret;
                        }
                        if (locked)
                                mmap_read_unlock(mm);
                        mmput(mm);
                }
        }

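        /*
         * Publish the result under obj->mm.lock. obj->userptr.work acts as
         * a cancellation token: if it no longer points at this work, the
         * request was superseded and the pinned pages are simply released
         * again below.
         */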
        mutex_lock_nested(&obj->mm.lock, I915_MM_GET_PAGES);
        if (obj->userptr.work == &work->work) {
                struct sg_table *pages = ERR_PTR(ret);

                if (pinned == npages) {
                        pages = __i915_gem_userptr_alloc_pages(obj, pvec,
                                                               npages);
                        if (!IS_ERR(pages)) {
                                pinned = 0;
                                pages = NULL;
                        }
                }

                obj->userptr.work = ERR_CAST(pages);
                if (IS_ERR(pages))
                        __i915_gem_userptr_set_active(obj, false);
        }
        mutex_unlock(&obj->mm.lock);

        unpin_user_pages(pvec, pinned);
        kvfree(pvec);

        i915_gem_object_put(obj);
        put_task_struct(work->task);
        kfree(work);
}

static struct sg_table *
__i915_gem_userptr_get_pages_schedule(struct drm_i915_gem_object *obj)
{
        struct get_pages_work *work;

        /* Spawn a worker so that we can acquire the
         * user pages without holding our mutex. Access
         * to the user pages requires mmap_lock, and we have
         * a strict lock ordering of mmap_lock, struct_mutex -
         * we already hold struct_mutex here and so cannot
         * call gup without encountering a lock inversion.
         *
         * Userspace will keep on repeating the operation
         * (thanks to EAGAIN) until either we hit the fast
         * path or the worker completes. If the worker is
         * cancelled or superseded, the task is still run
         * but the results ignored. (This leads to
         * complications that we may have a stray object
         * refcount that we need to be wary of when
         * checking for existing objects during creation.)
         * If the worker encounters an error, it reports
         * that error back to this function through
         * obj->userptr.work = ERR_PTR.
         */
        work = kmalloc(sizeof(*work), GFP_KERNEL);
        if (work == NULL)
                return ERR_PTR(-ENOMEM);

        obj->userptr.work = &work->work;

        work->obj = i915_gem_object_get(obj);

        work->task = current;
        get_task_struct(work->task);

        INIT_WORK(&work->work, __i915_gem_userptr_get_pages_worker);
        queue_work(to_i915(obj->base.dev)->mm.userptr_wq, &work->work);

        return ERR_PTR(-EAGAIN);
}

static int i915_gem_userptr_get_pages(struct drm_i915_gem_object *obj)
{
        const unsigned long num_pages = obj->base.size >> PAGE_SHIFT;
        struct mm_struct *mm = obj->userptr.mm->mm;
        struct page **pvec;
        struct sg_table *pages;
        bool active;
        int pinned;
        unsigned int gup_flags = 0;

        /* If userspace should engineer that these pages are replaced in
         * the vma between us binding this page into the GTT and completion
         * of rendering... Their loss. If they change the mapping of their
         * pages they need to create a new bo to point to the new vma.
         *
         * However, that still leaves open the possibility of the vma
         * being copied upon fork. Which falls under the same userspace
         * synchronisation issue as a regular bo, except that this time
         * the process may not be expecting that a particular piece of
         * memory is tied to the GPU.
         *
         * Fortunately, we can hook into the mmu_notifier in order to
         * discard the page references prior to anything nasty happening
         * to the vma (discard or cloning) which should prevent the more
         * egregious cases from causing harm.
         */

        if (obj->userptr.work) {
                /* active flag should still be held for the pending work */
                if (IS_ERR(obj->userptr.work))
                        return PTR_ERR(obj->userptr.work);
                else
                        return -EAGAIN;
        }

        pvec = NULL;
        pinned = 0;

        if (mm == current->mm) {
                pvec = kvmalloc_array(num_pages, sizeof(struct page *),
                                      GFP_KERNEL |
                                      __GFP_NORETRY |
                                      __GFP_NOWARN);
                if (pvec) {
                        /* defer to worker if malloc fails */
                        if (!i915_gem_object_is_readonly(obj))
                                gup_flags |= FOLL_WRITE;
                        pinned = pin_user_pages_fast_only(obj->userptr.ptr,
                                                          num_pages, gup_flags,
                                                          pvec);
                }
        }

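        /*
         * Three outcomes from the fast path: a hard error from gup, a
         * partial (or skipped) pin that defers to the worker via -EAGAIN,
         * or a complete pin turned into an sg_table right away. The object
         * is only marked active with the mmu notifier in the latter two
         * cases.
         */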
        active = false;
        if (pinned < 0) {
                pages = ERR_PTR(pinned);
                pinned = 0;
        } else if (pinned < num_pages) {
                pages = __i915_gem_userptr_get_pages_schedule(obj);
                active = pages == ERR_PTR(-EAGAIN);
        } else {
                pages = __i915_gem_userptr_alloc_pages(obj, pvec, num_pages);
                active = !IS_ERR(pages);
        }
        if (active)
                __i915_gem_userptr_set_active(obj, true);

        if (IS_ERR(pages))
                unpin_user_pages(pvec, pinned);
        kvfree(pvec);

        return PTR_ERR_OR_ZERO(pages);
}

static void
i915_gem_userptr_put_pages(struct drm_i915_gem_object *obj,
                           struct sg_table *pages)
{
        struct sgt_iter sgt_iter;
        struct page *page;

        /* Cancel any inflight work and force them to restart their gup */
        obj->userptr.work = NULL;
        __i915_gem_userptr_set_active(obj, false);
        if (!pages)
                return;

        __i915_gem_object_release_shmem(obj, pages, true);
        i915_gem_gtt_finish_pages(obj, pages);

        /*
         * We always mark objects as dirty when they are used by the GPU,
         * just in case. However, if we set the vma as being read-only we know
         * that the object will never have been written to.
         */
        if (i915_gem_object_is_readonly(obj))
                obj->mm.dirty = false;

        for_each_sgt_page(page, sgt_iter, pages) {
                if (obj->mm.dirty && trylock_page(page)) {
                        /*
                         * As this may not be anonymous memory (e.g. shmem)
                         * but exist on a real mapping, we have to lock
                         * the page in order to dirty it -- holding
                         * the page reference is not sufficient to
                         * prevent the inode from being truncated.
                         * Play safe and take the lock.
                         *
                         * However...!
                         *
                         * The mmu-notifier can be invalidated for a
                         * migrate_page, that is already holding the lock
                         * on the page. Such a try_to_unmap() will result
                         * in us calling put_pages() and so recursively try
                         * to lock the page. We avoid that deadlock with
                         * a trylock_page() and in exchange we risk missing
                         * some page dirtying.
                         */
                        set_page_dirty(page);
                        unlock_page(page);
                }

                mark_page_accessed(page);
                unpin_user_page(page);
        }
        obj->mm.dirty = false;

        sg_free_table(pages);
        kfree(pages);
}

static void
i915_gem_userptr_release(struct drm_i915_gem_object *obj)
{
        i915_gem_userptr_release__mmu_notifier(obj);
        i915_gem_userptr_release__mm_struct(obj);
}

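/*
 * dma-buf export requires the mmu notifier to be in place: passing flags of
 * 0 skips the I915_USERPTR_UNSYNCHRONIZED shortcut, presumably so an
 * exported buffer can still have its pages revoked when the CPU mapping
 * changes.
 */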
static int
i915_gem_userptr_dmabuf_export(struct drm_i915_gem_object *obj)
{
        if (obj->userptr.mmu_object)
                return 0;

        return i915_gem_userptr_init__mmu_notifier(obj, 0);
}

static const struct drm_i915_gem_object_ops i915_gem_userptr_ops = {
        .name = "i915_gem_object_userptr",
        .flags = I915_GEM_OBJECT_HAS_STRUCT_PAGE |
                 I915_GEM_OBJECT_IS_SHRINKABLE |
                 I915_GEM_OBJECT_NO_MMAP |
                 I915_GEM_OBJECT_ASYNC_CANCEL,
        .get_pages = i915_gem_userptr_get_pages,
        .put_pages = i915_gem_userptr_put_pages,
        .dmabuf_export = i915_gem_userptr_dmabuf_export,
        .release = i915_gem_userptr_release,
};

/*
 * Creates a new mm object that wraps some normal memory from the process
 * context - user memory.
 *
 * We impose several restrictions upon the memory being mapped
 * into the GPU.
 * 1. It must be page aligned (both start/end addresses, i.e. ptr and size).
 * 2. It must be normal system memory, not a pointer into another map of IO
 *    space (e.g. it must not be a GTT mmapping of another object).
 * 3. We only allow a bo as large as we could in theory map into the GTT,
 *    that is we limit the size to the total size of the GTT.
 * 4. The bo is marked as being snoopable. The backing pages are left
 *    accessible directly by the CPU, but reads and writes by the GPU may
 *    incur the cost of a snoop (unless you have an LLC architecture).
 *
 * Synchronisation between multiple users and the GPU is left to userspace
 * through the normal set-domain-ioctl. The kernel will enforce that the
 * GPU relinquishes the VMA before it is returned back to the system
 * i.e. upon free(), munmap() or process termination. However, the userspace
 * malloc() library may not immediately relinquish the VMA after free() and
 * instead reuse it whilst the GPU is still reading and writing to the VMA.
 * Caveat emptor.
 *
 * Also note that the object created here is not currently a "first class"
 * object, in that several ioctls are banned. These are the CPU access
 * ioctls: mmap(), pwrite and pread. In practice, you are expected to use
 * direct access via your pointer rather than use those ioctls. Another
 * restriction is that we do not allow userptr surfaces to be pinned to the
 * hardware and so we reject any attempt to create a framebuffer out of a
 * userptr.
 *
 * If you think this is a good interface to use to pass GPU memory between
 * drivers, please use dma-buf instead. In fact, wherever possible use
 * dma-buf instead.
 */
int
i915_gem_userptr_ioctl(struct drm_device *dev,
                       void *data,
                       struct drm_file *file)
{
        static struct lock_class_key lock_class;
        struct drm_i915_private *dev_priv = to_i915(dev);
        struct drm_i915_gem_userptr *args = data;
        struct drm_i915_gem_object *obj;
        int ret;
        u32 handle;

        if (!HAS_LLC(dev_priv) && !HAS_SNOOP(dev_priv)) {
                /* We cannot support coherent userptr objects on hw without
                 * LLC and broken snooping.
                 */
                return -ENODEV;
        }

        if (args->flags & ~(I915_USERPTR_READ_ONLY |
                            I915_USERPTR_UNSYNCHRONIZED))
                return -EINVAL;

        /*
         * XXX: There is a prevalence of the assumption that we fit the
         * object's page count inside a 32bit _signed_ variable. Let's document
         * this and catch if we ever need to fix it. In the meantime, if you do
         * spot such a local variable, please consider fixing!
         *
         * Aside from our own locals (for which we have no excuse!):
         * - sg_table embeds unsigned int for num_pages
         * - get_user_pages*() mixed ints with longs
         */

        if (args->user_size >> PAGE_SHIFT > INT_MAX)
                return -E2BIG;

        if (overflows_type(args->user_size, obj->base.size))
                return -E2BIG;

        if (!args->user_size)
                return -EINVAL;

        if (offset_in_page(args->user_ptr | args->user_size))
                return -EINVAL;

        if (!access_ok((char __user *)(unsigned long)args->user_ptr, args->user_size))
                return -EFAULT;

        if (args->flags & I915_USERPTR_READ_ONLY) {
                /*
                 * On almost all of the older hw, we cannot tell the GPU that
                 * a page is readonly.
                 */
                if (!dev_priv->gt.vm->has_read_only)
                        return -ENODEV;
        }

        obj = i915_gem_object_alloc();
        if (obj == NULL)
                return -ENOMEM;

        drm_gem_private_object_init(dev, &obj->base, args->user_size);
        i915_gem_object_init(obj, &i915_gem_userptr_ops, &lock_class);
        obj->read_domains = I915_GEM_DOMAIN_CPU;
        obj->write_domain = I915_GEM_DOMAIN_CPU;
        i915_gem_object_set_cache_coherency(obj, I915_CACHE_LLC);

        obj->userptr.ptr = args->user_ptr;
        if (args->flags & I915_USERPTR_READ_ONLY)
                i915_gem_object_set_readonly(obj);

        /* And keep a pointer to the current->mm for resolving the user pages
         * at binding. This means that we need to hook into the mmu_notifier
         * in order to detect if the mmu is destroyed.
         */
        ret = i915_gem_userptr_init__mm_struct(obj);
        if (ret == 0)
                ret = i915_gem_userptr_init__mmu_notifier(obj, args->flags);
        if (ret == 0)
                ret = drm_gem_handle_create(file, &obj->base, &handle);

        /* drop reference from allocate - handle holds it now */
        i915_gem_object_put(obj);
        if (ret)
                return ret;

        args->handle = handle;
        return 0;
}

int i915_gem_init_userptr(struct drm_i915_private *dev_priv)
{
        spin_lock_init(&dev_priv->mm_lock);
        hash_init(dev_priv->mm_structs);

        dev_priv->mm.userptr_wq =
                alloc_workqueue("i915-userptr-acquire",
                                WQ_HIGHPRI | WQ_UNBOUND,
                                0);
        if (!dev_priv->mm.userptr_wq)
                return -ENOMEM;

        return 0;
}

void i915_gem_cleanup_userptr(struct drm_i915_private *dev_priv)
{
        destroy_workqueue(dev_priv->mm.userptr_wq);
}