linux/drivers/iommu/iova.c
<<
>>
Prefs
   1// SPDX-License-Identifier: GPL-2.0-only
   2/*
   3 * Copyright © 2006-2009, Intel Corporation.
   4 *
   5 * Author: Anil S Keshavamurthy <anil.s.keshavamurthy@intel.com>
   6 */
   7
   8#include <linux/iova.h>
   9#include <linux/module.h>
  10#include <linux/slab.h>
  11#include <linux/smp.h>
  12#include <linux/bitops.h>
  13#include <linux/cpu.h>
  14
  15/* The anchor node sits above the top of the usable address space */
  16#define IOVA_ANCHOR     ~0UL
  17
/*
 * Forward declarations for helpers that are used before their definitions
 * appear further down in this file.
 */

/* Range-cache (rcache) helpers. */
static bool iova_rcache_insert(struct iova_domain *iovad,
                               unsigned long pfn,
                               unsigned long size);
static unsigned long iova_rcache_get(struct iova_domain *iovad,
                                     unsigned long size,
                                     unsigned long limit_pfn);
static void init_iova_rcaches(struct iova_domain *iovad);
static void free_iova_rcaches(struct iova_domain *iovad);
/* Flush-queue helpers. */
static void fq_destroy_all_entries(struct iova_domain *iovad);
static void fq_flush_timeout(struct timer_list *t);
  28
  29void
  30init_iova_domain(struct iova_domain *iovad, unsigned long granule,
  31        unsigned long start_pfn)
  32{
  33        /*
  34         * IOVA granularity will normally be equal to the smallest
  35         * supported IOMMU page size; both *must* be capable of
  36         * representing individual CPU pages exactly.
  37         */
  38        BUG_ON((granule > PAGE_SIZE) || !is_power_of_2(granule));
  39
  40        spin_lock_init(&iovad->iova_rbtree_lock);
  41        iovad->rbroot = RB_ROOT;
  42        iovad->cached_node = &iovad->anchor.node;
  43        iovad->cached32_node = &iovad->anchor.node;
  44        iovad->granule = granule;
  45        iovad->start_pfn = start_pfn;
  46        iovad->dma_32bit_pfn = 1UL << (32 - iova_shift(iovad));
  47        iovad->max32_alloc_size = iovad->dma_32bit_pfn;
  48        iovad->flush_cb = NULL;
  49        iovad->fq = NULL;
  50        iovad->anchor.pfn_lo = iovad->anchor.pfn_hi = IOVA_ANCHOR;
  51        rb_link_node(&iovad->anchor.node, NULL, &iovad->rbroot.rb_node);
  52        rb_insert_color(&iovad->anchor.node, &iovad->rbroot);
  53        init_iova_rcaches(iovad);
  54}
  55EXPORT_SYMBOL_GPL(init_iova_domain);
  56
  57bool has_iova_flush_queue(struct iova_domain *iovad)
  58{
  59        return !!iovad->fq;
  60}
  61
  62static void free_iova_flush_queue(struct iova_domain *iovad)
  63{
  64        if (!has_iova_flush_queue(iovad))
  65                return;
  66
  67        if (timer_pending(&iovad->fq_timer))
  68                del_timer(&iovad->fq_timer);
  69
  70        fq_destroy_all_entries(iovad);
  71
  72        free_percpu(iovad->fq);
  73
  74        iovad->fq         = NULL;
  75        iovad->flush_cb   = NULL;
  76        iovad->entry_dtor = NULL;
  77}
  78
  79int init_iova_flush_queue(struct iova_domain *iovad,
  80                          iova_flush_cb flush_cb, iova_entry_dtor entry_dtor)
  81{
  82        struct iova_fq __percpu *queue;
  83        int cpu;
  84
  85        atomic64_set(&iovad->fq_flush_start_cnt,  0);
  86        atomic64_set(&iovad->fq_flush_finish_cnt, 0);
  87
  88        queue = alloc_percpu(struct iova_fq);
  89        if (!queue)
  90                return -ENOMEM;
  91
  92        iovad->flush_cb   = flush_cb;
  93        iovad->entry_dtor = entry_dtor;
  94
  95        for_each_possible_cpu(cpu) {
  96                struct iova_fq *fq;
  97
  98                fq = per_cpu_ptr(queue, cpu);
  99                fq->head = 0;
 100                fq->tail = 0;
 101
 102                spin_lock_init(&fq->lock);
 103        }
 104
 105        smp_wmb();
 106
 107        iovad->fq = queue;
 108
 109        timer_setup(&iovad->fq_timer, fq_flush_timeout, 0);
 110        atomic_set(&iovad->fq_timer_on, 0);
 111
 112        return 0;
 113}
 114EXPORT_SYMBOL_GPL(init_iova_flush_queue);
 115
 116static struct rb_node *
 117__get_cached_rbnode(struct iova_domain *iovad, unsigned long limit_pfn)
 118{
 119        if (limit_pfn <= iovad->dma_32bit_pfn)
 120                return iovad->cached32_node;
 121
 122        return iovad->cached_node;
 123}
 124
 125static void
 126__cached_rbnode_insert_update(struct iova_domain *iovad, struct iova *new)
 127{
 128        if (new->pfn_hi < iovad->dma_32bit_pfn)
 129                iovad->cached32_node = &new->node;
 130        else
 131                iovad->cached_node = &new->node;
 132}
 133
 134static void
 135__cached_rbnode_delete_update(struct iova_domain *iovad, struct iova *free)
 136{
 137        struct iova *cached_iova;
 138
 139        cached_iova = rb_entry(iovad->cached32_node, struct iova, node);
 140        if (free == cached_iova ||
 141            (free->pfn_hi < iovad->dma_32bit_pfn &&
 142             free->pfn_lo >= cached_iova->pfn_lo)) {
 143                iovad->cached32_node = rb_next(&free->node);
 144                iovad->max32_alloc_size = iovad->dma_32bit_pfn;
 145        }
 146
 147        cached_iova = rb_entry(iovad->cached_node, struct iova, node);
 148        if (free->pfn_lo >= cached_iova->pfn_lo)
 149                iovad->cached_node = rb_next(&free->node);
 150}
 151
 152/* Insert the iova into domain rbtree by holding writer lock */
 153static void
 154iova_insert_rbtree(struct rb_root *root, struct iova *iova,
 155                   struct rb_node *start)
 156{
 157        struct rb_node **new, *parent = NULL;
 158
 159        new = (start) ? &start : &(root->rb_node);
 160        /* Figure out where to put new node */
 161        while (*new) {
 162                struct iova *this = rb_entry(*new, struct iova, node);
 163
 164                parent = *new;
 165
 166                if (iova->pfn_lo < this->pfn_lo)
 167                        new = &((*new)->rb_left);
 168                else if (iova->pfn_lo > this->pfn_lo)
 169                        new = &((*new)->rb_right);
 170                else {
 171                        WARN_ON(1); /* this should not happen */
 172                        return;
 173                }
 174        }
 175        /* Add new node and rebalance tree. */
 176        rb_link_node(&iova->node, parent, new);
 177        rb_insert_color(&iova->node, root);
 178}
 179
/*
 * Find a free range of @size page frames that ends below @limit_pfn, store
 * it in @new and link @new into the domain's rbtree.
 *
 * @limit_pfn acts as an exclusive upper bound: the candidate range starts at
 * (limit_pfn - size), optionally rounded down for alignment, so its last pfn
 * is at most limit_pfn - 1.
 *
 * Returns 0 on success, or -ENOMEM when no suitable gap was found at or
 * above iovad->start_pfn.  The rbtree lock is taken and released here.
 */
static int __alloc_and_insert_iova_range(struct iova_domain *iovad,
                unsigned long size, unsigned long limit_pfn,
                        struct iova *new, bool size_aligned)
{
        struct rb_node *curr, *prev;
        struct iova *curr_iova;
        unsigned long flags;
        unsigned long new_pfn;
        unsigned long align_mask = ~0UL;

        /*
         * For aligned requests, clear the low fls_long(size - 1) bits of the
         * candidate so that it is aligned to the power of two >= size.
         * NOTE(review): size == 0 would make the shift count equal to the
         * word width; callers are presumably expected to pass size >= 1.
         */
        if (size_aligned)
                align_mask <<= fls_long(size - 1);

        /* Walk the tree backwards */
        spin_lock_irqsave(&iovad->iova_rbtree_lock, flags);
        /*
         * Fast failure: a request confined to the 32-bit space that is at
         * least as large as the recorded hint is rejected without a search.
         */
        if (limit_pfn <= iovad->dma_32bit_pfn &&
                        size >= iovad->max32_alloc_size)
                goto iova32_full;

        curr = __get_cached_rbnode(iovad, limit_pfn);
        curr_iova = rb_entry(curr, struct iova, node);
        /*
         * Each step clamps the limit to the start of the current node,
         * computes the candidate start just below it, and moves to the
         * previous node.  The walk continues while there is a previous node
         * whose end reaches the candidate.  curr_iova is computed even when
         * curr is NULL, but it is only dereferenced after the "curr &&"
         * check in the loop condition.
         */
        do {
                limit_pfn = min(limit_pfn, curr_iova->pfn_lo);
                new_pfn = (limit_pfn - size) & align_mask;
                prev = curr;
                curr = rb_prev(curr);
                curr_iova = rb_entry(curr, struct iova, node);
        } while (curr && new_pfn <= curr_iova->pfn_hi);

        /*
         * limit_pfn < size means the subtraction above wrapped around; a
         * candidate below start_pfn is outside the allowed range.  Either
         * way, record the failed size as the new hint before failing.
         */
        if (limit_pfn < size || new_pfn < iovad->start_pfn) {
                iovad->max32_alloc_size = size;
                goto iova32_full;
        }

        /* pfn_lo will point to size aligned address if size_aligned is set */
        new->pfn_lo = new_pfn;
        new->pfn_hi = new->pfn_lo + size - 1;

        /* If we have 'prev', it's a valid place to start the insertion. */
        iova_insert_rbtree(&iovad->rbroot, new, prev);
        __cached_rbnode_insert_update(iovad, new);

        spin_unlock_irqrestore(&iovad->iova_rbtree_lock, flags);
        return 0;

iova32_full:
        spin_unlock_irqrestore(&iovad->iova_rbtree_lock, flags);
        return -ENOMEM;
}
 229
/*
 * Slab cache for struct iova objects.  It is created by the first
 * iova_cache_get() and destroyed by the last iova_cache_put();
 * iova_cache_users counts the outstanding gets and iova_cache_mutex
 * serialises both functions.
 */
static struct kmem_cache *iova_cache;
static unsigned int iova_cache_users;
static DEFINE_MUTEX(iova_cache_mutex);
 233
 234struct iova *alloc_iova_mem(void)
 235{
 236        return kmem_cache_alloc(iova_cache, GFP_ATOMIC);
 237}
 238EXPORT_SYMBOL(alloc_iova_mem);
 239
 240void free_iova_mem(struct iova *iova)
 241{
 242        if (iova->pfn_lo != IOVA_ANCHOR)
 243                kmem_cache_free(iova_cache, iova);
 244}
 245EXPORT_SYMBOL(free_iova_mem);
 246
 247int iova_cache_get(void)
 248{
 249        mutex_lock(&iova_cache_mutex);
 250        if (!iova_cache_users) {
 251                iova_cache = kmem_cache_create(
 252                        "iommu_iova", sizeof(struct iova), 0,
 253                        SLAB_HWCACHE_ALIGN, NULL);
 254                if (!iova_cache) {
 255                        mutex_unlock(&iova_cache_mutex);
 256                        printk(KERN_ERR "Couldn't create iova cache\n");
 257                        return -ENOMEM;
 258                }
 259        }
 260
 261        iova_cache_users++;
 262        mutex_unlock(&iova_cache_mutex);
 263
 264        return 0;
 265}
 266EXPORT_SYMBOL_GPL(iova_cache_get);
 267
 268void iova_cache_put(void)
 269{
 270        mutex_lock(&iova_cache_mutex);
 271        if (WARN_ON(!iova_cache_users)) {
 272                mutex_unlock(&iova_cache_mutex);
 273                return;
 274        }
 275        iova_cache_users--;
 276        if (!iova_cache_users)
 277                kmem_cache_destroy(iova_cache);
 278        mutex_unlock(&iova_cache_mutex);
 279}
 280EXPORT_SYMBOL_GPL(iova_cache_put);
 281
 282/**
 283 * alloc_iova - allocates an iova
 284 * @iovad: - iova domain in question
 285 * @size: - size of page frames to allocate
 286 * @limit_pfn: - max limit address
 287 * @size_aligned: - set if size_aligned address range is required
 288 * This function allocates an iova in the range iovad->start_pfn to limit_pfn,
 289 * searching top-down from limit_pfn to iovad->start_pfn. If the size_aligned
 290 * flag is set then the allocated address iova->pfn_lo will be naturally
 291 * aligned on roundup_power_of_two(size).
 292 */
 293struct iova *
 294alloc_iova(struct iova_domain *iovad, unsigned long size,
 295        unsigned long limit_pfn,
 296        bool size_aligned)
 297{
 298        struct iova *new_iova;
 299        int ret;
 300
 301        new_iova = alloc_iova_mem();
 302        if (!new_iova)
 303                return NULL;
 304
 305        ret = __alloc_and_insert_iova_range(iovad, size, limit_pfn + 1,
 306                        new_iova, size_aligned);
 307
 308        if (ret) {
 309                free_iova_mem(new_iova);
 310                return NULL;
 311        }
 312
 313        return new_iova;
 314}
 315EXPORT_SYMBOL_GPL(alloc_iova);
 316
 317static struct iova *
 318private_find_iova(struct iova_domain *iovad, unsigned long pfn)
 319{
 320        struct rb_node *node = iovad->rbroot.rb_node;
 321
 322        assert_spin_locked(&iovad->iova_rbtree_lock);
 323
 324        while (node) {
 325                struct iova *iova = rb_entry(node, struct iova, node);
 326
 327                if (pfn < iova->pfn_lo)
 328                        node = node->rb_left;
 329                else if (pfn > iova->pfn_hi)
 330                        node = node->rb_right;
 331                else
 332                        return iova;    /* pfn falls within iova's range */
 333        }
 334
 335        return NULL;
 336}
 337
 338static void private_free_iova(struct iova_domain *iovad, struct iova *iova)
 339{
 340        assert_spin_locked(&iovad->iova_rbtree_lock);
 341        __cached_rbnode_delete_update(iovad, iova);
 342        rb_erase(&iova->node, &iovad->rbroot);
 343        free_iova_mem(iova);
 344}
 345
 346/**
 347 * find_iova - finds an iova for a given pfn
 348 * @iovad: - iova domain in question.
 349 * @pfn: - page frame number
 350 * This function finds and returns an iova belonging to the
 351 * given doamin which matches the given pfn.
 352 */
 353struct iova *find_iova(struct iova_domain *iovad, unsigned long pfn)
 354{
 355        unsigned long flags;
 356        struct iova *iova;
 357
 358        /* Take the lock so that no other thread is manipulating the rbtree */
 359        spin_lock_irqsave(&iovad->iova_rbtree_lock, flags);
 360        iova = private_find_iova(iovad, pfn);
 361        spin_unlock_irqrestore(&iovad->iova_rbtree_lock, flags);
 362        return iova;
 363}
 364EXPORT_SYMBOL_GPL(find_iova);
 365
 366/**
 367 * __free_iova - frees the given iova
 368 * @iovad: iova domain in question.
 369 * @iova: iova in question.
 370 * Frees the given iova belonging to the giving domain
 371 */
 372void
 373__free_iova(struct iova_domain *iovad, struct iova *iova)
 374{
 375        unsigned long flags;
 376
 377        spin_lock_irqsave(&iovad->iova_rbtree_lock, flags);
 378        private_free_iova(iovad, iova);
 379        spin_unlock_irqrestore(&iovad->iova_rbtree_lock, flags);
 380}
 381EXPORT_SYMBOL_GPL(__free_iova);
 382
 383/**
 384 * free_iova - finds and frees the iova for a given pfn
 385 * @iovad: - iova domain in question.
 386 * @pfn: - pfn that is allocated previously
 387 * This functions finds an iova for a given pfn and then
 388 * frees the iova from that domain.
 389 */
 390void
 391free_iova(struct iova_domain *iovad, unsigned long pfn)
 392{
 393        struct iova *iova = find_iova(iovad, pfn);
 394
 395        if (iova)
 396                __free_iova(iovad, iova);
 397
 398}
 399EXPORT_SYMBOL_GPL(free_iova);
 400
 401/**
 402 * alloc_iova_fast - allocates an iova from rcache
 403 * @iovad: - iova domain in question
 404 * @size: - size of page frames to allocate
 405 * @limit_pfn: - max limit address
 406 * @flush_rcache: - set to flush rcache on regular allocation failure
 407 * This function tries to satisfy an iova allocation from the rcache,
 408 * and falls back to regular allocation on failure. If regular allocation
 409 * fails too and the flush_rcache flag is set then the rcache will be flushed.
 410*/
 411unsigned long
 412alloc_iova_fast(struct iova_domain *iovad, unsigned long size,
 413                unsigned long limit_pfn, bool flush_rcache)
 414{
 415        unsigned long iova_pfn;
 416        struct iova *new_iova;
 417
 418        iova_pfn = iova_rcache_get(iovad, size, limit_pfn + 1);
 419        if (iova_pfn)
 420                return iova_pfn;
 421
 422retry:
 423        new_iova = alloc_iova(iovad, size, limit_pfn, true);
 424        if (!new_iova) {
 425                unsigned int cpu;
 426
 427                if (!flush_rcache)
 428                        return 0;
 429
 430                /* Try replenishing IOVAs by flushing rcache. */
 431                flush_rcache = false;
 432                for_each_online_cpu(cpu)
 433                        free_cpu_cached_iovas(cpu, iovad);
 434                goto retry;
 435        }
 436
 437        return new_iova->pfn_lo;
 438}
 439EXPORT_SYMBOL_GPL(alloc_iova_fast);
 440
 441/**
 442 * free_iova_fast - free iova pfn range into rcache
 443 * @iovad: - iova domain in question.
 444 * @pfn: - pfn that is allocated previously
 445 * @size: - # of pages in range
 446 * This functions frees an iova range by trying to put it into the rcache,
 447 * falling back to regular iova deallocation via free_iova() if this fails.
 448 */
 449void
 450free_iova_fast(struct iova_domain *iovad, unsigned long pfn, unsigned long size)
 451{
 452        if (iova_rcache_insert(iovad, pfn, size))
 453                return;
 454
 455        free_iova(iovad, pfn);
 456}
 457EXPORT_SYMBOL_GPL(free_iova_fast);
 458
/*
 * Iterate index @i over the occupied slots of ring @fq, from head up to
 * (not including) tail, wrapping at IOVA_FQ_SIZE.  head and tail are
 * re-read on every evaluation, so the loop body may move them.
 */
#define fq_ring_for_each(i, fq) \
        for ((i) = (fq)->head; (i) != (fq)->tail; (i) = ((i) + 1) % IOVA_FQ_SIZE)
 461
 462static inline bool fq_full(struct iova_fq *fq)
 463{
 464        assert_spin_locked(&fq->lock);
 465        return (((fq->tail + 1) % IOVA_FQ_SIZE) == fq->head);
 466}
 467
 468static inline unsigned fq_ring_add(struct iova_fq *fq)
 469{
 470        unsigned idx = fq->tail;
 471
 472        assert_spin_locked(&fq->lock);
 473
 474        fq->tail = (idx + 1) % IOVA_FQ_SIZE;
 475
 476        return idx;
 477}
 478
 479static void fq_ring_free(struct iova_domain *iovad, struct iova_fq *fq)
 480{
 481        u64 counter = atomic64_read(&iovad->fq_flush_finish_cnt);
 482        unsigned idx;
 483
 484        assert_spin_locked(&fq->lock);
 485
 486        fq_ring_for_each(idx, fq) {
 487
 488                if (fq->entries[idx].counter >= counter)
 489                        break;
 490
 491                if (iovad->entry_dtor)
 492                        iovad->entry_dtor(fq->entries[idx].data);
 493
 494                free_iova_fast(iovad,
 495                               fq->entries[idx].iova_pfn,
 496                               fq->entries[idx].pages);
 497
 498                fq->head = (fq->head + 1) % IOVA_FQ_SIZE;
 499        }
 500}
 501
/*
 * Run the domain's flush callback, bracketed by the two flush counters:
 * the start counter is bumped before the callback and the finish counter
 * after it.  queue_iova() stamps entries with the start counter and
 * fq_ring_free() releases entries whose stamp is below the finish counter.
 */
static void iova_domain_flush(struct iova_domain *iovad)
{
        atomic64_inc(&iovad->fq_flush_start_cnt);
        iovad->flush_cb(iovad);
        atomic64_inc(&iovad->fq_flush_finish_cnt);
}
 508
 509static void fq_destroy_all_entries(struct iova_domain *iovad)
 510{
 511        int cpu;
 512
 513        /*
 514         * This code runs when the iova_domain is being detroyed, so don't
 515         * bother to free iovas, just call the entry_dtor on all remaining
 516         * entries.
 517         */
 518        if (!iovad->entry_dtor)
 519                return;
 520
 521        for_each_possible_cpu(cpu) {
 522                struct iova_fq *fq = per_cpu_ptr(iovad->fq, cpu);
 523                int idx;
 524
 525                fq_ring_for_each(idx, fq)
 526                        iovad->entry_dtor(fq->entries[idx].data);
 527        }
 528}
 529
 530static void fq_flush_timeout(struct timer_list *t)
 531{
 532        struct iova_domain *iovad = from_timer(iovad, t, fq_timer);
 533        int cpu;
 534
 535        atomic_set(&iovad->fq_timer_on, 0);
 536        iova_domain_flush(iovad);
 537
 538        for_each_possible_cpu(cpu) {
 539                unsigned long flags;
 540                struct iova_fq *fq;
 541
 542                fq = per_cpu_ptr(iovad->fq, cpu);
 543                spin_lock_irqsave(&fq->lock, flags);
 544                fq_ring_free(iovad, fq);
 545                spin_unlock_irqrestore(&fq->lock, flags);
 546        }
 547}
 548
 549void queue_iova(struct iova_domain *iovad,
 550                unsigned long pfn, unsigned long pages,
 551                unsigned long data)
 552{
 553        struct iova_fq *fq = raw_cpu_ptr(iovad->fq);
 554        unsigned long flags;
 555        unsigned idx;
 556
 557        spin_lock_irqsave(&fq->lock, flags);
 558
 559        /*
 560         * First remove all entries from the flush queue that have already been
 561         * flushed out on another CPU. This makes the fq_full() check below less
 562         * likely to be true.
 563         */
 564        fq_ring_free(iovad, fq);
 565
 566        if (fq_full(fq)) {
 567                iova_domain_flush(iovad);
 568                fq_ring_free(iovad, fq);
 569        }
 570
 571        idx = fq_ring_add(fq);
 572
 573        fq->entries[idx].iova_pfn = pfn;
 574        fq->entries[idx].pages    = pages;
 575        fq->entries[idx].data     = data;
 576        fq->entries[idx].counter  = atomic64_read(&iovad->fq_flush_start_cnt);
 577
 578        spin_unlock_irqrestore(&fq->lock, flags);
 579
 580        if (atomic_cmpxchg(&iovad->fq_timer_on, 0, 1) == 0)
 581                mod_timer(&iovad->fq_timer,
 582                          jiffies + msecs_to_jiffies(IOVA_FQ_TIMEOUT));
 583}
 584EXPORT_SYMBOL_GPL(queue_iova);
 585
 586/**
 587 * put_iova_domain - destroys the iova doamin
 588 * @iovad: - iova domain in question.
 589 * All the iova's in that domain are destroyed.
 590 */
 591void put_iova_domain(struct iova_domain *iovad)
 592{
 593        struct iova *iova, *tmp;
 594
 595        free_iova_flush_queue(iovad);
 596        free_iova_rcaches(iovad);
 597        rbtree_postorder_for_each_entry_safe(iova, tmp, &iovad->rbroot, node)
 598                free_iova_mem(iova);
 599}
 600EXPORT_SYMBOL_GPL(put_iova_domain);
 601
 602static int
 603__is_range_overlap(struct rb_node *node,
 604        unsigned long pfn_lo, unsigned long pfn_hi)
 605{
 606        struct iova *iova = rb_entry(node, struct iova, node);
 607
 608        if ((pfn_lo <= iova->pfn_hi) && (pfn_hi >= iova->pfn_lo))
 609                return 1;
 610        return 0;
 611}
 612
 613static inline struct iova *
 614alloc_and_init_iova(unsigned long pfn_lo, unsigned long pfn_hi)
 615{
 616        struct iova *iova;
 617
 618        iova = alloc_iova_mem();
 619        if (iova) {
 620                iova->pfn_lo = pfn_lo;
 621                iova->pfn_hi = pfn_hi;
 622        }
 623
 624        return iova;
 625}
 626
 627static struct iova *
 628__insert_new_range(struct iova_domain *iovad,
 629        unsigned long pfn_lo, unsigned long pfn_hi)
 630{
 631        struct iova *iova;
 632
 633        iova = alloc_and_init_iova(pfn_lo, pfn_hi);
 634        if (iova)
 635                iova_insert_rbtree(&iovad->rbroot, iova, NULL);
 636
 637        return iova;
 638}
 639
 640static void
 641__adjust_overlap_range(struct iova *iova,
 642        unsigned long *pfn_lo, unsigned long *pfn_hi)
 643{
 644        if (*pfn_lo < iova->pfn_lo)
 645                iova->pfn_lo = *pfn_lo;
 646        if (*pfn_hi > iova->pfn_hi)
 647                *pfn_lo = iova->pfn_hi + 1;
 648}
 649
 650/**
 651 * reserve_iova - reserves an iova in the given range
 652 * @iovad: - iova domain pointer
 653 * @pfn_lo: - lower page frame address
 654 * @pfn_hi:- higher pfn adderss
 655 * This function allocates reserves the address range from pfn_lo to pfn_hi so
 656 * that this address is not dished out as part of alloc_iova.
 657 */
 658struct iova *
 659reserve_iova(struct iova_domain *iovad,
 660        unsigned long pfn_lo, unsigned long pfn_hi)
 661{
 662        struct rb_node *node;
 663        unsigned long flags;
 664        struct iova *iova;
 665        unsigned int overlap = 0;
 666
 667        /* Don't allow nonsensical pfns */
 668        if (WARN_ON((pfn_hi | pfn_lo) > (ULLONG_MAX >> iova_shift(iovad))))
 669                return NULL;
 670
 671        spin_lock_irqsave(&iovad->iova_rbtree_lock, flags);
 672        for (node = rb_first(&iovad->rbroot); node; node = rb_next(node)) {
 673                if (__is_range_overlap(node, pfn_lo, pfn_hi)) {
 674                        iova = rb_entry(node, struct iova, node);
 675                        __adjust_overlap_range(iova, &pfn_lo, &pfn_hi);
 676                        if ((pfn_lo >= iova->pfn_lo) &&
 677                                (pfn_hi <= iova->pfn_hi))
 678                                goto finish;
 679                        overlap = 1;
 680
 681                } else if (overlap)
 682                                break;
 683        }
 684
 685        /* We are here either because this is the first reserver node
 686         * or need to insert remaining non overlap addr range
 687         */
 688        iova = __insert_new_range(iovad, pfn_lo, pfn_hi);
 689finish:
 690
 691        spin_unlock_irqrestore(&iovad->iova_rbtree_lock, flags);
 692        return iova;
 693}
 694EXPORT_SYMBOL_GPL(reserve_iova);
 695
 696/**
 697 * copy_reserved_iova - copies the reserved between domains
 698 * @from: - source doamin from where to copy
 699 * @to: - destination domin where to copy
 700 * This function copies reserved iova's from one doamin to
 701 * other.
 702 */
 703void
 704copy_reserved_iova(struct iova_domain *from, struct iova_domain *to)
 705{
 706        unsigned long flags;
 707        struct rb_node *node;
 708
 709        spin_lock_irqsave(&from->iova_rbtree_lock, flags);
 710        for (node = rb_first(&from->rbroot); node; node = rb_next(node)) {
 711                struct iova *iova = rb_entry(node, struct iova, node);
 712                struct iova *new_iova;
 713
 714                if (iova->pfn_lo == IOVA_ANCHOR)
 715                        continue;
 716
 717                new_iova = reserve_iova(to, iova->pfn_lo, iova->pfn_hi);
 718                if (!new_iova)
 719                        printk(KERN_ERR "Reserve iova range %lx@%lx failed\n",
 720                                iova->pfn_lo, iova->pfn_lo);
 721        }
 722        spin_unlock_irqrestore(&from->iova_rbtree_lock, flags);
 723}
 724EXPORT_SYMBOL_GPL(copy_reserved_iova);
 725
 726struct iova *
 727split_and_remove_iova(struct iova_domain *iovad, struct iova *iova,
 728                      unsigned long pfn_lo, unsigned long pfn_hi)
 729{
 730        unsigned long flags;
 731        struct iova *prev = NULL, *next = NULL;
 732
 733        spin_lock_irqsave(&iovad->iova_rbtree_lock, flags);
 734        if (iova->pfn_lo < pfn_lo) {
 735                prev = alloc_and_init_iova(iova->pfn_lo, pfn_lo - 1);
 736                if (prev == NULL)
 737                        goto error;
 738        }
 739        if (iova->pfn_hi > pfn_hi) {
 740                next = alloc_and_init_iova(pfn_hi + 1, iova->pfn_hi);
 741                if (next == NULL)
 742                        goto error;
 743        }
 744
 745        __cached_rbnode_delete_update(iovad, iova);
 746        rb_erase(&iova->node, &iovad->rbroot);
 747
 748        if (prev) {
 749                iova_insert_rbtree(&iovad->rbroot, prev, NULL);
 750                iova->pfn_lo = pfn_lo;
 751        }
 752        if (next) {
 753                iova_insert_rbtree(&iovad->rbroot, next, NULL);
 754                iova->pfn_hi = pfn_hi;
 755        }
 756        spin_unlock_irqrestore(&iovad->iova_rbtree_lock, flags);
 757
 758        return iova;
 759
 760error:
 761        spin_unlock_irqrestore(&iovad->iova_rbtree_lock, flags);
 762        if (prev)
 763                free_iova_mem(prev);
 764        return NULL;
 765}
 766
 767/*
 768 * Magazine caches for IOVA ranges.  For an introduction to magazines,
 769 * see the USENIX 2001 paper "Magazines and Vmem: Extending the Slab
 770 * Allocator to Many CPUs and Arbitrary Resources" by Bonwick and Adams.
 771 * For simplicity, we use a static magazine size and don't implement the
 772 * dynamic size tuning described in the paper.
 773 */
 774
/* Number of pfn slots in one magazine. */
#define IOVA_MAG_SIZE 128

/* A fixed-capacity stack of cached IOVA start pfns. */
struct iova_magazine {
        /* Number of valid entries at the front of pfns[]. */
        unsigned long size;
        /* Cached pfns; entries are pushed at and popped from index size. */
        unsigned long pfns[IOVA_MAG_SIZE];
};
 781
/* Per-CPU part of a range cache: two magazines guarded by one lock. */
struct iova_cpu_rcache {
        /* Protects loaded and prev. */
        spinlock_t lock;
        /* Magazine that pushes and pops operate on. */
        struct iova_magazine *loaded;
        /* Spare magazine that is swapped with loaded when that helps. */
        struct iova_magazine *prev;
};
 787
 788static struct iova_magazine *iova_magazine_alloc(gfp_t flags)
 789{
 790        return kzalloc(sizeof(struct iova_magazine), flags);
 791}
 792
/*
 * Free the magazine structure itself.  The pfns it holds are not released
 * here; see iova_magazine_free_pfns() for that.
 */
static void iova_magazine_free(struct iova_magazine *mag)
{
        kfree(mag);
}
 797
 798static void
 799iova_magazine_free_pfns(struct iova_magazine *mag, struct iova_domain *iovad)
 800{
 801        unsigned long flags;
 802        int i;
 803
 804        if (!mag)
 805                return;
 806
 807        spin_lock_irqsave(&iovad->iova_rbtree_lock, flags);
 808
 809        for (i = 0 ; i < mag->size; ++i) {
 810                struct iova *iova = private_find_iova(iovad, mag->pfns[i]);
 811
 812                BUG_ON(!iova);
 813                private_free_iova(iovad, iova);
 814        }
 815
 816        spin_unlock_irqrestore(&iovad->iova_rbtree_lock, flags);
 817
 818        mag->size = 0;
 819}
 820
 821static bool iova_magazine_full(struct iova_magazine *mag)
 822{
 823        return (mag && mag->size == IOVA_MAG_SIZE);
 824}
 825
 826static bool iova_magazine_empty(struct iova_magazine *mag)
 827{
 828        return (!mag || mag->size == 0);
 829}
 830
 831static unsigned long iova_magazine_pop(struct iova_magazine *mag,
 832                                       unsigned long limit_pfn)
 833{
 834        int i;
 835        unsigned long pfn;
 836
 837        BUG_ON(iova_magazine_empty(mag));
 838
 839        /* Only fall back to the rbtree if we have no suitable pfns at all */
 840        for (i = mag->size - 1; mag->pfns[i] > limit_pfn; i--)
 841                if (i == 0)
 842                        return 0;
 843
 844        /* Swap it to pop it */
 845        pfn = mag->pfns[i];
 846        mag->pfns[i] = mag->pfns[--mag->size];
 847
 848        return pfn;
 849}
 850
 851static void iova_magazine_push(struct iova_magazine *mag, unsigned long pfn)
 852{
 853        BUG_ON(iova_magazine_full(mag));
 854
 855        mag->pfns[mag->size++] = pfn;
 856}
 857
 858static void init_iova_rcaches(struct iova_domain *iovad)
 859{
 860        struct iova_cpu_rcache *cpu_rcache;
 861        struct iova_rcache *rcache;
 862        unsigned int cpu;
 863        int i;
 864
 865        for (i = 0; i < IOVA_RANGE_CACHE_MAX_SIZE; ++i) {
 866                rcache = &iovad->rcaches[i];
 867                spin_lock_init(&rcache->lock);
 868                rcache->depot_size = 0;
 869                rcache->cpu_rcaches = __alloc_percpu(sizeof(*cpu_rcache), cache_line_size());
 870                if (WARN_ON(!rcache->cpu_rcaches))
 871                        continue;
 872                for_each_possible_cpu(cpu) {
 873                        cpu_rcache = per_cpu_ptr(rcache->cpu_rcaches, cpu);
 874                        spin_lock_init(&cpu_rcache->lock);
 875                        cpu_rcache->loaded = iova_magazine_alloc(GFP_KERNEL);
 876                        cpu_rcache->prev = iova_magazine_alloc(GFP_KERNEL);
 877                }
 878        }
 879}
 880
 881/*
 882 * Try inserting IOVA range starting with 'iova_pfn' into 'rcache', and
 883 * return true on success.  Can fail if rcache is full and we can't free
 884 * space, and free_iova() (our only caller) will then return the IOVA
 885 * range to the rbtree instead.
 886 */
 887static bool __iova_rcache_insert(struct iova_domain *iovad,
 888                                 struct iova_rcache *rcache,
 889                                 unsigned long iova_pfn)
 890{
 891        struct iova_magazine *mag_to_free = NULL;
 892        struct iova_cpu_rcache *cpu_rcache;
 893        bool can_insert = false;
 894        unsigned long flags;
 895
 896        cpu_rcache = raw_cpu_ptr(rcache->cpu_rcaches);
 897        spin_lock_irqsave(&cpu_rcache->lock, flags);
 898
 899        if (!iova_magazine_full(cpu_rcache->loaded)) {
 900                can_insert = true;
 901        } else if (!iova_magazine_full(cpu_rcache->prev)) {
 902                swap(cpu_rcache->prev, cpu_rcache->loaded);
 903                can_insert = true;
 904        } else {
 905                struct iova_magazine *new_mag = iova_magazine_alloc(GFP_ATOMIC);
 906
 907                if (new_mag) {
 908                        spin_lock(&rcache->lock);
 909                        if (rcache->depot_size < MAX_GLOBAL_MAGS) {
 910                                rcache->depot[rcache->depot_size++] =
 911                                                cpu_rcache->loaded;
 912                        } else {
 913                                mag_to_free = cpu_rcache->loaded;
 914                        }
 915                        spin_unlock(&rcache->lock);
 916
 917                        cpu_rcache->loaded = new_mag;
 918                        can_insert = true;
 919                }
 920        }
 921
 922        if (can_insert)
 923                iova_magazine_push(cpu_rcache->loaded, iova_pfn);
 924
 925        spin_unlock_irqrestore(&cpu_rcache->lock, flags);
 926
 927        if (mag_to_free) {
 928                iova_magazine_free_pfns(mag_to_free, iovad);
 929                iova_magazine_free(mag_to_free);
 930        }
 931
 932        return can_insert;
 933}
 934
 935static bool iova_rcache_insert(struct iova_domain *iovad, unsigned long pfn,
 936                               unsigned long size)
 937{
 938        unsigned int log_size = order_base_2(size);
 939
 940        if (log_size >= IOVA_RANGE_CACHE_MAX_SIZE)
 941                return false;
 942
 943        return __iova_rcache_insert(iovad, &iovad->rcaches[log_size], pfn);
 944}
 945
 946/*
 947 * Caller wants to allocate a new IOVA range from 'rcache'.  If we can
 948 * satisfy the request, return a matching non-NULL range and remove
 949 * it from the 'rcache'.
 950 */
 951static unsigned long __iova_rcache_get(struct iova_rcache *rcache,
 952                                       unsigned long limit_pfn)
 953{
 954        struct iova_cpu_rcache *cpu_rcache;
 955        unsigned long iova_pfn = 0;
 956        bool has_pfn = false;
 957        unsigned long flags;
 958
 959        cpu_rcache = raw_cpu_ptr(rcache->cpu_rcaches);
 960        spin_lock_irqsave(&cpu_rcache->lock, flags);
 961
 962        if (!iova_magazine_empty(cpu_rcache->loaded)) {
 963                has_pfn = true;
 964        } else if (!iova_magazine_empty(cpu_rcache->prev)) {
 965                swap(cpu_rcache->prev, cpu_rcache->loaded);
 966                has_pfn = true;
 967        } else {
 968                spin_lock(&rcache->lock);
 969                if (rcache->depot_size > 0) {
 970                        iova_magazine_free(cpu_rcache->loaded);
 971                        cpu_rcache->loaded = rcache->depot[--rcache->depot_size];
 972                        has_pfn = true;
 973                }
 974                spin_unlock(&rcache->lock);
 975        }
 976
 977        if (has_pfn)
 978                iova_pfn = iova_magazine_pop(cpu_rcache->loaded, limit_pfn);
 979
 980        spin_unlock_irqrestore(&cpu_rcache->lock, flags);
 981
 982        return iova_pfn;
 983}
 984
 985/*
 986 * Try to satisfy IOVA allocation range from rcache.  Fail if requested
 987 * size is too big or the DMA limit we are given isn't satisfied by the
 988 * top element in the magazine.
 989 */
 990static unsigned long iova_rcache_get(struct iova_domain *iovad,
 991                                     unsigned long size,
 992                                     unsigned long limit_pfn)
 993{
 994        unsigned int log_size = order_base_2(size);
 995
 996        if (log_size >= IOVA_RANGE_CACHE_MAX_SIZE)
 997                return 0;
 998
 999        return __iova_rcache_get(&iovad->rcaches[log_size], limit_pfn - size);
1000}
1001
1002/*
1003 * free rcache data structures.
1004 */
1005static void free_iova_rcaches(struct iova_domain *iovad)
1006{
1007        struct iova_rcache *rcache;
1008        struct iova_cpu_rcache *cpu_rcache;
1009        unsigned int cpu;
1010        int i, j;
1011
1012        for (i = 0; i < IOVA_RANGE_CACHE_MAX_SIZE; ++i) {
1013                rcache = &iovad->rcaches[i];
1014                for_each_possible_cpu(cpu) {
1015                        cpu_rcache = per_cpu_ptr(rcache->cpu_rcaches, cpu);
1016                        iova_magazine_free(cpu_rcache->loaded);
1017                        iova_magazine_free(cpu_rcache->prev);
1018                }
1019                free_percpu(rcache->cpu_rcaches);
1020                for (j = 0; j < rcache->depot_size; ++j)
1021                        iova_magazine_free(rcache->depot[j]);
1022        }
1023}
1024
1025/*
1026 * free all the IOVA ranges cached by a cpu (used when cpu is unplugged)
1027 */
1028void free_cpu_cached_iovas(unsigned int cpu, struct iova_domain *iovad)
1029{
1030        struct iova_cpu_rcache *cpu_rcache;
1031        struct iova_rcache *rcache;
1032        unsigned long flags;
1033        int i;
1034
1035        for (i = 0; i < IOVA_RANGE_CACHE_MAX_SIZE; ++i) {
1036                rcache = &iovad->rcaches[i];
1037                cpu_rcache = per_cpu_ptr(rcache->cpu_rcaches, cpu);
1038                spin_lock_irqsave(&cpu_rcache->lock, flags);
1039                iova_magazine_free_pfns(cpu_rcache->loaded, iovad);
1040                iova_magazine_free_pfns(cpu_rcache->prev, iovad);
1041                spin_unlock_irqrestore(&cpu_rcache->lock, flags);
1042        }
1043}
1044
1045MODULE_AUTHOR("Anil S Keshavamurthy <anil.s.keshavamurthy@intel.com>");
1046MODULE_LICENSE("GPL");
1047