linux/drivers/iommu/iova.c
<<
>>
Prefs
   1// SPDX-License-Identifier: GPL-2.0-only
   2/*
   3 * Copyright © 2006-2009, Intel Corporation.
   4 *
   5 * Author: Anil S Keshavamurthy <anil.s.keshavamurthy@intel.com>
   6 */
   7
   8#include <linux/iova.h>
   9#include <linux/module.h>
  10#include <linux/slab.h>
  11#include <linux/smp.h>
  12#include <linux/bitops.h>
  13#include <linux/cpu.h>
  14
  15/* The anchor node sits above the top of the usable address space */
  16#define IOVA_ANCHOR     ~0UL
  17
  18static bool iova_rcache_insert(struct iova_domain *iovad,
  19                               unsigned long pfn,
  20                               unsigned long size);
  21static unsigned long iova_rcache_get(struct iova_domain *iovad,
  22                                     unsigned long size,
  23                                     unsigned long limit_pfn);
  24static void init_iova_rcaches(struct iova_domain *iovad);
  25static void free_iova_rcaches(struct iova_domain *iovad);
  26static void fq_destroy_all_entries(struct iova_domain *iovad);
  27static void fq_flush_timeout(struct timer_list *t);
  28
  29void
  30init_iova_domain(struct iova_domain *iovad, unsigned long granule,
  31        unsigned long start_pfn)
  32{
  33        /*
  34         * IOVA granularity will normally be equal to the smallest
  35         * supported IOMMU page size; both *must* be capable of
  36         * representing individual CPU pages exactly.
  37         */
  38        BUG_ON((granule > PAGE_SIZE) || !is_power_of_2(granule));
  39
  40        spin_lock_init(&iovad->iova_rbtree_lock);
  41        iovad->rbroot = RB_ROOT;
  42        iovad->cached_node = &iovad->anchor.node;
  43        iovad->cached32_node = &iovad->anchor.node;
  44        iovad->granule = granule;
  45        iovad->start_pfn = start_pfn;
  46        iovad->dma_32bit_pfn = 1UL << (32 - iova_shift(iovad));
  47        iovad->max32_alloc_size = iovad->dma_32bit_pfn;
  48        iovad->flush_cb = NULL;
  49        iovad->fq = NULL;
  50        iovad->anchor.pfn_lo = iovad->anchor.pfn_hi = IOVA_ANCHOR;
  51        rb_link_node(&iovad->anchor.node, NULL, &iovad->rbroot.rb_node);
  52        rb_insert_color(&iovad->anchor.node, &iovad->rbroot);
  53        init_iova_rcaches(iovad);
  54}
  55EXPORT_SYMBOL_GPL(init_iova_domain);
  56
  57bool has_iova_flush_queue(struct iova_domain *iovad)
  58{
  59        return !!iovad->fq;
  60}
  61
  62static void free_iova_flush_queue(struct iova_domain *iovad)
  63{
  64        if (!has_iova_flush_queue(iovad))
  65                return;
  66
  67        if (timer_pending(&iovad->fq_timer))
  68                del_timer(&iovad->fq_timer);
  69
  70        fq_destroy_all_entries(iovad);
  71
  72        free_percpu(iovad->fq);
  73
  74        iovad->fq         = NULL;
  75        iovad->flush_cb   = NULL;
  76        iovad->entry_dtor = NULL;
  77}
  78
  79int init_iova_flush_queue(struct iova_domain *iovad,
  80                          iova_flush_cb flush_cb, iova_entry_dtor entry_dtor)
  81{
  82        struct iova_fq __percpu *queue;
  83        int cpu;
  84
  85        atomic64_set(&iovad->fq_flush_start_cnt,  0);
  86        atomic64_set(&iovad->fq_flush_finish_cnt, 0);
  87
  88        queue = alloc_percpu(struct iova_fq);
  89        if (!queue)
  90                return -ENOMEM;
  91
  92        iovad->flush_cb   = flush_cb;
  93        iovad->entry_dtor = entry_dtor;
  94
  95        for_each_possible_cpu(cpu) {
  96                struct iova_fq *fq;
  97
  98                fq = per_cpu_ptr(queue, cpu);
  99                fq->head = 0;
 100                fq->tail = 0;
 101
 102                spin_lock_init(&fq->lock);
 103        }
 104
 105        smp_wmb();
 106
 107        iovad->fq = queue;
 108
 109        timer_setup(&iovad->fq_timer, fq_flush_timeout, 0);
 110        atomic_set(&iovad->fq_timer_on, 0);
 111
 112        return 0;
 113}
 114EXPORT_SYMBOL_GPL(init_iova_flush_queue);
 115
 116static struct rb_node *
 117__get_cached_rbnode(struct iova_domain *iovad, unsigned long limit_pfn)
 118{
 119        if (limit_pfn <= iovad->dma_32bit_pfn)
 120                return iovad->cached32_node;
 121
 122        return iovad->cached_node;
 123}
 124
 125static void
 126__cached_rbnode_insert_update(struct iova_domain *iovad, struct iova *new)
 127{
 128        if (new->pfn_hi < iovad->dma_32bit_pfn)
 129                iovad->cached32_node = &new->node;
 130        else
 131                iovad->cached_node = &new->node;
 132}
 133
 134static void
 135__cached_rbnode_delete_update(struct iova_domain *iovad, struct iova *free)
 136{
 137        struct iova *cached_iova;
 138
 139        cached_iova = rb_entry(iovad->cached32_node, struct iova, node);
 140        if (free == cached_iova ||
 141            (free->pfn_hi < iovad->dma_32bit_pfn &&
 142             free->pfn_lo >= cached_iova->pfn_lo)) {
 143                iovad->cached32_node = rb_next(&free->node);
 144                iovad->max32_alloc_size = iovad->dma_32bit_pfn;
 145        }
 146
 147        cached_iova = rb_entry(iovad->cached_node, struct iova, node);
 148        if (free->pfn_lo >= cached_iova->pfn_lo)
 149                iovad->cached_node = rb_next(&free->node);
 150}
 151
 152/* Insert the iova into domain rbtree by holding writer lock */
 153static void
 154iova_insert_rbtree(struct rb_root *root, struct iova *iova,
 155                   struct rb_node *start)
 156{
 157        struct rb_node **new, *parent = NULL;
 158
 159        new = (start) ? &start : &(root->rb_node);
 160        /* Figure out where to put new node */
 161        while (*new) {
 162                struct iova *this = rb_entry(*new, struct iova, node);
 163
 164                parent = *new;
 165
 166                if (iova->pfn_lo < this->pfn_lo)
 167                        new = &((*new)->rb_left);
 168                else if (iova->pfn_lo > this->pfn_lo)
 169                        new = &((*new)->rb_right);
 170                else {
 171                        WARN_ON(1); /* this should not happen */
 172                        return;
 173                }
 174        }
 175        /* Add new node and rebalance tree. */
 176        rb_link_node(&iova->node, parent, new);
 177        rb_insert_color(&iova->node, root);
 178}
 179
 180static int __alloc_and_insert_iova_range(struct iova_domain *iovad,
 181                unsigned long size, unsigned long limit_pfn,
 182                        struct iova *new, bool size_aligned)
 183{
 184        struct rb_node *curr, *prev;
 185        struct iova *curr_iova;
 186        unsigned long flags;
 187        unsigned long new_pfn;
 188        unsigned long align_mask = ~0UL;
 189
 190        if (size_aligned)
 191                align_mask <<= fls_long(size - 1);
 192
 193        /* Walk the tree backwards */
 194        spin_lock_irqsave(&iovad->iova_rbtree_lock, flags);
 195        if (limit_pfn <= iovad->dma_32bit_pfn &&
 196                        size >= iovad->max32_alloc_size)
 197                goto iova32_full;
 198
 199        curr = __get_cached_rbnode(iovad, limit_pfn);
 200        curr_iova = rb_entry(curr, struct iova, node);
 201        do {
 202                limit_pfn = min(limit_pfn, curr_iova->pfn_lo);
 203                new_pfn = (limit_pfn - size) & align_mask;
 204                prev = curr;
 205                curr = rb_prev(curr);
 206                curr_iova = rb_entry(curr, struct iova, node);
 207        } while (curr && new_pfn <= curr_iova->pfn_hi);
 208
 209        if (limit_pfn < size || new_pfn < iovad->start_pfn) {
 210                iovad->max32_alloc_size = size;
 211                goto iova32_full;
 212        }
 213
 214        /* pfn_lo will point to size aligned address if size_aligned is set */
 215        new->pfn_lo = new_pfn;
 216        new->pfn_hi = new->pfn_lo + size - 1;
 217
 218        /* If we have 'prev', it's a valid place to start the insertion. */
 219        iova_insert_rbtree(&iovad->rbroot, new, prev);
 220        __cached_rbnode_insert_update(iovad, new);
 221
 222        spin_unlock_irqrestore(&iovad->iova_rbtree_lock, flags);
 223        return 0;
 224
 225iova32_full:
 226        spin_unlock_irqrestore(&iovad->iova_rbtree_lock, flags);
 227        return -ENOMEM;
 228}
 229
 230static struct kmem_cache *iova_cache;
 231static unsigned int iova_cache_users;
 232static DEFINE_MUTEX(iova_cache_mutex);
 233
 234struct iova *alloc_iova_mem(void)
 235{
 236        return kmem_cache_alloc(iova_cache, GFP_ATOMIC);
 237}
 238EXPORT_SYMBOL(alloc_iova_mem);
 239
 240void free_iova_mem(struct iova *iova)
 241{
 242        if (iova->pfn_lo != IOVA_ANCHOR)
 243                kmem_cache_free(iova_cache, iova);
 244}
 245EXPORT_SYMBOL(free_iova_mem);
 246
 247int iova_cache_get(void)
 248{
 249        mutex_lock(&iova_cache_mutex);
 250        if (!iova_cache_users) {
 251                iova_cache = kmem_cache_create(
 252                        "iommu_iova", sizeof(struct iova), 0,
 253                        SLAB_HWCACHE_ALIGN, NULL);
 254                if (!iova_cache) {
 255                        mutex_unlock(&iova_cache_mutex);
 256                        printk(KERN_ERR "Couldn't create iova cache\n");
 257                        return -ENOMEM;
 258                }
 259        }
 260
 261        iova_cache_users++;
 262        mutex_unlock(&iova_cache_mutex);
 263
 264        return 0;
 265}
 266EXPORT_SYMBOL_GPL(iova_cache_get);
 267
 268void iova_cache_put(void)
 269{
 270        mutex_lock(&iova_cache_mutex);
 271        if (WARN_ON(!iova_cache_users)) {
 272                mutex_unlock(&iova_cache_mutex);
 273                return;
 274        }
 275        iova_cache_users--;
 276        if (!iova_cache_users)
 277                kmem_cache_destroy(iova_cache);
 278        mutex_unlock(&iova_cache_mutex);
 279}
 280EXPORT_SYMBOL_GPL(iova_cache_put);
 281
 282/**
 283 * alloc_iova - allocates an iova
 284 * @iovad: - iova domain in question
 285 * @size: - size of page frames to allocate
 286 * @limit_pfn: - max limit address
 287 * @size_aligned: - set if size_aligned address range is required
 288 * This function allocates an iova in the range iovad->start_pfn to limit_pfn,
 289 * searching top-down from limit_pfn to iovad->start_pfn. If the size_aligned
 290 * flag is set then the allocated address iova->pfn_lo will be naturally
 291 * aligned on roundup_power_of_two(size).
 292 */
 293struct iova *
 294alloc_iova(struct iova_domain *iovad, unsigned long size,
 295        unsigned long limit_pfn,
 296        bool size_aligned)
 297{
 298        struct iova *new_iova;
 299        int ret;
 300
 301        new_iova = alloc_iova_mem();
 302        if (!new_iova)
 303                return NULL;
 304
 305        ret = __alloc_and_insert_iova_range(iovad, size, limit_pfn + 1,
 306                        new_iova, size_aligned);
 307
 308        if (ret) {
 309                free_iova_mem(new_iova);
 310                return NULL;
 311        }
 312
 313        return new_iova;
 314}
 315EXPORT_SYMBOL_GPL(alloc_iova);
 316
 317static struct iova *
 318private_find_iova(struct iova_domain *iovad, unsigned long pfn)
 319{
 320        struct rb_node *node = iovad->rbroot.rb_node;
 321
 322        assert_spin_locked(&iovad->iova_rbtree_lock);
 323
 324        while (node) {
 325                struct iova *iova = rb_entry(node, struct iova, node);
 326
 327                if (pfn < iova->pfn_lo)
 328                        node = node->rb_left;
 329                else if (pfn > iova->pfn_hi)
 330                        node = node->rb_right;
 331                else
 332                        return iova;    /* pfn falls within iova's range */
 333        }
 334
 335        return NULL;
 336}
 337
 338static void private_free_iova(struct iova_domain *iovad, struct iova *iova)
 339{
 340        assert_spin_locked(&iovad->iova_rbtree_lock);
 341        __cached_rbnode_delete_update(iovad, iova);
 342        rb_erase(&iova->node, &iovad->rbroot);
 343        free_iova_mem(iova);
 344}
 345
 346/**
 347 * find_iova - finds an iova for a given pfn
 348 * @iovad: - iova domain in question.
 349 * @pfn: - page frame number
 350 * This function finds and returns an iova belonging to the
 351 * given doamin which matches the given pfn.
 352 */
 353struct iova *find_iova(struct iova_domain *iovad, unsigned long pfn)
 354{
 355        unsigned long flags;
 356        struct iova *iova;
 357
 358        /* Take the lock so that no other thread is manipulating the rbtree */
 359        spin_lock_irqsave(&iovad->iova_rbtree_lock, flags);
 360        iova = private_find_iova(iovad, pfn);
 361        spin_unlock_irqrestore(&iovad->iova_rbtree_lock, flags);
 362        return iova;
 363}
 364EXPORT_SYMBOL_GPL(find_iova);
 365
 366/**
 367 * __free_iova - frees the given iova
 368 * @iovad: iova domain in question.
 369 * @iova: iova in question.
 370 * Frees the given iova belonging to the giving domain
 371 */
 372void
 373__free_iova(struct iova_domain *iovad, struct iova *iova)
 374{
 375        unsigned long flags;
 376
 377        spin_lock_irqsave(&iovad->iova_rbtree_lock, flags);
 378        private_free_iova(iovad, iova);
 379        spin_unlock_irqrestore(&iovad->iova_rbtree_lock, flags);
 380}
 381EXPORT_SYMBOL_GPL(__free_iova);
 382
 383/**
 384 * free_iova - finds and frees the iova for a given pfn
 385 * @iovad: - iova domain in question.
 386 * @pfn: - pfn that is allocated previously
 387 * This functions finds an iova for a given pfn and then
 388 * frees the iova from that domain.
 389 */
 390void
 391free_iova(struct iova_domain *iovad, unsigned long pfn)
 392{
 393        struct iova *iova = find_iova(iovad, pfn);
 394
 395        if (iova)
 396                __free_iova(iovad, iova);
 397
 398}
 399EXPORT_SYMBOL_GPL(free_iova);
 400
 401/**
 402 * alloc_iova_fast - allocates an iova from rcache
 403 * @iovad: - iova domain in question
 404 * @size: - size of page frames to allocate
 405 * @limit_pfn: - max limit address
 406 * @flush_rcache: - set to flush rcache on regular allocation failure
 407 * This function tries to satisfy an iova allocation from the rcache,
 408 * and falls back to regular allocation on failure. If regular allocation
 409 * fails too and the flush_rcache flag is set then the rcache will be flushed.
 410*/
 411unsigned long
 412alloc_iova_fast(struct iova_domain *iovad, unsigned long size,
 413                unsigned long limit_pfn, bool flush_rcache)
 414{
 415        unsigned long iova_pfn;
 416        struct iova *new_iova;
 417
 418        iova_pfn = iova_rcache_get(iovad, size, limit_pfn + 1);
 419        if (iova_pfn)
 420                return iova_pfn;
 421
 422retry:
 423        new_iova = alloc_iova(iovad, size, limit_pfn, true);
 424        if (!new_iova) {
 425                unsigned int cpu;
 426
 427                if (!flush_rcache)
 428                        return 0;
 429
 430                /* Try replenishing IOVAs by flushing rcache. */
 431                flush_rcache = false;
 432                for_each_online_cpu(cpu)
 433                        free_cpu_cached_iovas(cpu, iovad);
 434                goto retry;
 435        }
 436
 437        return new_iova->pfn_lo;
 438}
 439EXPORT_SYMBOL_GPL(alloc_iova_fast);
 440
 441/**
 442 * free_iova_fast - free iova pfn range into rcache
 443 * @iovad: - iova domain in question.
 444 * @pfn: - pfn that is allocated previously
 445 * @size: - # of pages in range
 446 * This functions frees an iova range by trying to put it into the rcache,
 447 * falling back to regular iova deallocation via free_iova() if this fails.
 448 */
 449void
 450free_iova_fast(struct iova_domain *iovad, unsigned long pfn, unsigned long size)
 451{
 452        if (iova_rcache_insert(iovad, pfn, size))
 453                return;
 454
 455        free_iova(iovad, pfn);
 456}
 457EXPORT_SYMBOL_GPL(free_iova_fast);
 458
 459#define fq_ring_for_each(i, fq) \
 460        for ((i) = (fq)->head; (i) != (fq)->tail; (i) = ((i) + 1) % IOVA_FQ_SIZE)
 461
 462static inline bool fq_full(struct iova_fq *fq)
 463{
 464        assert_spin_locked(&fq->lock);
 465        return (((fq->tail + 1) % IOVA_FQ_SIZE) == fq->head);
 466}
 467
 468static inline unsigned fq_ring_add(struct iova_fq *fq)
 469{
 470        unsigned idx = fq->tail;
 471
 472        assert_spin_locked(&fq->lock);
 473
 474        fq->tail = (idx + 1) % IOVA_FQ_SIZE;
 475
 476        return idx;
 477}
 478
 479static void fq_ring_free(struct iova_domain *iovad, struct iova_fq *fq)
 480{
 481        u64 counter = atomic64_read(&iovad->fq_flush_finish_cnt);
 482        unsigned idx;
 483
 484        assert_spin_locked(&fq->lock);
 485
 486        fq_ring_for_each(idx, fq) {
 487
 488                if (fq->entries[idx].counter >= counter)
 489                        break;
 490
 491                if (iovad->entry_dtor)
 492                        iovad->entry_dtor(fq->entries[idx].data);
 493
 494                free_iova_fast(iovad,
 495                               fq->entries[idx].iova_pfn,
 496                               fq->entries[idx].pages);
 497
 498                fq->head = (fq->head + 1) % IOVA_FQ_SIZE;
 499        }
 500}
 501
 502static void iova_domain_flush(struct iova_domain *iovad)
 503{
 504        atomic64_inc(&iovad->fq_flush_start_cnt);
 505        iovad->flush_cb(iovad);
 506        atomic64_inc(&iovad->fq_flush_finish_cnt);
 507}
 508
 509static void fq_destroy_all_entries(struct iova_domain *iovad)
 510{
 511        int cpu;
 512
 513        /*
 514         * This code runs when the iova_domain is being detroyed, so don't
 515         * bother to free iovas, just call the entry_dtor on all remaining
 516         * entries.
 517         */
 518        if (!iovad->entry_dtor)
 519                return;
 520
 521        for_each_possible_cpu(cpu) {
 522                struct iova_fq *fq = per_cpu_ptr(iovad->fq, cpu);
 523                int idx;
 524
 525                fq_ring_for_each(idx, fq)
 526                        iovad->entry_dtor(fq->entries[idx].data);
 527        }
 528}
 529
 530static void fq_flush_timeout(struct timer_list *t)
 531{
 532        struct iova_domain *iovad = from_timer(iovad, t, fq_timer);
 533        int cpu;
 534
 535        atomic_set(&iovad->fq_timer_on, 0);
 536        iova_domain_flush(iovad);
 537
 538        for_each_possible_cpu(cpu) {
 539                unsigned long flags;
 540                struct iova_fq *fq;
 541
 542                fq = per_cpu_ptr(iovad->fq, cpu);
 543                spin_lock_irqsave(&fq->lock, flags);
 544                fq_ring_free(iovad, fq);
 545                spin_unlock_irqrestore(&fq->lock, flags);
 546        }
 547}
 548
 549void queue_iova(struct iova_domain *iovad,
 550                unsigned long pfn, unsigned long pages,
 551                unsigned long data)
 552{
 553        struct iova_fq *fq = raw_cpu_ptr(iovad->fq);
 554        unsigned long flags;
 555        unsigned idx;
 556
 557        spin_lock_irqsave(&fq->lock, flags);
 558
 559        /*
 560         * First remove all entries from the flush queue that have already been
 561         * flushed out on another CPU. This makes the fq_full() check below less
 562         * likely to be true.
 563         */
 564        fq_ring_free(iovad, fq);
 565
 566        if (fq_full(fq)) {
 567                iova_domain_flush(iovad);
 568                fq_ring_free(iovad, fq);
 569        }
 570
 571        idx = fq_ring_add(fq);
 572
 573        fq->entries[idx].iova_pfn = pfn;
 574        fq->entries[idx].pages    = pages;
 575        fq->entries[idx].data     = data;
 576        fq->entries[idx].counter  = atomic64_read(&iovad->fq_flush_start_cnt);
 577
 578        spin_unlock_irqrestore(&fq->lock, flags);
 579
 580        /* Avoid false sharing as much as possible. */
 581        if (!atomic_read(&iovad->fq_timer_on) &&
 582            !atomic_cmpxchg(&iovad->fq_timer_on, 0, 1))
 583                mod_timer(&iovad->fq_timer,
 584                          jiffies + msecs_to_jiffies(IOVA_FQ_TIMEOUT));
 585}
 586EXPORT_SYMBOL_GPL(queue_iova);
 587
 588/**
 589 * put_iova_domain - destroys the iova doamin
 590 * @iovad: - iova domain in question.
 591 * All the iova's in that domain are destroyed.
 592 */
 593void put_iova_domain(struct iova_domain *iovad)
 594{
 595        struct iova *iova, *tmp;
 596
 597        free_iova_flush_queue(iovad);
 598        free_iova_rcaches(iovad);
 599        rbtree_postorder_for_each_entry_safe(iova, tmp, &iovad->rbroot, node)
 600                free_iova_mem(iova);
 601}
 602EXPORT_SYMBOL_GPL(put_iova_domain);
 603
 604static int
 605__is_range_overlap(struct rb_node *node,
 606        unsigned long pfn_lo, unsigned long pfn_hi)
 607{
 608        struct iova *iova = rb_entry(node, struct iova, node);
 609
 610        if ((pfn_lo <= iova->pfn_hi) && (pfn_hi >= iova->pfn_lo))
 611                return 1;
 612        return 0;
 613}
 614
 615static inline struct iova *
 616alloc_and_init_iova(unsigned long pfn_lo, unsigned long pfn_hi)
 617{
 618        struct iova *iova;
 619
 620        iova = alloc_iova_mem();
 621        if (iova) {
 622                iova->pfn_lo = pfn_lo;
 623                iova->pfn_hi = pfn_hi;
 624        }
 625
 626        return iova;
 627}
 628
 629static struct iova *
 630__insert_new_range(struct iova_domain *iovad,
 631        unsigned long pfn_lo, unsigned long pfn_hi)
 632{
 633        struct iova *iova;
 634
 635        iova = alloc_and_init_iova(pfn_lo, pfn_hi);
 636        if (iova)
 637                iova_insert_rbtree(&iovad->rbroot, iova, NULL);
 638
 639        return iova;
 640}
 641
 642static void
 643__adjust_overlap_range(struct iova *iova,
 644        unsigned long *pfn_lo, unsigned long *pfn_hi)
 645{
 646        if (*pfn_lo < iova->pfn_lo)
 647                iova->pfn_lo = *pfn_lo;
 648        if (*pfn_hi > iova->pfn_hi)
 649                *pfn_lo = iova->pfn_hi + 1;
 650}
 651
 652/**
 653 * reserve_iova - reserves an iova in the given range
 654 * @iovad: - iova domain pointer
 655 * @pfn_lo: - lower page frame address
 656 * @pfn_hi:- higher pfn adderss
 657 * This function allocates reserves the address range from pfn_lo to pfn_hi so
 658 * that this address is not dished out as part of alloc_iova.
 659 */
 660struct iova *
 661reserve_iova(struct iova_domain *iovad,
 662        unsigned long pfn_lo, unsigned long pfn_hi)
 663{
 664        struct rb_node *node;
 665        unsigned long flags;
 666        struct iova *iova;
 667        unsigned int overlap = 0;
 668
 669        /* Don't allow nonsensical pfns */
 670        if (WARN_ON((pfn_hi | pfn_lo) > (ULLONG_MAX >> iova_shift(iovad))))
 671                return NULL;
 672
 673        spin_lock_irqsave(&iovad->iova_rbtree_lock, flags);
 674        for (node = rb_first(&iovad->rbroot); node; node = rb_next(node)) {
 675                if (__is_range_overlap(node, pfn_lo, pfn_hi)) {
 676                        iova = rb_entry(node, struct iova, node);
 677                        __adjust_overlap_range(iova, &pfn_lo, &pfn_hi);
 678                        if ((pfn_lo >= iova->pfn_lo) &&
 679                                (pfn_hi <= iova->pfn_hi))
 680                                goto finish;
 681                        overlap = 1;
 682
 683                } else if (overlap)
 684                                break;
 685        }
 686
 687        /* We are here either because this is the first reserver node
 688         * or need to insert remaining non overlap addr range
 689         */
 690        iova = __insert_new_range(iovad, pfn_lo, pfn_hi);
 691finish:
 692
 693        spin_unlock_irqrestore(&iovad->iova_rbtree_lock, flags);
 694        return iova;
 695}
 696EXPORT_SYMBOL_GPL(reserve_iova);
 697
 698/**
 699 * copy_reserved_iova - copies the reserved between domains
 700 * @from: - source doamin from where to copy
 701 * @to: - destination domin where to copy
 702 * This function copies reserved iova's from one doamin to
 703 * other.
 704 */
 705void
 706copy_reserved_iova(struct iova_domain *from, struct iova_domain *to)
 707{
 708        unsigned long flags;
 709        struct rb_node *node;
 710
 711        spin_lock_irqsave(&from->iova_rbtree_lock, flags);
 712        for (node = rb_first(&from->rbroot); node; node = rb_next(node)) {
 713                struct iova *iova = rb_entry(node, struct iova, node);
 714                struct iova *new_iova;
 715
 716                if (iova->pfn_lo == IOVA_ANCHOR)
 717                        continue;
 718
 719                new_iova = reserve_iova(to, iova->pfn_lo, iova->pfn_hi);
 720                if (!new_iova)
 721                        printk(KERN_ERR "Reserve iova range %lx@%lx failed\n",
 722                                iova->pfn_lo, iova->pfn_lo);
 723        }
 724        spin_unlock_irqrestore(&from->iova_rbtree_lock, flags);
 725}
 726EXPORT_SYMBOL_GPL(copy_reserved_iova);
 727
 728struct iova *
 729split_and_remove_iova(struct iova_domain *iovad, struct iova *iova,
 730                      unsigned long pfn_lo, unsigned long pfn_hi)
 731{
 732        unsigned long flags;
 733        struct iova *prev = NULL, *next = NULL;
 734
 735        spin_lock_irqsave(&iovad->iova_rbtree_lock, flags);
 736        if (iova->pfn_lo < pfn_lo) {
 737                prev = alloc_and_init_iova(iova->pfn_lo, pfn_lo - 1);
 738                if (prev == NULL)
 739                        goto error;
 740        }
 741        if (iova->pfn_hi > pfn_hi) {
 742                next = alloc_and_init_iova(pfn_hi + 1, iova->pfn_hi);
 743                if (next == NULL)
 744                        goto error;
 745        }
 746
 747        __cached_rbnode_delete_update(iovad, iova);
 748        rb_erase(&iova->node, &iovad->rbroot);
 749
 750        if (prev) {
 751                iova_insert_rbtree(&iovad->rbroot, prev, NULL);
 752                iova->pfn_lo = pfn_lo;
 753        }
 754        if (next) {
 755                iova_insert_rbtree(&iovad->rbroot, next, NULL);
 756                iova->pfn_hi = pfn_hi;
 757        }
 758        spin_unlock_irqrestore(&iovad->iova_rbtree_lock, flags);
 759
 760        return iova;
 761
 762error:
 763        spin_unlock_irqrestore(&iovad->iova_rbtree_lock, flags);
 764        if (prev)
 765                free_iova_mem(prev);
 766        return NULL;
 767}
 768
 769/*
 770 * Magazine caches for IOVA ranges.  For an introduction to magazines,
 771 * see the USENIX 2001 paper "Magazines and Vmem: Extending the Slab
 772 * Allocator to Many CPUs and Arbitrary Resources" by Bonwick and Adams.
 773 * For simplicity, we use a static magazine size and don't implement the
 774 * dynamic size tuning described in the paper.
 775 */
 776
 777#define IOVA_MAG_SIZE 128
 778
 779struct iova_magazine {
 780        unsigned long size;
 781        unsigned long pfns[IOVA_MAG_SIZE];
 782};
 783
 784struct iova_cpu_rcache {
 785        spinlock_t lock;
 786        struct iova_magazine *loaded;
 787        struct iova_magazine *prev;
 788};
 789
 790static struct iova_magazine *iova_magazine_alloc(gfp_t flags)
 791{
 792        return kzalloc(sizeof(struct iova_magazine), flags);
 793}
 794
 795static void iova_magazine_free(struct iova_magazine *mag)
 796{
 797        kfree(mag);
 798}
 799
 800static void
 801iova_magazine_free_pfns(struct iova_magazine *mag, struct iova_domain *iovad)
 802{
 803        unsigned long flags;
 804        int i;
 805
 806        if (!mag)
 807                return;
 808
 809        spin_lock_irqsave(&iovad->iova_rbtree_lock, flags);
 810
 811        for (i = 0 ; i < mag->size; ++i) {
 812                struct iova *iova = private_find_iova(iovad, mag->pfns[i]);
 813
 814                BUG_ON(!iova);
 815                private_free_iova(iovad, iova);
 816        }
 817
 818        spin_unlock_irqrestore(&iovad->iova_rbtree_lock, flags);
 819
 820        mag->size = 0;
 821}
 822
 823static bool iova_magazine_full(struct iova_magazine *mag)
 824{
 825        return (mag && mag->size == IOVA_MAG_SIZE);
 826}
 827
 828static bool iova_magazine_empty(struct iova_magazine *mag)
 829{
 830        return (!mag || mag->size == 0);
 831}
 832
 833static unsigned long iova_magazine_pop(struct iova_magazine *mag,
 834                                       unsigned long limit_pfn)
 835{
 836        int i;
 837        unsigned long pfn;
 838
 839        BUG_ON(iova_magazine_empty(mag));
 840
 841        /* Only fall back to the rbtree if we have no suitable pfns at all */
 842        for (i = mag->size - 1; mag->pfns[i] > limit_pfn; i--)
 843                if (i == 0)
 844                        return 0;
 845
 846        /* Swap it to pop it */
 847        pfn = mag->pfns[i];
 848        mag->pfns[i] = mag->pfns[--mag->size];
 849
 850        return pfn;
 851}
 852
 853static void iova_magazine_push(struct iova_magazine *mag, unsigned long pfn)
 854{
 855        BUG_ON(iova_magazine_full(mag));
 856
 857        mag->pfns[mag->size++] = pfn;
 858}
 859
 860static void init_iova_rcaches(struct iova_domain *iovad)
 861{
 862        struct iova_cpu_rcache *cpu_rcache;
 863        struct iova_rcache *rcache;
 864        unsigned int cpu;
 865        int i;
 866
 867        for (i = 0; i < IOVA_RANGE_CACHE_MAX_SIZE; ++i) {
 868                rcache = &iovad->rcaches[i];
 869                spin_lock_init(&rcache->lock);
 870                rcache->depot_size = 0;
 871                rcache->cpu_rcaches = __alloc_percpu(sizeof(*cpu_rcache), cache_line_size());
 872                if (WARN_ON(!rcache->cpu_rcaches))
 873                        continue;
 874                for_each_possible_cpu(cpu) {
 875                        cpu_rcache = per_cpu_ptr(rcache->cpu_rcaches, cpu);
 876                        spin_lock_init(&cpu_rcache->lock);
 877                        cpu_rcache->loaded = iova_magazine_alloc(GFP_KERNEL);
 878                        cpu_rcache->prev = iova_magazine_alloc(GFP_KERNEL);
 879                }
 880        }
 881}
 882
 883/*
 884 * Try inserting IOVA range starting with 'iova_pfn' into 'rcache', and
 885 * return true on success.  Can fail if rcache is full and we can't free
 886 * space, and free_iova() (our only caller) will then return the IOVA
 887 * range to the rbtree instead.
 888 */
 889static bool __iova_rcache_insert(struct iova_domain *iovad,
 890                                 struct iova_rcache *rcache,
 891                                 unsigned long iova_pfn)
 892{
 893        struct iova_magazine *mag_to_free = NULL;
 894        struct iova_cpu_rcache *cpu_rcache;
 895        bool can_insert = false;
 896        unsigned long flags;
 897
 898        cpu_rcache = raw_cpu_ptr(rcache->cpu_rcaches);
 899        spin_lock_irqsave(&cpu_rcache->lock, flags);
 900
 901        if (!iova_magazine_full(cpu_rcache->loaded)) {
 902                can_insert = true;
 903        } else if (!iova_magazine_full(cpu_rcache->prev)) {
 904                swap(cpu_rcache->prev, cpu_rcache->loaded);
 905                can_insert = true;
 906        } else {
 907                struct iova_magazine *new_mag = iova_magazine_alloc(GFP_ATOMIC);
 908
 909                if (new_mag) {
 910                        spin_lock(&rcache->lock);
 911                        if (rcache->depot_size < MAX_GLOBAL_MAGS) {
 912                                rcache->depot[rcache->depot_size++] =
 913                                                cpu_rcache->loaded;
 914                        } else {
 915                                mag_to_free = cpu_rcache->loaded;
 916                        }
 917                        spin_unlock(&rcache->lock);
 918
 919                        cpu_rcache->loaded = new_mag;
 920                        can_insert = true;
 921                }
 922        }
 923
 924        if (can_insert)
 925                iova_magazine_push(cpu_rcache->loaded, iova_pfn);
 926
 927        spin_unlock_irqrestore(&cpu_rcache->lock, flags);
 928
 929        if (mag_to_free) {
 930                iova_magazine_free_pfns(mag_to_free, iovad);
 931                iova_magazine_free(mag_to_free);
 932        }
 933
 934        return can_insert;
 935}
 936
 937static bool iova_rcache_insert(struct iova_domain *iovad, unsigned long pfn,
 938                               unsigned long size)
 939{
 940        unsigned int log_size = order_base_2(size);
 941
 942        if (log_size >= IOVA_RANGE_CACHE_MAX_SIZE)
 943                return false;
 944
 945        return __iova_rcache_insert(iovad, &iovad->rcaches[log_size], pfn);
 946}
 947
 948/*
 949 * Caller wants to allocate a new IOVA range from 'rcache'.  If we can
 950 * satisfy the request, return a matching non-NULL range and remove
 951 * it from the 'rcache'.
 952 */
 953static unsigned long __iova_rcache_get(struct iova_rcache *rcache,
 954                                       unsigned long limit_pfn)
 955{
 956        struct iova_cpu_rcache *cpu_rcache;
 957        unsigned long iova_pfn = 0;
 958        bool has_pfn = false;
 959        unsigned long flags;
 960
 961        cpu_rcache = raw_cpu_ptr(rcache->cpu_rcaches);
 962        spin_lock_irqsave(&cpu_rcache->lock, flags);
 963
 964        if (!iova_magazine_empty(cpu_rcache->loaded)) {
 965                has_pfn = true;
 966        } else if (!iova_magazine_empty(cpu_rcache->prev)) {
 967                swap(cpu_rcache->prev, cpu_rcache->loaded);
 968                has_pfn = true;
 969        } else {
 970                spin_lock(&rcache->lock);
 971                if (rcache->depot_size > 0) {
 972                        iova_magazine_free(cpu_rcache->loaded);
 973                        cpu_rcache->loaded = rcache->depot[--rcache->depot_size];
 974                        has_pfn = true;
 975                }
 976                spin_unlock(&rcache->lock);
 977        }
 978
 979        if (has_pfn)
 980                iova_pfn = iova_magazine_pop(cpu_rcache->loaded, limit_pfn);
 981
 982        spin_unlock_irqrestore(&cpu_rcache->lock, flags);
 983
 984        return iova_pfn;
 985}
 986
 987/*
 988 * Try to satisfy IOVA allocation range from rcache.  Fail if requested
 989 * size is too big or the DMA limit we are given isn't satisfied by the
 990 * top element in the magazine.
 991 */
 992static unsigned long iova_rcache_get(struct iova_domain *iovad,
 993                                     unsigned long size,
 994                                     unsigned long limit_pfn)
 995{
 996        unsigned int log_size = order_base_2(size);
 997
 998        if (log_size >= IOVA_RANGE_CACHE_MAX_SIZE)
 999                return 0;
1000
1001        return __iova_rcache_get(&iovad->rcaches[log_size], limit_pfn - size);
1002}
1003
1004/*
1005 * free rcache data structures.
1006 */
1007static void free_iova_rcaches(struct iova_domain *iovad)
1008{
1009        struct iova_rcache *rcache;
1010        struct iova_cpu_rcache *cpu_rcache;
1011        unsigned int cpu;
1012        int i, j;
1013
1014        for (i = 0; i < IOVA_RANGE_CACHE_MAX_SIZE; ++i) {
1015                rcache = &iovad->rcaches[i];
1016                for_each_possible_cpu(cpu) {
1017                        cpu_rcache = per_cpu_ptr(rcache->cpu_rcaches, cpu);
1018                        iova_magazine_free(cpu_rcache->loaded);
1019                        iova_magazine_free(cpu_rcache->prev);
1020                }
1021                free_percpu(rcache->cpu_rcaches);
1022                for (j = 0; j < rcache->depot_size; ++j)
1023                        iova_magazine_free(rcache->depot[j]);
1024        }
1025}
1026
1027/*
1028 * free all the IOVA ranges cached by a cpu (used when cpu is unplugged)
1029 */
1030void free_cpu_cached_iovas(unsigned int cpu, struct iova_domain *iovad)
1031{
1032        struct iova_cpu_rcache *cpu_rcache;
1033        struct iova_rcache *rcache;
1034        unsigned long flags;
1035        int i;
1036
1037        for (i = 0; i < IOVA_RANGE_CACHE_MAX_SIZE; ++i) {
1038                rcache = &iovad->rcaches[i];
1039                cpu_rcache = per_cpu_ptr(rcache->cpu_rcaches, cpu);
1040                spin_lock_irqsave(&cpu_rcache->lock, flags);
1041                iova_magazine_free_pfns(cpu_rcache->loaded, iovad);
1042                iova_magazine_free_pfns(cpu_rcache->prev, iovad);
1043                spin_unlock_irqrestore(&cpu_rcache->lock, flags);
1044        }
1045}
1046
1047MODULE_AUTHOR("Anil S Keshavamurthy <anil.s.keshavamurthy@intel.com>");
1048MODULE_LICENSE("GPL");
1049