linux/drivers/iommu/iova.c
<<
>>
Prefs
   1// SPDX-License-Identifier: GPL-2.0-only
   2/*
   3 * Copyright © 2006-2009, Intel Corporation.
   4 *
   5 * Author: Anil S Keshavamurthy <anil.s.keshavamurthy@intel.com>
   6 */
   7
   8#include <linux/iova.h>
   9#include <linux/module.h>
  10#include <linux/slab.h>
  11#include <linux/smp.h>
  12#include <linux/bitops.h>
  13#include <linux/cpu.h>
  14
  15/* The anchor node sits above the top of the usable address space */
  16#define IOVA_ANCHOR     ~0UL
  17
  18static bool iova_rcache_insert(struct iova_domain *iovad,
  19                               unsigned long pfn,
  20                               unsigned long size);
  21static unsigned long iova_rcache_get(struct iova_domain *iovad,
  22                                     unsigned long size,
  23                                     unsigned long limit_pfn);
  24static void init_iova_rcaches(struct iova_domain *iovad);
  25static void free_iova_rcaches(struct iova_domain *iovad);
  26static void fq_destroy_all_entries(struct iova_domain *iovad);
  27static void fq_flush_timeout(struct timer_list *t);
  28
  29void
  30init_iova_domain(struct iova_domain *iovad, unsigned long granule,
  31        unsigned long start_pfn)
  32{
  33        /*
  34         * IOVA granularity will normally be equal to the smallest
  35         * supported IOMMU page size; both *must* be capable of
  36         * representing individual CPU pages exactly.
  37         */
  38        BUG_ON((granule > PAGE_SIZE) || !is_power_of_2(granule));
  39
  40        spin_lock_init(&iovad->iova_rbtree_lock);
  41        iovad->rbroot = RB_ROOT;
  42        iovad->cached_node = &iovad->anchor.node;
  43        iovad->cached32_node = &iovad->anchor.node;
  44        iovad->granule = granule;
  45        iovad->start_pfn = start_pfn;
  46        iovad->dma_32bit_pfn = 1UL << (32 - iova_shift(iovad));
  47        iovad->max32_alloc_size = iovad->dma_32bit_pfn;
  48        iovad->flush_cb = NULL;
  49        iovad->fq = NULL;
  50        iovad->anchor.pfn_lo = iovad->anchor.pfn_hi = IOVA_ANCHOR;
  51        rb_link_node(&iovad->anchor.node, NULL, &iovad->rbroot.rb_node);
  52        rb_insert_color(&iovad->anchor.node, &iovad->rbroot);
  53        init_iova_rcaches(iovad);
  54}
  55EXPORT_SYMBOL_GPL(init_iova_domain);
  56
  57bool has_iova_flush_queue(struct iova_domain *iovad)
  58{
  59        return !!iovad->fq;
  60}
  61
  62static void free_iova_flush_queue(struct iova_domain *iovad)
  63{
  64        if (!has_iova_flush_queue(iovad))
  65                return;
  66
  67        if (timer_pending(&iovad->fq_timer))
  68                del_timer(&iovad->fq_timer);
  69
  70        fq_destroy_all_entries(iovad);
  71
  72        free_percpu(iovad->fq);
  73
  74        iovad->fq         = NULL;
  75        iovad->flush_cb   = NULL;
  76        iovad->entry_dtor = NULL;
  77}
  78
  79int init_iova_flush_queue(struct iova_domain *iovad,
  80                          iova_flush_cb flush_cb, iova_entry_dtor entry_dtor)
  81{
  82        struct iova_fq __percpu *queue;
  83        int cpu;
  84
  85        atomic64_set(&iovad->fq_flush_start_cnt,  0);
  86        atomic64_set(&iovad->fq_flush_finish_cnt, 0);
  87
  88        queue = alloc_percpu(struct iova_fq);
  89        if (!queue)
  90                return -ENOMEM;
  91
  92        iovad->flush_cb   = flush_cb;
  93        iovad->entry_dtor = entry_dtor;
  94
  95        for_each_possible_cpu(cpu) {
  96                struct iova_fq *fq;
  97
  98                fq = per_cpu_ptr(queue, cpu);
  99                fq->head = 0;
 100                fq->tail = 0;
 101
 102                spin_lock_init(&fq->lock);
 103        }
 104
 105        smp_wmb();
 106
 107        iovad->fq = queue;
 108
 109        timer_setup(&iovad->fq_timer, fq_flush_timeout, 0);
 110        atomic_set(&iovad->fq_timer_on, 0);
 111
 112        return 0;
 113}
 114EXPORT_SYMBOL_GPL(init_iova_flush_queue);
 115
 116static struct rb_node *
 117__get_cached_rbnode(struct iova_domain *iovad, unsigned long limit_pfn)
 118{
 119        if (limit_pfn <= iovad->dma_32bit_pfn)
 120                return iovad->cached32_node;
 121
 122        return iovad->cached_node;
 123}
 124
 125static void
 126__cached_rbnode_insert_update(struct iova_domain *iovad, struct iova *new)
 127{
 128        if (new->pfn_hi < iovad->dma_32bit_pfn)
 129                iovad->cached32_node = &new->node;
 130        else
 131                iovad->cached_node = &new->node;
 132}
 133
 134static void
 135__cached_rbnode_delete_update(struct iova_domain *iovad, struct iova *free)
 136{
 137        struct iova *cached_iova;
 138
 139        cached_iova = rb_entry(iovad->cached32_node, struct iova, node);
 140        if (free == cached_iova ||
 141            (free->pfn_hi < iovad->dma_32bit_pfn &&
 142             free->pfn_lo >= cached_iova->pfn_lo)) {
 143                iovad->cached32_node = rb_next(&free->node);
 144                iovad->max32_alloc_size = iovad->dma_32bit_pfn;
 145        }
 146
 147        cached_iova = rb_entry(iovad->cached_node, struct iova, node);
 148        if (free->pfn_lo >= cached_iova->pfn_lo)
 149                iovad->cached_node = rb_next(&free->node);
 150}
 151
 152/* Insert the iova into domain rbtree by holding writer lock */
 153static void
 154iova_insert_rbtree(struct rb_root *root, struct iova *iova,
 155                   struct rb_node *start)
 156{
 157        struct rb_node **new, *parent = NULL;
 158
 159        new = (start) ? &start : &(root->rb_node);
 160        /* Figure out where to put new node */
 161        while (*new) {
 162                struct iova *this = rb_entry(*new, struct iova, node);
 163
 164                parent = *new;
 165
 166                if (iova->pfn_lo < this->pfn_lo)
 167                        new = &((*new)->rb_left);
 168                else if (iova->pfn_lo > this->pfn_lo)
 169                        new = &((*new)->rb_right);
 170                else {
 171                        WARN_ON(1); /* this should not happen */
 172                        return;
 173                }
 174        }
 175        /* Add new node and rebalance tree. */
 176        rb_link_node(&iova->node, parent, new);
 177        rb_insert_color(&iova->node, root);
 178}
 179
 180static int __alloc_and_insert_iova_range(struct iova_domain *iovad,
 181                unsigned long size, unsigned long limit_pfn,
 182                        struct iova *new, bool size_aligned)
 183{
 184        struct rb_node *curr, *prev;
 185        struct iova *curr_iova;
 186        unsigned long flags;
 187        unsigned long new_pfn;
 188        unsigned long align_mask = ~0UL;
 189
 190        if (size_aligned)
 191                align_mask <<= fls_long(size - 1);
 192
 193        /* Walk the tree backwards */
 194        spin_lock_irqsave(&iovad->iova_rbtree_lock, flags);
 195        if (limit_pfn <= iovad->dma_32bit_pfn &&
 196                        size >= iovad->max32_alloc_size)
 197                goto iova32_full;
 198
 199        curr = __get_cached_rbnode(iovad, limit_pfn);
 200        curr_iova = rb_entry(curr, struct iova, node);
 201        do {
 202                limit_pfn = min(limit_pfn, curr_iova->pfn_lo);
 203                new_pfn = (limit_pfn - size) & align_mask;
 204                prev = curr;
 205                curr = rb_prev(curr);
 206                curr_iova = rb_entry(curr, struct iova, node);
 207        } while (curr && new_pfn <= curr_iova->pfn_hi);
 208
 209        if (limit_pfn < size || new_pfn < iovad->start_pfn) {
 210                iovad->max32_alloc_size = size;
 211                goto iova32_full;
 212        }
 213
 214        /* pfn_lo will point to size aligned address if size_aligned is set */
 215        new->pfn_lo = new_pfn;
 216        new->pfn_hi = new->pfn_lo + size - 1;
 217
 218        /* If we have 'prev', it's a valid place to start the insertion. */
 219        iova_insert_rbtree(&iovad->rbroot, new, prev);
 220        __cached_rbnode_insert_update(iovad, new);
 221
 222        spin_unlock_irqrestore(&iovad->iova_rbtree_lock, flags);
 223        return 0;
 224
 225iova32_full:
 226        spin_unlock_irqrestore(&iovad->iova_rbtree_lock, flags);
 227        return -ENOMEM;
 228}
 229
 230static struct kmem_cache *iova_cache;
 231static unsigned int iova_cache_users;
 232static DEFINE_MUTEX(iova_cache_mutex);
 233
 234struct iova *alloc_iova_mem(void)
 235{
 236        return kmem_cache_zalloc(iova_cache, GFP_ATOMIC | __GFP_NOWARN);
 237}
 238EXPORT_SYMBOL(alloc_iova_mem);
 239
 240void free_iova_mem(struct iova *iova)
 241{
 242        if (iova->pfn_lo != IOVA_ANCHOR)
 243                kmem_cache_free(iova_cache, iova);
 244}
 245EXPORT_SYMBOL(free_iova_mem);
 246
 247int iova_cache_get(void)
 248{
 249        mutex_lock(&iova_cache_mutex);
 250        if (!iova_cache_users) {
 251                iova_cache = kmem_cache_create(
 252                        "iommu_iova", sizeof(struct iova), 0,
 253                        SLAB_HWCACHE_ALIGN, NULL);
 254                if (!iova_cache) {
 255                        mutex_unlock(&iova_cache_mutex);
 256                        pr_err("Couldn't create iova cache\n");
 257                        return -ENOMEM;
 258                }
 259        }
 260
 261        iova_cache_users++;
 262        mutex_unlock(&iova_cache_mutex);
 263
 264        return 0;
 265}
 266EXPORT_SYMBOL_GPL(iova_cache_get);
 267
 268void iova_cache_put(void)
 269{
 270        mutex_lock(&iova_cache_mutex);
 271        if (WARN_ON(!iova_cache_users)) {
 272                mutex_unlock(&iova_cache_mutex);
 273                return;
 274        }
 275        iova_cache_users--;
 276        if (!iova_cache_users)
 277                kmem_cache_destroy(iova_cache);
 278        mutex_unlock(&iova_cache_mutex);
 279}
 280EXPORT_SYMBOL_GPL(iova_cache_put);
 281
 282/**
 283 * alloc_iova - allocates an iova
 284 * @iovad: - iova domain in question
 285 * @size: - size of page frames to allocate
 286 * @limit_pfn: - max limit address
 287 * @size_aligned: - set if size_aligned address range is required
 288 * This function allocates an iova in the range iovad->start_pfn to limit_pfn,
 289 * searching top-down from limit_pfn to iovad->start_pfn. If the size_aligned
 290 * flag is set then the allocated address iova->pfn_lo will be naturally
 291 * aligned on roundup_power_of_two(size).
 292 */
 293struct iova *
 294alloc_iova(struct iova_domain *iovad, unsigned long size,
 295        unsigned long limit_pfn,
 296        bool size_aligned)
 297{
 298        struct iova *new_iova;
 299        int ret;
 300
 301        new_iova = alloc_iova_mem();
 302        if (!new_iova)
 303                return NULL;
 304
 305        ret = __alloc_and_insert_iova_range(iovad, size, limit_pfn + 1,
 306                        new_iova, size_aligned);
 307
 308        if (ret) {
 309                free_iova_mem(new_iova);
 310                return NULL;
 311        }
 312
 313        return new_iova;
 314}
 315EXPORT_SYMBOL_GPL(alloc_iova);
 316
 317static struct iova *
 318private_find_iova(struct iova_domain *iovad, unsigned long pfn)
 319{
 320        struct rb_node *node = iovad->rbroot.rb_node;
 321
 322        assert_spin_locked(&iovad->iova_rbtree_lock);
 323
 324        while (node) {
 325                struct iova *iova = rb_entry(node, struct iova, node);
 326
 327                if (pfn < iova->pfn_lo)
 328                        node = node->rb_left;
 329                else if (pfn > iova->pfn_hi)
 330                        node = node->rb_right;
 331                else
 332                        return iova;    /* pfn falls within iova's range */
 333        }
 334
 335        return NULL;
 336}
 337
 338static void private_free_iova(struct iova_domain *iovad, struct iova *iova)
 339{
 340        assert_spin_locked(&iovad->iova_rbtree_lock);
 341        __cached_rbnode_delete_update(iovad, iova);
 342        rb_erase(&iova->node, &iovad->rbroot);
 343        free_iova_mem(iova);
 344}
 345
 346/**
 347 * find_iova - finds an iova for a given pfn
 348 * @iovad: - iova domain in question.
 349 * @pfn: - page frame number
 350 * This function finds and returns an iova belonging to the
 351 * given doamin which matches the given pfn.
 352 */
 353struct iova *find_iova(struct iova_domain *iovad, unsigned long pfn)
 354{
 355        unsigned long flags;
 356        struct iova *iova;
 357
 358        /* Take the lock so that no other thread is manipulating the rbtree */
 359        spin_lock_irqsave(&iovad->iova_rbtree_lock, flags);
 360        iova = private_find_iova(iovad, pfn);
 361        spin_unlock_irqrestore(&iovad->iova_rbtree_lock, flags);
 362        return iova;
 363}
 364EXPORT_SYMBOL_GPL(find_iova);
 365
 366/**
 367 * __free_iova - frees the given iova
 368 * @iovad: iova domain in question.
 369 * @iova: iova in question.
 370 * Frees the given iova belonging to the giving domain
 371 */
 372void
 373__free_iova(struct iova_domain *iovad, struct iova *iova)
 374{
 375        unsigned long flags;
 376
 377        spin_lock_irqsave(&iovad->iova_rbtree_lock, flags);
 378        private_free_iova(iovad, iova);
 379        spin_unlock_irqrestore(&iovad->iova_rbtree_lock, flags);
 380}
 381EXPORT_SYMBOL_GPL(__free_iova);
 382
 383/**
 384 * free_iova - finds and frees the iova for a given pfn
 385 * @iovad: - iova domain in question.
 386 * @pfn: - pfn that is allocated previously
 387 * This functions finds an iova for a given pfn and then
 388 * frees the iova from that domain.
 389 */
 390void
 391free_iova(struct iova_domain *iovad, unsigned long pfn)
 392{
 393        struct iova *iova = find_iova(iovad, pfn);
 394
 395        if (iova)
 396                __free_iova(iovad, iova);
 397
 398}
 399EXPORT_SYMBOL_GPL(free_iova);
 400
 401/**
 402 * alloc_iova_fast - allocates an iova from rcache
 403 * @iovad: - iova domain in question
 404 * @size: - size of page frames to allocate
 405 * @limit_pfn: - max limit address
 406 * @flush_rcache: - set to flush rcache on regular allocation failure
 407 * This function tries to satisfy an iova allocation from the rcache,
 408 * and falls back to regular allocation on failure. If regular allocation
 409 * fails too and the flush_rcache flag is set then the rcache will be flushed.
 410*/
 411unsigned long
 412alloc_iova_fast(struct iova_domain *iovad, unsigned long size,
 413                unsigned long limit_pfn, bool flush_rcache)
 414{
 415        unsigned long iova_pfn;
 416        struct iova *new_iova;
 417
 418        iova_pfn = iova_rcache_get(iovad, size, limit_pfn + 1);
 419        if (iova_pfn)
 420                return iova_pfn;
 421
 422retry:
 423        new_iova = alloc_iova(iovad, size, limit_pfn, true);
 424        if (!new_iova) {
 425                unsigned int cpu;
 426
 427                if (!flush_rcache)
 428                        return 0;
 429
 430                /* Try replenishing IOVAs by flushing rcache. */
 431                flush_rcache = false;
 432                for_each_online_cpu(cpu)
 433                        free_cpu_cached_iovas(cpu, iovad);
 434                goto retry;
 435        }
 436
 437        return new_iova->pfn_lo;
 438}
 439EXPORT_SYMBOL_GPL(alloc_iova_fast);
 440
 441/**
 442 * free_iova_fast - free iova pfn range into rcache
 443 * @iovad: - iova domain in question.
 444 * @pfn: - pfn that is allocated previously
 445 * @size: - # of pages in range
 446 * This functions frees an iova range by trying to put it into the rcache,
 447 * falling back to regular iova deallocation via free_iova() if this fails.
 448 */
 449void
 450free_iova_fast(struct iova_domain *iovad, unsigned long pfn, unsigned long size)
 451{
 452        if (iova_rcache_insert(iovad, pfn, size))
 453                return;
 454
 455        free_iova(iovad, pfn);
 456}
 457EXPORT_SYMBOL_GPL(free_iova_fast);
 458
 459#define fq_ring_for_each(i, fq) \
 460        for ((i) = (fq)->head; (i) != (fq)->tail; (i) = ((i) + 1) % IOVA_FQ_SIZE)
 461
 462static inline bool fq_full(struct iova_fq *fq)
 463{
 464        assert_spin_locked(&fq->lock);
 465        return (((fq->tail + 1) % IOVA_FQ_SIZE) == fq->head);
 466}
 467
 468static inline unsigned fq_ring_add(struct iova_fq *fq)
 469{
 470        unsigned idx = fq->tail;
 471
 472        assert_spin_locked(&fq->lock);
 473
 474        fq->tail = (idx + 1) % IOVA_FQ_SIZE;
 475
 476        return idx;
 477}
 478
 479static void fq_ring_free(struct iova_domain *iovad, struct iova_fq *fq)
 480{
 481        u64 counter = atomic64_read(&iovad->fq_flush_finish_cnt);
 482        unsigned idx;
 483
 484        assert_spin_locked(&fq->lock);
 485
 486        fq_ring_for_each(idx, fq) {
 487
 488                if (fq->entries[idx].counter >= counter)
 489                        break;
 490
 491                if (iovad->entry_dtor)
 492                        iovad->entry_dtor(fq->entries[idx].data);
 493
 494                free_iova_fast(iovad,
 495                               fq->entries[idx].iova_pfn,
 496                               fq->entries[idx].pages);
 497
 498                fq->head = (fq->head + 1) % IOVA_FQ_SIZE;
 499        }
 500}
 501
 502static void iova_domain_flush(struct iova_domain *iovad)
 503{
 504        atomic64_inc(&iovad->fq_flush_start_cnt);
 505        iovad->flush_cb(iovad);
 506        atomic64_inc(&iovad->fq_flush_finish_cnt);
 507}
 508
 509static void fq_destroy_all_entries(struct iova_domain *iovad)
 510{
 511        int cpu;
 512
 513        /*
 514         * This code runs when the iova_domain is being detroyed, so don't
 515         * bother to free iovas, just call the entry_dtor on all remaining
 516         * entries.
 517         */
 518        if (!iovad->entry_dtor)
 519                return;
 520
 521        for_each_possible_cpu(cpu) {
 522                struct iova_fq *fq = per_cpu_ptr(iovad->fq, cpu);
 523                int idx;
 524
 525                fq_ring_for_each(idx, fq)
 526                        iovad->entry_dtor(fq->entries[idx].data);
 527        }
 528}
 529
 530static void fq_flush_timeout(struct timer_list *t)
 531{
 532        struct iova_domain *iovad = from_timer(iovad, t, fq_timer);
 533        int cpu;
 534
 535        atomic_set(&iovad->fq_timer_on, 0);
 536        iova_domain_flush(iovad);
 537
 538        for_each_possible_cpu(cpu) {
 539                unsigned long flags;
 540                struct iova_fq *fq;
 541
 542                fq = per_cpu_ptr(iovad->fq, cpu);
 543                spin_lock_irqsave(&fq->lock, flags);
 544                fq_ring_free(iovad, fq);
 545                spin_unlock_irqrestore(&fq->lock, flags);
 546        }
 547}
 548
 549void queue_iova(struct iova_domain *iovad,
 550                unsigned long pfn, unsigned long pages,
 551                unsigned long data)
 552{
 553        struct iova_fq *fq = raw_cpu_ptr(iovad->fq);
 554        unsigned long flags;
 555        unsigned idx;
 556
 557        spin_lock_irqsave(&fq->lock, flags);
 558
 559        /*
 560         * First remove all entries from the flush queue that have already been
 561         * flushed out on another CPU. This makes the fq_full() check below less
 562         * likely to be true.
 563         */
 564        fq_ring_free(iovad, fq);
 565
 566        if (fq_full(fq)) {
 567                iova_domain_flush(iovad);
 568                fq_ring_free(iovad, fq);
 569        }
 570
 571        idx = fq_ring_add(fq);
 572
 573        fq->entries[idx].iova_pfn = pfn;
 574        fq->entries[idx].pages    = pages;
 575        fq->entries[idx].data     = data;
 576        fq->entries[idx].counter  = atomic64_read(&iovad->fq_flush_start_cnt);
 577
 578        spin_unlock_irqrestore(&fq->lock, flags);
 579
 580        /* Avoid false sharing as much as possible. */
 581        if (!atomic_read(&iovad->fq_timer_on) &&
 582            !atomic_xchg(&iovad->fq_timer_on, 1))
 583                mod_timer(&iovad->fq_timer,
 584                          jiffies + msecs_to_jiffies(IOVA_FQ_TIMEOUT));
 585}
 586EXPORT_SYMBOL_GPL(queue_iova);
 587
 588/**
 589 * put_iova_domain - destroys the iova doamin
 590 * @iovad: - iova domain in question.
 591 * All the iova's in that domain are destroyed.
 592 */
 593void put_iova_domain(struct iova_domain *iovad)
 594{
 595        struct iova *iova, *tmp;
 596
 597        free_iova_flush_queue(iovad);
 598        free_iova_rcaches(iovad);
 599        rbtree_postorder_for_each_entry_safe(iova, tmp, &iovad->rbroot, node)
 600                free_iova_mem(iova);
 601}
 602EXPORT_SYMBOL_GPL(put_iova_domain);
 603
 604static int
 605__is_range_overlap(struct rb_node *node,
 606        unsigned long pfn_lo, unsigned long pfn_hi)
 607{
 608        struct iova *iova = rb_entry(node, struct iova, node);
 609
 610        if ((pfn_lo <= iova->pfn_hi) && (pfn_hi >= iova->pfn_lo))
 611                return 1;
 612        return 0;
 613}
 614
 615static inline struct iova *
 616alloc_and_init_iova(unsigned long pfn_lo, unsigned long pfn_hi)
 617{
 618        struct iova *iova;
 619
 620        iova = alloc_iova_mem();
 621        if (iova) {
 622                iova->pfn_lo = pfn_lo;
 623                iova->pfn_hi = pfn_hi;
 624        }
 625
 626        return iova;
 627}
 628
 629static struct iova *
 630__insert_new_range(struct iova_domain *iovad,
 631        unsigned long pfn_lo, unsigned long pfn_hi)
 632{
 633        struct iova *iova;
 634
 635        iova = alloc_and_init_iova(pfn_lo, pfn_hi);
 636        if (iova)
 637                iova_insert_rbtree(&iovad->rbroot, iova, NULL);
 638
 639        return iova;
 640}
 641
 642static void
 643__adjust_overlap_range(struct iova *iova,
 644        unsigned long *pfn_lo, unsigned long *pfn_hi)
 645{
 646        if (*pfn_lo < iova->pfn_lo)
 647                iova->pfn_lo = *pfn_lo;
 648        if (*pfn_hi > iova->pfn_hi)
 649                *pfn_lo = iova->pfn_hi + 1;
 650}
 651
 652/**
 653 * reserve_iova - reserves an iova in the given range
 654 * @iovad: - iova domain pointer
 655 * @pfn_lo: - lower page frame address
 656 * @pfn_hi:- higher pfn adderss
 657 * This function allocates reserves the address range from pfn_lo to pfn_hi so
 658 * that this address is not dished out as part of alloc_iova.
 659 */
 660struct iova *
 661reserve_iova(struct iova_domain *iovad,
 662        unsigned long pfn_lo, unsigned long pfn_hi)
 663{
 664        struct rb_node *node;
 665        unsigned long flags;
 666        struct iova *iova;
 667        unsigned int overlap = 0;
 668
 669        /* Don't allow nonsensical pfns */
 670        if (WARN_ON((pfn_hi | pfn_lo) > (ULLONG_MAX >> iova_shift(iovad))))
 671                return NULL;
 672
 673        spin_lock_irqsave(&iovad->iova_rbtree_lock, flags);
 674        for (node = rb_first(&iovad->rbroot); node; node = rb_next(node)) {
 675                if (__is_range_overlap(node, pfn_lo, pfn_hi)) {
 676                        iova = rb_entry(node, struct iova, node);
 677                        __adjust_overlap_range(iova, &pfn_lo, &pfn_hi);
 678                        if ((pfn_lo >= iova->pfn_lo) &&
 679                                (pfn_hi <= iova->pfn_hi))
 680                                goto finish;
 681                        overlap = 1;
 682
 683                } else if (overlap)
 684                                break;
 685        }
 686
 687        /* We are here either because this is the first reserver node
 688         * or need to insert remaining non overlap addr range
 689         */
 690        iova = __insert_new_range(iovad, pfn_lo, pfn_hi);
 691finish:
 692
 693        spin_unlock_irqrestore(&iovad->iova_rbtree_lock, flags);
 694        return iova;
 695}
 696EXPORT_SYMBOL_GPL(reserve_iova);
 697
 698/**
 699 * copy_reserved_iova - copies the reserved between domains
 700 * @from: - source doamin from where to copy
 701 * @to: - destination domin where to copy
 702 * This function copies reserved iova's from one doamin to
 703 * other.
 704 */
 705void
 706copy_reserved_iova(struct iova_domain *from, struct iova_domain *to)
 707{
 708        unsigned long flags;
 709        struct rb_node *node;
 710
 711        spin_lock_irqsave(&from->iova_rbtree_lock, flags);
 712        for (node = rb_first(&from->rbroot); node; node = rb_next(node)) {
 713                struct iova *iova = rb_entry(node, struct iova, node);
 714                struct iova *new_iova;
 715
 716                if (iova->pfn_lo == IOVA_ANCHOR)
 717                        continue;
 718
 719                new_iova = reserve_iova(to, iova->pfn_lo, iova->pfn_hi);
 720                if (!new_iova)
 721                        pr_err("Reserve iova range %lx@%lx failed\n",
 722                               iova->pfn_lo, iova->pfn_lo);
 723        }
 724        spin_unlock_irqrestore(&from->iova_rbtree_lock, flags);
 725}
 726EXPORT_SYMBOL_GPL(copy_reserved_iova);
 727
 728struct iova *
 729split_and_remove_iova(struct iova_domain *iovad, struct iova *iova,
 730                      unsigned long pfn_lo, unsigned long pfn_hi)
 731{
 732        unsigned long flags;
 733        struct iova *prev = NULL, *next = NULL;
 734
 735        spin_lock_irqsave(&iovad->iova_rbtree_lock, flags);
 736        if (iova->pfn_lo < pfn_lo) {
 737                prev = alloc_and_init_iova(iova->pfn_lo, pfn_lo - 1);
 738                if (prev == NULL)
 739                        goto error;
 740        }
 741        if (iova->pfn_hi > pfn_hi) {
 742                next = alloc_and_init_iova(pfn_hi + 1, iova->pfn_hi);
 743                if (next == NULL)
 744                        goto error;
 745        }
 746
 747        __cached_rbnode_delete_update(iovad, iova);
 748        rb_erase(&iova->node, &iovad->rbroot);
 749
 750        if (prev) {
 751                iova_insert_rbtree(&iovad->rbroot, prev, NULL);
 752                iova->pfn_lo = pfn_lo;
 753        }
 754        if (next) {
 755                iova_insert_rbtree(&iovad->rbroot, next, NULL);
 756                iova->pfn_hi = pfn_hi;
 757        }
 758        spin_unlock_irqrestore(&iovad->iova_rbtree_lock, flags);
 759
 760        return iova;
 761
 762error:
 763        spin_unlock_irqrestore(&iovad->iova_rbtree_lock, flags);
 764        if (prev)
 765                free_iova_mem(prev);
 766        return NULL;
 767}
 768
 769/*
 770 * Magazine caches for IOVA ranges.  For an introduction to magazines,
 771 * see the USENIX 2001 paper "Magazines and Vmem: Extending the Slab
 772 * Allocator to Many CPUs and Arbitrary Resources" by Bonwick and Adams.
 773 * For simplicity, we use a static magazine size and don't implement the
 774 * dynamic size tuning described in the paper.
 775 */
 776
 777#define IOVA_MAG_SIZE 128
 778
 779struct iova_magazine {
 780        unsigned long size;
 781        unsigned long pfns[IOVA_MAG_SIZE];
 782};
 783
 784struct iova_cpu_rcache {
 785        spinlock_t lock;
 786        struct iova_magazine *loaded;
 787        struct iova_magazine *prev;
 788};
 789
 790static struct iova_magazine *iova_magazine_alloc(gfp_t flags)
 791{
 792        return kzalloc(sizeof(struct iova_magazine), flags);
 793}
 794
 795static void iova_magazine_free(struct iova_magazine *mag)
 796{
 797        kfree(mag);
 798}
 799
 800static void
 801iova_magazine_free_pfns(struct iova_magazine *mag, struct iova_domain *iovad)
 802{
 803        unsigned long flags;
 804        int i;
 805
 806        if (!mag)
 807                return;
 808
 809        spin_lock_irqsave(&iovad->iova_rbtree_lock, flags);
 810
 811        for (i = 0 ; i < mag->size; ++i) {
 812                struct iova *iova = private_find_iova(iovad, mag->pfns[i]);
 813
 814                if (WARN_ON(!iova))
 815                        continue;
 816
 817                private_free_iova(iovad, iova);
 818        }
 819
 820        spin_unlock_irqrestore(&iovad->iova_rbtree_lock, flags);
 821
 822        mag->size = 0;
 823}
 824
 825static bool iova_magazine_full(struct iova_magazine *mag)
 826{
 827        return (mag && mag->size == IOVA_MAG_SIZE);
 828}
 829
 830static bool iova_magazine_empty(struct iova_magazine *mag)
 831{
 832        return (!mag || mag->size == 0);
 833}
 834
 835static unsigned long iova_magazine_pop(struct iova_magazine *mag,
 836                                       unsigned long limit_pfn)
 837{
 838        int i;
 839        unsigned long pfn;
 840
 841        BUG_ON(iova_magazine_empty(mag));
 842
 843        /* Only fall back to the rbtree if we have no suitable pfns at all */
 844        for (i = mag->size - 1; mag->pfns[i] > limit_pfn; i--)
 845                if (i == 0)
 846                        return 0;
 847
 848        /* Swap it to pop it */
 849        pfn = mag->pfns[i];
 850        mag->pfns[i] = mag->pfns[--mag->size];
 851
 852        return pfn;
 853}
 854
 855static void iova_magazine_push(struct iova_magazine *mag, unsigned long pfn)
 856{
 857        BUG_ON(iova_magazine_full(mag));
 858
 859        mag->pfns[mag->size++] = pfn;
 860}
 861
 862static void init_iova_rcaches(struct iova_domain *iovad)
 863{
 864        struct iova_cpu_rcache *cpu_rcache;
 865        struct iova_rcache *rcache;
 866        unsigned int cpu;
 867        int i;
 868
 869        for (i = 0; i < IOVA_RANGE_CACHE_MAX_SIZE; ++i) {
 870                rcache = &iovad->rcaches[i];
 871                spin_lock_init(&rcache->lock);
 872                rcache->depot_size = 0;
 873                rcache->cpu_rcaches = __alloc_percpu(sizeof(*cpu_rcache), cache_line_size());
 874                if (WARN_ON(!rcache->cpu_rcaches))
 875                        continue;
 876                for_each_possible_cpu(cpu) {
 877                        cpu_rcache = per_cpu_ptr(rcache->cpu_rcaches, cpu);
 878                        spin_lock_init(&cpu_rcache->lock);
 879                        cpu_rcache->loaded = iova_magazine_alloc(GFP_KERNEL);
 880                        cpu_rcache->prev = iova_magazine_alloc(GFP_KERNEL);
 881                }
 882        }
 883}
 884
 885/*
 886 * Try inserting IOVA range starting with 'iova_pfn' into 'rcache', and
 887 * return true on success.  Can fail if rcache is full and we can't free
 888 * space, and free_iova() (our only caller) will then return the IOVA
 889 * range to the rbtree instead.
 890 */
 891static bool __iova_rcache_insert(struct iova_domain *iovad,
 892                                 struct iova_rcache *rcache,
 893                                 unsigned long iova_pfn)
 894{
 895        struct iova_magazine *mag_to_free = NULL;
 896        struct iova_cpu_rcache *cpu_rcache;
 897        bool can_insert = false;
 898        unsigned long flags;
 899
 900        cpu_rcache = raw_cpu_ptr(rcache->cpu_rcaches);
 901        spin_lock_irqsave(&cpu_rcache->lock, flags);
 902
 903        if (!iova_magazine_full(cpu_rcache->loaded)) {
 904                can_insert = true;
 905        } else if (!iova_magazine_full(cpu_rcache->prev)) {
 906                swap(cpu_rcache->prev, cpu_rcache->loaded);
 907                can_insert = true;
 908        } else {
 909                struct iova_magazine *new_mag = iova_magazine_alloc(GFP_ATOMIC);
 910
 911                if (new_mag) {
 912                        spin_lock(&rcache->lock);
 913                        if (rcache->depot_size < MAX_GLOBAL_MAGS) {
 914                                rcache->depot[rcache->depot_size++] =
 915                                                cpu_rcache->loaded;
 916                        } else {
 917                                mag_to_free = cpu_rcache->loaded;
 918                        }
 919                        spin_unlock(&rcache->lock);
 920
 921                        cpu_rcache->loaded = new_mag;
 922                        can_insert = true;
 923                }
 924        }
 925
 926        if (can_insert)
 927                iova_magazine_push(cpu_rcache->loaded, iova_pfn);
 928
 929        spin_unlock_irqrestore(&cpu_rcache->lock, flags);
 930
 931        if (mag_to_free) {
 932                iova_magazine_free_pfns(mag_to_free, iovad);
 933                iova_magazine_free(mag_to_free);
 934        }
 935
 936        return can_insert;
 937}
 938
 939static bool iova_rcache_insert(struct iova_domain *iovad, unsigned long pfn,
 940                               unsigned long size)
 941{
 942        unsigned int log_size = order_base_2(size);
 943
 944        if (log_size >= IOVA_RANGE_CACHE_MAX_SIZE)
 945                return false;
 946
 947        return __iova_rcache_insert(iovad, &iovad->rcaches[log_size], pfn);
 948}
 949
 950/*
 951 * Caller wants to allocate a new IOVA range from 'rcache'.  If we can
 952 * satisfy the request, return a matching non-NULL range and remove
 953 * it from the 'rcache'.
 954 */
 955static unsigned long __iova_rcache_get(struct iova_rcache *rcache,
 956                                       unsigned long limit_pfn)
 957{
 958        struct iova_cpu_rcache *cpu_rcache;
 959        unsigned long iova_pfn = 0;
 960        bool has_pfn = false;
 961        unsigned long flags;
 962
 963        cpu_rcache = raw_cpu_ptr(rcache->cpu_rcaches);
 964        spin_lock_irqsave(&cpu_rcache->lock, flags);
 965
 966        if (!iova_magazine_empty(cpu_rcache->loaded)) {
 967                has_pfn = true;
 968        } else if (!iova_magazine_empty(cpu_rcache->prev)) {
 969                swap(cpu_rcache->prev, cpu_rcache->loaded);
 970                has_pfn = true;
 971        } else {
 972                spin_lock(&rcache->lock);
 973                if (rcache->depot_size > 0) {
 974                        iova_magazine_free(cpu_rcache->loaded);
 975                        cpu_rcache->loaded = rcache->depot[--rcache->depot_size];
 976                        has_pfn = true;
 977                }
 978                spin_unlock(&rcache->lock);
 979        }
 980
 981        if (has_pfn)
 982                iova_pfn = iova_magazine_pop(cpu_rcache->loaded, limit_pfn);
 983
 984        spin_unlock_irqrestore(&cpu_rcache->lock, flags);
 985
 986        return iova_pfn;
 987}
 988
 989/*
 990 * Try to satisfy IOVA allocation range from rcache.  Fail if requested
 991 * size is too big or the DMA limit we are given isn't satisfied by the
 992 * top element in the magazine.
 993 */
 994static unsigned long iova_rcache_get(struct iova_domain *iovad,
 995                                     unsigned long size,
 996                                     unsigned long limit_pfn)
 997{
 998        unsigned int log_size = order_base_2(size);
 999
1000        if (log_size >= IOVA_RANGE_CACHE_MAX_SIZE)
1001                return 0;
1002
1003        return __iova_rcache_get(&iovad->rcaches[log_size], limit_pfn - size);
1004}
1005
1006/*
1007 * free rcache data structures.
1008 */
1009static void free_iova_rcaches(struct iova_domain *iovad)
1010{
1011        struct iova_rcache *rcache;
1012        struct iova_cpu_rcache *cpu_rcache;
1013        unsigned int cpu;
1014        int i, j;
1015
1016        for (i = 0; i < IOVA_RANGE_CACHE_MAX_SIZE; ++i) {
1017                rcache = &iovad->rcaches[i];
1018                for_each_possible_cpu(cpu) {
1019                        cpu_rcache = per_cpu_ptr(rcache->cpu_rcaches, cpu);
1020                        iova_magazine_free(cpu_rcache->loaded);
1021                        iova_magazine_free(cpu_rcache->prev);
1022                }
1023                free_percpu(rcache->cpu_rcaches);
1024                for (j = 0; j < rcache->depot_size; ++j)
1025                        iova_magazine_free(rcache->depot[j]);
1026        }
1027}
1028
1029/*
1030 * free all the IOVA ranges cached by a cpu (used when cpu is unplugged)
1031 */
1032void free_cpu_cached_iovas(unsigned int cpu, struct iova_domain *iovad)
1033{
1034        struct iova_cpu_rcache *cpu_rcache;
1035        struct iova_rcache *rcache;
1036        unsigned long flags;
1037        int i;
1038
1039        for (i = 0; i < IOVA_RANGE_CACHE_MAX_SIZE; ++i) {
1040                rcache = &iovad->rcaches[i];
1041                cpu_rcache = per_cpu_ptr(rcache->cpu_rcaches, cpu);
1042                spin_lock_irqsave(&cpu_rcache->lock, flags);
1043                iova_magazine_free_pfns(cpu_rcache->loaded, iovad);
1044                iova_magazine_free_pfns(cpu_rcache->prev, iovad);
1045                spin_unlock_irqrestore(&cpu_rcache->lock, flags);
1046        }
1047}
1048
1049MODULE_AUTHOR("Anil S Keshavamurthy <anil.s.keshavamurthy@intel.com>");
1050MODULE_LICENSE("GPL");
1051