linux/drivers/iommu/iova.c
/*
 * Copyright © 2006-2009, Intel Corporation.
 *
 * This program is free software; you can redistribute it and/or modify it
 * under the terms and conditions of the GNU General Public License,
 * version 2, as published by the Free Software Foundation.
 *
 * This program is distributed in the hope it will be useful, but WITHOUT
 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
 * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
 * more details.
 *
 * You should have received a copy of the GNU General Public License along with
 * this program; if not, write to the Free Software Foundation, Inc., 59 Temple
 * Place - Suite 330, Boston, MA 02111-1307 USA.
 *
 * Author: Anil S Keshavamurthy <anil.s.keshavamurthy@intel.com>
 */

#include <linux/iova.h>
#include <linux/module.h>
#include <linux/slab.h>
#include <linux/smp.h>
#include <linux/bitops.h>
#include <linux/cpu.h>

static bool iova_rcache_insert(struct iova_domain *iovad,
                               unsigned long pfn,
                               unsigned long size);
static unsigned long iova_rcache_get(struct iova_domain *iovad,
                                     unsigned long size,
                                     unsigned long limit_pfn);
static void init_iova_rcaches(struct iova_domain *iovad);
static void free_iova_rcaches(struct iova_domain *iovad);
static void fq_destroy_all_entries(struct iova_domain *iovad);
static void fq_flush_timeout(unsigned long data);

void
init_iova_domain(struct iova_domain *iovad, unsigned long granule,
        unsigned long start_pfn, unsigned long pfn_32bit)
{
        /*
         * IOVA granularity will normally be equal to the smallest
         * supported IOMMU page size; both *must* be capable of
         * representing individual CPU pages exactly.
         */
        BUG_ON((granule > PAGE_SIZE) || !is_power_of_2(granule));

        spin_lock_init(&iovad->iova_rbtree_lock);
        iovad->rbroot = RB_ROOT;
        iovad->cached32_node = NULL;
        iovad->granule = granule;
        iovad->start_pfn = start_pfn;
        iovad->dma_32bit_pfn = pfn_32bit + 1;
        iovad->flush_cb = NULL;
        iovad->fq = NULL;
        init_iova_rcaches(iovad);
}
EXPORT_SYMBOL_GPL(init_iova_domain);
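
/*
 * Example (illustrative sketch only, not used by this file): a driver whose
 * IOMMU page size equals PAGE_SIZE could set up a domain like this, where
 * IOVA_START_PFN and DMA_32BIT_PFN are hypothetical constants chosen by the
 * caller:
 *
 *	struct iova_domain iovad;
 *
 *	init_iova_domain(&iovad, PAGE_SIZE, IOVA_START_PFN, DMA_32BIT_PFN);
 *
 * Note that the last argument is the highest usable 32-bit pfn; the domain
 * stores it internally as dma_32bit_pfn = pfn_32bit + 1.
 */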

static void free_iova_flush_queue(struct iova_domain *iovad)
{
        if (!iovad->fq)
                return;

        if (timer_pending(&iovad->fq_timer))
                del_timer(&iovad->fq_timer);

        fq_destroy_all_entries(iovad);

        free_percpu(iovad->fq);

        iovad->fq         = NULL;
        iovad->flush_cb   = NULL;
        iovad->entry_dtor = NULL;
}

int init_iova_flush_queue(struct iova_domain *iovad,
                          iova_flush_cb flush_cb, iova_entry_dtor entry_dtor)
{
        int cpu;

        atomic64_set(&iovad->fq_flush_start_cnt,  0);
        atomic64_set(&iovad->fq_flush_finish_cnt, 0);

        iovad->fq = alloc_percpu(struct iova_fq);
        if (!iovad->fq)
                return -ENOMEM;

        iovad->flush_cb   = flush_cb;
        iovad->entry_dtor = entry_dtor;

        for_each_possible_cpu(cpu) {
                struct iova_fq *fq;

                fq = per_cpu_ptr(iovad->fq, cpu);
                fq->head = 0;
                fq->tail = 0;

                spin_lock_init(&fq->lock);
        }

        setup_timer(&iovad->fq_timer, fq_flush_timeout, (unsigned long)iovad);
        atomic_set(&iovad->fq_timer_on, 0);

        return 0;
}
EXPORT_SYMBOL_GPL(init_iova_flush_queue);

static struct rb_node *
__get_cached_rbnode(struct iova_domain *iovad, unsigned long *limit_pfn)
{
        if ((*limit_pfn > iovad->dma_32bit_pfn) ||
                (iovad->cached32_node == NULL))
                return rb_last(&iovad->rbroot);
        else {
                struct rb_node *prev_node = rb_prev(iovad->cached32_node);
                struct iova *curr_iova =
                        rb_entry(iovad->cached32_node, struct iova, node);
                *limit_pfn = curr_iova->pfn_lo;
                return prev_node;
        }
}

static void
__cached_rbnode_insert_update(struct iova_domain *iovad,
        unsigned long limit_pfn, struct iova *new)
{
        if (limit_pfn != iovad->dma_32bit_pfn)
                return;
        iovad->cached32_node = &new->node;
}

static void
__cached_rbnode_delete_update(struct iova_domain *iovad, struct iova *free)
{
        struct iova *cached_iova;
        struct rb_node *curr;

        if (!iovad->cached32_node)
                return;
        curr = iovad->cached32_node;
        cached_iova = rb_entry(curr, struct iova, node);

        if (free->pfn_lo >= cached_iova->pfn_lo) {
                struct rb_node *node = rb_next(&free->node);
                struct iova *iova = rb_entry(node, struct iova, node);

                /* only cache if it's below 32bit pfn */
                if (node && iova->pfn_lo < iovad->dma_32bit_pfn)
                        iovad->cached32_node = node;
                else
                        iovad->cached32_node = NULL;
        }
}

/* Insert the iova into the domain rbtree while holding the writer lock */
static void
iova_insert_rbtree(struct rb_root *root, struct iova *iova,
                   struct rb_node *start)
{
        struct rb_node **new, *parent = NULL;

        new = (start) ? &start : &(root->rb_node);
        /* Figure out where to put new node */
        while (*new) {
                struct iova *this = rb_entry(*new, struct iova, node);

                parent = *new;

                if (iova->pfn_lo < this->pfn_lo)
                        new = &((*new)->rb_left);
                else if (iova->pfn_lo > this->pfn_lo)
                        new = &((*new)->rb_right);
                else {
                        WARN_ON(1); /* this should not happen */
                        return;
                }
        }
        /* Add new node and rebalance tree. */
        rb_link_node(&iova->node, parent, new);
        rb_insert_color(&iova->node, root);
}

/*
 * Computes the padding size required to make the start address
 * naturally aligned on the power-of-two order of its size.
 */
static unsigned int
iova_get_pad_size(unsigned int size, unsigned int limit_pfn)
{
        return (limit_pfn - size) & (__roundup_pow_of_two(size) - 1);
}
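
/*
 * Worked example (illustrative only): for size = 3 and limit_pfn = 0x100000,
 * __roundup_pow_of_two(3) is 4, so the mask is 3 and
 *
 *	pad_size = (0x100000 - 3) & 3 = 1;
 *
 * The allocation below then places pfn_lo at limit_pfn - (size + pad_size)
 * = 0xffffc, which is aligned to the power-of-two order (4) of the size.
 */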

static int __alloc_and_insert_iova_range(struct iova_domain *iovad,
                unsigned long size, unsigned long limit_pfn,
                        struct iova *new, bool size_aligned)
{
        struct rb_node *prev, *curr = NULL;
        unsigned long flags;
        unsigned long saved_pfn;
        unsigned int pad_size = 0;

        /* Walk the tree backwards */
        spin_lock_irqsave(&iovad->iova_rbtree_lock, flags);
        saved_pfn = limit_pfn;
        curr = __get_cached_rbnode(iovad, &limit_pfn);
        prev = curr;
        while (curr) {
                struct iova *curr_iova = rb_entry(curr, struct iova, node);

                if (limit_pfn <= curr_iova->pfn_lo) {
                        goto move_left;
                } else if (limit_pfn > curr_iova->pfn_hi) {
                        if (size_aligned)
                                pad_size = iova_get_pad_size(size, limit_pfn);
                        if ((curr_iova->pfn_hi + size + pad_size) < limit_pfn)
                                break;  /* found a free slot */
                }
                limit_pfn = curr_iova->pfn_lo;
move_left:
                prev = curr;
                curr = rb_prev(curr);
        }

        if (!curr) {
                if (size_aligned)
                        pad_size = iova_get_pad_size(size, limit_pfn);
                if ((iovad->start_pfn + size + pad_size) > limit_pfn) {
                        spin_unlock_irqrestore(&iovad->iova_rbtree_lock, flags);
                        return -ENOMEM;
                }
        }

        /* pfn_lo will point to size aligned address if size_aligned is set */
        new->pfn_lo = limit_pfn - (size + pad_size);
        new->pfn_hi = new->pfn_lo + size - 1;

        /* If we have 'prev', it's a valid place to start the insertion. */
        iova_insert_rbtree(&iovad->rbroot, new, prev);
        __cached_rbnode_insert_update(iovad, saved_pfn, new);

        spin_unlock_irqrestore(&iovad->iova_rbtree_lock, flags);

        return 0;
}

static struct kmem_cache *iova_cache;
static unsigned int iova_cache_users;
static DEFINE_MUTEX(iova_cache_mutex);

struct iova *alloc_iova_mem(void)
{
        return kmem_cache_alloc(iova_cache, GFP_ATOMIC);
}
EXPORT_SYMBOL(alloc_iova_mem);

void free_iova_mem(struct iova *iova)
{
        kmem_cache_free(iova_cache, iova);
}
EXPORT_SYMBOL(free_iova_mem);

int iova_cache_get(void)
{
        mutex_lock(&iova_cache_mutex);
        if (!iova_cache_users) {
                iova_cache = kmem_cache_create(
                        "iommu_iova", sizeof(struct iova), 0,
                        SLAB_HWCACHE_ALIGN, NULL);
                if (!iova_cache) {
                        mutex_unlock(&iova_cache_mutex);
                        printk(KERN_ERR "Couldn't create iova cache\n");
                        return -ENOMEM;
                }
        }

        iova_cache_users++;
        mutex_unlock(&iova_cache_mutex);

        return 0;
}
EXPORT_SYMBOL_GPL(iova_cache_get);

void iova_cache_put(void)
{
        mutex_lock(&iova_cache_mutex);
        if (WARN_ON(!iova_cache_users)) {
                mutex_unlock(&iova_cache_mutex);
                return;
        }
        iova_cache_users--;
        if (!iova_cache_users)
                kmem_cache_destroy(iova_cache);
        mutex_unlock(&iova_cache_mutex);
}
EXPORT_SYMBOL_GPL(iova_cache_put);

/**
 * alloc_iova - allocates an iova
 * @iovad: - iova domain in question
 * @size: - size of page frames to allocate
 * @limit_pfn: - max limit address
 * @size_aligned: - set if size_aligned address range is required
 * This function allocates an iova in the range iovad->start_pfn to limit_pfn,
 * searching top-down from limit_pfn to iovad->start_pfn. If the size_aligned
 * flag is set then the allocated address iova->pfn_lo will be naturally
 * aligned on roundup_power_of_two(size).
 */
struct iova *
alloc_iova(struct iova_domain *iovad, unsigned long size,
        unsigned long limit_pfn,
        bool size_aligned)
{
        struct iova *new_iova;
        int ret;

        new_iova = alloc_iova_mem();
        if (!new_iova)
                return NULL;

        ret = __alloc_and_insert_iova_range(iovad, size, limit_pfn + 1,
                        new_iova, size_aligned);

        if (ret) {
                free_iova_mem(new_iova);
                return NULL;
        }

        return new_iova;
}
EXPORT_SYMBOL_GPL(alloc_iova);
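
/*
 * Example (illustrative sketch only): allocating and releasing a size-aligned
 * range below a caller-chosen limit_pfn might look like this, where iovad,
 * nrpages and limit_pfn are assumed to be provided by the caller:
 *
 *	struct iova *iova;
 *
 *	iova = alloc_iova(&iovad, nrpages, limit_pfn, true);
 *	if (!iova)
 *		return -ENOMEM;
 *	...
 *	__free_iova(&iovad, iova);
 */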

static struct iova *
private_find_iova(struct iova_domain *iovad, unsigned long pfn)
{
        struct rb_node *node = iovad->rbroot.rb_node;

        assert_spin_locked(&iovad->iova_rbtree_lock);

        while (node) {
                struct iova *iova = rb_entry(node, struct iova, node);

                /* If pfn falls within iova's range, return iova */
                if ((pfn >= iova->pfn_lo) && (pfn <= iova->pfn_hi)) {
                        return iova;
                }

                if (pfn < iova->pfn_lo)
                        node = node->rb_left;
                else if (pfn > iova->pfn_lo)
                        node = node->rb_right;
        }

        return NULL;
}

static void private_free_iova(struct iova_domain *iovad, struct iova *iova)
{
        assert_spin_locked(&iovad->iova_rbtree_lock);
        __cached_rbnode_delete_update(iovad, iova);
        rb_erase(&iova->node, &iovad->rbroot);
        free_iova_mem(iova);
}

/**
 * find_iova - finds an iova for a given pfn
 * @iovad: - iova domain in question.
 * @pfn: - page frame number
 * This function finds and returns an iova belonging to the
 * given domain which matches the given pfn.
 */
struct iova *find_iova(struct iova_domain *iovad, unsigned long pfn)
{
        unsigned long flags;
        struct iova *iova;

        /* Take the lock so that no other thread is manipulating the rbtree */
        spin_lock_irqsave(&iovad->iova_rbtree_lock, flags);
        iova = private_find_iova(iovad, pfn);
        spin_unlock_irqrestore(&iovad->iova_rbtree_lock, flags);
        return iova;
}
EXPORT_SYMBOL_GPL(find_iova);

/**
 * __free_iova - frees the given iova
 * @iovad: iova domain in question.
 * @iova: iova in question.
 * Frees the given iova belonging to the given domain.
 */
void
__free_iova(struct iova_domain *iovad, struct iova *iova)
{
        unsigned long flags;

        spin_lock_irqsave(&iovad->iova_rbtree_lock, flags);
        private_free_iova(iovad, iova);
        spin_unlock_irqrestore(&iovad->iova_rbtree_lock, flags);
}
EXPORT_SYMBOL_GPL(__free_iova);

/**
 * free_iova - finds and frees the iova for a given pfn
 * @iovad: - iova domain in question.
 * @pfn: - pfn that is allocated previously
 * This function finds an iova for a given pfn and then
 * frees the iova from that domain.
 */
void
free_iova(struct iova_domain *iovad, unsigned long pfn)
{
        struct iova *iova = find_iova(iovad, pfn);

        if (iova)
                __free_iova(iovad, iova);
}
EXPORT_SYMBOL_GPL(free_iova);

/**
 * alloc_iova_fast - allocates an iova from rcache
 * @iovad: - iova domain in question
 * @size: - size of page frames to allocate
 * @limit_pfn: - max limit address
 * This function tries to satisfy an iova allocation from the rcache,
 * and falls back to regular allocation on failure.
 */
unsigned long
alloc_iova_fast(struct iova_domain *iovad, unsigned long size,
                unsigned long limit_pfn)
{
        bool flushed_rcache = false;
        unsigned long iova_pfn;
        struct iova *new_iova;

        iova_pfn = iova_rcache_get(iovad, size, limit_pfn);
        if (iova_pfn)
                return iova_pfn;

retry:
        new_iova = alloc_iova(iovad, size, limit_pfn, true);
        if (!new_iova) {
                unsigned int cpu;

                if (flushed_rcache)
                        return 0;

                /* Try replenishing IOVAs by flushing rcache. */
                flushed_rcache = true;
                for_each_online_cpu(cpu)
                        free_cpu_cached_iovas(cpu, iovad);
                goto retry;
        }

        return new_iova->pfn_lo;
}
EXPORT_SYMBOL_GPL(alloc_iova_fast);

/**
 * free_iova_fast - free iova pfn range into rcache
 * @iovad: - iova domain in question.
 * @pfn: - pfn that is allocated previously
 * @size: - # of pages in range
 * This function frees an iova range by trying to put it into the rcache,
 * falling back to regular iova deallocation via free_iova() if this fails.
 */
void
free_iova_fast(struct iova_domain *iovad, unsigned long pfn, unsigned long size)
{
        if (iova_rcache_insert(iovad, pfn, size))
                return;

        free_iova(iovad, pfn);
}
EXPORT_SYMBOL_GPL(free_iova_fast);
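
/*
 * Example (illustrative sketch only): the rcache fast path is used in pairs,
 * with the pfn returned by alloc_iova_fast() handed back later together with
 * the same size; iovad, nrpages and limit_pfn are assumed to come from the
 * caller:
 *
 *	unsigned long pfn;
 *
 *	pfn = alloc_iova_fast(&iovad, nrpages, limit_pfn);
 *	if (!pfn)
 *		return -ENOMEM;
 *	...
 *	free_iova_fast(&iovad, pfn, nrpages);
 */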
 478
 479#define fq_ring_for_each(i, fq) \
 480        for ((i) = (fq)->head; (i) != (fq)->tail; (i) = ((i) + 1) % IOVA_FQ_SIZE)
 481
 482static inline bool fq_full(struct iova_fq *fq)
 483{
 484        assert_spin_locked(&fq->lock);
 485        return (((fq->tail + 1) % IOVA_FQ_SIZE) == fq->head);
 486}
 487
 488static inline unsigned fq_ring_add(struct iova_fq *fq)
 489{
 490        unsigned idx = fq->tail;
 491
 492        assert_spin_locked(&fq->lock);
 493
 494        fq->tail = (idx + 1) % IOVA_FQ_SIZE;
 495
 496        return idx;
 497}
 498
 499static void fq_ring_free(struct iova_domain *iovad, struct iova_fq *fq)
 500{
 501        u64 counter = atomic64_read(&iovad->fq_flush_finish_cnt);
 502        unsigned idx;
 503
 504        assert_spin_locked(&fq->lock);
 505
 506        fq_ring_for_each(idx, fq) {
 507
 508                if (fq->entries[idx].counter >= counter)
 509                        break;
 510
 511                if (iovad->entry_dtor)
 512                        iovad->entry_dtor(fq->entries[idx].data);
 513
 514                free_iova_fast(iovad,
 515                               fq->entries[idx].iova_pfn,
 516                               fq->entries[idx].pages);
 517
 518                fq->head = (fq->head + 1) % IOVA_FQ_SIZE;
 519        }
 520}
 521
 522static void iova_domain_flush(struct iova_domain *iovad)
 523{
 524        atomic64_inc(&iovad->fq_flush_start_cnt);
 525        iovad->flush_cb(iovad);
 526        atomic64_inc(&iovad->fq_flush_finish_cnt);
 527}
 528
static void fq_destroy_all_entries(struct iova_domain *iovad)
{
        int cpu;

        /*
         * This code runs when the iova_domain is being destroyed, so don't
         * bother to free iovas, just call the entry_dtor on all remaining
         * entries.
         */
        if (!iovad->entry_dtor)
                return;

        for_each_possible_cpu(cpu) {
                struct iova_fq *fq = per_cpu_ptr(iovad->fq, cpu);
                int idx;

                fq_ring_for_each(idx, fq)
                        iovad->entry_dtor(fq->entries[idx].data);
        }
}

static void fq_flush_timeout(unsigned long data)
{
        struct iova_domain *iovad = (struct iova_domain *)data;
        int cpu;

        atomic_set(&iovad->fq_timer_on, 0);
        iova_domain_flush(iovad);

        for_each_possible_cpu(cpu) {
                unsigned long flags;
                struct iova_fq *fq;

                fq = per_cpu_ptr(iovad->fq, cpu);
                spin_lock_irqsave(&fq->lock, flags);
                fq_ring_free(iovad, fq);
                spin_unlock_irqrestore(&fq->lock, flags);
        }
}

void queue_iova(struct iova_domain *iovad,
                unsigned long pfn, unsigned long pages,
                unsigned long data)
{
        struct iova_fq *fq = get_cpu_ptr(iovad->fq);
        unsigned long flags;
        unsigned idx;

        spin_lock_irqsave(&fq->lock, flags);

        /*
         * First remove all entries from the flush queue that have already been
         * flushed out on another CPU. This makes the fq_full() check below less
         * likely to be true.
         */
        fq_ring_free(iovad, fq);

        if (fq_full(fq)) {
                iova_domain_flush(iovad);
                fq_ring_free(iovad, fq);
        }

        idx = fq_ring_add(fq);

        fq->entries[idx].iova_pfn = pfn;
        fq->entries[idx].pages    = pages;
        fq->entries[idx].data     = data;
        fq->entries[idx].counter  = atomic64_read(&iovad->fq_flush_start_cnt);

        spin_unlock_irqrestore(&fq->lock, flags);

        if (atomic_cmpxchg(&iovad->fq_timer_on, 0, 1) == 0)
                mod_timer(&iovad->fq_timer,
                          jiffies + msecs_to_jiffies(IOVA_FQ_TIMEOUT));

        put_cpu_ptr(iovad->fq);
}
EXPORT_SYMBOL_GPL(queue_iova);
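
/*
 * Example (illustrative sketch only): a driver that wants deferred flushing
 * registers a flush callback (my_flush_all_tlbs and my_hw_invalidate_domain
 * below are hypothetical names) and then queues freed ranges instead of
 * releasing them immediately:
 *
 *	static void my_flush_all_tlbs(struct iova_domain *iovad)
 *	{
 *		my_hw_invalidate_domain(iovad);
 *	}
 *
 *	if (init_iova_flush_queue(&iovad, my_flush_all_tlbs, NULL))
 *		return -ENOMEM;
 *	...
 *	queue_iova(&iovad, pfn, nrpages, 0);
 *
 * Queued ranges are only returned to the allocator from fq_ring_free(),
 * after iova_domain_flush() has bumped fq_flush_finish_cnt past the counter
 * recorded for each entry.
 */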

/**
 * put_iova_domain - destroys the iova domain
 * @iovad: - iova domain in question.
 * All the iovas in that domain are destroyed.
 */
void put_iova_domain(struct iova_domain *iovad)
{
        struct rb_node *node;
        unsigned long flags;

        free_iova_flush_queue(iovad);
        free_iova_rcaches(iovad);
        spin_lock_irqsave(&iovad->iova_rbtree_lock, flags);
        node = rb_first(&iovad->rbroot);
        while (node) {
                struct iova *iova = rb_entry(node, struct iova, node);

                rb_erase(node, &iovad->rbroot);
                free_iova_mem(iova);
                node = rb_first(&iovad->rbroot);
        }
        spin_unlock_irqrestore(&iovad->iova_rbtree_lock, flags);
}
EXPORT_SYMBOL_GPL(put_iova_domain);

static int
__is_range_overlap(struct rb_node *node,
        unsigned long pfn_lo, unsigned long pfn_hi)
{
        struct iova *iova = rb_entry(node, struct iova, node);

        if ((pfn_lo <= iova->pfn_hi) && (pfn_hi >= iova->pfn_lo))
                return 1;
        return 0;
}

static inline struct iova *
alloc_and_init_iova(unsigned long pfn_lo, unsigned long pfn_hi)
{
        struct iova *iova;

        iova = alloc_iova_mem();
        if (iova) {
                iova->pfn_lo = pfn_lo;
                iova->pfn_hi = pfn_hi;
        }

        return iova;
}

static struct iova *
__insert_new_range(struct iova_domain *iovad,
        unsigned long pfn_lo, unsigned long pfn_hi)
{
        struct iova *iova;

        iova = alloc_and_init_iova(pfn_lo, pfn_hi);
        if (iova)
                iova_insert_rbtree(&iovad->rbroot, iova, NULL);

        return iova;
}

static void
__adjust_overlap_range(struct iova *iova,
        unsigned long *pfn_lo, unsigned long *pfn_hi)
{
        if (*pfn_lo < iova->pfn_lo)
                iova->pfn_lo = *pfn_lo;
        if (*pfn_hi > iova->pfn_hi)
                *pfn_lo = iova->pfn_hi + 1;
}

/**
 * reserve_iova - reserves an iova in the given range
 * @iovad: - iova domain pointer
 * @pfn_lo: - lower page frame address
 * @pfn_hi: - higher page frame address
 * This function reserves the address range from pfn_lo to pfn_hi so
 * that this address is not dished out as part of alloc_iova.
 */
struct iova *
reserve_iova(struct iova_domain *iovad,
        unsigned long pfn_lo, unsigned long pfn_hi)
{
        struct rb_node *node;
        unsigned long flags;
        struct iova *iova;
        unsigned int overlap = 0;

        spin_lock_irqsave(&iovad->iova_rbtree_lock, flags);
        for (node = rb_first(&iovad->rbroot); node; node = rb_next(node)) {
                if (__is_range_overlap(node, pfn_lo, pfn_hi)) {
                        iova = rb_entry(node, struct iova, node);
                        __adjust_overlap_range(iova, &pfn_lo, &pfn_hi);
                        if ((pfn_lo >= iova->pfn_lo) &&
                                (pfn_hi <= iova->pfn_hi))
                                goto finish;
                        overlap = 1;

                } else if (overlap)
                                break;
        }

        /* We are here either because this is the first reserved node,
         * or because we need to insert the remaining non-overlapping
         * address range.
         */
        iova = __insert_new_range(iovad, pfn_lo, pfn_hi);
finish:

        spin_unlock_irqrestore(&iovad->iova_rbtree_lock, flags);
        return iova;
}
EXPORT_SYMBOL_GPL(reserve_iova);
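
/*
 * Example (illustrative sketch only): carving a fixed window, such as a
 * hypothetical MSI doorbell region starting at msi_base and spanning
 * msi_size bytes, out of the allocatable space so that alloc_iova() can
 * never hand it out.  The pfn conversion is shown with a plain shift and
 * assumes a granule of PAGE_SIZE; msi_base and msi_size are hypothetical
 * values provided by the caller:
 *
 *	unsigned long lo = msi_base >> PAGE_SHIFT;
 *	unsigned long hi = (msi_base + msi_size - 1) >> PAGE_SHIFT;
 *
 *	if (!reserve_iova(&iovad, lo, hi))
 *		return -ENOMEM;
 */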

/**
 * copy_reserved_iova - copies the reserved iova ranges between domains
 * @from: - source domain from where to copy
 * @to: - destination domain where to copy
 * This function copies reserved iovas from one domain to
 * the other.
 */
void
copy_reserved_iova(struct iova_domain *from, struct iova_domain *to)
{
        unsigned long flags;
        struct rb_node *node;

        spin_lock_irqsave(&from->iova_rbtree_lock, flags);
        for (node = rb_first(&from->rbroot); node; node = rb_next(node)) {
                struct iova *iova = rb_entry(node, struct iova, node);
                struct iova *new_iova;

                new_iova = reserve_iova(to, iova->pfn_lo, iova->pfn_hi);
                if (!new_iova)
                        printk(KERN_ERR "Reserve iova range %lx-%lx failed\n",
                                iova->pfn_lo, iova->pfn_hi);
        }
        spin_unlock_irqrestore(&from->iova_rbtree_lock, flags);
}
EXPORT_SYMBOL_GPL(copy_reserved_iova);

struct iova *
split_and_remove_iova(struct iova_domain *iovad, struct iova *iova,
                      unsigned long pfn_lo, unsigned long pfn_hi)
{
        unsigned long flags;
        struct iova *prev = NULL, *next = NULL;

        spin_lock_irqsave(&iovad->iova_rbtree_lock, flags);
        if (iova->pfn_lo < pfn_lo) {
                prev = alloc_and_init_iova(iova->pfn_lo, pfn_lo - 1);
                if (prev == NULL)
                        goto error;
        }
        if (iova->pfn_hi > pfn_hi) {
                next = alloc_and_init_iova(pfn_hi + 1, iova->pfn_hi);
                if (next == NULL)
                        goto error;
        }

        __cached_rbnode_delete_update(iovad, iova);
        rb_erase(&iova->node, &iovad->rbroot);

        if (prev) {
                iova_insert_rbtree(&iovad->rbroot, prev, NULL);
                iova->pfn_lo = pfn_lo;
        }
        if (next) {
                iova_insert_rbtree(&iovad->rbroot, next, NULL);
                iova->pfn_hi = pfn_hi;
        }
        spin_unlock_irqrestore(&iovad->iova_rbtree_lock, flags);

        return iova;

error:
        spin_unlock_irqrestore(&iovad->iova_rbtree_lock, flags);
        if (prev)
                free_iova_mem(prev);
        return NULL;
}

/*
 * Magazine caches for IOVA ranges.  For an introduction to magazines,
 * see the USENIX 2001 paper "Magazines and Vmem: Extending the Slab
 * Allocator to Many CPUs and Arbitrary Resources" by Bonwick and Adams.
 * For simplicity, we use a static magazine size and don't implement the
 * dynamic size tuning described in the paper.
 */

#define IOVA_MAG_SIZE 128

struct iova_magazine {
        unsigned long size;
        unsigned long pfns[IOVA_MAG_SIZE];
};

struct iova_cpu_rcache {
        spinlock_t lock;
        struct iova_magazine *loaded;
        struct iova_magazine *prev;
};

static struct iova_magazine *iova_magazine_alloc(gfp_t flags)
{
        return kzalloc(sizeof(struct iova_magazine), flags);
}

static void iova_magazine_free(struct iova_magazine *mag)
{
        kfree(mag);
}

static void
iova_magazine_free_pfns(struct iova_magazine *mag, struct iova_domain *iovad)
{
        unsigned long flags;
        int i;

        if (!mag)
                return;

        spin_lock_irqsave(&iovad->iova_rbtree_lock, flags);

        for (i = 0; i < mag->size; ++i) {
                struct iova *iova = private_find_iova(iovad, mag->pfns[i]);

                BUG_ON(!iova);
                private_free_iova(iovad, iova);
        }

        spin_unlock_irqrestore(&iovad->iova_rbtree_lock, flags);

        mag->size = 0;
}

static bool iova_magazine_full(struct iova_magazine *mag)
{
        return (mag && mag->size == IOVA_MAG_SIZE);
}

static bool iova_magazine_empty(struct iova_magazine *mag)
{
        return (!mag || mag->size == 0);
}

static unsigned long iova_magazine_pop(struct iova_magazine *mag,
                                       unsigned long limit_pfn)
{
        BUG_ON(iova_magazine_empty(mag));

        if (mag->pfns[mag->size - 1] >= limit_pfn)
                return 0;

        return mag->pfns[--mag->size];
}

static void iova_magazine_push(struct iova_magazine *mag, unsigned long pfn)
{
        BUG_ON(iova_magazine_full(mag));

        mag->pfns[mag->size++] = pfn;
}

static void init_iova_rcaches(struct iova_domain *iovad)
{
        struct iova_cpu_rcache *cpu_rcache;
        struct iova_rcache *rcache;
        unsigned int cpu;
        int i;

        for (i = 0; i < IOVA_RANGE_CACHE_MAX_SIZE; ++i) {
                rcache = &iovad->rcaches[i];
                spin_lock_init(&rcache->lock);
                rcache->depot_size = 0;
                rcache->cpu_rcaches = __alloc_percpu(sizeof(*cpu_rcache), cache_line_size());
                if (WARN_ON(!rcache->cpu_rcaches))
                        continue;
                for_each_possible_cpu(cpu) {
                        cpu_rcache = per_cpu_ptr(rcache->cpu_rcaches, cpu);
                        spin_lock_init(&cpu_rcache->lock);
                        cpu_rcache->loaded = iova_magazine_alloc(GFP_KERNEL);
                        cpu_rcache->prev = iova_magazine_alloc(GFP_KERNEL);
                }
        }
}

/*
 * Try inserting IOVA range starting with 'iova_pfn' into 'rcache', and
 * return true on success.  Can fail if rcache is full and we can't free
 * space, in which case free_iova_fast() (our only caller) falls back to
 * returning the IOVA range to the rbtree via free_iova() instead.
 */
static bool __iova_rcache_insert(struct iova_domain *iovad,
                                 struct iova_rcache *rcache,
                                 unsigned long iova_pfn)
{
        struct iova_magazine *mag_to_free = NULL;
        struct iova_cpu_rcache *cpu_rcache;
        bool can_insert = false;
        unsigned long flags;

        cpu_rcache = raw_cpu_ptr(rcache->cpu_rcaches);
        spin_lock_irqsave(&cpu_rcache->lock, flags);

        if (!iova_magazine_full(cpu_rcache->loaded)) {
                can_insert = true;
        } else if (!iova_magazine_full(cpu_rcache->prev)) {
                swap(cpu_rcache->prev, cpu_rcache->loaded);
                can_insert = true;
        } else {
                struct iova_magazine *new_mag = iova_magazine_alloc(GFP_ATOMIC);

                if (new_mag) {
                        spin_lock(&rcache->lock);
                        if (rcache->depot_size < MAX_GLOBAL_MAGS) {
                                rcache->depot[rcache->depot_size++] =
                                                cpu_rcache->loaded;
                        } else {
                                mag_to_free = cpu_rcache->loaded;
                        }
                        spin_unlock(&rcache->lock);

                        cpu_rcache->loaded = new_mag;
                        can_insert = true;
                }
        }

        if (can_insert)
                iova_magazine_push(cpu_rcache->loaded, iova_pfn);

        spin_unlock_irqrestore(&cpu_rcache->lock, flags);

        if (mag_to_free) {
                iova_magazine_free_pfns(mag_to_free, iovad);
                iova_magazine_free(mag_to_free);
        }

        return can_insert;
}

static bool iova_rcache_insert(struct iova_domain *iovad, unsigned long pfn,
                               unsigned long size)
{
        unsigned int log_size = order_base_2(size);

        if (log_size >= IOVA_RANGE_CACHE_MAX_SIZE)
                return false;

        return __iova_rcache_insert(iovad, &iovad->rcaches[log_size], pfn);
}

/*
 * Caller wants to allocate a new IOVA range from 'rcache'.  If we can
 * satisfy the request, return a matching non-NULL range and remove
 * it from the 'rcache'.
 */
static unsigned long __iova_rcache_get(struct iova_rcache *rcache,
                                       unsigned long limit_pfn)
{
        struct iova_cpu_rcache *cpu_rcache;
        unsigned long iova_pfn = 0;
        bool has_pfn = false;
        unsigned long flags;

        cpu_rcache = raw_cpu_ptr(rcache->cpu_rcaches);
        spin_lock_irqsave(&cpu_rcache->lock, flags);

        if (!iova_magazine_empty(cpu_rcache->loaded)) {
                has_pfn = true;
        } else if (!iova_magazine_empty(cpu_rcache->prev)) {
                swap(cpu_rcache->prev, cpu_rcache->loaded);
                has_pfn = true;
        } else {
                spin_lock(&rcache->lock);
                if (rcache->depot_size > 0) {
                        iova_magazine_free(cpu_rcache->loaded);
                        cpu_rcache->loaded = rcache->depot[--rcache->depot_size];
                        has_pfn = true;
                }
                spin_unlock(&rcache->lock);
        }

        if (has_pfn)
                iova_pfn = iova_magazine_pop(cpu_rcache->loaded, limit_pfn);

        spin_unlock_irqrestore(&cpu_rcache->lock, flags);

        return iova_pfn;
}

/*
 * Try to satisfy IOVA allocation range from rcache.  Fail if requested
 * size is too big or the DMA limit we are given isn't satisfied by the
 * top element in the magazine.
 */
static unsigned long iova_rcache_get(struct iova_domain *iovad,
                                     unsigned long size,
                                     unsigned long limit_pfn)
{
        unsigned int log_size = order_base_2(size);

        if (log_size >= IOVA_RANGE_CACHE_MAX_SIZE)
                return 0;

        return __iova_rcache_get(&iovad->rcaches[log_size], limit_pfn);
}

/*
 * Free a cpu's rcache.
 */
static void free_cpu_iova_rcache(unsigned int cpu, struct iova_domain *iovad,
                                 struct iova_rcache *rcache)
{
        struct iova_cpu_rcache *cpu_rcache = per_cpu_ptr(rcache->cpu_rcaches, cpu);
        unsigned long flags;

        spin_lock_irqsave(&cpu_rcache->lock, flags);

        iova_magazine_free_pfns(cpu_rcache->loaded, iovad);
        iova_magazine_free(cpu_rcache->loaded);

        iova_magazine_free_pfns(cpu_rcache->prev, iovad);
        iova_magazine_free(cpu_rcache->prev);

        spin_unlock_irqrestore(&cpu_rcache->lock, flags);
}

/*
 * free rcache data structures.
 */
static void free_iova_rcaches(struct iova_domain *iovad)
{
        struct iova_rcache *rcache;
        unsigned long flags;
        unsigned int cpu;
        int i, j;

        for (i = 0; i < IOVA_RANGE_CACHE_MAX_SIZE; ++i) {
                rcache = &iovad->rcaches[i];
                for_each_possible_cpu(cpu)
                        free_cpu_iova_rcache(cpu, iovad, rcache);
                spin_lock_irqsave(&rcache->lock, flags);
                free_percpu(rcache->cpu_rcaches);
                for (j = 0; j < rcache->depot_size; ++j) {
                        iova_magazine_free_pfns(rcache->depot[j], iovad);
                        iova_magazine_free(rcache->depot[j]);
                }
                spin_unlock_irqrestore(&rcache->lock, flags);
        }
}

/*
 * free all the IOVA ranges cached by a cpu (used when cpu is unplugged)
 */
void free_cpu_cached_iovas(unsigned int cpu, struct iova_domain *iovad)
{
        struct iova_cpu_rcache *cpu_rcache;
        struct iova_rcache *rcache;
        unsigned long flags;
        int i;

        for (i = 0; i < IOVA_RANGE_CACHE_MAX_SIZE; ++i) {
                rcache = &iovad->rcaches[i];
                cpu_rcache = per_cpu_ptr(rcache->cpu_rcaches, cpu);
                spin_lock_irqsave(&cpu_rcache->lock, flags);
                iova_magazine_free_pfns(cpu_rcache->loaded, iovad);
                iova_magazine_free_pfns(cpu_rcache->prev, iovad);
                spin_unlock_irqrestore(&cpu_rcache->lock, flags);
        }
}

MODULE_AUTHOR("Anil S Keshavamurthy <anil.s.keshavamurthy@intel.com>");
MODULE_LICENSE("GPL");