linux/drivers/iommu/iova.c
   1// SPDX-License-Identifier: GPL-2.0-only
   2/*
   3 * Copyright © 2006-2009, Intel Corporation.
   4 *
   5 * Author: Anil S Keshavamurthy <anil.s.keshavamurthy@intel.com>
   6 */
   7
   8#include <linux/iova.h>
   9#include <linux/module.h>
  10#include <linux/slab.h>
  11#include <linux/smp.h>
  12#include <linux/bitops.h>
  13#include <linux/cpu.h>
  14
  15/* The anchor node sits above the top of the usable address space */
  16#define IOVA_ANCHOR     ~0UL
  17
  18static bool iova_rcache_insert(struct iova_domain *iovad,
  19                               unsigned long pfn,
  20                               unsigned long size);
  21static unsigned long iova_rcache_get(struct iova_domain *iovad,
  22                                     unsigned long size,
  23                                     unsigned long limit_pfn);
  24static void init_iova_rcaches(struct iova_domain *iovad);
  25static void free_cpu_cached_iovas(unsigned int cpu, struct iova_domain *iovad);
  26static void free_iova_rcaches(struct iova_domain *iovad);
  27static void fq_destroy_all_entries(struct iova_domain *iovad);
  28static void fq_flush_timeout(struct timer_list *t);
  29
  30static int iova_cpuhp_dead(unsigned int cpu, struct hlist_node *node)
  31{
  32        struct iova_domain *iovad;
  33
  34        iovad = hlist_entry_safe(node, struct iova_domain, cpuhp_dead);
  35
  36        free_cpu_cached_iovas(cpu, iovad);
  37        return 0;
  38}
  39
  40static void free_global_cached_iovas(struct iova_domain *iovad);
  41
  42static struct iova *to_iova(struct rb_node *node)
  43{
  44        return rb_entry(node, struct iova, node);
  45}
  46
  47void
  48init_iova_domain(struct iova_domain *iovad, unsigned long granule,
  49        unsigned long start_pfn)
  50{
  51        /*
  52         * IOVA granularity will normally be equal to the smallest
  53         * supported IOMMU page size; both *must* be capable of
  54         * representing individual CPU pages exactly.
  55         */
  56        BUG_ON((granule > PAGE_SIZE) || !is_power_of_2(granule));
  57
  58        spin_lock_init(&iovad->iova_rbtree_lock);
  59        iovad->rbroot = RB_ROOT;
  60        iovad->cached_node = &iovad->anchor.node;
  61        iovad->cached32_node = &iovad->anchor.node;
  62        iovad->granule = granule;
  63        iovad->start_pfn = start_pfn;
  64        iovad->dma_32bit_pfn = 1UL << (32 - iova_shift(iovad));
  65        iovad->max32_alloc_size = iovad->dma_32bit_pfn;
  66        iovad->flush_cb = NULL;
  67        iovad->fq = NULL;
  68        iovad->anchor.pfn_lo = iovad->anchor.pfn_hi = IOVA_ANCHOR;
  69        rb_link_node(&iovad->anchor.node, NULL, &iovad->rbroot.rb_node);
  70        rb_insert_color(&iovad->anchor.node, &iovad->rbroot);
  71        cpuhp_state_add_instance_nocalls(CPUHP_IOMMU_IOVA_DEAD, &iovad->cpuhp_dead);
  72        init_iova_rcaches(iovad);
  73}
  74EXPORT_SYMBOL_GPL(init_iova_domain);
  75
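/*
 * Editor's illustration (not part of the driver): a typical domain setup.
 * The PAGE_SIZE granule and the 1 MiB start_pfn below are assumptions for
 * the example; the conversion helpers come from <linux/iova.h>.
 *
 *	struct iova_domain iovad;
 *
 *	// one IOVA page per CPU page, nothing handed out below 1 MiB
 *	init_iova_domain(&iovad, PAGE_SIZE, PHYS_PFN(SZ_1M));
 *
 *	// dma_addr_t <-> pfn conversions then use the iova.h helpers:
 *	//	pfn  = iova_pfn(&iovad, dma_addr);
 *	//	addr = (dma_addr_t)pfn << iova_shift(&iovad);
 *
 * iova_cache_get() must have succeeded beforehand (see iova_cache_get()
 * below) so that alloc_iova_mem() has a kmem_cache to draw from.
 */
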
  76static bool has_iova_flush_queue(struct iova_domain *iovad)
  77{
  78        return !!iovad->fq;
  79}
  80
  81static void free_iova_flush_queue(struct iova_domain *iovad)
  82{
  83        if (!has_iova_flush_queue(iovad))
  84                return;
  85
  86        if (timer_pending(&iovad->fq_timer))
  87                del_timer(&iovad->fq_timer);
  88
  89        fq_destroy_all_entries(iovad);
  90
  91        free_percpu(iovad->fq);
  92
  93        iovad->fq         = NULL;
  94        iovad->flush_cb   = NULL;
  95        iovad->entry_dtor = NULL;
  96}
  97
  98int init_iova_flush_queue(struct iova_domain *iovad,
  99                          iova_flush_cb flush_cb, iova_entry_dtor entry_dtor)
 100{
 101        struct iova_fq __percpu *queue;
 102        int cpu;
 103
 104        atomic64_set(&iovad->fq_flush_start_cnt,  0);
 105        atomic64_set(&iovad->fq_flush_finish_cnt, 0);
 106
 107        queue = alloc_percpu(struct iova_fq);
 108        if (!queue)
 109                return -ENOMEM;
 110
 111        iovad->flush_cb   = flush_cb;
 112        iovad->entry_dtor = entry_dtor;
 113
 114        for_each_possible_cpu(cpu) {
 115                struct iova_fq *fq;
 116
 117                fq = per_cpu_ptr(queue, cpu);
 118                fq->head = 0;
 119                fq->tail = 0;
 120
 121                spin_lock_init(&fq->lock);
 122        }
 123
 124        iovad->fq = queue;
 125
 126        timer_setup(&iovad->fq_timer, fq_flush_timeout, 0);
 127        atomic_set(&iovad->fq_timer_on, 0);
 128
 129        return 0;
 130}
 131
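/*
 * Editor's sketch of attaching a flush queue; the callback names are
 * hypothetical, their signatures follow the iova_flush_cb/iova_entry_dtor
 * typedefs in <linux/iova.h>. flush_cb is expected to invalidate the IOTLB
 * for the whole domain, entry_dtor (optional, may be NULL) releases the
 * per-entry @data cookie passed to queue_iova().
 *
 *	static void example_flush_all(struct iova_domain *iovad)
 *	{
 *		// domain-wide IOTLB invalidation in the IOMMU driver
 *	}
 *
 *	static void example_entry_dtor(unsigned long data)
 *	{
 *		// release whatever the unmap path stashed in @data
 *	}
 *
 *	if (init_iova_flush_queue(&iovad, example_flush_all, example_entry_dtor))
 *		pr_warn("iova flush queue unavailable, using strict invalidation\n");
 */
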
 132static struct rb_node *
 133__get_cached_rbnode(struct iova_domain *iovad, unsigned long limit_pfn)
 134{
 135        if (limit_pfn <= iovad->dma_32bit_pfn)
 136                return iovad->cached32_node;
 137
 138        return iovad->cached_node;
 139}
 140
 141static void
 142__cached_rbnode_insert_update(struct iova_domain *iovad, struct iova *new)
 143{
 144        if (new->pfn_hi < iovad->dma_32bit_pfn)
 145                iovad->cached32_node = &new->node;
 146        else
 147                iovad->cached_node = &new->node;
 148}
 149
 150static void
 151__cached_rbnode_delete_update(struct iova_domain *iovad, struct iova *free)
 152{
 153        struct iova *cached_iova;
 154
 155        cached_iova = to_iova(iovad->cached32_node);
 156        if (free == cached_iova ||
 157            (free->pfn_hi < iovad->dma_32bit_pfn &&
 158             free->pfn_lo >= cached_iova->pfn_lo)) {
 159                iovad->cached32_node = rb_next(&free->node);
 160                iovad->max32_alloc_size = iovad->dma_32bit_pfn;
 161        }
 162
 163        cached_iova = to_iova(iovad->cached_node);
 164        if (free->pfn_lo >= cached_iova->pfn_lo)
 165                iovad->cached_node = rb_next(&free->node);
 166}
 167
 168static struct rb_node *iova_find_limit(struct iova_domain *iovad, unsigned long limit_pfn)
 169{
 170        struct rb_node *node, *next;
 171        /*
 172         * Ideally what we'd like to judge here is whether limit_pfn is close
 173         * enough to the highest-allocated IOVA that starting the allocation
 174         * walk from the anchor node will be quicker than this initial work to
 175         * find an exact starting point (especially if that ends up being the
 176         * anchor node anyway). This is an incredibly crude approximation which
 177         * only really helps the most likely case, but is at least trivially easy.
 178         */
 179        if (limit_pfn > iovad->dma_32bit_pfn)
 180                return &iovad->anchor.node;
 181
 182        node = iovad->rbroot.rb_node;
 183        while (to_iova(node)->pfn_hi < limit_pfn)
 184                node = node->rb_right;
 185
 186search_left:
 187        while (node->rb_left && to_iova(node->rb_left)->pfn_lo >= limit_pfn)
 188                node = node->rb_left;
 189
 190        if (!node->rb_left)
 191                return node;
 192
 193        next = node->rb_left;
 194        while (next->rb_right) {
 195                next = next->rb_right;
 196                if (to_iova(next)->pfn_lo >= limit_pfn) {
 197                        node = next;
 198                        goto search_left;
 199                }
 200        }
 201
 202        return node;
 203}
 204
  205/* Insert the iova into the domain rbtree while holding the writer lock */
 206static void
 207iova_insert_rbtree(struct rb_root *root, struct iova *iova,
 208                   struct rb_node *start)
 209{
 210        struct rb_node **new, *parent = NULL;
 211
 212        new = (start) ? &start : &(root->rb_node);
 213        /* Figure out where to put new node */
 214        while (*new) {
 215                struct iova *this = to_iova(*new);
 216
 217                parent = *new;
 218
 219                if (iova->pfn_lo < this->pfn_lo)
 220                        new = &((*new)->rb_left);
 221                else if (iova->pfn_lo > this->pfn_lo)
 222                        new = &((*new)->rb_right);
 223                else {
 224                        WARN_ON(1); /* this should not happen */
 225                        return;
 226                }
 227        }
 228        /* Add new node and rebalance tree. */
 229        rb_link_node(&iova->node, parent, new);
 230        rb_insert_color(&iova->node, root);
 231}
 232
 233static int __alloc_and_insert_iova_range(struct iova_domain *iovad,
 234                unsigned long size, unsigned long limit_pfn,
 235                        struct iova *new, bool size_aligned)
 236{
 237        struct rb_node *curr, *prev;
 238        struct iova *curr_iova;
 239        unsigned long flags;
 240        unsigned long new_pfn, retry_pfn;
 241        unsigned long align_mask = ~0UL;
 242        unsigned long high_pfn = limit_pfn, low_pfn = iovad->start_pfn;
 243
 244        if (size_aligned)
 245                align_mask <<= fls_long(size - 1);
 246
 247        /* Walk the tree backwards */
 248        spin_lock_irqsave(&iovad->iova_rbtree_lock, flags);
 249        if (limit_pfn <= iovad->dma_32bit_pfn &&
 250                        size >= iovad->max32_alloc_size)
 251                goto iova32_full;
 252
 253        curr = __get_cached_rbnode(iovad, limit_pfn);
 254        curr_iova = to_iova(curr);
 255        retry_pfn = curr_iova->pfn_hi + 1;
 256
 257retry:
 258        do {
 259                high_pfn = min(high_pfn, curr_iova->pfn_lo);
 260                new_pfn = (high_pfn - size) & align_mask;
 261                prev = curr;
 262                curr = rb_prev(curr);
 263                curr_iova = to_iova(curr);
 264        } while (curr && new_pfn <= curr_iova->pfn_hi && new_pfn >= low_pfn);
 265
 266        if (high_pfn < size || new_pfn < low_pfn) {
 267                if (low_pfn == iovad->start_pfn && retry_pfn < limit_pfn) {
 268                        high_pfn = limit_pfn;
 269                        low_pfn = retry_pfn;
 270                        curr = iova_find_limit(iovad, limit_pfn);
 271                        curr_iova = to_iova(curr);
 272                        goto retry;
 273                }
 274                iovad->max32_alloc_size = size;
 275                goto iova32_full;
 276        }
 277
 278        /* pfn_lo will point to size aligned address if size_aligned is set */
 279        new->pfn_lo = new_pfn;
 280        new->pfn_hi = new->pfn_lo + size - 1;
 281
 282        /* If we have 'prev', it's a valid place to start the insertion. */
 283        iova_insert_rbtree(&iovad->rbroot, new, prev);
 284        __cached_rbnode_insert_update(iovad, new);
 285
 286        spin_unlock_irqrestore(&iovad->iova_rbtree_lock, flags);
 287        return 0;
 288
 289iova32_full:
 290        spin_unlock_irqrestore(&iovad->iova_rbtree_lock, flags);
 291        return -ENOMEM;
 292}
 293
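/*
 * Worked example for the size_aligned case above (editor's note): for
 * size = 5 granules, fls_long(5 - 1) = 3, so align_mask becomes ~0UL << 3
 * and new_pfn is rounded down to a multiple of 8, i.e. the allocation is
 * aligned on roundup_power_of_two(size) as documented at alloc_iova().
 */
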
 294static struct kmem_cache *iova_cache;
 295static unsigned int iova_cache_users;
 296static DEFINE_MUTEX(iova_cache_mutex);
 297
 298static struct iova *alloc_iova_mem(void)
 299{
 300        return kmem_cache_zalloc(iova_cache, GFP_ATOMIC | __GFP_NOWARN);
 301}
 302
 303static void free_iova_mem(struct iova *iova)
 304{
 305        if (iova->pfn_lo != IOVA_ANCHOR)
 306                kmem_cache_free(iova_cache, iova);
 307}
 308
 309int iova_cache_get(void)
 310{
 311        mutex_lock(&iova_cache_mutex);
 312        if (!iova_cache_users) {
 313                int ret;
 314
 315                ret = cpuhp_setup_state_multi(CPUHP_IOMMU_IOVA_DEAD, "iommu/iova:dead", NULL,
 316                                        iova_cpuhp_dead);
 317                if (ret) {
 318                        mutex_unlock(&iova_cache_mutex);
 319                        pr_err("Couldn't register cpuhp handler\n");
 320                        return ret;
 321                }
 322
 323                iova_cache = kmem_cache_create(
 324                        "iommu_iova", sizeof(struct iova), 0,
 325                        SLAB_HWCACHE_ALIGN, NULL);
 326                if (!iova_cache) {
 327                        cpuhp_remove_multi_state(CPUHP_IOMMU_IOVA_DEAD);
 328                        mutex_unlock(&iova_cache_mutex);
 329                        pr_err("Couldn't create iova cache\n");
 330                        return -ENOMEM;
 331                }
 332        }
 333
 334        iova_cache_users++;
 335        mutex_unlock(&iova_cache_mutex);
 336
 337        return 0;
 338}
 339EXPORT_SYMBOL_GPL(iova_cache_get);
 340
 341void iova_cache_put(void)
 342{
 343        mutex_lock(&iova_cache_mutex);
 344        if (WARN_ON(!iova_cache_users)) {
 345                mutex_unlock(&iova_cache_mutex);
 346                return;
 347        }
 348        iova_cache_users--;
 349        if (!iova_cache_users) {
 350                cpuhp_remove_multi_state(CPUHP_IOMMU_IOVA_DEAD);
 351                kmem_cache_destroy(iova_cache);
 352        }
 353        mutex_unlock(&iova_cache_mutex);
 354}
 355EXPORT_SYMBOL_GPL(iova_cache_put);
 356
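/*
 * Editor's illustration of the intended pairing (variable names are
 * hypothetical): the cache reference is taken before any domain is
 * initialised and dropped only after the last domain is destroyed.
 *
 *	ret = iova_cache_get();
 *	if (ret)
 *		return ret;
 *	init_iova_domain(&iovad, PAGE_SIZE, start_pfn);
 *	...
 *	put_iova_domain(&iovad);
 *	iova_cache_put();
 */
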
 357/**
 358 * alloc_iova - allocates an iova
 359 * @iovad: - iova domain in question
  360 * @size: - size of the allocation, in page frames
 361 * @limit_pfn: - max limit address
 362 * @size_aligned: - set if size_aligned address range is required
 363 * This function allocates an iova in the range iovad->start_pfn to limit_pfn,
 364 * searching top-down from limit_pfn to iovad->start_pfn. If the size_aligned
 365 * flag is set then the allocated address iova->pfn_lo will be naturally
 366 * aligned on roundup_power_of_two(size).
 367 */
 368struct iova *
 369alloc_iova(struct iova_domain *iovad, unsigned long size,
 370        unsigned long limit_pfn,
 371        bool size_aligned)
 372{
 373        struct iova *new_iova;
 374        int ret;
 375
 376        new_iova = alloc_iova_mem();
 377        if (!new_iova)
 378                return NULL;
 379
 380        ret = __alloc_and_insert_iova_range(iovad, size, limit_pfn + 1,
 381                        new_iova, size_aligned);
 382
 383        if (ret) {
 384                free_iova_mem(new_iova);
 385                return NULL;
 386        }
 387
 388        return new_iova;
 389}
 390EXPORT_SYMBOL_GPL(alloc_iova);
 391
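/*
 * Editor's sketch of the slow-path allocator in use; the 32-bit DMA limit
 * is just an example. Note that @limit_pfn is inclusive: the allocation
 * ends at or below it (internally limit_pfn + 1 is used as the exclusive
 * bound).
 *
 *	struct iova *iova;
 *	unsigned long limit = DMA_BIT_MASK(32) >> iova_shift(&iovad);
 *
 *	iova = alloc_iova(&iovad, nr_pages, limit, true);	// size-aligned
 *	if (!iova)
 *		return -ENOMEM;
 *	dma_addr = iova_dma_addr(&iovad, iova);
 *	...
 *	__free_iova(&iovad, iova);	// or free_iova(&iovad, iova->pfn_lo)
 */
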
 392static struct iova *
 393private_find_iova(struct iova_domain *iovad, unsigned long pfn)
 394{
 395        struct rb_node *node = iovad->rbroot.rb_node;
 396
 397        assert_spin_locked(&iovad->iova_rbtree_lock);
 398
 399        while (node) {
 400                struct iova *iova = to_iova(node);
 401
 402                if (pfn < iova->pfn_lo)
 403                        node = node->rb_left;
 404                else if (pfn > iova->pfn_hi)
 405                        node = node->rb_right;
 406                else
 407                        return iova;    /* pfn falls within iova's range */
 408        }
 409
 410        return NULL;
 411}
 412
 413static void remove_iova(struct iova_domain *iovad, struct iova *iova)
 414{
 415        assert_spin_locked(&iovad->iova_rbtree_lock);
 416        __cached_rbnode_delete_update(iovad, iova);
 417        rb_erase(&iova->node, &iovad->rbroot);
 418}
 419
 420/**
 421 * find_iova - finds an iova for a given pfn
 422 * @iovad: - iova domain in question.
 423 * @pfn: - page frame number
 424 * This function finds and returns an iova belonging to the
 425 * given domain which matches the given pfn.
 426 */
 427struct iova *find_iova(struct iova_domain *iovad, unsigned long pfn)
 428{
 429        unsigned long flags;
 430        struct iova *iova;
 431
 432        /* Take the lock so that no other thread is manipulating the rbtree */
 433        spin_lock_irqsave(&iovad->iova_rbtree_lock, flags);
 434        iova = private_find_iova(iovad, pfn);
 435        spin_unlock_irqrestore(&iovad->iova_rbtree_lock, flags);
 436        return iova;
 437}
 438EXPORT_SYMBOL_GPL(find_iova);
 439
 440/**
 441 * __free_iova - frees the given iova
 442 * @iovad: iova domain in question.
 443 * @iova: iova in question.
  444 * Frees the given iova belonging to the given domain
 445 */
 446void
 447__free_iova(struct iova_domain *iovad, struct iova *iova)
 448{
 449        unsigned long flags;
 450
 451        spin_lock_irqsave(&iovad->iova_rbtree_lock, flags);
 452        remove_iova(iovad, iova);
 453        spin_unlock_irqrestore(&iovad->iova_rbtree_lock, flags);
 454        free_iova_mem(iova);
 455}
 456EXPORT_SYMBOL_GPL(__free_iova);
 457
 458/**
 459 * free_iova - finds and frees the iova for a given pfn
 460 * @iovad: - iova domain in question.
 461 * @pfn: - pfn that is allocated previously
  462 * This function finds an iova for a given pfn and then
 463 * frees the iova from that domain.
 464 */
 465void
 466free_iova(struct iova_domain *iovad, unsigned long pfn)
 467{
 468        unsigned long flags;
 469        struct iova *iova;
 470
 471        spin_lock_irqsave(&iovad->iova_rbtree_lock, flags);
 472        iova = private_find_iova(iovad, pfn);
 473        if (!iova) {
 474                spin_unlock_irqrestore(&iovad->iova_rbtree_lock, flags);
 475                return;
 476        }
 477        remove_iova(iovad, iova);
 478        spin_unlock_irqrestore(&iovad->iova_rbtree_lock, flags);
 479        free_iova_mem(iova);
 480}
 481EXPORT_SYMBOL_GPL(free_iova);
 482
 483/**
 484 * alloc_iova_fast - allocates an iova from rcache
 485 * @iovad: - iova domain in question
  486 * @size: - size of the allocation, in page frames
 487 * @limit_pfn: - max limit address
 488 * @flush_rcache: - set to flush rcache on regular allocation failure
 489 * This function tries to satisfy an iova allocation from the rcache,
 490 * and falls back to regular allocation on failure. If regular allocation
 491 * fails too and the flush_rcache flag is set then the rcache will be flushed.
  492 */
 493unsigned long
 494alloc_iova_fast(struct iova_domain *iovad, unsigned long size,
 495                unsigned long limit_pfn, bool flush_rcache)
 496{
 497        unsigned long iova_pfn;
 498        struct iova *new_iova;
 499
 500        iova_pfn = iova_rcache_get(iovad, size, limit_pfn + 1);
 501        if (iova_pfn)
 502                return iova_pfn;
 503
 504retry:
 505        new_iova = alloc_iova(iovad, size, limit_pfn, true);
 506        if (!new_iova) {
 507                unsigned int cpu;
 508
 509                if (!flush_rcache)
 510                        return 0;
 511
 512                /* Try replenishing IOVAs by flushing rcache. */
 513                flush_rcache = false;
 514                for_each_online_cpu(cpu)
 515                        free_cpu_cached_iovas(cpu, iovad);
 516                free_global_cached_iovas(iovad);
 517                goto retry;
 518        }
 519
 520        return new_iova->pfn_lo;
 521}
 522EXPORT_SYMBOL_GPL(alloc_iova_fast);
 523
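/*
 * Editor's sketch of the cached fast path, mirroring how DMA API
 * implementations typically drive it; dma_limit and nr_pages are
 * placeholders.
 *
 *	unsigned long shift = iova_shift(&iovad);
 *	unsigned long pfn;
 *
 *	pfn = alloc_iova_fast(&iovad, nr_pages, dma_limit >> shift, true);
 *	if (!pfn)
 *		return DMA_MAPPING_ERROR;
 *	dma_addr = (dma_addr_t)pfn << shift;
 *	...
 *	free_iova_fast(&iovad, pfn, nr_pages);	// back to the rcache if possible
 */
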
 524/**
 525 * free_iova_fast - free iova pfn range into rcache
 526 * @iovad: - iova domain in question.
 527 * @pfn: - pfn that is allocated previously
 528 * @size: - # of pages in range
  529 * This function frees an iova range by trying to put it into the rcache,
 530 * falling back to regular iova deallocation via free_iova() if this fails.
 531 */
 532void
 533free_iova_fast(struct iova_domain *iovad, unsigned long pfn, unsigned long size)
 534{
 535        if (iova_rcache_insert(iovad, pfn, size))
 536                return;
 537
 538        free_iova(iovad, pfn);
 539}
 540EXPORT_SYMBOL_GPL(free_iova_fast);
 541
 542#define fq_ring_for_each(i, fq) \
 543        for ((i) = (fq)->head; (i) != (fq)->tail; (i) = ((i) + 1) % IOVA_FQ_SIZE)
 544
 545static inline bool fq_full(struct iova_fq *fq)
 546{
 547        assert_spin_locked(&fq->lock);
 548        return (((fq->tail + 1) % IOVA_FQ_SIZE) == fq->head);
 549}
 550
 551static inline unsigned fq_ring_add(struct iova_fq *fq)
 552{
 553        unsigned idx = fq->tail;
 554
 555        assert_spin_locked(&fq->lock);
 556
 557        fq->tail = (idx + 1) % IOVA_FQ_SIZE;
 558
 559        return idx;
 560}
 561
 562static void fq_ring_free(struct iova_domain *iovad, struct iova_fq *fq)
 563{
 564        u64 counter = atomic64_read(&iovad->fq_flush_finish_cnt);
 565        unsigned idx;
 566
 567        assert_spin_locked(&fq->lock);
 568
 569        fq_ring_for_each(idx, fq) {
 570
 571                if (fq->entries[idx].counter >= counter)
 572                        break;
 573
 574                if (iovad->entry_dtor)
 575                        iovad->entry_dtor(fq->entries[idx].data);
 576
 577                free_iova_fast(iovad,
 578                               fq->entries[idx].iova_pfn,
 579                               fq->entries[idx].pages);
 580
 581                fq->head = (fq->head + 1) % IOVA_FQ_SIZE;
 582        }
 583}
 584
 585static void iova_domain_flush(struct iova_domain *iovad)
 586{
 587        atomic64_inc(&iovad->fq_flush_start_cnt);
 588        iovad->flush_cb(iovad);
 589        atomic64_inc(&iovad->fq_flush_finish_cnt);
 590}
 591
 592static void fq_destroy_all_entries(struct iova_domain *iovad)
 593{
 594        int cpu;
 595
 596        /*
  597 * This code runs when the iova_domain is being destroyed, so don't
 598         * bother to free iovas, just call the entry_dtor on all remaining
 599         * entries.
 600         */
 601        if (!iovad->entry_dtor)
 602                return;
 603
 604        for_each_possible_cpu(cpu) {
 605                struct iova_fq *fq = per_cpu_ptr(iovad->fq, cpu);
 606                int idx;
 607
 608                fq_ring_for_each(idx, fq)
 609                        iovad->entry_dtor(fq->entries[idx].data);
 610        }
 611}
 612
 613static void fq_flush_timeout(struct timer_list *t)
 614{
 615        struct iova_domain *iovad = from_timer(iovad, t, fq_timer);
 616        int cpu;
 617
 618        atomic_set(&iovad->fq_timer_on, 0);
 619        iova_domain_flush(iovad);
 620
 621        for_each_possible_cpu(cpu) {
 622                unsigned long flags;
 623                struct iova_fq *fq;
 624
 625                fq = per_cpu_ptr(iovad->fq, cpu);
 626                spin_lock_irqsave(&fq->lock, flags);
 627                fq_ring_free(iovad, fq);
 628                spin_unlock_irqrestore(&fq->lock, flags);
 629        }
 630}
 631
 632void queue_iova(struct iova_domain *iovad,
 633                unsigned long pfn, unsigned long pages,
 634                unsigned long data)
 635{
 636        struct iova_fq *fq;
 637        unsigned long flags;
 638        unsigned idx;
 639
 640        /*
  641         * Order against the IOMMU driver's pagetable update from unmapping
  642         * the PTEs, to guarantee that iova_domain_flush() observes it if called
 643         * from a different CPU before we release the lock below. Full barrier
 644         * so it also pairs with iommu_dma_init_fq() to avoid seeing partially
 645         * written fq state here.
 646         */
 647        smp_mb();
 648
 649        fq = raw_cpu_ptr(iovad->fq);
 650        spin_lock_irqsave(&fq->lock, flags);
 651
 652        /*
 653         * First remove all entries from the flush queue that have already been
 654         * flushed out on another CPU. This makes the fq_full() check below less
 655         * likely to be true.
 656         */
 657        fq_ring_free(iovad, fq);
 658
 659        if (fq_full(fq)) {
 660                iova_domain_flush(iovad);
 661                fq_ring_free(iovad, fq);
 662        }
 663
 664        idx = fq_ring_add(fq);
 665
 666        fq->entries[idx].iova_pfn = pfn;
 667        fq->entries[idx].pages    = pages;
 668        fq->entries[idx].data     = data;
 669        fq->entries[idx].counter  = atomic64_read(&iovad->fq_flush_start_cnt);
 670
 671        spin_unlock_irqrestore(&fq->lock, flags);
 672
 673        /* Avoid false sharing as much as possible. */
 674        if (!atomic_read(&iovad->fq_timer_on) &&
 675            !atomic_xchg(&iovad->fq_timer_on, 1))
 676                mod_timer(&iovad->fq_timer,
 677                          jiffies + msecs_to_jiffies(IOVA_FQ_TIMEOUT));
 678}
 679
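/*
 * Editor's illustration of deferred freeing: instead of free_iova_fast(),
 * an unmap path can queue the range and let the timer or a full queue
 * reclaim it once the IOTLB has been flushed. @data is opaque here and is
 * only ever handed to the entry_dtor; "freelist" is a hypothetical example.
 *
 *	// after the pagetable entries for [pfn, pfn + pages) are removed:
 *	queue_iova(&iovad, pfn, pages, (unsigned long)freelist);
 *	// the range becomes reusable after iova_domain_flush() and
 *	// fq_ring_free() have run, either from fq_flush_timeout() or
 *	// because the per-CPU queue filled up.
 */
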
 680/**
 681 * put_iova_domain - destroys the iova domain
 682 * @iovad: - iova domain in question.
 683 * All the iova's in that domain are destroyed.
 684 */
 685void put_iova_domain(struct iova_domain *iovad)
 686{
 687        struct iova *iova, *tmp;
 688
 689        cpuhp_state_remove_instance_nocalls(CPUHP_IOMMU_IOVA_DEAD,
 690                                            &iovad->cpuhp_dead);
 691
 692        free_iova_flush_queue(iovad);
 693        free_iova_rcaches(iovad);
 694        rbtree_postorder_for_each_entry_safe(iova, tmp, &iovad->rbroot, node)
 695                free_iova_mem(iova);
 696}
 697EXPORT_SYMBOL_GPL(put_iova_domain);
 698
 699static int
 700__is_range_overlap(struct rb_node *node,
 701        unsigned long pfn_lo, unsigned long pfn_hi)
 702{
 703        struct iova *iova = to_iova(node);
 704
 705        if ((pfn_lo <= iova->pfn_hi) && (pfn_hi >= iova->pfn_lo))
 706                return 1;
 707        return 0;
 708}
 709
 710static inline struct iova *
 711alloc_and_init_iova(unsigned long pfn_lo, unsigned long pfn_hi)
 712{
 713        struct iova *iova;
 714
 715        iova = alloc_iova_mem();
 716        if (iova) {
 717                iova->pfn_lo = pfn_lo;
 718                iova->pfn_hi = pfn_hi;
 719        }
 720
 721        return iova;
 722}
 723
 724static struct iova *
 725__insert_new_range(struct iova_domain *iovad,
 726        unsigned long pfn_lo, unsigned long pfn_hi)
 727{
 728        struct iova *iova;
 729
 730        iova = alloc_and_init_iova(pfn_lo, pfn_hi);
 731        if (iova)
 732                iova_insert_rbtree(&iovad->rbroot, iova, NULL);
 733
 734        return iova;
 735}
 736
 737static void
 738__adjust_overlap_range(struct iova *iova,
 739        unsigned long *pfn_lo, unsigned long *pfn_hi)
 740{
 741        if (*pfn_lo < iova->pfn_lo)
 742                iova->pfn_lo = *pfn_lo;
 743        if (*pfn_hi > iova->pfn_hi)
 744                *pfn_lo = iova->pfn_hi + 1;
 745}
 746
 747/**
 748 * reserve_iova - reserves an iova in the given range
 749 * @iovad: - iova domain pointer
 750 * @pfn_lo: - lower page frame address
  751 * @pfn_hi: - higher pfn address
  752 * This function reserves the address range from pfn_lo to pfn_hi so
  753 * that this range is not dished out as part of alloc_iova.
 754 */
 755struct iova *
 756reserve_iova(struct iova_domain *iovad,
 757        unsigned long pfn_lo, unsigned long pfn_hi)
 758{
 759        struct rb_node *node;
 760        unsigned long flags;
 761        struct iova *iova;
 762        unsigned int overlap = 0;
 763
 764        /* Don't allow nonsensical pfns */
 765        if (WARN_ON((pfn_hi | pfn_lo) > (ULLONG_MAX >> iova_shift(iovad))))
 766                return NULL;
 767
 768        spin_lock_irqsave(&iovad->iova_rbtree_lock, flags);
 769        for (node = rb_first(&iovad->rbroot); node; node = rb_next(node)) {
 770                if (__is_range_overlap(node, pfn_lo, pfn_hi)) {
 771                        iova = to_iova(node);
 772                        __adjust_overlap_range(iova, &pfn_lo, &pfn_hi);
 773                        if ((pfn_lo >= iova->pfn_lo) &&
 774                                (pfn_hi <= iova->pfn_hi))
 775                                goto finish;
 776                        overlap = 1;
 777
 778                } else if (overlap)
 779                                break;
 780        }
 781
  782        /* We are here either because this is the first reservation
  783         * or because we need to insert the remaining non-overlapping range
  784         */
 785        iova = __insert_new_range(iovad, pfn_lo, pfn_hi);
 786finish:
 787
 788        spin_unlock_irqrestore(&iovad->iova_rbtree_lock, flags);
 789        return iova;
 790}
 791EXPORT_SYMBOL_GPL(reserve_iova);
 792
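/*
 * Editor's example: carving out a window that must never be handed out by
 * alloc_iova(), e.g. an MSI doorbell or a host-bridge hole (the addresses
 * are made up for illustration).
 *
 *	// keep bus addresses 0xfee00000 - 0xfeefffff out of the allocator
 *	if (!reserve_iova(&iovad, iova_pfn(&iovad, 0xfee00000),
 *			  iova_pfn(&iovad, 0xfeefffff)))
 *		return -ENOMEM;
 */
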
 793/*
 794 * Magazine caches for IOVA ranges.  For an introduction to magazines,
 795 * see the USENIX 2001 paper "Magazines and Vmem: Extending the Slab
 796 * Allocator to Many CPUs and Arbitrary Resources" by Bonwick and Adams.
 797 * For simplicity, we use a static magazine size and don't implement the
 798 * dynamic size tuning described in the paper.
 799 */
 800
 801#define IOVA_MAG_SIZE 128
 802
 803struct iova_magazine {
 804        unsigned long size;
 805        unsigned long pfns[IOVA_MAG_SIZE];
 806};
 807
 808struct iova_cpu_rcache {
 809        spinlock_t lock;
 810        struct iova_magazine *loaded;
 811        struct iova_magazine *prev;
 812};
 813
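/*
 * Editor's note on the caching hierarchy implemented below: each CPU keeps
 * two magazines per size class ('loaded' and 'prev'), each holding up to
 * IOVA_MAG_SIZE pfns. When both are full, __iova_rcache_insert() tries to
 * park the full 'loaded' magazine in the per-class depot; only if the depot
 * already holds MAX_GLOBAL_MAGS magazines are the pfns returned to the
 * rbtree.
 */
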
 814static struct iova_magazine *iova_magazine_alloc(gfp_t flags)
 815{
 816        return kzalloc(sizeof(struct iova_magazine), flags);
 817}
 818
 819static void iova_magazine_free(struct iova_magazine *mag)
 820{
 821        kfree(mag);
 822}
 823
 824static void
 825iova_magazine_free_pfns(struct iova_magazine *mag, struct iova_domain *iovad)
 826{
 827        unsigned long flags;
 828        int i;
 829
 830        if (!mag)
 831                return;
 832
 833        spin_lock_irqsave(&iovad->iova_rbtree_lock, flags);
 834
 835        for (i = 0 ; i < mag->size; ++i) {
 836                struct iova *iova = private_find_iova(iovad, mag->pfns[i]);
 837
 838                if (WARN_ON(!iova))
 839                        continue;
 840
 841                remove_iova(iovad, iova);
 842                free_iova_mem(iova);
 843        }
 844
 845        spin_unlock_irqrestore(&iovad->iova_rbtree_lock, flags);
 846
 847        mag->size = 0;
 848}
 849
 850static bool iova_magazine_full(struct iova_magazine *mag)
 851{
 852        return (mag && mag->size == IOVA_MAG_SIZE);
 853}
 854
 855static bool iova_magazine_empty(struct iova_magazine *mag)
 856{
 857        return (!mag || mag->size == 0);
 858}
 859
 860static unsigned long iova_magazine_pop(struct iova_magazine *mag,
 861                                       unsigned long limit_pfn)
 862{
 863        int i;
 864        unsigned long pfn;
 865
 866        BUG_ON(iova_magazine_empty(mag));
 867
 868        /* Only fall back to the rbtree if we have no suitable pfns at all */
 869        for (i = mag->size - 1; mag->pfns[i] > limit_pfn; i--)
 870                if (i == 0)
 871                        return 0;
 872
 873        /* Swap it to pop it */
 874        pfn = mag->pfns[i];
 875        mag->pfns[i] = mag->pfns[--mag->size];
 876
 877        return pfn;
 878}
 879
 880static void iova_magazine_push(struct iova_magazine *mag, unsigned long pfn)
 881{
 882        BUG_ON(iova_magazine_full(mag));
 883
 884        mag->pfns[mag->size++] = pfn;
 885}
 886
 887static void init_iova_rcaches(struct iova_domain *iovad)
 888{
 889        struct iova_cpu_rcache *cpu_rcache;
 890        struct iova_rcache *rcache;
 891        unsigned int cpu;
 892        int i;
 893
 894        for (i = 0; i < IOVA_RANGE_CACHE_MAX_SIZE; ++i) {
 895                rcache = &iovad->rcaches[i];
 896                spin_lock_init(&rcache->lock);
 897                rcache->depot_size = 0;
 898                rcache->cpu_rcaches = __alloc_percpu(sizeof(*cpu_rcache), cache_line_size());
 899                if (WARN_ON(!rcache->cpu_rcaches))
 900                        continue;
 901                for_each_possible_cpu(cpu) {
 902                        cpu_rcache = per_cpu_ptr(rcache->cpu_rcaches, cpu);
 903                        spin_lock_init(&cpu_rcache->lock);
 904                        cpu_rcache->loaded = iova_magazine_alloc(GFP_KERNEL);
 905                        cpu_rcache->prev = iova_magazine_alloc(GFP_KERNEL);
 906                }
 907        }
 908}
 909
 910/*
 911 * Try inserting IOVA range starting with 'iova_pfn' into 'rcache', and
 912 * return true on success.  Can fail if rcache is full and we can't free
  913 * space, and free_iova_fast() (our only caller) will then return the IOVA
 914 * range to the rbtree instead.
 915 */
 916static bool __iova_rcache_insert(struct iova_domain *iovad,
 917                                 struct iova_rcache *rcache,
 918                                 unsigned long iova_pfn)
 919{
 920        struct iova_magazine *mag_to_free = NULL;
 921        struct iova_cpu_rcache *cpu_rcache;
 922        bool can_insert = false;
 923        unsigned long flags;
 924
 925        cpu_rcache = raw_cpu_ptr(rcache->cpu_rcaches);
 926        spin_lock_irqsave(&cpu_rcache->lock, flags);
 927
 928        if (!iova_magazine_full(cpu_rcache->loaded)) {
 929                can_insert = true;
 930        } else if (!iova_magazine_full(cpu_rcache->prev)) {
 931                swap(cpu_rcache->prev, cpu_rcache->loaded);
 932                can_insert = true;
 933        } else {
 934                struct iova_magazine *new_mag = iova_magazine_alloc(GFP_ATOMIC);
 935
 936                if (new_mag) {
 937                        spin_lock(&rcache->lock);
 938                        if (rcache->depot_size < MAX_GLOBAL_MAGS) {
 939                                rcache->depot[rcache->depot_size++] =
 940                                                cpu_rcache->loaded;
 941                        } else {
 942                                mag_to_free = cpu_rcache->loaded;
 943                        }
 944                        spin_unlock(&rcache->lock);
 945
 946                        cpu_rcache->loaded = new_mag;
 947                        can_insert = true;
 948                }
 949        }
 950
 951        if (can_insert)
 952                iova_magazine_push(cpu_rcache->loaded, iova_pfn);
 953
 954        spin_unlock_irqrestore(&cpu_rcache->lock, flags);
 955
 956        if (mag_to_free) {
 957                iova_magazine_free_pfns(mag_to_free, iovad);
 958                iova_magazine_free(mag_to_free);
 959        }
 960
 961        return can_insert;
 962}
 963
 964static bool iova_rcache_insert(struct iova_domain *iovad, unsigned long pfn,
 965                               unsigned long size)
 966{
 967        unsigned int log_size = order_base_2(size);
 968
 969        if (log_size >= IOVA_RANGE_CACHE_MAX_SIZE)
 970                return false;
 971
 972        return __iova_rcache_insert(iovad, &iovad->rcaches[log_size], pfn);
 973}
 974
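/*
 * Worked example (editor's note): with a 4 KiB granule, a 16 KiB range is
 * size = 4 granules and order_base_2(4) = 2, so it is cached in rcaches[2].
 * Anything of order IOVA_RANGE_CACHE_MAX_SIZE or larger bypasses the rcache
 * and is returned straight to the rbtree.
 */
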
 975/*
 976 * Caller wants to allocate a new IOVA range from 'rcache'.  If we can
  977 * satisfy the request, return a matching pfn and remove it from the
  978 * 'rcache'; a return value of 0 means the request could not be satisfied.
 979 */
 980static unsigned long __iova_rcache_get(struct iova_rcache *rcache,
 981                                       unsigned long limit_pfn)
 982{
 983        struct iova_cpu_rcache *cpu_rcache;
 984        unsigned long iova_pfn = 0;
 985        bool has_pfn = false;
 986        unsigned long flags;
 987
 988        cpu_rcache = raw_cpu_ptr(rcache->cpu_rcaches);
 989        spin_lock_irqsave(&cpu_rcache->lock, flags);
 990
 991        if (!iova_magazine_empty(cpu_rcache->loaded)) {
 992                has_pfn = true;
 993        } else if (!iova_magazine_empty(cpu_rcache->prev)) {
 994                swap(cpu_rcache->prev, cpu_rcache->loaded);
 995                has_pfn = true;
 996        } else {
 997                spin_lock(&rcache->lock);
 998                if (rcache->depot_size > 0) {
 999                        iova_magazine_free(cpu_rcache->loaded);
1000                        cpu_rcache->loaded = rcache->depot[--rcache->depot_size];
1001                        has_pfn = true;
1002                }
1003                spin_unlock(&rcache->lock);
1004        }
1005
1006        if (has_pfn)
1007                iova_pfn = iova_magazine_pop(cpu_rcache->loaded, limit_pfn);
1008
1009        spin_unlock_irqrestore(&cpu_rcache->lock, flags);
1010
1011        return iova_pfn;
1012}
1013
1014/*
1015 * Try to satisfy IOVA allocation range from rcache.  Fail if requested
 1016 * size is too big or the DMA limit we are given isn't satisfied by any
 1017 * element currently in the magazine.
1018 */
1019static unsigned long iova_rcache_get(struct iova_domain *iovad,
1020                                     unsigned long size,
1021                                     unsigned long limit_pfn)
1022{
1023        unsigned int log_size = order_base_2(size);
1024
1025        if (log_size >= IOVA_RANGE_CACHE_MAX_SIZE)
1026                return 0;
1027
1028        return __iova_rcache_get(&iovad->rcaches[log_size], limit_pfn - size);
1029}
1030
1031/*
1032 * free rcache data structures.
1033 */
1034static void free_iova_rcaches(struct iova_domain *iovad)
1035{
1036        struct iova_rcache *rcache;
1037        struct iova_cpu_rcache *cpu_rcache;
1038        unsigned int cpu;
1039        int i, j;
1040
1041        for (i = 0; i < IOVA_RANGE_CACHE_MAX_SIZE; ++i) {
1042                rcache = &iovad->rcaches[i];
1043                for_each_possible_cpu(cpu) {
1044                        cpu_rcache = per_cpu_ptr(rcache->cpu_rcaches, cpu);
1045                        iova_magazine_free(cpu_rcache->loaded);
1046                        iova_magazine_free(cpu_rcache->prev);
1047                }
1048                free_percpu(rcache->cpu_rcaches);
1049                for (j = 0; j < rcache->depot_size; ++j)
1050                        iova_magazine_free(rcache->depot[j]);
1051        }
1052}
1053
1054/*
1055 * free all the IOVA ranges cached by a cpu (used when cpu is unplugged)
1056 */
1057static void free_cpu_cached_iovas(unsigned int cpu, struct iova_domain *iovad)
1058{
1059        struct iova_cpu_rcache *cpu_rcache;
1060        struct iova_rcache *rcache;
1061        unsigned long flags;
1062        int i;
1063
1064        for (i = 0; i < IOVA_RANGE_CACHE_MAX_SIZE; ++i) {
1065                rcache = &iovad->rcaches[i];
1066                cpu_rcache = per_cpu_ptr(rcache->cpu_rcaches, cpu);
1067                spin_lock_irqsave(&cpu_rcache->lock, flags);
1068                iova_magazine_free_pfns(cpu_rcache->loaded, iovad);
1069                iova_magazine_free_pfns(cpu_rcache->prev, iovad);
1070                spin_unlock_irqrestore(&cpu_rcache->lock, flags);
1071        }
1072}
1073
1074/*
 1075 * free all the IOVA ranges held in the global (depot) cache
1076 */
1077static void free_global_cached_iovas(struct iova_domain *iovad)
1078{
1079        struct iova_rcache *rcache;
1080        unsigned long flags;
1081        int i, j;
1082
1083        for (i = 0; i < IOVA_RANGE_CACHE_MAX_SIZE; ++i) {
1084                rcache = &iovad->rcaches[i];
1085                spin_lock_irqsave(&rcache->lock, flags);
1086                for (j = 0; j < rcache->depot_size; ++j) {
1087                        iova_magazine_free_pfns(rcache->depot[j], iovad);
1088                        iova_magazine_free(rcache->depot[j]);
1089                }
1090                rcache->depot_size = 0;
1091                spin_unlock_irqrestore(&rcache->lock, flags);
1092        }
1093}
1094MODULE_AUTHOR("Anil S Keshavamurthy <anil.s.keshavamurthy@intel.com>");
1095MODULE_LICENSE("GPL");
1096